feat(ws): retry failed reconciles much less aggressively (#256)
* feat: Notebooks 2.0 // retry reconciliation failures less aggressively
  Signed-off-by: Adriana Theodorakopoulou <atheodorak@outlook.com>
* mathew: fixes 1
  Signed-off-by: Mathew Wicks <5735406+thesuperzapper@users.noreply.github.com>
---------
Signed-off-by: Adriana Theodorakopoulou <atheodorak@outlook.com>
Signed-off-by: Mathew Wicks <5735406+thesuperzapper@users.noreply.github.com>
Co-authored-by: Mathew Wicks <5735406+thesuperzapper@users.noreply.github.com>
This commit is contained in:
parent
c69ba08e22
commit
91484fee07
|
@ -29,6 +29,7 @@ import (
|
|||
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
||||
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
|
||||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
"sigs.k8s.io/controller-runtime/pkg/controller"
|
||||
"sigs.k8s.io/controller-runtime/pkg/healthz"
|
||||
"sigs.k8s.io/controller-runtime/pkg/log/zap"
|
||||
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
|
||||
|
@ -136,14 +137,18 @@ func main() {
|
|||
if err = (&controllerInternal.WorkspaceReconciler{
|
||||
Client: mgr.GetClient(),
|
||||
Scheme: mgr.GetScheme(),
|
||||
}).SetupWithManager(mgr); err != nil {
|
||||
}).SetupWithManager(mgr, controller.Options{
|
||||
RateLimiter: helper.BuildRateLimiter(),
|
||||
}); err != nil {
|
||||
setupLog.Error(err, "unable to create controller", "controller", "Workspace")
|
||||
os.Exit(1)
|
||||
}
|
||||
if err = (&controllerInternal.WorkspaceKindReconciler{
|
||||
Client: mgr.GetClient(),
|
||||
Scheme: mgr.GetScheme(),
|
||||
}).SetupWithManager(mgr); err != nil {
|
||||
}).SetupWithManager(mgr, controller.Options{
|
||||
RateLimiter: helper.BuildRateLimiter(),
|
||||
}); err != nil {
|
||||
setupLog.Error(err, "unable to create controller", "controller", "WorkspaceKind")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@ import (
|
|||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/utils/ptr"
|
||||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
"sigs.k8s.io/controller-runtime/pkg/controller"
|
||||
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
|
||||
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
|
@ -112,14 +113,18 @@ var _ = BeforeSuite(func() {
|
|||
err = (&WorkspaceReconciler{
|
||||
Client: k8sManager.GetClient(),
|
||||
Scheme: k8sManager.GetScheme(),
|
||||
}).SetupWithManager(k8sManager)
|
||||
}).SetupWithManager(k8sManager, controller.Options{
|
||||
RateLimiter: helper.BuildRateLimiter(),
|
||||
})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
By("setting up the WorkspaceKind controller")
|
||||
err = (&WorkspaceKindReconciler{
|
||||
Client: k8sManager.GetClient(),
|
||||
Scheme: k8sManager.GetScheme(),
|
||||
}).SetupWithManager(k8sManager)
|
||||
}).SetupWithManager(k8sManager, controller.Options{
|
||||
RateLimiter: helper.BuildRateLimiter(),
|
||||
})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
go func() {
|
||||
|
|
|
@ -36,6 +36,7 @@ import (
|
|||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
"sigs.k8s.io/controller-runtime/pkg/builder"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
"sigs.k8s.io/controller-runtime/pkg/controller"
|
||||
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
|
||||
"sigs.k8s.io/controller-runtime/pkg/handler"
|
||||
"sigs.k8s.io/controller-runtime/pkg/log"
|
||||
|
@ -394,7 +395,7 @@ func (r *WorkspaceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
|
|||
}
|
||||
|
||||
// SetupWithManager sets up the controller with the Manager.
|
||||
func (r *WorkspaceReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||
func (r *WorkspaceReconciler) SetupWithManager(mgr ctrl.Manager, opts controller.Options) error {
|
||||
|
||||
// NOTE: the SetupManagerFieldIndexers() helper in `helper/index.go` should have already been
|
||||
// called on `mgr` by the time this function is called, so the indexes are already set up
|
||||
|
@ -418,6 +419,7 @@ func (r *WorkspaceReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
|||
})
|
||||
|
||||
return ctrl.NewControllerManagedBy(mgr).
|
||||
WithOptions(opts).
|
||||
For(&kubefloworgv1beta1.Workspace{}).
|
||||
Owns(&appsv1.StatefulSet{}).
|
||||
Owns(&corev1.Service{}).
|
||||
|
|
|
@ -27,6 +27,7 @@ import (
|
|||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
"sigs.k8s.io/controller-runtime/pkg/builder"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
"sigs.k8s.io/controller-runtime/pkg/controller"
|
||||
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
|
||||
"sigs.k8s.io/controller-runtime/pkg/handler"
|
||||
"sigs.k8s.io/controller-runtime/pkg/log"
|
||||
|
@ -153,7 +154,7 @@ func (r *WorkspaceKindReconciler) Reconcile(ctx context.Context, req ctrl.Reques
|
|||
}
|
||||
|
||||
// SetupWithManager sets up the controller with the Manager.
|
||||
func (r *WorkspaceKindReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||
func (r *WorkspaceKindReconciler) SetupWithManager(mgr ctrl.Manager, opts controller.Options) error {
|
||||
|
||||
// NOTE: the SetupManagerFieldIndexers() helper in `helper/index.go` should have already been
|
||||
// called on `mgr` by the time this function is called, so the indexes are already set up
|
||||
|
@ -170,6 +171,7 @@ func (r *WorkspaceKindReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
|||
}
|
||||
|
||||
return ctrl.NewControllerManagedBy(mgr).
|
||||
WithOptions(opts).
|
||||
For(&kubefloworgv1beta1.WorkspaceKind{}).
|
||||
Watches(
|
||||
&kubefloworgv1beta1.Workspace{},
|
||||
|
|
|
@ -204,7 +204,12 @@ var _ = Describe("WorkspaceKind Controller", func() {
|
|||
}, timeout, interval).Should(Equal(expectedStatus))
|
||||
|
||||
By("having a finalizer set on the WorkspaceKind")
|
||||
Expect(workspaceKind.GetFinalizers()).To(ContainElement(WorkspaceKindFinalizer))
|
||||
Eventually(func() []string {
|
||||
if err := k8sClient.Get(ctx, workspaceKindKey, workspaceKind); err != nil {
|
||||
return nil
|
||||
}
|
||||
return workspaceKind.GetFinalizers()
|
||||
}, timeout, interval).Should(ContainElement(WorkspaceKindFinalizer))
|
||||
|
||||
By("deleting the Workspace")
|
||||
Expect(k8sClient.Delete(ctx, workspace)).To(Succeed())
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
Copyright 2024.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package helper
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"golang.org/x/time/rate"
|
||||
"k8s.io/client-go/util/workqueue"
|
||||
"sigs.k8s.io/controller-runtime/pkg/reconcile"
|
||||
)
|
||||
|
||||
// BuildRateLimiter creates a new rate limiter for our controllers.
// It combines a per-item exponential failure backoff (1s base, 7m cap) with an overall
// token-bucket limit (10 events/second, burst of 100), and returns a limiter that applies
// whichever of the two yields the longer delay for a given item.
|
||||
// NOTE: we don't use `DefaultTypedControllerRateLimiter` because it retries very aggressively, starting at 5ms!
|
||||
func BuildRateLimiter() workqueue.TypedRateLimiter[reconcile.Request] {
|
||||
// exponential backoff rate limiter
|
||||
// - this handles per-item rate limiting for ~failures~
|
||||
// - it uses an exponential backoff strategy where: delay = baseDelay * 2^failures (capped at failureMaxDelay)
|
||||
// - graph visualization: https://www.desmos.com/calculator/fexlpdmiti
|
||||
failureBaseDelay := 1 * time.Second
|
||||
failureMaxDelay := 7 * time.Minute
|
||||
failureRateLimiter := workqueue.NewTypedItemExponentialFailureRateLimiter[reconcile.Request](failureBaseDelay, failureMaxDelay)
|
||||
|
||||
// overall rate limiter
|
||||
// - this handles overall rate limiting, ignoring individual items and only considering the overall rate
|
||||
// - it implements a "token bucket" of size totalMaxBurst that is initially full,
|
||||
//   and which is refilled at a rate of totalEventsPerSecond tokens per second.
|
||||
totalEventsPerSecond := 10
|
||||
totalMaxBurst := 100
|
||||
totalRateLimiter := &workqueue.TypedBucketRateLimiter[reconcile.Request]{
|
||||
Limiter: rate.NewLimiter(rate.Limit(totalEventsPerSecond), totalMaxBurst),
|
||||
}
|
||||
|
||||
// return the worst-case (longest) of the rate limiters for a given item,
// so an item is delayed by whichever limiter asks for the longer wait
|
||||
return workqueue.NewTypedMaxOfRateLimiter[reconcile.Request](failureRateLimiter, totalRateLimiter)
|
||||
}
|
Loading…
Reference in New Issue