feat(ws): retry failed reconciles much less aggressively (#256)

* feat:Notebooks 2.0 // retry reconciliation failures less aggressively

Signed-off-by: Adriana Theodorakopoulou <atheodorak@outlook.com>

* mathew: fixes 1

Signed-off-by: Mathew Wicks <5735406+thesuperzapper@users.noreply.github.com>

---------

Signed-off-by: Adriana Theodorakopoulou <atheodorak@outlook.com>
Signed-off-by: Mathew Wicks <5735406+thesuperzapper@users.noreply.github.com>
Co-authored-by: Mathew Wicks <5735406+thesuperzapper@users.noreply.github.com>
This commit is contained in:
aTheo 2025-04-16 01:12:23 +02:00 committed by GitHub
parent c69ba08e22
commit 91484fee07
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 76 additions and 7 deletions

View File

@ -29,6 +29,7 @@ import (
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
@ -136,14 +137,18 @@ func main() {
if err = (&controllerInternal.WorkspaceReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
}).SetupWithManager(mgr); err != nil {
}).SetupWithManager(mgr, controller.Options{
RateLimiter: helper.BuildRateLimiter(),
}); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Workspace")
os.Exit(1)
}
if err = (&controllerInternal.WorkspaceKindReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
}).SetupWithManager(mgr); err != nil {
}).SetupWithManager(mgr, controller.Options{
RateLimiter: helper.BuildRateLimiter(),
}); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "WorkspaceKind")
os.Exit(1)
}

View File

@ -28,6 +28,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/controller"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
. "github.com/onsi/ginkgo/v2"
@ -112,14 +113,18 @@ var _ = BeforeSuite(func() {
err = (&WorkspaceReconciler{
Client: k8sManager.GetClient(),
Scheme: k8sManager.GetScheme(),
}).SetupWithManager(k8sManager)
}).SetupWithManager(k8sManager, controller.Options{
RateLimiter: helper.BuildRateLimiter(),
})
Expect(err).NotTo(HaveOccurred())
By("setting up the WorkspaceKind controller")
err = (&WorkspaceKindReconciler{
Client: k8sManager.GetClient(),
Scheme: k8sManager.GetScheme(),
}).SetupWithManager(k8sManager)
}).SetupWithManager(k8sManager, controller.Options{
RateLimiter: helper.BuildRateLimiter(),
})
Expect(err).NotTo(HaveOccurred())
go func() {

View File

@ -36,6 +36,7 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/log"
@ -394,7 +395,7 @@ func (r *WorkspaceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
}
// SetupWithManager sets up the controller with the Manager.
func (r *WorkspaceReconciler) SetupWithManager(mgr ctrl.Manager) error {
func (r *WorkspaceReconciler) SetupWithManager(mgr ctrl.Manager, opts controller.Options) error {
// NOTE: the SetupManagerFieldIndexers() helper in `helper/index.go` should have already been
// called on `mgr` by the time this function is called, so the indexes are already set up
@ -418,6 +419,7 @@ func (r *WorkspaceReconciler) SetupWithManager(mgr ctrl.Manager) error {
})
return ctrl.NewControllerManagedBy(mgr).
WithOptions(opts).
For(&kubefloworgv1beta1.Workspace{}).
Owns(&appsv1.StatefulSet{}).
Owns(&corev1.Service{}).

View File

@ -27,6 +27,7 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/log"
@ -153,7 +154,7 @@ func (r *WorkspaceKindReconciler) Reconcile(ctx context.Context, req ctrl.Reques
}
// SetupWithManager sets up the controller with the Manager.
func (r *WorkspaceKindReconciler) SetupWithManager(mgr ctrl.Manager) error {
func (r *WorkspaceKindReconciler) SetupWithManager(mgr ctrl.Manager, opts controller.Options) error {
// NOTE: the SetupManagerFieldIndexers() helper in `helper/index.go` should have already been
// called on `mgr` by the time this function is called, so the indexes are already set up
@ -170,6 +171,7 @@ func (r *WorkspaceKindReconciler) SetupWithManager(mgr ctrl.Manager) error {
}
return ctrl.NewControllerManagedBy(mgr).
WithOptions(opts).
For(&kubefloworgv1beta1.WorkspaceKind{}).
Watches(
&kubefloworgv1beta1.Workspace{},

View File

@ -204,7 +204,12 @@ var _ = Describe("WorkspaceKind Controller", func() {
}, timeout, interval).Should(Equal(expectedStatus))
By("having a finalizer set on the WorkspaceKind")
Expect(workspaceKind.GetFinalizers()).To(ContainElement(WorkspaceKindFinalizer))
Eventually(func() []string {
if err := k8sClient.Get(ctx, workspaceKindKey, workspaceKind); err != nil {
return nil
}
return workspaceKind.GetFinalizers()
}, timeout, interval).Should(ContainElement(WorkspaceKindFinalizer))
By("deleting the Workspace")
Expect(k8sClient.Delete(ctx, workspace)).To(Succeed())

View File

@ -0,0 +1,50 @@
/*
Copyright 2024.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package helper
import (
"time"
"golang.org/x/time/rate"
"k8s.io/client-go/util/workqueue"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)
// BuildRateLimiter returns the work-queue rate limiter used by our controllers.
//
// It combines a per-item exponential backoff (applied to items that keep failing)
// with an overall token-bucket limit, and always applies whichever of the two
// asks for the longer delay.
//
// NOTE: we don't use `DefaultTypedControllerRateLimiter` because it retries very
// aggressively, starting at 5ms!
func BuildRateLimiter() workqueue.TypedRateLimiter[reconcile.Request] {
	const (
		// per-item backoff bounds for ~failures~
		perItemBaseDelay = 1 * time.Second
		perItemMaxDelay  = 7 * time.Minute

		// overall token bucket: refill rate (tokens per second) and bucket size
		overallEventsPerSecond = 10
		overallMaxBurst        = 100
	)

	// per-item limiter
	//  - each item is delayed based on how many times it has failed
	//  - exponential backoff strategy where: delay = baseDelay * 2^failures
	//  - graph visualization: https://www.desmos.com/calculator/fexlpdmiti
	perItemLimiter := workqueue.NewTypedItemExponentialFailureRateLimiter[reconcile.Request](
		perItemBaseDelay,
		perItemMaxDelay,
	)

	// overall limiter
	//  - ignores individual items and only considers the overall rate
	//  - a "token bucket" of size overallMaxBurst that is initially full,
	//    and which is refilled at overallEventsPerSecond tokens per second
	overallBucket := rate.NewLimiter(rate.Limit(overallEventsPerSecond), overallMaxBurst)
	overallLimiter := &workqueue.TypedBucketRateLimiter[reconcile.Request]{
		Limiter: overallBucket,
	}

	// for any given item, the worst-case (longest) delay of the two limiters wins
	return workqueue.NewTypedMaxOfRateLimiter[reconcile.Request](perItemLimiter, overallLimiter)
}