controller: jitter requeue interval

This adds a `--interval-jitter-percentage` flag to the controller to
add a +/- percentage jitter to the interval defined in a HelmRelease
(defaults to 5%).

Effectively, this results in a reconciliation every 9.5 - 10.5 minutes
for a resource with an interval of 10 minutes.

Main reason to add this change is to mitigate spikes in memory and
CPU usage caused by many resources being configured with the same
interval.

Signed-off-by: Hidde Beydals <hidde@hhh.computer>
This commit is contained in:
Hidde Beydals 2023-08-09 16:19:03 +02:00
parent 3a98126200
commit d76f3a355b
No known key found for this signature in database
GPG Key ID: 979F380FC2341744
6 changed files with 30 additions and 6 deletions

View File

@ -70,6 +70,8 @@ type HelmReleaseSpec struct {
Chart HelmChartTemplate `json:"chart"`
// Interval at which to reconcile the Helm release.
// This interval is approximate and may be subject to jitter to ensure
// efficient use of resources.
// +kubebuilder:validation:Type=string
// +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(ms|s|m|h))+$"
// +required

View File

@ -282,7 +282,9 @@ spec:
type: string
type: object
interval:
description: Interval at which to reconcile the Helm release.
description: Interval at which to reconcile the Helm release. This
interval is approximate and may be subject to jitter to ensure efficient
use of resources.
pattern: ^([0-9]+(\.[0-9]+)?(ms|s|m|h))+$
type: string
kubeConfig:

View File

@ -92,7 +92,9 @@ Kubernetes meta/v1.Duration
</em>
</td>
<td>
<p>Interval at which to reconcile the Helm release.</p>
<p>Interval at which to reconcile the Helm release.
This interval is approximate and may be subject to jitter to ensure
efficient use of resources.</p>
</td>
</tr>
<tr>
@ -901,7 +903,9 @@ Kubernetes meta/v1.Duration
</em>
</td>
<td>
<p>Interval at which to reconcile the Helm release.</p>
<p>Interval at which to reconcile the Helm release.
This interval is approximate and may be subject to jitter to ensure
efficient use of resources.</p>
</td>
</tr>
<tr>

View File

@ -20,6 +20,8 @@ type HelmReleaseSpec struct {
Chart HelmChartTemplate `json:"chart"`
// Interval at which to reconcile the Helm release.
// This interval is approximate and may be subject to jitter to ensure
// efficient use of resources.
// +required
Interval metav1.Duration `json:"interval"`
@ -822,6 +824,11 @@ desired state, so an upgrade is made in this case as well.
The `spec.interval` tells the reconciler at which interval to reconcile the release. The
interval time units are `s`, `m` and `h` e.g. `interval: 5m`, the minimum value should be 60 seconds.
**Note:** The controller can be configured to apply a jitter to the interval in
order to distribute the load more evenly when multiple HelmRelease objects are
set up with the same interval. For more information, please refer to the
[helm-controller configuration options](https://fluxcd.io/flux/components/helm/options/).
The reconciler can be told to reconcile the `HelmRelease` outside of the specified interval
by annotating the object with a `reconcile.fluxcd.io/requestedAt` annotation. For example:

View File

@ -54,6 +54,7 @@ import (
"github.com/fluxcd/pkg/apis/meta"
"github.com/fluxcd/pkg/runtime/acl"
runtimeClient "github.com/fluxcd/pkg/runtime/client"
"github.com/fluxcd/pkg/runtime/jitter"
"github.com/fluxcd/pkg/runtime/metrics"
"github.com/fluxcd/pkg/runtime/predicates"
"github.com/fluxcd/pkg/runtime/transform"
@ -233,7 +234,7 @@ func (r *HelmReleaseReconciler) reconcile(ctx context.Context, hr v2.HelmRelease
log.Error(reconcileErr, "access denied to cross-namespace source")
r.event(ctx, hr, hr.Status.LastAttemptedRevision, eventv1.EventSeverityError, reconcileErr.Error())
return v2.HelmReleaseNotReady(hr, apiacl.AccessDeniedReason, reconcileErr.Error()),
ctrl.Result{RequeueAfter: hr.Spec.Interval.Duration}, nil
jitter.JitteredRequeueInterval(ctrl.Result{RequeueAfter: hr.GetRequeueAfter()}), nil
}
msg := fmt.Sprintf("chart reconciliation failed: %s", reconcileErr.Error())
@ -248,7 +249,7 @@ func (r *HelmReleaseReconciler) reconcile(ctx context.Context, hr v2.HelmRelease
log.Info(msg)
// Do not requeue immediately, when the artifact is created
// the watcher should trigger a reconciliation.
return v2.HelmReleaseNotReady(hr, v2.ArtifactFailedReason, msg), ctrl.Result{RequeueAfter: hc.Spec.Interval.Duration}, nil
return v2.HelmReleaseNotReady(hr, v2.ArtifactFailedReason, msg), jitter.JitteredRequeueInterval(ctrl.Result{RequeueAfter: hr.GetRequeueAfter()}), nil
}
// Check dependencies
@ -287,7 +288,7 @@ func (r *HelmReleaseReconciler) reconcile(ctx context.Context, hr v2.HelmRelease
r.event(ctx, hr, hc.GetArtifact().Revision, eventv1.EventSeverityError,
fmt.Sprintf("reconciliation failed: %s", reconcileErr.Error()))
}
return reconciledHr, ctrl.Result{RequeueAfter: hr.Spec.Interval.Duration}, reconcileErr
return reconciledHr, jitter.JitteredRequeueInterval(ctrl.Result{RequeueAfter: hr.GetRequeueAfter()}), reconcileErr
}
type HelmReleaseReconcilerOptions struct {

View File

@ -41,6 +41,7 @@ import (
helper "github.com/fluxcd/pkg/runtime/controller"
"github.com/fluxcd/pkg/runtime/events"
feathelper "github.com/fluxcd/pkg/runtime/features"
"github.com/fluxcd/pkg/runtime/jitter"
"github.com/fluxcd/pkg/runtime/leaderelection"
"github.com/fluxcd/pkg/runtime/logger"
"github.com/fluxcd/pkg/runtime/metrics"
@ -89,6 +90,7 @@ func main() {
leaderElectionOptions leaderelection.Options
rateLimiterOptions helper.RateLimiterOptions
watchOptions helper.WatchOptions
intervalJitterOptions jitter.IntervalOptions
oomWatchInterval time.Duration
oomWatchMemoryThreshold uint8
oomWatchMaxMemoryPath string
@ -128,6 +130,7 @@ func main() {
kubeConfigOpts.BindFlags(flag.CommandLine)
featureGates.BindFlags(flag.CommandLine)
watchOptions.BindFlags(flag.CommandLine)
intervalJitterOptions.BindFlags(flag.CommandLine)
flag.Parse()
@ -143,6 +146,11 @@ func main() {
metricsRecorder := metrics.NewRecorder()
crtlmetrics.Registry.MustRegister(metricsRecorder.Collectors()...)
if err := intervalJitterOptions.SetGlobalJitter(nil); err != nil {
setupLog.Error(err, "unable to set global jitter")
os.Exit(1)
}
watchNamespace := ""
if !watchOptions.AllNamespaces {
watchNamespace = os.Getenv("RUNTIME_NAMESPACE")