From b8853ad7a56fd6a34fe493cbccd74e777592f0bb Mon Sep 17 00:00:00 2001 From: Sean Eagan Date: Tue, 25 Aug 2020 17:26:53 +0200 Subject: [PATCH] Implement conditional remediation This commit adds support for conditional remediation, enabling the user to: * configure if test failures should be ignored * configure what action should be taken when a Helm install or upgrade action fails (e.g. rollback, uninstall) * configure if a failed Helm action should be retried * configure if a failed release should be kept for debugging purposes The previous behaviour where failed Helm tests did not mark the `HelmRelease` as not `Ready` has changed; it now marks them as failed by default. Co-authored-by: Hidde Beydals --- api/v2alpha1/condition_types.go | 3 + api/v2alpha1/helmrelease_types.go | 330 ++++++++++++++++++-------- api/v2alpha1/zz_generated.deepcopy.go | 65 +++++ controllers/helmrelease_controller.go | 199 +++++++++++----- docs/api/helmrelease.md | 235 ++++++++++++++++-- docs/spec/v2alpha1/helmreleases.md | 136 +++++++++-- go.sum | 3 + 7 files changed, 784 insertions(+), 187 deletions(-) diff --git a/api/v2alpha1/condition_types.go b/api/v2alpha1/condition_types.go index 171e2e1..d5a97c5 100644 --- a/api/v2alpha1/condition_types.go +++ b/api/v2alpha1/condition_types.go @@ -110,6 +110,9 @@ const ( // InitFailedReason represents the fact that the initialization of the Helm configuration failed. InitFailedReason string = "InitFailed" + // GetLastReleaseFailedReason represents the fact that observing the last release failed. + GetLastReleaseFailedReason string = "GetLastReleaseFailed" + // ProgressingReason represents the fact that the reconciliation for the resource is underway. 
ProgressingReason string = "Progressing" diff --git a/api/v2alpha1/helmrelease_types.go b/api/v2alpha1/helmrelease_types.go index d97cde2..d7c6943 100644 --- a/api/v2alpha1/helmrelease_types.go +++ b/api/v2alpha1/helmrelease_types.go @@ -176,6 +176,25 @@ func (in HelmChartTemplate) GetNamespace(defaultNamespace string) string { return in.SourceRef.Namespace } +// DeploymentAction defines a consistent interface for Install and Upgrade. +// +kubebuilder:object:generate=false +type DeploymentAction interface { + GetDescription() string + GetRemediation() Remediation +} + +// Remediation defines a consistent interface for InstallRemediation and UpgradeRemediation. +// +kubebuilder:object:generate=false +type Remediation interface { + GetRetries() int + MustIgnoreTestFailures(bool) bool + MustRemediateLastFailure() bool + GetStrategy() RemediationStrategy + GetFailureCount(hr HelmRelease) int64 + IncrementFailureCount(hr *HelmRelease) + RetriesExhausted(hr HelmRelease) bool +} + // Install holds the configuration for Helm install actions performed for this HelmRelease. type Install struct { // Timeout is the time to wait for any individual Kubernetes operation (like Jobs @@ -184,6 +203,12 @@ type Install struct { // +optional Timeout *metav1.Duration `json:"timeout,omitempty"` + // Remediation holds the remediation configuration for when the + // Helm install action for the HelmRelease fails. The default + // is to not perform any action. + // +optional + Remediation *InstallRemediation `json:"remediation,omitempty"` + // DisableWait disables the waiting for resources to be ready after a // Helm install has been performed. // +optional @@ -218,6 +243,80 @@ func (in Install) GetTimeout(defaultTimeout metav1.Duration) metav1.Duration { return *in.Timeout } +// GetDescription returns a description for the Helm install action. 
+func (in Install) GetDescription() string { + return "install" +} + +// GetRemediation returns the configured Remediation for the Helm install action. +func (in Install) GetRemediation() Remediation { + if in.Remediation == nil { + return InstallRemediation{} + } + return *in.Remediation +} + +// InstallRemediation holds the configuration for Helm install remediation. +type InstallRemediation struct { + // Retries is the number of retries that should be attempted on failures before + // bailing. Remediation, using an uninstall, is performed between each attempt. + // Defaults to '0', a negative integer equals to unlimited retries. + // +optional + Retries int `json:"retries,omitempty"` + + // IgnoreTestFailures tells the controller to skip remediation when + // the Helm tests are run after an install action but fail. + // Defaults to 'Test.IgnoreFailures'. + // +optional + IgnoreTestFailures *bool `json:"ignoreTestFailures,omitempty"` + + // RemediateLastFailure tells the controller to remediate the last + // failure, when no retries remain. Defaults to 'false'. + // +optional + RemediateLastFailure *bool `json:"remediateLastFailure,omitempty"` +} + +// GetRetries returns the number of retries that should be attempted on failures. +func (in InstallRemediation) GetRetries() int { + return in.Retries +} + +// MustIgnoreTestFailures returns the configured IgnoreTestFailures or the given default. +func (in InstallRemediation) MustIgnoreTestFailures(def bool) bool { + if in.IgnoreTestFailures == nil { + return def + } + return *in.IgnoreTestFailures +} + +// MustRemediateLastFailure returns whether to remediate the last failure when no retries remain. +func (in InstallRemediation) MustRemediateLastFailure() bool { + if in.RemediateLastFailure == nil { + return false + } + return *in.RemediateLastFailure +} + +// GetStrategy returns the strategy to use for failure remediation. 
+func (in InstallRemediation) GetStrategy() RemediationStrategy { + return UninstallRemediationStrategy +} + +// GetFailureCount gets the failure count. +func (in InstallRemediation) GetFailureCount(hr HelmRelease) int64 { + return hr.Status.InstallFailures +} + +// IncrementFailureCount increments the failure count. +func (in InstallRemediation) IncrementFailureCount(hr *HelmRelease) { + hr.Status.InstallFailures++ +} + +// RetriesExhausted returns true if there are no remaining retries. +func (in InstallRemediation) RetriesExhausted(hr HelmRelease) bool { + return in.Retries >= 0 && in.GetFailureCount(hr) > int64(in.Retries) +} + // Upgrade holds the configuration for Helm upgrade actions for this HelmRelease. type Upgrade struct { // Timeout is the time to wait for any individual Kubernetes operation (like Jobs @@ -226,10 +325,11 @@ type Upgrade struct { // +optional Timeout *metav1.Duration `json:"timeout,omitempty"` - // MaxRetries is the number of retries that should be attempted on failures before - // bailing. Defaults to '0', a negative integer equals to unlimited retries. + // Remediation holds the remediation configuration for when the + // Helm upgrade action for the HelmRelease fails. The default + // is to not perform any action. // +optional - MaxRetries int `json:"maxRetries,omitempty"` + Remediation *UpgradeRemediation `json:"remediation,omitempty"` // DisableWait disables the waiting for resources to be ready after a // Helm upgrade has been performed. @@ -270,6 +370,100 @@ func (in Upgrade) GetTimeout(defaultTimeout metav1.Duration) metav1.Duration { return *in.Timeout } +// GetDescription returns a description for the Helm upgrade action. +func (in Upgrade) GetDescription() string { + return "upgrade" +} + +// GetRemediation returns the configured Remediation for the Helm upgrade action. 
+func (in Upgrade) GetRemediation() Remediation { + if in.Remediation == nil { + return UpgradeRemediation{} + } + return *in.Remediation +} + +// UpgradeRemediation holds the configuration for Helm upgrade remediation. +type UpgradeRemediation struct { + // Retries is the number of retries that should be attempted on failures before + // bailing. Remediation, using 'Strategy', is performed between each attempt. + // Defaults to '0', a negative integer equals to unlimited retries. + // +optional + Retries int `json:"retries,omitempty"` + + // IgnoreTestFailures tells the controller to skip remediation when + // the Helm tests are run after an upgrade action but fail. + // Defaults to 'Test.IgnoreFailures'. + // +optional + IgnoreTestFailures *bool `json:"ignoreTestFailures,omitempty"` + + // RemediateLastFailure tells the controller to remediate the last + // failure, when no retries remain. Defaults to 'false' unless 'Retries' + // is greater than 0. + // +optional + RemediateLastFailure *bool `json:"remediateLastFailure,omitempty"` + + // Strategy to use for failure remediation. + // Defaults to 'rollback'. + // +kubebuilder:validation:Enum=rollback;uninstall + // +optional + Strategy *RemediationStrategy `json:"strategy,omitempty"` +} + +// GetRetries returns the number of retries that should be attempted on failures. +func (in UpgradeRemediation) GetRetries() int { + return in.Retries +} + +// MustIgnoreTestFailures returns the configured IgnoreTestFailures or the given default. +func (in UpgradeRemediation) MustIgnoreTestFailures(def bool) bool { + if in.IgnoreTestFailures == nil { + return def + } + return *in.IgnoreTestFailures +} + +// MustRemediateLastFailure returns whether to remediate the last failure when no retries remain. 
+func (in UpgradeRemediation) MustRemediateLastFailure() bool { + if in.RemediateLastFailure == nil { + return in.Retries > 0 + } + return *in.RemediateLastFailure +} + +// GetStrategy returns the strategy to use for failure remediation. +func (in UpgradeRemediation) GetStrategy() RemediationStrategy { + if in.Strategy == nil { + return RollbackRemediationStrategy + } + return *in.Strategy +} + +// GetFailureCount gets the failure count. +func (in UpgradeRemediation) GetFailureCount(hr HelmRelease) int64 { + return hr.Status.UpgradeFailures +} + +// IncrementFailureCount increments the failure count. +func (in UpgradeRemediation) IncrementFailureCount(hr *HelmRelease) { + hr.Status.UpgradeFailures++ +} + +// RetriesExhausted returns true if there are no remaining retries. +func (in UpgradeRemediation) RetriesExhausted(hr HelmRelease) bool { + return in.Retries >= 0 && in.GetFailureCount(hr) > int64(in.Retries) +} + +// RemediationStrategy defines the strategy to use to remediate a failed install or upgrade. +type RemediationStrategy string + +const ( + // RollbackRemediationStrategy represents a Helm remediation strategy of Helm rollback. + RollbackRemediationStrategy RemediationStrategy = "rollback" + // UninstallRemediationStrategy represents a Helm remediation strategy of Helm uninstall. + UninstallRemediationStrategy RemediationStrategy = "uninstall" +) + // Test holds the configuration for Helm test actions for this HelmRelease. type Test struct { // Enable enables Helm test actions for this HelmRelease after an @@ -282,6 +476,13 @@ type Test struct { // 'HelmReleaseSpec.Timeout'. // +optional Timeout *metav1.Duration `json:"timeout,omitempty"` + + // IgnoreFailures tells the controller to skip remediation when + // the Helm tests are run but fail. + // Can be overridden for tests run after install or upgrade actions + // in 'Install.Remediation.IgnoreTestFailures' and 'Upgrade.Remediation.IgnoreTestFailures'. 
+ // +optional + IgnoreFailures bool `json:"ignoreFailures,omitempty"` } // GetTimeout returns the configured timeout for the Helm test action, @@ -295,11 +496,6 @@ func (in Test) GetTimeout(defaultTimeout metav1.Duration) metav1.Duration { // Rollback holds the configuration for Helm rollback actions for this HelmRelease. type Rollback struct { - // Enable enables Helm rollback actions for this HelmRelease after an - // Helm install or upgrade action failure. - // +optional - Enable bool `json:"enable,omitempty"` - // Timeout is the time to wait for any individual Kubernetes operation (like Jobs // for hooks) during the performance of a Helm rollback action. Defaults to // 'HelmReleaseSpec.Timeout'. @@ -396,10 +592,17 @@ type HelmReleaseStatus struct { // +optional HelmChart string `json:"helmChart,omitempty"` - // Failures is the reconciliation failure count. It is reset after a successful - // reconciliation. + // Failures is the reconciliation failure count. // +optional Failures int64 `json:"failures,omitempty"` + + // InstallFailures is the install failure count. + // +optional + InstallFailures int64 `json:"installFailures,omitempty"` + + // UpgradeFailures is the upgrade failure count. + // +optional + UpgradeFailures int64 `json:"upgradeFailures,omitempty"` } // GetHelmChart returns the namespace and name of the HelmChart. @@ -411,18 +614,14 @@ func (in HelmReleaseStatus) GetHelmChart() (string, string) { return split[0], split[1] } -// HelmReleaseProgressing resets the conditions of the given HelmRelease to a single -// ReadyCondition with status ConditionUnknown. +// HelmReleaseProgressing resets any failures and registers progress toward reconciling the given HelmRelease +// by setting the ReadyCondition to ConditionUnknown for ProgressingReason. 
func HelmReleaseProgressing(hr HelmRelease) HelmRelease { - hr.Status.Conditions = []Condition{ - { - Type: ReadyCondition, - Status: corev1.ConditionUnknown, - LastTransitionTime: metav1.Now(), - Reason: ProgressingReason, - Message: "reconciliation in progress", - }, - } + hr.Status.Failures = 0 + hr.Status.InstallFailures = 0 + hr.Status.UpgradeFailures = 0 + hr.Status.Conditions = []Condition{} + SetHelmReleaseCondition(&hr, ReadyCondition, corev1.ConditionUnknown, ProgressingReason, "reconciliation in progress") return hr } @@ -439,88 +638,31 @@ func SetHelmReleaseCondition(hr *HelmRelease, condition string, status corev1.Co }) } -// SetHelmReleaseReadiness sets the ReadyCondition, ObservedGeneration, LastAttemptedRevision, -// and LastReleaseRevision, on the HelmRelease. -func SetHelmReleaseReadiness(hr *HelmRelease, status corev1.ConditionStatus, reason, message string, revision string, releaseRevision int, valuesChecksum string) { - SetHelmReleaseCondition(hr, ReadyCondition, status, reason, message) - hr.Status.ObservedGeneration = hr.Generation - hr.Status.LastAttemptedRevision = revision - hr.Status.LastReleaseRevision = releaseRevision - hr.Status.LastAttemptedValuesChecksum = valuesChecksum -} - // HelmReleaseNotReady registers a failed release attempt of the given HelmRelease. -func HelmReleaseNotReady(hr HelmRelease, revision string, releaseRevision int, valuesChecksum, reason, message string) HelmRelease { - SetHelmReleaseReadiness(&hr, corev1.ConditionFalse, reason, message, revision, releaseRevision, valuesChecksum) - hr.Status.Failures = hr.Status.Failures + 1 +func HelmReleaseNotReady(hr HelmRelease, reason, message string) HelmRelease { + SetHelmReleaseCondition(&hr, ReadyCondition, corev1.ConditionFalse, reason, message) + hr.Status.Failures++ return hr } // HelmReleaseReady registers a successful release attempt of the given HelmRelease. 
-func HelmReleaseReady(hr HelmRelease, revision string, releaseRevision int, valuesChecksum, reason, message string) HelmRelease { - SetHelmReleaseReadiness(&hr, corev1.ConditionTrue, reason, message, revision, releaseRevision, valuesChecksum) - hr.Status.LastAppliedRevision = revision - hr.Status.Failures = 0 +func HelmReleaseReady(hr HelmRelease, reason, message string) HelmRelease { + SetHelmReleaseCondition(&hr, ReadyCondition, corev1.ConditionTrue, reason, message) + hr.Status.LastAppliedRevision = hr.Status.LastAttemptedRevision return hr } -// ShouldUpgrade determines if an Helm upgrade action needs to be performed for the given HelmRelease. -func ShouldUpgrade(hr HelmRelease, revision string, releaseRevision int, valuesChecksum string) bool { - switch { - case hr.Status.LastAttemptedRevision != revision: - return true - case hr.Status.LastReleaseRevision != releaseRevision: - return true - case hr.Generation != hr.Status.ObservedGeneration: - return true - case hr.Status.LastAttemptedValuesChecksum != valuesChecksum: - return true - case hr.Status.Failures > 0 && - (hr.Spec.GetUpgrade().MaxRetries < 0 || hr.Status.Failures < int64(hr.Spec.GetUpgrade().MaxRetries)): - return true - default: - return false - } -} +// HelmReleaseAttempted registers an attempt of the given HelmRelease with the given state. +// and returns the modified HelmRelease and a boolean indicating a state change. +func HelmReleaseAttempted(hr HelmRelease, revision string, releaseRevision int, valuesChecksum string) (HelmRelease, bool) { + changed := hr.Status.LastAttemptedRevision != revision || + hr.Status.LastReleaseRevision != releaseRevision || + hr.Status.LastAttemptedValuesChecksum != valuesChecksum + hr.Status.LastAttemptedRevision = revision + hr.Status.LastReleaseRevision = releaseRevision + hr.Status.LastAttemptedValuesChecksum = valuesChecksum -// ShouldTest determines if a Helm test actions needs to be performed for the given HelmRelease. 
-func ShouldTest(hr HelmRelease) bool { - if hr.Spec.Test.Enable { - for _, c := range hr.Status.Conditions { - if c.Status == corev1.ConditionTrue && (c.Type == InstalledCondition || c.Type == UpgradedCondition) { - return true - } - } - } - return false -} - -// ShouldRollback determines if a Helm rollback action needs to be performed for the given HelmRelease. -func ShouldRollback(hr HelmRelease, releaseRevision int) bool { - if hr.Spec.GetRollback().Enable { - if hr.Status.LastReleaseRevision <= releaseRevision { - return false - } - for _, c := range hr.Status.Conditions { - if c.Type == UpgradedCondition && c.Status == corev1.ConditionFalse { - return true - } - } - } - return false -} - -// ShouldUninstall determines if a Helm uninstall action needs to be performed for the given HelmRelease. -func ShouldUninstall(hr HelmRelease, releaseRevision int) bool { - if releaseRevision <= 0 { - return false - } - for _, c := range hr.Status.Conditions { - if c.Type == InstalledCondition && c.Status == corev1.ConditionFalse { - return true - } - } - return false + return hr, changed } const ( diff --git a/api/v2alpha1/zz_generated.deepcopy.go b/api/v2alpha1/zz_generated.deepcopy.go index 145511a..93b9603 100644 --- a/api/v2alpha1/zz_generated.deepcopy.go +++ b/api/v2alpha1/zz_generated.deepcopy.go @@ -234,6 +234,11 @@ func (in *Install) DeepCopyInto(out *Install) { *out = new(v1.Duration) **out = **in } + if in.Remediation != nil { + in, out := &in.Remediation, &out.Remediation + *out = new(InstallRemediation) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Install. @@ -246,6 +251,31 @@ func (in *Install) DeepCopy() *Install { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *InstallRemediation) DeepCopyInto(out *InstallRemediation) { + *out = *in + if in.IgnoreTestFailures != nil { + in, out := &in.IgnoreTestFailures, &out.IgnoreTestFailures + *out = new(bool) + **out = **in + } + if in.RemediateLastFailure != nil { + in, out := &in.RemediateLastFailure, &out.RemediateLastFailure + *out = new(bool) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstallRemediation. +func (in *InstallRemediation) DeepCopy() *InstallRemediation { + if in == nil { + return nil + } + out := new(InstallRemediation) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Rollback) DeepCopyInto(out *Rollback) { *out = *in @@ -314,6 +344,11 @@ func (in *Upgrade) DeepCopyInto(out *Upgrade) { *out = new(v1.Duration) **out = **in } + if in.Remediation != nil { + in, out := &in.Remediation, &out.Remediation + *out = new(UpgradeRemediation) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Upgrade. @@ -326,6 +361,36 @@ func (in *Upgrade) DeepCopy() *Upgrade { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *UpgradeRemediation) DeepCopyInto(out *UpgradeRemediation) { + *out = *in + if in.IgnoreTestFailures != nil { + in, out := &in.IgnoreTestFailures, &out.IgnoreTestFailures + *out = new(bool) + **out = **in + } + if in.RemediateLastFailure != nil { + in, out := &in.RemediateLastFailure, &out.RemediateLastFailure + *out = new(bool) + **out = **in + } + if in.Strategy != nil { + in, out := &in.Strategy, &out.Strategy + *out = new(RemediationStrategy) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UpgradeRemediation. 
+func (in *UpgradeRemediation) DeepCopy() *UpgradeRemediation { + if in == nil { + return nil + } + out := new(UpgradeRemediation) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ValuesReference) DeepCopyInto(out *ValuesReference) { *out = *in diff --git a/controllers/helmrelease_controller.go b/controllers/helmrelease_controller.go index bf34dce..c81673a 100644 --- a/controllers/helmrelease_controller.go +++ b/controllers/helmrelease_controller.go @@ -69,6 +69,16 @@ type HelmReleaseReconciler struct { ExternalEventRecorder *recorder.EventRecorder } +// ConditionError represents an error with a status condition reason attached. +type ConditionError struct { + Reason string + Err error +} + +func (c ConditionError) Error() string { + return c.Err.Error() +} + // +kubebuilder:rbac:groups=helm.toolkit.fluxcd.io,resources=helmreleases,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=helm.toolkit.fluxcd.io,resources=helmreleases/status,verbs=get;update;patch @@ -117,7 +127,7 @@ func (r *HelmReleaseReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) if hr.Spec.Suspend { msg := "HelmRelease is suspended, skipping reconciliation" - hr = v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.SuspendedReason, msg) + hr = v2.HelmReleaseNotReady(hr, v2.SuspendedReason, msg) if err := r.Status().Update(ctx, &hr); err != nil { log.Error(err, "unable to update status") return ctrl.Result{Requeue: true}, err @@ -126,7 +136,13 @@ func (r *HelmReleaseReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) return ctrl.Result{}, nil } - hr = v2.HelmReleaseProgressing(hr) + // Observe the HelmRelease generation. 
+ hasNewGeneration := hr.Status.ObservedGeneration != hr.Generation + if hasNewGeneration { + hr.Status.ObservedGeneration = hr.Generation + hr = v2.HelmReleaseProgressing(hr) + } + if err := r.Status().Update(ctx, &hr); err != nil { log.Error(err, "unable to update status") return ctrl.Result{Requeue: true}, err @@ -143,7 +159,7 @@ func (r *HelmReleaseReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) msg = "HelmChart is not ready" r.event(hr, hr.Status.LastAttemptedRevision, recorder.EventSeverityInfo, msg) } - hr = v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.ArtifactFailedReason, msg) + hr = v2.HelmReleaseNotReady(hr, v2.ArtifactFailedReason, msg) if err := r.Status().Update(ctx, &hr); err != nil { log.Error(err, "unable to update status") return ctrl.Result{Requeue: true}, err @@ -154,7 +170,7 @@ func (r *HelmReleaseReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) // Check chart artifact readiness if hc.GetArtifact() == nil { msg := "HelmChart is not ready" - hr = v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.ArtifactFailedReason, msg) + hr = v2.HelmReleaseNotReady(hr, v2.ArtifactFailedReason, msg) r.event(hr, hr.Status.LastAttemptedRevision, recorder.EventSeverityInfo, msg) log.Info(msg) if err := r.Status().Update(ctx, &hr); err != nil { @@ -171,7 +187,7 @@ func (r *HelmReleaseReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) r.event(hr, hc.GetArtifact().Revision, recorder.EventSeverityInfo, msg) log.Info(msg) - hr = v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.DependencyNotReadyReason, err.Error()) + hr = v2.HelmReleaseNotReady(hr, v2.DependencyNotReadyReason, err.Error()) if err := r.Status().Update(ctx, &hr); err != nil { log.Error(err, "unable to update status") 
return ctrl.Result{Requeue: true}, err @@ -186,7 +202,7 @@ func (r *HelmReleaseReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) // Compose values values, err := r.composeValues(ctx, hr) if err != nil { - hr = v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.InitFailedReason, err.Error()) + hr = v2.HelmReleaseNotReady(hr, v2.InitFailedReason, err.Error()) r.event(hr, hr.Status.LastAttemptedRevision, recorder.EventSeverityError, err.Error()) if err := r.Status().Update(ctx, &hr); err != nil { log.Error(err, "unable to update status") @@ -195,7 +211,7 @@ func (r *HelmReleaseReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) return ctrl.Result{}, nil } - reconciledHr, reconcileErr := r.release(log, *hr.DeepCopy(), hc, values) + reconciledHr, reconcileErr := r.release(log, *hr.DeepCopy(), hc, values, hasNewGeneration) if reconcileErr != nil { r.event(hr, hc.GetArtifact().Revision, recorder.EventSeverityError, fmt.Sprintf("reconciliation failed: %s", reconcileErr.Error())) } @@ -279,12 +295,12 @@ func (r *HelmReleaseReconciler) reconcileChart(ctx context.Context, hr *v2.HelmR return &helmChart, true, nil } -func (r *HelmReleaseReconciler) release(log logr.Logger, hr v2.HelmRelease, source sourcev1.Source, values chartutil.Values) (v2.HelmRelease, error) { +func (r *HelmReleaseReconciler) release(log logr.Logger, hr v2.HelmRelease, source sourcev1.Source, values chartutil.Values, hasNewGeneration bool) (v2.HelmRelease, error) { // Acquire lock unlock, err := lock(fmt.Sprintf("%s-%s", hr.GetName(), hr.GetNamespace())) if err != nil { err = fmt.Errorf("lockfile error: %w", err) - return v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, sourcev1.StorageOperationFailedReason, err.Error()), err + return v2.HelmReleaseNotReady(hr, sourcev1.StorageOperationFailedReason, err.Error()), err } defer 
unlock() @@ -298,74 +314,119 @@ func (r *HelmReleaseReconciler) release(log logr.Logger, hr v2.HelmRelease, sour // Download artifact artifactPath, err := download(source.GetArtifact().URL, tmpDir) if err != nil { - return v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.ArtifactFailedReason, "artifact acquisition failed"), err + return v2.HelmReleaseNotReady(hr, v2.ArtifactFailedReason, "artifact acquisition failed"), err } // Load chart loadedChart, err := loader.Load(artifactPath) if err != nil { - return v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.ArtifactFailedReason, "failed to load chart"), err + return v2.HelmReleaseNotReady(hr, v2.ArtifactFailedReason, "failed to load chart"), err } // Initialize config cfg, err := newActionCfg(log, r.Config, hr) if err != nil { - return v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.InitFailedReason, "failed to initialize Helm action configuration"), err + return v2.HelmReleaseNotReady(hr, v2.InitFailedReason, "failed to initialize Helm action configuration"), err } - // Get the current release - rel, err := cfg.Releases.Deployed(hr.GetReleaseName()) - if err != nil && !errors.Is(err, driver.ErrNoDeployedReleases) { - return v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.InitFailedReason, "failed to determine if release exists"), err + // Determine last release revision. + rel, observeLastReleaseErr := observeLastRelease(cfg, hr) + if observeLastReleaseErr != nil { + return v2.HelmReleaseNotReady(hr, v2.GetLastReleaseFailedReason, "failed to get last release revision"), observeLastReleaseErr } + // Register the current release attempt.
+ revision := source.GetArtifact().Revision + releaseRevision := getReleaseRevision(rel) valuesChecksum := calculateValuesChecksum(values) + hr, hasNewState := v2.HelmReleaseAttempted(hr, revision, releaseRevision, valuesChecksum) + if hasNewState { + hr = v2.HelmReleaseProgressing(hr) + } - // Install or upgrade the release - success := true - if errors.Is(err, driver.ErrNoDeployedReleases) { + // Determine release deployment action. + var deployAction v2.DeploymentAction + switch { + // Install if there is none. + case rel == nil: + deployAction = hr.Spec.GetInstall() + // Upgrade if there is a new generation, new state, or this is an upgrade retry. + case hasNewGeneration || hasNewState || hr.Spec.GetUpgrade().GetRemediation().GetFailureCount(hr) > 0: + deployAction = hr.Spec.GetUpgrade() + // Otherwise no action needed. + default: + return hr, nil + } + + // Check if retries exhausted. + remediation := deployAction.GetRemediation() + if remediation.RetriesExhausted(hr) { + return hr, fmt.Errorf("%s retries exhausted", deployAction.GetDescription()) + } + + // Deploy the release. 
+ switch a := deployAction.(type) { + case v2.Install: rel, err = install(cfg, loadedChart, hr, values) - r.handleHelmActionResult(hr, source, err, "install", v2.InstalledCondition, v2.InstallSucceededReason, v2.InstallFailedReason) - success = err == nil - } else if v2.ShouldUpgrade(hr, source.GetArtifact().Revision, rel.Version, valuesChecksum) { + err = r.handleHelmActionResult(&hr, revision, err, a.GetDescription(), v2.InstalledCondition, v2.InstallSucceededReason, v2.InstallFailedReason) + case v2.Upgrade: rel, err = upgrade(cfg, loadedChart, hr, values) - r.handleHelmActionResult(hr, source, err, "upgrade", v2.UpgradedCondition, v2.UpgradeSucceededReason, v2.UpgradeFailedReason) - success = err == nil + err = r.handleHelmActionResult(&hr, revision, err, a.GetDescription(), v2.UpgradedCondition, v2.UpgradeSucceededReason, v2.UpgradeFailedReason) } - // Run tests - if v2.ShouldTest(hr) { - rel, err = test(cfg, hr) - r.handleHelmActionResult(hr, source, err, "test", v2.TestedCondition, v2.TestSucceededReason, v2.TestFailedReason) - } - - // Run rollback - if rel != nil && v2.ShouldRollback(hr, rel.Version) { - success = false - err = rollback(cfg, hr) - r.handleHelmActionResult(hr, source, err, "rollback", v2.RolledBackCondition, v2.RollbackSucceededReason, v2.RollbackFailedReason) - } - - // Determine release number after action runs - var releaseRevision int - if curRel, err := cfg.Releases.Deployed(hr.GetReleaseName()); err == nil { - releaseRevision = curRel.Version - } - - // Run uninstall - if v2.ShouldUninstall(hr, releaseRevision) { - success = false - err = uninstall(cfg, hr) - if err == nil { - releaseRevision = 0 + // Run tests if enabled and there is a successful new release revision. 
+ if getReleaseRevision(rel) > releaseRevision && err == nil && hr.Spec.GetTest().Enable { + _, testErr := test(cfg, hr) + testErr = r.handleHelmActionResult(&hr, revision, testErr, "test", v2.TestedCondition, v2.TestSucceededReason, v2.TestFailedReason) + // Propagate any test error if not marked ignored. + if testErr != nil && !remediation.MustIgnoreTestFailures(hr.Spec.GetTest().IgnoreFailures) { + err = testErr } - r.handleHelmActionResult(hr, source, err, "uninstall", v2.UninstalledCondition, v2.UninstallSucceededReason, v2.UninstallFailedReason) } - if !success { - return v2.HelmReleaseNotReady(hr, source.GetArtifact().Revision, releaseRevision, valuesChecksum, v2.ReconciliationFailedReason, "release reconciliation failed"), err + if err != nil { + // Increment failure count for deployment action. + remediation.IncrementFailureCount(&hr) + // Remediate deployment failure if necessary. + if !remediation.RetriesExhausted(hr) || remediation.MustRemediateLastFailure() { + switch { + case getReleaseRevision(rel) <= releaseRevision: + log.Info(fmt.Sprintf("skipping remediation, no new release revision created")) + case remediation.GetStrategy() == v2.RollbackRemediationStrategy: + rollbackErr := rollback(cfg, hr) + rollbackConditionErr := r.handleHelmActionResult(&hr, revision, rollbackErr, "rollback", v2.RolledBackCondition, v2.RollbackSucceededReason, v2.RollbackFailedReason) + if rollbackConditionErr != nil { + err = rollbackConditionErr + } + case remediation.GetStrategy() == v2.UninstallRemediationStrategy: + uninstallErr := uninstall(cfg, hr) + uninstallConditionErr := r.handleHelmActionResult(&hr, revision, uninstallErr, "uninstall", v2.UninstalledCondition, v2.UninstallSucceededReason, v2.UninstallFailedReason) + if uninstallConditionErr != nil { + err = uninstallConditionErr + } + } + } } - return v2.HelmReleaseReady(hr, source.GetArtifact().Revision, releaseRevision, valuesChecksum, v2.ReconciliationSucceededReason, "release reconciliation succeeded"), 
nil + + // Determine release revision after deployment/remediation. + rel, observeLastReleaseErr = observeLastRelease(cfg, hr) + if observeLastReleaseErr != nil { + err = &ConditionError{ + Reason: v2.GetLastReleaseFailedReason, + Err: errors.New("failed to get last release revision after deployment/remediation"), + } + } + hr.Status.LastReleaseRevision = getReleaseRevision(rel) + + if err != nil { + reason := v2.ReconciliationFailedReason + var cerr *ConditionError + if errors.As(err, &cerr) { + reason = cerr.Reason + } + return v2.HelmReleaseNotReady(hr, reason, err.Error()), err + } + return v2.HelmReleaseReady(hr, v2.ReconciliationSucceededReason, "release reconciliation succeeded"), nil } func (r *HelmReleaseReconciler) checkDependencies(hr v2.HelmRelease) error { @@ -493,14 +554,17 @@ func (r *HelmReleaseReconciler) composeValues(ctx context.Context, hr v2.HelmRel return mergeMaps(result, hr.GetValues()), nil } -func (r *HelmReleaseReconciler) handleHelmActionResult(hr v2.HelmRelease, source sourcev1.Source, err error, action string, condition string, succeededReason string, failedReason string) { +func (r *HelmReleaseReconciler) handleHelmActionResult(hr *v2.HelmRelease, revision string, err error, action string, condition string, succeededReason string, failedReason string) error { if err != nil { - v2.SetHelmReleaseCondition(&hr, condition, corev1.ConditionFalse, failedReason, err.Error()) - r.event(hr, source.GetArtifact().Revision, recorder.EventSeverityError, fmt.Sprintf("Helm %s failed: %s", action, err.Error())) + msg := fmt.Sprintf("Helm %s failed: %s", action, err.Error()) + v2.SetHelmReleaseCondition(hr, condition, corev1.ConditionFalse, failedReason, msg) + r.event(*hr, revision, recorder.EventSeverityError, msg) + return &ConditionError{Reason: failedReason, Err: errors.New(msg)} } else { msg := fmt.Sprintf("Helm %s succeeded", action) - v2.SetHelmReleaseCondition(&hr, condition, corev1.ConditionTrue, succeededReason, msg) - r.event(hr, 
source.GetArtifact().Revision, recorder.EventSeverityInfo, msg) + v2.SetHelmReleaseCondition(hr, condition, corev1.ConditionTrue, succeededReason, msg) + r.event(*hr, revision, recorder.EventSeverityInfo, msg) + return nil } } @@ -565,6 +629,23 @@ func helmChartRequiresUpdate(hr v2.HelmRelease, chart sourcev1.HelmChart) bool { } } +// observeLastRelease observes the last revision, if there is one, for the actual Helm release associated with the given HelmRelease. +func observeLastRelease(cfg *action.Configuration, hr v2.HelmRelease) (*release.Release, error) { + rel, err := cfg.Releases.Last(hr.GetReleaseName()) + if err != nil && errors.Is(err, driver.ErrReleaseNotFound) { + err = nil + } + return rel, err +} + +// getReleaseRevision returns the revision of the given release.Release. +func getReleaseRevision(rel *release.Release) int { + if rel == nil { + return 0 + } + return rel.Version +} + func install(cfg *action.Configuration, chart *chart.Chart, hr v2.HelmRelease, values chartutil.Values) (*release.Release, error) { install := action.NewInstall(cfg) install.ReleaseName = hr.GetReleaseName() @@ -597,7 +678,7 @@ func upgrade(cfg *action.Configuration, chart *chart.Chart, hr v2.HelmRelease, v func test(cfg *action.Configuration, hr v2.HelmRelease) (*release.Release, error) { test := action.NewReleaseTesting(cfg) test.Namespace = hr.GetReleaseNamespace() - test.Timeout = hr.Spec.Test.GetTimeout(hr.GetTimeout()).Duration + test.Timeout = hr.Spec.GetTest().GetTimeout(hr.GetTimeout()).Duration return test.Run(hr.GetReleaseName()) } diff --git a/docs/api/helmrelease.md b/docs/api/helmrelease.md index c994cc4..2d7acab 100644 --- a/docs/api/helmrelease.md +++ b/docs/api/helmrelease.md @@ -445,6 +445,9 @@ string +

DeploymentAction +

+

DeploymentAction defines a consistent interface for Install and Upgrade.

HelmChartTemplate

@@ -854,8 +857,31 @@ int64 (Optional) -

Failures is the reconciliation failure count. It is reset after a successful -reconciliation.

+

Failures is the reconciliation failure count.

+ + + + +installFailures
+ +int64 + + + +(Optional) +

InstallFailures is the install failure count.

+ + + + +upgradeFailures
+ +int64 + + + +(Optional) +

UpgradeFailures is the upgrade failure count.

@@ -897,6 +923,22 @@ for hooks) during the performance of a Helm install action. Defaults to +remediation
+ + +InstallRemediation + + + + +(Optional) +

Remediation holds the remediation configuration for when the +Helm install action for the HelmRelease fails. The default +is to not perform any action.

+ + + + disableWait
bool @@ -963,6 +1005,78 @@ CRDs are installed if not already present.

+

InstallRemediation +

+

+(Appears on: +Install) +

+

InstallRemediation holds the configuration for Helm install remediation.

+
+
+ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+retries
+ +int + +
+(Optional) +

Retries is the number of retries that should be attempted on failures before +bailing. Remediation, using an uninstall, is performed between each attempt. +Defaults to ‘0’; a negative integer means unlimited retries.

+
+ignoreTestFailures
+ +bool + +
+(Optional) +

IgnoreTestFailures tells the controller to skip remediation when +the Helm tests are run after an install action but fail. +Defaults to ‘Test.IgnoreFailures’.

+
+remediateLastFailure
+ +bool + +
+(Optional) +

RemediateLastFailure tells the controller to remediate the last +failure, when no retries remain. Defaults to ‘false’.

+
+
+
+

Remediation +

+

Remediation defines a consistent interface for InstallRemediation and UpgradeRemediation.

+

RemediationStrategy +(string alias)

+

+(Appears on: +UpgradeRemediation) +

+

RemediationStrategy defines the strategy to use to remediate a failed install or upgrade.

Rollback

@@ -982,19 +1096,6 @@ CRDs are installed if not already present.

-enable
- -bool - - - -(Optional) -

Enable enables Helm rollback actions for this HelmRelease after an -Helm install or upgrade action failure.

- - - - timeout
@@ -1121,6 +1222,21 @@ during the performance of a Helm test action. Defaults to ‘HelmReleaseSpec.Timeout’.

+ + +ignoreFailures
+ +bool + + + +(Optional) +

IgnoreFailures tells the controller to skip remediation when +the Helm tests are run but fail. +Can be overridden for tests run after install or upgrade actions +in ‘Install.Remediation.IgnoreTestFailures’ and ‘Upgrade.Remediation.IgnoreTestFailures’.

+ + @@ -1222,15 +1338,18 @@ for hooks) during the performance of a Helm upgrade action. Defaults to -maxRetries
+remediation
-int +
+UpgradeRemediation + (Optional) -

MaxRetries is the number of retries that should be attempted on failures before -bailing. Defaults to ‘0’, a negative integer equals to unlimited retries.

+

Remediation holds the remediation configuration for when the +Helm upgrade action for the HelmRelease fails. The default +is to not perform any action.

@@ -1314,6 +1433,84 @@ upgrade action when it fails.

+

UpgradeRemediation +

+

+(Appears on: +Upgrade) +

+

UpgradeRemediation holds the configuration for Helm upgrade remediation.

+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
+retries
+ +int + +
+(Optional) +

Retries is the number of retries that should be attempted on failures before +bailing. Remediation, using ‘Strategy’, is performed between each attempt. +Defaults to ‘0’; a negative integer means unlimited retries.

+
+ignoreTestFailures
+ +bool + +
+(Optional) +

IgnoreTestFailures tells the controller to skip remediation when +the Helm tests are run after an upgrade action but fail. +Defaults to ‘Test.IgnoreFailures’.

+
+remediateLastFailure
+ +bool + +
+(Optional) +

RemediateLastFailure tells the controller to remediate the last +failure, when no retries remain. Defaults to ‘false’ unless ‘Retries’ +is greater than 0.

+
+strategy
+ + +RemediationStrategy + + +
+(Optional) +

Strategy to use for failure remediation. +Defaults to ‘rollback’.

+
+
+

ValuesReference

diff --git a/docs/spec/v2alpha1/helmreleases.md b/docs/spec/v2alpha1/helmreleases.md index 0f91455..9b7b186 100644 --- a/docs/spec/v2alpha1/helmreleases.md +++ b/docs/spec/v2alpha1/helmreleases.md @@ -108,6 +108,12 @@ type Install struct { // +optional Timeout *metav1.Duration `json:"timeout,omitempty"` + // Remediation holds the remediation configuration for when the + // Helm install action for the HelmRelease fails. The default + // is to not perform any action. + // +optional + Remediation *InstallRemediation `json:"remediation,omitempty"` + // DisableWait disables the waiting for resources to be ready after a // Helm install has been performed. // +optional @@ -133,6 +139,26 @@ type Install struct { SkipCRDs bool `json:"skipCRDs,omitempty"` } +// InstallRemediation holds the configuration for Helm install remediation. +type InstallRemediation struct { + // Retries is the number of retries that should be attempted on failures before + // bailing. Remediation, using an uninstall, is performed between each attempt. + // Defaults to '0', a negative integer equals to unlimited retries. + // +optional + Retries int `json:"retries,omitempty"` + + // IgnoreTestFailures tells the controller to skip remediation when + // the Helm tests are run after an install action but fail. + // Defaults to 'Test.IgnoreFailures'. + // +optional + IgnoreTestFailures *bool `json:"ignoreTestFailures,omitempty"` + + // RemediateLastFailure tells the controller to remediate the last + // failure, when no retries remain. Defaults to 'false'. + // +optional + RemediateLastFailure *bool `json:"remediateLastFailure,omitempty"` +} + // Upgrade holds the configuration for Helm upgrade actions for this HelmRelease. 
type Upgrade struct { // Timeout is the time to wait for any individual Kubernetes operation (like Jobs @@ -141,10 +167,11 @@ type Upgrade struct { // +optional Timeout *metav1.Duration `json:"timeout,omitempty"` - // MaxRetries is the number of retries that should be attempted on failures before - // bailing. Defaults to '0', a negative integer equals to unlimited retries. + // Remediation holds the remediation configuration for when the + // Helm upgrade action for the HelmRelease fails. The default + // is to not perform any action. // +optional - MaxRetries int `json:"maxRetries,omitempty"` + Remediation *UpgradeRemediation `json:"remediation,omitempty"` // DisableWait disables the waiting for resources to be ready after a // Helm upgrade has been performed. @@ -176,6 +203,33 @@ type Upgrade struct { CleanupOnFail bool `json:"cleanupOnFail,omitempty"` } +// UpgradeRemediation holds the configuration for Helm upgrade remediation. +type UpgradeRemediation struct { + // Retries is the number of retries that should be attempted on failures before + // bailing. Remediation, using 'Strategy', is performed between each attempt. + // Defaults to '0', a negative integer equals to unlimited retries. + // +optional + Retries int `json:"retries,omitempty"` + + // IgnoreTestFailures tells the controller to skip remediation when + // the Helm tests are run after an upgrade action but fail. + // Defaults to 'Test.IgnoreFailures'. + // +optional + IgnoreTestFailures *bool `json:"ignoreTestFailures,omitempty"` + + // RemediateLastFailure tells the controller to remediate the last + // failure, when no retries remain. Defaults to 'false' unless 'Retries' + // is greater than 0. + // +optional + RemediateLastFailure *bool `json:"remediateLastFailure,omitempty"` + + // Strategy to use for failure remediation. + // Defaults to 'rollback'. 
+ // +kubebuilder:validation:Enum=rollback;uninstall + // +optional + Strategy *RemediationStrategy `json:"strategy,omitempty"` +} + // Test holds the configuration for Helm test actions for this HelmRelease. type Test struct { // Enable enables Helm test actions for this HelmRelease after an @@ -188,15 +242,17 @@ type Test struct { // 'HelmReleaseSpec.Timeout'. // +optional Timeout *metav1.Duration `json:"timeout,omitempty"` + + // IgnoreFailures tells the controller to skip remediation when + // the Helm tests are run but fail. + // Can be overridden for tests run after install or upgrade actions + // in 'Install.Remediation.IgnoreTestFailures' and 'Upgrade.Remediation.IgnoreTestFailures'. + // +optional + IgnoreFailures bool `json:"ignoreFailures,omitempty"` } // Rollback holds the configuration for Helm rollback actions for this HelmRelease. type Rollback struct { - // Enable enables Helm rollback actions for this HelmRelease after an - // Helm install or upgrade action failure. - // +optional - Enable bool `json:"enable,omitempty"` - // Timeout is the time to wait for any individual Kubernetes operation (like Jobs // for hooks) during the performance of a Helm rollback action. Defaults to // 'HelmReleaseSpec.Timeout'. @@ -365,6 +421,9 @@ const ( // InitFailedReason represents the fact that the initialization of the Helm configuration failed. InitFailedReason string = "InitFailed" + // GetLastReleaseFailedReason represents the fact that observing the last release failed. + GetLastReleaseFailedReason string = "GetLastReleaseFailed" + // ProgressingReason represents the fact that the reconciliation for the resource is underway. ProgressingReason string = "Progressing" @@ -511,14 +570,64 @@ spec: memory: 64Mi ``` -At present, rollbacks are only supported for failed upgrades. Rollback support for other failed -actions (i.e. tests) is in the scope of the controller but awaits a proper design.
+## Configuring failure remediation -## Enabling Helm test actions +By default, when a Helm action (install/upgrade/test) fails, no remediation is taken +(uninstall/rollback/retries). However, remediation can be enabled in several ways +using `spec.install.remediation` and `spec.upgrade.remediation`. + +Each of these supports `retries`, to configure the number of additional attempts after an initial +failure. A negative integer results in infinite retries. This implicitly opts in to a remediation +action between each attempt. The remediation action for install failures is an uninstall. The +remediation action for upgrade failures is by default a rollback, however +`spec.upgrade.remediation.strategy` can be set to `uninstall`, in which case after the uninstall, +the `spec.install` configuration takes over. + +One can also opt in to remediation of the last failure (when no retries remain) by: + +1. For installs, setting `spec.install.remediation.remediateLastFailure` to `true`. +2. For upgrades, setting `spec.upgrade.remediation.remediateLastFailure` to `true`, or configuring + at least one retry. + +```yaml +apiVersion: helm.toolkit.fluxcd.io/v2alpha1 +kind: HelmRelease +metadata: + name: podinfo +spec: + interval: 5m + chart: + name: podinfo + version: '^4.0.0' + sourceRef: + kind: HelmRepository + name: podinfo + interval: 1m + install: + remediation: + retries: 3 + upgrade: + remediation: + remediateLastFailure: false + values: + resources: + requests: + cpu: 100m + memory: 64Mi +``` + +## Configuring Helm test actions To make the controller run the Helm tests available for your chart after a successful Helm install or upgrade, `spec.test.enable` should be set to `true`. +By default, when tests are enabled, failures in tests are considered release failures, and thus +are subject to the triggering Helm action's `remediation` configuration. However, test failures +can be ignored by setting `spec.test.ignoreFailures` to `true`.
In this case, no remediation will +be taken, and the test failure will not affect the `Ready` status condition. This can be further +configured per Helm action by setting `spec.install.remediation.ignoreTestFailures` or +`spec.upgrade.remediation.ignoreTestFailures`, which default to `spec.test.ignoreFailures`. + ```yaml apiVersion: helm.toolkit.fluxcd.io/v2alpha1 kind: HelmRelease @@ -535,6 +644,7 @@ spec: interval: 1m test: enable: true + ignoreFailures: true values: resources: requests: @@ -542,10 +652,6 @@ spec: memory: 64Mi ``` -At present, failed tests do not mark the `HelmRelease` as not `Ready`. Making this configurable is -in the scope of the controller but awaits a proper design, as well as running them on a schedule or -for other actions than a successful Helm install or upgrade. - ## Status When the controller completes a reconciliation, it reports the result in the status sub-resource. diff --git a/go.sum b/go.sum index 25de791..7843e05 100644 --- a/go.sum +++ b/go.sum @@ -79,6 +79,7 @@ github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kB github.com/bitly/go-simplejson v0.5.0 h1:6IH+V8/tVMab511d5bn4M7EwGXZf9Hj6i2xSwkNEM+Y= github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA= github.com/blang/semver v3.1.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= +github.com/blang/semver v3.5.0+incompatible h1:CGxCgetQ64DKk7rdZ++Vfnb1+ogGNnB17OJKJXD2Cfs= github.com/blang/semver v3.5.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY= github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= @@ -469,6 +470,7 @@ github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceT github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ= 
github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw= github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= github.com/mitchellh/go-wordwrap v1.0.0 h1:6GlHJ/LTGMrIJbwgdqdl2eEH8o+Exx/0m8ir9Gns0u4= @@ -901,6 +903,7 @@ gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= gopkg.in/square/go-jose.v2 v2.2.2/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME= gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=