Implement conditional remediation

This commit adds support for conditional remediation, enabling the user
to:

* configure if test failures should be ignored
* configure what action should be taken when a Helm install or upgrade
  action fails (e.g. rollback, uninstall)
* configure if a failed Helm action should be retried
* configure if a failed release should be kept for debugging purposes

The previous behaviour, where failed Helm tests did not mark the
`HelmRelease` as not `Ready`, has changed: a `HelmRelease` with failed
tests is now marked as failed (not `Ready`) by default.

Co-authored-by: Hidde Beydals <hello@hidde.co>
This commit is contained in:
Sean Eagan 2020-08-25 17:26:53 +02:00 committed by Hidde Beydals
parent 69a6f3c463
commit b8853ad7a5
7 changed files with 784 additions and 187 deletions

View File

@ -110,6 +110,9 @@ const (
// InitFailedReason represents the fact that the initialization of the Helm configuration failed.
InitFailedReason string = "InitFailed"
// GetLastReleaseFailedReason represents the fact that observing the last release failed.
GetLastReleaseFailedReason string = "GetLastReleaseFailed"
// ProgressingReason represents the fact that the reconciliation for the resource is underway.
ProgressingReason string = "Progressing"

View File

@ -176,6 +176,25 @@ func (in HelmChartTemplate) GetNamespace(defaultNamespace string) string {
return in.SourceRef.Namespace
}
// DeploymentAction defines a consistent interface for Install and Upgrade.
// +kubebuilder:object:generate=false
type DeploymentAction interface {
	// GetDescription returns a short description of the Helm action.
	GetDescription() string
	// GetRemediation returns the remediation configuration for the Helm action.
	GetRemediation() Remediation
}
// Remediation defines a consistent interface for InstallRemediation and UpgradeRemediation.
// +kubebuilder:object:generate=false
type Remediation interface {
	// GetRetries returns the number of retries that should be attempted on failures.
	GetRetries() int
	// MustIgnoreTestFailures returns the configured IgnoreTestFailures, or the
	// given default when it is not configured.
	MustIgnoreTestFailures(bool) bool
	// MustRemediateLastFailure returns whether the last failure must be
	// remediated when no retries remain.
	MustRemediateLastFailure() bool
	// GetStrategy returns the strategy to use for failure remediation.
	GetStrategy() RemediationStrategy
	// GetFailureCount returns the failure count recorded on the given HelmRelease.
	GetFailureCount(hr HelmRelease) int64
	// IncrementFailureCount increments the failure count on the given HelmRelease.
	IncrementFailureCount(hr *HelmRelease)
	// RetriesExhausted returns true if there are no remaining retries.
	RetriesExhausted(hr HelmRelease) bool
}
// Install holds the configuration for Helm install actions performed for this HelmRelease.
type Install struct {
// Timeout is the time to wait for any individual Kubernetes operation (like Jobs
@ -184,6 +203,12 @@ type Install struct {
// +optional
Timeout *metav1.Duration `json:"timeout,omitempty"`
// Remediation holds the remediation configuration for when the
// Helm install action for the HelmRelease fails. The default
// is to not perform any action.
// +optional
Remediation *InstallRemediation `json:"remediation,omitempty"`
// DisableWait disables the waiting for resources to be ready after a
// Helm install has been performed.
// +optional
@ -218,6 +243,80 @@ func (in Install) GetTimeout(defaultTimeout metav1.Duration) metav1.Duration {
return *in.Timeout
}
// GetDescription names the Helm action configured by Install, for use in
// messages about that action.
func (in Install) GetDescription() string {
	const description = "install"
	return description
}
// GetRemediation yields the Remediation to apply to the Helm install action.
// When none is configured, a zero-value InstallRemediation is returned so
// that callers always receive a usable value.
func (in Install) GetRemediation() Remediation {
	if configured := in.Remediation; configured != nil {
		return *configured
	}
	return InstallRemediation{}
}
// InstallRemediation holds the configuration for Helm install remediation.
type InstallRemediation struct {
	// Retries is the number of retries that should be attempted on failures before
	// bailing. Remediation, using an uninstall, is performed between each attempt.
	// Defaults to '0'; a negative integer means unlimited retries.
	// +optional
	Retries int `json:"retries,omitempty"`

	// IgnoreTestFailures tells the controller to skip remediation when
	// the Helm tests are run after an install action but fail.
	// Defaults to 'Test.IgnoreFailures'.
	// +optional
	IgnoreTestFailures *bool `json:"ignoreTestFailures,omitempty"`

	// RemediateLastFailure tells the controller to remediate the last
	// failure, when no retries remain. Defaults to 'false'.
	// +optional
	RemediateLastFailure *bool `json:"remediateLastFailure,omitempty"`
}
// GetRetries reports the configured number of retries to attempt after a
// failed install.
func (in InstallRemediation) GetRetries() int {
	retries := in.Retries
	return retries
}
// MustIgnoreTestFailures reports whether Helm test failures must be ignored
// for the install action. An explicitly configured IgnoreTestFailures wins;
// otherwise the supplied default is returned.
func (in InstallRemediation) MustIgnoreTestFailures(def bool) bool {
	if override := in.IgnoreTestFailures; override != nil {
		return *override
	}
	return def
}
// MustRemediateLastFailure reports whether the last failure must still be
// remediated once no retries remain. It is false unless RemediateLastFailure
// is explicitly set to true.
func (in InstallRemediation) MustRemediateLastFailure() bool {
	return in.RemediateLastFailure != nil && *in.RemediateLastFailure
}
// GetStrategy reports the remediation strategy for failed installs, which is
// always UninstallRemediationStrategy.
func (in InstallRemediation) GetStrategy() RemediationStrategy {
	const strategy = UninstallRemediationStrategy
	return strategy
}
// GetFailureCount reads the install failure count recorded in the status of
// the given HelmRelease.
func (in InstallRemediation) GetFailureCount(hr HelmRelease) int64 {
	failures := hr.Status.InstallFailures
	return failures
}
// IncrementFailureCount adds one to the install failure count recorded in the
// status of the given HelmRelease.
func (in InstallRemediation) IncrementFailureCount(hr *HelmRelease) {
	hr.Status.InstallFailures += 1
}
// RetriesExhausted reports whether the install failure count of the given
// HelmRelease exceeds the configured number of retries. A negative Retries
// value never exhausts.
func (in InstallRemediation) RetriesExhausted(hr HelmRelease) bool {
	if in.Retries < 0 {
		return false
	}
	return in.GetFailureCount(hr) > int64(in.Retries)
}
// Upgrade holds the configuration for Helm upgrade actions for this HelmRelease.
type Upgrade struct {
// Timeout is the time to wait for any individual Kubernetes operation (like Jobs
@ -226,10 +325,11 @@ type Upgrade struct {
// +optional
Timeout *metav1.Duration `json:"timeout,omitempty"`
// MaxRetries is the number of retries that should be attempted on failures before
// bailing. Defaults to '0', a negative integer equals to unlimited retries.
// Remediation holds the remediation configuration for when the
// Helm upgrade action for the HelmRelease fails. The default
// is to not perform any action.
// +optional
MaxRetries int `json:"maxRetries,omitempty"`
Remediation *UpgradeRemediation `json:"remediation,omitempty"`
// DisableWait disables the waiting for resources to be ready after a
// Helm upgrade has been performed.
@ -270,6 +370,100 @@ func (in Upgrade) GetTimeout(defaultTimeout metav1.Duration) metav1.Duration {
return *in.Timeout
}
// GetDescription names the Helm action configured by Upgrade, for use in
// messages about that action.
func (in Upgrade) GetDescription() string {
	const description = "upgrade"
	return description
}
// GetRemediation yields the Remediation to apply to the Helm upgrade action.
// When none is configured, a zero-value UpgradeRemediation is returned so
// that callers always receive a usable value.
func (in Upgrade) GetRemediation() Remediation {
	if configured := in.Remediation; configured != nil {
		return *configured
	}
	return UpgradeRemediation{}
}
// UpgradeRemediation holds the configuration for Helm upgrade remediation.
type UpgradeRemediation struct {
	// Retries is the number of retries that should be attempted on failures before
	// bailing. Remediation, using 'Strategy', is performed between each attempt.
	// Defaults to '0'; a negative integer means unlimited retries.
	// +optional
	Retries int `json:"retries,omitempty"`

	// IgnoreTestFailures tells the controller to skip remediation when
	// the Helm tests are run after an upgrade action but fail.
	// Defaults to 'Test.IgnoreFailures'.
	// +optional
	IgnoreTestFailures *bool `json:"ignoreTestFailures,omitempty"`

	// RemediateLastFailure tells the controller to remediate the last
	// failure, when no retries remain. Defaults to 'false' unless 'Retries'
	// is greater than 0.
	// +optional
	RemediateLastFailure *bool `json:"remediateLastFailure,omitempty"`

	// Strategy to use for failure remediation.
	// Defaults to 'rollback'.
	// +kubebuilder:validation:Enum=rollback;uninstall
	// +optional
	Strategy *RemediationStrategy `json:"strategy,omitempty"`
}
// GetRetries reports the configured number of retries to attempt after a
// failed upgrade.
func (in UpgradeRemediation) GetRetries() int {
	retries := in.Retries
	return retries
}
// MustIgnoreTestFailures reports whether Helm test failures must be ignored
// for the upgrade action. An explicitly configured IgnoreTestFailures wins;
// otherwise the supplied default is returned.
func (in UpgradeRemediation) MustIgnoreTestFailures(def bool) bool {
	if override := in.IgnoreTestFailures; override != nil {
		return *override
	}
	return def
}
// MustRemediateLastFailure reports whether the last failure must still be
// remediated once no retries remain. An explicitly configured
// RemediateLastFailure wins; otherwise it is true only when Retries is
// greater than 0.
func (in UpgradeRemediation) MustRemediateLastFailure() bool {
	if override := in.RemediateLastFailure; override != nil {
		return *override
	}
	return in.Retries > 0
}
// GetStrategy reports the remediation strategy for failed upgrades: the
// configured Strategy, or RollbackRemediationStrategy when none is set.
func (in UpgradeRemediation) GetStrategy() RemediationStrategy {
	if configured := in.Strategy; configured != nil {
		return *configured
	}
	return RollbackRemediationStrategy
}
// GetFailureCount reads the upgrade failure count recorded in the status of
// the given HelmRelease.
func (in UpgradeRemediation) GetFailureCount(hr HelmRelease) int64 {
	failures := hr.Status.UpgradeFailures
	return failures
}
// IncrementFailureCount adds one to the upgrade failure count recorded in the
// status of the given HelmRelease.
func (in UpgradeRemediation) IncrementFailureCount(hr *HelmRelease) {
	hr.Status.UpgradeFailures += 1
}
// RetriesExhausted reports whether the upgrade failure count of the given
// HelmRelease exceeds the configured number of retries. A negative Retries
// value never exhausts.
func (in UpgradeRemediation) RetriesExhausted(hr HelmRelease) bool {
	if in.Retries < 0 {
		return false
	}
	return in.GetFailureCount(hr) > int64(in.Retries)
}
// RemediationStrategy identifies the Helm action used to remediate a failed
// install or upgrade.
type RemediationStrategy string

const (
	// RollbackRemediationStrategy represents a Helm remediation strategy of Helm rollback.
	RollbackRemediationStrategy RemediationStrategy = "rollback"
	// UninstallRemediationStrategy represents a Helm remediation strategy of Helm uninstall.
	UninstallRemediationStrategy RemediationStrategy = "uninstall"
)
// Test holds the configuration for Helm test actions for this HelmRelease.
type Test struct {
// Enable enables Helm test actions for this HelmRelease after an
@ -282,6 +476,13 @@ type Test struct {
// 'HelmReleaseSpec.Timeout'.
// +optional
Timeout *metav1.Duration `json:"timeout,omitempty"`
// IgnoreFailures tells the controller to skip remediation when
// the Helm tests are run but fail.
// Can be overwritten for tests run after install or upgrade actions
// in 'Install.Remediation.IgnoreTestFailures' and 'Upgrade.Remediation.IgnoreTestFailures'.
// +optional
IgnoreFailures bool `json:"ignoreFailures,omitempty"`
}
// GetTimeout returns the configured timeout for the Helm test action,
@ -295,11 +496,6 @@ func (in Test) GetTimeout(defaultTimeout metav1.Duration) metav1.Duration {
// Rollback holds the configuration for Helm rollback actions for this HelmRelease.
type Rollback struct {
// Enable enables Helm rollback actions for this HelmRelease after an
// Helm install or upgrade action failure.
// +optional
Enable bool `json:"enable,omitempty"`
// Timeout is the time to wait for any individual Kubernetes operation (like Jobs
// for hooks) during the performance of a Helm rollback action. Defaults to
// 'HelmReleaseSpec.Timeout'.
@ -396,10 +592,17 @@ type HelmReleaseStatus struct {
// +optional
HelmChart string `json:"helmChart,omitempty"`
// Failures is the reconciliation failure count. It is reset after a successful
// reconciliation.
// Failures is the reconciliation failure count.
// +optional
Failures int64 `json:"failures,omitempty"`
// InstallFailures is the install failure count.
// +optional
InstallFailures int64 `json:"installFailures,omitempty"`
// UpgradeFailures is the upgrade failure count.
// +optional
UpgradeFailures int64 `json:"upgradeFailures,omitempty"`
}
// GetHelmChart returns the namespace and name of the HelmChart.
@ -411,18 +614,14 @@ func (in HelmReleaseStatus) GetHelmChart() (string, string) {
return split[0], split[1]
}
// HelmReleaseProgressing resets the conditions of the given HelmRelease to a single
// ReadyCondition with status ConditionUnknown.
// HelmReleaseProgressing resets any failures and registers progress toward reconciling the given HelmRelease
// by setting the ReadyCondition to ConditionUnknown for ProgressingReason.
func HelmReleaseProgressing(hr HelmRelease) HelmRelease {
hr.Status.Conditions = []Condition{
{
Type: ReadyCondition,
Status: corev1.ConditionUnknown,
LastTransitionTime: metav1.Now(),
Reason: ProgressingReason,
Message: "reconciliation in progress",
},
}
hr.Status.Failures = 0
hr.Status.InstallFailures = 0
hr.Status.UpgradeFailures = 0
hr.Status.Conditions = []Condition{}
SetHelmReleaseCondition(&hr, ReadyCondition, corev1.ConditionUnknown, ProgressingReason, "reconciliation in progress")
return hr
}
@ -439,88 +638,31 @@ func SetHelmReleaseCondition(hr *HelmRelease, condition string, status corev1.Co
})
}
// SetHelmReleaseReadiness sets the ReadyCondition, ObservedGeneration, LastAttemptedRevision,
// and LastReleaseRevision, on the HelmRelease.
func SetHelmReleaseReadiness(hr *HelmRelease, status corev1.ConditionStatus, reason, message string, revision string, releaseRevision int, valuesChecksum string) {
SetHelmReleaseCondition(hr, ReadyCondition, status, reason, message)
hr.Status.ObservedGeneration = hr.Generation
hr.Status.LastAttemptedRevision = revision
hr.Status.LastReleaseRevision = releaseRevision
hr.Status.LastAttemptedValuesChecksum = valuesChecksum
}
// HelmReleaseNotReady registers a failed release attempt of the given HelmRelease.
func HelmReleaseNotReady(hr HelmRelease, revision string, releaseRevision int, valuesChecksum, reason, message string) HelmRelease {
SetHelmReleaseReadiness(&hr, corev1.ConditionFalse, reason, message, revision, releaseRevision, valuesChecksum)
hr.Status.Failures = hr.Status.Failures + 1
func HelmReleaseNotReady(hr HelmRelease, reason, message string) HelmRelease {
SetHelmReleaseCondition(&hr, ReadyCondition, corev1.ConditionFalse, reason, message)
hr.Status.Failures++
return hr
}
// HelmReleaseReady registers a successful release attempt of the given HelmRelease.
func HelmReleaseReady(hr HelmRelease, revision string, releaseRevision int, valuesChecksum, reason, message string) HelmRelease {
SetHelmReleaseReadiness(&hr, corev1.ConditionTrue, reason, message, revision, releaseRevision, valuesChecksum)
hr.Status.LastAppliedRevision = revision
hr.Status.Failures = 0
func HelmReleaseReady(hr HelmRelease, reason, message string) HelmRelease {
SetHelmReleaseCondition(&hr, ReadyCondition, corev1.ConditionTrue, reason, message)
hr.Status.LastAppliedRevision = hr.Status.LastAttemptedRevision
return hr
}
// ShouldUpgrade determines if an Helm upgrade action needs to be performed for the given HelmRelease.
func ShouldUpgrade(hr HelmRelease, revision string, releaseRevision int, valuesChecksum string) bool {
switch {
case hr.Status.LastAttemptedRevision != revision:
return true
case hr.Status.LastReleaseRevision != releaseRevision:
return true
case hr.Generation != hr.Status.ObservedGeneration:
return true
case hr.Status.LastAttemptedValuesChecksum != valuesChecksum:
return true
case hr.Status.Failures > 0 &&
(hr.Spec.GetUpgrade().MaxRetries < 0 || hr.Status.Failures < int64(hr.Spec.GetUpgrade().MaxRetries)):
return true
default:
return false
}
}
// HelmReleaseAttempted registers an attempt of the given HelmRelease with the given state.
// and returns the modified HelmRelease and a boolean indicating a state change.
func HelmReleaseAttempted(hr HelmRelease, revision string, releaseRevision int, valuesChecksum string) (HelmRelease, bool) {
changed := hr.Status.LastAttemptedRevision != revision ||
hr.Status.LastReleaseRevision != releaseRevision ||
hr.Status.LastAttemptedValuesChecksum != valuesChecksum
hr.Status.LastAttemptedRevision = revision
hr.Status.LastReleaseRevision = releaseRevision
hr.Status.LastAttemptedValuesChecksum = valuesChecksum
// ShouldTest determines if a Helm test actions needs to be performed for the given HelmRelease.
func ShouldTest(hr HelmRelease) bool {
if hr.Spec.Test.Enable {
for _, c := range hr.Status.Conditions {
if c.Status == corev1.ConditionTrue && (c.Type == InstalledCondition || c.Type == UpgradedCondition) {
return true
}
}
}
return false
}
// ShouldRollback determines if a Helm rollback action needs to be performed for the given HelmRelease.
func ShouldRollback(hr HelmRelease, releaseRevision int) bool {
if hr.Spec.GetRollback().Enable {
if hr.Status.LastReleaseRevision <= releaseRevision {
return false
}
for _, c := range hr.Status.Conditions {
if c.Type == UpgradedCondition && c.Status == corev1.ConditionFalse {
return true
}
}
}
return false
}
// ShouldUninstall determines if a Helm uninstall action needs to be performed for the given HelmRelease.
func ShouldUninstall(hr HelmRelease, releaseRevision int) bool {
if releaseRevision <= 0 {
return false
}
for _, c := range hr.Status.Conditions {
if c.Type == InstalledCondition && c.Status == corev1.ConditionFalse {
return true
}
}
return false
return hr, changed
}
const (

View File

@ -234,6 +234,11 @@ func (in *Install) DeepCopyInto(out *Install) {
*out = new(v1.Duration)
**out = **in
}
if in.Remediation != nil {
in, out := &in.Remediation, &out.Remediation
*out = new(InstallRemediation)
(*in).DeepCopyInto(*out)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Install.
@ -246,6 +251,31 @@ func (in *Install) DeepCopy() *Install {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *InstallRemediation) DeepCopyInto(out *InstallRemediation) {
*out = *in
if in.IgnoreTestFailures != nil {
in, out := &in.IgnoreTestFailures, &out.IgnoreTestFailures
*out = new(bool)
**out = **in
}
if in.RemediateLastFailure != nil {
in, out := &in.RemediateLastFailure, &out.RemediateLastFailure
*out = new(bool)
**out = **in
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstallRemediation.
func (in *InstallRemediation) DeepCopy() *InstallRemediation {
if in == nil {
return nil
}
out := new(InstallRemediation)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Rollback) DeepCopyInto(out *Rollback) {
*out = *in
@ -314,6 +344,11 @@ func (in *Upgrade) DeepCopyInto(out *Upgrade) {
*out = new(v1.Duration)
**out = **in
}
if in.Remediation != nil {
in, out := &in.Remediation, &out.Remediation
*out = new(UpgradeRemediation)
(*in).DeepCopyInto(*out)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Upgrade.
@ -326,6 +361,36 @@ func (in *Upgrade) DeepCopy() *Upgrade {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *UpgradeRemediation) DeepCopyInto(out *UpgradeRemediation) {
*out = *in
if in.IgnoreTestFailures != nil {
in, out := &in.IgnoreTestFailures, &out.IgnoreTestFailures
*out = new(bool)
**out = **in
}
if in.RemediateLastFailure != nil {
in, out := &in.RemediateLastFailure, &out.RemediateLastFailure
*out = new(bool)
**out = **in
}
if in.Strategy != nil {
in, out := &in.Strategy, &out.Strategy
*out = new(RemediationStrategy)
**out = **in
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UpgradeRemediation.
func (in *UpgradeRemediation) DeepCopy() *UpgradeRemediation {
if in == nil {
return nil
}
out := new(UpgradeRemediation)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ValuesReference) DeepCopyInto(out *ValuesReference) {
*out = *in

View File

@ -69,6 +69,16 @@ type HelmReleaseReconciler struct {
ExternalEventRecorder *recorder.EventRecorder
}
// ConditionError represents an error with a status condition reason attached.
// Reason is the condition reason to report for the failure and Err is the
// underlying error.
type ConditionError struct {
	Reason string
	Err    error
}

// Error returns the message of the wrapped error. When no error is wrapped it
// falls back to Reason rather than dereferencing a nil Err.
func (c ConditionError) Error() string {
	if c.Err == nil {
		return c.Reason
	}
	return c.Err.Error()
}

// Unwrap returns the wrapped error so that errors.Is and errors.As can
// inspect the underlying cause.
func (c ConditionError) Unwrap() error {
	return c.Err
}
// +kubebuilder:rbac:groups=helm.toolkit.fluxcd.io,resources=helmreleases,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=helm.toolkit.fluxcd.io,resources=helmreleases/status,verbs=get;update;patch
@ -117,7 +127,7 @@ func (r *HelmReleaseReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error)
if hr.Spec.Suspend {
msg := "HelmRelease is suspended, skipping reconciliation"
hr = v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.SuspendedReason, msg)
hr = v2.HelmReleaseNotReady(hr, v2.SuspendedReason, msg)
if err := r.Status().Update(ctx, &hr); err != nil {
log.Error(err, "unable to update status")
return ctrl.Result{Requeue: true}, err
@ -126,7 +136,13 @@ func (r *HelmReleaseReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error)
return ctrl.Result{}, nil
}
hr = v2.HelmReleaseProgressing(hr)
// Observe the HelmRelease generation.
hasNewGeneration := hr.Status.ObservedGeneration != hr.Generation
if hasNewGeneration {
hr.Status.ObservedGeneration = hr.Generation
hr = v2.HelmReleaseProgressing(hr)
}
if err := r.Status().Update(ctx, &hr); err != nil {
log.Error(err, "unable to update status")
return ctrl.Result{Requeue: true}, err
@ -143,7 +159,7 @@ func (r *HelmReleaseReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error)
msg = "HelmChart is not ready"
r.event(hr, hr.Status.LastAttemptedRevision, recorder.EventSeverityInfo, msg)
}
hr = v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.ArtifactFailedReason, msg)
hr = v2.HelmReleaseNotReady(hr, v2.ArtifactFailedReason, msg)
if err := r.Status().Update(ctx, &hr); err != nil {
log.Error(err, "unable to update status")
return ctrl.Result{Requeue: true}, err
@ -154,7 +170,7 @@ func (r *HelmReleaseReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error)
// Check chart artifact readiness
if hc.GetArtifact() == nil {
msg := "HelmChart is not ready"
hr = v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.ArtifactFailedReason, msg)
hr = v2.HelmReleaseNotReady(hr, v2.ArtifactFailedReason, msg)
r.event(hr, hr.Status.LastAttemptedRevision, recorder.EventSeverityInfo, msg)
log.Info(msg)
if err := r.Status().Update(ctx, &hr); err != nil {
@ -171,7 +187,7 @@ func (r *HelmReleaseReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error)
r.event(hr, hc.GetArtifact().Revision, recorder.EventSeverityInfo, msg)
log.Info(msg)
hr = v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.DependencyNotReadyReason, err.Error())
hr = v2.HelmReleaseNotReady(hr, v2.DependencyNotReadyReason, err.Error())
if err := r.Status().Update(ctx, &hr); err != nil {
log.Error(err, "unable to update status")
return ctrl.Result{Requeue: true}, err
@ -186,7 +202,7 @@ func (r *HelmReleaseReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error)
// Compose values
values, err := r.composeValues(ctx, hr)
if err != nil {
hr = v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.InitFailedReason, err.Error())
hr = v2.HelmReleaseNotReady(hr, v2.InitFailedReason, err.Error())
r.event(hr, hr.Status.LastAttemptedRevision, recorder.EventSeverityError, err.Error())
if err := r.Status().Update(ctx, &hr); err != nil {
log.Error(err, "unable to update status")
@ -195,7 +211,7 @@ func (r *HelmReleaseReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error)
return ctrl.Result{}, nil
}
reconciledHr, reconcileErr := r.release(log, *hr.DeepCopy(), hc, values)
reconciledHr, reconcileErr := r.release(log, *hr.DeepCopy(), hc, values, hasNewGeneration)
if reconcileErr != nil {
r.event(hr, hc.GetArtifact().Revision, recorder.EventSeverityError, fmt.Sprintf("reconciliation failed: %s", reconcileErr.Error()))
}
@ -279,12 +295,12 @@ func (r *HelmReleaseReconciler) reconcileChart(ctx context.Context, hr *v2.HelmR
return &helmChart, true, nil
}
func (r *HelmReleaseReconciler) release(log logr.Logger, hr v2.HelmRelease, source sourcev1.Source, values chartutil.Values) (v2.HelmRelease, error) {
func (r *HelmReleaseReconciler) release(log logr.Logger, hr v2.HelmRelease, source sourcev1.Source, values chartutil.Values, hasNewGeneration bool) (v2.HelmRelease, error) {
// Acquire lock
unlock, err := lock(fmt.Sprintf("%s-%s", hr.GetName(), hr.GetNamespace()))
if err != nil {
err = fmt.Errorf("lockfile error: %w", err)
return v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, sourcev1.StorageOperationFailedReason, err.Error()), err
return v2.HelmReleaseNotReady(hr, sourcev1.StorageOperationFailedReason, err.Error()), err
}
defer unlock()
@ -298,74 +314,119 @@ func (r *HelmReleaseReconciler) release(log logr.Logger, hr v2.HelmRelease, sour
// Download artifact
artifactPath, err := download(source.GetArtifact().URL, tmpDir)
if err != nil {
return v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.ArtifactFailedReason, "artifact acquisition failed"), err
return v2.HelmReleaseNotReady(hr, v2.ArtifactFailedReason, "artifact acquisition failed"), err
}
// Load chart
loadedChart, err := loader.Load(artifactPath)
if err != nil {
return v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.ArtifactFailedReason, "failed to load chart"), err
return v2.HelmReleaseNotReady(hr, v2.ArtifactFailedReason, "failed to load chart"), err
}
// Initialize config
cfg, err := newActionCfg(log, r.Config, hr)
if err != nil {
return v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.InitFailedReason, "failed to initialize Helm action configuration"), err
return v2.HelmReleaseNotReady(hr, v2.InitFailedReason, "failed to initialize Helm action configuration"), err
}
// Get the current release
rel, err := cfg.Releases.Deployed(hr.GetReleaseName())
if err != nil && !errors.Is(err, driver.ErrNoDeployedReleases) {
return v2.HelmReleaseNotReady(hr, hr.Status.LastAttemptedRevision, hr.Status.LastReleaseRevision, hr.Status.LastAttemptedValuesChecksum, v2.InitFailedReason, "failed to determine if release exists"), err
// Determine last release revision.
rel, observeLastReleaseErr := observeLastRelease(cfg, hr)
if observeLastReleaseErr != nil {
	// Return the observation error itself: err is nil at this point, and
	// returning it would report this failure to the caller as a success.
	err = fmt.Errorf("failed to get last release revision: %w", observeLastReleaseErr)
	return v2.HelmReleaseNotReady(hr, v2.GetLastReleaseFailedReason, "failed to get last release revision"), err
}
// Register the current release attempt.
revision := source.GetArtifact().Revision
releaseRevision := getReleaseRevision(rel)
valuesChecksum := calculateValuesChecksum(values)
hr, hasNewState := v2.HelmReleaseAttempted(hr, revision, releaseRevision, valuesChecksum)
if hasNewState {
hr = v2.HelmReleaseProgressing(hr)
}
// Install or upgrade the release
success := true
if errors.Is(err, driver.ErrNoDeployedReleases) {
// Determine release deployment action.
var deployAction v2.DeploymentAction
switch {
// Install if there is none.
case rel == nil:
deployAction = hr.Spec.GetInstall()
// Upgrade if there is a new generation, new state, or this is an upgrade retry.
case hasNewGeneration || hasNewState || hr.Spec.GetUpgrade().GetRemediation().GetFailureCount(hr) > 0:
deployAction = hr.Spec.GetUpgrade()
// Otherwise no action needed.
default:
return hr, nil
}
// Check if retries exhausted.
remediation := deployAction.GetRemediation()
if remediation.RetriesExhausted(hr) {
return hr, fmt.Errorf("%s retries exhausted", deployAction.GetDescription())
}
// Deploy the release.
switch a := deployAction.(type) {
case v2.Install:
rel, err = install(cfg, loadedChart, hr, values)
r.handleHelmActionResult(hr, source, err, "install", v2.InstalledCondition, v2.InstallSucceededReason, v2.InstallFailedReason)
success = err == nil
} else if v2.ShouldUpgrade(hr, source.GetArtifact().Revision, rel.Version, valuesChecksum) {
err = r.handleHelmActionResult(&hr, revision, err, a.GetDescription(), v2.InstalledCondition, v2.InstallSucceededReason, v2.InstallFailedReason)
case v2.Upgrade:
rel, err = upgrade(cfg, loadedChart, hr, values)
r.handleHelmActionResult(hr, source, err, "upgrade", v2.UpgradedCondition, v2.UpgradeSucceededReason, v2.UpgradeFailedReason)
success = err == nil
err = r.handleHelmActionResult(&hr, revision, err, a.GetDescription(), v2.UpgradedCondition, v2.UpgradeSucceededReason, v2.UpgradeFailedReason)
}
// Run tests
if v2.ShouldTest(hr) {
rel, err = test(cfg, hr)
r.handleHelmActionResult(hr, source, err, "test", v2.TestedCondition, v2.TestSucceededReason, v2.TestFailedReason)
}
// Run rollback
if rel != nil && v2.ShouldRollback(hr, rel.Version) {
success = false
err = rollback(cfg, hr)
r.handleHelmActionResult(hr, source, err, "rollback", v2.RolledBackCondition, v2.RollbackSucceededReason, v2.RollbackFailedReason)
}
// Determine release number after action runs
var releaseRevision int
if curRel, err := cfg.Releases.Deployed(hr.GetReleaseName()); err == nil {
releaseRevision = curRel.Version
}
// Run uninstall
if v2.ShouldUninstall(hr, releaseRevision) {
success = false
err = uninstall(cfg, hr)
if err == nil {
releaseRevision = 0
// Run tests if enabled and there is a successful new release revision.
if getReleaseRevision(rel) > releaseRevision && err == nil && hr.Spec.GetTest().Enable {
_, testErr := test(cfg, hr)
testErr = r.handleHelmActionResult(&hr, revision, testErr, "test", v2.TestedCondition, v2.TestSucceededReason, v2.TestFailedReason)
// Propagate any test error if not marked ignored.
if testErr != nil && !remediation.MustIgnoreTestFailures(hr.Spec.GetTest().IgnoreFailures) {
err = testErr
}
r.handleHelmActionResult(hr, source, err, "uninstall", v2.UninstalledCondition, v2.UninstallSucceededReason, v2.UninstallFailedReason)
}
if !success {
return v2.HelmReleaseNotReady(hr, source.GetArtifact().Revision, releaseRevision, valuesChecksum, v2.ReconciliationFailedReason, "release reconciliation failed"), err
if err != nil {
// Increment failure count for deployment action.
remediation.IncrementFailureCount(&hr)
// Remediate deployment failure if necessary.
if !remediation.RetriesExhausted(hr) || remediation.MustRemediateLastFailure() {
switch {
case getReleaseRevision(rel) <= releaseRevision:
log.Info(fmt.Sprintf("skipping remediation, no new release revision created"))
case remediation.GetStrategy() == v2.RollbackRemediationStrategy:
rollbackErr := rollback(cfg, hr)
rollbackConditionErr := r.handleHelmActionResult(&hr, revision, rollbackErr, "rollback", v2.RolledBackCondition, v2.RollbackSucceededReason, v2.RollbackFailedReason)
if rollbackConditionErr != nil {
err = rollbackConditionErr
}
case remediation.GetStrategy() == v2.UninstallRemediationStrategy:
uninstallErr := uninstall(cfg, hr)
uninstallConditionErr := r.handleHelmActionResult(&hr, revision, uninstallErr, "uninstall", v2.UninstalledCondition, v2.UninstallSucceededReason, v2.UninstallFailedReason)
if uninstallConditionErr != nil {
err = uninstallConditionErr
}
}
}
}
return v2.HelmReleaseReady(hr, source.GetArtifact().Revision, releaseRevision, valuesChecksum, v2.ReconciliationSucceededReason, "release reconciliation succeeded"), nil
// Determine release revision after deployment/remediation.
rel, observeLastReleaseErr = observeLastRelease(cfg, hr)
if observeLastReleaseErr != nil {
err = &ConditionError{
Reason: v2.GetLastReleaseFailedReason,
Err: errors.New("failed to get last release revision after deployment/remediation"),
}
}
hr.Status.LastReleaseRevision = getReleaseRevision(rel)
if err != nil {
reason := v2.ReconciliationFailedReason
var cerr *ConditionError
if errors.As(err, &cerr) {
reason = cerr.Reason
}
return v2.HelmReleaseNotReady(hr, reason, err.Error()), err
}
return v2.HelmReleaseReady(hr, v2.ReconciliationSucceededReason, "release reconciliation succeeded"), nil
}
func (r *HelmReleaseReconciler) checkDependencies(hr v2.HelmRelease) error {
@ -493,14 +554,17 @@ func (r *HelmReleaseReconciler) composeValues(ctx context.Context, hr v2.HelmRel
return mergeMaps(result, hr.GetValues()), nil
}
// handleHelmActionResult records the outcome of the Helm action named by
// `action` on the HelmRelease. When err is non-nil, `condition` is set to
// False with `failedReason` and an error event is emitted; otherwise
// `condition` is set to True with `succeededReason` and an info event is
// emitted.
//
// NOTE(review): this span interleaves two revisions of the function. The
// variant that takes `hr *v2.HelmRelease` and `revision string` also returns
// a *ConditionError carrying `failedReason` on failure and nil on success;
// the variant that takes `source sourcev1.Source` has no return value.
func (r *HelmReleaseReconciler) handleHelmActionResult(hr v2.HelmRelease, source sourcev1.Source, err error, action string, condition string, succeededReason string, failedReason string) {
func (r *HelmReleaseReconciler) handleHelmActionResult(hr *v2.HelmRelease, revision string, err error, action string, condition string, succeededReason string, failedReason string) error {
if err != nil {
v2.SetHelmReleaseCondition(&hr, condition, corev1.ConditionFalse, failedReason, err.Error())
r.event(hr, source.GetArtifact().Revision, recorder.EventSeverityError, fmt.Sprintf("Helm %s failed: %s", action, err.Error()))
// The same message is used for the condition, the event and the returned error.
msg := fmt.Sprintf("Helm %s failed: %s", action, err.Error())
v2.SetHelmReleaseCondition(hr, condition, corev1.ConditionFalse, failedReason, msg)
r.event(*hr, revision, recorder.EventSeverityError, msg)
return &ConditionError{Reason: failedReason, Err: errors.New(msg)}
} else {
msg := fmt.Sprintf("Helm %s succeeded", action)
v2.SetHelmReleaseCondition(&hr, condition, corev1.ConditionTrue, succeededReason, msg)
r.event(hr, source.GetArtifact().Revision, recorder.EventSeverityInfo, msg)
v2.SetHelmReleaseCondition(hr, condition, corev1.ConditionTrue, succeededReason, msg)
r.event(*hr, revision, recorder.EventSeverityInfo, msg)
return nil
}
}
@ -565,6 +629,23 @@ func helmChartRequiresUpdate(hr v2.HelmRelease, chart sourcev1.HelmChart) bool {
}
}
// observeLastRelease fetches the most recent stored revision of the Helm
// release that belongs to the given HelmRelease. A missing release is not an
// error: driver.ErrReleaseNotFound is translated into a nil error and whatever
// cfg.Releases.Last returned is passed through unchanged. Any other error is
// returned as-is.
func observeLastRelease(cfg *action.Configuration, hr v2.HelmRelease) (*release.Release, error) {
	last, lookupErr := cfg.Releases.Last(hr.GetReleaseName())
	// errors.Is reports false for a nil error, so no separate nil check is needed.
	if errors.Is(lookupErr, driver.ErrReleaseNotFound) {
		return last, nil
	}
	return last, lookupErr
}
// getReleaseRevision reports the Version of the given release.Release, or 0
// when no release is given (rel is nil).
func getReleaseRevision(rel *release.Release) int {
	if rel != nil {
		return rel.Version
	}
	return 0
}
func install(cfg *action.Configuration, chart *chart.Chart, hr v2.HelmRelease, values chartutil.Values) (*release.Release, error) {
install := action.NewInstall(cfg)
install.ReleaseName = hr.GetReleaseName()
@ -597,7 +678,7 @@ func upgrade(cfg *action.Configuration, chart *chart.Chart, hr v2.HelmRelease, v
// test runs the Helm release-testing action for the release named by
// hr.GetReleaseName(), using the namespace from hr.GetReleaseNamespace(), and
// returns the result of action.ReleaseTesting.Run.
func test(cfg *action.Configuration, hr v2.HelmRelease) (*release.Release, error) {
test := action.NewReleaseTesting(cfg)
test.Namespace = hr.GetReleaseNamespace()
// NOTE(review): Timeout is assigned twice; the second assignment (via
// hr.Spec.GetTest()) overwrites the first. This looks like the previous and
// the current revision of the same line — confirm which one should remain.
test.Timeout = hr.Spec.Test.GetTimeout(hr.GetTimeout()).Duration
test.Timeout = hr.Spec.GetTest().GetTimeout(hr.GetTimeout()).Duration
return test.Run(hr.GetReleaseName())
}

View File

@ -445,6 +445,9 @@ string
</table>
</div>
</div>
<h3 id="helm.toolkit.fluxcd.io/v2alpha1.DeploymentAction">DeploymentAction
</h3>
<p>DeploymentAction defines a consistent interface for Install and Upgrade.</p>
<h3 id="helm.toolkit.fluxcd.io/v2alpha1.HelmChartTemplate">HelmChartTemplate
</h3>
<p>
@ -854,8 +857,31 @@ int64
</td>
<td>
<em>(Optional)</em>
<p>Failures is the reconciliation failure count. It is reset after a successful
reconciliation.</p>
<p>Failures is the reconciliation failure count.</p>
</td>
</tr>
<tr>
<td>
<code>installFailures</code><br>
<em>
int64
</em>
</td>
<td>
<em>(Optional)</em>
<p>InstallFailures is the install failure count.</p>
</td>
</tr>
<tr>
<td>
<code>upgradeFailures</code><br>
<em>
int64
</em>
</td>
<td>
<em>(Optional)</em>
<p>UpgradeFailures is the upgrade failure count.</p>
</td>
</tr>
</tbody>
@ -897,6 +923,22 @@ for hooks) during the performance of a Helm install action. Defaults to
</tr>
<tr>
<td>
<code>remediation</code><br>
<em>
<a href="#helm.toolkit.fluxcd.io/v2alpha1.InstallRemediation">
InstallRemediation
</a>
</em>
</td>
<td>
<em>(Optional)</em>
<p>Remediation holds the remediation configuration for when the
Helm install action for the HelmRelease fails. The default
is to not perform any action.</p>
</td>
</tr>
<tr>
<td>
<code>disableWait</code><br>
<em>
bool
@ -963,6 +1005,78 @@ CRDs are installed if not already present.</p>
</table>
</div>
</div>
<h3 id="helm.toolkit.fluxcd.io/v2alpha1.InstallRemediation">InstallRemediation
</h3>
<p>
(<em>Appears on:</em>
<a href="#helm.toolkit.fluxcd.io/v2alpha1.Install">Install</a>)
</p>
<p>InstallRemediation holds the configuration for Helm install remediation.</p>
<div class="md-typeset__scrollwrap">
<div class="md-typeset__table">
<table>
<thead>
<tr>
<th>Field</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<code>retries</code><br>
<em>
int
</em>
</td>
<td>
<em>(Optional)</em>
<p>Retries is the number of retries that should be attempted on failures before
bailing. Remediation, using an uninstall, is performed between each attempt.
Defaults to &lsquo;0&rsquo;; a negative integer means unlimited retries.</p>
</td>
</tr>
<tr>
<td>
<code>ignoreTestFailures</code><br>
<em>
bool
</em>
</td>
<td>
<em>(Optional)</em>
<p>IgnoreTestFailures tells the controller to skip remediation when
the Helm tests are run after an install action but fail.
Defaults to &lsquo;Test.IgnoreFailures&rsquo;.</p>
</td>
</tr>
<tr>
<td>
<code>remediateLastFailure</code><br>
<em>
bool
</em>
</td>
<td>
<em>(Optional)</em>
<p>RemediateLastFailure tells the controller to remediate the last
failure, when no retries remain. Defaults to &lsquo;false&rsquo;.</p>
</td>
</tr>
</tbody>
</table>
</div>
</div>
<h3 id="helm.toolkit.fluxcd.io/v2alpha1.Remediation">Remediation
</h3>
<p>Remediation defines a consistent interface for InstallRemediation and UpgradeRemediation.</p>
<h3 id="helm.toolkit.fluxcd.io/v2alpha1.RemediationStrategy">RemediationStrategy
(<code>string</code> alias)</h3>
<p>
(<em>Appears on:</em>
<a href="#helm.toolkit.fluxcd.io/v2alpha1.UpgradeRemediation">UpgradeRemediation</a>)
</p>
<p>RemediationStrategy defines the strategy to use to remediate a failed install or upgrade.</p>
<h3 id="helm.toolkit.fluxcd.io/v2alpha1.Rollback">Rollback
</h3>
<p>
@ -982,19 +1096,6 @@ CRDs are installed if not already present.</p>
<tbody>
<tr>
<td>
<code>enable</code><br>
<em>
bool
</em>
</td>
<td>
<em>(Optional)</em>
<p>Enable enables Helm rollback actions for this HelmRelease after an
Helm install or upgrade action failure.</p>
</td>
</tr>
<tr>
<td>
<code>timeout</code><br>
<em>
<a href="https://godoc.org/k8s.io/apimachinery/pkg/apis/meta/v1#Duration">
@ -1121,6 +1222,21 @@ during the performance of a Helm test action. Defaults to
&lsquo;HelmReleaseSpec.Timeout&rsquo;.</p>
</td>
</tr>
<tr>
<td>
<code>ignoreFailures</code><br>
<em>
bool
</em>
</td>
<td>
<em>(Optional)</em>
<p>IgnoreFailures tells the controller to skip remediation when
the Helm tests are run but fail.
Can be overridden for tests run after install or upgrade actions
in &lsquo;Install.Remediation.IgnoreTestFailures&rsquo; and &lsquo;Upgrade.Remediation.IgnoreTestFailures&rsquo;.</p>
</td>
</tr>
</tbody>
</table>
</div>
@ -1222,15 +1338,18 @@ for hooks) during the performance of a Helm upgrade action. Defaults to
</tr>
<tr>
<td>
<code>maxRetries</code><br>
<code>remediation</code><br>
<em>
int
<a href="#helm.toolkit.fluxcd.io/v2alpha1.UpgradeRemediation">
UpgradeRemediation
</a>
</em>
</td>
<td>
<em>(Optional)</em>
<p>MaxRetries is the number of retries that should be attempted on failures before
bailing. Defaults to &lsquo;0&rsquo;, a negative integer equals to unlimited retries.</p>
<p>Remediation holds the remediation configuration for when the
Helm upgrade action for the HelmRelease fails. The default
is to not perform any action.</p>
</td>
</tr>
<tr>
@ -1314,6 +1433,84 @@ upgrade action when it fails.</p>
</table>
</div>
</div>
<h3 id="helm.toolkit.fluxcd.io/v2alpha1.UpgradeRemediation">UpgradeRemediation
</h3>
<p>
(<em>Appears on:</em>
<a href="#helm.toolkit.fluxcd.io/v2alpha1.Upgrade">Upgrade</a>)
</p>
<p>UpgradeRemediation holds the configuration for Helm upgrade remediation.</p>
<div class="md-typeset__scrollwrap">
<div class="md-typeset__table">
<table>
<thead>
<tr>
<th>Field</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<code>retries</code><br>
<em>
int
</em>
</td>
<td>
<em>(Optional)</em>
<p>Retries is the number of retries that should be attempted on failures before
bailing. Remediation, using &lsquo;Strategy&rsquo;, is performed between each attempt.
Defaults to &lsquo;0&rsquo;; a negative integer means unlimited retries.</p>
</td>
</tr>
<tr>
<td>
<code>ignoreTestFailures</code><br>
<em>
bool
</em>
</td>
<td>
<em>(Optional)</em>
<p>IgnoreTestFailures tells the controller to skip remediation when
the Helm tests are run after an upgrade action but fail.
Defaults to &lsquo;Test.IgnoreFailures&rsquo;.</p>
</td>
</tr>
<tr>
<td>
<code>remediateLastFailure</code><br>
<em>
bool
</em>
</td>
<td>
<em>(Optional)</em>
<p>RemediateLastFailure tells the controller to remediate the last
failure, when no retries remain. Defaults to &lsquo;false&rsquo; unless &lsquo;Retries&rsquo;
is greater than 0.</p>
</td>
</tr>
<tr>
<td>
<code>strategy</code><br>
<em>
<a href="#helm.toolkit.fluxcd.io/v2alpha1.RemediationStrategy">
RemediationStrategy
</a>
</em>
</td>
<td>
<em>(Optional)</em>
<p>Strategy to use for failure remediation.
Defaults to &lsquo;rollback&rsquo;.</p>
</td>
</tr>
</tbody>
</table>
</div>
</div>
<h3 id="helm.toolkit.fluxcd.io/v2alpha1.ValuesReference">ValuesReference
</h3>
<p>

View File

@ -108,6 +108,12 @@ type Install struct {
// +optional
Timeout *metav1.Duration `json:"timeout,omitempty"`
// Remediation holds the remediation configuration for when the
// Helm install action for the HelmRelease fails. The default
// is to not perform any action.
// +optional
Remediation *InstallRemediation `json:"remediation,omitempty"`
// DisableWait disables the waiting for resources to be ready after a
// Helm install has been performed.
// +optional
@ -133,6 +139,26 @@ type Install struct {
SkipCRDs bool `json:"skipCRDs,omitempty"`
}
// InstallRemediation holds the configuration for Helm install remediation.
// It is referenced from the 'Remediation' field of Install.
type InstallRemediation struct {
// Retries is the number of retries that should be attempted on failures before
// bailing. Remediation, using an uninstall, is performed between each attempt.
// Defaults to '0'; a negative integer means unlimited retries.
// +optional
Retries int `json:"retries,omitempty"`
// IgnoreTestFailures tells the controller to skip remediation when
// the Helm tests are run after an install action but fail.
// Defaults to 'Test.IgnoreFailures'.
// +optional
IgnoreTestFailures *bool `json:"ignoreTestFailures,omitempty"`
// RemediateLastFailure tells the controller to remediate the last
// failure when no retries remain. Defaults to 'false'.
// +optional
RemediateLastFailure *bool `json:"remediateLastFailure,omitempty"`
}
// Upgrade holds the configuration for Helm upgrade actions for this HelmRelease.
type Upgrade struct {
// Timeout is the time to wait for any individual Kubernetes operation (like Jobs
@ -141,10 +167,11 @@ type Upgrade struct {
// +optional
Timeout *metav1.Duration `json:"timeout,omitempty"`
// MaxRetries is the number of retries that should be attempted on failures before
// bailing. Defaults to '0', a negative integer equals to unlimited retries.
// Remediation holds the remediation configuration for when the
// Helm upgrade action for the HelmRelease fails. The default
// is to not perform any action.
// +optional
MaxRetries int `json:"maxRetries,omitempty"`
Remediation *UpgradeRemediation `json:"remediation,omitempty"`
// DisableWait disables the waiting for resources to be ready after a
// Helm upgrade has been performed.
@ -176,6 +203,33 @@ type Upgrade struct {
CleanupOnFail bool `json:"cleanupOnFail,omitempty"`
}
// UpgradeRemediation holds the configuration for Helm upgrade remediation.
// It is referenced from the 'Remediation' field of Upgrade.
type UpgradeRemediation struct {
// Retries is the number of retries that should be attempted on failures before
// bailing. Remediation, using 'Strategy', is performed between each attempt.
// Defaults to '0'; a negative integer means unlimited retries.
// +optional
Retries int `json:"retries,omitempty"`
// IgnoreTestFailures tells the controller to skip remediation when
// the Helm tests are run after an upgrade action but fail.
// Defaults to 'Test.IgnoreFailures'.
// +optional
IgnoreTestFailures *bool `json:"ignoreTestFailures,omitempty"`
// RemediateLastFailure tells the controller to remediate the last
// failure when no retries remain. Defaults to 'false' unless 'Retries'
// is greater than 0.
// +optional
RemediateLastFailure *bool `json:"remediateLastFailure,omitempty"`
// Strategy is the remediation strategy to use on failure, either
// 'rollback' or 'uninstall'. Defaults to 'rollback'.
// +kubebuilder:validation:Enum=rollback;uninstall
// +optional
Strategy *RemediationStrategy `json:"strategy,omitempty"`
}
// Test holds the configuration for Helm test actions for this HelmRelease.
type Test struct {
// Enable enables Helm test actions for this HelmRelease after an
@ -188,15 +242,17 @@ type Test struct {
// 'HelmReleaseSpec.Timeout'.
// +optional
Timeout *metav1.Duration `json:"timeout,omitempty"`
// IgnoreFailures tells the controller to skip remediation when
// the Helm tests are run but fail.
// Can be overwritten for tests run after install or upgrade actions
// in 'Install.IgnoreTestFailures' and 'Upgrade.IgnoreTestFailures'.
// +optional
IgnoreFailures bool `json:"ignoreFailures,omitempty"`
}
// Rollback holds the configuration for Helm rollback actions for this HelmRelease.
type Rollback struct {
// Enable enables Helm rollback actions for this HelmRelease after an
// Helm install or upgrade action failure.
// +optional
Enable bool `json:"enable,omitempty"`
// Timeout is the time to wait for any individual Kubernetes operation (like Jobs
// for hooks) during the performance of a Helm rollback action. Defaults to
// 'HelmReleaseSpec.Timeout'.
@ -365,6 +421,9 @@ const (
// InitFailedReason represents the fact that the initialization of the Helm configuration failed.
InitFailedReason string = "InitFailed"
// GetLastReleaseFailedReason represents the fact that observing the last release failed.
GetLastReleaseFailedReason string = "GetLastReleaseFailed"
// ProgressingReason represents the fact that the reconciliation for the resource is underway.
ProgressingReason string = "Progressing"
@ -511,14 +570,64 @@ spec:
memory: 64Mi
```
At present, rollbacks are only supported for failed upgrades. Rollback support for other failed
actions (i.e. tests) is in the scope of the controller but awaits a proper design.
## Configuring failure remediation
## Enabling Helm test actions
By default, when a Helm action (install/upgrade/test) fails, no remediation action
(uninstall/rollback/retry) is taken. However, remediation can be enabled in several ways
using `spec.install.remediation` and `spec.upgrade.remediation`.
Each of these supports `retries`, which configures the number of additional attempts after an
initial failure. A negative integer results in infinite retries. Configuring retries implicitly
opts in to a remediation action between each attempt. The remediation action for install failures
is an uninstall. The remediation action for upgrade failures is a rollback by default; however,
`spec.upgrade.remediation.strategy` can be set to `uninstall`, in which case the `spec.install`
configuration takes over after the uninstall.
One can also opt-in to remediation of the last failure (when no retries remain) by:
1. For installs, setting `spec.install.remediation.remediateLastFailure` to `true`.
2. For upgrades, setting `spec.upgrade.remediation.remediateLastFailure` to `true`, or configuring
at least one retry.
```yaml
apiVersion: helm.toolkit.fluxcd.io/v2alpha1
kind: HelmRelease
metadata:
name: podinfo
spec:
interval: 5m
chart:
name: podinfo
version: '^4.0.0'
sourceRef:
kind: HelmRepository
name: podinfo
interval: 1m
install:
remediation:
retries: 3
upgrade:
remediation:
remediateLastFailure: false
values:
resources:
requests:
cpu: 100m
memory: 64Mi
```
## Configuring Helm test actions
To make the controller run the Helm tests available for your chart after a successful Helm install
or upgrade, `spec.test.enable` should be set to `true`.
By default, when tests are enabled, failures in tests are considered release failures, and thus
are subject to the triggering Helm action's `remediation` configuration. However, test failures
can be ignored by setting `spec.test.ignoreFailures` to `true`. In this case, no remediation will
be taken, and the test failure will not affect the `Ready` status condition. This can be further
configured per Helm action by setting `spec.install.remediation.ignoreTestFailures` or
`spec.upgrade.remediation.ignoreTestFailures`, which default to `spec.test.ignoreFailures`.
```yaml
apiVersion: helm.toolkit.fluxcd.io/v2alpha1
kind: HelmRelease
@ -535,6 +644,7 @@ spec:
interval: 1m
test:
enable: true
ignoreFailures: true
values:
resources:
requests:
@ -542,10 +652,6 @@ spec:
memory: 64Mi
```
At present, failed tests do not mark the `HelmRelease` as not `Ready`. Making this configurable is
in the scope of the controller but awaits a proper design, as well as running them on a schedule or
for other actions than a successful Helm install or upgrade.
## Status
When the controller completes a reconciliation, it reports the result in the status sub-resource.

3
go.sum
View File

@ -79,6 +79,7 @@ github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kB
github.com/bitly/go-simplejson v0.5.0 h1:6IH+V8/tVMab511d5bn4M7EwGXZf9Hj6i2xSwkNEM+Y=
github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA=
github.com/blang/semver v3.1.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
github.com/blang/semver v3.5.0+incompatible h1:CGxCgetQ64DKk7rdZ++Vfnb1+ogGNnB17OJKJXD2Cfs=
github.com/blang/semver v3.5.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY=
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4=
@ -469,6 +470,7 @@ github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceT
github.com/mitchellh/copystructure v1.0.0 h1:Laisrj+bAB6b/yJwB5Bt3ITZhGJdqmxquMKeZ+mmkFQ=
github.com/mitchellh/copystructure v1.0.0/go.mod h1:SNtv71yrdKgLRyLFxmLdkAbkKEFWgYaq1OVrnRcwhnw=
github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI=
github.com/mitchellh/go-wordwrap v1.0.0 h1:6GlHJ/LTGMrIJbwgdqdl2eEH8o+Exx/0m8ir9Gns0u4=
@ -901,6 +903,7 @@ gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo=
gopkg.in/square/go-jose.v2 v2.2.2/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME=
gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI=
gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=