From 9f5a5341c58fc604f79a7cd87e09e2a949882f70 Mon Sep 17 00:00:00 2001 From: yunbo Date: Mon, 24 Jun 2024 11:52:33 +0800 Subject: [PATCH] jump: fix out of range Signed-off-by: yunbo --- pkg/controller/rollout/rollout_canary.go | 31 +++++++++++++------ pkg/controller/rollout/rollout_progressing.go | 13 +++++--- .../rollout/rollout_releaseManager.go | 1 + 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/pkg/controller/rollout/rollout_canary.go b/pkg/controller/rollout/rollout_canary.go index 5589d69..c814a75 100644 --- a/pkg/controller/rollout/rollout_canary.go +++ b/pkg/controller/rollout/rollout_canary.go @@ -70,6 +70,11 @@ func (m *canaryReleaseManager) runCanary(c *RolloutContext) error { if canaryStatus.PodTemplateHash == "" { canaryStatus.PodTemplateHash = c.Workload.PodTemplateHash } + + if m.doCanaryJump(c) { + klog.Infof("rollout(%s/%s) canary step jumped", c.Rollout.Namespace, c.Rollout.Name) + return nil + } // When the first batch is trafficRouting rolling and the next steps are rolling release, // We need to clean up the canary-related resources first and then rollout the rest of the batch. currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1] @@ -209,10 +214,6 @@ func (m *canaryReleaseManager) doCanaryMetricsAnalysis(c *RolloutContext) (bool, } func (m *canaryReleaseManager) doCanaryPaused(c *RolloutContext) (bool, error) { - if m.doCanaryJump(c) { - klog.Infof("rollout(%s/%s) canary step jumped", c.Rollout.Namespace, c.Rollout.Name) - return false, nil - } canaryStatus := c.NewStatus.CanaryStatus currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1] steps := len(c.Rollout.Spec.Strategy.Canary.Steps) @@ -240,25 +241,37 @@ func (m *canaryReleaseManager) doCanaryPaused(c *RolloutContext) (bool, error) { func (m *canaryReleaseManager) doCanaryJump(c *RolloutContext) (jumped bool) { canaryStatus := c.NewStatus.CanaryStatus - currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1] nextIndex := canaryStatus.NextStepIndex - if nextIndex != util.NextBatchIndex(c.Rollout, canaryStatus.CurrentStepIndex) && nextIndex > 0 { + /* + we set the CurrentStepIndex same as NextStepIndex to prevent currentStepIndex from out of range + for example, if we had a rollout with 4 steps and CurrentStepIndex was 2 + then, the user removed 3 steps from the plan, we can calculate NextStepIndex is 1 correctly, + but CurrentStepIndex remains 2, which could cause out of range. + */ + resetCurrentIndex := false + if int(canaryStatus.CurrentStepIndex) > len(c.Rollout.Spec.Strategy.Canary.Steps) { + canaryStatus.CurrentStepIndex = nextIndex + resetCurrentIndex = true + } + currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1] + if resetCurrentIndex || nextIndex != util.NextBatchIndex(c.Rollout, canaryStatus.CurrentStepIndex) && nextIndex > 0 { currentIndexBackup := canaryStatus.CurrentStepIndex + currentStepStateBackup := canaryStatus.CurrentStepState canaryStatus.CurrentStepIndex = nextIndex canaryStatus.NextStepIndex = util.NextBatchIndex(c.Rollout, nextIndex) nextStep := c.Rollout.Spec.Strategy.Canary.Steps[nextIndex-1] // if the Replicas between currentStep and nextStep is same, we can jump to // the TrafficRouting step; otherwise, we should start from the Init step - if reflect.DeepEqual(nextStep.Replicas, currentStep.Replicas) { + if reflect.DeepEqual(nextStep.Replicas, currentStep.Replicas) && !resetCurrentIndex { canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()} canaryStatus.CurrentStepState = v1beta1.CanaryStepStateTrafficRouting klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", c.Rollout.Namespace, c.Rollout.Name, - canaryStatus.CurrentStepIndex, v1beta1.CanaryStepStatePaused, canaryStatus.CurrentStepState) + canaryStatus.CurrentStepIndex, currentStepStateBackup, canaryStatus.CurrentStepState) } else { canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()} canaryStatus.CurrentStepState = v1beta1.CanaryStepStateInit klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", c.Rollout.Namespace, c.Rollout.Name, - canaryStatus.CurrentStepIndex, v1beta1.CanaryStepStatePaused, v1beta1.CanaryStepStateInit) + canaryStatus.CurrentStepIndex, currentStepStateBackup, v1beta1.CanaryStepStateInit) } klog.Infof("rollout(%s/%s) canary step from(%d) -> to(%d)", c.Rollout.Namespace, c.Rollout.Name, currentIndexBackup, canaryStatus.CurrentStepIndex) return true diff --git a/pkg/controller/rollout/rollout_progressing.go b/pkg/controller/rollout/rollout_progressing.go index 15c202e..24f9943 100644 --- a/pkg/controller/rollout/rollout_progressing.go +++ b/pkg/controller/rollout/rollout_progressing.go @@ -279,11 +279,15 @@ func (r *RolloutReconciler) handleRolloutPlanChanged(c *RolloutContext) error { return nil } - // otherwise, we jump to step paused, where the "jump" logic exists + // otherwise, execute the "jump" logic c.NewStatus.GetSubStatus().NextStepIndex = newStepIndex - c.NewStatus.GetSubStatus().CurrentStepState = v1beta1.CanaryStepStatePaused c.NewStatus.GetSubStatus().LastUpdateTime = &metav1.Time{Time: time.Now()} c.NewStatus.GetSubStatus().RolloutHash = c.Rollout.Annotations[util.RolloutHashAnnotation] + releaseManager, err := r.getReleaseManager(c.Rollout) + if err != nil { + return err + } + releaseManager.doCanaryJump(c) klog.Infof("rollout(%s/%s) canary step configuration change, and NextStepIndex(%d) state(%s)", c.Rollout.Namespace, c.Rollout.Name, c.NewStatus.GetSubStatus().NextStepIndex, c.NewStatus.GetSubStatus().CurrentStepState) return nil @@ -516,11 +520,10 @@ func setRolloutSucceededCondition(status *v1beta1.RolloutStatus, condStatus core func newTrafficRoutingContext(c *RolloutContext) *trafficrouting.TrafficRoutingContext { currentIndex := c.NewStatus.GetSubStatus().CurrentStepIndex - 1 var currentStep v1beta1.CanaryStep - //TODO - need better designed logic if currentIndex < 0 || int(currentIndex) >= len(c.Rollout.Spec.Strategy.GetSteps()) { klog.Warningf("Rollout(%s/%s) encounters a special case when constructing newTrafficRoutingContext", c.Rollout.Namespace, c.Rollout.Name) - // usually this only happens when deleting the rollout or rolling back - // in this situation, it's no matter which step the current is + // usually this only happens when deleting the rollout or rolling back (ie. Finalising) + // in these scenarios, it's not important which step the current is currentStep = c.Rollout.Spec.Strategy.GetSteps()[0] } else { currentStep = c.Rollout.Spec.Strategy.GetSteps()[currentIndex] diff --git a/pkg/controller/rollout/rollout_releaseManager.go b/pkg/controller/rollout/rollout_releaseManager.go index 25cb199..0177258 100644 --- a/pkg/controller/rollout/rollout_releaseManager.go +++ b/pkg/controller/rollout/rollout_releaseManager.go @@ -6,6 +6,7 @@ import ( type ReleaseManager interface { runCanary(c *RolloutContext) error + doCanaryJump(c *RolloutContext) bool doCanaryFinalising(c *RolloutContext) (bool, error) fetchBatchRelease(ns, name string) (*v1beta1.BatchRelease, error) removeBatchRelease(c *RolloutContext) (bool, error)