jump: fix out of range

Signed-off-by: yunbo <yunbo10124scut@gmail.com>
This commit is contained in:
yunbo 2024-06-24 11:52:33 +08:00
parent d6c1809f82
commit 9f5a5341c5
3 changed files with 31 additions and 14 deletions

View File

@ -70,6 +70,11 @@ func (m *canaryReleaseManager) runCanary(c *RolloutContext) error {
if canaryStatus.PodTemplateHash == "" {
canaryStatus.PodTemplateHash = c.Workload.PodTemplateHash
}
if m.doCanaryJump(c) {
klog.Infof("rollout(%s/%s) canary step jumped", c.Rollout.Namespace, c.Rollout.Name)
return nil
}
// When the first batch is trafficRouting rolling and the next steps are rolling release,
// We need to clean up the canary-related resources first and then rollout the rest of the batch.
currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1]
@ -209,10 +214,6 @@ func (m *canaryReleaseManager) doCanaryMetricsAnalysis(c *RolloutContext) (bool,
}
func (m *canaryReleaseManager) doCanaryPaused(c *RolloutContext) (bool, error) {
if m.doCanaryJump(c) {
klog.Infof("rollout(%s/%s) canary step jumped", c.Rollout.Namespace, c.Rollout.Name)
return false, nil
}
canaryStatus := c.NewStatus.CanaryStatus
currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1]
steps := len(c.Rollout.Spec.Strategy.Canary.Steps)
@ -240,25 +241,37 @@ func (m *canaryReleaseManager) doCanaryPaused(c *RolloutContext) (bool, error) {
func (m *canaryReleaseManager) doCanaryJump(c *RolloutContext) (jumped bool) {
canaryStatus := c.NewStatus.CanaryStatus
currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1]
nextIndex := canaryStatus.NextStepIndex
if nextIndex != util.NextBatchIndex(c.Rollout, canaryStatus.CurrentStepIndex) && nextIndex > 0 {
/*
we set the CurrentStepIndex same as NextStepIndex to prevent currentStepIndex from out of range
for example, if we had a rollout with 4 steps and CurrentStepIndex was 2
then, the user removed 3 steps from the plan, we can calculate NextStepIndex is 1 correctly,
but CurrentStepIndex remains 2, which could cause out of range.
*/
resetCurrentIndex := false
if int(canaryStatus.CurrentStepIndex) > len(c.Rollout.Spec.Strategy.Canary.Steps) {
canaryStatus.CurrentStepIndex = nextIndex
resetCurrentIndex = true
}
currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1]
if resetCurrentIndex || nextIndex != util.NextBatchIndex(c.Rollout, canaryStatus.CurrentStepIndex) && nextIndex > 0 {
currentIndexBackup := canaryStatus.CurrentStepIndex
currentStepStateBackup := canaryStatus.CurrentStepState
canaryStatus.CurrentStepIndex = nextIndex
canaryStatus.NextStepIndex = util.NextBatchIndex(c.Rollout, nextIndex)
nextStep := c.Rollout.Spec.Strategy.Canary.Steps[nextIndex-1]
// if the Replicas between currentStep and nextStep is same, we can jump to
// the TrafficRouting step; otherwise, we should start from the Init step
if reflect.DeepEqual(nextStep.Replicas, currentStep.Replicas) {
if reflect.DeepEqual(nextStep.Replicas, currentStep.Replicas) && !resetCurrentIndex {
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
canaryStatus.CurrentStepState = v1beta1.CanaryStepStateTrafficRouting
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", c.Rollout.Namespace, c.Rollout.Name,
canaryStatus.CurrentStepIndex, v1beta1.CanaryStepStatePaused, canaryStatus.CurrentStepState)
canaryStatus.CurrentStepIndex, currentStepStateBackup, canaryStatus.CurrentStepState)
} else {
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
canaryStatus.CurrentStepState = v1beta1.CanaryStepStateInit
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", c.Rollout.Namespace, c.Rollout.Name,
canaryStatus.CurrentStepIndex, v1beta1.CanaryStepStatePaused, v1beta1.CanaryStepStateInit)
canaryStatus.CurrentStepIndex, currentStepStateBackup, v1beta1.CanaryStepStateInit)
}
klog.Infof("rollout(%s/%s) canary step from(%d) -> to(%d)", c.Rollout.Namespace, c.Rollout.Name, currentIndexBackup, canaryStatus.CurrentStepIndex)
return true

View File

@ -279,11 +279,15 @@ func (r *RolloutReconciler) handleRolloutPlanChanged(c *RolloutContext) error {
return nil
}
// otherwise, we jump to step paused, where the "jump" logic exists
// otherwise, execute the "jump" logic
c.NewStatus.GetSubStatus().NextStepIndex = newStepIndex
c.NewStatus.GetSubStatus().CurrentStepState = v1beta1.CanaryStepStatePaused
c.NewStatus.GetSubStatus().LastUpdateTime = &metav1.Time{Time: time.Now()}
c.NewStatus.GetSubStatus().RolloutHash = c.Rollout.Annotations[util.RolloutHashAnnotation]
releaseManager, err := r.getReleaseManager(c.Rollout)
if err != nil {
return err
}
releaseManager.doCanaryJump(c)
klog.Infof("rollout(%s/%s) canary step configuration change, and NextStepIndex(%d) state(%s)",
c.Rollout.Namespace, c.Rollout.Name, c.NewStatus.GetSubStatus().NextStepIndex, c.NewStatus.GetSubStatus().CurrentStepState)
return nil
@ -516,11 +520,10 @@ func setRolloutSucceededCondition(status *v1beta1.RolloutStatus, condStatus core
func newTrafficRoutingContext(c *RolloutContext) *trafficrouting.TrafficRoutingContext {
currentIndex := c.NewStatus.GetSubStatus().CurrentStepIndex - 1
var currentStep v1beta1.CanaryStep
//TODO - need better designed logic
if currentIndex < 0 || int(currentIndex) >= len(c.Rollout.Spec.Strategy.GetSteps()) {
klog.Warningf("Rollout(%s/%s) encounters a special case when constructing newTrafficRoutingContext", c.Rollout.Namespace, c.Rollout.Name)
// usually this only happens when deleting the rollout or rolling back
// in this situation, it's no matter which step the current is
// usually this only happens when deleting the rollout or rolling back (ie. Finalising)
// in these scenarios, it's not important which step the current is
currentStep = c.Rollout.Spec.Strategy.GetSteps()[0]
} else {
currentStep = c.Rollout.Spec.Strategy.GetSteps()[currentIndex]

View File

@ -6,6 +6,7 @@ import (
type ReleaseManager interface {
runCanary(c *RolloutContext) error
doCanaryJump(c *RolloutContext) bool
doCanaryFinalising(c *RolloutContext) (bool, error)
fetchBatchRelease(ns, name string) (*v1beta1.BatchRelease, error)
removeBatchRelease(c *RolloutContext) (bool, error)