improve finalising logic for canary release (#229)

improve finalising logic for canary release-2

Signed-off-by: yunbo <yunbo10124scut@gmail.com>
Co-authored-by: yunbo <yunbo10124scut@gmail.com>
This commit is contained in:
myname4423 2024-09-04 17:29:58 +08:00 committed by GitHub
parent 5378dc2cf7
commit d2613132aa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 287 additions and 72 deletions

View File

@ -171,8 +171,6 @@ type RolloutStatus struct {
// Conditions a list of conditions a rollout can have.
// +optional
Conditions []RolloutCondition `json:"conditions,omitempty"`
// +optional
//BlueGreenStatus *BlueGreenStatus `json:"blueGreenStatus,omitempty"`
// Phase is the rollout phase.
Phase RolloutPhase `json:"phase,omitempty"`
// Message provides details on why the rollout is in its current phase

View File

@ -445,7 +445,7 @@ type CanaryStatus struct {
// BlueGreenStatus status fields that only pertain to the blueGreen rollout
type BlueGreenStatus struct {
CommonStatus `json:",inline"`
// CanaryRevision is calculated by rollout based on podTemplateHash, and the internal logic flow uses
// UpdatedRevision is calculated by rollout based on podTemplateHash, and the internal logic flow uses
// It may be different from rs podTemplateHash in different k8s versions, so it cannot be used as service selector label
UpdatedRevision string `json:"updatedRevision"`
// UpdatedReplicas the numbers of updated pods
@ -558,29 +558,29 @@ const (
type FinalisingStepType string
const (
// some work that should be done before pod scaling down.
// For BlueGreenStrategy:
// we rout all traffic to stable or new version based on FinaliseReason
// For CanaryStrategy:
// we remove the selector of stable service
FinalisingStepTypePreparing FinalisingStepType = "Preparing"
// Route all traffic to stable or new version based on FinaliseReason (for bluegreen)
FinalisingStepTypeRouteAllTraffic FinalisingStepType = "RouteAllTraffic"
// Restore the stable Service, i.e. remove corresponding selector
FinalisingStepTypeStableService FinalisingStepType = "RestoreStableService"
// Remove the canary Service
FinalisingStepTypeRemoveCanaryService FinalisingStepType = "RemoveCanaryService"
// Patch Batch Release to scale down (exception: the canary Deployment will be
// scaled down in FinalisingStepTypeDeleteBR step)
// For Both BlueGreenStrategy and CanaryStrategy:
// set workload.pause=false, set workload.partition=0
FinalisingStepTypeBatchRelease FinalisingStepType = "PatchBatchRelease"
//TODO - Currently, the next three steps are in the same function, FinalisingTrafficRouting
// we should try to separate the FinalisingStepTypeGateway and FinalisingStepTypeCanaryService
// with graceful time to prevent some potential issues
// Restore the stable Service (i.e. remove corresponding selector)
FinalisingStepTypeStableService FinalisingStepType = "RestoreStableService"
// Execute the FinalisingTrafficRouting function
FinalisingStepTypeTrafficRouting FinalisingStepType = "FinalisingTrafficRouting"
// Restore the GatewayAPI/Ingress/Istio
FinalisingStepTypeGateway FinalisingStepType = "RestoreGateway"
// Delete Canary Service
FinalisingStepTypeDeleteCanaryService FinalisingStepType = "DeleteCanaryService"
// Delete Batch Release
FinalisingStepTypeDeleteBR FinalisingStepType = "DeleteBatchRelease"
// All needed work done
FinalisingStepTypeEnd FinalisingStepType = "END"
)
// +genclient

View File

@ -534,8 +534,7 @@ spec:
format: int64
type: integer
phase:
description: BlueGreenStatus *BlueGreenStatus `json:"blueGreenStatus,omitempty"`
Phase is the rollout phase.
description: Phase is the rollout phase.
type: string
type: object
type: object
@ -1475,7 +1474,7 @@ spec:
format: int32
type: integer
updatedRevision:
description: CanaryRevision is calculated by rollout based on
description: UpdatedRevision is calculated by rollout based on
podTemplateHash, and the internal logic flow uses It may be
different from rs podTemplateHash in different k8s versions,
so it cannot be used as service selector label

View File

@ -363,43 +363,66 @@ func (m *canaryReleaseManager) doCanaryJump(c *RolloutContext) (jumped bool) {
// cleanup after rollout is completed or finished
func (m *canaryReleaseManager) doCanaryFinalising(c *RolloutContext) (bool, error) {
canaryStatus := c.NewStatus.CanaryStatus
// when CanaryStatus is nil, which means canary action hasn't started yet, don't need doing cleanup
if c.NewStatus.CanaryStatus == nil {
if canaryStatus == nil {
return true, nil
}
// 1. rollout progressing complete, remove rollout progressing annotation in workload
// rollout progressing complete, remove rollout progressing annotation in workload
err := m.removeRolloutProgressingAnnotation(c)
if err != nil {
return false, err
}
tr := newTrafficRoutingContext(c)
// 2. remove stable service the pod revision selector, so stable service will be selector all version pods.
done, err := m.trafficRoutingManager.FinalisingTrafficRouting(tr)
c.NewStatus.CanaryStatus.LastUpdateTime = tr.LastUpdateTime
if err != nil || !done {
return done, err
// execute steps based on the predefined order for each reason
nextStep := nextTask(c.FinalizeReason, canaryStatus.FinalisingStep)
// if current step is empty, set it with the first step
// if current step is end, we just return
if len(canaryStatus.FinalisingStep) == 0 {
canaryStatus.FinalisingStep = nextStep
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
} else if canaryStatus.FinalisingStep == v1beta1.FinalisingStepTypeEnd {
klog.Infof("rollout(%s/%s) finalising process is already completed", c.Rollout.Namespace, c.Rollout.Name)
return true, nil
}
// 3. set workload.pause=false; set workload.partition=0
done, err = m.finalizingBatchRelease(c)
if err != nil || !done {
return done, err
}
// 4. modify network api(ingress or gateway api) configuration, and route 100% traffic to stable pods.
done, err = m.trafficRoutingManager.FinalisingTrafficRouting(tr)
c.NewStatus.CanaryStatus.LastUpdateTime = tr.LastUpdateTime
if err != nil || !done {
return done, err
}
// 5. delete batchRelease crd
done, err = m.removeBatchRelease(c)
if err != nil {
klog.Errorf("rollout(%s/%s) Finalize batchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return false, err
} else if !done {
klog.Infof("rollout(%s/%s) Finalising Step is %s", c.Rollout.Namespace, c.Rollout.Name, canaryStatus.FinalisingStep)
var retry bool
// the order of steps is maitained by calculating thenextStep
switch canaryStatus.FinalisingStep {
// set workload.pause=false; set workload.partition=0
case v1beta1.FinalisingStepTypeBatchRelease:
retry, err = m.finalizingBatchRelease(c)
// delete batchRelease
case v1beta1.FinalisingStepTypeDeleteBR:
retry, err = m.removeBatchRelease(c)
// restore the gateway resources (ingress/gatewayAPI/Istio), that means
// only stable Service will accept the traffic
case v1beta1.FinalisingStepTypeGateway:
retry, err = m.trafficRoutingManager.RestoreGateway(tr)
// restore the stable service
case v1beta1.FinalisingStepTypeStableService:
retry, err = m.trafficRoutingManager.RestoreStableService(tr)
// remove canary service
case v1beta1.FinalisingStepTypeRemoveCanaryService:
retry, err = m.trafficRoutingManager.RemoveCanaryService(tr)
default:
nextStep = nextTask(c.FinalizeReason, "")
klog.Warningf("unexpected finalising step, current step(%s), start from the first step(%s)", canaryStatus.FinalisingStep, nextStep)
canaryStatus.FinalisingStep = nextStep
return false, nil
}
klog.Infof("rollout(%s/%s) doCanaryFinalising success", c.Rollout.Namespace, c.Rollout.Name)
return true, nil
if err != nil || retry {
return false, err
}
// current step is done, run the next step
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
canaryStatus.FinalisingStep = nextStep
if canaryStatus.FinalisingStep == v1beta1.FinalisingStepTypeEnd {
return true, nil
}
return false, nil
}
func (m *canaryReleaseManager) removeRolloutProgressingAnnotation(c *RolloutContext) error {
@ -508,37 +531,39 @@ func createBatchRelease(rollout *v1beta1.Rollout, rolloutID string, batch int32,
return br
}
// bool means if we need retry; if error is not nil, always retry
func (m *canaryReleaseManager) removeBatchRelease(c *RolloutContext) (bool, error) {
batch := &v1beta1.BatchRelease{}
err := m.Get(context.TODO(), client.ObjectKey{Namespace: c.Rollout.Namespace, Name: c.Rollout.Name}, batch)
if err != nil && errors.IsNotFound(err) {
return true, nil
return false, nil
} else if err != nil {
klog.Errorf("rollout(%s/%s) fetch BatchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name)
return false, err
return true, err
}
if !batch.DeletionTimestamp.IsZero() {
klog.Infof("rollout(%s/%s) BatchRelease is terminating, and wait a moment", c.Rollout.Namespace, c.Rollout.Name)
return false, nil
return true, nil
}
//delete batchRelease
err = m.Delete(context.TODO(), batch)
if err != nil {
klog.Errorf("rollout(%s/%s) delete BatchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return false, err
return true, err
}
klog.Infof("rollout(%s/%s) deleting BatchRelease, and wait a moment", c.Rollout.Namespace, c.Rollout.Name)
return false, nil
return true, nil
}
// bool means if we need retry; if error is not nil, always retry
func (m *canaryReleaseManager) finalizingBatchRelease(c *RolloutContext) (bool, error) {
br, err := m.fetchBatchRelease(c.Rollout.Namespace, c.Rollout.Name)
if err != nil {
if errors.IsNotFound(err) {
return true, nil
return false, nil
}
return false, err
return true, err
}
waitReady := c.WaitReady
// The Completed phase means batchRelease controller has processed all it
@ -546,7 +571,7 @@ func (m *canaryReleaseManager) finalizingBatchRelease(c *RolloutContext) (bool,
if br.Spec.ReleasePlan.BatchPartition == nil &&
br.Status.Phase == v1beta1.RolloutPhaseCompleted {
klog.Infof("rollout(%s/%s) finalizing batchRelease(%s) done", c.Rollout.Namespace, c.Rollout.Name, util.DumpJSON(br.Status))
return true, nil
return false, nil
}
// If BatchPartition is nil, BatchRelease will directly resume workload via:
@ -559,11 +584,11 @@ func (m *canaryReleaseManager) finalizingBatchRelease(c *RolloutContext) (bool,
switch br.Spec.ReleasePlan.FinalizingPolicy {
case v1beta1.WaitResumeFinalizingPolicyType:
if waitReady { // no need to patch again
return false, nil
return true, nil
}
default:
if !waitReady { // no need to patch again
return false, nil
return true, nil
}
}
}
@ -577,10 +602,10 @@ func (m *canaryReleaseManager) finalizingBatchRelease(c *RolloutContext) (bool,
// Patch BatchPartition and FinalizingPolicy, BatchPartition always patch null here.
body := fmt.Sprintf(`{"spec":{"releasePlan":{"batchPartition":null,"finalizingPolicy":"%s"}}}`, policy)
if err = m.Patch(context.TODO(), br, client.RawPatch(types.MergePatchType, []byte(body))); err != nil {
return false, err
return true, err
}
klog.Infof("rollout(%s/%s) patch batchRelease(%s) success", c.Rollout.Namespace, c.Rollout.Name, body)
return false, nil
return true, nil
}
// syncBatchRelease sync status of br to canaryStatus, and sync rollout-id of canaryStatus to br.
@ -601,3 +626,42 @@ func (m *canaryReleaseManager) syncBatchRelease(br *v1beta1.BatchRelease, canary
}
return nil
}
// calculate next task
func nextTask(reason string, currentTask v1beta1.FinalisingStepType) v1beta1.FinalisingStepType {
var taskSequence []v1beta1.FinalisingStepType
//REVIEW - should we consider more complex scenarios?
// like, user rollbacks the workload and disables the Rollout at the same time?
switch reason {
// in rollback, we cannot remove selector of the stable service in the first step,
// which may cause some traffic to problematic new version, so we should route all traffic to stable service
// in the first step
case v1beta1.FinaliseReasonRollback: // rollback
taskSequence = []v1beta1.FinalisingStepType{
v1beta1.FinalisingStepTypeGateway, // route all traffic to stable version
v1beta1.FinalisingStepTypeBatchRelease, // scale up old, scale down new
v1beta1.FinalisingStepTypeDeleteBR,
v1beta1.FinalisingStepTypeStableService,
v1beta1.FinalisingStepTypeRemoveCanaryService,
}
default: // others: success/disabled/deleting rollout
taskSequence = []v1beta1.FinalisingStepType{
v1beta1.FinalisingStepTypeStableService,
v1beta1.FinalisingStepTypeGateway,
v1beta1.FinalisingStepTypeRemoveCanaryService,
v1beta1.FinalisingStepTypeBatchRelease, // scale up new, scale down old
v1beta1.FinalisingStepTypeDeleteBR,
}
}
// if currentTask is empty, return first task
if len(currentTask) == 0 {
return taskSequence[0]
}
// find next task
for i := range taskSequence {
if currentTask == taskSequence[i] && i < len(taskSequence)-1 {
return taskSequence[i+1]
}
}
return v1beta1.FinalisingStepTypeEnd
}

View File

@ -45,6 +45,8 @@ type RolloutContext struct {
RecheckTime *time.Time
// wait stable workload pods ready
WaitReady bool
// finalising reason
FinalizeReason string
}
// parameter1 retryReconcile, parameter2 error
@ -116,6 +118,7 @@ func (r *RolloutReconciler) reconcileRolloutProgressing(rollout *v1beta1.Rollout
klog.Infof("rollout(%s/%s) is Progressing, and in reason(%s)", rollout.Namespace, rollout.Name, cond.Reason)
var done bool
rolloutContext.WaitReady = true
rolloutContext.FinalizeReason = v1beta1.FinaliseReasonSuccess
done, err = r.doFinalising(rolloutContext)
if err != nil {
return nil, err
@ -140,6 +143,7 @@ func (r *RolloutReconciler) reconcileRolloutProgressing(rollout *v1beta1.Rollout
case v1alpha1.ProgressingReasonCancelling:
klog.Infof("rollout(%s/%s) is Progressing, and in reason(%s)", rollout.Namespace, rollout.Name, cond.Reason)
var done bool
rolloutContext.FinalizeReason = v1beta1.FinaliseReasonRollback
done, err = r.doFinalising(rolloutContext)
if err != nil {
return nil, err
@ -416,11 +420,11 @@ func (r *RolloutReconciler) doProgressingReset(c *RolloutContext) (bool, error)
}
// if no trafficRouting exists, simply remove batchRelease
if !c.Rollout.Spec.Strategy.HasTrafficRoutings() {
done, err := releaseManager.removeBatchRelease(c)
retry, err := releaseManager.removeBatchRelease(c)
if err != nil {
klog.Errorf("rollout(%s/%s) DoFinalising batchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return false, err
} else if !done {
} else if retry {
return false, nil
}
return true, nil
@ -454,11 +458,11 @@ func (r *RolloutReconciler) doProgressingReset(c *RolloutContext) (bool, error)
// canary deployment, for other release, the v2 pods won't be deleted immediately
// in both cases, only the stable pods (v1) accept the traffic
case v1beta1.FinalisingStepTypeDeleteBR:
done, err := releaseManager.removeBatchRelease(c)
retry, err := releaseManager.removeBatchRelease(c)
if err != nil {
klog.Errorf("rollout(%s/%s) Finalize batchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return false, err
} else if !done {
} else if retry {
return false, nil
}
klog.Infof("rollout(%s/%s) in step (%s), and success", c.Rollout.Namespace, c.Rollout.Name, subStatus.FinalisingStep)

View File

@ -175,7 +175,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
},
},
{
name: "ReconcileRolloutProgressing rolling -> finalizing",
name: "ReconcileRolloutProgressing rolling -> finalizing", // call FinalisingTrafficRouting to restore stable Service
getObj: func() ([]*apps.Deployment, []*apps.ReplicaSet) {
dep1 := deploymentDemo.DeepCopy()
dep2 := deploymentDemo.DeepCopy()
@ -235,7 +235,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
},
},
{
name: "ReconcileRolloutProgressing finalizing1",
name: "ReconcileRolloutProgressing finalizing1", // call FinalisingTrafficRouting to remove canary service
getObj: func() ([]*apps.Deployment, []*apps.ReplicaSet) {
dep1 := deploymentDemo.DeepCopy()
delete(dep1.Annotations, util.InRolloutProgressingAnnotation)
@ -250,7 +250,11 @@ func TestReconcileRolloutProgressing(t *testing.T) {
return []*apps.Deployment{dep1}, []*apps.ReplicaSet{rs1}
},
getNetwork: func() ([]*corev1.Service, []*netv1.Ingress) {
return []*corev1.Service{demoService.DeepCopy()}, []*netv1.Ingress{demoIngress.DeepCopy()}
s1 := demoService.DeepCopy()
s1.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey] = "podtemplatehash-v1"
s2 := demoService.DeepCopy()
s2.Name = s1.Name + "-canary"
return []*corev1.Service{s1, s2}, []*netv1.Ingress{demoIngress.DeepCopy()}
},
getRollout: func() (*v1beta1.Rollout, *v1beta1.BatchRelease, *v1alpha1.TrafficRouting) {
obj := rolloutDemo.DeepCopy()
@ -285,6 +289,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
s.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeStableService
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
cond := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
@ -300,7 +305,145 @@ func TestReconcileRolloutProgressing(t *testing.T) {
},
},
{
name: "ReconcileRolloutProgressing finalizing2",
name: "ReconcileRolloutProgressing finalizing2", // go to the next finalising step
getObj: func() ([]*apps.Deployment, []*apps.ReplicaSet) {
dep1 := deploymentDemo.DeepCopy()
delete(dep1.Annotations, util.InRolloutProgressingAnnotation)
dep1.Status = apps.DeploymentStatus{
ObservedGeneration: 2,
Replicas: 10,
UpdatedReplicas: 5,
ReadyReplicas: 10,
AvailableReplicas: 10,
}
rs1 := rsDemo.DeepCopy()
return []*apps.Deployment{dep1}, []*apps.ReplicaSet{rs1}
},
getNetwork: func() ([]*corev1.Service, []*netv1.Ingress) {
return []*corev1.Service{demoService.DeepCopy()}, []*netv1.Ingress{demoIngress.DeepCopy()}
},
getRollout: func() (*v1beta1.Rollout, *v1beta1.BatchRelease, *v1alpha1.TrafficRouting) {
obj := rolloutDemo.DeepCopy()
obj.Annotations[v1alpha1.TrafficRoutingAnnotation] = "tr-demo"
obj.Status.CanaryStatus.ObservedWorkloadGeneration = 2
obj.Status.CanaryStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
obj.Status.CanaryStatus.StableRevision = "pod-template-hash-v1"
obj.Status.CanaryStatus.CanaryRevision = "6f8cc56547"
obj.Status.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
// given that the selector of stable Service is removed
// we will go on it the next step, i.e. patch restoreGateway
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeStableService
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
util.SetRolloutCondition(&obj.Status, *cond)
br := batchDemo.DeepCopy()
br.Spec.ReleasePlan.Batches = []v1beta1.ReleaseBatch{
{
CanaryReplicas: intstr.FromInt(1),
},
}
tr := demoTR.DeepCopy()
tr.Finalizers = []string{util.ProgressingRolloutFinalizer(rolloutDemo.Name)}
return obj, br, tr
},
expectStatus: func() *v1beta1.RolloutStatus {
s := rolloutDemo.Status.DeepCopy()
s.CanaryStatus.ObservedWorkloadGeneration = 2
s.CanaryStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
s.CanaryStatus.StableRevision = "pod-template-hash-v1"
s.CanaryStatus.CanaryRevision = "6f8cc56547"
s.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeGateway
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
cond := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
util.SetRolloutCondition(s, *cond)
s.CurrentStepIndex = s.CanaryStatus.CurrentStepIndex
s.CurrentStepState = s.CanaryStatus.CurrentStepState
return s
},
expectTr: func() *v1alpha1.TrafficRouting {
tr := demoTR.DeepCopy()
return tr
},
},
{
name: "ReconcileRolloutProgressing finalizing3",
getObj: func() ([]*apps.Deployment, []*apps.ReplicaSet) {
dep1 := deploymentDemo.DeepCopy()
delete(dep1.Annotations, util.InRolloutProgressingAnnotation)
dep1.Status = apps.DeploymentStatus{
ObservedGeneration: 2,
Replicas: 10,
UpdatedReplicas: 5,
ReadyReplicas: 10,
AvailableReplicas: 10,
}
rs1 := rsDemo.DeepCopy()
return []*apps.Deployment{dep1}, []*apps.ReplicaSet{rs1}
},
getNetwork: func() ([]*corev1.Service, []*netv1.Ingress) {
return []*corev1.Service{demoService.DeepCopy()}, []*netv1.Ingress{demoIngress.DeepCopy()}
},
getRollout: func() (*v1beta1.Rollout, *v1beta1.BatchRelease, *v1alpha1.TrafficRouting) {
obj := rolloutDemo.DeepCopy()
obj.Annotations[v1alpha1.TrafficRoutingAnnotation] = "tr-demo"
obj.Status.CanaryStatus.ObservedWorkloadGeneration = 2
obj.Status.CanaryStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
obj.Status.CanaryStatus.StableRevision = "pod-template-hash-v1"
obj.Status.CanaryStatus.CanaryRevision = "6f8cc56547"
obj.Status.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
// because the batchRelease hasn't completed (ie. br.Status.Phase is not completed),
// it will take more than one reconciles to go on to the next step
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeBatchRelease
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
util.SetRolloutCondition(&obj.Status, *cond)
br := batchDemo.DeepCopy()
br.Spec.ReleasePlan.Batches = []v1beta1.ReleaseBatch{
{
CanaryReplicas: intstr.FromInt(1),
},
}
tr := demoTR.DeepCopy()
tr.Finalizers = []string{util.ProgressingRolloutFinalizer(rolloutDemo.Name)}
return obj, br, tr
},
expectStatus: func() *v1beta1.RolloutStatus {
s := rolloutDemo.Status.DeepCopy()
s.CanaryStatus.ObservedWorkloadGeneration = 2
s.CanaryStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
s.CanaryStatus.StableRevision = "pod-template-hash-v1"
s.CanaryStatus.CanaryRevision = "6f8cc56547"
s.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeBatchRelease
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
cond := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
util.SetRolloutCondition(s, *cond)
s.CurrentStepIndex = s.CanaryStatus.CurrentStepIndex
s.CurrentStepState = s.CanaryStatus.CurrentStepState
return s
},
expectTr: func() *v1alpha1.TrafficRouting {
tr := demoTR.DeepCopy()
return tr
},
},
{
name: "ReconcileRolloutProgressing finalizing4",
getObj: func() ([]*apps.Deployment, []*apps.ReplicaSet) {
dep1 := deploymentDemo.DeepCopy()
delete(dep1.Annotations, util.InRolloutProgressingAnnotation)
@ -326,6 +469,9 @@ func TestReconcileRolloutProgressing(t *testing.T) {
obj.Status.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
// the batchRelease has completed (ie. br.Status.Phase is completed),
// we expect the finalizing step to be next step, i.e. deleteBatchRelease
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeBatchRelease
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
@ -348,6 +494,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
s.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeDeleteBR
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
cond2 := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond2.Reason = v1alpha1.ProgressingReasonFinalising
@ -385,6 +532,9 @@ func TestReconcileRolloutProgressing(t *testing.T) {
obj.Status.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
// deleteBatchRelease is the last step, and it won't wait a grace time
// after this step, this release should be succeeded
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeDeleteBR
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
@ -401,6 +551,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeEnd
cond2 := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond2.Reason = v1alpha1.ProgressingReasonCompleted
cond2.Status = corev1.ConditionFalse

View File

@ -238,7 +238,7 @@ func (r *RolloutReconciler) reconcileRolloutTerminating(rollout *v1beta1.Rollout
klog.Errorf("rollout(%s/%s) get workload failed: %s", rollout.Namespace, rollout.Name, err.Error())
return nil, err
}
c := &RolloutContext{Rollout: rollout, NewStatus: newStatus, Workload: workload}
c := &RolloutContext{Rollout: rollout, NewStatus: newStatus, Workload: workload, FinalizeReason: v1beta1.FinaliseReasonDelete}
done, err := r.doFinalising(c)
if err != nil {
return nil, err
@ -262,7 +262,7 @@ func (r *RolloutReconciler) reconcileRolloutDisabling(rollout *v1beta1.Rollout,
klog.Errorf("rollout(%s/%s) get workload failed: %s", rollout.Namespace, rollout.Name, err.Error())
return nil, err
}
c := &RolloutContext{Rollout: rollout, NewStatus: newStatus, Workload: workload}
c := &RolloutContext{Rollout: rollout, NewStatus: newStatus, Workload: workload, FinalizeReason: v1beta1.FinaliseReasonDisalbed}
done, err := r.doFinalising(c)
if err != nil {
return nil, err

View File

@ -204,26 +204,25 @@ func (m *Manager) FinalisingTrafficRouting(c *TrafficRoutingContext) (bool, erro
if len(c.ObjectRef) == 0 {
return true, nil
}
klog.Infof("%s start finalising traffic routing", c.Key)
// remove stable service the pod revision selector, so stable service will be selector all version pods.
klog.InfoS("start finalising traffic routing", "rollout", c.Key)
// remove stable service the pod revision selector, so stable service will select pods of all versions.
if retry, err := m.RestoreStableService(c); err != nil || retry {
return false, err
}
klog.Infof("%s restore stable service success", c.Key)
klog.InfoS("restore stable service success", "rollout", c.Key)
// modify network(ingress & gateway api) configuration, route all traffic to stable service
if retry, err := m.RestoreGateway(c); err != nil || retry {
return false, err
}
klog.Infof("%s restore gateway success", c.Key)
klog.InfoS("restore gateway success", "rollout", c.Key)
// remove canary service
if retry, err := m.RemoveCanaryService(c); err != nil || retry {
return false, err
}
klog.Infof("%s remove canary service success, finalising traffic routing is done", c.Key)
klog.InfoS("remove canary service success, finalising traffic routing is done", "rollout", c.Key)
return true, nil
}
// RestoreGateway restore gateway resources without graceful time
// returns:
// - if error is not nil, usually we need to retry later. Only if error is nil, we consider the bool.
// - The bool value indicates whether retry is needed. If true, it usually means

View File

@ -213,7 +213,7 @@ var _ = SIGDescribe("Rollout v1beta1", func() {
body := fmt.Sprintf(`{"status":{"canaryStatus":{"currentStepState":"%s"}}}`, v1beta1.CanaryStepStateReady)
Expect(k8sClient.Status().Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body)))).NotTo(HaveOccurred())
return false
}, 10*time.Second, time.Second).Should(BeTrue())
}, 10*time.Second, time.Millisecond*500).Should(BeTrue())
}
RolloutJumpCanaryStep := func(name string, target int) {