improve finalising logic for canary release (#229)
improve finalising logic for canary release-2

Signed-off-by: yunbo <yunbo10124scut@gmail.com>
Co-authored-by: yunbo <yunbo10124scut@gmail.com>

parent 5378dc2cf7   commit d2613132aa
@@ -171,8 +171,6 @@ type RolloutStatus struct {
// Conditions a list of conditions a rollout can have.
// +optional
Conditions []RolloutCondition `json:"conditions,omitempty"`
// +optional
//BlueGreenStatus *BlueGreenStatus `json:"blueGreenStatus,omitempty"`
// Phase is the rollout phase.
Phase RolloutPhase `json:"phase,omitempty"`
// Message provides details on why the rollout is in its current phase
@@ -445,7 +445,7 @@ type CanaryStatus struct {
// BlueGreenStatus status fields that only pertain to the blueGreen rollout
type BlueGreenStatus struct {
CommonStatus `json:",inline"`
// CanaryRevision is calculated by rollout based on podTemplateHash, and the internal logic flow uses
// UpdatedRevision is calculated by rollout based on podTemplateHash, and the internal logic flow uses
// It may be different from rs podTemplateHash in different k8s versions, so it cannot be used as service selector label
UpdatedRevision string `json:"updatedRevision"`
// UpdatedReplicas the numbers of updated pods
@@ -558,29 +558,29 @@ const (
type FinalisingStepType string

const (
// some work that should be done before pod scaling down.
// For BlueGreenStrategy:
// we route all traffic to stable or new version based on FinaliseReason
// For CanaryStrategy:
// we remove the selector of stable service
FinalisingStepTypePreparing FinalisingStepType = "Preparing"
// Route all traffic to stable or new version based on FinaliseReason (for bluegreen)
FinalisingStepTypeRouteAllTraffic FinalisingStepType = "RouteAllTraffic"
// Restore the stable Service, i.e. remove corresponding selector
FinalisingStepTypeStableService FinalisingStepType = "RestoreStableService"
// Remove the canary Service
FinalisingStepTypeRemoveCanaryService FinalisingStepType = "RemoveCanaryService"

// Patch Batch Release to scale down (exception: the canary Deployment will be
// scaled down in FinalisingStepTypeDeleteBR step)
// For both BlueGreenStrategy and CanaryStrategy:
// set workload.pause=false, set workload.partition=0
FinalisingStepTypeBatchRelease FinalisingStepType = "PatchBatchRelease"
// TODO - Currently, the next three steps are in the same function, FinalisingTrafficRouting;
// we should try to separate the FinalisingStepTypeGateway and FinalisingStepTypeCanaryService
// with graceful time to prevent some potential issues

// Restore the stable Service (i.e. remove corresponding selector)
FinalisingStepTypeStableService FinalisingStepType = "RestoreStableService"
// Execute the FinalisingTrafficRouting function
FinalisingStepTypeTrafficRouting FinalisingStepType = "FinalisingTrafficRouting"
// Restore the GatewayAPI/Ingress/Istio
FinalisingStepTypeGateway FinalisingStepType = "RestoreGateway"
// Delete Canary Service
FinalisingStepTypeDeleteCanaryService FinalisingStepType = "DeleteCanaryService"
// Delete Batch Release
FinalisingStepTypeDeleteBR FinalisingStepType = "DeleteBatchRelease"
// All needed work done
FinalisingStepTypeEnd FinalisingStepType = "END"
)
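Editor's note: the constants above enumerate the individual finalising steps; the reworked doCanaryFinalising further down executes exactly one of these steps per reconcile, records the current step in CanaryStatus.FinalisingStep, and requeues until it reaches END. The sketch below illustrates that one-step-per-reconcile pattern in isolation; it is a simplified stand-in (its step names and handler functions are invented for illustration), not the controller code from this commit.

```go
package main

import "fmt"

// FinalisingStepType mirrors the string-typed step constants in the diff above;
// the handlers below are hypothetical stand-ins for the real manager calls.
type FinalisingStepType string

const (
	StepStableService FinalisingStepType = "RestoreStableService"
	StepGateway       FinalisingStepType = "RestoreGateway"
	StepEnd           FinalisingStepType = "END"
)

// one fake handler per step: returns retry=true when the step is not finished yet.
var handlers = map[FinalisingStepType]func() (retry bool, err error){
	StepStableService: func() (bool, error) { return false, nil },
	StepGateway:       func() (bool, error) { return false, nil },
}

// runOneStep executes the current step and, if it finished, advances to the next
// step in the fixed order; it reports done=true only once END is reached.
func runOneStep(current FinalisingStepType) (next FinalisingStepType, done bool, err error) {
	order := []FinalisingStepType{StepStableService, StepGateway, StepEnd}
	if current == "" {
		current = order[0]
	}
	if current == StepEnd {
		return current, true, nil
	}
	retry, err := handlers[current]()
	if err != nil || retry {
		return current, false, err // stay on this step, reconcile again later
	}
	for i := range order { // advance to the next step in the sequence
		if order[i] == current {
			current = order[i+1]
			break
		}
	}
	return current, current == StepEnd, nil
}

func main() {
	var step FinalisingStepType
	for done := false; !done; {
		var err error
		step, done, err = runOneStep(step)
		fmt.Println("current step:", step, "err:", err)
	}
}
```

Persisting the step in status is what lets a later reconcile resume where the previous one stopped, instead of re-running the whole finalising sequence from the beginning.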
// +genclient

@@ -534,8 +534,7 @@ spec:
format: int64
type: integer
phase:
description: BlueGreenStatus *BlueGreenStatus `json:"blueGreenStatus,omitempty"`
Phase is the rollout phase.
description: Phase is the rollout phase.
type: string
type: object
type: object
@@ -1475,7 +1474,7 @@ spec:
format: int32
type: integer
updatedRevision:
description: CanaryRevision is calculated by rollout based on
description: UpdatedRevision is calculated by rollout based on
podTemplateHash, and the internal logic flow uses It may be
different from rs podTemplateHash in different k8s versions,
so it cannot be used as service selector label
@@ -363,43 +363,66 @@ func (m *canaryReleaseManager) doCanaryJump(c *RolloutContext) (jumped bool) {

// cleanup after rollout is completed or finished
func (m *canaryReleaseManager) doCanaryFinalising(c *RolloutContext) (bool, error) {
canaryStatus := c.NewStatus.CanaryStatus
// when CanaryStatus is nil, which means canary action hasn't started yet, no cleanup is needed
if c.NewStatus.CanaryStatus == nil {
if canaryStatus == nil {
return true, nil
}
// 1. rollout progressing complete, remove rollout progressing annotation in workload
// rollout progressing complete, remove rollout progressing annotation in workload
err := m.removeRolloutProgressingAnnotation(c)
if err != nil {
return false, err
}
tr := newTrafficRoutingContext(c)
// 2. remove stable service the pod revision selector, so stable service will be selector all version pods.
done, err := m.trafficRoutingManager.FinalisingTrafficRouting(tr)
c.NewStatus.CanaryStatus.LastUpdateTime = tr.LastUpdateTime
if err != nil || !done {
return done, err
// execute steps based on the predefined order for each reason
nextStep := nextTask(c.FinalizeReason, canaryStatus.FinalisingStep)
// if current step is empty, set it with the first step
// if current step is end, we just return
if len(canaryStatus.FinalisingStep) == 0 {
canaryStatus.FinalisingStep = nextStep
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
} else if canaryStatus.FinalisingStep == v1beta1.FinalisingStepTypeEnd {
klog.Infof("rollout(%s/%s) finalising process is already completed", c.Rollout.Namespace, c.Rollout.Name)
return true, nil
}
// 3. set workload.pause=false; set workload.partition=0
done, err = m.finalizingBatchRelease(c)
if err != nil || !done {
return done, err
}
// 4. modify network api(ingress or gateway api) configuration, and route 100% traffic to stable pods.
done, err = m.trafficRoutingManager.FinalisingTrafficRouting(tr)
c.NewStatus.CanaryStatus.LastUpdateTime = tr.LastUpdateTime
if err != nil || !done {
return done, err
}
// 5. delete batchRelease crd
done, err = m.removeBatchRelease(c)
if err != nil {
klog.Errorf("rollout(%s/%s) Finalize batchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return false, err
} else if !done {
klog.Infof("rollout(%s/%s) Finalising Step is %s", c.Rollout.Namespace, c.Rollout.Name, canaryStatus.FinalisingStep)

var retry bool
// the order of steps is maintained by calculating the nextStep
switch canaryStatus.FinalisingStep {
// set workload.pause=false; set workload.partition=0
case v1beta1.FinalisingStepTypeBatchRelease:
retry, err = m.finalizingBatchRelease(c)
// delete batchRelease
case v1beta1.FinalisingStepTypeDeleteBR:
retry, err = m.removeBatchRelease(c)
// restore the gateway resources (ingress/gatewayAPI/Istio), that means
// only stable Service will accept the traffic
case v1beta1.FinalisingStepTypeGateway:
retry, err = m.trafficRoutingManager.RestoreGateway(tr)
// restore the stable service
case v1beta1.FinalisingStepTypeStableService:
retry, err = m.trafficRoutingManager.RestoreStableService(tr)
// remove canary service
case v1beta1.FinalisingStepTypeRemoveCanaryService:
retry, err = m.trafficRoutingManager.RemoveCanaryService(tr)

default:
nextStep = nextTask(c.FinalizeReason, "")
klog.Warningf("unexpected finalising step, current step(%s), start from the first step(%s)", canaryStatus.FinalisingStep, nextStep)
canaryStatus.FinalisingStep = nextStep
return false, nil
}
klog.Infof("rollout(%s/%s) doCanaryFinalising success", c.Rollout.Namespace, c.Rollout.Name)
return true, nil
if err != nil || retry {
return false, err
}
// current step is done, run the next step
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
canaryStatus.FinalisingStep = nextStep
if canaryStatus.FinalisingStep == v1beta1.FinalisingStepTypeEnd {
return true, nil
}
return false, nil
}

func (m *canaryReleaseManager) removeRolloutProgressingAnnotation(c *RolloutContext) error {
@@ -508,37 +531,39 @@ func createBatchRelease(rollout *v1beta1.Rollout, rolloutID string, batch int32,
return br
}

// the bool indicates whether a retry is needed; if error is not nil, always retry
func (m *canaryReleaseManager) removeBatchRelease(c *RolloutContext) (bool, error) {
batch := &v1beta1.BatchRelease{}
err := m.Get(context.TODO(), client.ObjectKey{Namespace: c.Rollout.Namespace, Name: c.Rollout.Name}, batch)
if err != nil && errors.IsNotFound(err) {
return true, nil
return false, nil
} else if err != nil {
klog.Errorf("rollout(%s/%s) fetch BatchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return false, err
return true, err
}
if !batch.DeletionTimestamp.IsZero() {
klog.Infof("rollout(%s/%s) BatchRelease is terminating, and wait a moment", c.Rollout.Namespace, c.Rollout.Name)
return false, nil
return true, nil
}

// delete batchRelease
err = m.Delete(context.TODO(), batch)
if err != nil {
klog.Errorf("rollout(%s/%s) delete BatchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return false, err
return true, err
}
klog.Infof("rollout(%s/%s) deleting BatchRelease, and wait a moment", c.Rollout.Namespace, c.Rollout.Name)
return false, nil
return true, nil
}
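Editor's note: with this change removeBatchRelease (and finalizingBatchRelease below) report "retry" instead of "done", so a missing BatchRelease now yields (false, nil) while an in-flight deletion yields (true, nil). A minimal sketch of the calling convention, using a hypothetical stub in place of the real method:

```go
package main

import "fmt"

// removeBatchRelease is a hypothetical stub that follows the convention in this
// commit: the bool means "retry later", and a non-nil error always implies retry.
func removeBatchRelease() (retry bool, err error) {
	// pretend the BatchRelease object still has a deletion in flight
	return true, nil
}

// reconcileOnce shows how a caller converts the (retry, err) pair into the
// reconciler's own (done, err) result.
func reconcileOnce() (done bool, err error) {
	retry, err := removeBatchRelease()
	if err != nil {
		return false, err // error: requeue and try again later
	} else if retry {
		return false, nil // not finished yet: requeue without error
	}
	return true, nil // finished: safe to move to the next finalising step
}

func main() {
	done, err := reconcileOnce()
	fmt.Println("done:", done, "err:", err)
}
```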
// the bool indicates whether a retry is needed; if error is not nil, always retry
func (m *canaryReleaseManager) finalizingBatchRelease(c *RolloutContext) (bool, error) {
br, err := m.fetchBatchRelease(c.Rollout.Namespace, c.Rollout.Name)
if err != nil {
if errors.IsNotFound(err) {
return true, nil
return false, nil
}
return false, err
return true, err
}
waitReady := c.WaitReady
// The Completed phase means batchRelease controller has processed all it

@@ -546,7 +571,7 @@ func (m *canaryReleaseManager) finalizingBatchRelease(c *RolloutContext) (bool,
if br.Spec.ReleasePlan.BatchPartition == nil &&
br.Status.Phase == v1beta1.RolloutPhaseCompleted {
klog.Infof("rollout(%s/%s) finalizing batchRelease(%s) done", c.Rollout.Namespace, c.Rollout.Name, util.DumpJSON(br.Status))
return true, nil
return false, nil
}

// If BatchPartition is nil, BatchRelease will directly resume workload via:

@@ -559,11 +584,11 @@ func (m *canaryReleaseManager) finalizingBatchRelease(c *RolloutContext) (bool,
switch br.Spec.ReleasePlan.FinalizingPolicy {
case v1beta1.WaitResumeFinalizingPolicyType:
if waitReady { // no need to patch again
return false, nil
return true, nil
}
default:
if !waitReady { // no need to patch again
return false, nil
return true, nil
}
}
}

@@ -577,10 +602,10 @@ func (m *canaryReleaseManager) finalizingBatchRelease(c *RolloutContext) (bool,
// Patch BatchPartition and FinalizingPolicy, BatchPartition always patch null here.
body := fmt.Sprintf(`{"spec":{"releasePlan":{"batchPartition":null,"finalizingPolicy":"%s"}}}`, policy)
if err = m.Patch(context.TODO(), br, client.RawPatch(types.MergePatchType, []byte(body))); err != nil {
return false, err
return true, err
}
klog.Infof("rollout(%s/%s) patch batchRelease(%s) success", c.Rollout.Namespace, c.Rollout.Name, body)
return false, nil
return true, nil
}

// syncBatchRelease syncs the status of br to canaryStatus, and syncs the rollout-id of canaryStatus to br.

@@ -601,3 +626,42 @@ func (m *canaryReleaseManager) syncBatchRelease(br *v1beta1.BatchRelease, canary
}
return nil
}

// calculate next task
func nextTask(reason string, currentTask v1beta1.FinalisingStepType) v1beta1.FinalisingStepType {
var taskSequence []v1beta1.FinalisingStepType
// REVIEW - should we consider more complex scenarios?
// e.g., the user rolls back the workload and disables the Rollout at the same time?
switch reason {
// in rollback, we cannot remove the selector of the stable service in the first step,
// which may route some traffic to the problematic new version, so we should route all traffic to the stable service
// in the first step
case v1beta1.FinaliseReasonRollback: // rollback
taskSequence = []v1beta1.FinalisingStepType{
v1beta1.FinalisingStepTypeGateway, // route all traffic to stable version
v1beta1.FinalisingStepTypeBatchRelease, // scale up old, scale down new
v1beta1.FinalisingStepTypeDeleteBR,
v1beta1.FinalisingStepTypeStableService,
v1beta1.FinalisingStepTypeRemoveCanaryService,
}
default: // others: success/disabled/deleting rollout
taskSequence = []v1beta1.FinalisingStepType{
v1beta1.FinalisingStepTypeStableService,
v1beta1.FinalisingStepTypeGateway,
v1beta1.FinalisingStepTypeRemoveCanaryService,
v1beta1.FinalisingStepTypeBatchRelease, // scale up new, scale down old
v1beta1.FinalisingStepTypeDeleteBR,
}
}
// if currentTask is empty, return the first task
if len(currentTask) == 0 {
return taskSequence[0]
}
// find the next task
for i := range taskSequence {
if currentTask == taskSequence[i] && i < len(taskSequence)-1 {
return taskSequence[i+1]
}
}
return v1beta1.FinalisingStepTypeEnd
}
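Editor's note: nextTask is the piece that encodes the two finalising orders: on rollback the gateway is restored first so no traffic can reach the problematic revision, while on success/disable/delete the stable Service selector is restored first. The standalone sketch below re-implements the same traversal with simplified local types (the step names mirror the constants above, but none of this is the project's API) and prints both sequences through to END:

```go
package main

import "fmt"

type step string

const (
	stepGateway       step = "RestoreGateway"
	stepBatchRelease  step = "PatchBatchRelease"
	stepDeleteBR      step = "DeleteBatchRelease"
	stepStableService step = "RestoreStableService"
	stepCanaryService step = "RemoveCanaryService"
	stepEnd           step = "END"
)

// nextTask mirrors the traversal in the diff: pick a reason-specific sequence,
// return the first step for an empty current step, otherwise the step after the
// current one, and END once the last step has been passed.
func nextTask(rollback bool, current step) step {
	var seq []step
	if rollback { // route all traffic back to stable before touching the services
		seq = []step{stepGateway, stepBatchRelease, stepDeleteBR, stepStableService, stepCanaryService}
	} else { // success / disabled / deleting rollout
		seq = []step{stepStableService, stepGateway, stepCanaryService, stepBatchRelease, stepDeleteBR}
	}
	if current == "" {
		return seq[0]
	}
	for i := range seq {
		if current == seq[i] && i < len(seq)-1 {
			return seq[i+1]
		}
	}
	return stepEnd
}

func main() {
	for _, rollback := range []bool{true, false} {
		var cur step
		for cur != stepEnd {
			cur = nextTask(rollback, cur)
			fmt.Printf("rollback=%v -> %s\n", rollback, cur)
		}
	}
}
```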
@@ -45,6 +45,8 @@ type RolloutContext struct {
RecheckTime *time.Time
// wait stable workload pods ready
WaitReady bool
// finalising reason
FinalizeReason string
}
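Editor's note: FinalizeReason is the new input that nextTask keys on. In the hunks that follow, it is set to FinaliseReasonSuccess when progressing finishes normally, FinaliseReasonRollback when progressing is cancelled, FinaliseReasonDelete when the Rollout is terminating, and FinaliseReasonDisalbed (spelled as in the code) when the Rollout is disabled. A small sketch consolidating that mapping; the trigger names and string values here are invented for illustration and are not the project's constants:

```go
package main

import "fmt"

// These mirror the FinaliseReason* identifiers referenced in the diff; the
// string values are assumptions made only for this example.
const (
	reasonSuccess  = "Success"
	reasonRollback = "Rollback"
	reasonDelete   = "Delete"
	reasonDisabled = "Disabled"
)

// finaliseReasonFor summarizes where the later hunks set RolloutContext.FinalizeReason.
func finaliseReasonFor(trigger string) string {
	switch trigger {
	case "progressing-finalising": // rollout completed normally
		return reasonSuccess
	case "progressing-cancelling": // rollout is being rolled back
		return reasonRollback
	case "terminating": // Rollout object is being deleted
		return reasonDelete
	case "disabling": // Rollout has been disabled
		return reasonDisabled
	default:
		return reasonSuccess
	}
}

func main() {
	for _, t := range []string{"progressing-finalising", "progressing-cancelling", "terminating", "disabling"} {
		fmt.Println(t, "->", finaliseReasonFor(t))
	}
}
```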
// parameter1 retryReconcile, parameter2 error

@@ -116,6 +118,7 @@ func (r *RolloutReconciler) reconcileRolloutProgressing(rollout *v1beta1.Rollout
klog.Infof("rollout(%s/%s) is Progressing, and in reason(%s)", rollout.Namespace, rollout.Name, cond.Reason)
var done bool
rolloutContext.WaitReady = true
rolloutContext.FinalizeReason = v1beta1.FinaliseReasonSuccess
done, err = r.doFinalising(rolloutContext)
if err != nil {
return nil, err

@@ -140,6 +143,7 @@ func (r *RolloutReconciler) reconcileRolloutProgressing(rollout *v1beta1.Rollout
case v1alpha1.ProgressingReasonCancelling:
klog.Infof("rollout(%s/%s) is Progressing, and in reason(%s)", rollout.Namespace, rollout.Name, cond.Reason)
var done bool
rolloutContext.FinalizeReason = v1beta1.FinaliseReasonRollback
done, err = r.doFinalising(rolloutContext)
if err != nil {
return nil, err

@@ -416,11 +420,11 @@ func (r *RolloutReconciler) doProgressingReset(c *RolloutContext) (bool, error)
}
// if no trafficRouting exists, simply remove batchRelease
if !c.Rollout.Spec.Strategy.HasTrafficRoutings() {
done, err := releaseManager.removeBatchRelease(c)
retry, err := releaseManager.removeBatchRelease(c)
if err != nil {
klog.Errorf("rollout(%s/%s) DoFinalising batchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return false, err
} else if !done {
} else if retry {
return false, nil
}
return true, nil

@@ -454,11 +458,11 @@ func (r *RolloutReconciler) doProgressingReset(c *RolloutContext) (bool, error)
// canary deployment, for other release, the v2 pods won't be deleted immediately
// in both cases, only the stable pods (v1) accept the traffic
case v1beta1.FinalisingStepTypeDeleteBR:
done, err := releaseManager.removeBatchRelease(c)
retry, err := releaseManager.removeBatchRelease(c)
if err != nil {
klog.Errorf("rollout(%s/%s) Finalize batchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return false, err
} else if !done {
} else if retry {
return false, nil
}
klog.Infof("rollout(%s/%s) in step (%s), and success", c.Rollout.Namespace, c.Rollout.Name, subStatus.FinalisingStep)
@@ -175,7 +175,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
},
},
{
name: "ReconcileRolloutProgressing rolling -> finalizing",
name: "ReconcileRolloutProgressing rolling -> finalizing", // call FinalisingTrafficRouting to restore stable Service
getObj: func() ([]*apps.Deployment, []*apps.ReplicaSet) {
dep1 := deploymentDemo.DeepCopy()
dep2 := deploymentDemo.DeepCopy()

@@ -235,7 +235,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
},
},
{
name: "ReconcileRolloutProgressing finalizing1",
name: "ReconcileRolloutProgressing finalizing1", // call FinalisingTrafficRouting to remove canary service
getObj: func() ([]*apps.Deployment, []*apps.ReplicaSet) {
dep1 := deploymentDemo.DeepCopy()
delete(dep1.Annotations, util.InRolloutProgressingAnnotation)

@@ -250,7 +250,11 @@ func TestReconcileRolloutProgressing(t *testing.T) {
return []*apps.Deployment{dep1}, []*apps.ReplicaSet{rs1}
},
getNetwork: func() ([]*corev1.Service, []*netv1.Ingress) {
return []*corev1.Service{demoService.DeepCopy()}, []*netv1.Ingress{demoIngress.DeepCopy()}
s1 := demoService.DeepCopy()
s1.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey] = "podtemplatehash-v1"
s2 := demoService.DeepCopy()
s2.Name = s1.Name + "-canary"
return []*corev1.Service{s1, s2}, []*netv1.Ingress{demoIngress.DeepCopy()}
},
getRollout: func() (*v1beta1.Rollout, *v1beta1.BatchRelease, *v1alpha1.TrafficRouting) {
obj := rolloutDemo.DeepCopy()

@@ -285,6 +289,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
s.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeStableService
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
cond := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
@@ -300,7 +305,145 @@ func TestReconcileRolloutProgressing(t *testing.T) {
},
},
{
name: "ReconcileRolloutProgressing finalizing2",
name: "ReconcileRolloutProgressing finalizing2", // go to the next finalising step
getObj: func() ([]*apps.Deployment, []*apps.ReplicaSet) {
dep1 := deploymentDemo.DeepCopy()
delete(dep1.Annotations, util.InRolloutProgressingAnnotation)
dep1.Status = apps.DeploymentStatus{
ObservedGeneration: 2,
Replicas: 10,
UpdatedReplicas: 5,
ReadyReplicas: 10,
AvailableReplicas: 10,
}
rs1 := rsDemo.DeepCopy()
return []*apps.Deployment{dep1}, []*apps.ReplicaSet{rs1}
},
getNetwork: func() ([]*corev1.Service, []*netv1.Ingress) {
return []*corev1.Service{demoService.DeepCopy()}, []*netv1.Ingress{demoIngress.DeepCopy()}
},
getRollout: func() (*v1beta1.Rollout, *v1beta1.BatchRelease, *v1alpha1.TrafficRouting) {
obj := rolloutDemo.DeepCopy()
obj.Annotations[v1alpha1.TrafficRoutingAnnotation] = "tr-demo"
obj.Status.CanaryStatus.ObservedWorkloadGeneration = 2
obj.Status.CanaryStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
obj.Status.CanaryStatus.StableRevision = "pod-template-hash-v1"
obj.Status.CanaryStatus.CanaryRevision = "6f8cc56547"
obj.Status.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
// given that the selector of the stable Service is removed,
// we will go on to the next step, i.e. RestoreGateway
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeStableService
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
util.SetRolloutCondition(&obj.Status, *cond)
br := batchDemo.DeepCopy()
br.Spec.ReleasePlan.Batches = []v1beta1.ReleaseBatch{
{
CanaryReplicas: intstr.FromInt(1),
},
}
tr := demoTR.DeepCopy()
tr.Finalizers = []string{util.ProgressingRolloutFinalizer(rolloutDemo.Name)}
return obj, br, tr
},
expectStatus: func() *v1beta1.RolloutStatus {
s := rolloutDemo.Status.DeepCopy()
s.CanaryStatus.ObservedWorkloadGeneration = 2
s.CanaryStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
s.CanaryStatus.StableRevision = "pod-template-hash-v1"
s.CanaryStatus.CanaryRevision = "6f8cc56547"
s.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeGateway
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
cond := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
util.SetRolloutCondition(s, *cond)
s.CurrentStepIndex = s.CanaryStatus.CurrentStepIndex
s.CurrentStepState = s.CanaryStatus.CurrentStepState
return s
},
expectTr: func() *v1alpha1.TrafficRouting {
tr := demoTR.DeepCopy()
return tr
},
},
{
name: "ReconcileRolloutProgressing finalizing3",
getObj: func() ([]*apps.Deployment, []*apps.ReplicaSet) {
dep1 := deploymentDemo.DeepCopy()
delete(dep1.Annotations, util.InRolloutProgressingAnnotation)
dep1.Status = apps.DeploymentStatus{
ObservedGeneration: 2,
Replicas: 10,
UpdatedReplicas: 5,
ReadyReplicas: 10,
AvailableReplicas: 10,
}
rs1 := rsDemo.DeepCopy()
return []*apps.Deployment{dep1}, []*apps.ReplicaSet{rs1}
},
getNetwork: func() ([]*corev1.Service, []*netv1.Ingress) {
return []*corev1.Service{demoService.DeepCopy()}, []*netv1.Ingress{demoIngress.DeepCopy()}
},
getRollout: func() (*v1beta1.Rollout, *v1beta1.BatchRelease, *v1alpha1.TrafficRouting) {
obj := rolloutDemo.DeepCopy()
obj.Annotations[v1alpha1.TrafficRoutingAnnotation] = "tr-demo"
obj.Status.CanaryStatus.ObservedWorkloadGeneration = 2
obj.Status.CanaryStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
obj.Status.CanaryStatus.StableRevision = "pod-template-hash-v1"
obj.Status.CanaryStatus.CanaryRevision = "6f8cc56547"
obj.Status.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
// because the batchRelease hasn't completed (i.e. br.Status.Phase is not completed),
// it will take more than one reconcile to go on to the next step
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeBatchRelease
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
util.SetRolloutCondition(&obj.Status, *cond)
br := batchDemo.DeepCopy()
br.Spec.ReleasePlan.Batches = []v1beta1.ReleaseBatch{
{
CanaryReplicas: intstr.FromInt(1),
},
}
tr := demoTR.DeepCopy()
tr.Finalizers = []string{util.ProgressingRolloutFinalizer(rolloutDemo.Name)}
return obj, br, tr
},
expectStatus: func() *v1beta1.RolloutStatus {
s := rolloutDemo.Status.DeepCopy()
s.CanaryStatus.ObservedWorkloadGeneration = 2
s.CanaryStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
s.CanaryStatus.StableRevision = "pod-template-hash-v1"
s.CanaryStatus.CanaryRevision = "6f8cc56547"
s.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeBatchRelease
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
cond := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
util.SetRolloutCondition(s, *cond)
s.CurrentStepIndex = s.CanaryStatus.CurrentStepIndex
s.CurrentStepState = s.CanaryStatus.CurrentStepState
return s
},
expectTr: func() *v1alpha1.TrafficRouting {
tr := demoTR.DeepCopy()
return tr
},
},
{
name: "ReconcileRolloutProgressing finalizing4",
getObj: func() ([]*apps.Deployment, []*apps.ReplicaSet) {
dep1 := deploymentDemo.DeepCopy()
delete(dep1.Annotations, util.InRolloutProgressingAnnotation)
@@ -326,6 +469,9 @@ func TestReconcileRolloutProgressing(t *testing.T) {
obj.Status.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
// the batchRelease has completed (i.e. br.Status.Phase is completed),
// so we expect the finalising step to advance to the next step, i.e. deleteBatchRelease
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeBatchRelease
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue

@@ -348,6 +494,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
s.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeDeleteBR
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
cond2 := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond2.Reason = v1alpha1.ProgressingReasonFinalising

@@ -385,6 +532,9 @@ func TestReconcileRolloutProgressing(t *testing.T) {
obj.Status.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
// deleteBatchRelease is the last step, and it doesn't wait for a grace period;
// after this step, this release should succeed
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeDeleteBR
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue

@@ -401,6 +551,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeEnd
cond2 := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond2.Reason = v1alpha1.ProgressingReasonCompleted
cond2.Status = corev1.ConditionFalse
@@ -238,7 +238,7 @@ func (r *RolloutReconciler) reconcileRolloutTerminating(rollout *v1beta1.Rollout
klog.Errorf("rollout(%s/%s) get workload failed: %s", rollout.Namespace, rollout.Name, err.Error())
return nil, err
}
c := &RolloutContext{Rollout: rollout, NewStatus: newStatus, Workload: workload}
c := &RolloutContext{Rollout: rollout, NewStatus: newStatus, Workload: workload, FinalizeReason: v1beta1.FinaliseReasonDelete}
done, err := r.doFinalising(c)
if err != nil {
return nil, err

@@ -262,7 +262,7 @@ func (r *RolloutReconciler) reconcileRolloutDisabling(rollout *v1beta1.Rollout,
klog.Errorf("rollout(%s/%s) get workload failed: %s", rollout.Namespace, rollout.Name, err.Error())
return nil, err
}
c := &RolloutContext{Rollout: rollout, NewStatus: newStatus, Workload: workload}
c := &RolloutContext{Rollout: rollout, NewStatus: newStatus, Workload: workload, FinalizeReason: v1beta1.FinaliseReasonDisalbed}
done, err := r.doFinalising(c)
if err != nil {
return nil, err
@@ -204,26 +204,25 @@ func (m *Manager) FinalisingTrafficRouting(c *TrafficRoutingContext) (bool, erro
if len(c.ObjectRef) == 0 {
return true, nil
}
klog.Infof("%s start finalising traffic routing", c.Key)
// remove stable service the pod revision selector, so stable service will be selector all version pods.
klog.InfoS("start finalising traffic routing", "rollout", c.Key)
// remove stable service the pod revision selector, so stable service will select pods of all versions.
if retry, err := m.RestoreStableService(c); err != nil || retry {
return false, err
}
klog.Infof("%s restore stable service success", c.Key)
klog.InfoS("restore stable service success", "rollout", c.Key)
// modify network(ingress & gateway api) configuration, route all traffic to stable service
if retry, err := m.RestoreGateway(c); err != nil || retry {
return false, err
}
klog.Infof("%s restore gateway success", c.Key)
klog.InfoS("restore gateway success", "rollout", c.Key)
// remove canary service
if retry, err := m.RemoveCanaryService(c); err != nil || retry {
return false, err
}
klog.Infof("%s remove canary service success, finalising traffic routing is done", c.Key)
klog.InfoS("remove canary service success, finalising traffic routing is done", "rollout", c.Key)
return true, nil
}
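Editor's note: the hunk above also migrates these log lines from klog.Infof to klog.InfoS, so the rollout key becomes a structured key/value field rather than text interpolated into the message. A minimal standalone example of the difference, assuming k8s.io/klog/v2 (the logger already used in this file):

```go
package main

import "k8s.io/klog/v2"

func main() {
	defer klog.Flush()
	key := "rollouts-demo/rollouts-demo" // namespace/name of a rollout
	// formatted logging: the identifying key is embedded in the message string
	klog.Infof("%s restore stable service success", key)
	// structured logging: the key is emitted as a separate, machine-parsable field
	klog.InfoS("restore stable service success", "rollout", key)
}
```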
// RestoreGateway restores gateway resources without graceful time
// returns:
// - if error is not nil, usually we need to retry later. Only if error is nil, we consider the bool.
// - The bool value indicates whether retry is needed. If true, it usually means

@@ -213,7 +213,7 @@ var _ = SIGDescribe("Rollout v1beta1", func() {
body := fmt.Sprintf(`{"status":{"canaryStatus":{"currentStepState":"%s"}}}`, v1beta1.CanaryStepStateReady)
Expect(k8sClient.Status().Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body)))).NotTo(HaveOccurred())
return false
}, 10*time.Second, time.Second).Should(BeTrue())
}, 10*time.Second, time.Millisecond*500).Should(BeTrue())
}

RolloutJumpCanaryStep := func(name string, target int) {