imporve traffic strategy for canary and partition release

Signed-off-by: yunbo <yunbo10124scut@gmail.com>
This commit is contained in:
yunbo 2024-06-07 11:06:39 +08:00
parent cddc53ad14
commit d32837e104
9 changed files with 544 additions and 163 deletions

View File

@ -17,6 +17,9 @@ limitations under the License.
package v1beta1
import (
"reflect"
apps "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
@ -92,6 +95,22 @@ func (r *RolloutStrategy) GetRollingStyle() RollingStyleType {
return PartitionRollingStyle
}
// simply using EnableExtraWorkloadForCanary is not enough, for example, a v1alaph1 Rollout
// can be converted to v1beta1 Rollout with EnableExtraWorkloadForCanary set as true, even the
// objectRef is cloneset (which doesn't support canary release)
func IsRealPartition(rollout *Rollout) bool {
estimation := rollout.Spec.Strategy.GetRollingStyle()
if estimation == EmptyRollingStyle || estimation == BlueGreenRollingStyle {
return false
}
targetRef := rollout.Spec.WorkloadRef
if targetRef.APIVersion == apps.SchemeGroupVersion.String() && targetRef.Kind == reflect.TypeOf(apps.Deployment{}).Name() &&
estimation == CanaryRollingStyle {
return false
}
return true
}
// r.GetRollingStyle() == BlueGreenRollingStyle
func (r *RolloutStrategy) IsBlueGreenRelease() bool {
return r.GetRollingStyle() == BlueGreenRollingStyle

View File

@ -75,7 +75,7 @@ func (m *canaryReleaseManager) runCanary(c *RolloutContext) error {
currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1]
if currentStep.Traffic == nil && len(currentStep.Matches) == 0 {
tr := newTrafficRoutingContext(c)
done, err := m.trafficRoutingManager.FinalisingTrafficRouting(tr, false)
done, err := m.trafficRoutingManager.FinalisingTrafficRouting(tr)
c.NewStatus.CanaryStatus.LastUpdateTime = tr.LastUpdateTime
if err != nil {
return err
@ -89,10 +89,59 @@ func (m *canaryReleaseManager) runCanary(c *RolloutContext) error {
switch canaryStatus.CurrentStepState {
// before CanaryStepStateUpgrade, handle some special cases, to prevent traffic loss
case v1beta1.CanaryStepStateInit:
// placeholder for the later traffic modification Pull Request
canaryStatus.NextStepIndex = util.NextBatchIndex(c.Rollout, canaryStatus.CurrentStepIndex)
klog.Infof("rollout(%s/%s) run canary strategy, and state(%s)", c.Rollout.Namespace, c.Rollout.Name, v1beta1.CanaryStepStateInit)
tr := newTrafficRoutingContext(c)
if currentStep.Traffic == nil && len(currentStep.Matches) == 0 {
canaryStatus.CurrentStepState = v1beta1.CanaryStepStateUpgrade
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", c.Rollout.Namespace, c.Rollout.Name,
canaryStatus.CurrentStepIndex, v1beta1.CanaryStepStateInit, canaryStatus.CurrentStepState)
return nil
}
/*
The next check is used to bypass the bug in ingress-nginx controller https://github.com/kubernetes/ingress-nginx/issues/9635
for partition release, if the currentStep replicas is "100%", we can assume that all traffic should be routed to canary pods
*/
if currentStep.Replicas.StrVal == "100%" && v1beta1.IsRealPartition(c.Rollout) {
klog.Infof("special case detected: rollout(%s/%s) restore stable Service", c.Rollout.Namespace, c.Rollout.Name)
done, err := m.trafficRoutingManager.RestoreStableService(tr)
if err != nil {
return err
} else if !done {
expectedTime := time.Now().Add(time.Duration(defaultGracePeriodSeconds) * time.Second)
c.RecheckTime = &expectedTime
return nil
}
}
/*
The next check is used to solve the following scenario:
steps:
- replicas: 1 # frist batch
matches:
- headers:
- name: user-agent
type: Exact
value: pc
we should patch selector to stable Service before CanaryStepStateUpgrade when in the first batch
otherwise, some traffic will loss between CanaryStepStateUpgrade and CanaryStepStateTrafficRouting
*/
if canaryStatus.CurrentStepIndex == 1 {
klog.Infof("special case detected: rollout(%s/%s) patch stable Service", c.Rollout.Namespace, c.Rollout.Name)
done, err := m.trafficRoutingManager.PatchStableService(tr)
if err != nil {
return err
} else if !done {
expectedTime := time.Now().Add(time.Duration(defaultGracePeriodSeconds) * time.Second)
c.RecheckTime = &expectedTime
return nil
}
}
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
canaryStatus.CurrentStepState = v1beta1.CanaryStepStateUpgrade
fallthrough
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", c.Rollout.Namespace, c.Rollout.Name,
canaryStatus.CurrentStepIndex, v1beta1.CanaryStepStateInit, canaryStatus.CurrentStepState)
case v1beta1.CanaryStepStateUpgrade:
klog.Infof("rollout(%s/%s) run canary strategy, and state(%s)", c.Rollout.Namespace, c.Rollout.Name, v1beta1.CanaryStepStateUpgrade)
@ -101,6 +150,9 @@ func (m *canaryReleaseManager) runCanary(c *RolloutContext) error {
return err
} else if done {
canaryStatus.CurrentStepState = v1beta1.CanaryStepStateTrafficRouting
if currentStep.Replicas.StrVal == "100%" && v1beta1.IsRealPartition(c.Rollout) {
canaryStatus.CurrentStepState = v1beta1.CanaryStepStateMetricsAnalysis
}
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", c.Rollout.Namespace, c.Rollout.Name,
canaryStatus.CurrentStepIndex, v1beta1.CanaryStepStateUpgrade, canaryStatus.CurrentStepState)
@ -216,6 +268,12 @@ func (m *canaryReleaseManager) doCanaryPaused(c *RolloutContext) (bool, error) {
canaryStatus := c.NewStatus.CanaryStatus
currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1]
steps := len(c.Rollout.Spec.Strategy.Canary.Steps)
// If it is the last step, and 100% of pods, then return true
if int32(steps) == canaryStatus.CurrentStepIndex {
if currentStep.Replicas != nil && currentStep.Replicas.StrVal == "100%" {
return true, nil
}
}
cond := util.GetRolloutCondition(*c.NewStatus, v1beta1.RolloutConditionProgressing)
// need manual confirmation
if currentStep.Pause.Duration == nil {
@ -268,8 +326,9 @@ func (m *canaryReleaseManager) doCanaryJump(c *RolloutContext) (jumped bool) {
// cleanup after rollout is completed or finished
func (m *canaryReleaseManager) doCanaryFinalising(c *RolloutContext) (bool, error) {
canaryStatus := c.NewStatus.CanaryStatus
// when CanaryStatus is nil, which means canary action hasn't started yet, don't need doing cleanup
if c.NewStatus.CanaryStatus == nil {
if canaryStatus == nil {
return true, nil
}
// 1. rollout progressing complete, remove rollout progressing annotation in workload
@ -278,33 +337,73 @@ func (m *canaryReleaseManager) doCanaryFinalising(c *RolloutContext) (bool, erro
return false, err
}
tr := newTrafficRoutingContext(c)
// 2. remove stable service the pod revision selector, so stable service will be selector all version pods.
done, err := m.trafficRoutingManager.FinalisingTrafficRouting(tr, true)
c.NewStatus.CanaryStatus.LastUpdateTime = tr.LastUpdateTime
if err != nil || !done {
return done, err
klog.Infof("rollout(%s/%s) Finalising Step is %s", c.Rollout.Namespace, c.Rollout.Name, canaryStatus.FinalisingStep)
switch canaryStatus.FinalisingStep {
default:
canaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeStableService
fallthrough
case v1beta1.FinalisingStepTypeStableService:
// restore stable service selector to select all pods [with grace time]
done, err := m.trafficRoutingManager.RestoreStableService(tr)
if err != nil || !done {
canaryStatus.LastUpdateTime = tr.LastUpdateTime
return done, err
}
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
canaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeGateway
case v1beta1.FinalisingStepTypeGateway:
// modify network api(ingress or gateway api) configuration
done, err := m.trafficRoutingManager.FinalisingTrafficRouting(tr)
if err != nil || !done {
canaryStatus.LastUpdateTime = tr.LastUpdateTime
return done, err
}
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
canaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeCanaryService
/*
//TODO - As mentioned in FinalisingTrafficRouting function,
we should wait grace time between FinalisingStepTypeGateway and FinalisingStepTypeCanaryService
to avoid a very rare case which could cause minor traffic loss (espically, Istio), but it's difficult
to implement now.
However, we still reserve the FinalisingStepTypeCanaryService step here, but instead of removing the
canary Service as expected (which has been done in FinalisingStepTypeGateway), FinalisingStepTypeCanaryService
simply wait a gracetime between FinalisingStepTypeCanaryService and FinalisingStepTypeDeleteBR now
*/
case v1beta1.FinalisingStepTypeCanaryService:
// wait a gracetime for safety
if canaryStatus.LastUpdateTime != nil {
if verifyTime := canaryStatus.LastUpdateTime.Add(time.Second * time.Duration(3)); verifyTime.After(time.Now()) {
klog.Infof("restoring network configuration, but we need to wait %d seconds", 3)
return false, nil
}
}
canaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeBatchRelease
case v1beta1.FinalisingStepTypeBatchRelease:
// set workload.pause=false; set workload.partition=0
done, err := m.finalizingBatchRelease(c)
if err != nil || !done {
return done, err
}
canaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeDeleteBR
case v1beta1.FinalisingStepTypeDeleteBR:
// delete batchRelease crd
done, err := m.removeBatchRelease(c)
if err != nil {
klog.Errorf("rollout(%s/%s) Finalize batchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return false, err
} else if !done {
return false, nil
}
klog.Infof("rollout(%s/%s) doCanaryFinalising success", c.Rollout.Namespace, c.Rollout.Name)
return true, nil
}
// 3. set workload.pause=false; set workload.partition=0
done, err = m.finalizingBatchRelease(c)
if err != nil || !done {
return done, err
}
// 4. modify network api(ingress or gateway api) configuration, and route 100% traffic to stable pods.
done, err = m.trafficRoutingManager.FinalisingTrafficRouting(tr, false)
c.NewStatus.CanaryStatus.LastUpdateTime = tr.LastUpdateTime
if err != nil || !done {
return done, err
}
// 5. delete batchRelease crd
done, err = m.removeBatchRelease(c)
if err != nil {
klog.Errorf("rollout(%s/%s) Finalize batchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return false, err
} else if !done {
return false, nil
}
klog.Infof("rollout(%s/%s) doCanaryFinalising success", c.Rollout.Namespace, c.Rollout.Name)
return true, nil
return false, nil
}
func (m *canaryReleaseManager) removeRolloutProgressingAnnotation(c *RolloutContext) error {

View File

@ -63,6 +63,7 @@ func TestRunCanary(t *testing.T) {
obj.Status.CanaryStatus.StableRevision = "pod-template-hash-v1"
obj.Status.CanaryStatus.CanaryRevision = "6f8cc56547"
obj.Status.CanaryStatus.CurrentStepIndex = 1
obj.Status.CanaryStatus.NextStepIndex = 2
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateUpgrade
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonInRolling
@ -76,6 +77,7 @@ func TestRunCanary(t *testing.T) {
s.CanaryStatus.StableRevision = "pod-template-hash-v1"
s.CanaryStatus.CanaryRevision = "6f8cc56547"
s.CanaryStatus.CurrentStepIndex = 1
s.CanaryStatus.NextStepIndex = 2
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateUpgrade
cond := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonInRolling
@ -138,6 +140,7 @@ func TestRunCanary(t *testing.T) {
obj.Status.CanaryStatus.StableRevision = "pod-template-hash-v1"
obj.Status.CanaryStatus.CanaryRevision = "6f8cc56547"
obj.Status.CanaryStatus.CurrentStepIndex = 1
obj.Status.CanaryStatus.NextStepIndex = 2
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateUpgrade
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonInRolling
@ -183,6 +186,7 @@ func TestRunCanary(t *testing.T) {
s.CanaryStatus.CanaryReplicas = 1
s.CanaryStatus.CanaryReadyReplicas = 1
s.CanaryStatus.CurrentStepIndex = 1
s.CanaryStatus.NextStepIndex = 2
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateTrafficRouting
cond := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonInRolling
@ -287,6 +291,7 @@ func TestRunCanaryPaused(t *testing.T) {
obj.Status.CanaryStatus.StableRevision = "pod-template-hash-v1"
obj.Status.CanaryStatus.CanaryRevision = "6f8cc56547"
obj.Status.CanaryStatus.CurrentStepIndex = 3
obj.Status.CanaryStatus.NextStepIndex = 4
obj.Status.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStatePaused
return obj
@ -298,6 +303,7 @@ func TestRunCanaryPaused(t *testing.T) {
obj.CanaryStatus.StableRevision = "pod-template-hash-v1"
obj.CanaryStatus.CanaryRevision = "6f8cc56547"
obj.CanaryStatus.CurrentStepIndex = 3
obj.CanaryStatus.NextStepIndex = 4
obj.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
obj.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStatePaused
return obj

View File

@ -45,6 +45,8 @@ type RolloutContext struct {
RecheckTime *time.Time
// wait stable workload pods ready
WaitReady bool
// finalising reason
FinalizeReason string
}
// parameter1 retryReconcile, parameter2 error
@ -122,6 +124,7 @@ func (r *RolloutReconciler) reconcileRolloutProgressing(rollout *v1beta1.Rollout
klog.Infof("rollout(%s/%s) is Progressing, and in reason(%s)", rollout.Namespace, rollout.Name, cond.Reason)
var done bool
rolloutContext.WaitReady = true
rolloutContext.FinalizeReason = v1beta1.FinaliseReasonSuccess
done, err = r.doFinalising(rolloutContext)
if err != nil {
return nil, err
@ -146,6 +149,7 @@ func (r *RolloutReconciler) reconcileRolloutProgressing(rollout *v1beta1.Rollout
case v1alpha1.ProgressingReasonCancelling:
klog.Infof("rollout(%s/%s) is Progressing, and in reason(%s)", rollout.Namespace, rollout.Name, cond.Reason)
var done bool
rolloutContext.FinalizeReason = v1beta1.FinaliseReasonRollback
done, err = r.doFinalising(rolloutContext)
if err != nil {
return nil, err
@ -411,22 +415,36 @@ func isRollingBackInBatches(rollout *v1beta1.Rollout, workload *util.Workload) b
// 1. modify network api(ingress or gateway api) configuration, and route 100% traffic to stable pods
// 2. remove batchRelease CR.
func (r *RolloutReconciler) doProgressingReset(c *RolloutContext) (bool, error) {
if len(c.Rollout.Spec.Strategy.Canary.TrafficRoutings) > 0 {
if c.Rollout.Spec.Strategy.HasTrafficRoutings() {
// modify network api(ingress or gateway api) configuration, and route 100% traffic to stable pods
tr := newTrafficRoutingContext(c)
done, err := r.trafficRoutingManager.FinalisingTrafficRouting(tr, false)
c.NewStatus.CanaryStatus.LastUpdateTime = tr.LastUpdateTime
done, err := r.trafficRoutingManager.RouteAllTrafficToCanaryORStable(tr, v1beta1.FinaliseReasonContinuous)
if err != nil || !done {
c.NewStatus.GetSubStatus().LastUpdateTime = tr.LastUpdateTime
return done, err
}
}
done, err := r.canaryManager.removeBatchRelease(c)
releaseManager, err := r.getReleaseManager(c.Rollout)
if err != nil {
return false, err
}
done, err := releaseManager.removeBatchRelease(c)
if err != nil {
klog.Errorf("rollout(%s/%s) DoFinalising batchRelease failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return false, err
} else if !done {
return false, nil
}
// restore Service and network api
if c.Rollout.Spec.Strategy.HasTrafficRoutings() {
tr := newTrafficRoutingContext(c)
done, err = r.trafficRoutingManager.FinalisingTrafficRouting(tr)
c.NewStatus.GetSubStatus().LastUpdateTime = tr.LastUpdateTime
if err != nil || !done {
return done, err
}
}
return true, nil
}

View File

@ -242,7 +242,9 @@ func TestReconcileRolloutProgressing(t *testing.T) {
return []*apps.Deployment{dep1}, []*apps.ReplicaSet{rs1}
},
getNetwork: func() ([]*corev1.Service, []*netv1.Ingress) {
return []*corev1.Service{demoService.DeepCopy()}, []*netv1.Ingress{demoIngress.DeepCopy()}
s1 := demoService.DeepCopy()
s1.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey] = "podtemplatehash-v1"
return []*corev1.Service{s1}, []*netv1.Ingress{demoIngress.DeepCopy()}
},
getRollout: func() (*v1beta1.Rollout, *v1beta1.BatchRelease, *v1alpha1.TrafficRouting) {
obj := rolloutDemo.DeepCopy()
@ -277,6 +279,11 @@ func TestReconcileRolloutProgressing(t *testing.T) {
s.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
// the first finalizing step is restorStableService, given that the stable service has
// selector, and removing it will take a grace time (i.e. take at least 2 reconciles),
// therefore, after calling ReconcileRolloutProgressing once, the finalizing step must be
// restorStableService
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeStableService
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
cond := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
@ -291,6 +298,140 @@ func TestReconcileRolloutProgressing(t *testing.T) {
},
{
name: "ReconcileRolloutProgressing finalizing2",
getObj: func() ([]*apps.Deployment, []*apps.ReplicaSet) {
dep1 := deploymentDemo.DeepCopy()
delete(dep1.Annotations, util.InRolloutProgressingAnnotation)
dep1.Status = apps.DeploymentStatus{
ObservedGeneration: 2,
Replicas: 10,
UpdatedReplicas: 5,
ReadyReplicas: 10,
AvailableReplicas: 10,
}
rs1 := rsDemo.DeepCopy()
return []*apps.Deployment{dep1}, []*apps.ReplicaSet{rs1}
},
getNetwork: func() ([]*corev1.Service, []*netv1.Ingress) {
return []*corev1.Service{demoService.DeepCopy()}, []*netv1.Ingress{demoIngress.DeepCopy()}
},
getRollout: func() (*v1beta1.Rollout, *v1beta1.BatchRelease, *v1alpha1.TrafficRouting) {
obj := rolloutDemo.DeepCopy()
obj.Annotations[v1alpha1.TrafficRoutingAnnotation] = "tr-demo"
obj.Status.CanaryStatus.ObservedWorkloadGeneration = 2
obj.Status.CanaryStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
obj.Status.CanaryStatus.StableRevision = "pod-template-hash-v1"
obj.Status.CanaryStatus.CanaryRevision = "6f8cc56547"
obj.Status.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
// given that the stable service has no selector, and removing it will immediately return
// ture. Therefore, we expect the finalizing step to be next step, i.e. restoreGateway
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeStableService
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
util.SetRolloutCondition(&obj.Status, *cond)
br := batchDemo.DeepCopy()
br.Spec.ReleasePlan.Batches = []v1beta1.ReleaseBatch{
{
CanaryReplicas: intstr.FromInt(1),
},
}
tr := demoTR.DeepCopy()
tr.Finalizers = []string{util.ProgressingRolloutFinalizer(rolloutDemo.Name)}
return obj, br, tr
},
expectStatus: func() *v1beta1.RolloutStatus {
s := rolloutDemo.Status.DeepCopy()
s.CanaryStatus.ObservedWorkloadGeneration = 2
s.CanaryStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
s.CanaryStatus.StableRevision = "pod-template-hash-v1"
s.CanaryStatus.CanaryRevision = "6f8cc56547"
s.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeGateway
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
cond := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
util.SetRolloutCondition(s, *cond)
return s
},
expectTr: func() *v1alpha1.TrafficRouting {
tr := demoTR.DeepCopy()
return tr
},
},
{
name: "ReconcileRolloutProgressing finalizing3",
getObj: func() ([]*apps.Deployment, []*apps.ReplicaSet) {
dep1 := deploymentDemo.DeepCopy()
delete(dep1.Annotations, util.InRolloutProgressingAnnotation)
dep1.Status = apps.DeploymentStatus{
ObservedGeneration: 2,
Replicas: 10,
UpdatedReplicas: 5,
ReadyReplicas: 10,
AvailableReplicas: 10,
}
rs1 := rsDemo.DeepCopy()
return []*apps.Deployment{dep1}, []*apps.ReplicaSet{rs1}
},
getNetwork: func() ([]*corev1.Service, []*netv1.Ingress) {
return []*corev1.Service{demoService.DeepCopy()}, []*netv1.Ingress{demoIngress.DeepCopy()}
},
getRollout: func() (*v1beta1.Rollout, *v1beta1.BatchRelease, *v1alpha1.TrafficRouting) {
obj := rolloutDemo.DeepCopy()
obj.Annotations[v1alpha1.TrafficRoutingAnnotation] = "tr-demo"
obj.Status.CanaryStatus.ObservedWorkloadGeneration = 2
obj.Status.CanaryStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
obj.Status.CanaryStatus.StableRevision = "pod-template-hash-v1"
obj.Status.CanaryStatus.CanaryRevision = "6f8cc56547"
obj.Status.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
// restoring gateway will return true immediately
// Rollout will go on to next step, i.e. deleteCanaryService
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeGateway
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
util.SetRolloutCondition(&obj.Status, *cond)
br := batchDemo.DeepCopy()
br.Spec.ReleasePlan.Batches = []v1beta1.ReleaseBatch{
{
CanaryReplicas: intstr.FromInt(1),
},
}
tr := demoTR.DeepCopy()
tr.Finalizers = []string{util.ProgressingRolloutFinalizer(rolloutDemo.Name)}
return obj, br, tr
},
expectStatus: func() *v1beta1.RolloutStatus {
s := rolloutDemo.Status.DeepCopy()
s.CanaryStatus.ObservedWorkloadGeneration = 2
s.CanaryStatus.RolloutHash = "f55bvd874d5f2fzvw46bv966x4bwbdv4wx6bd9f7b46ww788954b8z8w29b7wxfd"
s.CanaryStatus.StableRevision = "pod-template-hash-v1"
s.CanaryStatus.CanaryRevision = "6f8cc56547"
s.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeCanaryService
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
cond := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
util.SetRolloutCondition(s, *cond)
return s
},
expectTr: func() *v1alpha1.TrafficRouting {
tr := demoTR.DeepCopy()
return tr
},
},
{
name: "ReconcileRolloutProgressing finalizing4",
getObj: func() ([]*apps.Deployment, []*apps.ReplicaSet) {
dep1 := deploymentDemo.DeepCopy()
delete(dep1.Annotations, util.InRolloutProgressingAnnotation)
@ -316,6 +457,9 @@ func TestReconcileRolloutProgressing(t *testing.T) {
obj.Status.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
// current finalizing step is patchBatchRelease, since the batch release has been "patched"
// we expect the finalizing step to be next step, i.e. deleteBatchRelease
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeBatchRelease
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
@ -338,6 +482,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
s.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeDeleteBR
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
cond2 := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond2.Reason = v1alpha1.ProgressingReasonFinalising
@ -373,6 +518,9 @@ func TestReconcileRolloutProgressing(t *testing.T) {
obj.Status.CanaryStatus.PodTemplateHash = "pod-template-hash-v2"
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
// deleteBatchRelease is the last step, and it won't wait a grace time
// after this step, this release should be succeeded
obj.Status.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeDeleteBR
cond := util.GetRolloutCondition(obj.Status, v1beta1.RolloutConditionProgressing)
cond.Reason = v1alpha1.ProgressingReasonFinalising
cond.Status = corev1.ConditionTrue
@ -389,6 +537,7 @@ func TestReconcileRolloutProgressing(t *testing.T) {
s.CanaryStatus.CurrentStepIndex = 4
s.CanaryStatus.NextStepIndex = 0
s.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
s.CanaryStatus.FinalisingStep = v1beta1.FinalisingStepTypeDeleteBR
cond2 := util.GetRolloutCondition(*s, v1beta1.RolloutConditionProgressing)
cond2.Reason = v1alpha1.ProgressingReasonCompleted
cond2.Status = corev1.ConditionFalse

View File

@ -123,13 +123,13 @@ func (r *TrafficRoutingReconciler) Reconcile(ctx context.Context, req ctrl.Reque
done, err = r.trafficRoutingManager.DoTrafficRouting(newTrafficRoutingContext(tr))
}
case v1alpha1.TrafficRoutingPhaseFinalizing:
done, err = r.trafficRoutingManager.FinalisingTrafficRouting(newTrafficRoutingContext(tr), false)
done, err = r.trafficRoutingManager.FinalisingTrafficRouting(newTrafficRoutingContext(tr))
if done {
newStatus.Phase = v1alpha1.TrafficRoutingPhaseHealthy
newStatus.Message = "TrafficRouting is Healthy"
}
case v1alpha1.TrafficRoutingPhaseTerminating:
done, err = r.trafficRoutingManager.FinalisingTrafficRouting(newTrafficRoutingContext(tr), false)
done, err = r.trafficRoutingManager.FinalisingTrafficRouting(newTrafficRoutingContext(tr))
if done {
// remove trafficRouting finalizer
err = r.handleFinalizer(tr)

View File

@ -126,7 +126,7 @@ func (m *Manager) DoTrafficRouting(c *TrafficRoutingContext) (bool, error) {
if c.LastUpdateTime != nil {
// wait seconds for network providers to consume the modification about workload, service and so on.
if verifyTime := c.LastUpdateTime.Add(time.Second * time.Duration(trafficRouting.GracePeriodSeconds)); verifyTime.After(time.Now()) {
klog.Infof("%s update workload or service selector, and wait 3 seconds", c.Key)
klog.Infof("%s update workload or service selector, and wait %d seconds", c.Key, trafficRouting.GracePeriodSeconds)
return false, nil
}
}
@ -138,6 +138,15 @@ func (m *Manager) DoTrafficRouting(c *TrafficRoutingContext) (bool, error) {
klog.Warningf("%s stableRevision or podTemplateHash can not be empty, and wait a moment", c.Key)
return false, nil
}
/*
Why is the serviceModified flag moved here?
The rationale behind this is that when we create a canary Service, it is already instantiated with the appropriate selector.
If the stable Service also has had selector patched previously, the logic will proceed to the EnsureRoutes function uninterrupted.
It's important to note the following: Creating a new Service and updating the gateway resource occurs within a single reconciliation loop;
this can lead to instability.
Therefore, by moving the serviceModified flag, we introduce a grace period between these two operations to ensure stability.
*/
serviceModified := false
// fetch canary service
err = m.Get(context.TODO(), client.ObjectKey{Namespace: c.Namespace, Name: canaryServiceName}, canaryService)
if err != nil && !errors.IsNotFound(err) {
@ -148,9 +157,9 @@ func (m *Manager) DoTrafficRouting(c *TrafficRoutingContext) (bool, error) {
if err != nil {
return false, err
}
serviceModified = true
}
serviceModified := false
// patch canary service to only select the canary pods
if canaryService.Spec.Selector[c.RevisionLabelKey] != c.CanaryRevision {
body := fmt.Sprintf(`{"spec":{"selector":{"%s":"%s"}}}`, c.RevisionLabelKey, c.CanaryRevision)
@ -180,6 +189,13 @@ func (m *Manager) DoTrafficRouting(c *TrafficRoutingContext) (bool, error) {
if serviceModified {
return false, nil
}
} else {
// for end-to-end canary deployment scenario and scenario when DisableGenerateCanaryService is on
// selector is not needed, we should remove it
verify, err := m.restoreStableService(c)
if err != nil || !verify {
return false, err
}
}
// new network provider, ingress or gateway
@ -199,7 +215,7 @@ func (m *Manager) DoTrafficRouting(c *TrafficRoutingContext) (bool, error) {
return true, nil
}
func (m *Manager) FinalisingTrafficRouting(c *TrafficRoutingContext, onlyRestoreStableService bool) (bool, error) {
func (m *Manager) FinalisingTrafficRouting(c *TrafficRoutingContext) (bool, error) {
if len(c.ObjectRef) == 0 {
return true, nil
}
@ -235,31 +251,16 @@ func (m *Manager) FinalisingTrafficRouting(c *TrafficRoutingContext, onlyRestore
verify, err := m.restoreStableService(c)
if err != nil || !verify {
return false, err
} else if onlyRestoreStableService {
return true, nil
}
// First route 100% traffic to stable service
c.Strategy.Traffic = utilpointer.StringPtr("0%")
verify, err = trController.EnsureRoutes(context.TODO(), &c.Strategy)
if err != nil {
return false, err
} else if !verify {
c.LastUpdateTime = &metav1.Time{Time: time.Now()}
return false, nil
}
if c.LastUpdateTime != nil {
// After restore the stable service configuration, give network provider 3 seconds to react
if verifyTime := c.LastUpdateTime.Add(time.Second * time.Duration(trafficRouting.GracePeriodSeconds)); verifyTime.After(time.Now()) {
klog.Infof("%s route 100% traffic to stable service, and wait a moment", c.Key)
return false, nil
}
}
// modify network(ingress & gateway api) configuration, route all traffic to stable service
if err = trController.Finalise(context.TODO()); err != nil {
return false, err
}
//TODO - we should wait grace time between Finalish and removal of canary service
// to avoid a very rare case which could cause minor traffic loss (espically, Istio)
// However, it seems difficult to implement this unless re-write Finalise function:
// end to end deployment, don't remove the canary service;
// because canary service is stable service
if !c.OnlyTrafficRouting && !c.DisableGenerateCanaryService {
@ -274,6 +275,145 @@ func (m *Manager) FinalisingTrafficRouting(c *TrafficRoutingContext, onlyRestore
return true, nil
}
// Route All Traffic To Canary OR Stable based on the FinalizeReason
func (m *Manager) RouteAllTrafficToCanaryORStable(c *TrafficRoutingContext, FinalizeReason string) (bool, error) {
if len(c.ObjectRef) == 0 {
return true, nil
}
if c.OnlyTrafficRouting {
return true, nil
}
klog.Infof("%s RouteAllTrafficTo with Reason %s", c.Key, FinalizeReason)
trafficToCanary := ""
switch FinalizeReason {
case v1beta1.FinaliseReasonRollback, v1beta1.FinaliseReasonContinuous:
trafficToCanary = "0%"
case v1beta1.FinaliseReasonSuccess:
trafficToCanary = "100%"
default:
return true, nil
}
trafficRouting := c.ObjectRef[0]
if trafficRouting.GracePeriodSeconds <= 0 {
trafficRouting.GracePeriodSeconds = defaultGracePeriodSeconds
}
cServiceName := getCanaryServiceName(trafficRouting.Service, c.OnlyTrafficRouting, c.DisableGenerateCanaryService)
trController, err := newNetworkProvider(m.Client, c, trafficRouting.Service, cServiceName)
if err != nil {
klog.Errorf("%s newTrafficRoutingController failed: %s", c.Key, err.Error())
return false, err
}
//fetch stable service
stableService := &corev1.Service{}
err = m.Get(context.TODO(), client.ObjectKey{Namespace: c.Namespace, Name: trafficRouting.Service}, stableService)
if err != nil {
if errors.IsNotFound(err) {
return true, nil
}
klog.Errorf("%s get stable service(%s) failed: %s", c.Key, trafficRouting.Service, err.Error())
return false, err
}
if stableService.Spec.Selector[c.RevisionLabelKey] == "" && !c.DisableGenerateCanaryService {
return true, nil
}
// Route 100% traffic to service that won't be scaled down
c.Strategy.Traffic = utilpointer.StringPtr(trafficToCanary)
verify, err := trController.EnsureRoutes(context.TODO(), &c.Strategy)
if err != nil {
return false, err
} else if !verify {
c.LastUpdateTime = &metav1.Time{Time: time.Now()}
klog.Infof("%s updated %s traffic, wait a grace time", c.Key, trafficToCanary)
return false, nil
}
if c.LastUpdateTime != nil {
// After restore the stable service configuration, give network provider seconds to react
if verifyTime := c.LastUpdateTime.Add(time.Second * time.Duration(trafficRouting.GracePeriodSeconds)); verifyTime.After(time.Now()) {
klog.Infof("%s routed %s traffic to canary pods, but you need wait %d seconds", c.Key, trafficToCanary, trafficRouting.GracePeriodSeconds)
return false, nil
}
}
// if all traffic is routed canary, then it is ok to restore stable Service
// it is mainly to avoid affect of bug in ingress-nginx controller
if trafficToCanary == "100%" {
verify, err := m.restoreStableService(c)
if err != nil || !verify {
return false, err
}
}
return true, nil
}
func (m *Manager) PatchStableService(c *TrafficRoutingContext) (bool, error) {
if len(c.ObjectRef) == 0 {
return true, nil
}
trafficRouting := c.ObjectRef[0]
if trafficRouting.GracePeriodSeconds <= 0 {
trafficRouting.GracePeriodSeconds = defaultGracePeriodSeconds
}
if c.OnlyTrafficRouting {
return true, nil
}
//fetch stable service
stableService := &corev1.Service{}
err := m.Get(context.TODO(), client.ObjectKey{Namespace: c.Namespace, Name: trafficRouting.Service}, stableService)
if err != nil {
klog.Errorf("%s get stable service(%s) failed: %s", c.Key, trafficRouting.Service, err.Error())
// not found, wait a moment, retry
if errors.IsNotFound(err) {
return false, nil
}
return false, err
}
if stableService.Spec.Selector[c.RevisionLabelKey] == c.StableRevision {
return true, nil
}
// patch stable service to only select the stable pods
body := fmt.Sprintf(`{"spec":{"selector":{"%s":"%s"}}}`, c.RevisionLabelKey, c.StableRevision)
if err = m.Patch(context.TODO(), stableService, client.RawPatch(types.StrategicMergePatchType, []byte(body))); err != nil {
klog.Errorf("%s patch stable service(%s) selector failed: %s", c.Key, stableService.Name, err.Error())
return false, err
}
klog.Infof("%s do something special: add stable service(%s) selector(%s=%s) success", c.Key, stableService.Name, c.RevisionLabelKey, c.StableRevision)
return true, nil
}
func (m *Manager) RestoreStableService(c *TrafficRoutingContext) (bool, error) {
if len(c.ObjectRef) == 0 {
return true, nil
}
trafficRouting := c.ObjectRef[0]
if trafficRouting.GracePeriodSeconds <= 0 {
trafficRouting.GracePeriodSeconds = defaultGracePeriodSeconds
}
//fetch stable service
stableService := &corev1.Service{}
err := m.Get(context.TODO(), client.ObjectKey{Namespace: c.Namespace, Name: trafficRouting.Service}, stableService)
if err != nil {
if errors.IsNotFound(err) {
return true, nil
}
klog.Errorf("%s get stable service(%s) failed: %s", c.Key, trafficRouting.Service, err.Error())
return false, err
}
// restore stable Service
verify, err := m.restoreStableService(c)
if err != nil || !verify {
return false, err
}
return true, nil
}
func newNetworkProvider(c client.Client, con *TrafficRoutingContext, sService, cService string) (network.NetworkProvider, error) {
trafficRouting := con.ObjectRef[0]
if trafficRouting.CustomNetworkRefs != nil {
@ -364,7 +504,7 @@ func (m *Manager) restoreStableService(c *TrafficRoutingContext) (bool, error) {
klog.Errorf("%s patch stable service(%s) failed: %s", c.Key, trafficRouting.Service, err.Error())
return false, err
}
klog.Infof("remove %s stable service(%s) pod revision selector, and wait a moment", c.Key, trafficRouting.Service)
klog.Infof("removed %s stable service(%s) pod revision selector, and wait %d seconds (log once)", c.Key, trafficRouting.Service, trafficRouting.GracePeriodSeconds)
c.LastUpdateTime = &metav1.Time{Time: time.Now()}
return false, nil
}
@ -373,7 +513,7 @@ func (m *Manager) restoreStableService(c *TrafficRoutingContext) (bool, error) {
}
// After restore the stable service configuration, give network provider 3 seconds to react
if verifyTime := c.LastUpdateTime.Add(time.Second * time.Duration(trafficRouting.GracePeriodSeconds)); verifyTime.After(time.Now()) {
klog.Infof("%s restoring stable service(%s), and wait a moment", c.Key, trafficRouting.Service)
klog.Infof("%s restoring stable service(%s), and wait %d seconds", c.Key, trafficRouting.Service, trafficRouting.GracePeriodSeconds)
return false, nil
}
klog.Infof("%s doFinalising stable service(%s) success", c.Key, trafficRouting.Service)

View File

@ -774,6 +774,7 @@ func TestDoTrafficRoutingWithIstio(t *testing.T) {
getRollout: func() (*v1beta1.Rollout, *util.Workload) {
obj := demoIstioRollout.DeepCopy()
obj.Status.CanaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now().Add(-10 * time.Second)}
obj.Spec.Strategy.Canary.TrafficRoutings[0].GracePeriodSeconds = 1
return obj, &util.Workload{RevisionLabelKey: apps.DefaultDeploymentUniqueLabelKey}
},
expectUnstructureds: func() []*unstructured.Unstructured {
@ -803,7 +804,6 @@ func TestDoTrafficRoutingWithIstio(t *testing.T) {
objects = append(objects, u)
return objects
},
// Rollout(/rollout-demo) is doing trafficRouting({"traffic":"5%"}), and wait a moment
expectDone: true,
},
{
@ -833,6 +833,7 @@ func TestDoTrafficRoutingWithIstio(t *testing.T) {
obj := demoIstioRollout.DeepCopy()
// set DisableGenerateCanaryService as true
obj.Spec.Strategy.Canary.DisableGenerateCanaryService = true
obj.Spec.Strategy.Canary.TrafficRoutings[0].GracePeriodSeconds = 1
obj.Status.CanaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now().Add(-10 * time.Second)}
return obj, &util.Workload{RevisionLabelKey: apps.DefaultDeploymentUniqueLabelKey}
},
@ -863,7 +864,6 @@ func TestDoTrafficRoutingWithIstio(t *testing.T) {
objects = append(objects, u)
return objects
},
// Rollout(/rollout-demo) is doing trafficRouting({"traffic":"5%"}), and wait a moment
expectDone: true,
},
}
@ -897,11 +897,22 @@ func TestDoTrafficRoutingWithIstio(t *testing.T) {
if err != nil {
t.Fatalf("InitializeTrafficRouting failed: %s", err)
}
// now we need to wait at least 2x grace time to keep traffic stable:
// create the canary service -> grace time -> update the gateway -> grace time
// therefore, before both grace times are over, DoTrafficRouting should return false
// firstly, create the canary Service, before the grace time over, return false
_, err = manager.DoTrafficRouting(c)
if err != nil {
t.Fatalf("DoTrafficRouting failed: %s", err)
}
// may return false due to in the course of doing trafficRouting, let's do it again
time.Sleep(1 * time.Second)
// secondly, update the gateway, before the grace time over, return false
_, err = manager.DoTrafficRouting(c)
if err != nil {
t.Fatalf("DoTrafficRouting failed: %s", err)
}
time.Sleep(1 * time.Second)
// now, both grace times are over, it should be true
done, err := manager.DoTrafficRouting(c)
if err != nil {
t.Fatalf("DoTrafficRouting failed: %s", err)
@ -920,12 +931,12 @@ func TestDoTrafficRoutingWithIstio(t *testing.T) {
func TestFinalisingTrafficRouting(t *testing.T) {
cases := []struct {
name string
getObj func() ([]*corev1.Service, []*netv1.Ingress)
getRollout func() (*v1beta1.Rollout, *util.Workload)
onlyRestoreStableService bool
expectObj func() ([]*corev1.Service, []*netv1.Ingress)
expectDone bool
name string
getObj func() ([]*corev1.Service, []*netv1.Ingress)
getRollout func() (*v1beta1.Rollout, *util.Workload)
onlyTrafficRouting bool
expectObj func() ([]*corev1.Service, []*netv1.Ingress)
expectDone bool
}{
{
name: "FinalisingTrafficRouting test1",
@ -950,7 +961,6 @@ func TestFinalisingTrafficRouting(t *testing.T) {
obj.Status.CanaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now().Add(-time.Hour)}
return obj, &util.Workload{RevisionLabelKey: apps.DefaultDeploymentUniqueLabelKey}
},
onlyRestoreStableService: true,
expectObj: func() ([]*corev1.Service, []*netv1.Ingress) {
s1 := demoService.DeepCopy()
s2 := demoService.DeepCopy()
@ -964,6 +974,8 @@ func TestFinalisingTrafficRouting(t *testing.T) {
c2.Spec.Rules[0].HTTP.Paths[0].Backend.Service.Name = "echoserver-canary"
return []*corev1.Service{s1, s2}, []*netv1.Ingress{c1, c2}
},
// the Serive has selector on it, which takes a grace time to remove
// that's why expectDone is false
expectDone: false,
},
{
@ -985,123 +997,60 @@ func TestFinalisingTrafficRouting(t *testing.T) {
obj := demoRollout.DeepCopy()
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now().Add(-10 * time.Second)}
return obj, &util.Workload{RevisionLabelKey: apps.DefaultDeploymentUniqueLabelKey}
},
onlyRestoreStableService: true,
expectObj: func() ([]*corev1.Service, []*netv1.Ingress) {
s1 := demoService.DeepCopy()
s2 := demoService.DeepCopy()
s2.Name = "echoserver-canary"
s2.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey] = "podtemplatehash-v2"
c1 := demoIngress.DeepCopy()
c2 := demoIngress.DeepCopy()
c2.Name = "echoserver-canary"
c2.Annotations[fmt.Sprintf("%s/canary", nginxIngressAnnotationDefaultPrefix)] = "true"
c2.Annotations[fmt.Sprintf("%s/canary-weight", nginxIngressAnnotationDefaultPrefix)] = "100"
c2.Spec.Rules[0].HTTP.Paths[0].Backend.Service.Name = "echoserver-canary"
return []*corev1.Service{s1, s2}, []*netv1.Ingress{c1, c2}
return []*corev1.Service{s1}, []*netv1.Ingress{c1}
},
expectDone: false,
// the Serive has selector removed, therefore code will go on and return ture
expectDone: true,
},
{
name: "FinalisingTrafficRouting test3",
name: "canary Service already clear",
getObj: func() ([]*corev1.Service, []*netv1.Ingress) {
s1 := demoService.DeepCopy()
s2 := demoService.DeepCopy()
s2.Name = "echoserver-canary"
s2.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey] = "podtemplatehash-v2"
s1.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey] = "podtemplatehash-v1"
c1 := demoIngress.DeepCopy()
c2 := demoIngress.DeepCopy()
c2.Name = "echoserver-canary"
c2.Annotations[fmt.Sprintf("%s/canary", nginxIngressAnnotationDefaultPrefix)] = "true"
c2.Annotations[fmt.Sprintf("%s/canary-weight", nginxIngressAnnotationDefaultPrefix)] = "100"
c2.Spec.Rules[0].HTTP.Paths[0].Backend.Service.Name = "echoserver-canary"
return []*corev1.Service{s1, s2}, []*netv1.Ingress{c1, c2}
return []*corev1.Service{s1}, []*netv1.Ingress{c1}
},
getRollout: func() (*v1beta1.Rollout, *util.Workload) {
obj := demoRollout.DeepCopy()
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now().Add(-10 * time.Second)}
obj.Status.CanaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now().Add(-time.Hour)}
return obj, &util.Workload{RevisionLabelKey: apps.DefaultDeploymentUniqueLabelKey}
},
onlyRestoreStableService: true,
expectObj: func() ([]*corev1.Service, []*netv1.Ingress) {
s1 := demoService.DeepCopy()
s2 := demoService.DeepCopy()
s2.Name = "echoserver-canary"
s2.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey] = "podtemplatehash-v2"
s1.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey] = "podtemplatehash-v1"
c1 := demoIngress.DeepCopy()
c2 := demoIngress.DeepCopy()
c2.Name = "echoserver-canary"
c2.Annotations[fmt.Sprintf("%s/canary", nginxIngressAnnotationDefaultPrefix)] = "true"
c2.Annotations[fmt.Sprintf("%s/canary-weight", nginxIngressAnnotationDefaultPrefix)] = "100"
c2.Spec.Rules[0].HTTP.Paths[0].Backend.Service.Name = "echoserver-canary"
return []*corev1.Service{s1, s2}, []*netv1.Ingress{c1, c2}
return []*corev1.Service{s1}, []*netv1.Ingress{c1}
},
expectDone: true,
},
{
name: "FinalisingTrafficRouting test4",
name: "OnlyTrafficRouting true",
getObj: func() ([]*corev1.Service, []*netv1.Ingress) {
s1 := demoService.DeepCopy()
s2 := demoService.DeepCopy()
s2.Name = "echoserver-canary"
s2.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey] = "podtemplatehash-v2"
c1 := demoIngress.DeepCopy()
c2 := demoIngress.DeepCopy()
c2.Name = "echoserver-canary"
c2.Annotations[fmt.Sprintf("%s/canary", nginxIngressAnnotationDefaultPrefix)] = "true"
c2.Annotations[fmt.Sprintf("%s/canary-weight", nginxIngressAnnotationDefaultPrefix)] = "100"
c2.Spec.Rules[0].HTTP.Paths[0].Backend.Service.Name = "echoserver-canary"
return []*corev1.Service{s1, s2}, []*netv1.Ingress{c1, c2}
return []*corev1.Service{s1}, []*netv1.Ingress{c1, c2}
},
getRollout: func() (*v1beta1.Rollout, *util.Workload) {
obj := demoRollout.DeepCopy()
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now().Add(-3 * time.Second)}
obj.Status.CanaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now().Add(-time.Hour)}
return obj, &util.Workload{RevisionLabelKey: apps.DefaultDeploymentUniqueLabelKey}
},
onlyRestoreStableService: false,
expectObj: func() ([]*corev1.Service, []*netv1.Ingress) {
s1 := demoService.DeepCopy()
s2 := demoService.DeepCopy()
s2.Name = "echoserver-canary"
s2.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey] = "podtemplatehash-v2"
c1 := demoIngress.DeepCopy()
c2 := demoIngress.DeepCopy()
c2.Name = "echoserver-canary"
c2.Annotations[fmt.Sprintf("%s/canary", nginxIngressAnnotationDefaultPrefix)] = "true"
c2.Annotations[fmt.Sprintf("%s/canary-weight", nginxIngressAnnotationDefaultPrefix)] = "0"
c2.Spec.Rules[0].HTTP.Paths[0].Backend.Service.Name = "echoserver-canary"
return []*corev1.Service{s1, s2}, []*netv1.Ingress{c1, c2}
},
expectDone: false,
},
{
name: "FinalisingTrafficRouting test5",
getObj: func() ([]*corev1.Service, []*netv1.Ingress) {
s1 := demoService.DeepCopy()
s2 := demoService.DeepCopy()
s2.Name = "echoserver-canary"
s2.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey] = "podtemplatehash-v2"
c1 := demoIngress.DeepCopy()
c2 := demoIngress.DeepCopy()
c2.Name = "echoserver-canary"
c2.Annotations[fmt.Sprintf("%s/canary", nginxIngressAnnotationDefaultPrefix)] = "true"
c2.Annotations[fmt.Sprintf("%s/canary-weight", nginxIngressAnnotationDefaultPrefix)] = "0"
c2.Spec.Rules[0].HTTP.Paths[0].Backend.Service.Name = "echoserver-canary"
return []*corev1.Service{s1, s2}, []*netv1.Ingress{c1, c2}
},
getRollout: func() (*v1beta1.Rollout, *util.Workload) {
obj := demoRollout.DeepCopy()
obj.Status.CanaryStatus.CurrentStepState = v1beta1.CanaryStepStateCompleted
obj.Status.CanaryStatus.CurrentStepIndex = 4
obj.Status.CanaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now().Add(-3 * time.Second)}
return obj, &util.Workload{RevisionLabelKey: apps.DefaultDeploymentUniqueLabelKey}
},
onlyRestoreStableService: false,
onlyTrafficRouting: true,
expectObj: func() ([]*corev1.Service, []*netv1.Ingress) {
s1 := demoService.DeepCopy()
c1 := demoIngress.DeepCopy()
@ -1125,18 +1074,19 @@ func TestFinalisingTrafficRouting(t *testing.T) {
newStatus := rollout.Status.DeepCopy()
currentStep := rollout.Spec.Strategy.Canary.Steps[newStatus.CanaryStatus.CurrentStepIndex-1]
c := &TrafficRoutingContext{
Key: fmt.Sprintf("Rollout(%s/%s)", rollout.Namespace, rollout.Name),
Namespace: rollout.Namespace,
ObjectRef: rollout.Spec.Strategy.Canary.TrafficRoutings,
Strategy: currentStep.TrafficRoutingStrategy,
OwnerRef: *metav1.NewControllerRef(rollout, v1beta1.SchemeGroupVersion.WithKind("Rollout")),
RevisionLabelKey: workload.RevisionLabelKey,
StableRevision: newStatus.CanaryStatus.StableRevision,
CanaryRevision: newStatus.CanaryStatus.PodTemplateHash,
LastUpdateTime: newStatus.CanaryStatus.LastUpdateTime,
Key: fmt.Sprintf("Rollout(%s/%s)", rollout.Namespace, rollout.Name),
Namespace: rollout.Namespace,
ObjectRef: rollout.Spec.Strategy.Canary.TrafficRoutings,
Strategy: currentStep.TrafficRoutingStrategy,
OwnerRef: *metav1.NewControllerRef(rollout, v1beta1.SchemeGroupVersion.WithKind("Rollout")),
RevisionLabelKey: workload.RevisionLabelKey,
StableRevision: newStatus.CanaryStatus.StableRevision,
CanaryRevision: newStatus.CanaryStatus.PodTemplateHash,
LastUpdateTime: newStatus.CanaryStatus.LastUpdateTime,
OnlyTrafficRouting: cs.onlyTrafficRouting,
}
manager := NewTrafficRoutingManager(client)
done, err := manager.FinalisingTrafficRouting(c, cs.onlyRestoreStableService)
done, err := manager.FinalisingTrafficRouting(c)
if err != nil {
t.Fatalf("DoTrafficRouting failed: %s", err)
}

View File

@ -112,9 +112,9 @@ func (r *customController) EnsureRoutes(ctx context.Context, strategy *v1beta1.T
done := true
// *strategy.Weight == 0 indicates traffic routing is doing finalising and tries to route whole traffic to stable service
// then directly do finalising
if strategy.Traffic != nil && *strategy.Traffic == "0%" {
return true, nil
}
// if strategy.Traffic != nil && *strategy.Traffic == "0%" {
// return true, nil
// }
var err error
customNetworkRefList := make([]*unstructured.Unstructured, len(r.conf.TrafficConf))