Feat: clean up the canary-related resources while canary step's weight and matches were nil (#108)

Signed-off-by: songyang.song <songyang.song@alibaba-inc.com>

Signed-off-by: songyang.song <songyang.song@alibaba-inc.com>
Co-authored-by: songyang.song <songyang.song@alibaba-inc.com>
This commit is contained in:
yangs 2023-01-03 10:31:21 +08:00 committed by GitHub
parent 3165f4e8c6
commit 08dd7878ff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 215 additions and 13 deletions

View File

@ -160,13 +160,13 @@ func (rc *realBatchControlPlane) SyncWorkloadInformation() (control.WorkloadEven
workloadInfo := controller.GetInfo()
if !workloadInfo.IsStable() {
klog.Info("Workload(%v) still reconciling, waiting for it to complete, generation: %v, observed: %v",
klog.Infof("Workload(%v) still reconciling, waiting for it to complete, generation: %v, observed: %v",
workloadInfo.LogKey, workloadInfo.Generation, workloadInfo.Status.ObservedGeneration)
return control.WorkloadStillReconciling, workloadInfo, nil
}
if workloadInfo.IsPromoted() {
klog.Info("Workload(%v) has been promoted, no need to rollout again actually, replicas: %v, updated: %v",
klog.Infof("Workload(%v) has been promoted, no need to rollout again actually, replicas: %v, updated: %v",
workloadInfo.LogKey, workloadInfo.Replicas, workloadInfo.Status.UpdatedReadyReplicas)
return control.WorkloadNormalState, workloadInfo, nil
}

View File

@ -66,7 +66,20 @@ func (m *canaryReleaseManager) runCanary(c *util.RolloutContext) error {
if canaryStatus.PodTemplateHash == "" {
canaryStatus.PodTemplateHash = c.Workload.PodTemplateHash
}
// When the first batch is trafficRouting rolling and the next steps are rolling release,
// We need to clean up the canary-related resources first and then rollout the rest of the batch.
currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1]
if currentStep.Weight == nil && len(currentStep.Matches) == 0 {
done, err := m.trafficRoutingManager.FinalisingTrafficRouting(c, false)
if err != nil {
return err
} else if !done {
klog.Infof("rollout(%s/%s) cleaning up canary-related resources", c.Rollout.Namespace, c.Rollout.Name)
expectedTime := time.Now().Add(time.Duration(defaultGracePeriodSeconds) * time.Second)
c.RecheckTime = &expectedTime
return nil
}
}
switch canaryStatus.CurrentStepState {
case v1alpha1.CanaryStepStateUpgrade:
klog.Infof("rollout(%s/%s) run canary strategy, and state(%s)", c.Rollout.Namespace, c.Rollout.Name, v1alpha1.CanaryStepStateUpgrade)

View File

@ -81,6 +81,10 @@ func (m *Manager) DoTrafficRouting(c *util.RolloutContext) (bool, error) {
trafficRouting.GracePeriodSeconds = defaultGracePeriodSeconds
}
canaryStatus := c.NewStatus.CanaryStatus
currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1]
if currentStep.Weight == nil && len(currentStep.Matches) == 0 {
return true, nil
}
if canaryStatus.StableRevision == "" || canaryStatus.PodTemplateHash == "" {
klog.Warningf("rollout(%s/%s) stableRevision or podTemplateHash can not be empty, and wait a moment", c.Rollout.Namespace, c.Rollout.Name)
return false, nil
@ -171,6 +175,28 @@ func (m *Manager) FinalisingTrafficRouting(c *util.RolloutContext, onlyRestoreSt
if trafficRouting.GracePeriodSeconds <= 0 {
trafficRouting.GracePeriodSeconds = defaultGracePeriodSeconds
}
cServiceName := fmt.Sprintf("%s-canary", trafficRouting.Service)
trController, err := newNetworkProvider(m.Client, c.Rollout, c.NewStatus, trafficRouting.Service, cServiceName)
if err != nil {
klog.Errorf("rollout(%s/%s) newTrafficRoutingController failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return false, err
}
cService := &corev1.Service{ObjectMeta: metav1.ObjectMeta{Namespace: c.Rollout.Namespace, Name: cServiceName}}
// if canary svc has been already cleaned up, just return
if err = m.Get(context.TODO(), client.ObjectKeyFromObject(cService), cService); err != nil {
if !errors.IsNotFound(err) {
klog.Errorf("rollout(%s/%s) get canary service(%s) failed: %s", c.Rollout.Namespace, c.Rollout.Name, cServiceName, err.Error())
return false, err
}
// In rollout failure case, no canary-service will be created, this step ensures that the canary-ingress can be deleted in a time.
if err = trController.Finalise(context.TODO()); err != nil {
return false, err
}
return true, nil
}
if c.NewStatus.CanaryStatus == nil {
c.NewStatus.CanaryStatus = &v1alpha1.CanaryStatus{}
}
@ -183,12 +209,6 @@ func (m *Manager) FinalisingTrafficRouting(c *util.RolloutContext, onlyRestoreSt
return true, nil
}
cServiceName := fmt.Sprintf("%s-canary", trafficRouting.Service)
trController, err := newNetworkProvider(m.Client, c.Rollout, c.NewStatus, trafficRouting.Service, cServiceName)
if err != nil {
klog.Errorf("rollout(%s/%s) newTrafficRoutingController failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
return false, err
}
// First route 100% traffic to stable service
verify, err = trController.EnsureRoutes(context.TODO(), utilpointer.Int32(0), nil)
if err != nil {
@ -210,7 +230,6 @@ func (m *Manager) FinalisingTrafficRouting(c *util.RolloutContext, onlyRestoreSt
return false, err
}
// remove canary service
cService := &corev1.Service{ObjectMeta: metav1.ObjectMeta{Namespace: c.Rollout.Namespace, Name: cServiceName}}
err = m.Delete(context.TODO(), cService)
if err != nil && !errors.IsNotFound(err) {
klog.Errorf("rollout(%s/%s) remove canary service(%s) failed: %s", c.Rollout.Namespace, c.Rollout.Name, cService.Name, err.Error())

View File

@ -211,9 +211,7 @@ func validateRolloutSpecCanarySteps(steps []appsv1alpha1.CanaryStep, fldPath *fi
for i := range steps {
s := &steps[i]
if isTraffic && s.Weight == nil && len(s.Matches) == 0 {
return field.ErrorList{field.Invalid(fldPath.Index(i).Child("steps"), steps, `weight or matches cannot be empty for traffic routing`)}
} else if s.Weight == nil && s.Replicas == nil {
if s.Weight == nil && s.Replicas == nil {
return field.ErrorList{field.Invalid(fldPath.Index(i).Child("steps"), steps, `weight and replicas cannot be empty at the same time`)}
}
if s.Replicas != nil {

View File

@ -1981,6 +1981,178 @@ var _ = SIGDescribe("Rollout", func() {
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
WaitRolloutWorkloadGeneration(rollout.Name, workload.Generation)
})
It("V1->V2: A/B testing, aliyun-alb, header & cookies. cloneSet workload", func() {
configmap := &v1.ConfigMap{}
Expect(ReadYamlToObject("./test_data/rollout/rollout-configuration.yaml", configmap)).ToNot(HaveOccurred())
if err := k8sClient.Create(context.TODO(), configmap); err != nil {
if !errors.IsAlreadyExists(err) {
Expect(err).Should(BeNil())
}
}
defer k8sClient.Delete(context.TODO(), configmap)
By("Creating Rollout...")
rollout := &v1alpha1.Rollout{}
Expect(ReadYamlToObject("./test_data/rollout/rollout_canary_base.yaml", rollout)).ToNot(HaveOccurred())
replica1 := intstr.FromInt(1)
replica2 := intstr.FromInt(3)
rollout.Spec.ObjectRef.WorkloadRef = &v1alpha1.WorkloadRef{
APIVersion: "apps.kruise.io/v1alpha1",
Kind: "CloneSet",
Name: "echoserver",
}
rollout.Spec.Strategy.Canary.Steps = []v1alpha1.CanaryStep{
{
Matches: []v1alpha1.HttpRouteMatch{
{
Headers: []gatewayv1alpha2.HTTPHeaderMatch{
{
Name: "Cookie",
Value: "demo1=value1;demo2=value2",
},
{
Name: "SourceIp",
Value: "192.168.0.0/16;172.16.0.0/16",
},
{
Name: "headername",
Value: "headervalue1;headervalue2",
},
},
},
},
Pause: v1alpha1.RolloutPause{},
Replicas: &replica1,
},
{
Replicas: &replica2,
Pause: v1alpha1.RolloutPause{},
},
}
rollout.Spec.Strategy.Canary.TrafficRoutings[0].Ingress.ClassType = "aliyun-alb"
CreateObject(rollout)
By("Creating workload and waiting for all pods ready...")
// service
service := &v1.Service{}
Expect(ReadYamlToObject("./test_data/rollout/service.yaml", service)).ToNot(HaveOccurred())
CreateObject(service)
// ingress
ingress := &netv1.Ingress{}
Expect(ReadYamlToObject("./test_data/rollout/nginx_ingress.yaml", ingress)).ToNot(HaveOccurred())
ingress.Annotations = map[string]string{}
ingress.Spec.IngressClassName = utilpointer.String("alb")
CreateObject(ingress)
// workload
workload := &appsv1alpha1.CloneSet{}
Expect(ReadYamlToObject("./test_data/rollout/cloneset.yaml", workload)).ToNot(HaveOccurred())
CreateObject(workload)
WaitCloneSetAllPodsReady(workload)
// check rollout status
Expect(GetObject(rollout.Name, rollout)).NotTo(HaveOccurred())
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
Expect(rollout.Status.Phase).Should(Equal(v1alpha1.RolloutPhaseHealthy))
Expect(rollout.Status.CanaryStatus.StableRevision).Should(Equal(workload.Status.CurrentRevision[strings.LastIndex(workload.Status.CurrentRevision, "-")+1:]))
stableRevision := rollout.Status.CanaryStatus.StableRevision
By("check rollout status & paused success")
// v1 -> v2, start rollout action
newEnvs := mergeEnvVar(workload.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "NODE_NAME", Value: "version2"})
workload.Spec.Template.Spec.Containers[0].Env = newEnvs
UpdateCloneSet(workload)
By("Update cloneset EnvVar: NODE_NAME from(version1) -> to(version2)")
time.Sleep(time.Second * 3)
// wait step 1 complete
WaitRolloutCanaryStepPaused(rollout.Name, 1)
// check workload status & paused
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
Expect(workload.Status.UpdatedReplicas).Should(BeNumerically("==", 1))
Expect(workload.Status.UpdatedReadyReplicas).Should(BeNumerically("==", 1))
Expect(workload.Spec.UpdateStrategy.Paused).Should(BeFalse())
By("check cloneSet status & paused success")
// check rollout status
Expect(GetObject(rollout.Name, rollout)).NotTo(HaveOccurred())
Expect(rollout.Status.Phase).Should(Equal(v1alpha1.RolloutPhaseProgressing))
Expect(rollout.Status.CanaryStatus.StableRevision).Should(Equal(stableRevision))
Expect(rollout.Status.CanaryStatus.CanaryRevision).Should(Equal(workload.Status.UpdateRevision[strings.LastIndex(workload.Status.UpdateRevision, "-")+1:]))
Expect(rollout.Status.CanaryStatus.PodTemplateHash).Should(Equal(workload.Status.UpdateRevision[strings.LastIndex(workload.Status.UpdateRevision, "-")+1:]))
canaryRevision := rollout.Status.CanaryStatus.PodTemplateHash
Expect(rollout.Status.CanaryStatus.CurrentStepIndex).Should(BeNumerically("==", 1))
Expect(rollout.Status.CanaryStatus.RolloutHash).Should(Equal(rollout.Annotations[util.RolloutHashAnnotation]))
// check stable, canary service & ingress
// stable service
Expect(GetObject(service.Name, service)).NotTo(HaveOccurred())
Expect(service.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey]).Should(Equal(stableRevision))
//canary service
cService := &v1.Service{}
Expect(GetObject(service.Name+"-canary", cService)).NotTo(HaveOccurred())
Expect(cService.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey]).Should(Equal(canaryRevision))
// canary ingress
cIngress := &netv1.Ingress{}
labIngressAnnotationDefaultPrefix := "alb.ingress.kubernetes.io"
Expect(GetObject(service.Name+"-canary", cIngress)).NotTo(HaveOccurred())
Expect(cIngress.Annotations[fmt.Sprintf("%s/conditions.echoserver-canary", labIngressAnnotationDefaultPrefix)]).Should(Equal(`[{"cookieConfig":{"values":[{"key":"demo1","value":"value1"},{"key":"demo2","value":"value2"}]},"type":"Cookie"},{"sourceIpConfig":{"values":["192.168.0.0/16","172.16.0.0/16"]},"type":"SourceIp"},{"headerConfig":{"key":"headername","values":["headervalue1","headervalue2"]},"type":"Header"}]`))
// resume rollout canary
ResumeRolloutCanary(rollout.Name)
// wait step 2 complete
WaitRolloutCanaryStepPaused(rollout.Name, 2)
// canary ingress and canary service should be deleted
cIngress = &netv1.Ingress{}
Expect(GetObject(service.Name+"-canary", cIngress)).To(HaveOccurred())
cService = &v1.Service{}
Expect(GetObject(service.Name+"-canary", cService)).To(HaveOccurred())
// check service update
Expect(GetObject(service.Name, service)).NotTo(HaveOccurred())
Expect(service.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey]).Should(Equal(""))
// check cloneSet
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
Expect(workload.Status.UpdatedReplicas).Should(BeNumerically("==", 3))
Expect(workload.Status.UpdatedReadyReplicas).Should(BeNumerically("==", 3))
Expect(workload.Spec.UpdateStrategy.Paused).Should(BeFalse())
// resume rollout to complete
ResumeRolloutCanary(rollout.Name)
WaitRolloutStatusPhase(rollout.Name, v1alpha1.RolloutPhaseHealthy)
WaitCloneSetAllPodsReady(workload)
By("rollout completed, and check")
// check service & ingress & cloneSet
// ingress
Expect(GetObject(ingress.Name, ingress)).NotTo(HaveOccurred())
// service
Expect(GetObject(service.Name, service)).NotTo(HaveOccurred())
// cloneSet
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
Expect(workload.Status.UpdatedReplicas).Should(BeNumerically("==", 5))
Expect(workload.Status.UpdatedReadyReplicas).Should(BeNumerically("==", 5))
Expect(workload.Spec.UpdateStrategy.Partition.IntVal).Should(BeNumerically("==", 0))
Expect(workload.Spec.UpdateStrategy.Paused).Should(BeFalse())
Expect(workload.Status.CurrentRevision).Should(ContainSubstring(canaryRevision))
Expect(workload.Status.UpdateRevision).Should(ContainSubstring(canaryRevision))
for _, env := range workload.Spec.Template.Spec.Containers[0].Env {
if env.Name == "NODE_NAME" {
Expect(env.Value).Should(Equal("version2"))
}
}
// check progressing succeed
Expect(GetObject(rollout.Name, rollout)).NotTo(HaveOccurred())
cond := util.GetRolloutCondition(rollout.Status, v1alpha1.RolloutConditionProgressing)
Expect(cond.Reason).Should(Equal(v1alpha1.ProgressingReasonCompleted))
Expect(string(cond.Status)).Should(Equal(string(metav1.ConditionFalse)))
cond = util.GetRolloutCondition(rollout.Status, v1alpha1.RolloutConditionSucceeded)
Expect(string(cond.Status)).Should(Equal(string(metav1.ConditionTrue)))
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
WaitRolloutWorkloadGeneration(rollout.Name, workload.Generation)
})
})
KruiseDescribe("Canary rollout with Gateway API", func() {