diff --git a/pkg/controller/batchrelease/control/partitionstyle/control_plane.go b/pkg/controller/batchrelease/control/partitionstyle/control_plane.go index 8a5cd58..c44592b 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/control_plane.go +++ b/pkg/controller/batchrelease/control/partitionstyle/control_plane.go @@ -160,13 +160,13 @@ func (rc *realBatchControlPlane) SyncWorkloadInformation() (control.WorkloadEven workloadInfo := controller.GetInfo() if !workloadInfo.IsStable() { - klog.Info("Workload(%v) still reconciling, waiting for it to complete, generation: %v, observed: %v", + klog.Infof("Workload(%v) still reconciling, waiting for it to complete, generation: %v, observed: %v", workloadInfo.LogKey, workloadInfo.Generation, workloadInfo.Status.ObservedGeneration) return control.WorkloadStillReconciling, workloadInfo, nil } if workloadInfo.IsPromoted() { - klog.Info("Workload(%v) has been promoted, no need to rollout again actually, replicas: %v, updated: %v", + klog.Infof("Workload(%v) has been promoted, no need to rollout again actually, replicas: %v, updated: %v", workloadInfo.LogKey, workloadInfo.Replicas, workloadInfo.Status.UpdatedReadyReplicas) return control.WorkloadNormalState, workloadInfo, nil } diff --git a/pkg/controller/rollout/rollout_canary.go b/pkg/controller/rollout/rollout_canary.go index 6c89057..a02b477 100644 --- a/pkg/controller/rollout/rollout_canary.go +++ b/pkg/controller/rollout/rollout_canary.go @@ -66,7 +66,20 @@ func (m *canaryReleaseManager) runCanary(c *util.RolloutContext) error { if canaryStatus.PodTemplateHash == "" { canaryStatus.PodTemplateHash = c.Workload.PodTemplateHash } - + // When the first batch is trafficRouting rolling and the next steps are rolling release, + // We need to clean up the canary-related resources first and then rollout the rest of the batch. + currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1] + if currentStep.Weight == nil && len(currentStep.Matches) == 0 { + done, err := m.trafficRoutingManager.FinalisingTrafficRouting(c, false) + if err != nil { + return err + } else if !done { + klog.Infof("rollout(%s/%s) cleaning up canary-related resources", c.Rollout.Namespace, c.Rollout.Name) + expectedTime := time.Now().Add(time.Duration(defaultGracePeriodSeconds) * time.Second) + c.RecheckTime = &expectedTime + return nil + } + } switch canaryStatus.CurrentStepState { case v1alpha1.CanaryStepStateUpgrade: klog.Infof("rollout(%s/%s) run canary strategy, and state(%s)", c.Rollout.Namespace, c.Rollout.Name, v1alpha1.CanaryStepStateUpgrade) diff --git a/pkg/trafficrouting/manager.go b/pkg/trafficrouting/manager.go index 5e3f46c..4b1c315 100644 --- a/pkg/trafficrouting/manager.go +++ b/pkg/trafficrouting/manager.go @@ -81,6 +81,10 @@ func (m *Manager) DoTrafficRouting(c *util.RolloutContext) (bool, error) { trafficRouting.GracePeriodSeconds = defaultGracePeriodSeconds } canaryStatus := c.NewStatus.CanaryStatus + currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1] + if currentStep.Weight == nil && len(currentStep.Matches) == 0 { + return true, nil + } if canaryStatus.StableRevision == "" || canaryStatus.PodTemplateHash == "" { klog.Warningf("rollout(%s/%s) stableRevision or podTemplateHash can not be empty, and wait a moment", c.Rollout.Namespace, c.Rollout.Name) return false, nil @@ -171,6 +175,28 @@ func (m *Manager) FinalisingTrafficRouting(c *util.RolloutContext, onlyRestoreSt if trafficRouting.GracePeriodSeconds <= 0 { trafficRouting.GracePeriodSeconds = defaultGracePeriodSeconds } + + cServiceName := fmt.Sprintf("%s-canary", trafficRouting.Service) + trController, err := newNetworkProvider(m.Client, c.Rollout, c.NewStatus, trafficRouting.Service, cServiceName) + if err != nil { + klog.Errorf("rollout(%s/%s) newTrafficRoutingController failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error()) + return false, err + } + + cService := &corev1.Service{ObjectMeta: metav1.ObjectMeta{Namespace: c.Rollout.Namespace, Name: cServiceName}} + // if canary svc has been already cleaned up, just return + if err = m.Get(context.TODO(), client.ObjectKeyFromObject(cService), cService); err != nil { + if !errors.IsNotFound(err) { + klog.Errorf("rollout(%s/%s) get canary service(%s) failed: %s", c.Rollout.Namespace, c.Rollout.Name, cServiceName, err.Error()) + return false, err + } + // In rollout failure case, no canary-service will be created, this step ensures that the canary-ingress can be deleted in a time. + if err = trController.Finalise(context.TODO()); err != nil { + return false, err + } + return true, nil + } + if c.NewStatus.CanaryStatus == nil { c.NewStatus.CanaryStatus = &v1alpha1.CanaryStatus{} } @@ -183,12 +209,6 @@ func (m *Manager) FinalisingTrafficRouting(c *util.RolloutContext, onlyRestoreSt return true, nil } - cServiceName := fmt.Sprintf("%s-canary", trafficRouting.Service) - trController, err := newNetworkProvider(m.Client, c.Rollout, c.NewStatus, trafficRouting.Service, cServiceName) - if err != nil { - klog.Errorf("rollout(%s/%s) newTrafficRoutingController failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error()) - return false, err - } // First route 100% traffic to stable service verify, err = trController.EnsureRoutes(context.TODO(), utilpointer.Int32(0), nil) if err != nil { @@ -210,7 +230,6 @@ func (m *Manager) FinalisingTrafficRouting(c *util.RolloutContext, onlyRestoreSt return false, err } // remove canary service - cService := &corev1.Service{ObjectMeta: metav1.ObjectMeta{Namespace: c.Rollout.Namespace, Name: cServiceName}} err = m.Delete(context.TODO(), cService) if err != nil && !errors.IsNotFound(err) { klog.Errorf("rollout(%s/%s) remove canary service(%s) failed: %s", c.Rollout.Namespace, c.Rollout.Name, cService.Name, err.Error()) diff --git a/pkg/webhook/rollout/validating/rollout_create_update_handler.go b/pkg/webhook/rollout/validating/rollout_create_update_handler.go index 80f34b5..608b691 100644 --- a/pkg/webhook/rollout/validating/rollout_create_update_handler.go +++ b/pkg/webhook/rollout/validating/rollout_create_update_handler.go @@ -211,9 +211,7 @@ func validateRolloutSpecCanarySteps(steps []appsv1alpha1.CanaryStep, fldPath *fi for i := range steps { s := &steps[i] - if isTraffic && s.Weight == nil && len(s.Matches) == 0 { - return field.ErrorList{field.Invalid(fldPath.Index(i).Child("steps"), steps, `weight or matches cannot be empty for traffic routing`)} - } else if s.Weight == nil && s.Replicas == nil { + if s.Weight == nil && s.Replicas == nil { return field.ErrorList{field.Invalid(fldPath.Index(i).Child("steps"), steps, `weight and replicas cannot be empty at the same time`)} } if s.Replicas != nil { diff --git a/test/e2e/rollout_test.go b/test/e2e/rollout_test.go index 6f7d10c..f709667 100644 --- a/test/e2e/rollout_test.go +++ b/test/e2e/rollout_test.go @@ -1981,6 +1981,178 @@ var _ = SIGDescribe("Rollout", func() { Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred()) WaitRolloutWorkloadGeneration(rollout.Name, workload.Generation) }) + + It("V1->V2: A/B testing, aliyun-alb, header & cookies. cloneSet workload", func() { + configmap := &v1.ConfigMap{} + Expect(ReadYamlToObject("./test_data/rollout/rollout-configuration.yaml", configmap)).ToNot(HaveOccurred()) + if err := k8sClient.Create(context.TODO(), configmap); err != nil { + if !errors.IsAlreadyExists(err) { + Expect(err).Should(BeNil()) + } + } + defer k8sClient.Delete(context.TODO(), configmap) + + By("Creating Rollout...") + rollout := &v1alpha1.Rollout{} + Expect(ReadYamlToObject("./test_data/rollout/rollout_canary_base.yaml", rollout)).ToNot(HaveOccurred()) + replica1 := intstr.FromInt(1) + replica2 := intstr.FromInt(3) + rollout.Spec.ObjectRef.WorkloadRef = &v1alpha1.WorkloadRef{ + APIVersion: "apps.kruise.io/v1alpha1", + Kind: "CloneSet", + Name: "echoserver", + } + rollout.Spec.Strategy.Canary.Steps = []v1alpha1.CanaryStep{ + { + Matches: []v1alpha1.HttpRouteMatch{ + { + Headers: []gatewayv1alpha2.HTTPHeaderMatch{ + { + Name: "Cookie", + Value: "demo1=value1;demo2=value2", + }, + { + Name: "SourceIp", + Value: "192.168.0.0/16;172.16.0.0/16", + }, + { + Name: "headername", + Value: "headervalue1;headervalue2", + }, + }, + }, + }, + Pause: v1alpha1.RolloutPause{}, + Replicas: &replica1, + }, + { + Replicas: &replica2, + Pause: v1alpha1.RolloutPause{}, + }, + } + rollout.Spec.Strategy.Canary.TrafficRoutings[0].Ingress.ClassType = "aliyun-alb" + CreateObject(rollout) + By("Creating workload and waiting for all pods ready...") + // service + service := &v1.Service{} + Expect(ReadYamlToObject("./test_data/rollout/service.yaml", service)).ToNot(HaveOccurred()) + CreateObject(service) + // ingress + ingress := &netv1.Ingress{} + Expect(ReadYamlToObject("./test_data/rollout/nginx_ingress.yaml", ingress)).ToNot(HaveOccurred()) + ingress.Annotations = map[string]string{} + ingress.Spec.IngressClassName = utilpointer.String("alb") + CreateObject(ingress) + // workload + workload := &appsv1alpha1.CloneSet{} + Expect(ReadYamlToObject("./test_data/rollout/cloneset.yaml", workload)).ToNot(HaveOccurred()) + CreateObject(workload) + WaitCloneSetAllPodsReady(workload) + + // check rollout status + Expect(GetObject(rollout.Name, rollout)).NotTo(HaveOccurred()) + Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred()) + Expect(rollout.Status.Phase).Should(Equal(v1alpha1.RolloutPhaseHealthy)) + Expect(rollout.Status.CanaryStatus.StableRevision).Should(Equal(workload.Status.CurrentRevision[strings.LastIndex(workload.Status.CurrentRevision, "-")+1:])) + stableRevision := rollout.Status.CanaryStatus.StableRevision + By("check rollout status & paused success") + + // v1 -> v2, start rollout action + newEnvs := mergeEnvVar(workload.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "NODE_NAME", Value: "version2"}) + workload.Spec.Template.Spec.Containers[0].Env = newEnvs + UpdateCloneSet(workload) + By("Update cloneset EnvVar: NODE_NAME from(version1) -> to(version2)") + time.Sleep(time.Second * 3) + // wait step 1 complete + WaitRolloutCanaryStepPaused(rollout.Name, 1) + + // check workload status & paused + Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred()) + Expect(workload.Status.UpdatedReplicas).Should(BeNumerically("==", 1)) + Expect(workload.Status.UpdatedReadyReplicas).Should(BeNumerically("==", 1)) + Expect(workload.Spec.UpdateStrategy.Paused).Should(BeFalse()) + By("check cloneSet status & paused success") + + // check rollout status + Expect(GetObject(rollout.Name, rollout)).NotTo(HaveOccurred()) + Expect(rollout.Status.Phase).Should(Equal(v1alpha1.RolloutPhaseProgressing)) + Expect(rollout.Status.CanaryStatus.StableRevision).Should(Equal(stableRevision)) + Expect(rollout.Status.CanaryStatus.CanaryRevision).Should(Equal(workload.Status.UpdateRevision[strings.LastIndex(workload.Status.UpdateRevision, "-")+1:])) + Expect(rollout.Status.CanaryStatus.PodTemplateHash).Should(Equal(workload.Status.UpdateRevision[strings.LastIndex(workload.Status.UpdateRevision, "-")+1:])) + canaryRevision := rollout.Status.CanaryStatus.PodTemplateHash + Expect(rollout.Status.CanaryStatus.CurrentStepIndex).Should(BeNumerically("==", 1)) + Expect(rollout.Status.CanaryStatus.RolloutHash).Should(Equal(rollout.Annotations[util.RolloutHashAnnotation])) + + // check stable, canary service & ingress + // stable service + Expect(GetObject(service.Name, service)).NotTo(HaveOccurred()) + Expect(service.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey]).Should(Equal(stableRevision)) + //canary service + cService := &v1.Service{} + Expect(GetObject(service.Name+"-canary", cService)).NotTo(HaveOccurred()) + Expect(cService.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey]).Should(Equal(canaryRevision)) + // canary ingress + cIngress := &netv1.Ingress{} + labIngressAnnotationDefaultPrefix := "alb.ingress.kubernetes.io" + Expect(GetObject(service.Name+"-canary", cIngress)).NotTo(HaveOccurred()) + Expect(cIngress.Annotations[fmt.Sprintf("%s/conditions.echoserver-canary", labIngressAnnotationDefaultPrefix)]).Should(Equal(`[{"cookieConfig":{"values":[{"key":"demo1","value":"value1"},{"key":"demo2","value":"value2"}]},"type":"Cookie"},{"sourceIpConfig":{"values":["192.168.0.0/16","172.16.0.0/16"]},"type":"SourceIp"},{"headerConfig":{"key":"headername","values":["headervalue1","headervalue2"]},"type":"Header"}]`)) + + // resume rollout canary + ResumeRolloutCanary(rollout.Name) + // wait step 2 complete + WaitRolloutCanaryStepPaused(rollout.Name, 2) + + // canary ingress and canary service should be deleted + cIngress = &netv1.Ingress{} + Expect(GetObject(service.Name+"-canary", cIngress)).To(HaveOccurred()) + cService = &v1.Service{} + Expect(GetObject(service.Name+"-canary", cService)).To(HaveOccurred()) + + // check service update + Expect(GetObject(service.Name, service)).NotTo(HaveOccurred()) + Expect(service.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey]).Should(Equal("")) + + // check cloneSet + Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred()) + Expect(workload.Status.UpdatedReplicas).Should(BeNumerically("==", 3)) + Expect(workload.Status.UpdatedReadyReplicas).Should(BeNumerically("==", 3)) + Expect(workload.Spec.UpdateStrategy.Paused).Should(BeFalse()) + + // resume rollout to complete + ResumeRolloutCanary(rollout.Name) + WaitRolloutStatusPhase(rollout.Name, v1alpha1.RolloutPhaseHealthy) + WaitCloneSetAllPodsReady(workload) + By("rollout completed, and check") + + // check service & ingress & cloneSet + // ingress + Expect(GetObject(ingress.Name, ingress)).NotTo(HaveOccurred()) + // service + Expect(GetObject(service.Name, service)).NotTo(HaveOccurred()) + // cloneSet + Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred()) + Expect(workload.Status.UpdatedReplicas).Should(BeNumerically("==", 5)) + Expect(workload.Status.UpdatedReadyReplicas).Should(BeNumerically("==", 5)) + Expect(workload.Spec.UpdateStrategy.Partition.IntVal).Should(BeNumerically("==", 0)) + Expect(workload.Spec.UpdateStrategy.Paused).Should(BeFalse()) + Expect(workload.Status.CurrentRevision).Should(ContainSubstring(canaryRevision)) + Expect(workload.Status.UpdateRevision).Should(ContainSubstring(canaryRevision)) + for _, env := range workload.Spec.Template.Spec.Containers[0].Env { + if env.Name == "NODE_NAME" { + Expect(env.Value).Should(Equal("version2")) + } + } + + // check progressing succeed + Expect(GetObject(rollout.Name, rollout)).NotTo(HaveOccurred()) + cond := util.GetRolloutCondition(rollout.Status, v1alpha1.RolloutConditionProgressing) + Expect(cond.Reason).Should(Equal(v1alpha1.ProgressingReasonCompleted)) + Expect(string(cond.Status)).Should(Equal(string(metav1.ConditionFalse))) + cond = util.GetRolloutCondition(rollout.Status, v1alpha1.RolloutConditionSucceeded) + Expect(string(cond.Status)).Should(Equal(string(metav1.ConditionTrue))) + Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred()) + WaitRolloutWorkloadGeneration(rollout.Name, workload.Generation) + }) }) KruiseDescribe("Canary rollout with Gateway API", func() {