Feat: clean up the canary-related resources when the canary step's weight and matches are nil (#108)
Signed-off-by: songyang.song <songyang.song@alibaba-inc.com> Co-authored-by: songyang.song <songyang.song@alibaba-inc.com>
This commit is contained in:
parent
3165f4e8c6
commit
08dd7878ff
|
|
@ -160,13 +160,13 @@ func (rc *realBatchControlPlane) SyncWorkloadInformation() (control.WorkloadEven
|
|||
|
||||
workloadInfo := controller.GetInfo()
|
||||
if !workloadInfo.IsStable() {
|
||||
klog.Info("Workload(%v) still reconciling, waiting for it to complete, generation: %v, observed: %v",
|
||||
klog.Infof("Workload(%v) still reconciling, waiting for it to complete, generation: %v, observed: %v",
|
||||
workloadInfo.LogKey, workloadInfo.Generation, workloadInfo.Status.ObservedGeneration)
|
||||
return control.WorkloadStillReconciling, workloadInfo, nil
|
||||
}
|
||||
|
||||
if workloadInfo.IsPromoted() {
|
||||
klog.Info("Workload(%v) has been promoted, no need to rollout again actually, replicas: %v, updated: %v",
|
||||
klog.Infof("Workload(%v) has been promoted, no need to rollout again actually, replicas: %v, updated: %v",
|
||||
workloadInfo.LogKey, workloadInfo.Replicas, workloadInfo.Status.UpdatedReadyReplicas)
|
||||
return control.WorkloadNormalState, workloadInfo, nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -66,7 +66,20 @@ func (m *canaryReleaseManager) runCanary(c *util.RolloutContext) error {
|
|||
if canaryStatus.PodTemplateHash == "" {
|
||||
canaryStatus.PodTemplateHash = c.Workload.PodTemplateHash
|
||||
}
|
||||
|
||||
// When the first batch uses traffic routing and the following steps are batch-only releases,
|
||||
// we need to clean up the canary-related resources first and then roll out the rest of the batches.
|
||||
currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1]
|
||||
if currentStep.Weight == nil && len(currentStep.Matches) == 0 {
|
||||
done, err := m.trafficRoutingManager.FinalisingTrafficRouting(c, false)
|
||||
if err != nil {
|
||||
return err
|
||||
} else if !done {
|
||||
klog.Infof("rollout(%s/%s) cleaning up canary-related resources", c.Rollout.Namespace, c.Rollout.Name)
|
||||
expectedTime := time.Now().Add(time.Duration(defaultGracePeriodSeconds) * time.Second)
|
||||
c.RecheckTime = &expectedTime
|
||||
return nil
|
||||
}
|
||||
}
|
||||
switch canaryStatus.CurrentStepState {
|
||||
case v1alpha1.CanaryStepStateUpgrade:
|
||||
klog.Infof("rollout(%s/%s) run canary strategy, and state(%s)", c.Rollout.Namespace, c.Rollout.Name, v1alpha1.CanaryStepStateUpgrade)
|
||||
|
|
|
|||
|
|
@ -81,6 +81,10 @@ func (m *Manager) DoTrafficRouting(c *util.RolloutContext) (bool, error) {
|
|||
trafficRouting.GracePeriodSeconds = defaultGracePeriodSeconds
|
||||
}
|
||||
canaryStatus := c.NewStatus.CanaryStatus
|
||||
currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1]
|
||||
if currentStep.Weight == nil && len(currentStep.Matches) == 0 {
|
||||
return true, nil
|
||||
}
|
||||
if canaryStatus.StableRevision == "" || canaryStatus.PodTemplateHash == "" {
|
||||
klog.Warningf("rollout(%s/%s) stableRevision or podTemplateHash can not be empty, and wait a moment", c.Rollout.Namespace, c.Rollout.Name)
|
||||
return false, nil
|
||||
|
|
@ -171,6 +175,28 @@ func (m *Manager) FinalisingTrafficRouting(c *util.RolloutContext, onlyRestoreSt
|
|||
if trafficRouting.GracePeriodSeconds <= 0 {
|
||||
trafficRouting.GracePeriodSeconds = defaultGracePeriodSeconds
|
||||
}
|
||||
|
||||
cServiceName := fmt.Sprintf("%s-canary", trafficRouting.Service)
|
||||
trController, err := newNetworkProvider(m.Client, c.Rollout, c.NewStatus, trafficRouting.Service, cServiceName)
|
||||
if err != nil {
|
||||
klog.Errorf("rollout(%s/%s) newTrafficRoutingController failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
|
||||
return false, err
|
||||
}
|
||||
|
||||
cService := &corev1.Service{ObjectMeta: metav1.ObjectMeta{Namespace: c.Rollout.Namespace, Name: cServiceName}}
|
||||
// if canary svc has been already cleaned up, just return
|
||||
if err = m.Get(context.TODO(), client.ObjectKeyFromObject(cService), cService); err != nil {
|
||||
if !errors.IsNotFound(err) {
|
||||
klog.Errorf("rollout(%s/%s) get canary service(%s) failed: %s", c.Rollout.Namespace, c.Rollout.Name, cServiceName, err.Error())
|
||||
return false, err
|
||||
}
|
||||
// In the rollout failure case, no canary service will have been created; this step ensures that the canary ingress can still be deleted in time.
|
||||
if err = trController.Finalise(context.TODO()); err != nil {
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if c.NewStatus.CanaryStatus == nil {
|
||||
c.NewStatus.CanaryStatus = &v1alpha1.CanaryStatus{}
|
||||
}
|
||||
|
|
@ -183,12 +209,6 @@ func (m *Manager) FinalisingTrafficRouting(c *util.RolloutContext, onlyRestoreSt
|
|||
return true, nil
|
||||
}
|
||||
|
||||
cServiceName := fmt.Sprintf("%s-canary", trafficRouting.Service)
|
||||
trController, err := newNetworkProvider(m.Client, c.Rollout, c.NewStatus, trafficRouting.Service, cServiceName)
|
||||
if err != nil {
|
||||
klog.Errorf("rollout(%s/%s) newTrafficRoutingController failed: %s", c.Rollout.Namespace, c.Rollout.Name, err.Error())
|
||||
return false, err
|
||||
}
|
||||
// First route 100% traffic to stable service
|
||||
verify, err = trController.EnsureRoutes(context.TODO(), utilpointer.Int32(0), nil)
|
||||
if err != nil {
|
||||
|
|
@ -210,7 +230,6 @@ func (m *Manager) FinalisingTrafficRouting(c *util.RolloutContext, onlyRestoreSt
|
|||
return false, err
|
||||
}
|
||||
// remove canary service
|
||||
cService := &corev1.Service{ObjectMeta: metav1.ObjectMeta{Namespace: c.Rollout.Namespace, Name: cServiceName}}
|
||||
err = m.Delete(context.TODO(), cService)
|
||||
if err != nil && !errors.IsNotFound(err) {
|
||||
klog.Errorf("rollout(%s/%s) remove canary service(%s) failed: %s", c.Rollout.Namespace, c.Rollout.Name, cService.Name, err.Error())
|
||||
|
|
|
|||
|
|
@ -211,9 +211,7 @@ func validateRolloutSpecCanarySteps(steps []appsv1alpha1.CanaryStep, fldPath *fi
|
|||
|
||||
for i := range steps {
|
||||
s := &steps[i]
|
||||
if isTraffic && s.Weight == nil && len(s.Matches) == 0 {
|
||||
return field.ErrorList{field.Invalid(fldPath.Index(i).Child("steps"), steps, `weight or matches cannot be empty for traffic routing`)}
|
||||
} else if s.Weight == nil && s.Replicas == nil {
|
||||
if s.Weight == nil && s.Replicas == nil {
|
||||
return field.ErrorList{field.Invalid(fldPath.Index(i).Child("steps"), steps, `weight and replicas cannot be empty at the same time`)}
|
||||
}
|
||||
if s.Replicas != nil {
|
||||
|
|
|
|||
|
|
@ -1981,6 +1981,178 @@ var _ = SIGDescribe("Rollout", func() {
|
|||
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
|
||||
WaitRolloutWorkloadGeneration(rollout.Name, workload.Generation)
|
||||
})
|
||||
|
||||
It("V1->V2: A/B testing, aliyun-alb, header & cookies. cloneSet workload", func() {
|
||||
configmap := &v1.ConfigMap{}
|
||||
Expect(ReadYamlToObject("./test_data/rollout/rollout-configuration.yaml", configmap)).ToNot(HaveOccurred())
|
||||
if err := k8sClient.Create(context.TODO(), configmap); err != nil {
|
||||
if !errors.IsAlreadyExists(err) {
|
||||
Expect(err).Should(BeNil())
|
||||
}
|
||||
}
|
||||
defer k8sClient.Delete(context.TODO(), configmap)
|
||||
|
||||
By("Creating Rollout...")
|
||||
rollout := &v1alpha1.Rollout{}
|
||||
Expect(ReadYamlToObject("./test_data/rollout/rollout_canary_base.yaml", rollout)).ToNot(HaveOccurred())
|
||||
replica1 := intstr.FromInt(1)
|
||||
replica2 := intstr.FromInt(3)
|
||||
rollout.Spec.ObjectRef.WorkloadRef = &v1alpha1.WorkloadRef{
|
||||
APIVersion: "apps.kruise.io/v1alpha1",
|
||||
Kind: "CloneSet",
|
||||
Name: "echoserver",
|
||||
}
|
||||
rollout.Spec.Strategy.Canary.Steps = []v1alpha1.CanaryStep{
|
||||
{
|
||||
Matches: []v1alpha1.HttpRouteMatch{
|
||||
{
|
||||
Headers: []gatewayv1alpha2.HTTPHeaderMatch{
|
||||
{
|
||||
Name: "Cookie",
|
||||
Value: "demo1=value1;demo2=value2",
|
||||
},
|
||||
{
|
||||
Name: "SourceIp",
|
||||
Value: "192.168.0.0/16;172.16.0.0/16",
|
||||
},
|
||||
{
|
||||
Name: "headername",
|
||||
Value: "headervalue1;headervalue2",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
Pause: v1alpha1.RolloutPause{},
|
||||
Replicas: &replica1,
|
||||
},
|
||||
{
|
||||
Replicas: &replica2,
|
||||
Pause: v1alpha1.RolloutPause{},
|
||||
},
|
||||
}
|
||||
rollout.Spec.Strategy.Canary.TrafficRoutings[0].Ingress.ClassType = "aliyun-alb"
|
||||
CreateObject(rollout)
|
||||
By("Creating workload and waiting for all pods ready...")
|
||||
// service
|
||||
service := &v1.Service{}
|
||||
Expect(ReadYamlToObject("./test_data/rollout/service.yaml", service)).ToNot(HaveOccurred())
|
||||
CreateObject(service)
|
||||
// ingress
|
||||
ingress := &netv1.Ingress{}
|
||||
Expect(ReadYamlToObject("./test_data/rollout/nginx_ingress.yaml", ingress)).ToNot(HaveOccurred())
|
||||
ingress.Annotations = map[string]string{}
|
||||
ingress.Spec.IngressClassName = utilpointer.String("alb")
|
||||
CreateObject(ingress)
|
||||
// workload
|
||||
workload := &appsv1alpha1.CloneSet{}
|
||||
Expect(ReadYamlToObject("./test_data/rollout/cloneset.yaml", workload)).ToNot(HaveOccurred())
|
||||
CreateObject(workload)
|
||||
WaitCloneSetAllPodsReady(workload)
|
||||
|
||||
// check rollout status
|
||||
Expect(GetObject(rollout.Name, rollout)).NotTo(HaveOccurred())
|
||||
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
|
||||
Expect(rollout.Status.Phase).Should(Equal(v1alpha1.RolloutPhaseHealthy))
|
||||
Expect(rollout.Status.CanaryStatus.StableRevision).Should(Equal(workload.Status.CurrentRevision[strings.LastIndex(workload.Status.CurrentRevision, "-")+1:]))
|
||||
stableRevision := rollout.Status.CanaryStatus.StableRevision
|
||||
By("check rollout status & paused success")
|
||||
|
||||
// v1 -> v2, start rollout action
|
||||
newEnvs := mergeEnvVar(workload.Spec.Template.Spec.Containers[0].Env, v1.EnvVar{Name: "NODE_NAME", Value: "version2"})
|
||||
workload.Spec.Template.Spec.Containers[0].Env = newEnvs
|
||||
UpdateCloneSet(workload)
|
||||
By("Update cloneset EnvVar: NODE_NAME from(version1) -> to(version2)")
|
||||
time.Sleep(time.Second * 3)
|
||||
// wait step 1 complete
|
||||
WaitRolloutCanaryStepPaused(rollout.Name, 1)
|
||||
|
||||
// check workload status & paused
|
||||
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
|
||||
Expect(workload.Status.UpdatedReplicas).Should(BeNumerically("==", 1))
|
||||
Expect(workload.Status.UpdatedReadyReplicas).Should(BeNumerically("==", 1))
|
||||
Expect(workload.Spec.UpdateStrategy.Paused).Should(BeFalse())
|
||||
By("check cloneSet status & paused success")
|
||||
|
||||
// check rollout status
|
||||
Expect(GetObject(rollout.Name, rollout)).NotTo(HaveOccurred())
|
||||
Expect(rollout.Status.Phase).Should(Equal(v1alpha1.RolloutPhaseProgressing))
|
||||
Expect(rollout.Status.CanaryStatus.StableRevision).Should(Equal(stableRevision))
|
||||
Expect(rollout.Status.CanaryStatus.CanaryRevision).Should(Equal(workload.Status.UpdateRevision[strings.LastIndex(workload.Status.UpdateRevision, "-")+1:]))
|
||||
Expect(rollout.Status.CanaryStatus.PodTemplateHash).Should(Equal(workload.Status.UpdateRevision[strings.LastIndex(workload.Status.UpdateRevision, "-")+1:]))
|
||||
canaryRevision := rollout.Status.CanaryStatus.PodTemplateHash
|
||||
Expect(rollout.Status.CanaryStatus.CurrentStepIndex).Should(BeNumerically("==", 1))
|
||||
Expect(rollout.Status.CanaryStatus.RolloutHash).Should(Equal(rollout.Annotations[util.RolloutHashAnnotation]))
|
||||
|
||||
// check stable, canary service & ingress
|
||||
// stable service
|
||||
Expect(GetObject(service.Name, service)).NotTo(HaveOccurred())
|
||||
Expect(service.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey]).Should(Equal(stableRevision))
|
||||
//canary service
|
||||
cService := &v1.Service{}
|
||||
Expect(GetObject(service.Name+"-canary", cService)).NotTo(HaveOccurred())
|
||||
Expect(cService.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey]).Should(Equal(canaryRevision))
|
||||
// canary ingress
|
||||
cIngress := &netv1.Ingress{}
|
||||
labIngressAnnotationDefaultPrefix := "alb.ingress.kubernetes.io"
|
||||
Expect(GetObject(service.Name+"-canary", cIngress)).NotTo(HaveOccurred())
|
||||
Expect(cIngress.Annotations[fmt.Sprintf("%s/conditions.echoserver-canary", labIngressAnnotationDefaultPrefix)]).Should(Equal(`[{"cookieConfig":{"values":[{"key":"demo1","value":"value1"},{"key":"demo2","value":"value2"}]},"type":"Cookie"},{"sourceIpConfig":{"values":["192.168.0.0/16","172.16.0.0/16"]},"type":"SourceIp"},{"headerConfig":{"key":"headername","values":["headervalue1","headervalue2"]},"type":"Header"}]`))
|
||||
|
||||
// resume rollout canary
|
||||
ResumeRolloutCanary(rollout.Name)
|
||||
// wait step 2 complete
|
||||
WaitRolloutCanaryStepPaused(rollout.Name, 2)
|
||||
|
||||
// canary ingress and canary service should be deleted
|
||||
cIngress = &netv1.Ingress{}
|
||||
Expect(GetObject(service.Name+"-canary", cIngress)).To(HaveOccurred())
|
||||
cService = &v1.Service{}
|
||||
Expect(GetObject(service.Name+"-canary", cService)).To(HaveOccurred())
|
||||
|
||||
// check service update
|
||||
Expect(GetObject(service.Name, service)).NotTo(HaveOccurred())
|
||||
Expect(service.Spec.Selector[apps.DefaultDeploymentUniqueLabelKey]).Should(Equal(""))
|
||||
|
||||
// check cloneSet
|
||||
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
|
||||
Expect(workload.Status.UpdatedReplicas).Should(BeNumerically("==", 3))
|
||||
Expect(workload.Status.UpdatedReadyReplicas).Should(BeNumerically("==", 3))
|
||||
Expect(workload.Spec.UpdateStrategy.Paused).Should(BeFalse())
|
||||
|
||||
// resume rollout to complete
|
||||
ResumeRolloutCanary(rollout.Name)
|
||||
WaitRolloutStatusPhase(rollout.Name, v1alpha1.RolloutPhaseHealthy)
|
||||
WaitCloneSetAllPodsReady(workload)
|
||||
By("rollout completed, and check")
|
||||
|
||||
// check service & ingress & cloneSet
|
||||
// ingress
|
||||
Expect(GetObject(ingress.Name, ingress)).NotTo(HaveOccurred())
|
||||
// service
|
||||
Expect(GetObject(service.Name, service)).NotTo(HaveOccurred())
|
||||
// cloneSet
|
||||
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
|
||||
Expect(workload.Status.UpdatedReplicas).Should(BeNumerically("==", 5))
|
||||
Expect(workload.Status.UpdatedReadyReplicas).Should(BeNumerically("==", 5))
|
||||
Expect(workload.Spec.UpdateStrategy.Partition.IntVal).Should(BeNumerically("==", 0))
|
||||
Expect(workload.Spec.UpdateStrategy.Paused).Should(BeFalse())
|
||||
Expect(workload.Status.CurrentRevision).Should(ContainSubstring(canaryRevision))
|
||||
Expect(workload.Status.UpdateRevision).Should(ContainSubstring(canaryRevision))
|
||||
for _, env := range workload.Spec.Template.Spec.Containers[0].Env {
|
||||
if env.Name == "NODE_NAME" {
|
||||
Expect(env.Value).Should(Equal("version2"))
|
||||
}
|
||||
}
|
||||
|
||||
// check progressing succeed
|
||||
Expect(GetObject(rollout.Name, rollout)).NotTo(HaveOccurred())
|
||||
cond := util.GetRolloutCondition(rollout.Status, v1alpha1.RolloutConditionProgressing)
|
||||
Expect(cond.Reason).Should(Equal(v1alpha1.ProgressingReasonCompleted))
|
||||
Expect(string(cond.Status)).Should(Equal(string(metav1.ConditionFalse)))
|
||||
cond = util.GetRolloutCondition(rollout.Status, v1alpha1.RolloutConditionSucceeded)
|
||||
Expect(string(cond.Status)).Should(Equal(string(metav1.ConditionTrue)))
|
||||
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
|
||||
WaitRolloutWorkloadGeneration(rollout.Name, workload.Generation)
|
||||
})
|
||||
})
|
||||
|
||||
KruiseDescribe("Canary rollout with Gateway API", func() {
|
||||
|
|
|
|||
Loading…
Reference in New Issue