Fix: blue-green batch-id e2e sometimes fails

Signed-off-by: AiRanthem <zhongtianyun.zty@alibaba-inc.com>
This commit is contained in:
AiRanthem 2025-04-01 11:07:29 +08:00
parent ec1a67c19e
commit ec34bb53db
6 changed files with 22 additions and 10 deletions

View File

@ -160,7 +160,7 @@ func (r *Executor) progressBatches(release *v1beta1.BatchRelease, newStatus *v1b
case v1beta1.VerifyingBatchState: case v1beta1.VerifyingBatchState:
// replicas/partition has been modified, should wait pod ready in this state. // replicas/partition has been modified, should wait pod ready in this state.
err = workloadController.CheckBatchReady() err = workloadController.EnsureBatchPodsReadyAndLabeled()
switch { switch {
case err != nil: case err != nil:
// should go to upgrade state to do again to avoid dead wait. // should go to upgrade state to do again to avoid dead wait.
@ -175,7 +175,7 @@ func (r *Executor) progressBatches(release *v1beta1.BatchRelease, newStatus *v1b
case v1beta1.ReadyBatchState: case v1beta1.ReadyBatchState:
// replicas/partition may be modified even though ready, should recheck in this state. // replicas/partition may be modified even though ready, should recheck in this state.
err = workloadController.CheckBatchReady() err = workloadController.EnsureBatchPodsReadyAndLabeled()
switch { switch {
case err != nil: case err != nil:
// if the batch ready condition changed due to some reasons, just recalculate the current batch. // if the batch ready condition changed due to some reasons, just recalculate the current batch.

View File

@ -107,7 +107,7 @@ func (rc *realBatchControlPlane) UpgradeBatch() error {
return nil return nil
} }
func (rc *realBatchControlPlane) CheckBatchReady() error { func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error {
controller, err := rc.BuildController() controller, err := rc.BuildController()
if err != nil { if err != nil {
return err return err

View File

@ -115,10 +115,10 @@ func (rc *realCanaryController) UpgradeBatch() error {
return err return err
} }
return rc.patcher.PatchPodBatchLabel(batchContext) return nil
} }
func (rc *realCanaryController) CheckBatchReady() error { func (rc *realCanaryController) EnsureBatchPodsReadyAndLabeled() error {
stable, err := rc.BuildStableController() stable, err := rc.BuildStableController()
if err != nil { if err != nil {
return err return err
@ -143,7 +143,9 @@ func (rc *realCanaryController) CheckBatchReady() error {
} }
klog.Infof("BatchRelease %v calculated context when check batch ready: %s", klog.Infof("BatchRelease %v calculated context when check batch ready: %s",
klog.KObj(rc.release), batchContext.Log()) klog.KObj(rc.release), batchContext.Log())
if err = rc.patcher.PatchPodBatchLabel(batchContext); err != nil {
klog.ErrorS(err, "failed to patch pod labels", "release", klog.KObj(rc.release))
}
return batchContext.IsBatchReady() return batchContext.IsBatchReady()
} }

View File

@ -52,10 +52,10 @@ type Interface interface {
// it returns nil if the preparation is succeeded, else the preparation should retry. // it returns nil if the preparation is succeeded, else the preparation should retry.
UpgradeBatch() error UpgradeBatch() error
// CheckBatchReady checks how many replicas are ready to serve requests in the current batch. // EnsureBatchPodsReadyAndLabeled checks how many replicas are ready to serve requests in the current batch.
// this function is tasked to do any initialization work on the resources. // this function is tasked to do any initialization work on the resources.
// it returns nil if the preparation is succeeded, else the preparation should retry. // it returns nil if the preparation is succeeded, else the preparation should retry.
CheckBatchReady() error EnsureBatchPodsReadyAndLabeled() error
// Finalize makes sure the resources are in a good final state. // Finalize makes sure the resources are in a good final state.
// this function is tasked to do any initialization work on the resources. // this function is tasked to do any initialization work on the resources.

View File

@ -114,7 +114,7 @@ func (rc *realBatchControlPlane) UpgradeBatch() error {
return rc.patcher.PatchPodBatchLabel(batchContext) return rc.patcher.PatchPodBatchLabel(batchContext)
} }
func (rc *realBatchControlPlane) CheckBatchReady() error { func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error {
controller, err := rc.BuildController() controller, err := rc.BuildController()
if err != nil { if err != nil {
return err return err

View File

@ -624,6 +624,9 @@ var _ = SIGDescribe("Rollout v1beta1", func() {
Expect(GetObject(service.Name+"-canary", cIngress)).NotTo(HaveOccurred()) Expect(GetObject(service.Name+"-canary", cIngress)).NotTo(HaveOccurred())
Expect(cIngress.Annotations[fmt.Sprintf("%s/canary", nginxIngressAnnotationDefaultPrefix)]).Should(Equal("true")) Expect(cIngress.Annotations[fmt.Sprintf("%s/canary", nginxIngressAnnotationDefaultPrefix)]).Should(Equal("true"))
Expect(cIngress.Annotations[fmt.Sprintf("%s/canary-weight", nginxIngressAnnotationDefaultPrefix)]).Should(Equal(removePercentageSign(*rollout.Spec.Strategy.Canary.Steps[3].Traffic))) Expect(cIngress.Annotations[fmt.Sprintf("%s/canary-weight", nginxIngressAnnotationDefaultPrefix)]).Should(Equal(removePercentageSign(*rollout.Spec.Strategy.Canary.Steps[3].Traffic)))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "1", 1))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "2", 1))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "3", 1))
// Jump to step 3 // Jump to step 3
By("Jump to step 3") By("Jump to step 3")
@ -820,7 +823,11 @@ var _ = SIGDescribe("Rollout v1beta1", func() {
Expect(string(cond.Status)).Should(Equal(string(metav1.ConditionTrue))) Expect(string(cond.Status)).Should(Equal(string(metav1.ConditionTrue)))
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred()) Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
WaitRolloutWorkloadGeneration(rollout.Name, workload.Generation) WaitRolloutWorkloadGeneration(rollout.Name, workload.Generation)
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "1", 1))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "2", 1))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "3", 1))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "4", 1))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "5", 1))
}) })
// step1-> 2-> 3-> 4-> 3-(TrafficChange)-> 3-> 2-> 1-> 5 // step1-> 2-> 3-> 4-> 3-(TrafficChange)-> 3-> 2-> 1-> 5
@ -1561,6 +1568,9 @@ var _ = SIGDescribe("Rollout v1beta1", func() {
Expect(GetObject(service.Name+"-canary", cIngress)).NotTo(HaveOccurred()) Expect(GetObject(service.Name+"-canary", cIngress)).NotTo(HaveOccurred())
Expect(cIngress.Annotations[fmt.Sprintf("%s/canary", nginxIngressAnnotationDefaultPrefix)]).Should(Equal("true")) Expect(cIngress.Annotations[fmt.Sprintf("%s/canary", nginxIngressAnnotationDefaultPrefix)]).Should(Equal("true"))
Expect(cIngress.Annotations[fmt.Sprintf("%s/canary-weight", nginxIngressAnnotationDefaultPrefix)]).Should(Equal(removePercentageSign(*rollout.Spec.Strategy.Canary.Steps[2].Traffic))) Expect(cIngress.Annotations[fmt.Sprintf("%s/canary-weight", nginxIngressAnnotationDefaultPrefix)]).Should(Equal(removePercentageSign(*rollout.Spec.Strategy.Canary.Steps[2].Traffic)))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "1", 1))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "2", 1))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "3", 1))
// remove step 2 3 4 // remove step 2 3 4
By("Remove step 2 3 4") By("Remove step 2 3 4")