Fix: blue-green batch-id e2e sometimes fails

Signed-off-by: AiRanthem <zhongtianyun.zty@alibaba-inc.com>
This commit is contained in:
AiRanthem 2025-04-01 11:07:29 +08:00
parent ec1a67c19e
commit ec34bb53db
6 changed files with 22 additions and 10 deletions

View File

@ -160,7 +160,7 @@ func (r *Executor) progressBatches(release *v1beta1.BatchRelease, newStatus *v1b
case v1beta1.VerifyingBatchState:
// replicas/partition has been modified, should wait pod ready in this state.
err = workloadController.CheckBatchReady()
err = workloadController.EnsureBatchPodsReadyAndLabeled()
switch {
case err != nil:
// should go to upgrade state to do again to avoid dead wait.
@ -175,7 +175,7 @@ func (r *Executor) progressBatches(release *v1beta1.BatchRelease, newStatus *v1b
case v1beta1.ReadyBatchState:
// replicas/partition may be modified even though ready, should recheck in this state.
err = workloadController.CheckBatchReady()
err = workloadController.EnsureBatchPodsReadyAndLabeled()
switch {
case err != nil:
// if the batch ready condition changed due to some reasons, just recalculate the current batch.

View File

@ -107,7 +107,7 @@ func (rc *realBatchControlPlane) UpgradeBatch() error {
return nil
}
func (rc *realBatchControlPlane) CheckBatchReady() error {
func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error {
controller, err := rc.BuildController()
if err != nil {
return err

View File

@ -115,10 +115,10 @@ func (rc *realCanaryController) UpgradeBatch() error {
return err
}
return rc.patcher.PatchPodBatchLabel(batchContext)
return nil
}
func (rc *realCanaryController) CheckBatchReady() error {
func (rc *realCanaryController) EnsureBatchPodsReadyAndLabeled() error {
stable, err := rc.BuildStableController()
if err != nil {
return err
@ -143,7 +143,9 @@ func (rc *realCanaryController) CheckBatchReady() error {
}
klog.Infof("BatchRelease %v calculated context when check batch ready: %s",
klog.KObj(rc.release), batchContext.Log())
if err = rc.patcher.PatchPodBatchLabel(batchContext); err != nil {
klog.ErrorS(err, "failed to patch pod labels", "release", klog.KObj(rc.release))
}
return batchContext.IsBatchReady()
}

View File

@ -52,10 +52,10 @@ type Interface interface {
// it returns nil if the preparation is succeeded, else the preparation should retry.
UpgradeBatch() error
// CheckBatchReady checks how many replicas are ready to serve requests in the current batch.
// EnsureBatchPodsReadyAndLabeled checks how many replicas are ready to serve requests in the current batch,
// and also tries to patch the batch label onto the pods of that batch.
// this function is tasked to do any initialization work on the resources.
// it returns nil if the preparation is succeeded, else the preparation should retry.
CheckBatchReady() error
EnsureBatchPodsReadyAndLabeled() error
// Finalize makes sure the resources are in a good final state.
// this function is tasked to do any initialization work on the resources.

View File

@ -114,7 +114,7 @@ func (rc *realBatchControlPlane) UpgradeBatch() error {
return rc.patcher.PatchPodBatchLabel(batchContext)
}
func (rc *realBatchControlPlane) CheckBatchReady() error {
func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error {
controller, err := rc.BuildController()
if err != nil {
return err

View File

@ -624,6 +624,9 @@ var _ = SIGDescribe("Rollout v1beta1", func() {
Expect(GetObject(service.Name+"-canary", cIngress)).NotTo(HaveOccurred())
Expect(cIngress.Annotations[fmt.Sprintf("%s/canary", nginxIngressAnnotationDefaultPrefix)]).Should(Equal("true"))
Expect(cIngress.Annotations[fmt.Sprintf("%s/canary-weight", nginxIngressAnnotationDefaultPrefix)]).Should(Equal(removePercentageSign(*rollout.Spec.Strategy.Canary.Steps[3].Traffic)))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "1", 1))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "2", 1))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "3", 1))
// Jump to step 3
By("Jump to step 3")
@ -820,7 +823,11 @@ var _ = SIGDescribe("Rollout v1beta1", func() {
Expect(string(cond.Status)).Should(Equal(string(metav1.ConditionTrue)))
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
WaitRolloutWorkloadGeneration(rollout.Name, workload.Generation)
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "1", 1))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "2", 1))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "3", 1))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "4", 1))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "5", 1))
})
// step1-> 2-> 3-> 4-> 3-(TrafficChange)-> 3-> 2-> 1-> 5
@ -1561,6 +1568,9 @@ var _ = SIGDescribe("Rollout v1beta1", func() {
Expect(GetObject(service.Name+"-canary", cIngress)).NotTo(HaveOccurred())
Expect(cIngress.Annotations[fmt.Sprintf("%s/canary", nginxIngressAnnotationDefaultPrefix)]).Should(Equal("true"))
Expect(cIngress.Annotations[fmt.Sprintf("%s/canary-weight", nginxIngressAnnotationDefaultPrefix)]).Should(Equal(removePercentageSign(*rollout.Spec.Strategy.Canary.Steps[2].Traffic)))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "1", 1))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "2", 1))
Expect(CheckPodBatchLabelV2(namespace, workload.Spec.Selector, rollout.Status.CanaryStatus.ObservedRolloutID, "3", 1))
// remove step 2 3 4
By("Remove step 2 3 4")