Fix: blue-green batch-id e2e sometimes fails

Signed-off-by: AiRanthem <zhongtianyun.zty@alibaba-inc.com>
This commit is contained in:
AiRanthem 2025-03-24 11:16:36 +08:00
parent 6094e966ac
commit 54bdb7b0b9
4 changed files with 59 additions and 47 deletions

View File

@ -97,7 +97,7 @@ jobs:
set +e
./bin/ginkgo -timeout 60m -v --focus='bluegreen disable hpa test case - autoscaling/v1 for v1.19' test/e2e
retVal=$?
# kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout
kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout
restartCount=$(kubectl get pod -n kruise-rollout --no-headers | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "Kruise-rollout has not restarted"
@ -115,7 +115,7 @@ jobs:
set +e
./bin/ginkgo -timeout 60m -v --focus='Bluegreen Release - Deployment - Ingress' test/e2e
retVal=$?
# kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout
kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout
restartCount=$(kubectl get pod -n kruise-rollout --no-headers | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "Kruise-rollout has not restarted"
@ -133,7 +133,7 @@ jobs:
set +e
./bin/ginkgo -timeout 60m -v --focus='Bluegreen Release - Cloneset - Ingress' test/e2e
retVal=$?
# kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout
kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout
restartCount=$(kubectl get pod -n kruise-rollout --no-headers | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "Kruise-rollout has not restarted"

View File

@ -97,7 +97,7 @@ jobs:
set +e
./bin/ginkgo -timeout 60m -v --focus='bluegreen delete rollout case - autoscaling/v2 for v1.23' test/e2e
retVal=$?
# kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout
kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout
restartCount=$(kubectl get pod -n kruise-rollout --no-headers | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "Kruise-rollout has not restarted"
@ -115,7 +115,7 @@ jobs:
set +e
./bin/ginkgo -timeout 60m -v --focus='Bluegreen Release - Deployment - Ingress' test/e2e
retVal=$?
# kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout
kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout
restartCount=$(kubectl get pod -n kruise-rollout --no-headers | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "Kruise-rollout has not restarted"
@ -133,7 +133,7 @@ jobs:
set +e
./bin/ginkgo -timeout 60m -v --focus='Bluegreen Release - Cloneset - Ingress' test/e2e
retVal=$?
# kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout
kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout
restartCount=$(kubectl get pod -n kruise-rollout --no-headers | awk '{print $4}')
if [ "${restartCount}" -eq "0" ];then
echo "Kruise-rollout has not restarted"

View File

@ -104,7 +104,7 @@ func (rc *realBatchControlPlane) UpgradeBatch() error {
return err
}
return rc.patchPodLabels(batchContext)
return nil
}
func (rc *realBatchControlPlane) CheckBatchReady() error {
@ -126,7 +126,9 @@ func (rc *realBatchControlPlane) CheckBatchReady() error {
klog.Infof("BatchRelease %v calculated context when check batch ready: %s",
klog.KObj(rc.release), batchContext.Log())
if err = rc.patchPodLabels(batchContext); err != nil {
klog.ErrorS(err, "failed to patch pod labels", "release", klog.KObj(rc.release))
}
return batchContext.IsBatchReady()
}

View File

@ -26,6 +26,7 @@ import (
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
"github.com/onsi/gomega/format"
appsv1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1"
appsv1beta1 "github.com/openkruise/kruise-api/apps/v1beta1"
"github.com/openkruise/rollouts/api/v1beta1"
@ -48,6 +49,7 @@ import (
)
var _ = SIGDescribe("Rollout v1beta1", func() {
format.MaxLength = 0
var namespace string
DumpAllResources := func() {
@ -384,24 +386,27 @@ var _ = SIGDescribe("Rollout v1beta1", func() {
}
count := 0
podsMap := make(map[string]string)
for _, pod := range pods {
podsMap[pod.Name] = fmt.Sprintf("rolloutID:%s, batchID:%s, controllerRevisionHash:%s",
pod.Labels[v1beta1.RolloutIDLabel], pod.Labels[v1beta1.RolloutBatchIDLabel], pod.Labels[apps.ControllerRevisionHashLabelKey])
if pod.Labels[v1beta1.RolloutIDLabel] == rolloutID &&
pod.Labels[v1beta1.RolloutBatchIDLabel] == batchID {
count++
}
}
if count != expected {
return fmt.Errorf("expected %d pods with rolloutID %s and batchID %s, got %d", expected, rolloutID, batchID, count)
return fmt.Errorf("expected %d pods with rolloutID %s and batchID %s, got %d; all pods info: %s", expected, rolloutID, batchID, count, podsMap)
}
klog.InfoS("check pod batch label success", "count", count, "rolloutID", rolloutID, "batchID", batchID)
return nil
}
var err error
for i := 0; i < 120; i++ {
for i := 0; i < 15; i++ {
if err = fn(); err == nil {
return nil
}
time.Sleep(time.Second)
time.Sleep(2 * time.Second)
}
return err
}
@ -493,6 +498,7 @@ var _ = SIGDescribe("Rollout v1beta1", func() {
},
}
Expect(k8sClient.Create(context.TODO(), &ns)).Should(SatisfyAny(BeNil()))
klog.InfoS("Namespace created", "namespace", namespace)
})
AfterEach(func() {
@ -2301,25 +2307,27 @@ var _ = SIGDescribe("Rollout v1beta1", func() {
WaitRolloutStepPaused(rollout.Name, 1)
stableRevision := GetStableRSRevision(workload)
By(stableRevision)
Expect(GetObject(rollout.Name, rollout)).NotTo(HaveOccurred())
Expect(rollout.Status.CanaryStatus).Should(BeNil())
Expect(rollout.Status.BlueGreenStatus.StableRevision).Should(Equal(stableRevision))
Eventually(func(g Gomega) {
g.Expect(GetObject(rollout.Name, rollout)).NotTo(HaveOccurred())
g.Expect(rollout.Status.CanaryStatus).Should(BeNil())
g.Expect(rollout.Status.BlueGreenStatus.StableRevision).Should(Equal(stableRevision))
// check workload status & paused
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
Expect(workload.Status.UpdatedReplicas).Should(BeNumerically("==", 3))
Expect(workload.Status.UnavailableReplicas).Should(BeNumerically("==", 8))
Expect(workload.Status.ReadyReplicas).Should(BeNumerically("==", 8))
// check workload status & paused
g.Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
g.Expect(workload.Status.UpdatedReplicas).Should(BeNumerically("==", 3))
g.Expect(workload.Status.UnavailableReplicas).Should(BeNumerically("==", 8))
g.Expect(workload.Status.ReadyReplicas).Should(BeNumerically("==", 8))
// check rollout status
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
Expect(GetObject(rollout.Name, rollout)).NotTo(HaveOccurred())
Expect(rollout.Status.Phase).Should(Equal(v1beta1.RolloutPhaseProgressing))
Expect(rollout.Status.BlueGreenStatus.RolloutHash).Should(Equal(rollout.Annotations[util.RolloutHashAnnotation]))
Expect(rollout.Status.BlueGreenStatus.CurrentStepIndex).Should(BeNumerically("==", 1))
Expect(rollout.Status.BlueGreenStatus.NextStepIndex).Should(BeNumerically("==", 2))
Expect(rollout.Status.BlueGreenStatus.UpdatedReplicas).Should(BeNumerically("==", 3))
Expect(rollout.Status.BlueGreenStatus.UpdatedReadyReplicas).Should(BeNumerically("==", 3))
// check rollout status
g.Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
g.Expect(GetObject(rollout.Name, rollout)).NotTo(HaveOccurred())
g.Expect(rollout.Status.Phase).Should(Equal(v1beta1.RolloutPhaseProgressing))
g.Expect(rollout.Status.BlueGreenStatus.RolloutHash).Should(Equal(rollout.Annotations[util.RolloutHashAnnotation]))
g.Expect(rollout.Status.BlueGreenStatus.CurrentStepIndex).Should(BeNumerically("==", 1))
g.Expect(rollout.Status.BlueGreenStatus.NextStepIndex).Should(BeNumerically("==", 2))
g.Expect(rollout.Status.BlueGreenStatus.UpdatedReplicas).Should(BeNumerically("==", 3))
g.Expect(rollout.Status.BlueGreenStatus.UpdatedReadyReplicas).Should(BeNumerically("==", 3))
}).WithTimeout(time.Second * 60).WithPolling(time.Second * 3).Should(Succeed())
// ------ 50% maxSurge, scale up: from 5 to 6 ------
workload.Spec.Replicas = utilpointer.Int32(6)
UpdateDeployment(workload)
@ -2397,23 +2405,25 @@ var _ = SIGDescribe("Rollout v1beta1", func() {
WaitRolloutStepPaused(rollout.Name, 2)
// workload
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
Expect(workload.Status.UpdatedReplicas).Should(BeNumerically("==", 4))
Expect(workload.Status.UnavailableReplicas).Should(BeNumerically("==", 8))
Expect(workload.Status.ReadyReplicas).Should(BeNumerically("==", 8))
Expect(workload.Spec.Paused).Should(BeFalse())
Expect(workload.Spec.Strategy.Type).Should(Equal(apps.RollingUpdateDeploymentStrategyType))
Expect(workload.Spec.MinReadySeconds).Should(Equal(int32(v1beta1.MaxReadySeconds)))
Expect(*workload.Spec.ProgressDeadlineSeconds).Should(Equal(int32(v1beta1.MaxProgressSeconds)))
Expect(reflect.DeepEqual(workload.Spec.Strategy.RollingUpdate.MaxUnavailable, &intstr.IntOrString{Type: intstr.Int, IntVal: 0})).Should(BeTrue())
Expect(reflect.DeepEqual(workload.Spec.Strategy.RollingUpdate.MaxSurge, &intstr.IntOrString{Type: intstr.String, StrVal: "100%"})).Should(BeTrue())
Eventually(func(g Gomega) {
g.Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
g.Expect(workload.Status.UpdatedReplicas).Should(BeNumerically("==", 4))
g.Expect(workload.Status.UnavailableReplicas).Should(BeNumerically("==", 8))
g.Expect(workload.Status.ReadyReplicas).Should(BeNumerically("==", 8))
g.Expect(workload.Spec.Paused).Should(BeFalse())
g.Expect(workload.Spec.Strategy.Type).Should(Equal(apps.RollingUpdateDeploymentStrategyType))
g.Expect(workload.Spec.MinReadySeconds).Should(Equal(int32(v1beta1.MaxReadySeconds)))
g.Expect(*workload.Spec.ProgressDeadlineSeconds).Should(Equal(int32(v1beta1.MaxProgressSeconds)))
g.Expect(reflect.DeepEqual(workload.Spec.Strategy.RollingUpdate.MaxUnavailable, &intstr.IntOrString{Type: intstr.Int, IntVal: 0})).Should(BeTrue())
g.Expect(reflect.DeepEqual(workload.Spec.Strategy.RollingUpdate.MaxSurge, &intstr.IntOrString{Type: intstr.String, StrVal: "100%"})).Should(BeTrue())
// rollout
Expect(GetObject(rollout.Name, rollout)).NotTo(HaveOccurred())
Expect(rollout.Status.BlueGreenStatus.CurrentStepIndex).Should(BeNumerically("==", 2))
Expect(rollout.Status.BlueGreenStatus.NextStepIndex).Should(BeNumerically("==", 3))
Expect(rollout.Status.BlueGreenStatus.UpdatedReplicas).Should(BeNumerically("==", 4))
Expect(rollout.Status.BlueGreenStatus.UpdatedReadyReplicas).Should(BeNumerically("==", 4))
// rollout
g.Expect(GetObject(rollout.Name, rollout)).NotTo(HaveOccurred())
g.Expect(rollout.Status.BlueGreenStatus.CurrentStepIndex).Should(BeNumerically("==", 2))
g.Expect(rollout.Status.BlueGreenStatus.NextStepIndex).Should(BeNumerically("==", 3))
g.Expect(rollout.Status.BlueGreenStatus.UpdatedReplicas).Should(BeNumerically("==", 4))
g.Expect(rollout.Status.BlueGreenStatus.UpdatedReadyReplicas).Should(BeNumerically("==", 4))
}).WithTimeout(time.Second * 60).WithPolling(time.Second * 1).Should(Succeed())
// ------ scale up: from 4 to 7 ------
workload.Spec.Replicas = utilpointer.Int32(7)
@ -2530,7 +2540,7 @@ var _ = SIGDescribe("Rollout v1beta1", func() {
Expect(rollout.Status.BlueGreenStatus.UpdatedReadyReplicas).Should(BeNumerically("==", 3))
By("delete rollout and check deployment")
k8sClient.Delete(context.TODO(), rollout)
_ = k8sClient.Delete(context.TODO(), rollout)
WaitRolloutNotFound(rollout.Name)
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
// check annotation
@ -2865,7 +2875,7 @@ var _ = SIGDescribe("Rollout v1beta1", func() {
Expect(hpa.Spec.ScaleTargetRef.Name).Should(Equal(workload.Name + HPADisableSuffix))
By("delete rollout and check deployment")
k8sClient.Delete(context.TODO(), rollout)
_ = k8sClient.Delete(context.TODO(), rollout)
WaitRolloutNotFound(rollout.Name)
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
// check annotation
@ -3689,7 +3699,7 @@ var _ = SIGDescribe("Rollout v1beta1", func() {
// ------ delete rollout ------
By("delete rollout and check deployment")
k8sClient.Delete(context.TODO(), rollout)
_ = k8sClient.Delete(context.TODO(), rollout)
WaitRolloutNotFound(rollout.Name)
Expect(GetObject(workload.Name, workload)).NotTo(HaveOccurred())
// check workload annotation