e2e: add a test case for when the GracePeriodSeconds is reached

Signed-off-by: zhzhuang-zju <m17799853869@163.com>
zhzhuang-zju 2024-08-26 10:22:26 +08:00
parent aee21b8f48
commit effc1474ac
2 changed files with 79 additions and 12 deletions


@@ -1,11 +1,9 @@
### failover e2e test coverage analysis
| Test Case | E2E Describe Text | Comments |
|------------------------------------------------------------------------------------|------------------------------------------------|----------------------------------------------------------------------------------------------|
| Test if the deployment of a failed cluster is rescheduled to another available cluster | deployment failover testing | [Failover](https://karmada.io/docs/next/userguide/failover/failover-overview) |
| Test if the deployment of a tainted cluster is rescheduled to another available cluster | taint cluster | |
| Test if the deployment will be rescheduled to another available cluster | application failover with purgeMode graciously | [Application failover](https://karmada.io/docs/next/userguide/failover/application-failover) |
| Test if the deployment will never be rescheduled to another available cluster | application failover with purgeMode never | |
#### TODO
1. There are two ways to evict a deployment under the Graciously purge mode, and the third case covers only the first of them, so we may add a test case for [when the GracePeriodSeconds is reached](https://karmada.io/docs/next/userguide/failover/application-failover/#:~:text=after%20a%20timeout%20is%20reached%20before%20evicting%20the%20application) (see the sketch after the updated table below).
| Test Case | E2E Describe Text | Comments |
|------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Test if the deployment of a failed cluster is rescheduled to another available cluster | deployment failover testing | [Failover](https://karmada.io/docs/next/userguide/failover/failover-overview) |
| Test if the deployment of a tainted cluster is rescheduled to another available cluster | taint cluster | |
| Test if the deployment will be rescheduled to another available cluster | application failover with purgeMode graciously when the application comes back to healthy on the new cluster | [Application failover](https://karmada.io/docs/next/userguide/failover/application-failover/#:~:text=the%20legacy%20application.-,Graciously%20represents%20that%20Karmada%20will%20wait%20for%20the%20application%20to%20come%20back%20to%20healthy%20on%20the%20new%20cluster,-or%20after%20a) |
| Test if the deployment will be rescheduled to another available cluster (timeout) | application failover with purgeMode graciously when the GracePeriodSeconds is reached | [Application failover](https://karmada.io/docs/next/userguide/failover/application-failover/#:~:text=after%20a%20timeout%20is%20reached%20before%20evicting%20the%20application) |
| Test if the deployment will never be rescheduled to another available cluster | application failover with purgeMode never | |
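
For orientation, here is a minimal sketch of the two timers the graciously cases exercise, written with the same `policyv1alpha1` types the e2e code below uses. The values mirror the test's defaults; the import paths are assumed from the karmada repo layout and this snippet is illustrative, not part of the commit:

```go
import (
	policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1"
	"k8s.io/utils/ptr"
)

// Illustrative only: the two timers that drive the graciously test cases.
var failover = &policyv1alpha1.FailoverBehavior{
	Application: &policyv1alpha1.ApplicationFailoverBehavior{
		DecisionConditions: policyv1alpha1.DecisionConditions{
			// how long the application may stay unhealthy before failover triggers
			TolerationSeconds: ptr.To[int32](30),
		},
		// Graciously waits for the application to come back to healthy on the
		// new cluster, or evicts the legacy application once GracePeriodSeconds
		// is reached; the second path is what the new test case covers.
		PurgeMode:          policyv1alpha1.Graciously,
		GracePeriodSeconds: ptr.To[int32](10),
	},
}
```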


@@ -19,6 +19,7 @@ package e2e
import (
"context"
"fmt"
"time"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
@@ -26,6 +27,7 @@ import (
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/rand"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/klog/v2"
@@ -274,6 +276,7 @@ var _ = framework.SerialDescribe("failover testing", func() {
var policy *policyv1alpha1.PropagationPolicy
var overridePolicy *policyv1alpha1.OverridePolicy
var maxGroups, minGroups int
var gracePeriodSeconds, tolerationSeconds int32
ginkgo.BeforeEach(func() {
policyNamespace = testNamespace
policyName = deploymentNamePrefix + rand.String(RandomStrLength)
@@ -282,6 +285,8 @@ var _ = framework.SerialDescribe("failover testing", func() {
deployment = testhelper.NewDeployment(deploymentNamespace, deploymentName)
maxGroups = 1
minGroups = 1
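// shared timer defaults for the failover cases below; the GracePeriodSeconds
// case later patches gracePeriodSeconds down to 10 so the two timers differ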
gracePeriodSeconds = 30
tolerationSeconds = 30
policy = &policyv1alpha1.PropagationPolicy{
ObjectMeta: metav1.ObjectMeta{
@@ -312,10 +317,10 @@ var _ = framework.SerialDescribe("failover testing", func() {
Failover: &policyv1alpha1.FailoverBehavior{
Application: &policyv1alpha1.ApplicationFailoverBehavior{
DecisionConditions: policyv1alpha1.DecisionConditions{
TolerationSeconds: ptr.To[int32](30),
TolerationSeconds: ptr.To[int32](tolerationSeconds),
},
PurgeMode: policyv1alpha1.Graciously,
GracePeriodSeconds: ptr.To[int32](30),
GracePeriodSeconds: ptr.To[int32](gracePeriodSeconds),
},
},
},
@@ -331,7 +336,7 @@ var _ = framework.SerialDescribe("failover testing", func() {
})
})
ginkgo.It("application failover with purgeMode graciously", func() {
ginkgo.It("application failover with purgeMode graciously when the application come back to healthy on the new cluster", func() {
disabledClusters := framework.ExtractTargetClustersFrom(controlPlaneClient, deployment)
ginkgo.By("create an error op", func() {
overridePolicy = testhelper.NewOverridePolicyByOverrideRules(policyNamespace, policyName, []policyv1alpha1.ResourceSelector{
@@ -393,6 +398,70 @@ var _ = framework.SerialDescribe("failover testing", func() {
framework.RemoveOverridePolicy(karmadaClient, policyNamespace, policyName)
})
})
ginkgo.It("application failover with purgeMode graciously when the GracePeriodSeconds is reach out", func() {
gracePeriodSeconds = 10
ginkgo.By("update pp", func() {
// modify gracePeriodSeconds to create a time difference with tolerationSecond to avoid cluster interference
patch := []map[string]interface{}{
{
"op": "replace",
"path": "/spec/failover/application/gracePeriodSeconds",
"value": ptr.To[int32](gracePeriodSeconds),
},
}
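// Note: with types.JSONPatchType this slice is marshaled into a standard
// RFC 6902 JSON Patch, and the *int32 from ptr.To serializes as a plain
// number, e.g.
// [{"op":"replace","path":"/spec/failover/application/gracePeriodSeconds","value":10}]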
framework.PatchPropagationPolicy(karmadaClient, policy.Namespace, policy.Name, patch, types.JSONPatchType)
})
disabledClusters := framework.ExtractTargetClustersFrom(controlPlaneClient, deployment)
var beginTime time.Time
ginkgo.By("create an error op", func() {
overridePolicy = testhelper.NewOverridePolicyByOverrideRules(policyNamespace, policyName, []policyv1alpha1.ResourceSelector{
{
APIVersion: deployment.APIVersion,
Kind: deployment.Kind,
Name: deployment.Name,
},
}, []policyv1alpha1.RuleWithCluster{
{
TargetCluster: &policyv1alpha1.ClusterAffinity{
// target every cluster so that the application cannot come back to healthy on the new cluster
ClusterNames: framework.ClusterNames(),
},
Overriders: policyv1alpha1.Overriders{
ImageOverrider: []policyv1alpha1.ImageOverrider{
{
Component: "Registry",
Operator: "replace",
Value: "fake",
},
},
},
},
})
framework.CreateOverridePolicy(karmadaClient, overridePolicy)
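// record when the failure is injected; the eviction-time check below
// measures the tolerationSeconds+gracePeriodSeconds window from this point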
beginTime = time.Now()
})
defer framework.RemoveOverridePolicy(karmadaClient, policyNamespace, policyName)
ginkgo.By("check if deployment present on member clusters has correct image value", func() {
framework.WaitDeploymentPresentOnClustersFitWith(disabledClusters, deployment.Namespace, deployment.Name,
func(deployment *appsv1.Deployment) bool {
for _, container := range deployment.Spec.Template.Spec.Containers {
if container.Image != "fake/nginx:1.19.0" {
return false
}
}
return true
})
})
ginkgo.By("check whether application failover with purgeMode graciously when the GracePeriodSeconds is reach out", func() {
framework.WaitDeploymentDisappearOnClusters(disabledClusters, deploymentNamespace, deploymentName)
evictionTime := time.Now()
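// With the fake registry the application never comes back to healthy on the
// new cluster, so eviction should happen only after both timers have elapsed:
// tolerationSeconds to declare the application failed, then gracePeriodSeconds
// spent waiting in vain for a healthy replacement.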
gomega.Expect(evictionTime.Sub(beginTime) > time.Duration(gracePeriodSeconds+tolerationSeconds)*time.Second).Should(gomega.BeTrue())
})
})
})
ginkgo.Context("Application failover testing with purgeMode never", func() {