Fix: parallel WorkloadSpread E2E test runs would fail because the fake-zone node label was deleted after another test completed. (#1996)

Signed-off-by: AiRanthem <zhongtianyun.zty@alibaba-inc.com>
Ai Ranthem 2025-04-17 17:15:29 +08:00 committed by GitHub
parent 318165b7ea
commit 683ce2a993
GPG Key ID: B5690EEEBB952194
2 changed files with 4 additions and 181 deletions
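
For context on the failure described in the commit message: the per-spec cleanup removed below listed every node in the cluster and patched the fake-zone label off all of them, so a WorkloadSpread spec still running on another parallel Ginkgo node could lose the label it had just applied. Below is a minimal sketch of that cleanup logic, assuming a client-go clientset; the function name and the label value are illustrative (the real WorkloadSpreadFakeZoneKey constant is defined in the test framework).

package e2e

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	clientset "k8s.io/client-go/kubernetes"
)

// Illustrative value; the actual constant lives in the e2e framework package.
const WorkloadSpreadFakeZoneKey = "e2e.kruise.io/fake-zone"

// removeFakeZoneLabels mirrors the cleanup deleted by this commit: it strips
// the fake-zone label from every node via a strategic merge patch. Because it
// touches all nodes rather than only the ones the current spec labeled, it
// races with WorkloadSpread specs running on other parallel Ginkgo nodes.
func removeFakeZoneLabels(ctx context.Context, c clientset.Interface) error {
	nodeList, err := c.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
	if err != nil {
		return err
	}
	// Setting a label to null in a strategic merge patch deletes that label.
	patchBody := fmt.Sprintf(`{"metadata":{"labels":{"%s":null}}}`, WorkloadSpreadFakeZoneKey)
	for i := range nodeList.Items {
		node := &nodeList.Items[i]
		if _, exist := node.GetLabels()[WorkloadSpreadFakeZoneKey]; !exist {
			continue
		}
		if _, err := c.CoreV1().Nodes().Patch(ctx, node.Name, types.StrategicMergePatchType, []byte(patchBody), metav1.PatchOptions{}); err != nil {
			return err
		}
	}
	return nil
}

Dropping this cleanup, together with sorting the worker list so node selection is deterministic across processes, keeps the fake-zone labels stable for the duration of a parallel run.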

View File

@@ -20,6 +20,7 @@ import (
"context"
"encoding/json"
"fmt"
"sort"
"strconv"
"time"
@@ -30,7 +31,6 @@ import (
schedulingv1 "k8s.io/api/scheduling/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
@@ -48,7 +48,6 @@ var (
KruiseKindCloneSet = appsv1alpha1.SchemeGroupVersion.WithKind("CloneSet")
KruiseKindStatefulSet = appsv1alpha1.SchemeGroupVersion.WithKind("StatefulSet")
controllerKindDep = appsv1.SchemeGroupVersion.WithKind("Deployment")
//controllerKindJob = batchv1.SchemeGroupVersion.WithKind("Job")
)
var _ = SIGDescribe("workloadspread", func() {
@@ -104,6 +103,9 @@ var _ = SIGDescribe("workloadspread", func() {
}
workers = append(workers, &node)
}
sort.Slice(workers, func(i, j int) bool {
return workers[i].Name < workers[j].Name
})
gomega.Expect(len(workers) > 2).Should(gomega.Equal(true))
// subset-a
worker0 := workers[0]
@@ -115,22 +117,6 @@
tester.SetNodeLabel(c, worker2, WorkloadSpreadFakeZoneKey, "zone-b")
})
f.AfterEachActions = []func(){
func() {
nodeList, err := c.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
patchBody := fmt.Sprintf(`{"metadata":{"labels":{"%s":null}}}`, WorkloadSpreadFakeZoneKey)
for i := range nodeList.Items {
node := nodeList.Items[i]
if _, exist := node.GetLabels()[WorkloadSpreadFakeZoneKey]; !exist {
continue
}
_, err = c.CoreV1().Nodes().Patch(context.TODO(), node.Name, types.StrategicMergePatchType, []byte(patchBody), metav1.PatchOptions{})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}
},
}
framework.KruiseDescribe("WorkloadSpread functionality", func() {
ginkgo.AfterEach(func() {
if ginkgo.CurrentGinkgoTestDescription().Failed {

View File

@@ -1,6 +1,5 @@
/*
Copyright 2019 The Kruise Authors.
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -30,96 +29,6 @@ import (
runtimeutils "k8s.io/apimachinery/pkg/util/runtime"
)
//var (
// cloudConfig = &framework.TestContext.CloudConfig
//)
//
//// There are certain operations we only want to run once per overall test invocation
//// (such as deleting old namespaces, or verifying that all system pods are running.
//// Because of the way Ginkgo runs tests in parallel, we must use SynchronizedBeforeSuite
//// to ensure that these operations only run on the first parallel Ginkgo node.
////
//// This function takes two parameters: one function which runs on only the first Ginkgo node,
//// returning an opaque byte array, and then a second function which runs on all Ginkgo nodes,
//// accepting the byte array.
//var _ = ginkgo.SynchronizedBeforeSuite(func() []byte {
// // Run only on Ginkgo node 1
//
// switch framework.TestContext.Provider {
// case "gce", "gke":
// framework.LogClusterImageSources()
// }
//
// c, err := framework.LoadClientset()
// if err != nil {
// klog.Fatal("Error loading client: ", err)
// }
//
// // Delete any namespaces except those created by the system. This ensures no
// // lingering resources are left over from a previous test run.
// if framework.TestContext.CleanStart {
// deleted, err := framework.DeleteNamespaces(c, nil, /* deleteFilter */
// []string{
// metav1.NamespaceSystem,
// metav1.NamespaceDefault,
// metav1.NamespacePublic,
// })
// if err != nil {
// framework.Failf("Error deleting orphaned namespaces: %v", err)
// }
// klog.Infof("Waiting for deletion of the following namespaces: %v", deleted)
// if err := framework.WaitForNamespacesDeleted(c, deleted, framework.NamespaceCleanupTimeout); err != nil {
// framework.Failf("Failed to delete orphaned namespaces %v: %v", deleted, err)
// }
// }
//
// // In large clusters we may get to this point but still have a bunch
// // of nodes without Routes created. Since this would make a node
// // unschedulable, we need to wait until all of them are schedulable.
// framework.ExpectNoError(framework.WaitForAllNodesSchedulable(c, framework.TestContext.NodeSchedulableTimeout))
//
// // Ensure all pods are running and ready before starting tests (otherwise,
// // cluster infrastructure pods that are being pulled or started can block
// // test pods from running, and tests that ensure all pods are running and
// // ready will fail).
// podStartupTimeout := framework.TestContext.SystemPodsStartupTimeout
// // TODO: In large clusters, we often observe a non-starting pods due to
// // #41007. To avoid those pods preventing the whole test runs (and just
// // wasting the whole run), we allow for some not-ready pods (with the
// // number equal to the number of allowed not-ready nodes).
// if err := framework.WaitForPodsRunningReady(c, metav1.NamespaceSystem, int32(framework.TestContext.MinStartupPods), int32(framework.TestContext.AllowedNotReadyNodes), podStartupTimeout, map[string]string{}); err != nil {
// framework.DumpAllNamespaceInfo(c, metav1.NamespaceSystem)
// framework.LogFailedContainers(c, metav1.NamespaceSystem, framework.Logf)
// runKubernetesServiceTestContainer(c, metav1.NamespaceDefault)
// framework.Failf("Error waiting for all pods to be running and ready: %v", err)
// }
//
// if err := framework.WaitForDaemonSets(c, metav1.NamespaceSystem, int32(framework.TestContext.AllowedNotReadyNodes), framework.TestContext.SystemDaemonsetStartupTimeout); err != nil {
// framework.Logf("WARNING: Waiting for all daemonsets to be ready failed: %v", err)
// }
//
// // Log the version of the server and this client.
// framework.Logf("e2e test version: %s", version.Get().GitVersion)
//
// dc := c.DiscoveryClient
//
// serverVersion, serverErr := dc.ServerVersion()
// if serverErr != nil {
// framework.Logf("Unexpected server error retrieving version: %v", serverErr)
// }
// if serverVersion != nil {
// framework.Logf("kube-apiserver version: %s", serverVersion.GitVersion)
// }
//
// // Reference common test to make the import valid.
// commontest.CurrentSuite = commontest.E2E
//
// return nil
//
//}, func(data []byte) {
// // Run on all Ginkgo nodes
//})
// Similar to SynchronizedBeforeSuite, we want to run some operations only once (such as collecting cluster logs).
// Here, the order of functions is reversed; first, the function which runs everywhere,
// and then the function that only runs on the first Ginkgo node.
@@ -130,45 +39,8 @@ var _ = ginkgo.SynchronizedAfterSuite(func() {
}, func() {
// Run only Ginkgo on node 1
framework.Logf("Running AfterSuite actions on node 1")
//if framework.TestContext.GatherSuiteMetricsAfterTest {
// if err := gatherTestSuiteMetrics(); err != nil {
// framework.Logf("Error gathering metrics: %v", err)
// }
//}
})
//func gatherTestSuiteMetrics() error {
// framework.Logf("Gathering metrics")
// c, err := framework.LoadClientset()
// if err != nil {
// return fmt.Errorf("error loading client: %v", err)
// }
//
// // Grab metrics for apiserver, scheduler, controller-manager, kubelet (for non-kubemark case) and cluster autoscaler (optionally).
// grabber, err := metrics.NewMetricsGrabber(c, nil, !framework.ProviderIs("kubemark"), true, true, true, framework.TestContext.IncludeClusterAutoscalerMetrics)
// if err != nil {
// return fmt.Errorf("failed to create MetricsGrabber: %v", err)
// }
//
// received, err := grabber.Grab()
// if err != nil {
// return fmt.Errorf("failed to grab metrics: %v", err)
// }
//
// metricsForE2E := (*framework.MetricsForE2E)(&received)
// metricsJSON := metricsForE2E.PrintJSON()
// if framework.TestContext.ReportDir != "" {
// filePath := path.Join(framework.TestContext.ReportDir, "MetricsForE2ESuite_"+time.Now().Format(time.RFC3339)+".json")
// if err := ioutil.WriteFile(filePath, []byte(metricsJSON), 0644); err != nil {
// return fmt.Errorf("error writing to %q: %v", filePath, err)
// }
// } else {
// framework.Logf("\n\nTest Suite Metrics:\n%s\n", metricsJSON)
// }
//
// return nil
//}
// RunE2ETests checks configuration parameters (specified through flags) and then runs
// E2E tests using the Ginkgo runner.
// If a "report directory" is specified, one or more JUnit test reports will be
@@ -176,8 +48,6 @@ var _ = ginkgo.SynchronizedAfterSuite(func() {
// This function is called on each Ginkgo node in parallel mode.
func RunE2ETests(t *testing.T) {
runtimeutils.ReallyCrash = true
//logs.InitLogs()
//defer logs.FlushLogs()
gomega.RegisterFailHandler(ginkgo.Fail)
// Disable skipped tests unless they are explicitly requested.
@@ -191,36 +61,3 @@ func RunE2ETests(t *testing.T) {
ginkgo.RunSpecsWithDefaultAndCustomReporters(t, "Kruise e2e suite", r)
}
//// Run a test container to try and contact the Kubernetes api-server from a pod, wait for it
//// to flip to Ready, log its output and delete it.
//func runKubernetesServiceTestContainer(c clientset.Interface, ns string) {
// path := "test/images/clusterapi-tester/pod.yaml"
// framework.Logf("Parsing pod from %v", path)
// p, err := manifest.PodFromManifest(path)
// if err != nil {
// framework.Logf("Failed to parse clusterapi-tester from manifest %v: %v", path, err)
// return
// }
// p.Namespace = ns
// if _, err := c.CoreV1().Pods(ns).Create(p); err != nil {
// framework.Logf("Failed to create %v: %v", p.Name, err)
// return
// }
// defer func() {
// if err := c.CoreV1().Pods(ns).Delete(p.Name, nil); err != nil {
// framework.Logf("Failed to delete pod %v: %v", p.Name, err)
// }
// }()
// timeout := 5 * time.Minute
// if err := framework.WaitForPodCondition(c, ns, p.Name, "clusterapi-tester", timeout, testutils.PodRunningReady); err != nil {
// framework.Logf("Pod %v took longer than %v to enter running/ready: %v", p.Name, timeout, err)
// return
// }
// logs, err := framework.GetPodLogs(c, ns, p.Name, p.Spec.Containers[0].Name)
// if err != nil {
// framework.Logf("Failed to retrieve logs from %v: %v", p.Name, err)
// } else {
// framework.Logf("Output of clusterapi-tester:\n%v", logs)
// }
//}
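
As the comments above note, SynchronizedAfterSuite runs its first function on every parallel Ginkgo node and its second only on node 1, and RunE2ETests is the per-node entrypoint that registers the gomega fail handler and hands control to the Ginkgo runner. A minimal, self-contained sketch of that wiring with the Ginkgo v1 API (package, suite name, and entrypoint name are illustrative, not the repository's actual code):

package e2e

import (
	"testing"

	"github.com/onsi/ginkgo"
	"github.com/onsi/gomega"
)

// The first function runs on every parallel Ginkgo node; the second runs only
// on node 1 after all nodes have finished, which is where once-per-suite
// teardown such as collecting cluster logs or suite metrics belongs.
var _ = ginkgo.SynchronizedAfterSuite(func() {
	// per-node cleanup
}, func() {
	// node-1-only cleanup
})

// TestE2E is a hypothetical entrypoint mirroring the RunE2ETests pattern above:
// register the gomega fail handler, then run the suite with the Ginkgo runner.
func TestE2E(t *testing.T) {
	gomega.RegisterFailHandler(ginkgo.Fail)
	ginkgo.RunSpecs(t, "Example e2e suite")
}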