Integration tests: Warn (instead of erroring) upon pod restarts (#4623)

* Integration tests: Warn (instead of erroring) upon pod restarts. Fixes #4595. Don't have integration tests fail whenever a pod is detected to have restarted just once. For now we'll just log this and create a warning annotation for it.
2020-06-18 06:08:05 -05:00 · 2020-06-18 06:08:05 -05:00 · c8c5980d63
parent b176fbeb6d
commit c8c5980d63
12 changed files with 125 additions and 28 deletions
--- a/test/edges/edges_test.go
+++ b/test/edges/edges_test.go
@ -83,7 +83,11 @@ func TestDirectEdges(t *testing.T) {
 	}
 	if err := TestHelper.CheckPods(testNamespace, "terminus", 1); err != nil {
-		testutil.AnnotatedError(t, "CheckPods timed-out", err)
+		if rce, ok := err.(*testutil.RestartCountError); ok {
 			testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
 		} else {
 			testutil.AnnotatedError(t, "CheckPods timed-out", err)
 		}
 	}
 	if err := TestHelper.CheckDeployment(testNamespace, "terminus", 1); err != nil {
@ -121,7 +125,11 @@ func TestDirectEdges(t *testing.T) {
 	}
 	if err := TestHelper.CheckPods(testNamespace, "slow-cooker", 1); err != nil {
-		testutil.AnnotatedError(t, "CheckPods timed-out", err)
+		if rce, ok := err.(*testutil.RestartCountError); ok {
 			testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
 		} else {
 			testutil.AnnotatedError(t, "CheckPods timed-out", err)
 		}
 	}
 	if err := TestHelper.CheckDeployment(testNamespace, "slow-cooker", 1); err != nil {
--- a/test/egress/egress_test.go
+++ b/test/egress/egress_test.go
@ -41,7 +41,11 @@ func TestEgressHttp(t *testing.T) {
 	err = TestHelper.CheckPods(prefixedNs, "egress-test", 1)
 	if err != nil {
-		testutil.AnnotatedFatal(t, "CheckPods timed-out", err)
+		if rce, ok := err.(*testutil.RestartCountError); ok {
 			testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
 		} else {
 			testutil.AnnotatedError(t, "CheckPods timed-out", err)
 		}
 	}
 	testCase := func(url, methodToUse string) {
--- a/test/externalissuer/external_issuer_test.go
+++ b/test/externalissuer/external_issuer_test.go
@ -53,11 +53,19 @@ func verifyInstallApp(t *testing.T) {
 	}
 	if err := TestHelper.CheckPods(prefixedNs, TestAppBackendDeploymentName, 1); err != nil {
-		testutil.AnnotatedError(t, "CheckPods timed-out", err)
+		if rce, ok := err.(*testutil.RestartCountError); ok {
 			testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
 		} else {
 			testutil.AnnotatedError(t, "CheckPods timed-out", err)
 		}
 	}
 	if err := TestHelper.CheckPods(prefixedNs, "slow-cooker", 1); err != nil {
-		testutil.AnnotatedError(t, "CheckPods timed-out", err)
+		if rce, ok := err.(*testutil.RestartCountError); ok {
 			testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
 		} else {
 			testutil.AnnotatedError(t, "CheckPods timed-out", err)
 		}
 	}
 }
--- a/test/get/get_test.go
+++ b/test/get/get_test.go
@ -78,8 +78,11 @@ func TestCliGet(t *testing.T) {
 	// wait for pods to start
 	for deploy, replicas := range deployReplicas {
 		if err := TestHelper.CheckPods(prefixedNs, deploy, replicas); err != nil {
-			testutil.AnnotatedError(t, "CheckPods timed-out",
+			if rce, ok := err.(*testutil.RestartCountError); ok {
-				fmt.Errorf("Error validating pods for deploy [%s]:\n%s", deploy, err))
+				testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
 			} else {
 				testutil.AnnotatedError(t, "CheckPods timed-out", err)
 			}
 		}
 	}
--- a/test/install_test.go
+++ b/test/install_test.go
@ -125,7 +125,11 @@ func TestUpgradeTestAppWorksBeforeUpgrade(t *testing.T) {
 		testAppNamespace := TestHelper.GetTestNamespace("upgrade-test")
 		for _, deploy := range []string{"emoji", "voting", "web"} {
 			if err := TestHelper.CheckPods(testAppNamespace, deploy, 1); err != nil {
-				testutil.AnnotatedError(t, "CheckPods timed-out", err)
+				if rce, ok := err.(*testutil.RestartCountError); ok {
 					testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
 				} else {
 					testutil.AnnotatedError(t, "CheckPods timed-out", err)
 				}
 			}
 			if err := TestHelper.CheckDeployment(testAppNamespace, deploy, 1); err != nil {
--- a/test/serviceprofiles/serviceprofiles_test.go
+++ b/test/serviceprofiles/serviceprofiles_test.go
@ -52,7 +52,11 @@ func TestServiceProfiles(t *testing.T) {
 	// wait for deployments to start
 	for _, deploy := range []string{"t1", "t2", "t3", "gateway"} {
 		if err := TestHelper.CheckPods(testNamespace, deploy, 1); err != nil {
-			testutil.AnnotatedError(t, "CheckPods timed-out", err)
+			if rce, ok := err.(*testutil.RestartCountError); ok {
 				testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
 			} else {
 				testutil.AnnotatedError(t, "CheckPods timed-out", err)
 			}
 		}
 		if err := TestHelper.CheckDeployment(testNamespace, deploy, 1); err != nil {
--- a/test/tap/tap_test.go
+++ b/test/tap/tap_test.go
@ -99,7 +99,11 @@ func TestCliTap(t *testing.T) {
 	// wait for deployments to start
 	for _, deploy := range []string{"t1", "t2", "t3", "gateway"} {
 		if err := TestHelper.CheckPods(prefixedNs, deploy, 1); err != nil {
-			testutil.AnnotatedError(t, "CheckPods timed-out", err)
+			if rce, ok := err.(*testutil.RestartCountError); ok {
 				testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
 			} else {
 				testutil.AnnotatedError(t, "CheckPods timed-out", err)
 			}
 		}
 		if err := TestHelper.CheckDeployment(prefixedNs, deploy, 1); err != nil {
--- a/test/tracing/tracing_test.go
+++ b/test/tracing/tracing_test.go
@ -126,7 +126,11 @@ func TestTracing(t *testing.T) {
 		tracingNs:   "jaeger",
 	} {
 		if err := TestHelper.CheckPods(ns, deploy, 1); err != nil {
-			testutil.AnnotatedError(t, "CheckPods timed-out", err)
+			if rce, ok := err.(*testutil.RestartCountError); ok {
 				testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
 			} else {
 				testutil.AnnotatedError(t, "CheckPods timed-out", err)
 			}
 		}
 		if err := TestHelper.CheckDeployment(ns, deploy, 1); err != nil {
--- a/test/trafficsplit/trafficsplit_test.go
+++ b/test/trafficsplit/trafficsplit_test.go
@ -168,7 +168,11 @@ func TestTrafficSplitCli(t *testing.T) {
 	// wait for deployments to start
 	for _, deploy := range []string{"backend", "failing", "slow-cooker"} {
 		if err := TestHelper.CheckPods(prefixedNs, deploy, 1); err != nil {
-			testutil.AnnotatedError(t, "CheckPods timed-out", err)
+			if rce, ok := err.(*testutil.RestartCountError); ok {
 				testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
 			} else {
 				testutil.AnnotatedError(t, "CheckPods timed-out", err)
 			}
 		}
 		if err := TestHelper.CheckDeployment(prefixedNs, deploy, 1); err != nil {
--- a/test/uninstall/uninstall_test.go
+++ b/test/uninstall/uninstall_test.go
@ -51,8 +51,11 @@ func TestResourcesPostInstall(t *testing.T) {
 	// Tests Pods and Deployments
 	for deploy, spec := range testutil.LinkerdDeployReplicas {
 		if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, spec.Replicas); err != nil {
-			testutil.AnnotatedFatal(t, "CheckPods timed-out",
+			if rce, ok := err.(*testutil.RestartCountError); ok {
-				fmt.Errorf("Error validating pods for deploy [%s]:\n%s", deploy, err))
+				testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
 			} else {
 				testutil.AnnotatedError(t, "CheckPods timed-out", err)
 			}
 		}
 		if err := TestHelper.CheckDeployment(TestHelper.GetLinkerdNamespace(), deploy, spec.Replicas); err != nil {
 			testutil.AnnotatedFatalf(t, "CheckDeployment timed-out", "Error validating deployment [%s]:\n%s", deploy, err)
--- a/testutil/annotations.go
+++ b/testutil/annotations.go
@ -13,9 +13,26 @@ const (
 	rootPath = "/linkerd2/"
 )
-func echoAnnotation(t *testing.T, args ...interface{}) {
+type level int
 const (
 	err level = iota
 	warn
 )
 // String returns the annotation keyword for the level: "error" for err and
 // "warning" for warn. Any other value is treated as a programming error and
 // panics.
 func (l level) String() string {
 	switch l {
 	case err:
 		return "error"
 	case warn:
 		return "warning"
 	}
 	panic(fmt.Sprintf("invalid level: %d", l))
 }
 func echoAnnotation(t *testing.T, l level, args ...interface{}) {
 	if _, ok := os.LookupEnv(envFlag); ok {
-		_, fileName, fileLine, ok := runtime.Caller(2)
+		_, fileName, fileLine, ok := runtime.Caller(3)
 		if !ok {
 			panic("Couldn't recover runtime info")
 		}
@ -26,17 +43,25 @@ func echoAnnotation(t *testing.T, args ...interface{}) {
 		testName := parts[0]
 		for _, arg := range args {
 			msg := fmt.Sprintf("%s - %s", testName, arg)
-			fmt.Printf("::error file=%s,line=%d::%s\n", fileName, fileLine, msg)
+			fmt.Printf("::%s file=%s,line=%d::%s\n", l, fileName, fileLine, msg)
 		}
 	}
 }
 // echoAnnotationErr emits each arg as an error-level annotation via
 // echoAnnotation. It must be called directly by the exported wrapper that the
 // test invokes: echoAnnotation locates the reported file/line with
 // runtime.Caller(3), and this function accounts for one of those frames.
 func echoAnnotationErr(t *testing.T, args ...interface{}) {
 	echoAnnotation(t, err, args...)
 }
 // echoAnnotationWarn emits each arg as a warning-level annotation via
 // echoAnnotation. It must be called directly by the exported wrapper that the
 // test invokes: echoAnnotation locates the reported file/line with
 // runtime.Caller(3), and this function accounts for one of those frames.
 func echoAnnotationWarn(t *testing.T, args ...interface{}) {
 	echoAnnotation(t, warn, args...)
 }
 // Error is a wrapper around t.Error()
 // args are passed to t.Error(args) and each arg will be sent to stdout formatted
 // as a Github annotation when the envFlag environment variable is set
 func Error(t *testing.T, args ...interface{}) {
 	t.Helper()
-	echoAnnotation(t, args...)
+	echoAnnotationErr(t, args...)
 	t.Error(args...)
 }
@ -44,7 +69,7 @@ func Error(t *testing.T, args ...interface{}) {
 // will be used as the Github annotation
 func AnnotatedError(t *testing.T, msg string, args ...interface{}) {
 	t.Helper()
-	echoAnnotation(t, msg)
+	echoAnnotationErr(t, msg)
 	t.Error(args...)
 }
@ -54,7 +79,7 @@ func AnnotatedError(t *testing.T, msg string, args ...interface{}) {
 // environment variable is set
 func Errorf(t *testing.T, format string, args ...interface{}) {
 	t.Helper()
-	echoAnnotation(t, fmt.Sprintf(format, args...))
+	echoAnnotationErr(t, fmt.Sprintf(format, args...))
 	t.Errorf(format, args...)
 }
@ -62,7 +87,7 @@ func Errorf(t *testing.T, format string, args ...interface{}) {
 // will be used as the Github annotation
 func AnnotatedErrorf(t *testing.T, msg, format string, args ...interface{}) {
 	t.Helper()
-	echoAnnotation(t, msg)
+	echoAnnotationErr(t, msg)
 	t.Errorf(format, args...)
 }
@ -71,7 +96,7 @@ func AnnotatedErrorf(t *testing.T, msg, format string, args ...interface{}) {
 // as a Github annotation when the envFlag environment variable is set
 func Fatal(t *testing.T, args ...interface{}) {
 	t.Helper()
-	echoAnnotation(t, args)
+	echoAnnotationErr(t, args)
 	t.Fatal(args...)
 }
@ -79,7 +104,7 @@ func Fatal(t *testing.T, args ...interface{}) {
 // will be used as the Github annotation
 func AnnotatedFatal(t *testing.T, msg string, args ...interface{}) {
 	t.Helper()
-	echoAnnotation(t, msg)
+	echoAnnotationErr(t, msg)
 	t.Fatal(args...)
 }
@ -89,7 +114,7 @@ func AnnotatedFatal(t *testing.T, msg string, args ...interface{}) {
 // environment variable is set
 func Fatalf(t *testing.T, format string, args ...interface{}) {
 	t.Helper()
-	echoAnnotation(t, fmt.Sprintf(format, args...))
+	echoAnnotationErr(t, fmt.Sprintf(format, args...))
 	t.Fatalf(format, args...)
 }
@ -97,6 +122,14 @@ func Fatalf(t *testing.T, format string, args ...interface{}) {
 // will be used as the Github annotation
 func AnnotatedFatalf(t *testing.T, msg, format string, args ...interface{}) {
 	t.Helper()
-	echoAnnotation(t, msg)
+	echoAnnotationErr(t, msg)
 	t.Fatalf(format, args...)
 }
 // AnnotatedWarn is a wrapper around t.Log() that also takes a msg string,
 // which is sent to stdout as a GitHub warning annotation when the envFlag
 // environment variable is set. args are passed to t.Log(args); since only
 // t.Log is called, the test is not marked as failed.
 func AnnotatedWarn(t *testing.T, msg string, args ...interface{}) {
 	t.Helper()
 	echoAnnotationWarn(t, msg)
 	t.Log(args...)
 }
--- a/testutil/kubernetes_helper.go
+++ b/testutil/kubernetes_helper.go
@ -1,6 +1,7 @@
 package testutil
 import (
 	"errors"
 	"fmt"
 	"os/exec"
 	"regexp"
@ -26,6 +27,19 @@ type KubernetesHelper struct {
 	retryFor   func(time.Duration, func() error) error
 }
 // RestartCountError is returned by CheckPods() whenever a container in one of
 // the checked pods has a restart count of exactly one.
 // Consumers should log this type of error instead of failing the test.
 // This is to alleviate CI flakiness stemming from a containerd bug.
 // See https://github.com/kubernetes/kubernetes/issues/89064
 // See https://github.com/containerd/containerd/issues/4068
 type RestartCountError struct {
 	// msg is the full, human-readable description returned by Error().
 	msg string
 }
 // Error implements the error interface by returning the stored message.
 func (e *RestartCountError) Error() string {
 	return e.msg
 }
 // NewKubernetesHelper creates a new instance of KubernetesHelper.
 func NewKubernetesHelper(k8sContext string, retryFor func(time.Duration, func() error) error) (*KubernetesHelper, error) {
 	rules := clientcmd.NewDefaultClientConfigLoadingRules()
@ -208,9 +222,13 @@ func (h *KubernetesHelper) CheckPods(namespace string, deploymentName string, re
 	for _, pod := range checkedPods {
 		for _, status := range append(pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses...) {
-			if status.RestartCount != 0 {
+			errStr := fmt.Sprintf("Container [%s] in pod [%s] in namespace [%s] has restart count [%d]",
-				return fmt.Errorf("Container [%s] in pod [%s] in namespace [%s] has restart count [%d]",
+				status.Name, pod.Name, pod.Namespace, status.RestartCount)
-					status.Name, pod.Name, pod.Namespace, status.RestartCount)
+			if status.RestartCount == 1 {
 				return &RestartCountError{errStr}
 			}
 			if status.RestartCount > 1 {
 				return errors.New(errStr)
 			}
 		}
 	}