Integration tests: Warn (instead of erroring) upon pod restarts (#4623)

* Integration tests: Warn (instead of erroring) upon pod restarts

Fixes #4595

Don't fail integration tests when a pod is detected to have restarted
exactly once. For now, we just log the restart and create a warning
annotation for it; more than one restart still fails the test.
This commit is contained in:
Alejandro Pedraza 2020-06-18 06:08:05 -05:00 committed by GitHub
parent b176fbeb6d
commit c8c5980d63
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 125 additions and 28 deletions

View File

@ -83,7 +83,11 @@ func TestDirectEdges(t *testing.T) {
} }
if err := TestHelper.CheckPods(testNamespace, "terminus", 1); err != nil { if err := TestHelper.CheckPods(testNamespace, "terminus", 1); err != nil {
testutil.AnnotatedError(t, "CheckPods timed-out", err) if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
} }
if err := TestHelper.CheckDeployment(testNamespace, "terminus", 1); err != nil { if err := TestHelper.CheckDeployment(testNamespace, "terminus", 1); err != nil {
@ -121,7 +125,11 @@ func TestDirectEdges(t *testing.T) {
} }
if err := TestHelper.CheckPods(testNamespace, "slow-cooker", 1); err != nil { if err := TestHelper.CheckPods(testNamespace, "slow-cooker", 1); err != nil {
testutil.AnnotatedError(t, "CheckPods timed-out", err) if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
} }
if err := TestHelper.CheckDeployment(testNamespace, "slow-cooker", 1); err != nil { if err := TestHelper.CheckDeployment(testNamespace, "slow-cooker", 1); err != nil {

View File

@ -41,7 +41,11 @@ func TestEgressHttp(t *testing.T) {
err = TestHelper.CheckPods(prefixedNs, "egress-test", 1) err = TestHelper.CheckPods(prefixedNs, "egress-test", 1)
if err != nil { if err != nil {
testutil.AnnotatedFatal(t, "CheckPods timed-out", err) if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
} }
testCase := func(url, methodToUse string) { testCase := func(url, methodToUse string) {

View File

@ -53,11 +53,19 @@ func verifyInstallApp(t *testing.T) {
} }
if err := TestHelper.CheckPods(prefixedNs, TestAppBackendDeploymentName, 1); err != nil { if err := TestHelper.CheckPods(prefixedNs, TestAppBackendDeploymentName, 1); err != nil {
testutil.AnnotatedError(t, "CheckPods timed-out", err) if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
} }
if err := TestHelper.CheckPods(prefixedNs, "slow-cooker", 1); err != nil { if err := TestHelper.CheckPods(prefixedNs, "slow-cooker", 1); err != nil {
testutil.AnnotatedError(t, "CheckPods timed-out", err) if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
} }
} }

View File

@ -78,8 +78,11 @@ func TestCliGet(t *testing.T) {
// wait for pods to start // wait for pods to start
for deploy, replicas := range deployReplicas { for deploy, replicas := range deployReplicas {
if err := TestHelper.CheckPods(prefixedNs, deploy, replicas); err != nil { if err := TestHelper.CheckPods(prefixedNs, deploy, replicas); err != nil {
testutil.AnnotatedError(t, "CheckPods timed-out", if rce, ok := err.(*testutil.RestartCountError); ok {
fmt.Errorf("Error validating pods for deploy [%s]:\n%s", deploy, err)) testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
} }
} }

View File

@ -125,7 +125,11 @@ func TestUpgradeTestAppWorksBeforeUpgrade(t *testing.T) {
testAppNamespace := TestHelper.GetTestNamespace("upgrade-test") testAppNamespace := TestHelper.GetTestNamespace("upgrade-test")
for _, deploy := range []string{"emoji", "voting", "web"} { for _, deploy := range []string{"emoji", "voting", "web"} {
if err := TestHelper.CheckPods(testAppNamespace, deploy, 1); err != nil { if err := TestHelper.CheckPods(testAppNamespace, deploy, 1); err != nil {
testutil.AnnotatedError(t, "CheckPods timed-out", err) if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
} }
if err := TestHelper.CheckDeployment(testAppNamespace, deploy, 1); err != nil { if err := TestHelper.CheckDeployment(testAppNamespace, deploy, 1); err != nil {

View File

@ -52,7 +52,11 @@ func TestServiceProfiles(t *testing.T) {
// wait for deployments to start // wait for deployments to start
for _, deploy := range []string{"t1", "t2", "t3", "gateway"} { for _, deploy := range []string{"t1", "t2", "t3", "gateway"} {
if err := TestHelper.CheckPods(testNamespace, deploy, 1); err != nil { if err := TestHelper.CheckPods(testNamespace, deploy, 1); err != nil {
testutil.AnnotatedError(t, "CheckPods timed-out", err) if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
} }
if err := TestHelper.CheckDeployment(testNamespace, deploy, 1); err != nil { if err := TestHelper.CheckDeployment(testNamespace, deploy, 1); err != nil {

View File

@ -99,7 +99,11 @@ func TestCliTap(t *testing.T) {
// wait for deployments to start // wait for deployments to start
for _, deploy := range []string{"t1", "t2", "t3", "gateway"} { for _, deploy := range []string{"t1", "t2", "t3", "gateway"} {
if err := TestHelper.CheckPods(prefixedNs, deploy, 1); err != nil { if err := TestHelper.CheckPods(prefixedNs, deploy, 1); err != nil {
testutil.AnnotatedError(t, "CheckPods timed-out", err) if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
} }
if err := TestHelper.CheckDeployment(prefixedNs, deploy, 1); err != nil { if err := TestHelper.CheckDeployment(prefixedNs, deploy, 1); err != nil {

View File

@ -126,7 +126,11 @@ func TestTracing(t *testing.T) {
tracingNs: "jaeger", tracingNs: "jaeger",
} { } {
if err := TestHelper.CheckPods(ns, deploy, 1); err != nil { if err := TestHelper.CheckPods(ns, deploy, 1); err != nil {
testutil.AnnotatedError(t, "CheckPods timed-out", err) if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
} }
if err := TestHelper.CheckDeployment(ns, deploy, 1); err != nil { if err := TestHelper.CheckDeployment(ns, deploy, 1); err != nil {

View File

@ -168,7 +168,11 @@ func TestTrafficSplitCli(t *testing.T) {
// wait for deployments to start // wait for deployments to start
for _, deploy := range []string{"backend", "failing", "slow-cooker"} { for _, deploy := range []string{"backend", "failing", "slow-cooker"} {
if err := TestHelper.CheckPods(prefixedNs, deploy, 1); err != nil { if err := TestHelper.CheckPods(prefixedNs, deploy, 1); err != nil {
testutil.AnnotatedError(t, "CheckPods timed-out", err) if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
} }
if err := TestHelper.CheckDeployment(prefixedNs, deploy, 1); err != nil { if err := TestHelper.CheckDeployment(prefixedNs, deploy, 1); err != nil {

View File

@ -51,8 +51,11 @@ func TestResourcesPostInstall(t *testing.T) {
// Tests Pods and Deployments // Tests Pods and Deployments
for deploy, spec := range testutil.LinkerdDeployReplicas { for deploy, spec := range testutil.LinkerdDeployReplicas {
if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, spec.Replicas); err != nil { if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, spec.Replicas); err != nil {
testutil.AnnotatedFatal(t, "CheckPods timed-out", if rce, ok := err.(*testutil.RestartCountError); ok {
fmt.Errorf("Error validating pods for deploy [%s]:\n%s", deploy, err)) testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
} }
if err := TestHelper.CheckDeployment(TestHelper.GetLinkerdNamespace(), deploy, spec.Replicas); err != nil { if err := TestHelper.CheckDeployment(TestHelper.GetLinkerdNamespace(), deploy, spec.Replicas); err != nil {
testutil.AnnotatedFatalf(t, "CheckDeployment timed-out", "Error validating deployment [%s]:\n%s", deploy, err) testutil.AnnotatedFatalf(t, "CheckDeployment timed-out", "Error validating deployment [%s]:\n%s", deploy, err)

View File

@ -13,9 +13,26 @@ const (
rootPath = "/linkerd2/" rootPath = "/linkerd2/"
) )
func echoAnnotation(t *testing.T, args ...interface{}) { type level int
// Severity levels accepted by echoAnnotation. level.String maps each one to
// the word placed after "::" in the annotation line that echoAnnotation prints.
// NOTE(review): the identifier `err` is also the conventional name for local
// error variables, which will shadow this constant inside function bodies —
// consider a more distinctive name.
const (
// err is rendered as "error".
err level = iota
// warn is rendered as "warning".
warn
)
// String returns the annotation keyword for the level: "error" for err and
// "warning" for warn. It panics when called on any other value, since that
// can only come from a programming mistake inside this package.
func (l level) String() string {
	if l == err {
		return "error"
	}
	if l == warn {
		return "warning"
	}
	panic(fmt.Sprintf("invalid level: %d", l))
}
func echoAnnotation(t *testing.T, l level, args ...interface{}) {
if _, ok := os.LookupEnv(envFlag); ok { if _, ok := os.LookupEnv(envFlag); ok {
_, fileName, fileLine, ok := runtime.Caller(2) _, fileName, fileLine, ok := runtime.Caller(3)
if !ok { if !ok {
panic("Couldn't recover runtime info") panic("Couldn't recover runtime info")
} }
@ -26,17 +43,25 @@ func echoAnnotation(t *testing.T, args ...interface{}) {
testName := parts[0] testName := parts[0]
for _, arg := range args { for _, arg := range args {
msg := fmt.Sprintf("%s - %s", testName, arg) msg := fmt.Sprintf("%s - %s", testName, arg)
fmt.Printf("::error file=%s,line=%d::%s\n", fileName, fileLine, msg) fmt.Printf("::%s file=%s,line=%d::%s\n", l, fileName, fileLine, msg)
} }
} }
} }
// echoAnnotationErr forwards args to echoAnnotation with the err level, so
// each arg is printed as an "error" annotation.
// echoAnnotation resolves the reported file/line with runtime.Caller(3); this
// wrapper accounts for one of those frames, so it is presumably meant to be
// called directly from the exported helpers (Error, Fatal, ...) and not from
// deeper in the call stack.
func echoAnnotationErr(t *testing.T, args ...interface{}) {
echoAnnotation(t, err, args...)
}
// echoAnnotationWarn forwards args to echoAnnotation with the warn level, so
// each arg is printed as a "warning" annotation.
// echoAnnotation resolves the reported file/line with runtime.Caller(3); this
// wrapper accounts for one of those frames, so it is presumably meant to be
// called directly from the exported helpers (e.g. AnnotatedWarn) and not from
// deeper in the call stack.
func echoAnnotationWarn(t *testing.T, args ...interface{}) {
echoAnnotation(t, warn, args...)
}
// Error is a wrapper around t.Error() // Error is a wrapper around t.Error()
// args are passed to t.Error(args) and each arg will be sent to stdout formatted // args are passed to t.Error(args) and each arg will be sent to stdout formatted
// as a Github annotation when the envFlag environment variable is set // as a Github annotation when the envFlag environment variable is set
func Error(t *testing.T, args ...interface{}) { func Error(t *testing.T, args ...interface{}) {
t.Helper() t.Helper()
echoAnnotation(t, args...) echoAnnotationErr(t, args...)
t.Error(args...) t.Error(args...)
} }
@ -44,7 +69,7 @@ func Error(t *testing.T, args ...interface{}) {
// will be used as the Github annotation // will be used as the Github annotation
func AnnotatedError(t *testing.T, msg string, args ...interface{}) { func AnnotatedError(t *testing.T, msg string, args ...interface{}) {
t.Helper() t.Helper()
echoAnnotation(t, msg) echoAnnotationErr(t, msg)
t.Error(args...) t.Error(args...)
} }
@ -54,7 +79,7 @@ func AnnotatedError(t *testing.T, msg string, args ...interface{}) {
// environment variable is set // environment variable is set
func Errorf(t *testing.T, format string, args ...interface{}) { func Errorf(t *testing.T, format string, args ...interface{}) {
t.Helper() t.Helper()
echoAnnotation(t, fmt.Sprintf(format, args...)) echoAnnotationErr(t, fmt.Sprintf(format, args...))
t.Errorf(format, args...) t.Errorf(format, args...)
} }
@ -62,7 +87,7 @@ func Errorf(t *testing.T, format string, args ...interface{}) {
// will be used as the Github annotation // will be used as the Github annotation
func AnnotatedErrorf(t *testing.T, msg, format string, args ...interface{}) { func AnnotatedErrorf(t *testing.T, msg, format string, args ...interface{}) {
t.Helper() t.Helper()
echoAnnotation(t, msg) echoAnnotationErr(t, msg)
t.Errorf(format, args...) t.Errorf(format, args...)
} }
@ -71,7 +96,7 @@ func AnnotatedErrorf(t *testing.T, msg, format string, args ...interface{}) {
// as a Github annotation when the envFlag environment variable is set // as a Github annotation when the envFlag environment variable is set
func Fatal(t *testing.T, args ...interface{}) { func Fatal(t *testing.T, args ...interface{}) {
t.Helper() t.Helper()
echoAnnotation(t, args) echoAnnotationErr(t, args)
t.Fatal(args...) t.Fatal(args...)
} }
@ -79,7 +104,7 @@ func Fatal(t *testing.T, args ...interface{}) {
// will be used as the Github annotation // will be used as the Github annotation
func AnnotatedFatal(t *testing.T, msg string, args ...interface{}) { func AnnotatedFatal(t *testing.T, msg string, args ...interface{}) {
t.Helper() t.Helper()
echoAnnotation(t, msg) echoAnnotationErr(t, msg)
t.Fatal(args...) t.Fatal(args...)
} }
@ -89,7 +114,7 @@ func AnnotatedFatal(t *testing.T, msg string, args ...interface{}) {
// environment variable is set // environment variable is set
func Fatalf(t *testing.T, format string, args ...interface{}) { func Fatalf(t *testing.T, format string, args ...interface{}) {
t.Helper() t.Helper()
echoAnnotation(t, fmt.Sprintf(format, args...)) echoAnnotationErr(t, fmt.Sprintf(format, args...))
t.Fatalf(format, args...) t.Fatalf(format, args...)
} }
@ -97,6 +122,14 @@ func Fatalf(t *testing.T, format string, args ...interface{}) {
// will be used as the Github annotation // will be used as the Github annotation
func AnnotatedFatalf(t *testing.T, msg, format string, args ...interface{}) { func AnnotatedFatalf(t *testing.T, msg, format string, args ...interface{}) {
t.Helper() t.Helper()
echoAnnotation(t, msg) echoAnnotationErr(t, msg)
t.Fatalf(format, args...) t.Fatalf(format, args...)
} }
// AnnotatedWarn is a wrapper around t.Log() that also admits a msg string,
// which is used as the Github warning annotation.
// Only msg goes into the annotation; args are passed to t.Log(args) unchanged.
// Unlike the Error/Fatal wrappers it does not mark the test as failed.
func AnnotatedWarn(t *testing.T, msg string, args ...interface{}) {
t.Helper()
// Must be called directly from here: the annotation's file/line is resolved
// from a fixed stack depth inside echoAnnotation.
echoAnnotationWarn(t, msg)
t.Log(args...)
}

View File

@ -1,6 +1,7 @@
package testutil package testutil
import ( import (
"errors"
"fmt" "fmt"
"os/exec" "os/exec"
"regexp" "regexp"
@ -26,6 +27,19 @@ type KubernetesHelper struct {
retryFor func(time.Duration, func() error) error retryFor func(time.Duration, func() error) error
} }
// RestartCountError is the error CheckPods() reports when it finds a container
// whose restart count is exactly one.
// Callers are expected to log it (e.g. as a warning) rather than fail the
// test: a single restart is tolerated to reduce CI flakiness caused by a
// containerd bug.
// See https://github.com/kubernetes/kubernetes/issues/89064
// See https://github.com/containerd/containerd/issues/4068
type RestartCountError struct {
	msg string
}

// Error implements the error interface by returning the stored message.
func (rce *RestartCountError) Error() string {
	return rce.msg
}
// NewKubernetesHelper creates a new instance of KubernetesHelper. // NewKubernetesHelper creates a new instance of KubernetesHelper.
func NewKubernetesHelper(k8sContext string, retryFor func(time.Duration, func() error) error) (*KubernetesHelper, error) { func NewKubernetesHelper(k8sContext string, retryFor func(time.Duration, func() error) error) (*KubernetesHelper, error) {
rules := clientcmd.NewDefaultClientConfigLoadingRules() rules := clientcmd.NewDefaultClientConfigLoadingRules()
@ -208,9 +222,13 @@ func (h *KubernetesHelper) CheckPods(namespace string, deploymentName string, re
for _, pod := range checkedPods { for _, pod := range checkedPods {
for _, status := range append(pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses...) { for _, status := range append(pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses...) {
if status.RestartCount != 0 { errStr := fmt.Sprintf("Container [%s] in pod [%s] in namespace [%s] has restart count [%d]",
return fmt.Errorf("Container [%s] in pod [%s] in namespace [%s] has restart count [%d]", status.Name, pod.Name, pod.Namespace, status.RestartCount)
status.Name, pod.Name, pod.Namespace, status.RestartCount) if status.RestartCount == 1 {
return &RestartCountError{errStr}
}
if status.RestartCount > 1 {
return errors.New(errStr)
} }
} }
} }