Integration tests: Warn (instead of erroring) upon pod restarts (#4623)

* Integration tests: Warn (instead of erroring) upon pod restarts

Fixes #4595

Don't fail integration tests when a pod is detected to have restarted
just once. For now we just log the restart and create a GitHub warning
annotation for it.
This commit is contained in:
Alejandro Pedraza 2020-06-18 06:08:05 -05:00 committed by GitHub
parent b176fbeb6d
commit c8c5980d63
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 125 additions and 28 deletions

View File

@ -83,8 +83,12 @@ func TestDirectEdges(t *testing.T) {
}
if err := TestHelper.CheckPods(testNamespace, "terminus", 1); err != nil {
if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
}
if err := TestHelper.CheckDeployment(testNamespace, "terminus", 1); err != nil {
testutil.AnnotatedErrorf(t, "CheckDeployment timed-out", "Error validating deployment [%s]:\n%s", "terminus", err)
@ -121,8 +125,12 @@ func TestDirectEdges(t *testing.T) {
}
if err := TestHelper.CheckPods(testNamespace, "slow-cooker", 1); err != nil {
if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
}
if err := TestHelper.CheckDeployment(testNamespace, "slow-cooker", 1); err != nil {
	// Name the deployment that was actually checked so a failure points at slow-cooker.
	testutil.AnnotatedErrorf(t, "CheckDeployment timed-out", "error validating deployment [%s]:\n%s", "slow-cooker", err)

View File

@ -41,7 +41,11 @@ func TestEgressHttp(t *testing.T) {
err = TestHelper.CheckPods(prefixedNs, "egress-test", 1)
if err != nil {
testutil.AnnotatedFatal(t, "CheckPods timed-out", err)
if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
}
testCase := func(url, methodToUse string) {

View File

@ -53,12 +53,20 @@ func verifyInstallApp(t *testing.T) {
}
if err := TestHelper.CheckPods(prefixedNs, TestAppBackendDeploymentName, 1); err != nil {
if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
}
if err := TestHelper.CheckPods(prefixedNs, "slow-cooker", 1); err != nil {
if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
}
}
func checkAppWoks(t *testing.T, timeout time.Duration) error {

View File

@ -78,8 +78,11 @@ func TestCliGet(t *testing.T) {
// wait for pods to start
for deploy, replicas := range deployReplicas {
if err := TestHelper.CheckPods(prefixedNs, deploy, replicas); err != nil {
testutil.AnnotatedError(t, "CheckPods timed-out",
fmt.Errorf("Error validating pods for deploy [%s]:\n%s", deploy, err))
if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
}
}

View File

@ -125,8 +125,12 @@ func TestUpgradeTestAppWorksBeforeUpgrade(t *testing.T) {
testAppNamespace := TestHelper.GetTestNamespace("upgrade-test")
for _, deploy := range []string{"emoji", "voting", "web"} {
if err := TestHelper.CheckPods(testAppNamespace, deploy, 1); err != nil {
if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
}
if err := TestHelper.CheckDeployment(testAppNamespace, deploy, 1); err != nil {
testutil.AnnotatedErrorf(t, "CheckDeployment timed-out", "Error validating deployment [%s]:\n%s", deploy, err)

View File

@ -52,8 +52,12 @@ func TestServiceProfiles(t *testing.T) {
// wait for deployments to start
for _, deploy := range []string{"t1", "t2", "t3", "gateway"} {
if err := TestHelper.CheckPods(testNamespace, deploy, 1); err != nil {
if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
}
if err := TestHelper.CheckDeployment(testNamespace, deploy, 1); err != nil {
testutil.AnnotatedErrorf(t, "CheckDeployment timed-out", "Error validating deployment [%s]:\n%s", deploy, err)

View File

@ -99,8 +99,12 @@ func TestCliTap(t *testing.T) {
// wait for deployments to start
for _, deploy := range []string{"t1", "t2", "t3", "gateway"} {
if err := TestHelper.CheckPods(prefixedNs, deploy, 1); err != nil {
if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
}
if err := TestHelper.CheckDeployment(prefixedNs, deploy, 1); err != nil {
testutil.AnnotatedErrorf(t, "CheckDeployment timed-out", "Error validating deployment [%s]:\n%s", deploy, err)

View File

@ -126,8 +126,12 @@ func TestTracing(t *testing.T) {
tracingNs: "jaeger",
} {
if err := TestHelper.CheckPods(ns, deploy, 1); err != nil {
if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
}
if err := TestHelper.CheckDeployment(ns, deploy, 1); err != nil {
testutil.AnnotatedErrorf(t, "CheckDeployment timed-out", "Error validating deployment [%s]:\n%s", deploy, err)

View File

@ -168,8 +168,12 @@ func TestTrafficSplitCli(t *testing.T) {
// wait for deployments to start
for _, deploy := range []string{"backend", "failing", "slow-cooker"} {
if err := TestHelper.CheckPods(prefixedNs, deploy, 1); err != nil {
if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
}
if err := TestHelper.CheckDeployment(prefixedNs, deploy, 1); err != nil {
testutil.AnnotatedErrorf(t, "CheckDeployment timed-out", "Error validating deployment [%s]:\n%s", deploy, err)

View File

@ -51,8 +51,11 @@ func TestResourcesPostInstall(t *testing.T) {
// Tests Pods and Deployments
for deploy, spec := range testutil.LinkerdDeployReplicas {
if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, spec.Replicas); err != nil {
testutil.AnnotatedFatal(t, "CheckPods timed-out",
fmt.Errorf("Error validating pods for deploy [%s]:\n%s", deploy, err))
if rce, ok := err.(*testutil.RestartCountError); ok {
testutil.AnnotatedWarn(t, "CheckPods timed-out", rce)
} else {
testutil.AnnotatedError(t, "CheckPods timed-out", err)
}
}
if err := TestHelper.CheckDeployment(TestHelper.GetLinkerdNamespace(), deploy, spec.Replicas); err != nil {
testutil.AnnotatedFatalf(t, "CheckDeployment timed-out", "Error validating deployment [%s]:\n%s", deploy, err)

View File

@ -13,9 +13,26 @@ const (
rootPath = "/linkerd2/"
)
func echoAnnotation(t *testing.T, args ...interface{}) {
// level is the severity attached to an annotation emitted by echoAnnotation.
type level int

// Supported severities. Their String() form is the keyword echoAnnotation
// prints right after the leading "::" of the annotation line.
const (
	err level = iota
	warn
)

// String returns the annotation keyword for l: "error" for err and "warning"
// for warn. Any other value is a programming mistake and causes a panic.
func (l level) String() string {
	if l == err {
		return "error"
	}
	if l == warn {
		return "warning"
	}
	panic(fmt.Sprintf("invalid level: %d", l))
}
func echoAnnotation(t *testing.T, l level, args ...interface{}) {
if _, ok := os.LookupEnv(envFlag); ok {
_, fileName, fileLine, ok := runtime.Caller(2)
_, fileName, fileLine, ok := runtime.Caller(3)
if !ok {
panic("Couldn't recover runtime info")
}
@ -26,17 +43,25 @@ func echoAnnotation(t *testing.T, args ...interface{}) {
testName := parts[0]
for _, arg := range args {
msg := fmt.Sprintf("%s - %s", testName, arg)
fmt.Printf("::error file=%s,line=%d::%s\n", fileName, fileLine, msg)
fmt.Printf("::%s file=%s,line=%d::%s\n", l, fileName, fileLine, msg)
}
}
}
// echoAnnotationErr forwards args to echoAnnotation with the err level, so they
// are emitted as error annotations.
// NOTE(review): echoAnnotation locates the reporting file/line with a fixed
// runtime.Caller skip count, so this presumably has to remain a plain
// one-frame pass-through — confirm before inlining it or adding indirection.
func echoAnnotationErr(t *testing.T, args ...interface{}) {
echoAnnotation(t, err, args...)
}
// echoAnnotationWarn forwards args to echoAnnotation with the warn level, so
// they are emitted as warning annotations.
// NOTE(review): echoAnnotation locates the reporting file/line with a fixed
// runtime.Caller skip count, so this presumably has to remain a plain
// one-frame pass-through — confirm before inlining it or adding indirection.
func echoAnnotationWarn(t *testing.T, args ...interface{}) {
echoAnnotation(t, warn, args...)
}
// Error is a wrapper around t.Error().
// args are passed to t.Error(args...) and each arg is also written to stdout
// formatted as a Github error annotation when the envFlag environment variable
// is set.
func Error(t *testing.T, args ...interface{}) {
t.Helper()
// NOTE(review): no level argument here although echoAnnotation takes
// (t, l, args...) — presumably the superseded pre-change line; confirm.
echoAnnotation(t, args...)
echoAnnotationErr(t, args...)
t.Error(args...)
}
@ -44,7 +69,7 @@ func Error(t *testing.T, args ...interface{}) {
// will be used as the Github annotation
func AnnotatedError(t *testing.T, msg string, args ...interface{}) {
t.Helper()
// NOTE(review): no level argument here although echoAnnotation takes
// (t, l, args...) — presumably the superseded pre-change line; confirm.
echoAnnotation(t, msg)
// Only msg becomes the annotation; args go to t.Error untouched.
echoAnnotationErr(t, msg)
t.Error(args...)
}
@ -54,7 +79,7 @@ func AnnotatedError(t *testing.T, msg string, args ...interface{}) {
// environment variable is set
func Errorf(t *testing.T, format string, args ...interface{}) {
t.Helper()
// NOTE(review): no level argument here although echoAnnotation takes
// (t, l, args...) — presumably the superseded pre-change line; confirm.
echoAnnotation(t, fmt.Sprintf(format, args...))
// The annotation carries the fully formatted message as a single entry.
echoAnnotationErr(t, fmt.Sprintf(format, args...))
t.Errorf(format, args...)
}
@ -62,7 +87,7 @@ func Errorf(t *testing.T, format string, args ...interface{}) {
// will be used as the Github annotation
func AnnotatedErrorf(t *testing.T, msg, format string, args ...interface{}) {
t.Helper()
// NOTE(review): no level argument here although echoAnnotation takes
// (t, l, args...) — presumably the superseded pre-change line; confirm.
echoAnnotation(t, msg)
// Only msg becomes the annotation; format and args go to t.Errorf untouched.
echoAnnotationErr(t, msg)
t.Errorf(format, args...)
}
@ -71,7 +96,7 @@ func AnnotatedErrorf(t *testing.T, msg, format string, args ...interface{}) {
// as a Github annotation when the envFlag environment variable is set
func Fatal(t *testing.T, args ...interface{}) {
t.Helper()
echoAnnotation(t, args)
echoAnnotationErr(t, args)
t.Fatal(args...)
}
@ -79,7 +104,7 @@ func Fatal(t *testing.T, args ...interface{}) {
// will be used as the Github annotation
func AnnotatedFatal(t *testing.T, msg string, args ...interface{}) {
t.Helper()
// NOTE(review): no level argument here although echoAnnotation takes
// (t, l, args...) — presumably the superseded pre-change line; confirm.
echoAnnotation(t, msg)
// Only msg becomes the annotation; args go to t.Fatal untouched.
echoAnnotationErr(t, msg)
t.Fatal(args...)
}
@ -89,7 +114,7 @@ func AnnotatedFatal(t *testing.T, msg string, args ...interface{}) {
// environment variable is set
func Fatalf(t *testing.T, format string, args ...interface{}) {
t.Helper()
// NOTE(review): no level argument here although echoAnnotation takes
// (t, l, args...) — presumably the superseded pre-change line; confirm.
echoAnnotation(t, fmt.Sprintf(format, args...))
// The annotation carries the fully formatted message as a single entry.
echoAnnotationErr(t, fmt.Sprintf(format, args...))
t.Fatalf(format, args...)
}
@ -97,6 +122,14 @@ func Fatalf(t *testing.T, format string, args ...interface{}) {
// will be used as the Github annotation
func AnnotatedFatalf(t *testing.T, msg, format string, args ...interface{}) {
t.Helper()
// NOTE(review): no level argument here although echoAnnotation takes
// (t, l, args...) — presumably the superseded pre-change line; confirm.
echoAnnotation(t, msg)
// Only msg becomes the annotation; format and args go to t.Fatalf untouched.
echoAnnotationErr(t, msg)
t.Fatalf(format, args...)
}
// AnnotatedWarn is a wrapper around t.Log() that also takes a msg string to be
// used as the Github warning annotation.
// msg is emitted through echoAnnotationWarn, and args are passed to
// t.Log(args...), which records them without marking the test as failed.
func AnnotatedWarn(t *testing.T, msg string, args ...interface{}) {
t.Helper()
// Only msg becomes the annotation; args go to t.Log untouched.
echoAnnotationWarn(t, msg)
t.Log(args...)
}

View File

@ -1,6 +1,7 @@
package testutil
import (
"errors"
"fmt"
"os/exec"
"regexp"
@ -26,6 +27,19 @@ type KubernetesHelper struct {
retryFor func(time.Duration, func() error) error
}
// RestartCountError is the error CheckPods() hands back when it finds a
// container whose restart count is exactly one.
// Callers are expected to log this kind of error rather than fail the test; the
// goal is to reduce CI flakiness caused by a containerd bug.
// See https://github.com/kubernetes/kubernetes/issues/89064
// See https://github.com/containerd/containerd/issues/4068
type RestartCountError struct {
	msg string
}

// Error implements the error interface by returning the stored message.
func (rce *RestartCountError) Error() string {
	return rce.msg
}
// NewKubernetesHelper creates a new instance of KubernetesHelper.
func NewKubernetesHelper(k8sContext string, retryFor func(time.Duration, func() error) error) (*KubernetesHelper, error) {
rules := clientcmd.NewDefaultClientConfigLoadingRules()
@ -208,9 +222,13 @@ func (h *KubernetesHelper) CheckPods(namespace string, deploymentName string, re
for _, pod := range checkedPods {
for _, status := range append(pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses...) {
if status.RestartCount != 0 {
return fmt.Errorf("Container [%s] in pod [%s] in namespace [%s] has restart count [%d]",
errStr := fmt.Sprintf("Container [%s] in pod [%s] in namespace [%s] has restart count [%d]",
status.Name, pod.Name, pod.Namespace, status.RestartCount)
if status.RestartCount == 1 {
return &RestartCountError{errStr}
}
if status.RestartCount > 1 {
return errors.New(errStr)
}
}
}