mirror of https://github.com/linkerd/linkerd2.git
Flaky tests: when installation test fails, fetch logs and events (#4410)
* When installation test fails, fetch logs and events Re #4371 When a test fails in `./test/install_test.go`, trigger the `TestLogs` and `TestEvents` tests in a separate process in order to output any unexpected logs/events that might have caused the initial test failure. For instance, currently we're sporadically experiencing pod restarts. Instead of ignoring them, this might help provide us with the real underlying cause.
This commit is contained in:
parent
bfedcd5485
commit
de5b22ffba
|
@ -4,8 +4,9 @@
|
||||||
# 1. upgrade_integration_tests
|
# 1. upgrade_integration_tests
|
||||||
# 2. helm_integration_tests
|
# 2. helm_integration_tests
|
||||||
# 3. helm_upgrade_integration_tests
|
# 3. helm_upgrade_integration_tests
|
||||||
# 4. deep_integration_tests
|
# 4. uninstall_integration_tests
|
||||||
# 5. external_issuer_integration_tests
|
# 5. deep_integration_tests
|
||||||
|
# 6. external_issuer_integration_tests
|
||||||
|
|
||||||
bindir=$( cd "${BASH_SOURCE[0]%/*}" && pwd )
|
bindir=$( cd "${BASH_SOURCE[0]%/*}" && pwd )
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package test
|
package test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
@ -19,11 +20,32 @@ import (
|
||||||
/// TEST SETUP ///
|
/// TEST SETUP ///
|
||||||
//////////////////////
|
//////////////////////
|
||||||
|
|
||||||
var TestHelper *testutil.TestHelper
|
var (
|
||||||
|
TestHelper *testutil.TestHelper
|
||||||
|
|
||||||
|
// controlPlaneInstalled becomes true as soon as there's a CP available to
|
||||||
|
// retrieve logs from
|
||||||
|
controlPlaneInstalled = false
|
||||||
|
)
|
||||||
|
|
||||||
func TestMain(m *testing.M) {
|
func TestMain(m *testing.M) {
|
||||||
TestHelper = testutil.NewTestHelper()
|
TestHelper = testutil.NewTestHelper()
|
||||||
os.Exit(m.Run())
|
code := m.Run()
|
||||||
|
// if the test failed, show logs and events that can potentially explain cause
|
||||||
|
if code != 0 && controlPlaneInstalled {
|
||||||
|
_, errs1 := fetchAndCheckLogs()
|
||||||
|
for _, err := range errs1 {
|
||||||
|
fmt.Println(err)
|
||||||
|
}
|
||||||
|
errs2 := fetchAndCheckEvents()
|
||||||
|
for _, err := range errs2 {
|
||||||
|
fmt.Println(err)
|
||||||
|
}
|
||||||
|
if len(errs1) == 0 && len(errs2) == 0 {
|
||||||
|
fmt.Println("No unexpected log entries or events found")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
os.Exit(code)
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -195,6 +217,7 @@ func TestInstallOrUpgradeCli(t *testing.T) {
|
||||||
if TestHelper.GetHelmReleaseName() != "" {
|
if TestHelper.GetHelmReleaseName() != "" {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
controlPlaneInstalled = true
|
||||||
|
|
||||||
var (
|
var (
|
||||||
cmd = "install"
|
cmd = "install"
|
||||||
|
@ -358,6 +381,7 @@ func TestInstallHelm(t *testing.T) {
|
||||||
if TestHelper.GetHelmReleaseName() == "" {
|
if TestHelper.GetHelmReleaseName() == "" {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
controlPlaneInstalled = true
|
||||||
|
|
||||||
cn := fmt.Sprintf("identity.%s.cluster.local", TestHelper.GetLinkerdNamespace())
|
cn := fmt.Sprintf("identity.%s.cluster.local", TestHelper.GetLinkerdNamespace())
|
||||||
var err error
|
var err error
|
||||||
|
@ -396,7 +420,7 @@ func TestResourcesPostInstall(t *testing.T) {
|
||||||
for _, svc := range linkerdSvcs {
|
for _, svc := range linkerdSvcs {
|
||||||
if err := TestHelper.CheckService(TestHelper.GetLinkerdNamespace(), svc); err != nil {
|
if err := TestHelper.CheckService(TestHelper.GetLinkerdNamespace(), svc); err != nil {
|
||||||
testutil.AnnotatedErrorf(t, fmt.Sprintf("error validating service [%s]", svc),
|
testutil.AnnotatedErrorf(t, fmt.Sprintf("error validating service [%s]", svc),
|
||||||
"rrror validating service [%s]:\n%s", svc, err)
|
"error validating service [%s]:\n%s", svc, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -721,7 +745,10 @@ func TestCheckProxy(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestLogs(t *testing.T) {
|
func fetchAndCheckLogs() ([]string, []error) {
|
||||||
|
okMessages := []string{}
|
||||||
|
errs := []error{}
|
||||||
|
|
||||||
controllerRegex := regexp.MustCompile("level=(panic|fatal|error|warn)")
|
controllerRegex := regexp.MustCompile("level=(panic|fatal|error|warn)")
|
||||||
proxyRegex := regexp.MustCompile(fmt.Sprintf("%s (ERR|WARN)", k8s.ProxyContainerName))
|
proxyRegex := regexp.MustCompile(fmt.Sprintf("%s (ERR|WARN)", k8s.ProxyContainerName))
|
||||||
clientGoRegex := regexp.MustCompile("client-go@")
|
clientGoRegex := regexp.MustCompile("client-go@")
|
||||||
|
@ -744,85 +771,96 @@ func TestLogs(t *testing.T) {
|
||||||
knownErrorsRegex = knownProxyErrorsRegex
|
knownErrorsRegex = knownProxyErrorsRegex
|
||||||
}
|
}
|
||||||
|
|
||||||
t.Run(name, func(t *testing.T) {
|
outputStream, err := TestHelper.LinkerdRunStream(
|
||||||
outputStream, err := TestHelper.LinkerdRunStream(
|
"logs", "--no-color",
|
||||||
"logs", "--no-color",
|
"--control-plane-component", deploy,
|
||||||
"--control-plane-component", deploy,
|
"--container", container,
|
||||||
"--container", container,
|
)
|
||||||
)
|
if err != nil {
|
||||||
if err != nil {
|
errs = append(errs, fmt.Errorf("error running command:\n%s", err))
|
||||||
testutil.AnnotatedErrorf(t, "error running command",
|
continue
|
||||||
"error running command:\n%s", err)
|
}
|
||||||
}
|
defer outputStream.Stop()
|
||||||
defer outputStream.Stop()
|
// Ignore the error returned, since ReadUntil will return an error if it
|
||||||
// Ignore the error returned, since ReadUntil will return an error if it
|
// does not return 10,000 after 2 seconds. We don't need 10,000 log lines.
|
||||||
// does not return 10,000 after 2 seconds. We don't need 10,000 log lines.
|
outputLines, _ := outputStream.ReadUntil(10000, 2*time.Second)
|
||||||
outputLines, _ := outputStream.ReadUntil(10000, 2*time.Second)
|
if len(outputLines) == 0 {
|
||||||
|
// Retry one time for 30 more seconds, in case the cluster is slow to
|
||||||
|
// produce log lines.
|
||||||
|
outputLines, _ = outputStream.ReadUntil(10000, 30*time.Second)
|
||||||
if len(outputLines) == 0 {
|
if len(outputLines) == 0 {
|
||||||
// Retry one time for 30 more seconds, in case the cluster is slow to
|
errs = append(errs, fmt.Errorf("no logs found for %s", name))
|
||||||
// produce log lines.
|
|
||||||
outputLines, _ = outputStream.ReadUntil(10000, 30*time.Second)
|
|
||||||
if len(outputLines) == 0 {
|
|
||||||
testutil.AnnotatedErrorf(t, "no logs found for %s", name)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for _, line := range outputLines {
|
for _, line := range outputLines {
|
||||||
if errRegex.MatchString(line) {
|
if errRegex.MatchString(line) {
|
||||||
if knownErrorsRegex.MatchString(line) {
|
if knownErrorsRegex.MatchString(line) {
|
||||||
// report all known logging errors in the output
|
// report all known logging errors in the output
|
||||||
t.Logf("Found known error in %s log: %s", name, line)
|
okMessages = append(okMessages, fmt.Sprintf("found known error in %s log: %s", name, line))
|
||||||
|
} else {
|
||||||
|
if proxy {
|
||||||
|
okMessages = append(okMessages, fmt.Sprintf("found unexpected proxy error in %s log: %s", name, line))
|
||||||
} else {
|
} else {
|
||||||
if proxy {
|
errs = append(errs, fmt.Errorf("found unexpected controller error in %s log: %s", name, line))
|
||||||
t.Logf("Found unexpected proxy error in %s log: %s", name, line)
|
|
||||||
} else {
|
|
||||||
testutil.AnnotatedErrorf(t,
|
|
||||||
"Found unexpected controller error in %s log: %s", name, line)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if clientGoRegex.MatchString((line)) {
|
|
||||||
hasClientGoLogs = true
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
})
|
if clientGoRegex.MatchString((line)) {
|
||||||
|
hasClientGoLogs = true
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !hasClientGoLogs {
|
if !hasClientGoLogs {
|
||||||
testutil.AnnotatedError(t, "didn't find any client-go entries")
|
errs = append(errs, errors.New("didn't find any client-go entries"))
|
||||||
|
}
|
||||||
|
|
||||||
|
return okMessages, errs
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLogs(t *testing.T) {
|
||||||
|
okMessages, errs := fetchAndCheckLogs()
|
||||||
|
for msg := range okMessages {
|
||||||
|
t.Log(msg)
|
||||||
|
}
|
||||||
|
for err := range errs {
|
||||||
|
testutil.AnnotatedError(t, "Error checking logs", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestEvents(t *testing.T) {
|
func fetchAndCheckEvents() []error {
|
||||||
out, err := TestHelper.Kubectl("",
|
out, err := TestHelper.Kubectl("",
|
||||||
"--namespace", TestHelper.GetLinkerdNamespace(),
|
"--namespace", TestHelper.GetLinkerdNamespace(),
|
||||||
"get", "events", "-ojson",
|
"get", "events", "-ojson",
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
testutil.AnnotatedErrorf(t, "'kubectl get events' command failed",
|
return []error{fmt.Errorf("'kubectl get events' command failed with %s\n%s", err, out)}
|
||||||
"'kubectl get events' command failed with %s\n%s", err, out)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
events, err := testutil.ParseEvents(out)
|
events, err := testutil.ParseEvents(out)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
testutil.AnnotatedError(t, "error parsing events", err)
|
return []error{err}
|
||||||
}
|
}
|
||||||
|
|
||||||
var unknownEvents []string
|
errs := []error{}
|
||||||
for _, e := range events {
|
for _, e := range events {
|
||||||
if e.Type == corev1.EventTypeNormal {
|
if e.Type == corev1.EventTypeNormal {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
evtStr := fmt.Sprintf("Reason: [%s] Object: [%s] Message: [%s]", e.Reason, e.InvolvedObject.Name, e.Message)
|
evtStr := fmt.Errorf("found unexpected warning event: reason: [%s] Object: [%s] Message: [%s]", e.Reason, e.InvolvedObject.Name, e.Message)
|
||||||
if !knownEventWarningsRegex.MatchString(e.Message) {
|
if !knownEventWarningsRegex.MatchString(e.Message) {
|
||||||
unknownEvents = append(unknownEvents, evtStr)
|
errs = append(errs, evtStr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(unknownEvents) > 0 {
|
return errs
|
||||||
testutil.AnnotatedErrorf(t, "found unexpected warning events",
|
}
|
||||||
"found unexpected warning events:\n%s", strings.Join(unknownEvents, "\n"))
|
|
||||||
|
func TestEvents(t *testing.T) {
|
||||||
|
for err := range fetchAndCheckEvents() {
|
||||||
|
testutil.AnnotatedError(t, "Error checking events", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue