Introduce logging and restart integration tests (#2414)

The integration tests deploy complete Linkerd environments into
Kubernetes, but do not check if the components are logging errors or
restarting.

Introduce integration tests to validate that all expected
control-plane containers (including `linkerd-proxy` and `linkerd-init`)
are found, are not logging errors, and are not restarting.

Fixes #2348

Signed-off-by: Andrew Seigner <siggy@buoyant.io>
2 changed files with 114 additions and 11 deletions


@@ -3,6 +3,7 @@ package test
import (
"fmt"
"os"
"regexp"
"strconv"
"strings"
"testing"
@@ -11,6 +12,15 @@ import (
"github.com/linkerd/linkerd2/testutil"
)
type deploySpec struct {
replicas int
containers []string
}
const (
proxyContainer = "linkerd-proxy"
)
//////////////////////
/// TEST SETUP ///
//////////////////////
@@ -31,12 +41,31 @@ var (
"linkerd-web",
}
linkerdDeployReplicas = map[string]int{
"linkerd-controller": 1,
"linkerd-grafana": 1,
"linkerd-prometheus": 1,
"linkerd-web": 1,
linkerdDeployReplicas = map[string]deploySpec{
"linkerd-controller": {1, []string{"destination", "public-api", "tap"}},
"linkerd-grafana": {1, []string{}},
"linkerd-prometheus": {1, []string{}},
"linkerd-web": {1, []string{"web"}},
}
// Linkerd commonly logs these errors during testing; remove them once
// they're addressed.
knownErrorsRegex = regexp.MustCompile(strings.Join([]string{
// TLS not ready at startup
`.*-tls linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! admin={bg=tls-config} linkerd2_proxy::transport::tls::config error loading /var/linkerd-io/identity/certificate\.crt: No such file or directory \(os error 2\)`,
`.*-tls linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! admin={bg=tls-config} linkerd2_proxy::transport::tls::config error loading /var/linkerd-io/trust-anchors/trust-anchors\.pem: No such file or directory \(os error 2\)`,
`.*-tls linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy WARN admin={bg=tls-config} linkerd2_proxy::transport::tls::config error reloading TLS config: Io\("/var/linkerd-io/identity/certificate\.crt", Some\(2\)\), falling back`,
`.*-tls linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy WARN admin={bg=tls-config} linkerd2_proxy::transport::tls::config error reloading TLS config: Io\("/var/linkerd-io/trust-anchors/trust-anchors\.pem", Some\(2\)\), falling back`,
// k8s hitting readiness endpoints before components are ready
`.* linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! proxy={server=in listen=0\.0\.0\.0:4143 remote=.*} linkerd2_proxy::proxy::http::router service error: an error occurred trying to connect: Connection refused \(os error 111\) \(address: 127\.0\.0\.1:.*\)`,
`.* linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! proxy={server=out listen=127\.0\.0\.1:4140 remote=.*} linkerd2_proxy::proxy::http::router service error: an error occurred trying to connect: Connection refused \(os error 111\) \(address: .*:4191\)`,
`.* linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! admin={server=metrics listen=0.0.0.0:4191 remote=.*} linkerd2_proxy::control::serve_http error serving metrics: Error { kind: Shutdown, cause: Os { code: 107, kind: NotConnected, message: "Transport endpoint is not connected" } }`,
`.* linkerd-controller-.*-.* tap time=".*" level=error msg="\[.*\] encountered an error: rpc error: code = Canceled desc = context canceled"`,
`.* linkerd-web-.*-.* linkerd-proxy WARN trust_dns_proto::xfer::dns_exchange failed to associate send_message response to the sender`,
}, "|"))
)
//////////////////////
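The allowlist above is collapsed into a single alternation with strings.Join(..., "|"), so one MatchString call can clear a log line. A minimal standalone sketch of that filtering approach (the patterns and log lines below are trimmed-down stand-ins for illustration, not the real ones):

package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	// Error-level proxy output we care about.
	errRegex := regexp.MustCompile(`linkerd-proxy (ERR!|WARN)`)

	// Known, tolerated errors joined into one alternation, as in the test.
	knownErrors := regexp.MustCompile(strings.Join([]string{
		`error loading /var/linkerd-io/identity/certificate\.crt`,
		`error loading /var/linkerd-io/trust-anchors/trust-anchors\.pem`,
	}, "|"))

	lines := []string{
		"linkerd-proxy ERR! error loading /var/linkerd-io/identity/certificate.crt: No such file or directory",
		"linkerd-proxy ERR! service error: an error occurred trying to connect",
	}

	for _, line := range lines {
		// Only errors that are not on the allowlist are reported.
		if errRegex.MatchString(line) && !knownErrors.MatchString(line) {
			fmt.Println("unexpected error:", line)
		}
	}
}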
@@ -83,7 +112,7 @@ func TestInstall(t *testing.T) {
}
if TestHelper.TLS() {
cmd = append(cmd, []string{"--tls", "optional"}...)
linkerdDeployReplicas["linkerd-ca"] = 1
linkerdDeployReplicas["linkerd-ca"] = deploySpec{1, []string{"ca"}}
}
if TestHelper.SingleNamespace() {
cmd = append(cmd, "--single-namespace")
@@ -113,11 +142,11 @@ func TestInstall(t *testing.T) {
}
// Tests Pods and Deployments
for deploy, replicas := range linkerdDeployReplicas {
if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, replicas); err != nil {
for deploy, spec := range linkerdDeployReplicas {
if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, spec.replicas); err != nil {
t.Fatal(fmt.Errorf("Error validating pods for deploy [%s]:\n%s", deploy, err))
}
if err := TestHelper.CheckDeployment(TestHelper.GetLinkerdNamespace(), deploy, replicas); err != nil {
if err := TestHelper.CheckDeployment(TestHelper.GetLinkerdNamespace(), deploy, spec.replicas); err != nil {
t.Fatal(fmt.Errorf("Error validating deploy [%s]:\n%s", deploy, err))
}
}
@@ -261,3 +290,50 @@ func TestCheckProxy(t *testing.T) {
t.Fatal(err.Error())
}
}
func TestLogs(t *testing.T) {
controllerRegex := regexp.MustCompile("level=(panic|fatal|error|warn)")
proxyRegex := regexp.MustCompile(fmt.Sprintf("%s (ERR|WARN)", proxyContainer))
for deploy, spec := range linkerdDeployReplicas {
deploy := strings.TrimPrefix(deploy, "linkerd-")
containers := append(spec.containers, proxyContainer)
for _, container := range containers {
errRegex := controllerRegex
if container == proxyContainer {
errRegex = proxyRegex
}
outputStream, err := TestHelper.LinkerdRunStream(
"logs", "--no-color",
"--control-plane-component", deploy,
"--container", container,
)
if err != nil {
t.Errorf("Error running command:\n%s", err)
}
defer outputStream.Stop()
// Ignore the error returned, since ReadUntil will return an error if it
// does not read 10,000 lines within 2 seconds. We don't need 10,000 log lines.
outputLines, _ := outputStream.ReadUntil(10000, 2*time.Second)
if len(outputLines) == 0 {
t.Errorf("No logs found for %s/%s", deploy, container)
}
for _, line := range outputLines {
if errRegex.MatchString(line) && !knownErrorsRegex.MatchString(line) {
t.Errorf("Found error in %s/%s log: %s", deploy, container, line)
}
}
}
}
}
func TestRestarts(t *testing.T) {
for deploy, spec := range linkerdDeployReplicas {
if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, spec.replicas); err != nil {
t.Fatal(fmt.Errorf("Error validating pods [%s]:\n%s", deploy, err))
}
}
}
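TestLogs deliberately ignores ReadUntil's error: the call asks for up to 10,000 lines but gives up after the timeout, and a short read is acceptable. testutil's actual ReadUntil implementation is not part of this diff; the following is only a rough sketch of that read-with-a-cap-and-a-timeout pattern, using a hypothetical readLines helper:

package main

import (
	"bufio"
	"fmt"
	"strings"
	"time"
)

// readLines collects lines from the scanner until maxLines have been read,
// the input is exhausted, or the timeout elapses, then returns whatever it
// got. A short read is not treated as fatal by the caller, mirroring how
// TestLogs treats ReadUntil. (Simplified: the reader goroutine is not cleaned
// up on timeout.)
func readLines(scanner *bufio.Scanner, maxLines int, timeout time.Duration) []string {
	lines := make(chan string)
	go func() {
		defer close(lines)
		for scanner.Scan() {
			lines <- scanner.Text()
		}
	}()

	var out []string
	deadline := time.After(timeout)
	for len(out) < maxLines {
		select {
		case line, ok := <-lines:
			if !ok {
				return out // input exhausted
			}
			out = append(out, line)
		case <-deadline:
			return out // timed out with fewer than maxLines; fine
		}
	}
	return out
}

func main() {
	scanner := bufio.NewScanner(strings.NewReader("one\ntwo\nthree\n"))
	fmt.Println(readLines(scanner, 10000, 2*time.Second))
}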


@@ -87,6 +87,13 @@ func (h *KubernetesHelper) KubectlApply(stdin string, namespace string) (string,
return string(out), err
}
// Kubectl executes an arbitrary kubectl command
func (h *KubernetesHelper) Kubectl(arg ...string) (string, error) {
cmd := exec.Command("kubectl", arg...)
out, err := cmd.CombinedOutput()
return string(out), err
}
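A short usage sketch for the new Kubectl helper; the surrounding test wiring, the dumpPods name, and the namespace argument are illustrative assumptions, not part of this diff:

package test

import (
	"testing"

	"github.com/linkerd/linkerd2/testutil"
)

// dumpPods is a hypothetical helper showing how a test could shell out to
// kubectl through the KubernetesHelper added above.
func dumpPods(t *testing.T, h *testutil.KubernetesHelper, namespace string) {
	out, err := h.Kubectl("get", "pods", "--namespace", namespace, "-o", "wide")
	if err != nil {
		t.Fatalf("kubectl get pods failed: %s\n%s", err, out)
	}
	t.Logf("pods in %s:\n%s", namespace, out)
}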
// getDeployments gets all deployments with a count of their ready replicas in
// the specified namespace.
func (h *KubernetesHelper) getDeployments(namespace string) (map[string]int, error) {
@@ -127,9 +134,12 @@ func (h *KubernetesHelper) CheckDeployment(namespace string, deploymentName stri
}
// CheckPods checks that a deployment in a namespace contains the expected
// number of pods in the Running state.
// number of pods in the Running state, and that no pods have been restarted.
func (h *KubernetesHelper) CheckPods(namespace string, deploymentName string, replicas int) error {
return h.retryFor(3*time.Minute, func() error {
var checkedPods []corev1.Pod
err := h.retryFor(3*time.Minute, func() error {
checkedPods = []corev1.Pod{}
pods, err := h.clientset.CoreV1().Pods(namespace).List(metav1.ListOptions{})
if err != nil {
return err
@@ -138,6 +148,8 @@ func (h *KubernetesHelper) CheckPods(namespace string, deploymentName string, re
var deploymentReplicas int
for _, pod := range pods.Items {
if strings.HasPrefix(pod.Name, deploymentName) {
checkedPods = append(checkedPods, pod)
deploymentReplicas++
if pod.Status.Phase != "Running" {
return fmt.Errorf("Pod [%s] in namespace [%s] is not running",
@@ -159,6 +171,21 @@ func (h *KubernetesHelper) CheckPods(namespace string, deploymentName string, re
return nil
})
if err != nil {
return err
}
for _, pod := range checkedPods {
for _, status := range append(pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses...) {
if status.RestartCount != 0 {
return fmt.Errorf("Container [%s] in pod [%s] in namespace [%s] has restart count [%d]",
status.Name, pod.Name, pod.Namespace, status.RestartCount)
}
}
}
return nil
}
// CheckService checks that a service exists in a namespace.
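For reference, the restart check that CheckPods now performs can also be run standalone against a cluster. The sketch below uses the same pre-context client-go List signature as the diff above; the kubeconfig path and the "linkerd" namespace are assumptions for illustration:

package main

import (
	"fmt"
	"os"
	"path/filepath"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	// Assumed kubeconfig location; adjust for the cluster under test.
	kubeconfig := filepath.Join(os.Getenv("HOME"), ".kube", "config")
	config, err := clientcmd.BuildConfigFromFlags("", kubeconfig)
	if err != nil {
		panic(err)
	}
	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		panic(err)
	}

	pods, err := clientset.CoreV1().Pods("linkerd").List(metav1.ListOptions{})
	if err != nil {
		panic(err)
	}
	for _, pod := range pods.Items {
		// Init containers can restart too, so both status lists are checked,
		// just as CheckPods does above.
		for _, status := range append(pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses...) {
			if status.RestartCount != 0 {
				fmt.Printf("%s/%s has restarted %d times\n", pod.Name, status.Name, status.RestartCount)
			}
		}
	}
}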