mirror of https://github.com/linkerd/linkerd2.git
Introduce logging and restart integration tests (#2414)
The integration tests deploy complete Linkerd environments into Kubernetes, but do not check whether the components are logging errors or restarting. Introduce integration tests to validate that all expected control-plane containers (including `linkerd-proxy` and `linkerd-init`) are found, are logging no errors, and are not restarting. Fixes #2348 Signed-off-by: Andrew Seigner <siggy@buoyant.io>
This commit is contained in:
parent
0dcd69c465
commit
d850b02b66
|
@ -3,6 +3,7 @@ package test
|
|||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
|
@ -11,6 +12,15 @@ import (
|
|||
"github.com/linkerd/linkerd2/testutil"
|
||||
)
|
||||
|
||||
type deploySpec struct {
|
||||
replicas int
|
||||
containers []string
|
||||
}
|
||||
|
||||
const (
|
||||
proxyContainer = "linkerd-proxy"
|
||||
)
|
||||
|
||||
//////////////////////
|
||||
/// TEST SETUP ///
|
||||
//////////////////////
|
||||
|
@ -31,12 +41,31 @@ var (
|
|||
"linkerd-web",
|
||||
}
|
||||
|
||||
linkerdDeployReplicas = map[string]int{
|
||||
"linkerd-controller": 1,
|
||||
"linkerd-grafana": 1,
|
||||
"linkerd-prometheus": 1,
|
||||
"linkerd-web": 1,
|
||||
linkerdDeployReplicas = map[string]deploySpec{
|
||||
"linkerd-controller": {1, []string{"destination", "public-api", "tap"}},
|
||||
"linkerd-grafana": {1, []string{}},
|
||||
"linkerd-prometheus": {1, []string{}},
|
||||
"linkerd-web": {1, []string{"web"}},
|
||||
}
|
||||
|
||||
// Linkerd commonly logs these errors during testing, remove these once
|
||||
// they're addressed.
|
||||
knownErrorsRegex = regexp.MustCompile(strings.Join([]string{
|
||||
// TLS not ready at startup
|
||||
`.*-tls linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! admin={bg=tls-config} linkerd2_proxy::transport::tls::config error loading /var/linkerd-io/identity/certificate\.crt: No such file or directory \(os error 2\)`,
|
||||
`.*-tls linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! admin={bg=tls-config} linkerd2_proxy::transport::tls::config error loading /var/linkerd-io/trust-anchors/trust-anchors\.pem: No such file or directory \(os error 2\)`,
|
||||
`.*-tls linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy WARN admin={bg=tls-config} linkerd2_proxy::transport::tls::config error reloading TLS config: Io\("/var/linkerd-io/identity/certificate\.crt", Some\(2\)\), falling back`,
|
||||
`.*-tls linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy WARN admin={bg=tls-config} linkerd2_proxy::transport::tls::config error reloading TLS config: Io\("/var/linkerd-io/trust-anchors/trust-anchors\.pem", Some\(2\)\), falling back`,
|
||||
|
||||
// k8s hitting readiness endpoints before components are ready
|
||||
`.* linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! proxy={server=in listen=0\.0\.0\.0:4143 remote=.*} linkerd2_proxy::proxy::http::router service error: an error occurred trying to connect: Connection refused \(os error 111\) \(address: 127\.0\.0\.1:.*\)`,
|
||||
`.* linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! proxy={server=out listen=127\.0\.0\.1:4140 remote=.*} linkerd2_proxy::proxy::http::router service error: an error occurred trying to connect: Connection refused \(os error 111\) \(address: .*:4191\)`,
|
||||
|
||||
`.* linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! admin={server=metrics listen=0.0.0.0:4191 remote=.*} linkerd2_proxy::control::serve_http error serving metrics: Error { kind: Shutdown, cause: Os { code: 107, kind: NotConnected, message: "Transport endpoint is not connected" } }`,
|
||||
|
||||
`.* linkerd-controller-.*-.* tap time=".*" level=error msg="\[.*\] encountered an error: rpc error: code = Canceled desc = context canceled"`,
|
||||
`.* linkerd-web-.*-.* linkerd-proxy WARN trust_dns_proto::xfer::dns_exchange failed to associate send_message response to the sender`,
|
||||
}, "|"))
|
||||
)
|
||||
|
||||
//////////////////////
|
||||
|
@ -83,7 +112,7 @@ func TestInstall(t *testing.T) {
|
|||
}
|
||||
if TestHelper.TLS() {
|
||||
cmd = append(cmd, []string{"--tls", "optional"}...)
|
||||
linkerdDeployReplicas["linkerd-ca"] = 1
|
||||
linkerdDeployReplicas["linkerd-ca"] = deploySpec{1, []string{"ca"}}
|
||||
}
|
||||
if TestHelper.SingleNamespace() {
|
||||
cmd = append(cmd, "--single-namespace")
|
||||
|
@ -113,11 +142,11 @@ func TestInstall(t *testing.T) {
|
|||
}
|
||||
|
||||
// Tests Pods and Deployments
|
||||
for deploy, replicas := range linkerdDeployReplicas {
|
||||
if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, replicas); err != nil {
|
||||
for deploy, spec := range linkerdDeployReplicas {
|
||||
if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, spec.replicas); err != nil {
|
||||
t.Fatal(fmt.Errorf("Error validating pods for deploy [%s]:\n%s", deploy, err))
|
||||
}
|
||||
if err := TestHelper.CheckDeployment(TestHelper.GetLinkerdNamespace(), deploy, replicas); err != nil {
|
||||
if err := TestHelper.CheckDeployment(TestHelper.GetLinkerdNamespace(), deploy, spec.replicas); err != nil {
|
||||
t.Fatal(fmt.Errorf("Error validating deploy [%s]:\n%s", deploy, err))
|
||||
}
|
||||
}
|
||||
|
@ -261,3 +290,50 @@ func TestCheckProxy(t *testing.T) {
|
|||
t.Fatal(err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func TestLogs(t *testing.T) {
|
||||
controllerRegex := regexp.MustCompile("level=(panic|fatal|error|warn)")
|
||||
proxyRegex := regexp.MustCompile(fmt.Sprintf("%s (ERR|WARN)", proxyContainer))
|
||||
|
||||
for deploy, spec := range linkerdDeployReplicas {
|
||||
deploy := strings.TrimPrefix(deploy, "linkerd-")
|
||||
containers := append(spec.containers, proxyContainer)
|
||||
|
||||
for _, container := range containers {
|
||||
errRegex := controllerRegex
|
||||
if container == proxyContainer {
|
||||
errRegex = proxyRegex
|
||||
}
|
||||
|
||||
outputStream, err := TestHelper.LinkerdRunStream(
|
||||
"logs", "--no-color",
|
||||
"--control-plane-component", deploy,
|
||||
"--container", container,
|
||||
)
|
||||
if err != nil {
|
||||
t.Errorf("Error running command:\n%s", err)
|
||||
}
|
||||
defer outputStream.Stop()
|
||||
// Ignore the error returned, since ReadUntil will return an error if it
|
||||
// does not return 10,000 after 1 second. We don't need 10,000 log lines.
|
||||
outputLines, _ := outputStream.ReadUntil(10000, 2*time.Second)
|
||||
if len(outputLines) == 0 {
|
||||
t.Errorf("No logs found for %s/%s", deploy, container)
|
||||
}
|
||||
|
||||
for _, line := range outputLines {
|
||||
if errRegex.MatchString(line) && !knownErrorsRegex.MatchString(line) {
|
||||
t.Errorf("Found error in %s/%s log: %s", deploy, container, line)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRestarts(t *testing.T) {
|
||||
for deploy, spec := range linkerdDeployReplicas {
|
||||
if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, spec.replicas); err != nil {
|
||||
t.Fatal(fmt.Errorf("Error validating pods [%s]:\n%s", deploy, err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -87,6 +87,13 @@ func (h *KubernetesHelper) KubectlApply(stdin string, namespace string) (string,
|
|||
return string(out), err
|
||||
}
|
||||
|
||||
// Kubectl executes an arbitrary Kubectl command
|
||||
func (h *KubernetesHelper) Kubectl(arg ...string) (string, error) {
|
||||
cmd := exec.Command("kubectl", arg...)
|
||||
out, err := cmd.CombinedOutput()
|
||||
return string(out), err
|
||||
}
|
||||
|
||||
// getDeployments gets all deployments with a count of their ready replicas in
|
||||
// the specified namespace.
|
||||
func (h *KubernetesHelper) getDeployments(namespace string) (map[string]int, error) {
|
||||
|
@ -127,9 +134,12 @@ func (h *KubernetesHelper) CheckDeployment(namespace string, deploymentName stri
|
|||
}
|
||||
|
||||
// CheckPods checks that a deployment in a namespace contains the expected
|
||||
// number of pods in the Running state.
|
||||
// number of pods in the Running state, and that no pods have been restarted.
|
||||
func (h *KubernetesHelper) CheckPods(namespace string, deploymentName string, replicas int) error {
|
||||
return h.retryFor(3*time.Minute, func() error {
|
||||
var checkedPods []corev1.Pod
|
||||
|
||||
err := h.retryFor(3*time.Minute, func() error {
|
||||
checkedPods = []corev1.Pod{}
|
||||
pods, err := h.clientset.CoreV1().Pods(namespace).List(metav1.ListOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -138,6 +148,8 @@ func (h *KubernetesHelper) CheckPods(namespace string, deploymentName string, re
|
|||
var deploymentReplicas int
|
||||
for _, pod := range pods.Items {
|
||||
if strings.HasPrefix(pod.Name, deploymentName) {
|
||||
checkedPods = append(checkedPods, pod)
|
||||
|
||||
deploymentReplicas++
|
||||
if pod.Status.Phase != "Running" {
|
||||
return fmt.Errorf("Pod [%s] in namespace [%s] is not running",
|
||||
|
@ -159,6 +171,21 @@ func (h *KubernetesHelper) CheckPods(namespace string, deploymentName string, re
|
|||
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, pod := range checkedPods {
|
||||
for _, status := range append(pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses...) {
|
||||
if status.RestartCount != 0 {
|
||||
return fmt.Errorf("Container [%s] in pod [%s] in namespace [%s] has restart count [%d]",
|
||||
status.Name, pod.Name, pod.Namespace, status.RestartCount)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// CheckService checks that a service exists in a namespace.
|
||||
|
|
Loading…
Reference in New Issue