Increase retry timeout for retryable tests, refactor RetryFor (#1835)

When running integration tests in a Kubernetes cluster that sometimes takes a little longer to get pods ready, the integration tests fail tests too early because most tests have a retry timeout of 30 seconds. 

This PR bumps up this retry timeout for `TestInstall` to 3 minutes. This gives the test enough time to download any new docker images that it needs to complete succesfully and also reduces the need to have large timeout values for subsequent tests. This PR also refactors `CheckPods` to check that all containers in a pods for a deployment are in a`Ready` state. This helps also helps in ensuring that all docker images have been downloaded and the pods are in a good state.

Tests were run on the community cluster and all were successful.

Signed-off-by: Dennis Adjei-Baah <dennis@buoyant.io>
This commit is contained in:
Dennis Adjei-Baah 2018-11-06 16:03:58 -08:00 committed by GitHub
parent dfaf3b1e1b
commit 15e87bfd8d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 110 additions and 117 deletions

View File

@ -16,6 +16,14 @@ import (
var TestHelper *testutil.TestHelper
var egressHttpDeployments = []string{
"egress-test-https-post",
"egress-test-http-post",
"egress-test-https-get",
"egress-test-http-get",
"egress-test-not-www-get",
}
func TestMain(m *testing.M) {
TestHelper = testutil.NewTestHelper()
os.Exit(m.Run())
@ -37,6 +45,13 @@ func TestEgressHttp(t *testing.T) {
t.Fatalf("Unexpected error: %v output:\n%s", err, out)
}
for _, deploy := range egressHttpDeployments {
err = TestHelper.CheckPods(prefixedNs, deploy, 1)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
}
test_case := func(serviceName, dnsName, protocolToUse, methodToUse string) {
testName := fmt.Sprintf("Can use egress to send %s request to %s (%s)", methodToUse, protocolToUse, serviceName)
t.Run(testName, func(t *testing.T) {

View File

@ -165,7 +165,7 @@ spec:
apiVersion: apps/v1beta1
kind: Deployment
metadata:
name: egress-test-http-get
name: egress-test-not-www-get
spec:
replicas: 1
selector:

View File

@ -9,7 +9,6 @@ import (
"sort"
"strings"
"testing"
"time"
"github.com/linkerd/linkerd2/testutil"
)
@ -68,16 +67,10 @@ func TestCliGet(t *testing.T) {
}
// wait for pods to start
err = TestHelper.RetryFor(30*time.Second, func() error {
for deploy, replicas := range deployReplicas {
if err := TestHelper.CheckPods(prefixedNs, deploy, replicas); err != nil {
return fmt.Errorf("Error validating pods for deploy [%s]:\n%s", deploy, err)
}
for deploy, replicas := range deployReplicas {
if err := TestHelper.CheckPods(prefixedNs, deploy, replicas); err != nil {
t.Error(fmt.Errorf("Error validating pods for deploy [%s]:\n%s", deploy, err))
}
return nil
})
if err != nil {
t.Error(err)
}
t.Run("get pods from --all-namespaces", func(t *testing.T) {

View File

@ -89,57 +89,39 @@ func TestInstall(t *testing.T) {
}
// Tests Services
err = TestHelper.RetryFor(10*time.Second, func() error {
for _, svc := range linkerdSvcs {
if err := TestHelper.CheckService(TestHelper.GetLinkerdNamespace(), svc); err != nil {
return fmt.Errorf("Error validating service [%s]:\n%s", svc, err)
}
for _, svc := range linkerdSvcs {
if err := TestHelper.CheckService(TestHelper.GetLinkerdNamespace(), svc); err != nil {
t.Error(fmt.Errorf("Error validating service [%s]:\n%s", svc, err))
}
return nil
})
if err != nil {
t.Error(err)
}
// Tests Pods and Deployments
err = TestHelper.RetryFor(2*time.Minute, func() error {
for deploy, replicas := range linkerdDeployReplicas {
if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, replicas); err != nil {
return fmt.Errorf("Error validating pods for deploy [%s]:\n%s", deploy, err)
}
if err := TestHelper.CheckDeployment(TestHelper.GetLinkerdNamespace(), deploy, replicas); err != nil {
return fmt.Errorf("Error validating deploy [%s]:\n%s", deploy, err)
}
for deploy, replicas := range linkerdDeployReplicas {
if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, replicas); err != nil {
t.Fatal(fmt.Errorf("Error validating pods for deploy [%s]:\n%s", deploy, err))
}
if err := TestHelper.CheckDeployment(TestHelper.GetLinkerdNamespace(), deploy, replicas); err != nil {
t.Fatal(fmt.Errorf("Error validating deploy [%s]:\n%s", deploy, err))
}
return nil
})
if err != nil {
t.Error(err)
}
}
func TestVersionPostInstall(t *testing.T) {
err := TestHelper.RetryFor(30*time.Second, func() error {
return TestHelper.CheckVersion(TestHelper.GetVersion())
})
err := TestHelper.CheckVersion(TestHelper.GetVersion())
if err != nil {
t.Fatalf("Version command failed\n%s", err.Error())
}
}
func TestCheckPostInstall(t *testing.T) {
var out string
var err error
overallErr := TestHelper.RetryFor(30*time.Second, func() error {
out, _, err = TestHelper.LinkerdRun(
"check",
"--expected-version",
TestHelper.GetVersion(),
"--wait=0",
)
return err
})
if overallErr != nil {
out, _, err := TestHelper.LinkerdRun(
"check",
"--expected-version",
TestHelper.GetVersion(),
"--wait=0",
)
if err != nil {
t.Fatalf("Check command failed\n%s", out)
}
@ -206,6 +188,13 @@ func TestInject(t *testing.T) {
t.Fatalf("kubectl apply command failed\n%s", out)
}
for _, deploy := range []string{"smoke-test-terminus","smoke-test-gateway"} {
err = TestHelper.CheckPods(prefixedNs, deploy, 1)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
}
svcURL, err := TestHelper.ProxyURLFor(prefixedNs, "smoke-test-gateway-svc", "http")
if err != nil {
t.Fatalf("Failed to get proxy URL: %s", err)
@ -225,20 +214,15 @@ func TestInject(t *testing.T) {
func TestCheckProxy(t *testing.T) {
prefixedNs := TestHelper.GetTestNamespace("smoke-test")
var out string
err := TestHelper.RetryFor(2*time.Minute, func() error {
var err error
out, _, err = TestHelper.LinkerdRun(
"check",
"--proxy",
"--expected-version",
TestHelper.GetVersion(),
"--namespace",
prefixedNs,
"--wait=0",
)
return err
})
out, _, err := TestHelper.LinkerdRun(
"check",
"--proxy",
"--expected-version",
TestHelper.GetVersion(),
"--namespace",
prefixedNs,
"--wait=0",
)
if err != nil {
t.Fatalf("Check command failed\n%s", out)

View File

@ -85,16 +85,14 @@ func TestCliTap(t *testing.T) {
}
// wait for deployments to start
err = TestHelper.RetryFor(30*time.Second, func() error {
for _, deploy := range []string{"t1", "t2", "t3", "gateway"} {
if err := TestHelper.CheckDeployment(prefixedNs, deploy, 1); err != nil {
return fmt.Errorf("Error validating deployment [%s]:\n%s", deploy, err)
}
for _, deploy := range []string{"t1", "t2", "t3", "gateway"} {
if err := TestHelper.CheckPods(prefixedNs, deploy, 1); err != nil {
t.Error(err)
}
if err := TestHelper.CheckDeployment(prefixedNs, deploy, 1); err != nil {
t.Error(fmt.Errorf("Error validating deployment [%s]:\n%s", deploy, err))
}
return nil
})
if err != nil {
t.Error(err)
}
t.Run("tap a deployment", func(t *testing.T) {

View File

@ -5,6 +5,7 @@ import (
"os/exec"
"regexp"
"strings"
"time"
"github.com/linkerd/linkerd2/pkg/k8s"
coreV1 "k8s.io/api/core/v1"
@ -20,10 +21,11 @@ import (
// Kubernetes API using the environment's configured kubeconfig file.
type KubernetesHelper struct {
clientset *kubernetes.Clientset
retryFor func(time.Duration, func() error) error
}
// NewKubernetesHelper creates a new instance of KubernetesHelper.
func NewKubernetesHelper() (*KubernetesHelper, error) {
func NewKubernetesHelper(retryFor func(time.Duration, func() error) error) (*KubernetesHelper, error) {
rules := clientcmd.NewDefaultClientConfigLoadingRules()
overrides := &clientcmd.ConfigOverrides{}
kubeConfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(rules, overrides)
@ -39,6 +41,7 @@ func NewKubernetesHelper() (*KubernetesHelper, error) {
return &KubernetesHelper{
clientset: clientset,
retryFor: retryFor,
}, nil
}
@ -101,68 +104,68 @@ func (h *KubernetesHelper) getDeployments(namespace string) (map[string]int, err
// CheckDeployment checks that a deployment in a namespace contains the expected
// number of replicas.
func (h *KubernetesHelper) CheckDeployment(namespace string, deploymentName string, replicas int) error {
deploys, err := h.getDeployments(namespace)
if err != nil {
return err
}
return h.retryFor(30*time.Second, func() error {
deploys, err := h.getDeployments(namespace)
if err != nil {
return err
}
count, ok := deploys[deploymentName]
if !ok {
return fmt.Errorf("Deployment [%s] in namespace [%s] not found",
deploymentName, namespace)
}
count, ok := deploys[deploymentName]
if !ok {
return fmt.Errorf("Deployment [%s] in namespace [%s] not found",
deploymentName, namespace)
}
if count != replicas {
return fmt.Errorf("Expected deployment [%s] in namespace [%s] to have [%d] replicas, but found [%d]",
deploymentName, namespace, replicas, count)
}
if count != replicas {
return fmt.Errorf("Expected deployment [%s] in namespace [%s] to have [%d] replicas, but found [%d]",
deploymentName, namespace, replicas, count)
}
return nil
}
// getPods gets all pods with their pod status in the specified namespace.
func (h *KubernetesHelper) getPods(namespace string) (map[string]coreV1.PodPhase, error) {
pods, err := h.clientset.CoreV1().Pods(namespace).List(metav1.ListOptions{})
if err != nil {
return nil, err
}
podData := make(map[string]coreV1.PodPhase)
for _, pod := range pods.Items {
podData[pod.GetName()] = pod.Status.Phase
}
return podData, nil
return nil
})
}
// CheckPods checks that a deployment in a namespace contains the expected
// number of pods in the Running state.
func (h *KubernetesHelper) CheckPods(namespace string, deploymentName string, replicas int) error {
podData, err := h.getPods(namespace)
if err != nil {
return err
}
return h.retryFor(3*time.Minute, func() error {
pods, err := h.clientset.CoreV1().Pods(namespace).List(metav1.ListOptions{})
if err != nil {
return err
}
var runningPods []string
for name, status := range podData {
if strings.Contains(name, deploymentName) {
if status == "Running" {
runningPods = append(runningPods, name)
var deploymentReplicas int
for _, pod := range pods.Items {
if strings.HasPrefix(pod.Name, deploymentName){
deploymentReplicas++
if pod.Status.Phase != "Running" {
return fmt.Errorf("Pod [%s] in namespace [%s] is not running",
pod.Name, pod.Namespace)
}
for _, container := range pod.Status.ContainerStatuses {
if !container.Ready {
return fmt.Errorf("Container [%s] in pod [%s] in namespace [%s] is not running",
container.Name, pod.Name, pod.Namespace)
}
}
}
}
}
if len(runningPods) != replicas {
return fmt.Errorf("Expected deployment [%s] in namespace [%s] to have [%d] running pods, but found [%d]",
deploymentName, namespace, replicas, len(runningPods))
}
if deploymentReplicas != replicas {
return fmt.Errorf("Expected deployment [%s] in namespace [%s] to have [%d] running pods, but found [%d]",
deploymentName, namespace, replicas, deploymentReplicas)
}
return nil
return nil
})
}
// CheckService checks that a service exists in a namespace.
func (h *KubernetesHelper) CheckService(namespace string, serviceName string) error {
_, err := h.clientset.CoreV1().Services(namespace).Get(serviceName, metav1.GetOptions{})
return err
return h.retryFor(10*time.Second, func() error {
_, err := h.clientset.CoreV1().Services(namespace).Get(serviceName, metav1.GetOptions{})
return err
})
}
// GetPodsForDeployment returns all pods for the given deployment

View File

@ -80,7 +80,7 @@ func NewTestHelper() *TestHelper {
}
testHelper.version = strings.TrimSpace(version)
kubernetesHelper, err := NewKubernetesHelper()
kubernetesHelper, err := NewKubernetesHelper(testHelper.RetryFor)
if err != nil {
exit(1, "error creating kubernetes helper: "+err.Error())
}