From ff841d54fcc9e51b5d17e4455f1baf63a51713a7 Mon Sep 17 00:00:00 2001 From: Tarun Pothulapati Date: Thu, 7 Jan 2021 23:52:09 +0530 Subject: [PATCH] viz: add a retry check for core control-plane pods before install (#5434) * viz: add a retry check for core control-plane pods before install This commit adds a new check so that `viz install` waits till the control-plane pods are up. For this to work, the `prometheus` sub-system check in control-plane self-check has been removed, as we re-use healthchecks to perform this. Signed-off-by: Tarun Pothulapati --- pkg/healthcheck/healthcheck.go | 6 +-- pkg/healthcheck/healthcheck_test.go | 3 ++ test/integration/install_test.go | 9 ----- viz/cmd/install.go | 57 +++++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 14 deletions(-) diff --git a/pkg/healthcheck/healthcheck.go b/pkg/healthcheck/healthcheck.go index 605f19689..4297e21db 100644 --- a/pkg/healthcheck/healthcheck.go +++ b/pkg/healthcheck/healthcheck.go @@ -2384,11 +2384,7 @@ const running = "Running" func validateControlPlanePods(pods []corev1.Pod) error { statuses := getPodStatuses(pods) - names := []string{"controller", "identity", "sp-validator"} - // TODO: deprecate this when we drop support for checking pre-default proxy-injector control-planes - if _, found := statuses["proxy-injector"]; found { - names = append(names, "proxy-injector") - } + names := []string{"controller", "identity", "sp-validator", "proxy-injector"} for _, name := range names { pods, found := statuses[name] diff --git a/pkg/healthcheck/healthcheck_test.go b/pkg/healthcheck/healthcheck_test.go index 02e9eb2a9..edaec265d 100644 --- a/pkg/healthcheck/healthcheck_test.go +++ b/pkg/healthcheck/healthcheck_test.go @@ -1691,6 +1691,7 @@ func TestValidateControlPlanePods(t *testing.T) { pod("linkerd-controller-6f78cbd47-bc557", corev1.PodRunning, true), pod("linkerd-identity-6849948664-27982", corev1.PodRunning, true), pod("linkerd-sp-validator-24d2879ce6-cddk9", corev1.PodRunning, true), + pod("linkerd-proxy-injector-5f79ff4844-", corev1.PodRunning, true), } err := validateControlPlanePods(pods) @@ -1707,6 +1708,7 @@ func TestValidateControlPlanePods(t *testing.T) { pod("linkerd-identity-6849948664-27982", corev1.PodRunning, true), pod("linkerd-identity-6849948664-27983", corev1.PodRunning, false), pod("linkerd-identity-6849948664-27984", corev1.PodFailed, false), + pod("linkerd-proxy-injector-5f79ff4844-", corev1.PodRunning, true), pod("linkerd-sp-validator-24d2879ce6-cddk9", corev1.PodRunning, true), } @@ -1720,6 +1722,7 @@ func TestValidateControlPlanePods(t *testing.T) { pods := []corev1.Pod{ pod("linkerd-controller-6f78cbd47-bc557", corev1.PodRunning, true), pod("linkerd-identity-6849948664-27982", corev1.PodRunning, true), + pod("linkerd-proxy-injector-5f79ff4844-", corev1.PodRunning, true), pod("linkerd-sp-validator-24d2879ce6-cddk9", corev1.PodRunning, true), pod("hello-43c25d", corev1.PodRunning, true), } diff --git a/test/integration/install_test.go b/test/integration/install_test.go index 05dcc92ea..37e4bb2f8 100644 --- a/test/integration/install_test.go +++ b/test/integration/install_test.go @@ -404,15 +404,6 @@ func TestInstallOrUpgradeCli(t *testing.T) { "'kubectl apply' command failed\n%s", out) } - // Wait for the proxy injector to be up - name := "linkerd-proxy-injector" - ns := "linkerd" - o, err := TestHelper.Kubectl("", "--namespace="+ns, "wait", "--for=condition=available", "--timeout=120s", "deploy/"+name) - if err != nil { - testutil.AnnotatedFatalf(t, fmt.Sprintf("failed to wait for condition=available for deploy/%s in namespace %s", name, ns), - "failed to wait for condition=available for deploy/%s in namespace %s: %s: %s", name, ns, err, o) - } - // Install Linkerd Viz Extension exec = append(vizCmd, vizArgs...) out, err = TestHelper.LinkerdRun(exec...) diff --git a/viz/cmd/install.go b/viz/cmd/install.go index abd06c865..b85b3c1fa 100644 --- a/viz/cmd/install.go +++ b/viz/cmd/install.go @@ -6,6 +6,7 @@ import ( "io" "os" "path" + "time" "github.com/linkerd/linkerd2/pkg/charts" partials "github.com/linkerd/linkerd2/pkg/charts/static" @@ -37,6 +38,7 @@ var ( func newCmdInstall() *cobra.Command { var skipChecks bool + var wait time.Duration var options values.Options cmd := &cobra.Command{ @@ -62,6 +64,9 @@ func newCmdInstall() *cobra.Command { if !exists { return fmt.Errorf("could not find a Linkerd installation") } + + // Wait for the proxy-injector to be up and running + checkInjectorRunningOrRetryOrExit(wait) } return install(os.Stdout, options) @@ -73,6 +78,10 @@ func newCmdInstall() *cobra.Command { `Skip checks for namespace existence`, ) + cmd.Flags().DurationVar( + &wait, "wait", 300*time.Second, + "Wait for core control-plane components to be available") + flags.AddValueOptionsFlags(cmd.Flags(), &options) return cmd @@ -155,3 +164,51 @@ func render(w io.Writer, valuesOverrides map[string]interface{}) error { _, err = w.Write(buf.Bytes()) return err } + +func checkInjectorRunningOrRetryOrExit(retryDeadline time.Duration) { + checks := []healthcheck.CategoryID{ + healthcheck.KubernetesAPIChecks, + healthcheck.LinkerdControlPlaneExistenceChecks, + healthcheck.LinkerdAPIChecks, + } + + hc := healthcheck.NewHealthChecker(checks, &healthcheck.Options{ + ControlPlaneNamespace: controlPlaneNamespace, + KubeConfig: kubeconfigPath, + KubeContext: kubeContext, + Impersonate: impersonate, + ImpersonateGroup: impersonateGroup, + APIAddr: apiAddr, + RetryDeadline: time.Now().Add(retryDeadline), + }) + + hc.RunChecks(exitOnError) +} + +func exitOnError(result *healthcheck.CheckResult) { + if result.Retry { + fmt.Fprintln(os.Stderr, "Waiting for core control plane to become available") + return + } + + if result.Err != nil && !result.Warning { + var msg string + switch result.Category { + case healthcheck.KubernetesAPIChecks: + msg = "Cannot connect to Kubernetes" + case healthcheck.LinkerdControlPlaneExistenceChecks: + msg = "Cannot find Linkerd" + case healthcheck.LinkerdAPIChecks: + msg = "Cannot connect to Linkerd" + } + fmt.Fprintf(os.Stderr, "%s: %s\n", msg, result.Err) + + checkCmd := "linkerd check" + if controlPlaneNamespace != defaultLinkerdNamespace { + checkCmd += fmt.Sprintf(" --linkerd-namespace %s", controlPlaneNamespace) + } + fmt.Fprintf(os.Stderr, "Validate the install with: %s\n", checkCmd) + + os.Exit(1) + } +}