From 2442ca07bf58bebbeeb0afb6a70a53d7e2d9c7f5 Mon Sep 17 00:00:00 2001 From: Kevin Leimkuhler Date: Tue, 19 Jul 2022 12:14:55 -0600 Subject: [PATCH] Parse Pod labels for owning Deployment instead of name (#8920) Closes #8916 When a random Pod (meshed or not) is created in the `linkerd`, `linkerd-viz`, or `linkerd-jaeger` namespaces, their respective `check` subcommands can fail. We parse Pod names for their owning Deployment by assuming the Pod name has a randomized suffix. For example, the `linkerd-destination` Deployment creates the `linkerd-destination-58c57dd675-7tthr` Pod. We split the name on `-` and drop the last two parts (the randomized suffix), keeping `["linkerd", "destination"]`; the remaining parts make up the Deployment name. Now, if a random Pod is created in the namespace with the name `test`, we apply that same logic but hit a runtime error when trying to drop the last two parts of the split. `test` does not split at all since it contains no `-`, so the upper slice bound `len(parts)-2` is negative and we error with `slice bounds out of range`. To fix this, we now use the fact that all Linkerd components have a `linkerd.io/control-plane-component` or `component` label with a value that is the name of the owning Deployment. This allows us to avoid any extra parsing logic and just look at a single label value. Additionally, some of these checks get all the Pods in a namespace with the `GetPodsByNamespace` method, but we don't always need something so general. In the places where we are checking specifically for Linkerd components, we can narrow this further by using the expected LabelSelector, such as `linkerd.io/extension=viz`. 
Signed-off-by: Kevin Leimkuhler --- jaeger/cmd/check.go | 10 +++++++--- pkg/healthcheck/healthcheck.go | 17 ++++++++++++----- viz/pkg/healthcheck/healthcheck.go | 16 ++++++++++------ 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/jaeger/cmd/check.go b/jaeger/cmd/check.go index e81b2e82f..aa8ab001b 100644 --- a/jaeger/cmd/check.go +++ b/jaeger/cmd/check.go @@ -9,8 +9,10 @@ import ( pkgcmd "github.com/linkerd/linkerd2/pkg/cmd" "github.com/linkerd/linkerd2/pkg/healthcheck" + "github.com/linkerd/linkerd2/pkg/k8s" "github.com/linkerd/linkerd2/pkg/version" "github.com/spf13/cobra" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) const ( @@ -75,18 +77,20 @@ func jaegerCategory(hc *healthcheck.HealthChecker) *healthcheck.Category { WithRetryDeadline(hc.RetryDeadline). SurfaceErrorOnRetry(). WithCheck(func(ctx context.Context) error { - pods, err := hc.KubeAPIClient().GetPodsByNamespace(ctx, jaegerNamespace) + podList, err := hc.KubeAPIClient().CoreV1().Pods(jaegerNamespace).List(ctx, metav1.ListOptions{ + LabelSelector: fmt.Sprintf("%s=%s", k8s.LinkerdExtensionLabel, JaegerExtensionName), + }) if err != nil { return err } // Check for relevant pods to be present - err = healthcheck.CheckForPods(pods, []string{"jaeger-injector"}) + err = healthcheck.CheckForPods(podList.Items, []string{"jaeger-injector"}) if err != nil { return err } - return healthcheck.CheckPodsRunning(pods, jaegerNamespace) + return healthcheck.CheckPodsRunning(podList.Items, jaegerNamespace) })) checkers = append(checkers, diff --git a/pkg/healthcheck/healthcheck.go b/pkg/healthcheck/healthcheck.go index adaca7f5b..dbc4e0ab2 100644 --- a/pkg/healthcheck/healthcheck.go +++ b/pkg/healthcheck/healthcheck.go @@ -708,10 +708,13 @@ func (hc *HealthChecker) allCategories() []*Category { fatal: true, check: func(ctx context.Context) error { var err error - hc.controlPlanePods, err = hc.kubeAPI.GetPodsByNamespace(ctx, hc.ControlPlaneNamespace) + podList, err := 
hc.kubeAPI.CoreV1().Pods(hc.ControlPlaneNamespace).List(ctx, metav1.ListOptions{ + LabelSelector: k8s.ControllerComponentLabel, + }) if err != nil { return err } + hc.controlPlanePods = podList.Items return validateControlPlanePods(hc.controlPlanePods) }, }, @@ -2802,10 +2805,14 @@ func CheckForPods(pods []corev1.Pod, deployNames []string) error { exists := make(map[string]bool) for _, pod := range pods { - // Strip randomized suffix and take the deployment name - parts := strings.Split(pod.Name, "-") - deployName := strings.Join(parts[:len(parts)-2], "-") - exists[deployName] = true + for label, value := range pod.Labels { + // When the label key is `linkerd.io/control-plane-component` or + // `component`, we'll take its value as the name of the deployment + // that the pod is part of + if label == k8s.ControllerComponentLabel || label == "component" { + exists[value] = true + } + } } for _, expected := range deployNames { diff --git a/viz/pkg/healthcheck/healthcheck.go b/viz/pkg/healthcheck/healthcheck.go index 86b67ec13..abfd81a21 100644 --- a/viz/pkg/healthcheck/healthcheck.go +++ b/viz/pkg/healthcheck/healthcheck.go @@ -69,7 +69,7 @@ func (hc *HealthChecker) RunChecks(observer healthcheck.CheckObserver) (bool, bo // VizCategory returns a healthcheck.Category containing checkers // to verify the health of viz components func (hc *HealthChecker) VizCategory() *healthcheck.Category { - + vizSelector := fmt.Sprintf("%s=%s", k8s.LinkerdExtensionLabel, VizExtensionName) return healthcheck.NewCategory(LinkerdVizExtensionCheck, []healthcheck.Checker{ *healthcheck.NewChecker("linkerd-viz Namespace exists"). WithHintAnchor("l5d-viz-ns-exists"). @@ -153,18 +153,20 @@ func (hc *HealthChecker) VizCategory() *healthcheck.Category { WithRetryDeadline(hc.RetryDeadline). SurfaceErrorOnRetry(). 
WithCheck(func(ctx context.Context) error { - pods, err := hc.KubeAPIClient().GetPodsByNamespace(ctx, hc.vizNamespace) + podList, err := hc.KubeAPIClient().CoreV1().Pods(hc.vizNamespace).List(ctx, metav1.ListOptions{ + LabelSelector: vizSelector, + }) if err != nil { return err } // Check for relevant pods to be present - err = healthcheck.CheckForPods(pods, []string{"web", "tap", "metrics-api", "tap-injector"}) + err = healthcheck.CheckForPods(podList.Items, []string{"web", "tap", "metrics-api", "tap-injector"}) if err != nil { return err } - return healthcheck.CheckPodsRunning(pods, hc.vizNamespace) + return healthcheck.CheckPodsRunning(podList.Items, hc.vizNamespace) }), *healthcheck.NewChecker("viz extension proxies are healthy"). WithHintAnchor("l5d-viz-proxy-healthy"). @@ -235,12 +237,14 @@ func (hc *HealthChecker) VizCategory() *healthcheck.Category { } // Check for relevant pods to be present - pods, err := hc.KubeAPIClient().GetPodsByNamespace(ctx, hc.vizNamespace) + podList, err := hc.KubeAPIClient().CoreV1().Pods(hc.vizNamespace).List(ctx, metav1.ListOptions{ + LabelSelector: vizSelector, + }) if err != nil { return err } - return healthcheck.CheckForPods(pods, []string{"prometheus"}) + return healthcheck.CheckForPods(podList.Items, []string{"prometheus"}) }), *healthcheck.NewChecker("can initialize the client"). WithHintAnchor("l5d-viz-existence-client").