Parse Pod labels for owning Deployment instead of name (#8920)

Closes #8916

When a random Pod (meshed or not) is created in the `linkerd`, `linkerd-viz`, or
`linkerd-jaeger` namespaces, their respective `check` subcommands can fail.

We parse Pod names for their owning Deployment by assuming the Pod name has a
randomized suffix. For example, the `linkerd-destination` Deployment creates the
`linkerd-destination-58c57dd675-7tthr` Pod. We split the name on `-` and drop
the last two parts (the ReplicaSet hash and the Pod suffix); the remaining parts
(`["linkerd", "destination"]`) are joined back together to form the Deployment
name.

Now, if a random Pod is created in the namespace with the name `test`, we apply
that same logic but hit a runtime error when slicing the split result. `test`
contains no `-`, so splitting it yields only a single part, and dropping the
last two parts fails with `slice bounds out of range`.

To fix this, we now rely on the fact that all Linkerd components carry a
`linkerd.io/control-plane-component` or `component` label whose value is the
name of the owning Deployment. This allows us to avoid any extra parsing logic
and just look at a single label value.

Additionally, some of these checks get all the Pods in a namespace with the
`GetPodsByNamespace` method but we don't always need something so general. In
the places where we are checking specifically for Linkerd components, we can
narrow this further by using the expected LabelSelector such as
`linkerd.io/extension=viz`.

Signed-off-by: Kevin Leimkuhler <kleimkuhler@icloud.com>
This commit is contained in:
Kevin Leimkuhler 2022-07-19 12:14:55 -06:00 committed by GitHub
parent e8ee25e505
commit 2442ca07bf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 29 additions and 14 deletions

View File

@ -9,8 +9,10 @@ import (
pkgcmd "github.com/linkerd/linkerd2/pkg/cmd"
"github.com/linkerd/linkerd2/pkg/healthcheck"
"github.com/linkerd/linkerd2/pkg/k8s"
"github.com/linkerd/linkerd2/pkg/version"
"github.com/spf13/cobra"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
const (
@ -75,18 +77,20 @@ func jaegerCategory(hc *healthcheck.HealthChecker) *healthcheck.Category {
WithRetryDeadline(hc.RetryDeadline).
SurfaceErrorOnRetry().
WithCheck(func(ctx context.Context) error {
pods, err := hc.KubeAPIClient().GetPodsByNamespace(ctx, jaegerNamespace)
podList, err := hc.KubeAPIClient().CoreV1().Pods(jaegerNamespace).List(ctx, metav1.ListOptions{
LabelSelector: fmt.Sprintf("%s=%s", k8s.LinkerdExtensionLabel, JaegerExtensionName),
})
if err != nil {
return err
}
// Check for relevant pods to be present
err = healthcheck.CheckForPods(pods, []string{"jaeger-injector"})
err = healthcheck.CheckForPods(podList.Items, []string{"jaeger-injector"})
if err != nil {
return err
}
return healthcheck.CheckPodsRunning(pods, jaegerNamespace)
return healthcheck.CheckPodsRunning(podList.Items, jaegerNamespace)
}))
checkers = append(checkers,

View File

@ -708,10 +708,13 @@ func (hc *HealthChecker) allCategories() []*Category {
fatal: true,
check: func(ctx context.Context) error {
var err error
hc.controlPlanePods, err = hc.kubeAPI.GetPodsByNamespace(ctx, hc.ControlPlaneNamespace)
podList, err := hc.kubeAPI.CoreV1().Pods(hc.ControlPlaneNamespace).List(ctx, metav1.ListOptions{
LabelSelector: k8s.ControllerComponentLabel,
})
if err != nil {
return err
}
hc.controlPlanePods = podList.Items
return validateControlPlanePods(hc.controlPlanePods)
},
},
@ -2802,10 +2805,14 @@ func CheckForPods(pods []corev1.Pod, deployNames []string) error {
exists := make(map[string]bool)
for _, pod := range pods {
// Strip randomized suffix and take the deployment name
parts := strings.Split(pod.Name, "-")
deployName := strings.Join(parts[:len(parts)-2], "-")
exists[deployName] = true
for label, value := range pod.Labels {
// When the label value is `linkerd.io/control-plane-component` or
// `component`, we'll take its value as the name of the deployment
// that the pod is part of
if label == k8s.ControllerComponentLabel || label == "component" {
exists[value] = true
}
}
}
for _, expected := range deployNames {

View File

@ -69,7 +69,7 @@ func (hc *HealthChecker) RunChecks(observer healthcheck.CheckObserver) (bool, bo
// VizCategory returns a healthcheck.Category containing checkers
// to verify the health of viz components
func (hc *HealthChecker) VizCategory() *healthcheck.Category {
vizSelector := fmt.Sprintf("%s=%s", k8s.LinkerdExtensionLabel, VizExtensionName)
return healthcheck.NewCategory(LinkerdVizExtensionCheck, []healthcheck.Checker{
*healthcheck.NewChecker("linkerd-viz Namespace exists").
WithHintAnchor("l5d-viz-ns-exists").
@ -153,18 +153,20 @@ func (hc *HealthChecker) VizCategory() *healthcheck.Category {
WithRetryDeadline(hc.RetryDeadline).
SurfaceErrorOnRetry().
WithCheck(func(ctx context.Context) error {
pods, err := hc.KubeAPIClient().GetPodsByNamespace(ctx, hc.vizNamespace)
podList, err := hc.KubeAPIClient().CoreV1().Pods(hc.vizNamespace).List(ctx, metav1.ListOptions{
LabelSelector: vizSelector,
})
if err != nil {
return err
}
// Check for relevant pods to be present
err = healthcheck.CheckForPods(pods, []string{"web", "tap", "metrics-api", "tap-injector"})
err = healthcheck.CheckForPods(podList.Items, []string{"web", "tap", "metrics-api", "tap-injector"})
if err != nil {
return err
}
return healthcheck.CheckPodsRunning(pods, hc.vizNamespace)
return healthcheck.CheckPodsRunning(podList.Items, hc.vizNamespace)
}),
*healthcheck.NewChecker("viz extension proxies are healthy").
WithHintAnchor("l5d-viz-proxy-healthy").
@ -235,12 +237,14 @@ func (hc *HealthChecker) VizCategory() *healthcheck.Category {
}
// Check for relevant pods to be present
pods, err := hc.KubeAPIClient().GetPodsByNamespace(ctx, hc.vizNamespace)
podList, err := hc.KubeAPIClient().CoreV1().Pods(hc.vizNamespace).List(ctx, metav1.ListOptions{
LabelSelector: vizSelector,
})
if err != nil {
return err
}
return healthcheck.CheckForPods(pods, []string{"prometheus"})
return healthcheck.CheckForPods(podList.Items, []string{"prometheus"})
}),
*healthcheck.NewChecker("can initialize the client").
WithHintAnchor("l5d-viz-existence-client").