diff --git a/pkg/healthcheck/healthcheck.go b/pkg/healthcheck/healthcheck.go index 4dcf31a42..065c27207 100644 --- a/pkg/healthcheck/healthcheck.go +++ b/pkg/healthcheck/healthcheck.go @@ -2538,7 +2538,8 @@ func CheckForPods(pods []corev1.Pod, deployNames []string) error { for _, pod := range pods { // Strip randomized suffix and take the deployment name - deployName := strings.Join(strings.Split(pod.Name, "-")[:2], "-") + parts := strings.Split(pod.Name, "-") + deployName := strings.Join(parts[:len(parts)-2], "-") exists[deployName] = true } diff --git a/test/integration/testdata/check.viz.golden b/test/integration/testdata/check.viz.golden index 4947b1663..75ef2fa88 100644 --- a/test/integration/testdata/check.viz.golden +++ b/test/integration/testdata/check.viz.golden @@ -18,12 +18,13 @@ linkerd-viz √ linkerd-viz Namespace exists √ linkerd-viz ClusterRoles exist √ linkerd-viz ClusterRoleBindings exist -√ linkerd-viz ConfigMaps exist √ tap API server has valid cert √ tap API server cert is valid for at least 60 days √ tap API service is running √ linkerd-viz pods are injected √ viz extension pods are running +√ prometheus is installed and configured correctly +√ grafana is installed and configured correctly √ can initialize the client √ viz extension self-check √ [kubernetes] linkerd viz can talk to Kubernetes diff --git a/viz/charts/linkerd-viz/templates/metrics-api.yaml b/viz/charts/linkerd-viz/templates/metrics-api.yaml index 172cd5e91..5d70980d5 100644 --- a/viz/charts/linkerd-viz/templates/metrics-api.yaml +++ b/viz/charts/linkerd-viz/templates/metrics-api.yaml @@ -72,8 +72,10 @@ spec: - -cluster-domain={{.Values.clusterDomain}} {{- if .Values.prometheusUrl }} - -prometheus-url={{.Values.prometheusUrl}} + {{- else if .Values.prometheus.enabled }} + - -prometheus-url=http://linkerd-prometheus.{{.Values.namespace}}.svc.{{.Values.clusterDomain}}:9090 {{- else }} - - -prometheus-url=http://linkerd-prometheus.linkerd-viz.svc.{{.Values.clusterDomain}}:9090 + {{ fail "Please enable `linkerd-prometheus` or provide `prometheusUrl` for the viz extension to function properly"}} {{- end }} image: {{.Values.metricsAPI.image.registry}}/{{.Values.metricsAPI.image.name}}:{{.Values.metricsAPI.image.tag}} imagePullPolicy: {{.Values.metricsAPI.pullPolicy}} diff --git a/viz/pkg/healthcheck/healthcheck.go b/viz/pkg/healthcheck/healthcheck.go index 4ad2f79b9..1ad001a56 100644 --- a/viz/pkg/healthcheck/healthcheck.go +++ b/viz/pkg/healthcheck/healthcheck.go @@ -82,21 +82,14 @@ func (hc *HealthChecker) VizCategory() healthcheck.Category { Fatal(). Warning(). WithCheck(func(ctx context.Context) error { - return healthcheck.CheckClusterRoles(ctx, hc.KubeAPIClient(), true, []string{fmt.Sprintf("linkerd-%s-prometheus", hc.vizNamespace), fmt.Sprintf("linkerd-%s-tap", hc.vizNamespace)}, "") + return healthcheck.CheckClusterRoles(ctx, hc.KubeAPIClient(), true, []string{fmt.Sprintf("linkerd-%s-tap", hc.vizNamespace), fmt.Sprintf("linkerd-%s-metrics-api", hc.vizNamespace), fmt.Sprintf("linkerd-%s-tap-admin", hc.vizNamespace), "linkerd-tap-injector"}, "") }), *healthcheck.NewChecker("linkerd-viz ClusterRoleBindings exist"). WithHintAnchor("l5d-viz-crb-exists"). Fatal(). Warning(). WithCheck(func(ctx context.Context) error { - return healthcheck.CheckClusterRoleBindings(ctx, hc.KubeAPIClient(), true, []string{fmt.Sprintf("linkerd-%s-prometheus", hc.vizNamespace), fmt.Sprintf("linkerd-%s-tap", hc.vizNamespace)}, "") - }), - *healthcheck.NewChecker("linkerd-viz ConfigMaps exist"). - WithHintAnchor("l5d-viz-cm-exists"). - Fatal(). - Warning(). - WithCheck(func(ctx context.Context) error { - return healthcheck.CheckConfigMaps(ctx, hc.KubeAPIClient(), hc.vizNamespace, true, []string{"linkerd-prometheus-config", "linkerd-grafana-config"}, "") + return healthcheck.CheckClusterRoleBindings(ctx, hc.KubeAPIClient(), true, []string{fmt.Sprintf("linkerd-%s-tap", hc.vizNamespace), fmt.Sprintf("linkerd-%s-metrics-api", hc.vizNamespace), fmt.Sprintf("linkerd-%s-tap-auth-delegator", hc.vizNamespace), "linkerd-tap-injector"}, "") }), *healthcheck.NewChecker("tap API server has valid cert"). WithHintAnchor("l5d-tap-cert-valid"). @@ -159,13 +152,73 @@ func (hc *HealthChecker) VizCategory() healthcheck.Category { } // Check for relevant pods to be present - err = healthcheck.CheckForPods(pods, []string{"linkerd-grafana", "linkerd-prometheus", "linkerd-web", "linkerd-tap"}) + err = healthcheck.CheckForPods(pods, []string{"linkerd-web", "linkerd-tap", "linkerd-metrics-api", "tap-injector"}) if err != nil { return err } return healthcheck.CheckPodsRunning(pods, "") }), + *healthcheck.NewChecker("prometheus is installed and configured correctly"). + WithHintAnchor("l5d-viz-prometheus"). + Warning(). + WithCheck(func(ctx context.Context) error { + // TODO: Skip if prometheus is disabled + // Check for ClusterRoles + err := healthcheck.CheckClusterRoles(ctx, hc.KubeAPIClient(), true, []string{fmt.Sprintf("linkerd-%s-prometheus", hc.vizNamespace)}, "") + if err != nil { + return err + } + + // Check for ClusterRoleBindings + err = healthcheck.CheckClusterRoleBindings(ctx, hc.KubeAPIClient(), true, []string{fmt.Sprintf("linkerd-%s-prometheus", hc.vizNamespace)}, "") + if err != nil { + return err + } + + // Check for ConfigMap + err = healthcheck.CheckConfigMaps(ctx, hc.KubeAPIClient(), hc.vizNamespace, true, []string{"linkerd-prometheus-config"}, "") + if err != nil { + return err + } + + // Check for relevant pods to be present + pods, err := hc.KubeAPIClient().GetPodsByNamespace(ctx, hc.vizNamespace) + if err != nil { + return err + } + + err = healthcheck.CheckForPods(pods, []string{"linkerd-prometheus"}) + if err != nil { + return err + } + + return nil + }), + *healthcheck.NewChecker("grafana is installed and configured correctly"). + WithHintAnchor("l5d-viz-grafana"). + Warning(). + WithCheck(func(ctx context.Context) error { + // TODO: Skip if grafana is disabled + // Check for ConfigMap + err := healthcheck.CheckConfigMaps(ctx, hc.KubeAPIClient(), hc.vizNamespace, true, []string{"linkerd-grafana-config"}, "") + if err != nil { + return err + } + + // Check for relevant pods to be present + pods, err := hc.KubeAPIClient().GetPodsByNamespace(ctx, hc.vizNamespace) + if err != nil { + return err + } + + err = healthcheck.CheckForPods(pods, []string{"linkerd-grafana"}) + if err != nil { + return err + } + + return nil + }), *healthcheck.NewChecker("can initialize the client"). WithHintAnchor("l5d-viz-existence-client"). Fatal().