viz: make checks aware of prom and grafana being optional (#5627)

* viz: make checks aware of prom and grafana being optional

Fixes #5618

Currently, the linkerd-viz checks fail whenever an external
Prometheus is being used, as those checks are not aware of
Prometheus and Grafana being optional.

This commit fixes this by splitting the Prometheus and Grafana
checks out into separate, non-fatal checks. These checks
can also be made dynamic and be run only if those
components are available.

This commit also adds some of the missing resource checks,
especially those for the new `metrics-api`, to the viz checks.

Signed-off-by: Tarun Pothulapati <tarunpothulapati@outlook.com>
This commit is contained in:
Tarun Pothulapati 2021-02-05 11:26:44 +05:30 committed by GitHub
parent c6536996f7
commit 704ed00a49
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 70 additions and 13 deletions

View File

@ -2538,7 +2538,8 @@ func CheckForPods(pods []corev1.Pod, deployNames []string) error {
for _, pod := range pods {
// Strip randomized suffix and take the deployment name
deployName := strings.Join(strings.Split(pod.Name, "-")[:2], "-")
parts := strings.Split(pod.Name, "-")
deployName := strings.Join(parts[:len(parts)-2], "-")
exists[deployName] = true
}

View File

@ -18,12 +18,13 @@ linkerd-viz
√ linkerd-viz Namespace exists
√ linkerd-viz ClusterRoles exist
√ linkerd-viz ClusterRoleBindings exist
√ linkerd-viz ConfigMaps exist
√ tap API server has valid cert
√ tap API server cert is valid for at least 60 days
√ tap API service is running
√ linkerd-viz pods are injected
√ viz extension pods are running
√ prometheus is installed and configured correctly
√ grafana is installed and configured correctly
√ can initialize the client
√ viz extension self-check
√ [kubernetes] linkerd viz can talk to Kubernetes

View File

@ -72,8 +72,10 @@ spec:
- -cluster-domain={{.Values.clusterDomain}}
{{- if .Values.prometheusUrl }}
- -prometheus-url={{.Values.prometheusUrl}}
{{- else if .Values.prometheus.enabled }}
- -prometheus-url=http://linkerd-prometheus.{{.Values.namespace}}.svc.{{.Values.clusterDomain}}:9090
{{- else }}
- -prometheus-url=http://linkerd-prometheus.linkerd-viz.svc.{{.Values.clusterDomain}}:9090
{{ fail "Please enable `linkerd-prometheus` or provide `prometheusUrl` for the viz extension to function properly"}}
{{- end }}
image: {{.Values.metricsAPI.image.registry}}/{{.Values.metricsAPI.image.name}}:{{.Values.metricsAPI.image.tag}}
imagePullPolicy: {{.Values.metricsAPI.pullPolicy}}

View File

@ -82,21 +82,14 @@ func (hc *HealthChecker) VizCategory() healthcheck.Category {
Fatal().
Warning().
WithCheck(func(ctx context.Context) error {
return healthcheck.CheckClusterRoles(ctx, hc.KubeAPIClient(), true, []string{fmt.Sprintf("linkerd-%s-prometheus", hc.vizNamespace), fmt.Sprintf("linkerd-%s-tap", hc.vizNamespace)}, "")
return healthcheck.CheckClusterRoles(ctx, hc.KubeAPIClient(), true, []string{fmt.Sprintf("linkerd-%s-tap", hc.vizNamespace), fmt.Sprintf("linkerd-%s-metrics-api", hc.vizNamespace), fmt.Sprintf("linkerd-%s-tap-admin", hc.vizNamespace), "linkerd-tap-injector"}, "")
}),
*healthcheck.NewChecker("linkerd-viz ClusterRoleBindings exist").
WithHintAnchor("l5d-viz-crb-exists").
Fatal().
Warning().
WithCheck(func(ctx context.Context) error {
return healthcheck.CheckClusterRoleBindings(ctx, hc.KubeAPIClient(), true, []string{fmt.Sprintf("linkerd-%s-prometheus", hc.vizNamespace), fmt.Sprintf("linkerd-%s-tap", hc.vizNamespace)}, "")
}),
*healthcheck.NewChecker("linkerd-viz ConfigMaps exist").
WithHintAnchor("l5d-viz-cm-exists").
Fatal().
Warning().
WithCheck(func(ctx context.Context) error {
return healthcheck.CheckConfigMaps(ctx, hc.KubeAPIClient(), hc.vizNamespace, true, []string{"linkerd-prometheus-config", "linkerd-grafana-config"}, "")
return healthcheck.CheckClusterRoleBindings(ctx, hc.KubeAPIClient(), true, []string{fmt.Sprintf("linkerd-%s-tap", hc.vizNamespace), fmt.Sprintf("linkerd-%s-metrics-api", hc.vizNamespace), fmt.Sprintf("linkerd-%s-tap-auth-delegator", hc.vizNamespace), "linkerd-tap-injector"}, "")
}),
*healthcheck.NewChecker("tap API server has valid cert").
WithHintAnchor("l5d-tap-cert-valid").
@ -159,13 +152,73 @@ func (hc *HealthChecker) VizCategory() healthcheck.Category {
}
// Check for relevant pods to be present
err = healthcheck.CheckForPods(pods, []string{"linkerd-grafana", "linkerd-prometheus", "linkerd-web", "linkerd-tap"})
err = healthcheck.CheckForPods(pods, []string{"linkerd-web", "linkerd-tap", "linkerd-metrics-api", "tap-injector"})
if err != nil {
return err
}
return healthcheck.CheckPodsRunning(pods, "")
}),
*healthcheck.NewChecker("prometheus is installed and configured correctly").
WithHintAnchor("l5d-viz-prometheus").
Warning().
WithCheck(func(ctx context.Context) error {
// TODO: Skip if prometheus is disabled
// Check for ClusterRoles
err := healthcheck.CheckClusterRoles(ctx, hc.KubeAPIClient(), true, []string{fmt.Sprintf("linkerd-%s-prometheus", hc.vizNamespace)}, "")
if err != nil {
return err
}
// Check for ClusterRoleBindings
err = healthcheck.CheckClusterRoleBindings(ctx, hc.KubeAPIClient(), true, []string{fmt.Sprintf("linkerd-%s-prometheus", hc.vizNamespace)}, "")
if err != nil {
return err
}
// Check for ConfigMap
err = healthcheck.CheckConfigMaps(ctx, hc.KubeAPIClient(), hc.vizNamespace, true, []string{"linkerd-prometheus-config"}, "")
if err != nil {
return err
}
// Check for relevant pods to be present
pods, err := hc.KubeAPIClient().GetPodsByNamespace(ctx, hc.vizNamespace)
if err != nil {
return err
}
err = healthcheck.CheckForPods(pods, []string{"linkerd-prometheus"})
if err != nil {
return err
}
return nil
}),
*healthcheck.NewChecker("grafana is installed and configured correctly").
WithHintAnchor("l5d-viz-grafana").
Warning().
WithCheck(func(ctx context.Context) error {
// TODO: Skip if grafana is disabled
// Check for ConfigMap
err := healthcheck.CheckConfigMaps(ctx, hc.KubeAPIClient(), hc.vizNamespace, true, []string{"linkerd-grafana-config"}, "")
if err != nil {
return err
}
// Check for relevant pods to be present
pods, err := hc.KubeAPIClient().GetPodsByNamespace(ctx, hc.vizNamespace)
if err != nil {
return err
}
err = healthcheck.CheckForPods(pods, []string{"linkerd-grafana"})
if err != nil {
return err
}
return nil
}),
*healthcheck.NewChecker("can initialize the client").
WithHintAnchor("l5d-viz-existence-client").
Fatal().