viz: make prom checks dynamic by using annotations (#5680)

Fixes #5652 

This PR adds a new annotation that is set when an
external Prometheus is used. Based on that
annotation, the CLI can tell whether an external instance
is being used; if the annotation is absent, it assumes that
the default instance is present.

This updates the viz checks to skip some checkers if the default
Prometheus instance is absent.

This PR also removes the grafana checks as they are not useful
and add unnecessary complexity.

This also cleans up some `grafanaUrl` stuff from the core
control-plane chart.

Signed-off-by: Tarun Pothulapati <tarunpothulapati@outlook.com>
This commit is contained in:
Tarun Pothulapati 2021-02-12 21:25:42 +05:30 committed by GitHub
parent 8bc732e483
commit cb6c1fce03
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 1070 additions and 37 deletions

View File

@ -145,7 +145,6 @@ Kubernetes: `>=1.13.0-0`
| disableHeartBeat | bool | `false` | Set to true to not start the heartbeat cronjob |
| enableEndpointSlices | bool | `false` | enables the use of EndpointSlice informers for the destination service; enableEndpointSlices should be set to true only if EndpointSlice K8s feature gate is on; the feature is still experimental. |
| enableH2Upgrade | bool | `true` | Allow proxies to perform transparent HTTP/2 upgrading |
| grafanaUrl | string | `""` | url of external grafana instance with reverse proxy configured. |
| heartbeatSchedule | string | `"0 0 * * *"` | Config for the heartbeat cronjob |
| identity.issuer.clockSkewAllowance | string | `"20s"` | Amount of time to allow for clock skew within a Linkerd cluster |
| identity.issuer.crtExpiry | string | `nil` | Expiration timestamp for the issuer certificate. It must be provided during install. Must match the expiry date in crtPEM |

View File

@ -37,8 +37,6 @@ cniEnabled: false
identityTrustAnchorsPEM: |
# -- Trust domain used for identity
identityTrustDomain: *cluster_domain
# -- url of external grafana instance with reverse proxy configured.
grafanaUrl: ""
# -- Additional annotations to add to all pods
podAnnotations: {}
# -- Additional labels to add to all pods

View File

@ -9,7 +9,6 @@ linkerd-viz
√ linkerd-viz pods are injected
√ viz extension pods are running
√ prometheus is installed and configured correctly
√ grafana is installed and configured correctly
√ can initialize the client
√ viz extension self-check

View File

@ -9,7 +9,6 @@ linkerd-viz
√ linkerd-viz pods are injected
√ viz extension pods are running
√ prometheus is installed and configured correctly
√ grafana is installed and configured correctly
√ can initialize the client
√ viz extension self-check

View File

@ -100,6 +100,7 @@ Kubernetes: `>=1.13.0-0`
| grafana.resources.cpu.request | string | `nil` | Amount of CPU units that the grafana container requests |
| grafana.resources.memory.limit | string | `nil` | Maximum amount of memory that grafana container can use |
| grafana.resources.memory.request | string | `nil` | Amount of memory that the grafana container requests |
| grafanaUrl | string | `""` | url of external grafana instance with reverse proxy configured. |
| identityTrustDomain | string | `"cluster.local"` | Trust domain used for identity |
| imagePullSecrets | list | `[]` | For Private docker registries, authentication is needed. Registry secrets are applied to the respective service accounts |
| installNamespace | bool | `true` | Set to false when installing in a custom namespace. |

View File

@ -10,5 +10,8 @@ metadata:
labels:
{{.Values.extensionAnnotation}}: linkerd-viz
annotations:
{{- if .Values.prometheusUrl }}
viz.linkerd.io/external-prometheus: {{.Values.prometheusUrl}}
{{- end }}
{{.Values.proxyInjectAnnotation}}: enabled
{{ end -}}

View File

@ -52,6 +52,9 @@ enablePodAntiAffinity: false
# -- url of external prometheus instance
prometheusUrl: ""
# -- url of external grafana instance with reverse proxy configured.
grafanaUrl: ""
# -- url of external jaeger instance
# Set this to `jaeger.linkerd-jaeger.svc.<clusterDomain>` if you plan to use jaeger extension
jaegerUrl: ""

View File

@ -63,6 +63,13 @@ func TestRender(t *testing.T) {
},
"install_proxy_resources.golden",
},
{
map[string]interface{}{
"grafana": map[string]interface{}{"enabled": false},
"grafanaUrl": "external-grafana.com",
},
"install_grafana_disabled.golden",
},
}
for i, tc := range testCases {

File diff suppressed because it is too large Load Diff

View File

@ -9,6 +9,7 @@ metadata:
labels:
linkerd.io/extension: linkerd-viz
annotations:
viz.linkerd.io/external-prometheus: external-prom.com
linkerd.io/inject: enabled
---
###

View File

@ -12,6 +12,7 @@ import (
"github.com/linkerd/linkerd2/pkg/tls"
"github.com/linkerd/linkerd2/viz/metrics-api/client"
pb "github.com/linkerd/linkerd2/viz/metrics-api/gen/viz"
"github.com/linkerd/linkerd2/viz/pkg/labels"
vizLabels "github.com/linkerd/linkerd2/viz/pkg/labels"
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
@ -41,8 +42,9 @@ const (
// HealthChecker wraps Linkerd's main healthchecker, adding extra fields for Viz
type HealthChecker struct {
*healthcheck.HealthChecker
vizNamespace string
vizAPIClient pb.ApiClient
vizNamespace string
externalPrometheusURL string
}
// NewHealthChecker returns an initialized HealthChecker for Viz
@ -75,10 +77,13 @@ func (hc *HealthChecker) VizCategory() healthcheck.Category {
Fatal().
WithCheck(func(ctx context.Context) error {
vizNs, err := hc.KubeAPIClient().GetNamespaceWithExtensionLabel(ctx, "linkerd-viz")
if err == nil {
hc.vizNamespace = vizNs.Name
}
if err != nil {
return err
}
hc.vizNamespace = vizNs.Name
hc.externalPrometheusURL = vizNs.Annotations[labels.VizExternalPrometheus]
return nil
}),
*healthcheck.NewChecker("linkerd-viz ClusterRoles exist").
WithHintAnchor("l5d-viz-cr-exists").
@ -166,7 +171,10 @@ func (hc *HealthChecker) VizCategory() healthcheck.Category {
WithHintAnchor("l5d-viz-prometheus").
Warning().
WithCheck(func(ctx context.Context) error {
// TODO: Skip if prometheus is disabled
if hc.externalPrometheusURL != "" {
return &healthcheck.SkipError{Reason: "linkerd-prometheus is disabled"}
}
// Check for ClusterRoles
err := healthcheck.CheckClusterRoles(ctx, hc.KubeAPIClient(), true, []string{fmt.Sprintf("linkerd-%s-prometheus", hc.vizNamespace)}, "")
if err != nil {
@ -196,30 +204,6 @@ func (hc *HealthChecker) VizCategory() healthcheck.Category {
return err
}
return nil
}),
*healthcheck.NewChecker("grafana is installed and configured correctly").
WithHintAnchor("l5d-viz-grafana").
Warning().
WithCheck(func(ctx context.Context) error {
// TODO: Skip if grafana is disabled
// Check for ConfigMap
err := healthcheck.CheckConfigMaps(ctx, hc.KubeAPIClient(), hc.vizNamespace, true, []string{"linkerd-grafana-config"}, "")
if err != nil {
return err
}
// Check for relevant pods to be present
pods, err := hc.KubeAPIClient().GetPodsByNamespace(ctx, hc.vizNamespace)
if err != nil {
return err
}
err = healthcheck.CheckForPods(pods, []string{"linkerd-grafana"})
if err != nil {
return err
}
return nil
}),
*healthcheck.NewChecker("can initialize the client").
@ -287,8 +271,6 @@ func (hc *HealthChecker) VizDataPlaneCategory() healthcheck.Category {
return err
}
// TODO: Check if prometheus is present
return validateDataPlanePodReporting(pods)
}),
*healthcheck.NewChecker("data-plane pods have tap enabled").

View File

@ -16,6 +16,10 @@ const (
// VizTapDisabled can be used to disable tap on the injected proxy.
VizTapDisabled = VizAnnotationsPrefix + "/disable-tap"
// VizExternalPrometheus is only set on the namespace by the installer
// when an external Prometheus is being used
VizExternalPrometheus = VizAnnotationsPrefix + "/external-prometheus"
)
// IsTapEnabled returns true if a pod has an annotation indicating that tap