metrics: improve apiserver SLI metric name

Add a new kube-apiserver SLI metric that better reflects that the
metric is an SLI and not an SLO, and deprecate the existing
apiserver_request_slo_duration_seconds in 1.27. Although the metric is
still alpha, we prefer deprecating it for one release since it is a
critical metric used for SLOs, and we want to make sure that users who
rely on it have time to make the transition.
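
As a rough illustration (not part of this change), the sketch below
shows how a histogram deprecated via DeprecatedVersion and its _sli_
replacement could be declared and registered with
k8s.io/component-base/metrics; the names, buckets, and label values
are simplified stand-ins, not the real definitions from the diff below.

```go
package main

import (
	compbasemetrics "k8s.io/component-base/metrics"
	"k8s.io/component-base/metrics/legacyregistry"
)

var (
	// Deprecated histogram: kept registered for one release so existing
	// SLO dashboards and alerts keep receiving data.
	sloLatencies = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem:         "apiserver",
			Name:              "request_slo_duration_seconds",
			Help:              "Response latency distribution (not counting webhook duration) in seconds.",
			Buckets:           []float64{0.05, 0.1, 0.5, 1, 5, 30, 60},
			StabilityLevel:    compbasemetrics.ALPHA,
			DeprecatedVersion: "1.27.0",
		},
		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
	)
	// Replacement histogram with the SLI-oriented name.
	sliLatencies = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem:      "apiserver",
			Name:           "request_sli_duration_seconds",
			Help:           "Response latency distribution (not counting webhook duration) in seconds.",
			Buckets:        []float64{0.05, 0.1, 0.5, 1, 5, 30, 60},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
	)
)

func main() {
	// Register both; the shared registry handles deprecation handling for
	// metrics whose DeprecatedVersion has been reached.
	legacyregistry.MustRegister(sloLatencies)
	legacyregistry.MustRegister(sliLatencies)

	// Observe the same webhook-excluded latency into both series during
	// the transition window (label values here are made up).
	labels := []string{"GET", "", "v1", "pods", "", "resource", "apiserver"}
	sloLatencies.WithLabelValues(labels...).Observe(0.042)
	sliLatencies.WithLabelValues(labels...).Observe(0.042)
}
```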

Going forward we prefer SLI-specific metrics, so for consistency we
will use _sli_ instead of _slo_ in metric names.
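
For consumers, the transition is a metric-name swap in existing
queries and recording rules. A minimal sketch, assuming a Prometheus
server at http://localhost:9090 (a placeholder) and the
prometheus/client_golang API client; the PromQL expressions are
illustrative, not taken from this change:

```go
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/prometheus/client_golang/api"
	promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
)

// p99 apiserver request latency (webhook time excluded), before and after
// the rename; only the metric name changes, the labels stay the same.
const (
	oldExpr = `histogram_quantile(0.99, sum by (le) (rate(apiserver_request_slo_duration_seconds_bucket{verb="GET"}[5m])))`
	newExpr = `histogram_quantile(0.99, sum by (le) (rate(apiserver_request_sli_duration_seconds_bucket{verb="GET"}[5m])))`
)

func main() {
	// The Prometheus address is a placeholder for this sketch.
	client, err := api.NewClient(api.Config{Address: "http://localhost:9090"})
	if err != nil {
		panic(err)
	}
	prom := promv1.NewAPI(client)

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	for _, expr := range []string{oldExpr, newExpr} {
		value, warnings, err := prom.Query(ctx, expr, time.Now())
		if err != nil {
			panic(err)
		}
		if len(warnings) > 0 {
			fmt.Println("warnings:", warnings)
		}
		fmt.Printf("%s\n=> %v\n", expr, value)
	}
}
```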

Signed-off-by: Damien Grisonnet <dgrisonn@redhat.com>

Kubernetes-commit: 1493da92d9513e383f8382c7e80316a3fa6c94fa
Damien Grisonnet 2022-09-22 19:08:34 +02:00 committed by Kubernetes Publisher
parent ad0fbeeeec
commit 9a8e2de22e
1 changed file with 19 additions and 2 deletions

@@ -112,6 +112,21 @@ var (
 			// This metric is supplementary to the requestLatencies metric.
 			// It measures request duration excluding webhooks as they are mostly
 			// dependant on user configuration.
 			Buckets: []float64{0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
 				4, 5, 6, 8, 10, 15, 20, 30, 45, 60},
 			StabilityLevel: compbasemetrics.ALPHA,
+			DeprecatedVersion: "1.27.0",
 		},
 		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
 	)
+	requestSliLatencies = compbasemetrics.NewHistogramVec(
+		&compbasemetrics.HistogramOpts{
+			Subsystem: APIServerComponent,
+			Name: "request_sli_duration_seconds",
+			Help: "Response latency distribution (not counting webhook duration) in seconds for each verb, group, version, resource, subresource, scope and component.",
+			// This metric is supplementary to the requestLatencies metric.
+			// It measures request duration excluding webhooks as they are mostly
+			// dependant on user configuration.
+			Buckets: []float64{0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
+				4, 5, 6, 8, 10, 15, 20, 30, 45, 60},
+			StabilityLevel: compbasemetrics.ALPHA,
@@ -273,6 +288,7 @@ var (
 		longRunningRequestsGauge,
 		requestLatencies,
 		requestSloLatencies,
+		requestSliLatencies,
 		fieldValidationRequestLatencies,
 		responseSizes,
 		TLSHandshakeErrors,
@@ -519,8 +535,9 @@ func MonitorRequest(req *http.Request, verb, group, version, resource, subresour
 	fieldValidationRequestLatencies.WithContext(req.Context()).WithLabelValues(fieldValidation, fieldValidationEnabled)
 	if wd, ok := request.LatencyTrackersFrom(req.Context()); ok {
-		sloLatency := elapsedSeconds - (wd.MutatingWebhookTracker.GetLatency() + wd.ValidatingWebhookTracker.GetLatency()).Seconds()
-		requestSloLatencies.WithContext(req.Context()).WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(sloLatency)
+		sliLatency := elapsedSeconds - (wd.MutatingWebhookTracker.GetLatency() + wd.ValidatingWebhookTracker.GetLatency()).Seconds()
+		requestSloLatencies.WithContext(req.Context()).WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(sliLatency)
+		requestSliLatencies.WithContext(req.Context()).WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(sliLatency)
 	}
 	// We are only interested in response sizes of read requests.
 	if verb == "GET" || verb == "LIST" {