Track dry-run and apply in metrics

Kubernetes-commit: 4ac0116a5d826d97cc839dca55e3351ef68f2db1
2019-03-06 14:30:04 -08:00 · 2019-03-06 14:30:04 -08:00 · 7624354bd9
parent 4834761c7e
commit 7624354bd9
1 changed files with 37 additions and 6 deletions
--- a/pkg/endpoints/metrics/metrics.go
+++ b/pkg/endpoints/metrics/metrics.go
@ -21,11 +21,14 @@ import (
 	"net"
 	"net/http"
 	"regexp"
+	"sort"
 	"strconv"
 	"strings"
 	"sync"
 	"time"

+	"k8s.io/apimachinery/pkg/apis/meta/v1/validation"
+	"k8s.io/apimachinery/pkg/types"
 	utilnet "k8s.io/apimachinery/pkg/util/net"
 	"k8s.io/apiserver/pkg/endpoints/request"

@ -50,9 +53,13 @@ var (
 	requestCounter = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
 			Name: "apiserver_request_total",
-			Help: "Counter of apiserver requests broken out for each verb, group, version, resource, scope, component, client, and HTTP response contentType and code.",
+			Help: "Counter of apiserver requests broken out for each verb, dry run value, group, version, resource, scope, component, client, and HTTP response contentType and code.",
 		},
-		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component", "client", "contentType", "code"},
+		// The label_name contentType doesn't follow the label_name convention defined here:
+		// https://github.com/kubernetes/community/blob/master/contributors/devel/sig-instrumentation/instrumentation.md
+		// But changing it would break backwards compatibility. Future label_names
+		// should be all lowercase and separated by underscores.
+		[]string{"verb", "dry_run", "group", "version", "resource", "subresource", "scope", "component", "client", "contentType", "code"},
 	)
 	deprecatedRequestCounter = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
@ -71,14 +78,14 @@ var (
 	requestLatencies = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
 			Name: "apiserver_request_duration_seconds",
-			Help: "Response latency distribution in seconds for each verb, group, version, resource, subresource, scope and component.",
+			Help: "Response latency distribution in seconds for each verb, dry run value, group, version, resource, subresource, scope and component.",
 			// This metric is used for verifying api call latencies SLO,
 			// as well as tracking regressions in this aspect.
 			// Thus we customize buckets significantly, to empower both use cases.
 			Buckets: []float64{0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
 				1.25, 1.5, 1.75, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30, 40, 50, 60},
 		},
-		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
+		[]string{"verb", "dry_run", "group", "version", "resource", "subresource", "scope", "component"},
 	)
 	deprecatedRequestLatencies = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
@ -225,12 +232,13 @@ func RecordLongRunning(req *http.Request, requestInfo *request.RequestInfo, comp
 // a request. verb must be uppercase to be backwards compatible with existing monitoring tooling.
 func MonitorRequest(req *http.Request, verb, group, version, resource, subresource, scope, component, contentType string, httpCode, respSize int, elapsed time.Duration) {
 	reportedVerb := cleanVerb(verb, req)
+	dryRun := cleanDryRun(req.URL.Query()["dryRun"])
 	client := cleanUserAgent(utilnet.GetHTTPClient(req))
 	elapsedMicroseconds := float64(elapsed / time.Microsecond)
 	elapsedSeconds := elapsed.Seconds()
-	requestCounter.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component, client, contentType, codeToString(httpCode)).Inc()
+	requestCounter.WithLabelValues(reportedVerb, dryRun, group, version, resource, subresource, scope, component, client, contentType, codeToString(httpCode)).Inc()
 	deprecatedRequestCounter.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component, client, contentType, codeToString(httpCode)).Inc()
-	requestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedSeconds)
+	requestLatencies.WithLabelValues(reportedVerb, dryRun, group, version, resource, subresource, scope, component).Observe(elapsedSeconds)
 	deprecatedRequestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
 	deprecatedRequestLatenciesSummary.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
 	// We are only interested in response sizes of read requests.
@ -315,9 +323,32 @@ func cleanVerb(verb string, request *http.Request) string {
 	if verb == "WATCHLIST" {
 		reportedVerb = "WATCH"
 	}
+	if verb == "PATCH" && request.Header.Get("Content-Type") == string(types.ApplyPatchType) {
+		reportedVerb = "APPLY"
+	}
 	return reportedVerb
 }

+// cleanDryRun maps the request's dryRun query values to a metric label: "invalid" if
+// validation reports any error, otherwise the unique values sorted and comma-joined.
+func cleanDryRun(dryRun []string) string {
+	// ValidateDryRun returns a field.ErrorList, which can be empty yet non-nil; test its length.
+	if errs := validation.ValidateDryRun(nil, dryRun); len(errs) > 0 {
+		return "invalid"
+	}
+	// Valid input may repeat values arbitrarily often; dedup and sort for a stable label.
+	seen := make(map[string]struct{}, len(dryRun))
+	unique := make([]string, 0, len(dryRun))
+	for _, element := range dryRun {
+		if _, dup := seen[element]; !dup {
+			seen[element] = struct{}{}
+			unique = append(unique, element)
+		}
+	}
+	sort.Strings(unique)
+	return strings.Join(unique, ",")
+}
+
 func cleanUserAgent(ua string) string {
 	// We collapse all "web browser"-type user agents into one "browser" to reduce metric cardinality.
 	if strings.HasPrefix(ua, "Mozilla/") {