From a559cb8be1c4b25c300d7c35ece1cb8ce69380e2 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Mon, 19 May 2025 11:41:09 +0200 Subject: [PATCH] Unify references to group resource in metrics Skip apiserver_storage_objects as it's a stable metric that doesn't have a "group" label. As defined in KEP-1209 adding a label is considered a breaking change so this change should be done via deprecation and introduction of a new metric. Tested via: ``` kubectl get --raw /metrics | grep -i apiservice | grep -v APIServiceRegistrationController | grep -v aggregator_unavailable_apiservice | python3 -c "import sys; print('\n'.join([k+' ' + v for k,v in {a.split('{')[0]:'{'+a.split('{')[1].split('}')[0]+'}' for a in sys.stdin.readlines() if '#' not in a and '_sum' not in a and '_bucket' not in a}.items()]))" ``` Before: ``` apiserver_cache_list_fetched_objects_total {index="",resource_prefix="/apiregistration.k8s.io/apiservices"} apiserver_cache_list_returned_objects_total {resource_prefix="/apiregistration.k8s.io/apiservices"} apiserver_cache_list_total {index="",resource_prefix="/apiregistration.k8s.io/apiservices"} apiserver_longrunning_requests {component="apiserver",group="apiregistration.k8s.io",resource="apiservices",scope="cluster",subresource="",verb="WATCH",version="v1"} apiserver_request_body_size_bytes_count {resource="apiservices.apiregistration.k8s.io",verb="create"} apiserver_request_duration_seconds_count {component="apiserver",dry_run="",group="apiregistration.k8s.io",resource="apiservices",scope="resource",subresource="",verb="POST",version="v1"} apiserver_request_sli_duration_seconds_count {component="apiserver",group="apiregistration.k8s.io",resource="apiservices",scope="resource",subresource="",verb="POST",version="v1"} apiserver_request_total {code="201",component="apiserver",dry_run="",group="apiregistration.k8s.io",resource="apiservices",scope="resource",subresource="",verb="POST",version="v1"} apiserver_response_sizes_count 
{component="apiserver",group="apiregistration.k8s.io",resource="apiservices",scope="cluster",subresource="",verb="LIST",version="v1"} apiserver_selfrequest_total {resource="apiservices",subresource="",verb="POST"} apiserver_storage_events_received_total {resource="apiservices.apiregistration.k8s.io"} apiserver_storage_list_evaluated_objects_total {resource="apiservices.apiregistration.k8s.io"} apiserver_storage_list_fetched_objects_total {resource="apiservices.apiregistration.k8s.io"} apiserver_storage_list_returned_objects_total {resource="apiservices.apiregistration.k8s.io"} apiserver_storage_list_total {resource="apiservices.apiregistration.k8s.io"} apiserver_storage_objects {resource="apiservices.apiregistration.k8s.io"} apiserver_watch_cache_events_dispatched_total {resource="apiservices.apiregistration.k8s.io"} apiserver_watch_cache_events_received_total {resource="apiservices.apiregistration.k8s.io"} apiserver_watch_cache_initializations_total {resource="apiservices.apiregistration.k8s.io"} apiserver_watch_cache_resource_version {resource="apiservices.apiregistration.k8s.io"} apiserver_watch_events_sizes_count {group="apiregistration.k8s.io",kind="APIService",version="v1"} apiserver_watch_events_total {group="apiregistration.k8s.io",kind="APIService",version="v1"} etcd_request_duration_seconds_count {operation="listWithCount",type="/registry/apiregistration.k8s.io/apiservices/"} etcd_requests_total {operation="listWithCount",type="/registry/apiregistration.k8s.io/apiservices/"} watch_cache_capacity {resource="apiservices.apiregistration.k8s.io"} ``` After: ``` apiserver_cache_list_fetched_objects_total {group="apiregistration.k8s.io",index="",resource="apiservices"} apiserver_cache_list_returned_objects_total {group="apiregistration.k8s.io",resource="apiservices"} apiserver_cache_list_total {group="apiregistration.k8s.io",index="",resource="apiservices"} apiserver_longrunning_requests 
{component="apiserver",group="apiregistration.k8s.io",resource="apiservices",scope="cluster",subresource="",verb="WATCH",version="v1"} apiserver_request_body_size_bytes_count {group="apiregistration.k8s.io",resource="apiservices",verb="create"} apiserver_request_duration_seconds_count {component="apiserver",dry_run="",group="apiregistration.k8s.io",resource="apiservices",scope="resource",subresource="",verb="POST",version="v1"} apiserver_request_sli_duration_seconds_count {component="apiserver",group="apiregistration.k8s.io",resource="apiservices",scope="resource",subresource="",verb="POST",version="v1"} apiserver_request_total {code="201",component="apiserver",dry_run="",group="apiregistration.k8s.io",resource="apiservices",scope="resource",subresource="",verb="POST",version="v1"} apiserver_response_sizes_count {component="apiserver",group="apiregistration.k8s.io",resource="apiservices",scope="cluster",subresource="",verb="WATCH",version="v1"} apiserver_selfrequest_total {group="apiregistration.k8s.io",resource="apiservices",subresource="",verb="WATCH"} apiserver_storage_events_received_total {group="apiregistration.k8s.io",resource="apiservices"} apiserver_storage_list_evaluated_objects_total {group="apiregistration.k8s.io",resource="apiservices"} apiserver_storage_list_fetched_objects_total {group="apiregistration.k8s.io",resource="apiservices"} apiserver_storage_list_returned_objects_total {group="apiregistration.k8s.io",resource="apiservices"} apiserver_storage_list_total {group="apiregistration.k8s.io",resource="apiservices"} apiserver_storage_objects {resource="apiservices.apiregistration.k8s.io"} apiserver_watch_cache_events_dispatched_total {group="apiregistration.k8s.io",resource="apiservices"} apiserver_watch_cache_events_received_total {group="apiregistration.k8s.io",resource="apiservices"} apiserver_watch_cache_initializations_total {group="apiregistration.k8s.io",resource="apiservices"} apiserver_watch_cache_resource_version 
{group="apiregistration.k8s.io",resource="apiservices"} apiserver_watch_events_sizes_count {group="apiregistration.k8s.io",resource="apiservices",version="v1"} apiserver_watch_events_total {group="apiregistration.k8s.io",resource="apiservices",version="v1"} etcd_bookmark_counts {group="apiregistration.k8s.io",resource="apiservices"} etcd_request_duration_seconds_count {group="apiregistration.k8s.io",operation="listWithCount",resource="apiservices"} etcd_requests_total {group="apiregistration.k8s.io",operation="listWithCount",resource="apiservices"} watch_cache_capacity {group="apiregistration.k8s.io",resource="apiservices"} ``` Kubernetes-commit: f712b01ddb55f6569b930ca714499051ba8cb311 --- pkg/endpoints/handlers/create.go | 2 +- pkg/endpoints/handlers/delete.go | 4 +- pkg/endpoints/handlers/metrics/metrics.go | 7 +- pkg/endpoints/handlers/patch.go | 2 +- pkg/endpoints/handlers/response.go | 16 +- pkg/endpoints/handlers/rest.go | 4 +- pkg/endpoints/handlers/rest_test.go | 70 ++++----- pkg/endpoints/handlers/update.go | 2 +- pkg/endpoints/handlers/watch.go | 10 +- pkg/endpoints/metrics/metrics.go | 8 +- pkg/registry/generic/registry/store.go | 2 +- pkg/storage/cacher/cache_watcher.go | 4 +- pkg/storage/cacher/cacher.go | 8 +- pkg/storage/cacher/cacher_whitebox_test.go | 6 +- pkg/storage/cacher/delegator.go | 27 ++-- pkg/storage/cacher/delegator_test.go | 4 +- pkg/storage/cacher/metrics/metrics.go | 51 +++---- pkg/storage/cacher/watch_cache.go | 14 +- pkg/storage/cacher/watch_cache_test.go | 32 ++-- pkg/storage/etcd3/metrics/metrics.go | 56 +++---- pkg/storage/etcd3/metrics/metrics_test.go | 162 ++++++++++----------- pkg/storage/etcd3/store.go | 84 +++++------ pkg/storage/etcd3/watcher.go | 6 +- 23 files changed, 293 insertions(+), 288 deletions(-) diff --git a/pkg/endpoints/handlers/create.go b/pkg/endpoints/handlers/create.go index 55f310daa..6e1908aa0 100644 --- a/pkg/endpoints/handlers/create.go +++ b/pkg/endpoints/handlers/create.go @@ -91,7 +91,7 @@ func 
createHandler(r rest.NamedCreater, scope *RequestScope, admit admission.Int return } - body, err := limitedReadBodyWithRecordMetric(ctx, req, scope.MaxRequestBodyBytes, scope.Resource.GroupResource().String(), requestmetrics.Create) + body, err := limitedReadBodyWithRecordMetric(ctx, req, scope.MaxRequestBodyBytes, scope.Resource.GroupResource(), requestmetrics.Create) if err != nil { span.AddEvent("limitedReadBody failed", attribute.Int("len", len(body)), attribute.String("err", err.Error())) scope.err(err, w, req) diff --git a/pkg/endpoints/handlers/delete.go b/pkg/endpoints/handlers/delete.go index 0abba257e..e8b74c8a9 100644 --- a/pkg/endpoints/handlers/delete.go +++ b/pkg/endpoints/handlers/delete.go @@ -85,7 +85,7 @@ func DeleteResource(r rest.GracefulDeleter, allowsOptions bool, scope *RequestSc options := &metav1.DeleteOptions{} if allowsOptions { - body, err := limitedReadBodyWithRecordMetric(ctx, req, scope.MaxRequestBodyBytes, scope.Resource.GroupResource().String(), requestmetrics.Delete) + body, err := limitedReadBodyWithRecordMetric(ctx, req, scope.MaxRequestBodyBytes, scope.Resource.GroupResource(), requestmetrics.Delete) if err != nil { span.AddEvent("limitedReadBody failed", attribute.Int("len", len(body)), attribute.String("err", err.Error())) scope.err(err, w, req) @@ -260,7 +260,7 @@ func DeleteCollection(r rest.CollectionDeleter, checkBody bool, scope *RequestSc options := &metav1.DeleteOptions{} if checkBody { - body, err := limitedReadBodyWithRecordMetric(ctx, req, scope.MaxRequestBodyBytes, scope.Resource.GroupResource().String(), requestmetrics.DeleteCollection) + body, err := limitedReadBodyWithRecordMetric(ctx, req, scope.MaxRequestBodyBytes, scope.Resource.GroupResource(), requestmetrics.DeleteCollection) if err != nil { span.AddEvent("limitedReadBody failed", attribute.Int("len", len(body)), attribute.String("err", err.Error())) scope.err(err, w, req) diff --git a/pkg/endpoints/handlers/metrics/metrics.go 
b/pkg/endpoints/handlers/metrics/metrics.go index 57766924c..fc7de7140 100644 --- a/pkg/endpoints/handlers/metrics/metrics.go +++ b/pkg/endpoints/handlers/metrics/metrics.go @@ -20,6 +20,7 @@ import ( "context" "sync" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/component-base/metrics" "k8s.io/component-base/metrics/legacyregistry" ) @@ -45,7 +46,7 @@ var ( Buckets: metrics.LinearBuckets(50000, 100000, 31), StabilityLevel: metrics.ALPHA, }, - []string{"resource", "verb"}, + []string{"group", "resource", "verb"}, ) ) @@ -58,6 +59,6 @@ func Register() { }) } -func RecordRequestBodySize(ctx context.Context, resource string, verb RequestBodyVerb, size int) { - RequestBodySizes.WithContext(ctx).WithLabelValues(resource, string(verb)).Observe(float64(size)) +func RecordRequestBodySize(ctx context.Context, groupResource schema.GroupResource, verb RequestBodyVerb, size int) { + RequestBodySizes.WithContext(ctx).WithLabelValues(groupResource.Group, groupResource.Resource, string(verb)).Observe(float64(size)) } diff --git a/pkg/endpoints/handlers/patch.go b/pkg/endpoints/handlers/patch.go index acfff1961..0dc9ffb33 100644 --- a/pkg/endpoints/handlers/patch.go +++ b/pkg/endpoints/handlers/patch.go @@ -107,7 +107,7 @@ func PatchResource(r rest.Patcher, scope *RequestScope, admit admission.Interfac return } - patchBytes, err := limitedReadBodyWithRecordMetric(ctx, req, scope.MaxRequestBodyBytes, scope.Resource.GroupResource().String(), requestmetrics.Patch) + patchBytes, err := limitedReadBodyWithRecordMetric(ctx, req, scope.MaxRequestBodyBytes, scope.Resource.GroupResource(), requestmetrics.Patch) if err != nil { span.AddEvent("limitedReadBody failed", attribute.Int("len", len(patchBytes)), attribute.String("err", err.Error())) scope.err(err, w, req) diff --git a/pkg/endpoints/handlers/response.go b/pkg/endpoints/handlers/response.go index 3f7ad6121..fda524cee 100644 --- a/pkg/endpoints/handlers/response.go +++ b/pkg/endpoints/handlers/response.go @@ -143,11 +143,11 @@ 
func (e *watchEmbeddedEncoder) embeddedIdentifier() runtime.Identifier { // // NOTE: watchEncoder is NOT thread-safe. type watchEncoder struct { - ctx context.Context - kind schema.GroupVersionKind - embeddedEncoder runtime.Encoder - encoder runtime.Encoder - framer io.Writer + ctx context.Context + groupVersionResource schema.GroupVersionResource + embeddedEncoder runtime.Encoder + encoder runtime.Encoder + framer io.Writer watchListTransformerFn watchListTransformerFunction @@ -158,10 +158,10 @@ type watchEncoder struct { identifiers map[watch.EventType]runtime.Identifier } -func newWatchEncoder(ctx context.Context, kind schema.GroupVersionKind, embeddedEncoder runtime.Encoder, encoder runtime.Encoder, framer io.Writer, watchListTransformerFn watchListTransformerFunction) *watchEncoder { +func newWatchEncoder(ctx context.Context, gvr schema.GroupVersionResource, embeddedEncoder runtime.Encoder, encoder runtime.Encoder, framer io.Writer, watchListTransformerFn watchListTransformerFunction) *watchEncoder { return &watchEncoder{ ctx: ctx, - kind: kind, + groupVersionResource: gvr, embeddedEncoder: embeddedEncoder, encoder: encoder, framer: framer, @@ -203,7 +203,7 @@ func (e *watchEncoder) doEncode(obj runtime.Object, event watch.Event, w io.Writ Type: string(event.Type), Object: runtime.RawExtension{Raw: e.buffer.Bytes()}, } - metrics.WatchEventsSizes.WithContext(e.ctx).WithLabelValues(e.kind.Group, e.kind.Version, e.kind.Kind).Observe(float64(len(outEvent.Object.Raw))) + metrics.WatchEventsSizes.WithContext(e.ctx).WithLabelValues(e.groupVersionResource.Group, e.groupVersionResource.Version, e.groupVersionResource.Resource).Observe(float64(len(outEvent.Object.Raw))) defer e.eventBuffer.Reset() if err := e.encoder.Encode(outEvent, e.eventBuffer); err != nil { diff --git a/pkg/endpoints/handlers/rest.go b/pkg/endpoints/handlers/rest.go index 6b19f7a6a..6d5a74146 100644 --- a/pkg/endpoints/handlers/rest.go +++ b/pkg/endpoints/handlers/rest.go @@ -388,11 +388,11 @@ 
func limitedReadBody(req *http.Request, limit int64) ([]byte, error) { return data, nil } -func limitedReadBodyWithRecordMetric(ctx context.Context, req *http.Request, limit int64, resourceGroup string, verb requestmetrics.RequestBodyVerb) ([]byte, error) { +func limitedReadBodyWithRecordMetric(ctx context.Context, req *http.Request, limit int64, groupResource schema.GroupResource, verb requestmetrics.RequestBodyVerb) ([]byte, error) { readBody, err := limitedReadBody(req, limit) if err == nil { // only record if we've read successfully - requestmetrics.RecordRequestBodySize(ctx, resourceGroup, verb, len(readBody)) + requestmetrics.RecordRequestBodySize(ctx, groupResource, verb, len(readBody)) } return readBody, err } diff --git a/pkg/endpoints/handlers/rest_test.go b/pkg/endpoints/handlers/rest_test.go index d4b0aaab3..c7bcb7f59 100644 --- a/pkg/endpoints/handlers/rest_test.go +++ b/pkg/endpoints/handlers/rest_test.go @@ -136,40 +136,40 @@ func TestLimitedReadBody(t *testing.T) { expectedMetrics: ` # HELP apiserver_request_body_size_bytes [ALPHA] Apiserver request body size in bytes broken out by resource and verb. 
# TYPE apiserver_request_body_size_bytes histogram - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="50000"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="150000"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="250000"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="350000"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="450000"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="550000"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="650000"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="750000"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="850000"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="950000"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="1.05e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="1.15e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="1.25e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="1.35e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="1.45e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="1.55e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="1.65e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="1.75e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="1.85e+06"} 1 - 
apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="1.95e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="2.05e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="2.15e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="2.25e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="2.35e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="2.45e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="2.55e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="2.65e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="2.75e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="2.85e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="2.95e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="3.05e+06"} 1 - apiserver_request_body_size_bytes_bucket{resource="resource.group",verb="create",le="+Inf"} 1 - apiserver_request_body_size_bytes_sum{resource="resource.group",verb="create"} 4 - apiserver_request_body_size_bytes_count{resource="resource.group",verb="create"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="50000"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="150000"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="250000"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="350000"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="450000"} 1 + 
apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="550000"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="650000"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="750000"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="850000"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="950000"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="1.05e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="1.15e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="1.25e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="1.35e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="1.45e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="1.55e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="1.65e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="1.75e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="1.85e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="1.95e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="2.05e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="2.15e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="2.25e+06"} 1 + 
apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="2.35e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="2.45e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="2.55e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="2.65e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="2.75e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="2.85e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="2.95e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="3.05e+06"} 1 + apiserver_request_body_size_bytes_bucket{group="group",resource="resource",verb="create",le="+Inf"} 1 + apiserver_request_body_size_bytes_sum{group="group",resource="resource",verb="create"} 4 + apiserver_request_body_size_bytes_count{group="group",resource="resource",verb="create"} 1 `, expectedErr: false, }, @@ -186,7 +186,7 @@ func TestLimitedReadBody(t *testing.T) { if err != nil { t.Errorf("err not expected: got %v", err) } - _, err = limitedReadBodyWithRecordMetric(context.Background(), req, tc.limit, "resource.group", metrics.Create) + _, err = limitedReadBodyWithRecordMetric(context.Background(), req, tc.limit, schema.GroupResource{Group: "group", Resource: "resource"}, metrics.Create) if tc.expectedErr { if err == nil { t.Errorf("err expected: got nil") diff --git a/pkg/endpoints/handlers/update.go b/pkg/endpoints/handlers/update.go index dcc57480e..446723234 100644 --- a/pkg/endpoints/handlers/update.go +++ b/pkg/endpoints/handlers/update.go @@ -75,7 +75,7 @@ func UpdateResource(r rest.Updater, scope *RequestScope, admit admission.Interfa return } - body, err := limitedReadBodyWithRecordMetric(ctx, req, 
scope.MaxRequestBodyBytes, scope.Resource.GroupResource().String(), requestmetrics.Update) + body, err := limitedReadBodyWithRecordMetric(ctx, req, scope.MaxRequestBodyBytes, scope.Resource.GroupResource(), requestmetrics.Update) if err != nil { span.AddEvent("limitedReadBody failed", attribute.Int("len", len(body)), attribute.String("err", err.Error())) scope.err(err, w, req) diff --git a/pkg/endpoints/handlers/watch.go b/pkg/endpoints/handlers/watch.go index c239d1f7a..b297b3c25 100644 --- a/pkg/endpoints/handlers/watch.go +++ b/pkg/endpoints/handlers/watch.go @@ -246,8 +246,8 @@ func (s *WatchServer) HandleHTTP(w http.ResponseWriter, req *http.Request) { w.WriteHeader(http.StatusOK) flusher.Flush() - kind := s.Scope.Kind - watchEncoder := newWatchEncoder(req.Context(), kind, s.EmbeddedEncoder, s.Encoder, framer, s.watchListTransformerFn) + gvr := s.Scope.Resource + watchEncoder := newWatchEncoder(req.Context(), gvr, s.EmbeddedEncoder, s.Encoder, framer, s.watchListTransformerFn) ch := s.Watching.ResultChan() done := req.Context().Done() @@ -271,7 +271,7 @@ func (s *WatchServer) HandleHTTP(w http.ResponseWriter, req *http.Request) { // End of results. 
return } - metrics.WatchEvents.WithContext(req.Context()).WithLabelValues(kind.Group, kind.Version, kind.Kind).Inc() + metrics.WatchEvents.WithContext(req.Context()).WithLabelValues(gvr.Group, gvr.Version, gvr.Resource).Inc() isWatchListLatencyRecordingRequired := shouldRecordWatchListLatency(event) if err := watchEncoder.Encode(event); err != nil { @@ -315,8 +315,8 @@ func (s *WatchServer) HandleWS(ws *websocket.Conn) { framer := newWebsocketFramer(ws, s.UseTextFraming) - kind := s.Scope.Kind - watchEncoder := newWatchEncoder(context.TODO(), kind, s.EmbeddedEncoder, s.Encoder, framer, s.watchListTransformerFn) + gvr := s.Scope.Resource + watchEncoder := newWatchEncoder(context.TODO(), gvr, s.EmbeddedEncoder, s.Encoder, framer, s.watchListTransformerFn) ch := s.Watching.ResultChan() for { diff --git a/pkg/endpoints/metrics/metrics.go b/pkg/endpoints/metrics/metrics.go index 9bbbe9adc..b95e551d8 100644 --- a/pkg/endpoints/metrics/metrics.go +++ b/pkg/endpoints/metrics/metrics.go @@ -176,7 +176,7 @@ var ( Help: "Number of events sent in watch clients", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"group", "version", "kind"}, + []string{"group", "version", "resource"}, ) WatchEventsSizes = compbasemetrics.NewHistogramVec( &compbasemetrics.HistogramOpts{ @@ -186,7 +186,7 @@ var ( Buckets: compbasemetrics.ExponentialBuckets(1024, 2.0, 8), // 1K, 2K, 4K, 8K, ..., 128K. StabilityLevel: compbasemetrics.ALPHA, }, - []string{"group", "version", "kind"}, + []string{"group", "version", "resource"}, ) // Because of volatility of the base metric this is pre-aggregated one. Instead of reporting current usage all the time // it reports maximal usage during the last second. 
@@ -226,7 +226,7 @@ var ( Help: "Counter of apiserver self-requests broken out for each verb, API resource and subresource.", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"verb", "resource", "subresource"}, + []string{"verb", "group", "resource", "subresource"}, ) requestFilterDuration = compbasemetrics.NewHistogramVec( @@ -604,7 +604,7 @@ func MonitorRequest(req *http.Request, verb, group, version, resource, subresour // MonitorRequest happens after authentication, so we can trust the username given by the request info, ok := request.UserFrom(req.Context()) if ok && info.GetName() == user.APIServerUser { - apiSelfRequestCounter.WithContext(req.Context()).WithLabelValues(reportedVerb, resource, subresource).Inc() + apiSelfRequestCounter.WithContext(req.Context()).WithLabelValues(reportedVerb, group, resource, subresource).Inc() } if deprecated { deprecatedRequestGauge.WithContext(req.Context()).WithLabelValues(group, version, resource, subresource, removedRelease).Set(1) diff --git a/pkg/registry/generic/registry/store.go b/pkg/registry/generic/registry/store.go index 56e1720f7..beb1e19ba 100644 --- a/pkg/registry/generic/registry/store.go +++ b/pkg/registry/generic/registry/store.go @@ -1669,7 +1669,7 @@ func (e *Store) startObservingCount(period time.Duration, objectCountTracker flo count = -1 } - metrics.UpdateObjectCount(resourceName, count) + metrics.UpdateObjectCount(e.DefaultQualifiedResource, count) if objectCountTracker != nil { objectCountTracker.Set(resourceName, count) } diff --git a/pkg/storage/cacher/cache_watcher.go b/pkg/storage/cacher/cache_watcher.go index c06aefe3d..a74cf8d4a 100644 --- a/pkg/storage/cacher/cache_watcher.go +++ b/pkg/storage/cacher/cache_watcher.go @@ -176,7 +176,7 @@ func (c *cacheWatcher) add(event *watchCacheEvent, timer *time.Timer) bool { // This means that we couldn't send event to that watcher. // Since we don't want to block on it infinitely, // we simply terminate it. 
- metrics.TerminatedWatchersCounter.WithLabelValues(c.groupResource.String()).Inc() + metrics.TerminatedWatchersCounter.WithLabelValues(c.groupResource.Group, c.groupResource.Resource).Inc() // This means that we couldn't send event to that watcher. // Since we don't want to block on it infinitely, we simply terminate it. @@ -503,7 +503,7 @@ func (c *cacheWatcher) processInterval(ctx context.Context, cacheInterval *watch } if initEventCount > 0 { - metrics.InitCounter.WithLabelValues(c.groupResource.String()).Add(float64(initEventCount)) + metrics.InitCounter.WithLabelValues(c.groupResource.Group, c.groupResource.Resource).Add(float64(initEventCount)) } processingTime := time.Since(startTime) if processingTime > initProcessThreshold { diff --git a/pkg/storage/cacher/cacher.go b/pkg/storage/cacher/cacher.go index 5d18d8313..6b4768849 100644 --- a/pkg/storage/cacher/cacher.go +++ b/pkg/storage/cacher/cacher.go @@ -466,8 +466,8 @@ func NewCacherFromConfig(config Config) (*Cacher, error) { func (c *Cacher) startCaching(stopChannel <-chan struct{}) { c.watchCache.SetOnReplace(func() { c.ready.setReady() - klog.V(1).Infof("cacher (%v): initialized", c.groupResource.String()) - metrics.WatchCacheInitializations.WithLabelValues(c.groupResource.String()).Inc() + klog.V(1).InfoS("cacher initialized", "group", c.groupResource.Group, "resource", c.groupResource.Resource) + metrics.WatchCacheInitializations.WithLabelValues(c.groupResource.Group, c.groupResource.Resource).Inc() }) var err error defer func() { @@ -812,7 +812,7 @@ func (c *Cacher) GetList(ctx context.Context, key string, opts storage.ListOptio return err } } - metrics.RecordListCacheMetrics(c.resourcePrefix, indexUsed, len(resp.Items), listVal.Len()) + metrics.RecordListCacheMetrics(c.groupResource, indexUsed, len(resp.Items), listVal.Len()) return nil } @@ -891,7 +891,7 @@ func (c *Cacher) dispatchEvents() { c.dispatchEvent(&event) } lastProcessedResourceVersion = event.ResourceVersion - 
metrics.EventsCounter.WithLabelValues(c.groupResource.String()).Inc() + metrics.EventsCounter.WithLabelValues(c.groupResource.Group, c.groupResource.Resource).Inc() case <-bookmarkTimer.C(): bookmarkTimer.Reset(wait.Jitter(time.Second, 0.25)) bookmarkEvent := &watchCacheEvent{ diff --git a/pkg/storage/cacher/cacher_whitebox_test.go b/pkg/storage/cacher/cacher_whitebox_test.go index 1f12140a5..d783e5e90 100644 --- a/pkg/storage/cacher/cacher_whitebox_test.go +++ b/pkg/storage/cacher/cacher_whitebox_test.go @@ -492,7 +492,7 @@ func TestConsistentReadFallback(t *testing.T) { expectMetric: ` # HELP apiserver_watch_cache_consistent_read_total [ALPHA] Counter for consistent reads from cache. # TYPE apiserver_watch_cache_consistent_read_total counter -apiserver_watch_cache_consistent_read_total{fallback="false", resource="pods", success="true"} 1 +apiserver_watch_cache_consistent_read_total{fallback="false", group="", resource="pods", success="true"} 1 `, }, { @@ -506,7 +506,7 @@ apiserver_watch_cache_consistent_read_total{fallback="false", resource="pods", s expectMetric: ` # HELP apiserver_watch_cache_consistent_read_total [ALPHA] Counter for consistent reads from cache. # TYPE apiserver_watch_cache_consistent_read_total counter -apiserver_watch_cache_consistent_read_total{fallback="true", resource="pods", success="true"} 1 +apiserver_watch_cache_consistent_read_total{fallback="true", group="", resource="pods", success="true"} 1 `, }, { @@ -521,7 +521,7 @@ apiserver_watch_cache_consistent_read_total{fallback="true", resource="pods", su expectMetric: ` # HELP apiserver_watch_cache_consistent_read_total [ALPHA] Counter for consistent reads from cache. 
# TYPE apiserver_watch_cache_consistent_read_total counter -apiserver_watch_cache_consistent_read_total{fallback="true", resource="pods", success="false"} 1 +apiserver_watch_cache_consistent_read_total{fallback="true", group="", resource="pods", success="false"} 1 `, }, { diff --git a/pkg/storage/cacher/delegator.go b/pkg/storage/cacher/delegator.go index ac17fb1c8..cb7eb3399 100644 --- a/pkg/storage/cacher/delegator.go +++ b/pkg/storage/cacher/delegator.go @@ -31,6 +31,7 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/watch" "k8s.io/apiserver/pkg/audit" @@ -64,7 +65,7 @@ func NewCacheDelegator(cacher *Cacher, storage storage.Interface) *CacheDelegato stopCh: make(chan struct{}), } if ConsistencyCheckerEnabled { - d.checker = newConsistencyChecker(cacher.resourcePrefix, cacher.newListFunc, cacher, storage) + d.checker = newConsistencyChecker(cacher.resourcePrefix, cacher.groupResource, cacher.newListFunc, cacher, storage) d.wg.Add(1) go func() { defer d.wg.Done() @@ -231,12 +232,12 @@ func (c *CacheDelegator) GetList(ctx context.Context, key string, opts storage.L if err != nil { success = "false" } - metrics.ConsistentReadTotal.WithLabelValues(c.cacher.resourcePrefix, success, fallback).Add(1) + metrics.ConsistentReadTotal.WithLabelValues(c.cacher.groupResource.Group, c.cacher.groupResource.Resource, success, fallback).Add(1) } return err } if result.ConsistentRead { - metrics.ConsistentReadTotal.WithLabelValues(c.cacher.resourcePrefix, success, fallback).Add(1) + metrics.ConsistentReadTotal.WithLabelValues(c.cacher.groupResource.Group, c.cacher.groupResource.Resource, success, fallback).Add(1) } return nil } @@ -286,8 +287,9 @@ func (c *CacheDelegator) Stop() { c.wg.Wait() } -func newConsistencyChecker(resourcePrefix string, newListFunc func() runtime.Object, cacher getListerReady, 
etcd getLister) *consistencyChecker { +func newConsistencyChecker(resourcePrefix string, groupResource schema.GroupResource, newListFunc func() runtime.Object, cacher getListerReady, etcd getLister) *consistencyChecker { return &consistencyChecker{ + groupResource: groupResource, resourcePrefix: resourcePrefix, newListFunc: newListFunc, cacher: cacher, @@ -297,6 +299,7 @@ func newConsistencyChecker(resourcePrefix string, newListFunc func() runtime.Obj type consistencyChecker struct { resourcePrefix string + groupResource schema.GroupResource newListFunc func() runtime.Object cacher getListerReady @@ -318,25 +321,25 @@ func (c consistencyChecker) startChecking(stopCh <-chan struct{}) { return false, nil }) if err != nil { - klog.InfoS("Cache consistency check exiting", "resource", c.resourcePrefix, "err", err) + klog.InfoS("Cache consistency check exiting", "group", c.groupResource.Group, "resource", c.groupResource.Resource, "err", err) } } func (c *consistencyChecker) check(ctx context.Context) { digests, err := c.calculateDigests(ctx) if err != nil { - klog.ErrorS(err, "Cache consistency check error", "resource", c.resourcePrefix) - metrics.StorageConsistencyCheckTotal.WithLabelValues(c.resourcePrefix, "error").Inc() + klog.ErrorS(err, "Cache consistency check error", "group", c.groupResource.Group, "resource", c.groupResource.Resource) + metrics.StorageConsistencyCheckTotal.WithLabelValues(c.groupResource.Group, c.groupResource.Resource, "error").Inc() return } if digests.CacheDigest == digests.EtcdDigest { - klog.V(3).InfoS("Cache consistency check passed", "resource", c.resourcePrefix, "resourceVersion", digests.ResourceVersion, "digest", digests.CacheDigest) - metrics.StorageConsistencyCheckTotal.WithLabelValues(c.resourcePrefix, "success").Inc() + klog.V(3).InfoS("Cache consistency check passed", "group", c.groupResource.Group, "resource", c.groupResource.Resource, "resourceVersion", digests.ResourceVersion, "digest", digests.CacheDigest) + 
metrics.StorageConsistencyCheckTotal.WithLabelValues(c.groupResource.Group, c.groupResource.Resource, "success").Inc() } else { - klog.ErrorS(nil, "Cache consistency check failed", "resource", c.resourcePrefix, "resourceVersion", digests.ResourceVersion, "etcdDigest", digests.EtcdDigest, "cacheDigest", digests.CacheDigest) - metrics.StorageConsistencyCheckTotal.WithLabelValues(c.resourcePrefix, "failure").Inc() + klog.ErrorS(nil, "Cache consistency check failed", "group", c.groupResource.Group, "resource", c.groupResource.Resource, "resourceVersion", digests.ResourceVersion, "etcdDigest", digests.EtcdDigest, "cacheDigest", digests.CacheDigest) + metrics.StorageConsistencyCheckTotal.WithLabelValues(c.groupResource.Group, c.groupResource.Resource, "failure").Inc() // Panic on internal consistency checking enabled only by environment variable. R - panic(fmt.Sprintf("Cache consistency check failed, resource: %q, resourceVersion: %q, etcdDigest: %q, cacheDigest: %q", c.resourcePrefix, digests.ResourceVersion, digests.EtcdDigest, digests.CacheDigest)) + panic(fmt.Sprintf("Cache consistency check failed, group: %q, resource: %q, resourceVersion: %q, etcdDigest: %q, cacheDigest: %q", c.groupResource.Group, c.groupResource.Resource, digests.ResourceVersion, digests.EtcdDigest, digests.CacheDigest)) } } diff --git a/pkg/storage/cacher/delegator_test.go b/pkg/storage/cacher/delegator_test.go index f812e169f..76cbf021e 100644 --- a/pkg/storage/cacher/delegator_test.go +++ b/pkg/storage/cacher/delegator_test.go @@ -22,6 +22,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apiserver/pkg/apis/example" "k8s.io/apiserver/pkg/storage" ) @@ -34,7 +35,6 @@ func TestCalculateDigest(t *testing.T) { cacherReady bool cacherItems []example.Pod etcdItems []example.Pod - resourcePrefix string expectListKey string expectDigest storageDigest @@ -178,7 +178,7 @@ func TestCalculateDigest(t *testing.T) 
{ }, }, } - checker := newConsistencyChecker(tc.resourcePrefix, newListFunc, cacher, etcd) + checker := newConsistencyChecker("", schema.GroupResource{}, newListFunc, cacher, etcd) digest, err := checker.calculateDigests(context.Background()) if (err != nil) != tc.expectErr { t.Fatalf("Expect error: %v, got: %v", tc.expectErr, err) diff --git a/pkg/storage/cacher/metrics/metrics.go b/pkg/storage/cacher/metrics/metrics.go index 0559708d2..b49fe08ef 100644 --- a/pkg/storage/cacher/metrics/metrics.go +++ b/pkg/storage/cacher/metrics/metrics.go @@ -19,6 +19,7 @@ package metrics import ( "sync" + "k8s.io/apimachinery/pkg/runtime/schema" compbasemetrics "k8s.io/component-base/metrics" "k8s.io/component-base/metrics/legacyregistry" ) @@ -44,7 +45,7 @@ var ( Help: "Number of LIST requests served from watch cache", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource_prefix", "index"}, + []string{"group", "resource", "index"}, ) listCacheNumFetched = compbasemetrics.NewCounterVec( &compbasemetrics.CounterOpts{ @@ -53,7 +54,7 @@ var ( Help: "Number of objects read from watch cache in the course of serving a LIST request", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource_prefix", "index"}, + []string{"group", "resource", "index"}, ) listCacheNumReturned = compbasemetrics.NewCounterVec( &compbasemetrics.CounterOpts{ @@ -62,7 +63,7 @@ var ( Help: "Number of objects returned for a LIST request from watch cache", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource_prefix"}, + []string{"group", "resource"}, ) InitCounter = compbasemetrics.NewCounterVec( &compbasemetrics.CounterOpts{ @@ -71,7 +72,7 @@ var ( Help: "Counter of init events processed in watch cache broken by resource type.", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource"}, + []string{"group", "resource"}, ) EventsReceivedCounter = compbasemetrics.NewCounterVec( @@ -82,7 +83,7 @@ var ( Help: "Counter of events received in watch cache broken by resource type.", 
StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource"}, + []string{"group", "resource"}, ) EventsCounter = compbasemetrics.NewCounterVec( @@ -93,7 +94,7 @@ var ( Help: "Counter of events dispatched in watch cache broken by resource type.", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource"}, + []string{"group", "resource"}, ) TerminatedWatchersCounter = compbasemetrics.NewCounterVec( @@ -103,7 +104,7 @@ var ( Help: "Counter of watchers closed due to unresponsiveness broken by resource type.", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource"}, + []string{"group", "resource"}, ) watchCacheResourceVersion = compbasemetrics.NewGaugeVec( @@ -114,7 +115,7 @@ var ( Help: "Current resource version of watch cache broken by resource type.", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource"}, + []string{"group", "resource"}, ) watchCacheCapacityIncreaseTotal = compbasemetrics.NewCounterVec( @@ -124,7 +125,7 @@ var ( Help: "Total number of watch cache capacity increase events broken by resource type.", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource"}, + []string{"group", "resource"}, ) watchCacheCapacityDecreaseTotal = compbasemetrics.NewCounterVec( @@ -134,7 +135,7 @@ var ( Help: "Total number of watch cache capacity decrease events broken by resource type.", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource"}, + []string{"group", "resource"}, ) WatchCacheCapacity = compbasemetrics.NewGaugeVec( @@ -144,7 +145,7 @@ var ( Help: "Total capacity of watch cache broken by resource type.", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource"}, + []string{"group", "resource"}, ) WatchCacheInitializations = compbasemetrics.NewCounterVec( @@ -155,7 +156,7 @@ var ( Help: "Counter of watch cache initializations broken by resource type.", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource"}, + []string{"group", "resource"}, ) WatchCacheReadWait = compbasemetrics.NewHistogramVec( @@ 
-166,7 +167,7 @@ var ( Help: "Histogram of time spent waiting for a watch cache to become fresh.", StabilityLevel: compbasemetrics.ALPHA, Buckets: []float64{0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3}, - }, []string{"resource"}) + }, []string{"group", "resource"}) ConsistentReadTotal = compbasemetrics.NewCounterVec( &compbasemetrics.CounterOpts{ @@ -175,7 +176,7 @@ var ( Name: "consistent_read_total", Help: "Counter for consistent reads from cache.", StabilityLevel: compbasemetrics.ALPHA, - }, []string{"resource", "success", "fallback"}) + }, []string{"group", "resource", "success", "fallback"}) StorageConsistencyCheckTotal = compbasemetrics.NewCounterVec( &compbasemetrics.CounterOpts{ @@ -183,7 +184,7 @@ var ( Name: "storage_consistency_checks_total", Help: "Counter for status of consistency checks between etcd and watch cache", StabilityLevel: compbasemetrics.INTERNAL, - }, []string{"resource", "status"}) + }, []string{"group", "resource", "status"}) ) var registerMetrics sync.Once @@ -211,23 +212,23 @@ func Register() { } // RecordListCacheMetrics notes various metrics of the cost to serve a LIST request -func RecordListCacheMetrics(resourcePrefix, indexName string, numFetched, numReturned int) { - listCacheCount.WithLabelValues(resourcePrefix, indexName).Inc() - listCacheNumFetched.WithLabelValues(resourcePrefix, indexName).Add(float64(numFetched)) - listCacheNumReturned.WithLabelValues(resourcePrefix).Add(float64(numReturned)) +func RecordListCacheMetrics(groupResource schema.GroupResource, indexName string, numFetched, numReturned int) { + listCacheCount.WithLabelValues(groupResource.Group, groupResource.Resource, indexName).Inc() + listCacheNumFetched.WithLabelValues(groupResource.Group, groupResource.Resource, indexName).Add(float64(numFetched)) + listCacheNumReturned.WithLabelValues(groupResource.Group, groupResource.Resource).Add(float64(numReturned)) } // RecordResourceVersion sets the current resource version for a given resource 
type. -func RecordResourceVersion(resourcePrefix string, resourceVersion uint64) { - watchCacheResourceVersion.WithLabelValues(resourcePrefix).Set(float64(resourceVersion)) +func RecordResourceVersion(groupResource schema.GroupResource, resourceVersion uint64) { + watchCacheResourceVersion.WithLabelValues(groupResource.Group, groupResource.Resource).Set(float64(resourceVersion)) } // RecordsWatchCacheCapacityChange record watchCache capacity resize(increase or decrease) operations. -func RecordsWatchCacheCapacityChange(objType string, old, new int) { - WatchCacheCapacity.WithLabelValues(objType).Set(float64(new)) +func RecordsWatchCacheCapacityChange(groupResource schema.GroupResource, old, new int) { + WatchCacheCapacity.WithLabelValues(groupResource.Group, groupResource.Resource).Set(float64(new)) if old < new { - watchCacheCapacityIncreaseTotal.WithLabelValues(objType).Inc() + watchCacheCapacityIncreaseTotal.WithLabelValues(groupResource.Group, groupResource.Resource).Inc() return } - watchCacheCapacityDecreaseTotal.WithLabelValues(objType).Inc() + watchCacheCapacityDecreaseTotal.WithLabelValues(groupResource.Group, groupResource.Resource).Inc() } diff --git a/pkg/storage/cacher/watch_cache.go b/pkg/storage/cacher/watch_cache.go index 967a60c9b..192dc0572 100644 --- a/pkg/storage/cacher/watch_cache.go +++ b/pkg/storage/cacher/watch_cache.go @@ -190,7 +190,7 @@ func newWatchCache( if utilfeature.DefaultFeatureGate.Enabled(features.ListFromCacheSnapshot) { wc.snapshots = newStoreSnapshotter() } - metrics.WatchCacheCapacity.WithLabelValues(groupResource.String()).Set(float64(wc.capacity)) + metrics.WatchCacheCapacity.WithLabelValues(groupResource.Group, groupResource.Resource).Set(float64(wc.capacity)) wc.cond = sync.NewCond(wc.RLocker()) wc.indexValidator = wc.isIndexValidLocked @@ -272,7 +272,7 @@ func (w *watchCache) objectToVersionedRuntimeObject(obj interface{}) (runtime.Ob // processEvent is safe as long as there is at most one call to it in flight // at any 
point in time. func (w *watchCache) processEvent(event watch.Event, resourceVersion uint64, updateFunc func(*storeElement) error) error { - metrics.EventsReceivedCounter.WithLabelValues(w.groupResource.String()).Inc() + metrics.EventsReceivedCounter.WithLabelValues(w.groupResource.Group, w.groupResource.Resource).Inc() key, err := w.keyFunc(event.Object) if err != nil { @@ -343,7 +343,7 @@ func (w *watchCache) processEvent(event watch.Event, resourceVersion uint64, upd if w.eventHandler != nil { w.eventHandler(wcEvent) } - metrics.RecordResourceVersion(w.groupResource.String(), resourceVersion) + metrics.RecordResourceVersion(w.groupResource, resourceVersion) return nil } @@ -397,7 +397,7 @@ func (w *watchCache) doCacheResizeLocked(capacity int) { newCache[i%capacity] = w.cache[i%w.capacity] } w.cache = newCache - metrics.RecordsWatchCacheCapacityChange(w.groupResource.String(), w.capacity, capacity) + metrics.RecordsWatchCacheCapacityChange(w.groupResource, w.capacity, capacity) w.capacity = capacity } @@ -426,7 +426,7 @@ func (w *watchCache) UpdateResourceVersion(resourceVersion string) { } w.eventHandler(wcEvent) } - metrics.RecordResourceVersion(w.groupResource.String(), rv) + metrics.RecordResourceVersion(w.groupResource, rv) } // List returns list of pointers to objects. 
@@ -441,7 +441,7 @@ func (w *watchCache) waitUntilFreshAndBlock(ctx context.Context, resourceVersion startTime := w.clock.Now() defer func() { if resourceVersion > 0 { - metrics.WatchCacheReadWait.WithContext(ctx).WithLabelValues(w.groupResource.String()).Observe(w.clock.Since(startTime).Seconds()) + metrics.WatchCacheReadWait.WithContext(ctx).WithLabelValues(w.groupResource.Group, w.groupResource.Resource).Observe(w.clock.Since(startTime).Seconds()) } }() @@ -708,7 +708,7 @@ func (w *watchCache) Replace(objs []interface{}, resourceVersion string) error { } w.cond.Broadcast() - metrics.RecordResourceVersion(w.groupResource.String(), version) + metrics.RecordResourceVersion(w.groupResource, version) klog.V(3).Infof("Replaced watchCache (rev: %v) ", resourceVersion) return nil } diff --git a/pkg/storage/cacher/watch_cache_test.go b/pkg/storage/cacher/watch_cache_test.go index 20a76bbec..92a6347a2 100644 --- a/pkg/storage/cacher/watch_cache_test.go +++ b/pkg/storage/cacher/watch_cache_test.go @@ -1268,22 +1268,22 @@ func TestHistogramCacheReadWait(t *testing.T) { want: ` # HELP apiserver_watch_cache_read_wait_seconds [ALPHA] Histogram of time spent waiting for a watch cache to become fresh. 
# TYPE apiserver_watch_cache_read_wait_seconds histogram - apiserver_watch_cache_read_wait_seconds_bucket{resource="pods",le="0.005"} 1 - apiserver_watch_cache_read_wait_seconds_bucket{resource="pods",le="0.025"} 1 - apiserver_watch_cache_read_wait_seconds_bucket{resource="pods",le="0.05"} 1 - apiserver_watch_cache_read_wait_seconds_bucket{resource="pods",le="0.1"} 1 - apiserver_watch_cache_read_wait_seconds_bucket{resource="pods",le="0.2"} 1 - apiserver_watch_cache_read_wait_seconds_bucket{resource="pods",le="0.4"} 1 - apiserver_watch_cache_read_wait_seconds_bucket{resource="pods",le="0.6"} 1 - apiserver_watch_cache_read_wait_seconds_bucket{resource="pods",le="0.8"} 1 - apiserver_watch_cache_read_wait_seconds_bucket{resource="pods",le="1"} 1 - apiserver_watch_cache_read_wait_seconds_bucket{resource="pods",le="1.25"} 1 - apiserver_watch_cache_read_wait_seconds_bucket{resource="pods",le="1.5"} 1 - apiserver_watch_cache_read_wait_seconds_bucket{resource="pods",le="2"} 1 - apiserver_watch_cache_read_wait_seconds_bucket{resource="pods",le="3"} 1 - apiserver_watch_cache_read_wait_seconds_bucket{resource="pods",le="+Inf"} 1 - apiserver_watch_cache_read_wait_seconds_sum{resource="pods"} 0 - apiserver_watch_cache_read_wait_seconds_count{resource="pods"} 1 + apiserver_watch_cache_read_wait_seconds_bucket{group="",resource="pods",le="0.005"} 1 + apiserver_watch_cache_read_wait_seconds_bucket{group="",resource="pods",le="0.025"} 1 + apiserver_watch_cache_read_wait_seconds_bucket{group="",resource="pods",le="0.05"} 1 + apiserver_watch_cache_read_wait_seconds_bucket{group="",resource="pods",le="0.1"} 1 + apiserver_watch_cache_read_wait_seconds_bucket{group="",resource="pods",le="0.2"} 1 + apiserver_watch_cache_read_wait_seconds_bucket{group="",resource="pods",le="0.4"} 1 + apiserver_watch_cache_read_wait_seconds_bucket{group="",resource="pods",le="0.6"} 1 + apiserver_watch_cache_read_wait_seconds_bucket{group="",resource="pods",le="0.8"} 1 + 
apiserver_watch_cache_read_wait_seconds_bucket{group="",resource="pods",le="1"} 1 + apiserver_watch_cache_read_wait_seconds_bucket{group="",resource="pods",le="1.25"} 1 + apiserver_watch_cache_read_wait_seconds_bucket{group="",resource="pods",le="1.5"} 1 + apiserver_watch_cache_read_wait_seconds_bucket{group="",resource="pods",le="2"} 1 + apiserver_watch_cache_read_wait_seconds_bucket{group="",resource="pods",le="3"} 1 + apiserver_watch_cache_read_wait_seconds_bucket{group="",resource="pods",le="+Inf"} 1 + apiserver_watch_cache_read_wait_seconds_sum{group="",resource="pods"} 0 + apiserver_watch_cache_read_wait_seconds_count{group="",resource="pods"} 1 `, }, { diff --git a/pkg/storage/etcd3/metrics/metrics.go b/pkg/storage/etcd3/metrics/metrics.go index 747e120dc..28361c390 100644 --- a/pkg/storage/etcd3/metrics/metrics.go +++ b/pkg/storage/etcd3/metrics/metrics.go @@ -22,6 +22,7 @@ import ( "sync" "time" + "k8s.io/apimachinery/pkg/runtime/schema" compbasemetrics "k8s.io/component-base/metrics" "k8s.io/component-base/metrics/legacyregistry" "k8s.io/klog/v2" @@ -48,7 +49,7 @@ var ( 4, 5, 6, 8, 10, 15, 20, 30, 45, 60}, StabilityLevel: compbasemetrics.ALPHA, }, - []string{"operation", "type"}, + []string{"operation", "group", "resource"}, ) etcdRequestCounts = compbasemetrics.NewCounterVec( &compbasemetrics.CounterOpts{ @@ -56,7 +57,7 @@ var ( Help: "Etcd request counts for each operation and object type.", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"operation", "type"}, + []string{"operation", "group", "resource"}, ) etcdRequestErrorCounts = compbasemetrics.NewCounterVec( &compbasemetrics.CounterOpts{ @@ -64,7 +65,7 @@ var ( Help: "Etcd failed request counts for each operation and object type.", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"operation", "type"}, + []string{"operation", "group", "resource"}, ) objectCounts = compbasemetrics.NewGaugeVec( &compbasemetrics.GaugeOpts{ @@ -93,7 +94,7 @@ var ( Help: "Number of etcd events received split by 
kind.", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource"}, + []string{"group", "resource"}, ) etcdBookmarkCounts = compbasemetrics.NewGaugeVec( &compbasemetrics.GaugeOpts{ @@ -101,7 +102,7 @@ var ( Help: "Number of etcd bookmarks (progress notify events) split by kind.", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource"}, + []string{"group", "resource"}, ) etcdLeaseObjectCounts = compbasemetrics.NewHistogramVec( &compbasemetrics.HistogramOpts{ @@ -118,7 +119,7 @@ var ( Help: "Number of LIST requests served from storage", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource"}, + []string{"group", "resource"}, ) listStorageNumFetched = compbasemetrics.NewCounterVec( &compbasemetrics.CounterOpts{ @@ -126,7 +127,7 @@ var ( Help: "Number of objects read from storage in the course of serving a LIST request", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource"}, + []string{"group", "resource"}, ) listStorageNumSelectorEvals = compbasemetrics.NewCounterVec( &compbasemetrics.CounterOpts{ @@ -134,7 +135,7 @@ var ( Help: "Number of objects tested in the course of serving a LIST request from storage", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource"}, + []string{"group", "resource"}, ) listStorageNumReturned = compbasemetrics.NewCounterVec( &compbasemetrics.CounterOpts{ @@ -142,7 +143,7 @@ var ( Help: "Number of objects returned for a LIST request from storage", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource"}, + []string{"group", "resource"}, ) decodeErrorCounts = compbasemetrics.NewCounterVec( &compbasemetrics.CounterOpts{ @@ -151,7 +152,7 @@ var ( Help: "Number of stored object decode errors split by object type", StabilityLevel: compbasemetrics.ALPHA, }, - []string{"resource"}, + []string{"group", "resource"}, ) ) @@ -179,34 +180,33 @@ func Register() { } // UpdateObjectCount sets the apiserver_storage_object_counts metric. 
-func UpdateObjectCount(resourcePrefix string, count int64) { - objectCounts.WithLabelValues(resourcePrefix).Set(float64(count)) +func UpdateObjectCount(groupResource schema.GroupResource, count int64) { + objectCounts.WithLabelValues(groupResource.String()).Set(float64(count)) } // RecordEtcdRequest updates and sets the etcd_request_duration_seconds, // etcd_request_total, etcd_request_errors_total metrics. -func RecordEtcdRequest(verb, resource string, err error, startTime time.Time) { - v := []string{verb, resource} - etcdRequestLatency.WithLabelValues(v...).Observe(sinceInSeconds(startTime)) - etcdRequestCounts.WithLabelValues(v...).Inc() +func RecordEtcdRequest(verb string, groupResource schema.GroupResource, err error, startTime time.Time) { + etcdRequestLatency.WithLabelValues(verb, groupResource.Group, groupResource.Resource).Observe(sinceInSeconds(startTime)) + etcdRequestCounts.WithLabelValues(verb, groupResource.Group, groupResource.Resource).Inc() if err != nil { - etcdRequestErrorCounts.WithLabelValues(v...).Inc() + etcdRequestErrorCounts.WithLabelValues(verb, groupResource.Group, groupResource.Resource).Inc() } } // RecordEtcdEvent updated the etcd_events_received_total metric. -func RecordEtcdEvent(resource string) { - etcdEventsReceivedCounts.WithLabelValues(resource).Inc() +func RecordEtcdEvent(groupResource schema.GroupResource) { + etcdEventsReceivedCounts.WithLabelValues(groupResource.Group, groupResource.Resource).Inc() } // RecordEtcdBookmark updates the etcd_bookmark_counts metric. -func RecordEtcdBookmark(resource string) { - etcdBookmarkCounts.WithLabelValues(resource).Inc() +func RecordEtcdBookmark(groupResource schema.GroupResource) { + etcdBookmarkCounts.WithLabelValues(groupResource.Group, groupResource.Resource).Inc() } // RecordDecodeError sets the storage_decode_errors metrics. 
-func RecordDecodeError(resource string) { - decodeErrorCounts.WithLabelValues(resource).Inc() +func RecordDecodeError(groupResource schema.GroupResource) { + decodeErrorCounts.WithLabelValues(groupResource.Group, groupResource.Resource).Inc() } // Reset resets the etcd_request_duration_seconds metric. @@ -240,11 +240,11 @@ func UpdateLeaseObjectCount(count int64) { } // RecordListEtcd3Metrics notes various metrics of the cost to serve a LIST request -func RecordStorageListMetrics(resource string, numFetched, numEvald, numReturned int) { - listStorageCount.WithLabelValues(resource).Inc() - listStorageNumFetched.WithLabelValues(resource).Add(float64(numFetched)) - listStorageNumSelectorEvals.WithLabelValues(resource).Add(float64(numEvald)) - listStorageNumReturned.WithLabelValues(resource).Add(float64(numReturned)) +func RecordStorageListMetrics(groupResource schema.GroupResource, numFetched, numEvald, numReturned int) { + listStorageCount.WithLabelValues(groupResource.Group, groupResource.Resource).Inc() + listStorageNumFetched.WithLabelValues(groupResource.Group, groupResource.Resource).Add(float64(numFetched)) + listStorageNumSelectorEvals.WithLabelValues(groupResource.Group, groupResource.Resource).Add(float64(numEvald)) + listStorageNumReturned.WithLabelValues(groupResource.Group, groupResource.Resource).Add(float64(numReturned)) } type Monitor interface { diff --git a/pkg/storage/etcd3/metrics/metrics_test.go b/pkg/storage/etcd3/metrics/metrics_test.go index 677d77370..07de97e1f 100644 --- a/pkg/storage/etcd3/metrics/metrics_test.go +++ b/pkg/storage/etcd3/metrics/metrics_test.go @@ -23,6 +23,7 @@ import ( "testing" "time" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/component-base/metrics" "k8s.io/component-base/metrics/testutil" ) @@ -31,20 +32,19 @@ func TestRecordDecodeError(t *testing.T) { registry := metrics.NewKubeRegistry() defer registry.Reset() registry.Register(decodeErrorCounts) - resourceName := "pods" testedMetrics := 
"apiserver_storage_decode_errors_total" testCases := []struct { desc string - resource string + resource schema.GroupResource want string }{ { desc: "test success", - resource: resourceName, + resource: schema.GroupResource{Resource: "pods"}, want: ` # HELP apiserver_storage_decode_errors_total [ALPHA] Number of stored object decode errors split by object type # TYPE apiserver_storage_decode_errors_total counter - apiserver_storage_decode_errors_total{resource="pods"} 1 + apiserver_storage_decode_errors_total{group="",resource="pods"} 1 `, }, } @@ -80,92 +80,92 @@ func TestRecordEtcdRequest(t *testing.T) { } testCases := []struct { - desc string - operation string - typ string - err error - startTime time.Time - want string + desc string + operation string + groupResource schema.GroupResource + err error + startTime time.Time + want string }{ { - desc: "success_request", - operation: "foo", - typ: "bar", - err: nil, - startTime: time.Unix(0, 0), // 0.3s + desc: "success_request", + operation: "foo", + groupResource: schema.GroupResource{Group: "bar", Resource: "baz"}, + err: nil, + startTime: time.Unix(0, 0), // 0.3s want: `# HELP etcd_request_duration_seconds [ALPHA] Etcd request latency in seconds for each operation and object type. 
# TYPE etcd_request_duration_seconds histogram -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.005"} 0 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.025"} 0 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.05"} 0 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.1"} 0 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.2"} 0 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.4"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.6"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.8"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1.25"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1.5"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="2"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="3"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="4"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="5"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="6"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="8"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="10"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="15"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="20"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="30"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="45"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="60"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="+Inf"} 1 -etcd_request_duration_seconds_sum{operation="foo",type="bar"} 0.3 
-etcd_request_duration_seconds_count{operation="foo",type="bar"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="0.005"} 0 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="0.025"} 0 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="0.05"} 0 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="0.1"} 0 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="0.2"} 0 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="0.4"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="0.6"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="0.8"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="1"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="1.25"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="1.5"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="2"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="3"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="4"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="5"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="6"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="8"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="10"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="15"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="20"} 1 
+etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="30"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="45"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="60"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="+Inf"} 1 +etcd_request_duration_seconds_sum{group="bar",operation="foo",resource="baz"} 0.3 +etcd_request_duration_seconds_count{group="bar",operation="foo",resource="baz"} 1 # HELP etcd_requests_total [ALPHA] Etcd request counts for each operation and object type. # TYPE etcd_requests_total counter -etcd_requests_total{operation="foo",type="bar"} 1 +etcd_requests_total{group="bar",operation="foo",resource="baz"} 1 `, }, { - desc: "failed_request", - operation: "foo", - typ: "bar", - err: errors.New("some error"), - startTime: time.Unix(0, 0), // 0.3s + desc: "failed_request", + operation: "foo", + groupResource: schema.GroupResource{Group: "bar", Resource: "baz"}, + err: errors.New("some error"), + startTime: time.Unix(0, 0), // 0.3s want: `# HELP etcd_request_duration_seconds [ALPHA] Etcd request latency in seconds for each operation and object type. 
# TYPE etcd_request_duration_seconds histogram -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.005"} 0 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.025"} 0 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.05"} 0 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.1"} 0 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.2"} 0 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.4"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.6"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="0.8"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1.25"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="1.5"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="2"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="3"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="4"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="5"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="6"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="8"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="10"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="15"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="20"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="30"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="45"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="60"} 1 -etcd_request_duration_seconds_bucket{operation="foo",type="bar",le="+Inf"} 1 -etcd_request_duration_seconds_sum{operation="foo",type="bar"} 0.3 
-etcd_request_duration_seconds_count{operation="foo",type="bar"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="0.005"} 0 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="0.025"} 0 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="0.05"} 0 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="0.1"} 0 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="0.2"} 0 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="0.4"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="0.6"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="0.8"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="1"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="1.25"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="1.5"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="2"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="3"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="4"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="5"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="6"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="8"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="10"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="15"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="20"} 1 
+etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="30"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="45"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="60"} 1 +etcd_request_duration_seconds_bucket{group="bar",operation="foo",resource="baz",le="+Inf"} 1 +etcd_request_duration_seconds_sum{group="bar",operation="foo",resource="baz"} 0.3 +etcd_request_duration_seconds_count{group="bar",operation="foo",resource="baz"} 1 # HELP etcd_requests_total [ALPHA] Etcd request counts for each operation and object type. # TYPE etcd_requests_total counter -etcd_requests_total{operation="foo",type="bar"} 1 +etcd_requests_total{group="bar",operation="foo",resource="baz"} 1 # HELP etcd_request_errors_total [ALPHA] Etcd failed request counts for each operation and object type. # TYPE etcd_request_errors_total counter -etcd_request_errors_total{operation="foo",type="bar"} 1 +etcd_request_errors_total{group="bar",operation="foo",resource="baz"} 1 `, }, } @@ -173,7 +173,7 @@ etcd_request_errors_total{operation="foo",type="bar"} 1 for _, test := range testCases { t.Run(test.desc, func(t *testing.T) { defer registry.Reset() - RecordEtcdRequest(test.operation, test.typ, test.err, test.startTime) + RecordEtcdRequest(test.operation, test.groupResource, test.err, test.startTime) if err := testutil.GatherAndCompare(registry, strings.NewReader(test.want), testedMetricsName...); err != nil { t.Fatal(err) } @@ -233,26 +233,26 @@ func TestUpdateObjectCount(t *testing.T) { testCases := []struct { desc string - resource string + resource schema.GroupResource count int64 want string }{ { desc: "successful fetch", - resource: "foo", + resource: schema.GroupResource{Group: "foo", Resource: "bar"}, count: 10, want: `# HELP apiserver_storage_objects [STABLE] Number of stored objects at the time of last check split by kind. In case of a fetching error, the value will be -1. 
# TYPE apiserver_storage_objects gauge -apiserver_storage_objects{resource="foo"} 10 +apiserver_storage_objects{resource="bar.foo"} 10 `, }, { desc: "failed fetch", - resource: "bar", + resource: schema.GroupResource{Group: "foo", Resource: "bar"}, count: -1, want: `# HELP apiserver_storage_objects [STABLE] Number of stored objects at the time of last check split by kind. In case of a fetching error, the value will be -1. # TYPE apiserver_storage_objects gauge -apiserver_storage_objects{resource="bar"} -1 +apiserver_storage_objects{resource="bar.foo"} -1 `, }, } diff --git a/pkg/storage/etcd3/store.go b/pkg/storage/etcd3/store.go index ee5f3d676..b2b42d932 100644 --- a/pkg/storage/etcd3/store.go +++ b/pkg/storage/etcd3/store.go @@ -75,17 +75,16 @@ func (d authenticatedDataString) AuthenticatedData() []byte { var _ value.Context = authenticatedDataString("") type store struct { - client *kubernetes.Client - codec runtime.Codec - versioner storage.Versioner - transformer value.Transformer - pathPrefix string - groupResource schema.GroupResource - groupResourceString string - watcher *watcher - leaseManager *leaseManager - decoder Decoder - listErrAggrFactory func() ListErrorAggregator + client *kubernetes.Client + codec runtime.Codec + versioner storage.Versioner + transformer value.Transformer + pathPrefix string + groupResource schema.GroupResource + watcher *watcher + leaseManager *leaseManager + decoder Decoder + listErrAggrFactory func() ListErrorAggregator resourcePrefix string newListFunc func() runtime.Object @@ -179,17 +178,16 @@ func newStore(c *kubernetes.Client, codec runtime.Codec, newFunc, newListFunc fu w.objectType = reflect.TypeOf(newFunc()).String() } s := &store{ - client: c, - codec: codec, - versioner: versioner, - transformer: transformer, - pathPrefix: pathPrefix, - groupResource: groupResource, - groupResourceString: groupResource.String(), - watcher: w, - leaseManager: newDefaultLeaseManager(c.Client, leaseManagerConfig), - decoder: decoder, 
- listErrAggrFactory: listErrAggrFactory, + client: c, + codec: codec, + versioner: versioner, + transformer: transformer, + pathPrefix: pathPrefix, + groupResource: groupResource, + watcher: w, + leaseManager: newDefaultLeaseManager(c.Client, leaseManagerConfig), + decoder: decoder, + listErrAggrFactory: listErrAggrFactory, resourcePrefix: resourcePrefix, newListFunc: newListFunc, @@ -217,7 +215,7 @@ func (s *store) Get(ctx context.Context, key string, opts storage.GetOptions, ou } startTime := time.Now() getResp, err := s.client.Kubernetes.Get(ctx, preparedKey, kubernetes.GetOptions{}) - metrics.RecordEtcdRequest("get", s.groupResourceString, err, startTime) + metrics.RecordEtcdRequest("get", s.groupResource, err, startTime) if err != nil { return err } @@ -239,7 +237,7 @@ func (s *store) Get(ctx context.Context, key string, opts storage.GetOptions, ou err = s.decoder.Decode(data, out, getResp.KV.ModRevision) if err != nil { - recordDecodeError(s.groupResourceString, preparedKey) + recordDecodeError(s.groupResource, preparedKey) return err } return nil @@ -255,7 +253,8 @@ func (s *store) Create(ctx context.Context, key string, obj, out runtime.Object, attribute.String("audit-id", audit.GetAuditIDTruncated(ctx)), attribute.String("key", key), attribute.String("type", getTypeName(obj)), - attribute.String("resource", s.groupResourceString), + attribute.String("group", s.groupResource.Group), + attribute.String("resource", s.groupResource.Resource), ) defer span.End(500 * time.Millisecond) if version, err := s.versioner.ObjectResourceVersion(obj); err == nil && version != 0 { @@ -289,7 +288,7 @@ func (s *store) Create(ctx context.Context, key string, obj, out runtime.Object, startTime := time.Now() txnResp, err := s.client.Kubernetes.OptimisticPut(ctx, preparedKey, newData, 0, kubernetes.PutOptions{LeaseID: lease}) - metrics.RecordEtcdRequest("create", s.groupResourceString, err, startTime) + metrics.RecordEtcdRequest("create", s.groupResource, err, startTime) if 
err != nil { span.AddEvent("Txn call failed", attribute.String("err", err.Error())) return err @@ -304,7 +303,7 @@ func (s *store) Create(ctx context.Context, key string, obj, out runtime.Object, err = s.decoder.Decode(data, out, txnResp.Revision) if err != nil { span.AddEvent("decode failed", attribute.Int("len", len(data)), attribute.String("err", err.Error())) - recordDecodeError(s.groupResourceString, preparedKey) + recordDecodeError(s.groupResource, preparedKey) return err } span.AddEvent("decode succeeded", attribute.Int("len", len(data))) @@ -408,7 +407,7 @@ func (s *store) conditionalDelete( txnResp, err := s.client.Kubernetes.OptimisticDelete(ctx, key, origState.rev, kubernetes.DeleteOptions{ GetOnFailure: true, }) - metrics.RecordEtcdRequest("delete", s.groupResourceString, err, startTime) + metrics.RecordEtcdRequest("delete", s.groupResource, err, startTime) if err != nil { return err } @@ -425,7 +424,7 @@ func (s *store) conditionalDelete( if !skipTransformDecode { err = s.decoder.Decode(origState.data, out, txnResp.Revision) if err != nil { - recordDecodeError(s.groupResourceString, key) + recordDecodeError(s.groupResource, key) return err } } @@ -445,7 +444,8 @@ func (s *store) GuaranteedUpdate( attribute.String("audit-id", audit.GetAuditIDTruncated(ctx)), attribute.String("key", key), attribute.String("type", getTypeName(destination)), - attribute.String("resource", s.groupResourceString)) + attribute.String("group", s.groupResource.Group), + attribute.String("resource", s.groupResource.Resource)) defer span.End(500 * time.Millisecond) v, err := conversion.EnforcePtr(destination) @@ -542,7 +542,7 @@ func (s *store) GuaranteedUpdate( if !origState.stale { err = s.decoder.Decode(origState.data, destination, origState.rev) if err != nil { - recordDecodeError(s.groupResourceString, preparedKey) + recordDecodeError(s.groupResource, preparedKey) return err } return nil @@ -571,7 +571,7 @@ func (s *store) GuaranteedUpdate( GetOnFailure: true, LeaseID: 
lease, }) - metrics.RecordEtcdRequest("update", s.groupResourceString, err, startTime) + metrics.RecordEtcdRequest("update", s.groupResource, err, startTime) if err != nil { span.AddEvent("Txn call failed", attribute.String("err", err.Error())) return err @@ -592,7 +592,7 @@ func (s *store) GuaranteedUpdate( err = s.decoder.Decode(data, destination, txnResp.Revision) if err != nil { span.AddEvent("decode failed", attribute.Int("len", len(data)), attribute.String("err", err.Error())) - recordDecodeError(s.groupResourceString, preparedKey) + recordDecodeError(s.groupResource, preparedKey) return err } span.AddEvent("decode succeeded", attribute.Int("len", len(data))) @@ -632,7 +632,7 @@ func (s *store) Count(key string) (int64, error) { startTime := time.Now() count, err := s.client.Kubernetes.Count(context.Background(), preparedKey, kubernetes.CountOptions{}) - metrics.RecordEtcdRequest("listWithCount", preparedKey, err, startTime) + metrics.RecordEtcdRequest("listWithCount", s.groupResource, err, startTime) if err != nil { return 0, err } @@ -726,7 +726,7 @@ func (s *store) GetList(ctx context.Context, key string, opts storage.ListOption // get them recorded even in error cases. 
defer func() { numReturn := v.Len() - metrics.RecordStorageListMetrics(s.groupResourceString, numFetched, numEvald, numReturn) + metrics.RecordStorageListMetrics(s.groupResource, numFetched, numEvald, numReturn) }() metricsOp := "get" @@ -742,7 +742,7 @@ func (s *store) GetList(ctx context.Context, key string, opts storage.ListOption Limit: limit, Continue: continueKey, }) - metrics.RecordEtcdRequest(metricsOp, s.groupResourceString, err, startTime) + metrics.RecordEtcdRequest(metricsOp, s.groupResource, err, startTime) if err != nil { return interpretListError(err, len(opts.Predicate.Continue) > 0, continueKey, keyPrefix) } @@ -794,7 +794,7 @@ func (s *store) GetList(ctx context.Context, key string, opts storage.ListOption obj, err := s.decoder.DecodeListItem(ctx, data, uint64(kv.ModRevision), newItemFunc) if err != nil { - recordDecodeError(s.groupResourceString, string(kv.Key)) + recordDecodeError(s.groupResource, string(kv.Key)) if done := aggregator.Aggregate(string(kv.Key), err); done { return aggregator.Err() } @@ -927,7 +927,7 @@ func (s *store) getCurrentState(ctx context.Context, key string, v reflect.Value return func() (*objState, error) { startTime := time.Now() getResp, err := s.client.Kubernetes.Get(ctx, key, kubernetes.GetOptions{}) - metrics.RecordEtcdRequest("get", s.groupResourceString, err, startTime) + metrics.RecordEtcdRequest("get", s.groupResource, err, startTime) if err != nil { return nil, err } @@ -979,7 +979,7 @@ func (s *store) getState(ctx context.Context, kv *mvccpb.KeyValue, key string, v state.stale = stale if err := s.decoder.Decode(state.data, state.obj, state.rev); err != nil { - recordDecodeError(s.groupResourceString, key) + recordDecodeError(s.groupResource, key) return nil, err } } @@ -1073,9 +1073,9 @@ func (s *store) prepareKey(key string) (string, error) { } // recordDecodeError record decode error split by object type. 
-func recordDecodeError(resource string, key string) { - metrics.RecordDecodeError(resource) - klog.V(4).Infof("Decoding %s \"%s\" failed", resource, key) +func recordDecodeError(groupResource schema.GroupResource, key string) { + metrics.RecordDecodeError(groupResource) + klog.V(4).Infof("Decoding %s \"%s\" failed", groupResource, key) } // getTypeName returns type name of an object for reporting purposes. diff --git a/pkg/storage/etcd3/watcher.go b/pkg/storage/etcd3/watcher.go index b006f6084..63c664a53 100644 --- a/pkg/storage/etcd3/watcher.go +++ b/pkg/storage/etcd3/watcher.go @@ -287,7 +287,7 @@ func (wc *watchChan) sync() error { for { startTime := time.Now() getResp, err = wc.watcher.client.KV.Get(wc.ctx, preparedKey, opts...) - metrics.RecordEtcdRequest(metricsOp, wc.watcher.groupResource.String(), err, startTime) + metrics.RecordEtcdRequest(metricsOp, wc.watcher.groupResource, err, startTime) if err != nil { return interpretListError(err, true, preparedKey, wc.key) } @@ -397,12 +397,12 @@ func (wc *watchChan) startWatching(watchClosedCh chan struct{}, initialEventsEnd } if wres.IsProgressNotify() { wc.queueEvent(progressNotifyEvent(wres.Header.GetRevision())) - metrics.RecordEtcdBookmark(wc.watcher.groupResource.String()) + metrics.RecordEtcdBookmark(wc.watcher.groupResource) continue } for _, e := range wres.Events { - metrics.RecordEtcdEvent(wc.watcher.groupResource.String()) + metrics.RecordEtcdEvent(wc.watcher.groupResource) parsedEvent, err := parseEvent(e) if err != nil { logWatchChannelErr(err)