Merge pull request #132893 from serathius/resource-size
Add apiserver_resource_size_estimate_bytes metric Kubernetes-commit: 81986595c87365f2155b20af1d361bf2fb96b544
This commit is contained in:
commit
cb7f41362b
2
go.mod
2
go.mod
|
@ -49,7 +49,7 @@ require (
|
|||
gopkg.in/go-jose/go-jose.v2 v2.6.3
|
||||
gopkg.in/natefinch/lumberjack.v2 v2.2.1
|
||||
k8s.io/api v0.0.0-20250715090528-7da28ad7db85
|
||||
k8s.io/apimachinery v0.0.0-20250715090235-1ebcba2516a6
|
||||
k8s.io/apimachinery v0.0.0-20250716210236-a75d3d8a0f22
|
||||
k8s.io/client-go v0.0.0-20250715090929-f78427e36774
|
||||
k8s.io/component-base v0.0.0-20250715211315-7e84d47542a1
|
||||
k8s.io/klog/v2 v2.130.1
|
||||
|
|
4
go.sum
4
go.sum
|
@ -298,8 +298,8 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
|||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
k8s.io/api v0.0.0-20250715090528-7da28ad7db85 h1:QfiiAaLAQKj+yvMS1ySLpj3UWzxJixBEaWvKNtzwnQM=
|
||||
k8s.io/api v0.0.0-20250715090528-7da28ad7db85/go.mod h1:MCwhlGL+lP5brf6CuU20fWxaLh/8tUSlu4VM1cOD8Lo=
|
||||
k8s.io/apimachinery v0.0.0-20250715090235-1ebcba2516a6 h1:lH8NMkqxmCWN1CvOiqbfphUcl+EAk95z+3Le9bfEbJ4=
|
||||
k8s.io/apimachinery v0.0.0-20250715090235-1ebcba2516a6/go.mod h1:TP8uyOuDEOnzGpLOdffo8hPnIjVDljZCxCM/fruV+5M=
|
||||
k8s.io/apimachinery v0.0.0-20250716210236-a75d3d8a0f22 h1:ffG+yKzD6c5mG3PMuQU3j+ifyOUViYFMNRrijfsFVSM=
|
||||
k8s.io/apimachinery v0.0.0-20250716210236-a75d3d8a0f22/go.mod h1:TP8uyOuDEOnzGpLOdffo8hPnIjVDljZCxCM/fruV+5M=
|
||||
k8s.io/client-go v0.0.0-20250715090929-f78427e36774 h1:OJXhumReMNIzlpFEEQvl89u+u7KmQ6fa4I3TpZQYjIg=
|
||||
k8s.io/client-go v0.0.0-20250715090929-f78427e36774/go.mod h1:y02d1W5RQ3IDA7qs1unUQEkERwkgLrd7fuDANdUN31E=
|
||||
k8s.io/component-base v0.0.0-20250715211315-7e84d47542a1 h1:K9Ew2I/QQt4qsJkMKrumXF94mlbf800mRkc1KJPCADs=
|
||||
|
|
|
@ -1668,18 +1668,18 @@ func (e *Store) startObservingCount(period time.Duration, objectCountTracker flo
|
|||
stopCh := make(chan struct{})
|
||||
go wait.JitterUntil(func() {
|
||||
stats, err := e.Storage.Stats(ctx)
|
||||
metrics.UpdateStoreStats(e.DefaultQualifiedResource, stats, err)
|
||||
if err != nil {
|
||||
klog.V(5).InfoS("Failed to update storage count metric", "err", err)
|
||||
stats.ObjectCount = -1
|
||||
return
|
||||
}
|
||||
|
||||
metrics.UpdateObjectCount(e.DefaultQualifiedResource, stats.ObjectCount)
|
||||
if objectCountTracker != nil {
|
||||
objectCountTracker.Set(resourceName, stats)
|
||||
}
|
||||
}, period, resourceCountPollPeriodJitter, true, stopCh)
|
||||
return func() {
|
||||
metrics.DeleteObjectCount(e.DefaultQualifiedResource)
|
||||
metrics.DeleteStoreStats(e.DefaultQualifiedResource)
|
||||
close(stopCh)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,6 +23,9 @@ import (
|
|||
"time"
|
||||
|
||||
"k8s.io/apimachinery/pkg/runtime/schema"
|
||||
"k8s.io/apiserver/pkg/features"
|
||||
"k8s.io/apiserver/pkg/storage"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
compbasemetrics "k8s.io/component-base/metrics"
|
||||
"k8s.io/component-base/metrics/legacyregistry"
|
||||
"k8s.io/klog/v2"
|
||||
|
@ -75,6 +78,14 @@ var (
|
|||
},
|
||||
[]string{"resource"},
|
||||
)
|
||||
resourceSizeEstimate = compbasemetrics.NewGaugeVec(
|
||||
&compbasemetrics.GaugeOpts{
|
||||
Name: "apiserver_resource_size_estimate_bytes",
|
||||
Help: "Estimated size of stored objects in database. Estimate is based on sum of last observed sizes of serialized objects. In case of a fetching error, the value will be -1.",
|
||||
StabilityLevel: compbasemetrics.ALPHA,
|
||||
},
|
||||
[]string{"group", "resource"},
|
||||
)
|
||||
dbTotalSize = compbasemetrics.NewGaugeVec(
|
||||
&compbasemetrics.GaugeOpts{
|
||||
Subsystem: "apiserver",
|
||||
|
@ -166,6 +177,7 @@ func Register() {
|
|||
legacyregistry.MustRegister(etcdRequestCounts)
|
||||
legacyregistry.MustRegister(etcdRequestErrorCounts)
|
||||
legacyregistry.MustRegister(objectCounts)
|
||||
legacyregistry.MustRegister(resourceSizeEstimate)
|
||||
legacyregistry.MustRegister(dbTotalSize)
|
||||
legacyregistry.CustomMustRegister(storageMonitor)
|
||||
legacyregistry.MustRegister(etcdEventsReceivedCounts)
|
||||
|
@ -179,14 +191,31 @@ func Register() {
|
|||
})
|
||||
}
|
||||
|
||||
// UpdateObjectCount sets the apiserver_storage_object_counts metric.
|
||||
func UpdateObjectCount(groupResource schema.GroupResource, count int64) {
|
||||
objectCounts.WithLabelValues(groupResource.String()).Set(float64(count))
|
||||
// UpdateStoreStats sets the stats metrics.
|
||||
func UpdateStoreStats(groupResource schema.GroupResource, stats storage.Stats, err error) {
|
||||
if err != nil {
|
||||
objectCounts.WithLabelValues(groupResource.String()).Set(-1)
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.SizeBasedListCostEstimate) {
|
||||
resourceSizeEstimate.WithLabelValues(groupResource.Group, groupResource.Resource).Set(-1)
|
||||
}
|
||||
return
|
||||
}
|
||||
objectCounts.WithLabelValues(groupResource.String()).Set(float64(stats.ObjectCount))
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.SizeBasedListCostEstimate) {
|
||||
if stats.ObjectCount > 0 && stats.EstimatedAverageObjectSizeBytes == 0 {
|
||||
resourceSizeEstimate.WithLabelValues(groupResource.Group, groupResource.Resource).Set(-1)
|
||||
} else {
|
||||
resourceSizeEstimate.WithLabelValues(groupResource.Group, groupResource.Resource).Set(float64(stats.EstimatedAverageObjectSizeBytes * stats.ObjectCount))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DeleteObjectCount deletes the apiserver_storage_object_counts metric.
|
||||
func DeleteObjectCount(groupResource schema.GroupResource) {
|
||||
// DeleteStoreStats delete the stats metrics.
|
||||
func DeleteStoreStats(groupResource schema.GroupResource) {
|
||||
objectCounts.DeleteLabelValues(groupResource.String())
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.SizeBasedListCostEstimate) {
|
||||
resourceSizeEstimate.DeleteLabelValues(groupResource.Group, groupResource.Resource)
|
||||
}
|
||||
}
|
||||
|
||||
// RecordEtcdRequest updates and sets the etcd_request_duration_seconds,
|
||||
|
|
|
@ -24,6 +24,7 @@ import (
|
|||
"time"
|
||||
|
||||
"k8s.io/apimachinery/pkg/runtime/schema"
|
||||
"k8s.io/apiserver/pkg/storage"
|
||||
"k8s.io/component-base/metrics"
|
||||
"k8s.io/component-base/metrics/testutil"
|
||||
)
|
||||
|
@ -226,33 +227,64 @@ func TestStorageSizeCollector(t *testing.T) {
|
|||
|
||||
}
|
||||
|
||||
func TestUpdateObjectCount(t *testing.T) {
|
||||
func TestUpdateStoreStats(t *testing.T) {
|
||||
registry := metrics.NewKubeRegistry()
|
||||
registry.Register(objectCounts)
|
||||
testedMetrics := "apiserver_storage_objects"
|
||||
registry.MustRegister(resourceSizeEstimate)
|
||||
|
||||
testCases := []struct {
|
||||
desc string
|
||||
resource schema.GroupResource
|
||||
count int64
|
||||
stats storage.Stats
|
||||
err error
|
||||
want string
|
||||
}{
|
||||
{
|
||||
desc: "successful fetch",
|
||||
desc: "successful object count",
|
||||
resource: schema.GroupResource{Group: "foo", Resource: "bar"},
|
||||
count: 10,
|
||||
want: `# HELP apiserver_storage_objects [STABLE] Number of stored objects at the time of last check split by kind. In case of a fetching error, the value will be -1.
|
||||
stats: storage.Stats{ObjectCount: 10},
|
||||
want: `# HELP apiserver_resource_size_estimate_bytes [ALPHA] Estimated size of stored objects in database. Estimate is based on sum of last observed sizes of serialized objects. In case of a fetching error, the value will be -1.
|
||||
# TYPE apiserver_resource_size_estimate_bytes gauge
|
||||
apiserver_resource_size_estimate_bytes{group="foo",resource="bar"} -1
|
||||
# HELP apiserver_storage_objects [STABLE] Number of stored objects at the time of last check split by kind. In case of a fetching error, the value will be -1.
|
||||
# TYPE apiserver_storage_objects gauge
|
||||
apiserver_storage_objects{resource="bar.foo"} 10
|
||||
`,
|
||||
},
|
||||
{
|
||||
desc: "successful object count and size",
|
||||
resource: schema.GroupResource{Group: "foo", Resource: "bar"},
|
||||
stats: storage.Stats{ObjectCount: 10, EstimatedAverageObjectSizeBytes: 10},
|
||||
want: `# HELP apiserver_resource_size_estimate_bytes [ALPHA] Estimated size of stored objects in database. Estimate is based on sum of last observed sizes of serialized objects. In case of a fetching error, the value will be -1.
|
||||
# TYPE apiserver_resource_size_estimate_bytes gauge
|
||||
apiserver_resource_size_estimate_bytes{group="foo",resource="bar"} 100
|
||||
# HELP apiserver_storage_objects [STABLE] Number of stored objects at the time of last check split by kind. In case of a fetching error, the value will be -1.
|
||||
# TYPE apiserver_storage_objects gauge
|
||||
apiserver_storage_objects{resource="bar.foo"} 10
|
||||
`,
|
||||
},
|
||||
{
|
||||
desc: "empty object count",
|
||||
resource: schema.GroupResource{Group: "foo", Resource: "bar"},
|
||||
stats: storage.Stats{ObjectCount: 0, EstimatedAverageObjectSizeBytes: 0},
|
||||
want: `# HELP apiserver_resource_size_estimate_bytes [ALPHA] Estimated size of stored objects in database. Estimate is based on sum of last observed sizes of serialized objects. In case of a fetching error, the value will be -1.
|
||||
# TYPE apiserver_resource_size_estimate_bytes gauge
|
||||
apiserver_resource_size_estimate_bytes{group="foo",resource="bar"} 0
|
||||
# HELP apiserver_storage_objects [STABLE] Number of stored objects at the time of last check split by kind. In case of a fetching error, the value will be -1.
|
||||
# TYPE apiserver_storage_objects gauge
|
||||
apiserver_storage_objects{resource="bar.foo"} 0
|
||||
`,
|
||||
},
|
||||
{
|
||||
desc: "failed fetch",
|
||||
resource: schema.GroupResource{Group: "foo", Resource: "bar"},
|
||||
count: -1,
|
||||
err: errors.New("dummy"),
|
||||
want: `# HELP apiserver_storage_objects [STABLE] Number of stored objects at the time of last check split by kind. In case of a fetching error, the value will be -1.
|
||||
# TYPE apiserver_storage_objects gauge
|
||||
apiserver_storage_objects{resource="bar.foo"} -1
|
||||
# HELP apiserver_resource_size_estimate_bytes [ALPHA] Estimated size of stored objects in database. Estimate is based on sum of last observed sizes of serialized objects. In case of a fetching error, the value will be -1.
|
||||
# TYPE apiserver_resource_size_estimate_bytes gauge
|
||||
apiserver_resource_size_estimate_bytes{group="foo",resource="bar"} -1
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
@ -260,46 +292,53 @@ apiserver_storage_objects{resource="bar.foo"} -1
|
|||
for _, test := range testCases {
|
||||
t.Run(test.desc, func(t *testing.T) {
|
||||
defer registry.Reset()
|
||||
UpdateObjectCount(test.resource, test.count)
|
||||
if err := testutil.GatherAndCompare(registry, strings.NewReader(test.want), testedMetrics); err != nil {
|
||||
UpdateStoreStats(test.resource, test.stats, test.err)
|
||||
if err := testutil.GatherAndCompare(registry, strings.NewReader(test.want), "apiserver_storage_objects", "apiserver_resource_size_estimate_bytes"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteObjectCount(t *testing.T) {
|
||||
func TestDeleteStoreStats(t *testing.T) {
|
||||
registry := metrics.NewKubeRegistry()
|
||||
registry.MustRegister(objectCounts)
|
||||
testedMetrics := "apiserver_storage_objects"
|
||||
registry.MustRegister(resourceSizeEstimate)
|
||||
|
||||
UpdateObjectCount(schema.GroupResource{Group: "foo1", Resource: "bar1"}, int64(10))
|
||||
UpdateObjectCount(schema.GroupResource{Group: "foo2", Resource: "bar2"}, int64(20))
|
||||
UpdateStoreStats(schema.GroupResource{Group: "foo1", Resource: "bar1"}, storage.Stats{ObjectCount: 10}, nil)
|
||||
UpdateStoreStats(schema.GroupResource{Group: "foo2", Resource: "bar2"}, storage.Stats{ObjectCount: 20, EstimatedAverageObjectSizeBytes: 10}, nil)
|
||||
|
||||
expectedMetrics := `# HELP apiserver_storage_objects [STABLE] Number of stored objects at the time of last check split by kind. In case of a fetching error, the value will be -1.
|
||||
expectedMetrics := `# HELP apiserver_resource_size_estimate_bytes [ALPHA] Estimated size of stored objects in database. Estimate is based on sum of last observed sizes of serialized objects. In case of a fetching error, the value will be -1.
|
||||
# TYPE apiserver_resource_size_estimate_bytes gauge
|
||||
apiserver_resource_size_estimate_bytes{group="foo1",resource="bar1"} -1
|
||||
apiserver_resource_size_estimate_bytes{group="foo2",resource="bar2"} 200
|
||||
# HELP apiserver_storage_objects [STABLE] Number of stored objects at the time of last check split by kind. In case of a fetching error, the value will be -1.
|
||||
# TYPE apiserver_storage_objects gauge
|
||||
apiserver_storage_objects{resource="bar1.foo1"} 10
|
||||
apiserver_storage_objects{resource="bar2.foo2"} 20
|
||||
`
|
||||
if err := testutil.GatherAndCompare(registry, strings.NewReader(expectedMetrics), testedMetrics); err != nil {
|
||||
if err := testutil.GatherAndCompare(registry, strings.NewReader(expectedMetrics), "apiserver_storage_objects", "apiserver_resource_size_estimate_bytes"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
DeleteObjectCount(schema.GroupResource{Group: "foo1", Resource: "bar1"})
|
||||
DeleteStoreStats(schema.GroupResource{Group: "foo1", Resource: "bar1"})
|
||||
|
||||
expectedMetrics = `# HELP apiserver_storage_objects [STABLE] Number of stored objects at the time of last check split by kind. In case of a fetching error, the value will be -1.
|
||||
expectedMetrics = `# HELP apiserver_resource_size_estimate_bytes [ALPHA] Estimated size of stored objects in database. Estimate is based on sum of last observed sizes of serialized objects. In case of a fetching error, the value will be -1.
|
||||
# TYPE apiserver_resource_size_estimate_bytes gauge
|
||||
apiserver_resource_size_estimate_bytes{group="foo2",resource="bar2"} 200
|
||||
# HELP apiserver_storage_objects [STABLE] Number of stored objects at the time of last check split by kind. In case of a fetching error, the value will be -1.
|
||||
# TYPE apiserver_storage_objects gauge
|
||||
apiserver_storage_objects{resource="bar2.foo2"} 20
|
||||
`
|
||||
if err := testutil.GatherAndCompare(registry, strings.NewReader(expectedMetrics), testedMetrics); err != nil {
|
||||
if err := testutil.GatherAndCompare(registry, strings.NewReader(expectedMetrics), "apiserver_storage_objects", "apiserver_resource_size_estimate_bytes"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
DeleteObjectCount(schema.GroupResource{Group: "foo2", Resource: "bar2"})
|
||||
DeleteStoreStats(schema.GroupResource{Group: "foo2", Resource: "bar2"})
|
||||
expectedMetrics = `# HELP apiserver_storage_objects [STABLE] Number of stored objects at the time of last check split by kind. In case of a fetching error, the value will be -1.
|
||||
# TYPE apiserver_storage_objects gauge
|
||||
`
|
||||
if err := testutil.GatherAndCompare(registry, strings.NewReader(expectedMetrics), testedMetrics); err != nil {
|
||||
if err := testutil.GatherAndCompare(registry, strings.NewReader(expectedMetrics), "apiserver_storage_objects", "apiserver_resource_size_estimate_bytes"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -97,17 +97,6 @@ type objectCountTracker struct {
|
|||
}
|
||||
|
||||
func (t *objectCountTracker) Set(groupResource string, stats storage.Stats) {
|
||||
if stats.ObjectCount <= -1 {
|
||||
// a value of -1 indicates that the 'Count' call failed to contact
|
||||
// the storage layer, in most cases this error can be transient.
|
||||
// we will continue to work with the count that is in the cache
|
||||
// up to a certain threshold defined by staleTolerationThreshold.
|
||||
// in case this becomes a non transient error then the count for
|
||||
// the given resource will eventually be removed from
|
||||
// the cache by the pruner.
|
||||
return
|
||||
}
|
||||
|
||||
now := t.clock.Now()
|
||||
|
||||
// lock for writing
|
||||
|
|
|
@ -30,18 +30,14 @@ func TestStorageObjectCountTracker(t *testing.T) {
|
|||
tests := []struct {
|
||||
name string
|
||||
lastUpdated time.Duration
|
||||
skipSetting bool
|
||||
count int64
|
||||
errExpected error
|
||||
countExpected int64
|
||||
}{
|
||||
{
|
||||
name: "object count not tracked for given resource",
|
||||
count: -2,
|
||||
errExpected: ObjectCountNotFoundErr,
|
||||
},
|
||||
{
|
||||
name: "transient failure",
|
||||
count: -1,
|
||||
skipSetting: true,
|
||||
errExpected: ObjectCountNotFoundErr,
|
||||
},
|
||||
{
|
||||
|
@ -76,7 +72,9 @@ func TestStorageObjectCountTracker(t *testing.T) {
|
|||
key := "foo.bar.resource"
|
||||
now := time.Now()
|
||||
fakeClock.SetTime(now.Add(-test.lastUpdated))
|
||||
tracker.Set(key, storage.Stats{ObjectCount: test.count})
|
||||
if !test.skipSetting {
|
||||
tracker.Set(key, storage.Stats{ObjectCount: test.count})
|
||||
}
|
||||
|
||||
fakeClock.SetTime(now)
|
||||
stats, err := tracker.Get(key)
|
||||
|
|
Loading…
Reference in New Issue