Add update_mode label to VPA updater metrics
This commit is contained in:
parent
3d748040d9
commit
36804f199c
|
|
@ -41,6 +41,7 @@ require (
|
|||
github.com/inconshreveable/mousetrap v1.1.0 // indirect
|
||||
github.com/josharian/intern v1.0.0 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/kylelemons/godebug v1.1.0 // indirect
|
||||
github.com/mailru/easyjson v0.9.0 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
|
|
|
|||
|
|
@ -230,7 +230,8 @@ func (u *updater) RunOnce(ctx context.Context) {
|
|||
// to contain only Pods controlled by a VPA in auto, recreate, or inPlaceOrRecreate mode
|
||||
for vpa, livePods := range controlledPods {
|
||||
vpaSize := len(livePods)
|
||||
controlledPodsCounter.Add(vpaSize, vpaSize)
|
||||
updateMode := vpa_api_util.GetUpdateMode(vpa)
|
||||
controlledPodsCounter.Add(vpaSize, updateMode, vpaSize)
|
||||
creatorToSingleGroupStatsMap, podToReplicaCreatorMap, err := u.restrictionFactory.GetCreatorMaps(livePods, vpa)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to get creator maps")
|
||||
|
|
@ -242,7 +243,6 @@ func (u *updater) RunOnce(ctx context.Context) {
|
|||
|
||||
podsForInPlace := make([]*apiv1.Pod, 0)
|
||||
podsForEviction := make([]*apiv1.Pod, 0)
|
||||
updateMode := vpa_api_util.GetUpdateMode(vpa)
|
||||
|
||||
if updateMode == vpa_types.UpdateModeInPlaceOrRecreate && features.Enabled(features.InPlaceOrRecreate) {
|
||||
podsForInPlace = u.getPodsUpdateOrder(filterNonInPlaceUpdatablePods(livePods, inPlaceLimiter), vpa)
|
||||
|
|
@ -253,7 +253,7 @@ func (u *updater) RunOnce(ctx context.Context) {
|
|||
klog.InfoS("Warning: feature gate is not enabled for this updateMode", "featuregate", features.InPlaceOrRecreate, "updateMode", vpa_types.UpdateModeInPlaceOrRecreate)
|
||||
}
|
||||
podsForEviction = u.getPodsUpdateOrder(filterNonEvictablePods(livePods, evictionLimiter), vpa)
|
||||
evictablePodsCounter.Add(vpaSize, len(podsForEviction))
|
||||
evictablePodsCounter.Add(vpaSize, updateMode, len(podsForEviction))
|
||||
}
|
||||
|
||||
withInPlaceUpdatable := false
|
||||
|
|
@ -304,7 +304,7 @@ func (u *updater) RunOnce(ctx context.Context) {
|
|||
klog.V(0).InfoS("Eviction failed", "error", evictErr, "pod", klog.KObj(pod))
|
||||
} else {
|
||||
withEvicted = true
|
||||
metrics_updater.AddEvictedPod(vpaSize)
|
||||
metrics_updater.AddEvictedPod(vpaSize, updateMode)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -315,10 +315,10 @@ func (u *updater) RunOnce(ctx context.Context) {
|
|||
vpasWithInPlaceUpdatedPodsCounter.Add(vpaSize, 1)
|
||||
}
|
||||
if withEvictable {
|
||||
vpasWithEvictablePodsCounter.Add(vpaSize, 1)
|
||||
vpasWithEvictablePodsCounter.Add(vpaSize, updateMode, 1)
|
||||
}
|
||||
if withEvicted {
|
||||
vpasWithEvictedPodsCounter.Add(vpaSize, 1)
|
||||
vpasWithEvictedPodsCounter.Add(vpaSize, updateMode, 1)
|
||||
}
|
||||
}
|
||||
timer.ObserveStep("EvictPods")
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ import (
|
|||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
|
||||
vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
|
||||
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics"
|
||||
)
|
||||
|
||||
|
|
@ -35,13 +36,20 @@ type SizeBasedGauge struct {
|
|||
gauge *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
// UpdateModeAndSizeBasedGauge is a wrapper for incrementally recording values
|
||||
// indexed by log2(VPA size) and update mode
|
||||
type UpdateModeAndSizeBasedGauge struct {
|
||||
values [metrics.MaxVpaSizeLog]map[vpa_types.UpdateMode]int
|
||||
gauge *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
var (
|
||||
controlledCount = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: metricsNamespace,
|
||||
Name: "controlled_pods_total",
|
||||
Help: "Number of Pods controlled by VPA updater.",
|
||||
}, []string{"vpa_size_log2"},
|
||||
}, []string{"vpa_size_log2", "update_mode"},
|
||||
)
|
||||
|
||||
evictableCount = prometheus.NewGaugeVec(
|
||||
|
|
@ -49,7 +57,7 @@ var (
|
|||
Namespace: metricsNamespace,
|
||||
Name: "evictable_pods_total",
|
||||
Help: "Number of Pods matching evicition criteria.",
|
||||
}, []string{"vpa_size_log2"},
|
||||
}, []string{"vpa_size_log2", "update_mode"},
|
||||
)
|
||||
|
||||
evictedCount = prometheus.NewCounterVec(
|
||||
|
|
@ -57,7 +65,7 @@ var (
|
|||
Namespace: metricsNamespace,
|
||||
Name: "evicted_pods_total",
|
||||
Help: "Number of Pods evicted by Updater to apply a new recommendation.",
|
||||
}, []string{"vpa_size_log2"},
|
||||
}, []string{"vpa_size_log2", "update_mode"},
|
||||
)
|
||||
|
||||
vpasWithEvictablePodsCount = prometheus.NewGaugeVec(
|
||||
|
|
@ -65,7 +73,7 @@ var (
|
|||
Namespace: metricsNamespace,
|
||||
Name: "vpas_with_evictable_pods_total",
|
||||
Help: "Number of VPA objects with at least one Pod matching evicition criteria.",
|
||||
}, []string{"vpa_size_log2"},
|
||||
}, []string{"vpa_size_log2", "update_mode"},
|
||||
)
|
||||
|
||||
vpasWithEvictedPodsCount = prometheus.NewGaugeVec(
|
||||
|
|
@ -73,7 +81,7 @@ var (
|
|||
Namespace: metricsNamespace,
|
||||
Name: "vpas_with_evicted_pods_total",
|
||||
Help: "Number of VPA objects with at least one evicted Pod.",
|
||||
}, []string{"vpa_size_log2"},
|
||||
}, []string{"vpa_size_log2", "update_mode"},
|
||||
)
|
||||
|
||||
inPlaceUpdatableCount = prometheus.NewGaugeVec(
|
||||
|
|
@ -138,30 +146,41 @@ func newSizeBasedGauge(gauge *prometheus.GaugeVec) *SizeBasedGauge {
|
|||
}
|
||||
}
|
||||
|
||||
// newModeAndSizeBasedGauge provides a wrapper for counting items in a loop
|
||||
func newModeAndSizeBasedGauge(gauge *prometheus.GaugeVec) *UpdateModeAndSizeBasedGauge {
|
||||
g := &UpdateModeAndSizeBasedGauge{
|
||||
gauge: gauge,
|
||||
}
|
||||
for i := range g.values {
|
||||
g.values[i] = make(map[vpa_types.UpdateMode]int)
|
||||
}
|
||||
return g
|
||||
}
|
||||
|
||||
// NewControlledPodsCounter returns a wrapper for counting Pods controlled by Updater
|
||||
func NewControlledPodsCounter() *SizeBasedGauge {
|
||||
return newSizeBasedGauge(controlledCount)
|
||||
func NewControlledPodsCounter() *UpdateModeAndSizeBasedGauge {
|
||||
return newModeAndSizeBasedGauge(controlledCount)
|
||||
}
|
||||
|
||||
// NewEvictablePodsCounter returns a wrapper for counting Pods which are matching eviction criteria
|
||||
func NewEvictablePodsCounter() *SizeBasedGauge {
|
||||
return newSizeBasedGauge(evictableCount)
|
||||
func NewEvictablePodsCounter() *UpdateModeAndSizeBasedGauge {
|
||||
return newModeAndSizeBasedGauge(evictableCount)
|
||||
}
|
||||
|
||||
// NewVpasWithEvictablePodsCounter returns a wrapper for counting VPA objects with Pods matching eviction criteria
|
||||
func NewVpasWithEvictablePodsCounter() *SizeBasedGauge {
|
||||
return newSizeBasedGauge(vpasWithEvictablePodsCount)
|
||||
func NewVpasWithEvictablePodsCounter() *UpdateModeAndSizeBasedGauge {
|
||||
return newModeAndSizeBasedGauge(vpasWithEvictablePodsCount)
|
||||
}
|
||||
|
||||
// NewVpasWithEvictedPodsCounter returns a wrapper for counting VPA objects with evicted Pods
|
||||
func NewVpasWithEvictedPodsCounter() *SizeBasedGauge {
|
||||
return newSizeBasedGauge(vpasWithEvictedPodsCount)
|
||||
func NewVpasWithEvictedPodsCounter() *UpdateModeAndSizeBasedGauge {
|
||||
return newModeAndSizeBasedGauge(vpasWithEvictedPodsCount)
|
||||
}
|
||||
|
||||
// AddEvictedPod increases the counter of pods evicted by Updater, by given VPA size
|
||||
func AddEvictedPod(vpaSize int) {
|
||||
func AddEvictedPod(vpaSize int, mode vpa_types.UpdateMode) {
|
||||
log2 := metrics.GetVpaSizeLog2(vpaSize)
|
||||
evictedCount.WithLabelValues(strconv.Itoa(log2)).Inc()
|
||||
evictedCount.WithLabelValues(strconv.Itoa(log2), string(mode)).Inc()
|
||||
}
|
||||
|
||||
// NewInPlaceUpdatablePodsCounter returns a wrapper for counting Pods which are matching in-place update criteria
|
||||
|
|
@ -203,3 +222,19 @@ func (g *SizeBasedGauge) Observe() {
|
|||
g.gauge.WithLabelValues(strconv.Itoa(log2)).Set(float64(value))
|
||||
}
|
||||
}
|
||||
|
||||
// Add increases the counter for the given VPA size and VPA update mode.
|
||||
func (g *UpdateModeAndSizeBasedGauge) Add(vpaSize int, vpaUpdateMode vpa_types.UpdateMode, value int) {
|
||||
log2 := metrics.GetVpaSizeLog2(vpaSize)
|
||||
g.values[log2][vpaUpdateMode] += value
|
||||
}
|
||||
|
||||
// Observe stores the recorded values into metrics object associated with the
|
||||
// wrapper
|
||||
func (g *UpdateModeAndSizeBasedGauge) Observe() {
|
||||
for log2, valueMap := range g.values {
|
||||
for vpaMode, value := range valueMap {
|
||||
g.gauge.WithLabelValues(strconv.Itoa(log2), string(vpaMode)).Set(float64(value))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,297 @@
|
|||
/*
|
||||
Copyright 2025 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package updater
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
|
||||
vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
|
||||
)
|
||||
|
||||
func TestAddEvictedPod(t *testing.T) {
|
||||
testCases := []struct {
|
||||
desc string
|
||||
vpaSize int
|
||||
mode vpa_types.UpdateMode
|
||||
log2 string
|
||||
}{
|
||||
{
|
||||
desc: "VPA size 5, mode Auto",
|
||||
vpaSize: 5,
|
||||
mode: vpa_types.UpdateModeAuto,
|
||||
log2: "2",
|
||||
},
|
||||
{
|
||||
desc: "VPA size 10, mode Off",
|
||||
vpaSize: 10,
|
||||
mode: vpa_types.UpdateModeOff,
|
||||
log2: "3",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
t.Cleanup(evictedCount.Reset)
|
||||
AddEvictedPod(tc.vpaSize, tc.mode)
|
||||
val := testutil.ToFloat64(evictedCount.WithLabelValues(tc.log2, string(tc.mode)))
|
||||
if val != 1 {
|
||||
t.Errorf("Unexpected value for evictedCount metric with labels (%s, %s): got %v, want 1", tc.log2, string(tc.mode), val)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAddInPlaceUpdatedPod(t *testing.T) {
|
||||
testCases := []struct {
|
||||
desc string
|
||||
vpaSize int
|
||||
log2 string
|
||||
}{
|
||||
{
|
||||
desc: "VPA size 10",
|
||||
vpaSize: 10,
|
||||
log2: "3",
|
||||
},
|
||||
{
|
||||
desc: "VPA size 1",
|
||||
vpaSize: 1,
|
||||
log2: "0",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
t.Cleanup(inPlaceUpdatedCount.Reset)
|
||||
AddInPlaceUpdatedPod(tc.vpaSize)
|
||||
val := testutil.ToFloat64(inPlaceUpdatedCount.WithLabelValues(tc.log2))
|
||||
if val != 1 {
|
||||
t.Errorf("Unexpected value for InPlaceUpdatedPod metric with labels (%s): got %v, want 1", tc.log2, val)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRecordFailedInPlaceUpdate(t *testing.T) {
|
||||
testCases := []struct {
|
||||
desc string
|
||||
vpaSize int
|
||||
reason string
|
||||
log2 string
|
||||
}{
|
||||
{
|
||||
desc: "VPA size 2, some reason",
|
||||
vpaSize: 2,
|
||||
reason: "some_reason",
|
||||
log2: "1",
|
||||
},
|
||||
{
|
||||
desc: "VPA size 20, another reason",
|
||||
vpaSize: 20,
|
||||
reason: "another_reason",
|
||||
log2: "4",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
t.Cleanup(failedInPlaceUpdateAttempts.Reset)
|
||||
RecordFailedInPlaceUpdate(tc.vpaSize, tc.reason)
|
||||
val := testutil.ToFloat64(failedInPlaceUpdateAttempts.WithLabelValues(tc.log2, tc.reason))
|
||||
if val != 1 {
|
||||
t.Errorf("Unexpected value for FailedInPlaceUpdate metric with labels (%s, %s): got %v, want 1", tc.log2, tc.reason, val)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateModeAndSizeBasedGauge(t *testing.T) {
|
||||
type addition struct {
|
||||
vpaSize int
|
||||
mode vpa_types.UpdateMode
|
||||
value int
|
||||
}
|
||||
type expectation struct {
|
||||
labels []string
|
||||
value float64
|
||||
}
|
||||
testCases := []struct {
|
||||
desc string
|
||||
newCounter func() *UpdateModeAndSizeBasedGauge
|
||||
metric *prometheus.GaugeVec
|
||||
metricName string
|
||||
additions []addition
|
||||
expectedMetrics []expectation
|
||||
}{
|
||||
{
|
||||
desc: "ControlledPodsCounter",
|
||||
newCounter: NewControlledPodsCounter,
|
||||
metric: controlledCount,
|
||||
metricName: "vpa_updater_controlled_pods_total",
|
||||
additions: []addition{
|
||||
{1, vpa_types.UpdateModeAuto, 5},
|
||||
{2, vpa_types.UpdateModeOff, 10},
|
||||
{2, vpa_types.UpdateModeAuto, 2},
|
||||
{2, vpa_types.UpdateModeAuto, 7},
|
||||
},
|
||||
expectedMetrics: []expectation{
|
||||
{[]string{"0" /* log2(1) */, "Auto"}, 5},
|
||||
{[]string{"1" /* log2(2) */, "Auto"}, 9},
|
||||
{[]string{"1" /* log2(2) */, "Off"}, 10},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "EvictablePodsCounter",
|
||||
newCounter: NewEvictablePodsCounter,
|
||||
metric: evictableCount,
|
||||
metricName: "vpa_updater_evictable_pods_total",
|
||||
additions: []addition{
|
||||
{4, vpa_types.UpdateModeAuto, 3},
|
||||
{1, vpa_types.UpdateModeRecreate, 8},
|
||||
},
|
||||
expectedMetrics: []expectation{
|
||||
{[]string{"2" /* log2(4) */, "Auto"}, 3},
|
||||
{[]string{"0" /* log2(1) */, "Recreate"}, 8},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "VpasWithEvictablePodsCounter",
|
||||
newCounter: NewVpasWithEvictablePodsCounter,
|
||||
metric: vpasWithEvictablePodsCount,
|
||||
metricName: "vpa_updater_vpas_with_evictable_pods_total",
|
||||
additions: []addition{
|
||||
{1, vpa_types.UpdateModeOff, 1},
|
||||
{2, vpa_types.UpdateModeAuto, 1},
|
||||
},
|
||||
expectedMetrics: []expectation{
|
||||
{[]string{"0" /* log2(1) */, "Off"}, 1},
|
||||
{[]string{"1" /* log2(2) */, "Auto"}, 1},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "VpasWithEvictedPodsCounter",
|
||||
newCounter: NewVpasWithEvictedPodsCounter,
|
||||
metric: vpasWithEvictedPodsCount,
|
||||
metricName: "vpa_updater_vpas_with_evicted_pods_total",
|
||||
additions: []addition{
|
||||
{1, vpa_types.UpdateModeAuto, 2},
|
||||
{1, vpa_types.UpdateModeAuto, 3},
|
||||
},
|
||||
expectedMetrics: []expectation{
|
||||
{[]string{"0" /* log2(1) */, "Auto"}, 5},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
t.Cleanup(tc.metric.Reset)
|
||||
counter := tc.newCounter()
|
||||
for _, add := range tc.additions {
|
||||
counter.Add(add.vpaSize, add.mode, add.value)
|
||||
}
|
||||
counter.Observe()
|
||||
for _, expected := range tc.expectedMetrics {
|
||||
val := testutil.ToFloat64(tc.metric.WithLabelValues(expected.labels...))
|
||||
if val != expected.value {
|
||||
t.Errorf("Unexpected value for metric %s with labels %v: got %v, want %v", tc.metricName, expected.labels, val, expected.value)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSizeBasedGauge(t *testing.T) {
|
||||
type addition struct {
|
||||
vpaSize int
|
||||
value int
|
||||
}
|
||||
type expectation struct {
|
||||
labels []string
|
||||
value float64
|
||||
}
|
||||
testCases := []struct {
|
||||
desc string
|
||||
newCounter func() *SizeBasedGauge
|
||||
metric *prometheus.GaugeVec
|
||||
metricName string
|
||||
additions []addition
|
||||
expectedMetrics []expectation
|
||||
}{
|
||||
{
|
||||
desc: "InPlaceUpdatablePodsCounter",
|
||||
newCounter: NewInPlaceUpdatablePodsCounter,
|
||||
metric: inPlaceUpdatableCount,
|
||||
metricName: "vpa_updater_in_place_updatable_pods_total",
|
||||
additions: []addition{
|
||||
{1, 5},
|
||||
{2, 10},
|
||||
},
|
||||
expectedMetrics: []expectation{
|
||||
{[]string{"0" /* log2(1) */}, 5},
|
||||
{[]string{"1" /* log2(2) */}, 10},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "VpasWithInPlaceUpdatablePodsCounter",
|
||||
newCounter: NewVpasWithInPlaceUpdatablePodsCounter,
|
||||
metric: vpasWithInPlaceUpdatablePodsCount,
|
||||
metricName: "vpa_updater_vpas_with_in_place_updatable_pods_total",
|
||||
additions: []addition{
|
||||
{10, 1},
|
||||
{20, 1},
|
||||
},
|
||||
expectedMetrics: []expectation{
|
||||
{[]string{"3" /* log2(10) */}, 1},
|
||||
{[]string{"4" /* log2(20) */}, 1},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "VpasWithInPlaceUpdatedPodsCounter",
|
||||
newCounter: NewVpasWithInPlaceUpdatedPodsCounter,
|
||||
metric: vpasWithInPlaceUpdatedPodsCount,
|
||||
metricName: "vpa_updater_vpas_with_in_place_updated_pods_total",
|
||||
additions: []addition{
|
||||
{2, 4},
|
||||
{4, 5},
|
||||
},
|
||||
expectedMetrics: []expectation{
|
||||
{[]string{"1" /* log2(2) */}, 4},
|
||||
{[]string{"2" /* log2(4) */}, 5},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
t.Cleanup(tc.metric.Reset)
|
||||
counter := tc.newCounter()
|
||||
for _, add := range tc.additions {
|
||||
counter.Add(add.vpaSize, add.value)
|
||||
}
|
||||
counter.Observe()
|
||||
for _, expected := range tc.expectedMetrics {
|
||||
val := testutil.ToFloat64(tc.metric.WithLabelValues(expected.labels...))
|
||||
if val != expected.value {
|
||||
t.Errorf("Unexpected value for metric %s with labels %v: got %v, want %v", tc.metricName, expected.labels, val, expected.value)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue