diff --git a/vertical-pod-autoscaler/pkg/recommender/model/aggregate_container_state.go b/vertical-pod-autoscaler/pkg/recommender/model/aggregate_container_state.go index 244f60f4fb..b2e330c2d9 100644 --- a/vertical-pod-autoscaler/pkg/recommender/model/aggregate_container_state.go +++ b/vertical-pod-autoscaler/pkg/recommender/model/aggregate_container_state.go @@ -73,6 +73,9 @@ type ContainerStateAggregator interface { // NeedsRecommendation returns true if this aggregator should have // a recommendation calculated. NeedsRecommendation() bool + // GetUpdateMode returns the update mode of VPA controlling this aggregator, + // nil if aggregator is not autoscaled. + GetUpdateMode() *vpa_types.UpdateMode } // AggregateContainerState holds input signals aggregated from a set of containers. @@ -93,6 +96,7 @@ type AggregateContainerState struct { CreationTime time.Time LastRecommendation corev1.ResourceList IsUnderVPA bool + UpdateMode *vpa_types.UpdateMode } // GetLastRecommendation returns last recorded recommendation. @@ -105,11 +109,18 @@ func (a *AggregateContainerState) NeedsRecommendation() bool { return a.IsUnderVPA } +// GetUpdateMode returns the update mode of VPA controlling this aggregator, +// nil if aggregator is not autoscaled. +func (a *AggregateContainerState) GetUpdateMode() *vpa_types.UpdateMode { + return a.UpdateMode +} + // MarkNotAutoscaled registers that this container state is not contorled by // a VPA object. func (a *AggregateContainerState) MarkNotAutoscaled() { a.IsUnderVPA = false a.LastRecommendation = nil + a.UpdateMode = nil } // MergeContainerState merges two AggregateContainerStates. @@ -284,3 +295,9 @@ func (p *ContainerStateAggregatorProxy) NeedsRecommendation() bool { aggregator := p.cluster.findOrCreateAggregateContainerState(p.containerID) return aggregator.NeedsRecommendation() } + +// GetUpdateMode returns update mode of VPA controlling the aggregator. +func (p *ContainerStateAggregatorProxy) GetUpdateMode() *vpa_types.UpdateMode { + aggregator := p.cluster.findOrCreateAggregateContainerState(p.containerID) + return aggregator.GetUpdateMode() +} diff --git a/vertical-pod-autoscaler/pkg/recommender/model/container.go b/vertical-pod-autoscaler/pkg/recommender/model/container.go index 86ed5032df..7196dd6a05 100644 --- a/vertical-pod-autoscaler/pkg/recommender/model/container.go +++ b/vertical-pod-autoscaler/pkg/recommender/model/container.go @@ -87,39 +87,44 @@ func (container *ContainerState) addCPUSample(sample *ContainerUsageSample) bool if !sample.isValid(ResourceCPU) || !sample.MeasureStart.After(container.LastCPUSampleStart) { return false // Discard invalid, duplicate or out-of-order samples. } - container.observeRecommendationUsageDiff(sample.Usage, false, corev1.ResourceCPU) + container.observeQualityMetrics(sample.Usage, false, corev1.ResourceCPU) container.aggregator.AddSample(sample) container.LastCPUSampleStart = sample.MeasureStart return true } -func (container *ContainerState) observeRecommendationUsageDiff(usage ResourceAmount, isOOM bool, resource corev1.ResourceName) { +func (container *ContainerState) observeQualityMetrics(usage ResourceAmount, isOOM bool, resource corev1.ResourceName) { if !container.aggregator.NeedsRecommendation() { return } + updateMode := container.aggregator.GetUpdateMode() + var usageValue float64 + switch resource { + case corev1.ResourceCPU: + usageValue = CoresFromCPUAmount(usage) + case corev1.ResourceMemory: + usageValue = BytesFromMemoryAmount(usage) + } if container.aggregator.GetLastRecommendation() == nil { - metrics_quality.ObserveMissingRecommendation(isOOM, resource) + metrics_quality.ObserveQualityMetricsRecommendationMissing(usageValue, isOOM, resource, updateMode) return } recommendation := container.aggregator.GetLastRecommendation()[resource] - var recommendationValue float64 - var usageValue float64 if recommendation.IsZero() { - metrics_quality.ObserveMissingRecommendation(isOOM, resource) + metrics_quality.ObserveQualityMetricsRecommendationMissing(usageValue, isOOM, resource, updateMode) return } + var recommendationValue float64 switch resource { case corev1.ResourceCPU: recommendationValue = float64(recommendation.MilliValue()) / 1000.0 - usageValue = CoresFromCPUAmount(usage) case corev1.ResourceMemory: recommendationValue = float64(recommendation.Value()) - usageValue = BytesFromMemoryAmount(usage) default: klog.Warningf("Unknown resource: %v", resource) return } - metrics_quality.ObserveUsageRecommendationRelativeDiff(usageValue, recommendationValue, isOOM, resource) + metrics_quality.ObserveQualityMetrics(usageValue, recommendationValue, isOOM, resource, updateMode) } // GetMaxMemoryPeak returns maximum memory usage in the sample, possibly estimated from OOM @@ -163,7 +168,7 @@ func (container *ContainerState) addMemorySample(sample *ContainerUsageSample, i container.oomPeak = 0 addNewPeak = true } - container.observeRecommendationUsageDiff(sample.Usage, isOOM, corev1.ResourceMemory) + container.observeQualityMetrics(sample.Usage, isOOM, corev1.ResourceMemory) if addNewPeak { newPeak := ContainerUsageSample{ MeasureStart: container.WindowEnd, diff --git a/vertical-pod-autoscaler/pkg/recommender/model/vpa.go b/vertical-pod-autoscaler/pkg/recommender/model/vpa.go index 04c4ed37bc..18395ad727 100644 --- a/vertical-pod-autoscaler/pkg/recommender/model/vpa.go +++ b/vertical-pod-autoscaler/pkg/recommender/model/vpa.go @@ -128,6 +128,7 @@ func (vpa *Vpa) UseAggregationIfMatching(aggregationKey AggregateStateKey, aggre if vpa.matchesAggregation(aggregationKey) { vpa.aggregateContainerStates[aggregationKey] = aggregation aggregation.IsUnderVPA = true + aggregation.UpdateMode = vpa.UpdateMode return true } return false diff --git a/vertical-pod-autoscaler/pkg/recommender/model/vpa_test.go b/vertical-pod-autoscaler/pkg/recommender/model/vpa_test.go index 6ee1c44da8..147a446379 100644 --- a/vertical-pod-autoscaler/pkg/recommender/model/vpa_test.go +++ b/vertical-pod-autoscaler/pkg/recommender/model/vpa_test.go @@ -219,52 +219,64 @@ func TestUpdateRecommendation(t *testing.T) { } func TestUseAggregationIfMatching(t *testing.T) { + modeOff := vpa_types.UpdateModeOff + modeAuto := vpa_types.UpdateModeAuto cases := []struct { name string aggregations []string vpaSelector string + updateMode *vpa_types.UpdateMode container string containerLabels map[string]string isUnderVPA bool expectedAggregations []string + expectedUpdateMode *vpa_types.UpdateMode expectedNeedsRecommendation bool }{ { name: "First matching aggregation", aggregations: []string{}, vpaSelector: testSelectorStr, + updateMode: &modeOff, container: "test-container", containerLabels: testLabels, isUnderVPA: false, expectedAggregations: []string{"test-container"}, expectedNeedsRecommendation: true, + expectedUpdateMode: &modeOff, }, { name: "New matching aggregation", aggregations: []string{"test-container"}, vpaSelector: testSelectorStr, + updateMode: &modeAuto, container: "second-container", containerLabels: testLabels, isUnderVPA: false, expectedAggregations: []string{"test-container", "second-container"}, expectedNeedsRecommendation: true, + expectedUpdateMode: &modeAuto, }, { name: "Existing matching aggregation", aggregations: []string{"test-container"}, vpaSelector: testSelectorStr, + updateMode: &modeOff, container: "test-container", containerLabels: testLabels, isUnderVPA: true, expectedAggregations: []string{"test-container"}, expectedNeedsRecommendation: true, + expectedUpdateMode: &modeOff, }, { name: "Aggregation not matching", aggregations: []string{"test-container"}, vpaSelector: testSelectorStr, + updateMode: &modeAuto, container: "second-container", containerLabels: map[string]string{"different": "labels"}, isUnderVPA: false, expectedAggregations: []string{"test-container"}, expectedNeedsRecommendation: false, + expectedUpdateMode: nil, }, } for _, tc := range cases { @@ -276,13 +288,7 @@ func TestUseAggregationIfMatching(t *testing.T) { t.FailNow() } vpa := NewVpa(VpaID{Namespace: namespace, VpaName: "my-favourite-vpa"}, selector, anyTime) - for _, container := range tc.aggregations { - vpa.aggregateContainerStates[mockAggregateStateKey{ - namespace: namespace, - containerName: container, - labels: labels.Set(testLabels).String(), - }] = &AggregateContainerState{} - } + vpa.UpdateMode = tc.updateMode key := mockAggregateStateKey{ namespace: namespace, containerName: tc.container, @@ -291,6 +297,20 @@ func TestUseAggregationIfMatching(t *testing.T) { aggregation := &AggregateContainerState{ IsUnderVPA: tc.isUnderVPA, } + for _, container := range tc.aggregations { + containerKey := mockAggregateStateKey{ + namespace: namespace, + containerName: container, + labels: labels.Set(testLabels).String(), + } + if container == tc.container { + aggregation.UpdateMode = vpa.UpdateMode + vpa.aggregateContainerStates[containerKey] = aggregation + } else { + vpa.aggregateContainerStates[key] = &AggregateContainerState{UpdateMode: vpa.UpdateMode} + } + } + vpa.UseAggregationIfMatching(key, aggregation) assert.Equal(t, len(tc.expectedAggregations), len(vpa.aggregateContainerStates), "AggregateContainerStates has unexpected size") for _, container := range tc.expectedAggregations { @@ -304,6 +324,13 @@ func TestUseAggregationIfMatching(t *testing.T) { assert.True(t, found, "Container %s not found in aggregateContainerStates", container) } assert.Equal(t, tc.expectedNeedsRecommendation, aggregation.NeedsRecommendation()) + if tc.expectedUpdateMode == nil { + assert.Nil(t, aggregation.GetUpdateMode()) + } else { + if assert.NotNil(t, aggregation.GetUpdateMode()) { + assert.Equal(t, *tc.expectedUpdateMode, *aggregation.GetUpdateMode()) + } + } }) } } diff --git a/vertical-pod-autoscaler/pkg/utils/metrics/quality/quality.go b/vertical-pod-autoscaler/pkg/utils/metrics/quality/quality.go index 32dc04f5db..56fa15bcbc 100644 --- a/vertical-pod-autoscaler/pkg/utils/metrics/quality/quality.go +++ b/vertical-pod-autoscaler/pkg/utils/metrics/quality/quality.go @@ -18,10 +18,13 @@ limitations under the License. package quality import ( + "math" "strconv" corev1 "k8s.io/api/core/v1" + vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1beta2" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics" + "k8s.io/klog" "github.com/prometheus/client_golang/prometheus" ) @@ -30,22 +33,77 @@ const ( metricsNamespace = metrics.TopMetricsNamespace + "quality" ) +var ( + // Buckets between 0.01 and 655.36 cores + cpuBuckets = prometheus.ExponentialBuckets(0.01, 2., 17) + // Buckets between 1MB and 65.5 GB + memoryBuckets = prometheus.ExponentialBuckets(1e6, 2., 17) +) + var ( usageRecommendationRelativeDiff = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Namespace: metricsNamespace, Name: "usage_recommendation_relative_diffs", - Help: "Diffs between usage and recommendation, normalized by recommendation value", + Help: "Diffs between recommendation and usage, normalized by recommendation value", Buckets: []float64{-1., -.75, -.5, -.25, -.1, -.05, -0.025, -.01, -.005, -0.0025, -.001, 0., .001, .0025, .005, .01, .025, .05, .1, .25, .5, .75, 1., 2.5, 5., 10., 25., 50., 100.}, - }, []string{"resource", "is_oom"}, + }, []string{"update_mode", "resource", "is_oom"}, ) usageMissingRecommendationCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: metricsNamespace, Name: "usage_sample_missing_recommendation_count", - Help: "Count of usage samples when a recommendation should be present but is missing.", - }, []string{"resource", "is_oom"}, + Help: "Count of usage samples when a recommendation should be present but is missing", + }, []string{"update_mode", "resource", "is_oom"}, + ) + cpuRecommendationOverUsageDiff = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: metricsNamespace, + Name: "cpu_recommendation_over_usage_diffs_cores", + Help: "Absolute diffs between recommendation and usage for CPU when recommendation > usage", + Buckets: cpuBuckets, + }, []string{"update_mode", "recommendation_missing"}, + ) + memoryRecommendationOverUsageDiff = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: metricsNamespace, + Name: "mem_recommendation_over_usage_diffs_bytes", + Help: "Absolute diffs between recommendation and usage for memory when recommendation > usage", + Buckets: memoryBuckets, + }, []string{"update_mode", "recommendation_missing", "is_oom"}, + ) + cpuRecommendationLowerOrEqualUsageDiff = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: metricsNamespace, + Name: "cpu_recommendation_lower_equal_usage_diffs_cores", + Help: "Absolute diffs between recommendation and usage for CPU when recommendation <= usage", + Buckets: cpuBuckets, + }, []string{"update_mode", "recommendation_missing"}, + ) + memoryRecommendationLowerOrEqualUsageDiff = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: metricsNamespace, + Name: "mem_recommendation_lower_equal_usage_diffs_bytes", + Help: "Absolute diffs between recommendation and usage for memory when recommendation <= usage", + Buckets: memoryBuckets, + }, []string{"update_mode", "recommendation_missing", "is_oom"}, + ) + cpuRecommendations = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: metricsNamespace, + Name: "cpu_recommendations_cores", + Help: "CPU recommendation values as observed on recorded usage sample", + Buckets: cpuBuckets, + }, []string{"update_mode"}, + ) + memoryRecommendations = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: metricsNamespace, + Name: "mem_recommendations_bytes", + Help: "Memory recommendation values as observed on recorded usage sample", + Buckets: memoryBuckets, + }, []string{"update_mode", "is_oom"}, ) ) @@ -53,17 +111,82 @@ var ( func Register() { prometheus.MustRegister(usageRecommendationRelativeDiff) prometheus.MustRegister(usageMissingRecommendationCounter) + prometheus.MustRegister(cpuRecommendationOverUsageDiff) + prometheus.MustRegister(memoryRecommendationOverUsageDiff) + prometheus.MustRegister(cpuRecommendationLowerOrEqualUsageDiff) + prometheus.MustRegister(memoryRecommendationLowerOrEqualUsageDiff) + prometheus.MustRegister(cpuRecommendations) + prometheus.MustRegister(memoryRecommendations) } -// ObserveUsageRecommendationRelativeDiff records relative diff between usage and +// observeUsageRecommendationRelativeDiff records relative diff between usage and // recommendation if recommendation has a positive value. -func ObserveUsageRecommendationRelativeDiff(usage, recommendation float64, isOOM bool, resource corev1.ResourceName) { +func observeUsageRecommendationRelativeDiff(usage, recommendation float64, isOOM bool, resource corev1.ResourceName, updateMode *vpa_types.UpdateMode) { if recommendation > 0 { - usageRecommendationRelativeDiff.WithLabelValues(string(resource), strconv.FormatBool(isOOM)).Observe((usage - recommendation) / recommendation) + usageRecommendationRelativeDiff.WithLabelValues(updateModeToString(updateMode), string(resource), strconv.FormatBool(isOOM)).Observe((usage - recommendation) / recommendation) } } -// ObserveMissingRecommendation counts usage samples with missing recommendations. -func ObserveMissingRecommendation(isOOM bool, resource corev1.ResourceName) { - usageMissingRecommendationCounter.WithLabelValues(string(resource), strconv.FormatBool(isOOM)).Inc() +// observeMissingRecommendation counts usage samples with missing recommendations. +func observeMissingRecommendation(isOOM bool, resource corev1.ResourceName, updateMode *vpa_types.UpdateMode) { + usageMissingRecommendationCounter.WithLabelValues(updateModeToString(updateMode), string(resource), strconv.FormatBool(isOOM)).Inc() +} + +// observeUsageRecommendationDiff records absolute diff between usage and +// recommendation. +func observeUsageRecommendationDiff(usage, recommendation float64, isRecommendationMissing, isOOM bool, resource corev1.ResourceName, updateMode *vpa_types.UpdateMode) { + recommendationOverUsage := recommendation > usage + diff := math.Abs(usage - recommendation) + switch resource { + case corev1.ResourceCPU: + if recommendationOverUsage { + cpuRecommendationOverUsageDiff.WithLabelValues(updateModeToString(updateMode), + strconv.FormatBool(isRecommendationMissing)).Observe(diff) + } else { + cpuRecommendationLowerOrEqualUsageDiff.WithLabelValues(updateModeToString(updateMode), + strconv.FormatBool(isRecommendationMissing)).Observe(diff) + } + case corev1.ResourceMemory: + if recommendationOverUsage { + memoryRecommendationOverUsageDiff.WithLabelValues(updateModeToString(updateMode), + strconv.FormatBool(isRecommendationMissing), strconv.FormatBool(isOOM)).Observe(diff) + } else { + memoryRecommendationLowerOrEqualUsageDiff.WithLabelValues(updateModeToString(updateMode), + strconv.FormatBool(isRecommendationMissing), strconv.FormatBool(isOOM)).Observe(diff) + } + default: + klog.Warningf("Unknown resource: %v", resource) + } +} + +// observeRecommendation records the value of recommendation. +func observeRecommendation(recommendation float64, isOOM bool, resource corev1.ResourceName, updateMode *vpa_types.UpdateMode) { + switch resource { + case corev1.ResourceCPU: + cpuRecommendations.WithLabelValues(updateModeToString(updateMode)).Observe(recommendation) + case corev1.ResourceMemory: + memoryRecommendations.WithLabelValues(updateModeToString(updateMode), strconv.FormatBool(isOOM)).Observe(recommendation) + default: + klog.Warningf("Unknown resource: %v", resource) + } +} + +// ObserveQualityMetrics records all quality metrics that we can derive from recommendation and usage. +func ObserveQualityMetrics(usage, recommendation float64, isOOM bool, resource corev1.ResourceName, updateMode *vpa_types.UpdateMode) { + observeRecommendation(recommendation, isOOM, resource, updateMode) + observeUsageRecommendationDiff(usage, recommendation, false, isOOM, resource, updateMode) + observeUsageRecommendationRelativeDiff(usage, recommendation, isOOM, resource, updateMode) +} + +// ObserveQualityMetricsRecommendationMissing records all quality metrics that we can derive from usage when recommendation is missing. +func ObserveQualityMetricsRecommendationMissing(usage float64, isOOM bool, resource corev1.ResourceName, updateMode *vpa_types.UpdateMode) { + observeMissingRecommendation(isOOM, resource, updateMode) + observeUsageRecommendationDiff(usage, 0, true, isOOM, resource, updateMode) +} + +func updateModeToString(updateMode *vpa_types.UpdateMode) string { + if updateMode == nil { + return "" + } + return string(*updateMode) }