Merge pull request #2028 from bskiba/quality-cont

VPA: Record more quality metrics
This commit is contained in:
Kubernetes Prow Robot 2019-05-17 05:26:05 -07:00 committed by GitHub
commit d9a2dcf5aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 200 additions and 27 deletions

View File

@ -73,6 +73,9 @@ type ContainerStateAggregator interface {
// NeedsRecommendation returns true if this aggregator should have
// a recommendation calculated.
NeedsRecommendation() bool
// GetUpdateMode returns the update mode of VPA controlling this aggregator,
// nil if aggregator is not autoscaled.
GetUpdateMode() *vpa_types.UpdateMode
}
// AggregateContainerState holds input signals aggregated from a set of containers.
@ -93,6 +96,7 @@ type AggregateContainerState struct {
CreationTime time.Time
LastRecommendation corev1.ResourceList
IsUnderVPA bool
UpdateMode *vpa_types.UpdateMode
}
// GetLastRecommendation returns last recorded recommendation.
@ -105,11 +109,18 @@ func (a *AggregateContainerState) NeedsRecommendation() bool {
return a.IsUnderVPA
}
// GetUpdateMode returns the update mode of VPA controlling this aggregator,
// nil if aggregator is not autoscaled.
func (a *AggregateContainerState) GetUpdateMode() *vpa_types.UpdateMode {
return a.UpdateMode
}
// MarkNotAutoscaled registers that this container state is not contorled by
// a VPA object.
func (a *AggregateContainerState) MarkNotAutoscaled() {
a.IsUnderVPA = false
a.LastRecommendation = nil
a.UpdateMode = nil
}
// MergeContainerState merges two AggregateContainerStates.
@ -284,3 +295,9 @@ func (p *ContainerStateAggregatorProxy) NeedsRecommendation() bool {
aggregator := p.cluster.findOrCreateAggregateContainerState(p.containerID)
return aggregator.NeedsRecommendation()
}
// GetUpdateMode returns update mode of VPA controlling the aggregator.
func (p *ContainerStateAggregatorProxy) GetUpdateMode() *vpa_types.UpdateMode {
aggregator := p.cluster.findOrCreateAggregateContainerState(p.containerID)
return aggregator.GetUpdateMode()
}

View File

@ -87,39 +87,44 @@ func (container *ContainerState) addCPUSample(sample *ContainerUsageSample) bool
if !sample.isValid(ResourceCPU) || !sample.MeasureStart.After(container.LastCPUSampleStart) {
return false // Discard invalid, duplicate or out-of-order samples.
}
container.observeRecommendationUsageDiff(sample.Usage, false, corev1.ResourceCPU)
container.observeQualityMetrics(sample.Usage, false, corev1.ResourceCPU)
container.aggregator.AddSample(sample)
container.LastCPUSampleStart = sample.MeasureStart
return true
}
func (container *ContainerState) observeRecommendationUsageDiff(usage ResourceAmount, isOOM bool, resource corev1.ResourceName) {
func (container *ContainerState) observeQualityMetrics(usage ResourceAmount, isOOM bool, resource corev1.ResourceName) {
if !container.aggregator.NeedsRecommendation() {
return
}
updateMode := container.aggregator.GetUpdateMode()
var usageValue float64
switch resource {
case corev1.ResourceCPU:
usageValue = CoresFromCPUAmount(usage)
case corev1.ResourceMemory:
usageValue = BytesFromMemoryAmount(usage)
}
if container.aggregator.GetLastRecommendation() == nil {
metrics_quality.ObserveMissingRecommendation(isOOM, resource)
metrics_quality.ObserveQualityMetricsRecommendationMissing(usageValue, isOOM, resource, updateMode)
return
}
recommendation := container.aggregator.GetLastRecommendation()[resource]
var recommendationValue float64
var usageValue float64
if recommendation.IsZero() {
metrics_quality.ObserveMissingRecommendation(isOOM, resource)
metrics_quality.ObserveQualityMetricsRecommendationMissing(usageValue, isOOM, resource, updateMode)
return
}
var recommendationValue float64
switch resource {
case corev1.ResourceCPU:
recommendationValue = float64(recommendation.MilliValue()) / 1000.0
usageValue = CoresFromCPUAmount(usage)
case corev1.ResourceMemory:
recommendationValue = float64(recommendation.Value())
usageValue = BytesFromMemoryAmount(usage)
default:
klog.Warningf("Unknown resource: %v", resource)
return
}
metrics_quality.ObserveUsageRecommendationRelativeDiff(usageValue, recommendationValue, isOOM, resource)
metrics_quality.ObserveQualityMetrics(usageValue, recommendationValue, isOOM, resource, updateMode)
}
// GetMaxMemoryPeak returns maximum memory usage in the sample, possibly estimated from OOM
@ -163,7 +168,7 @@ func (container *ContainerState) addMemorySample(sample *ContainerUsageSample, i
container.oomPeak = 0
addNewPeak = true
}
container.observeRecommendationUsageDiff(sample.Usage, isOOM, corev1.ResourceMemory)
container.observeQualityMetrics(sample.Usage, isOOM, corev1.ResourceMemory)
if addNewPeak {
newPeak := ContainerUsageSample{
MeasureStart: container.WindowEnd,

View File

@ -128,6 +128,7 @@ func (vpa *Vpa) UseAggregationIfMatching(aggregationKey AggregateStateKey, aggre
if vpa.matchesAggregation(aggregationKey) {
vpa.aggregateContainerStates[aggregationKey] = aggregation
aggregation.IsUnderVPA = true
aggregation.UpdateMode = vpa.UpdateMode
return true
}
return false

View File

@ -219,52 +219,64 @@ func TestUpdateRecommendation(t *testing.T) {
}
func TestUseAggregationIfMatching(t *testing.T) {
modeOff := vpa_types.UpdateModeOff
modeAuto := vpa_types.UpdateModeAuto
cases := []struct {
name string
aggregations []string
vpaSelector string
updateMode *vpa_types.UpdateMode
container string
containerLabels map[string]string
isUnderVPA bool
expectedAggregations []string
expectedUpdateMode *vpa_types.UpdateMode
expectedNeedsRecommendation bool
}{
{
name: "First matching aggregation",
aggregations: []string{},
vpaSelector: testSelectorStr,
updateMode: &modeOff,
container: "test-container",
containerLabels: testLabels,
isUnderVPA: false,
expectedAggregations: []string{"test-container"},
expectedNeedsRecommendation: true,
expectedUpdateMode: &modeOff,
}, {
name: "New matching aggregation",
aggregations: []string{"test-container"},
vpaSelector: testSelectorStr,
updateMode: &modeAuto,
container: "second-container",
containerLabels: testLabels,
isUnderVPA: false,
expectedAggregations: []string{"test-container", "second-container"},
expectedNeedsRecommendation: true,
expectedUpdateMode: &modeAuto,
}, {
name: "Existing matching aggregation",
aggregations: []string{"test-container"},
vpaSelector: testSelectorStr,
updateMode: &modeOff,
container: "test-container",
containerLabels: testLabels,
isUnderVPA: true,
expectedAggregations: []string{"test-container"},
expectedNeedsRecommendation: true,
expectedUpdateMode: &modeOff,
}, {
name: "Aggregation not matching",
aggregations: []string{"test-container"},
vpaSelector: testSelectorStr,
updateMode: &modeAuto,
container: "second-container",
containerLabels: map[string]string{"different": "labels"},
isUnderVPA: false,
expectedAggregations: []string{"test-container"},
expectedNeedsRecommendation: false,
expectedUpdateMode: nil,
},
}
for _, tc := range cases {
@ -276,13 +288,7 @@ func TestUseAggregationIfMatching(t *testing.T) {
t.FailNow()
}
vpa := NewVpa(VpaID{Namespace: namespace, VpaName: "my-favourite-vpa"}, selector, anyTime)
for _, container := range tc.aggregations {
vpa.aggregateContainerStates[mockAggregateStateKey{
namespace: namespace,
containerName: container,
labels: labels.Set(testLabels).String(),
}] = &AggregateContainerState{}
}
vpa.UpdateMode = tc.updateMode
key := mockAggregateStateKey{
namespace: namespace,
containerName: tc.container,
@ -291,6 +297,20 @@ func TestUseAggregationIfMatching(t *testing.T) {
aggregation := &AggregateContainerState{
IsUnderVPA: tc.isUnderVPA,
}
for _, container := range tc.aggregations {
containerKey := mockAggregateStateKey{
namespace: namespace,
containerName: container,
labels: labels.Set(testLabels).String(),
}
if container == tc.container {
aggregation.UpdateMode = vpa.UpdateMode
vpa.aggregateContainerStates[containerKey] = aggregation
} else {
vpa.aggregateContainerStates[key] = &AggregateContainerState{UpdateMode: vpa.UpdateMode}
}
}
vpa.UseAggregationIfMatching(key, aggregation)
assert.Equal(t, len(tc.expectedAggregations), len(vpa.aggregateContainerStates), "AggregateContainerStates has unexpected size")
for _, container := range tc.expectedAggregations {
@ -304,6 +324,13 @@ func TestUseAggregationIfMatching(t *testing.T) {
assert.True(t, found, "Container %s not found in aggregateContainerStates", container)
}
assert.Equal(t, tc.expectedNeedsRecommendation, aggregation.NeedsRecommendation())
if tc.expectedUpdateMode == nil {
assert.Nil(t, aggregation.GetUpdateMode())
} else {
if assert.NotNil(t, aggregation.GetUpdateMode()) {
assert.Equal(t, *tc.expectedUpdateMode, *aggregation.GetUpdateMode())
}
}
})
}
}

View File

@ -18,10 +18,13 @@ limitations under the License.
package quality
import (
"math"
"strconv"
corev1 "k8s.io/api/core/v1"
vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1beta2"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics"
"k8s.io/klog"
"github.com/prometheus/client_golang/prometheus"
)
@ -30,22 +33,77 @@ const (
metricsNamespace = metrics.TopMetricsNamespace + "quality"
)
var (
// Buckets between 0.01 and 655.36 cores
cpuBuckets = prometheus.ExponentialBuckets(0.01, 2., 17)
// Buckets between 1MB and 65.5 GB
memoryBuckets = prometheus.ExponentialBuckets(1e6, 2., 17)
)
var (
usageRecommendationRelativeDiff = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: metricsNamespace,
Name: "usage_recommendation_relative_diffs",
Help: "Diffs between usage and recommendation, normalized by recommendation value",
Help: "Diffs between recommendation and usage, normalized by recommendation value",
Buckets: []float64{-1., -.75, -.5, -.25, -.1, -.05, -0.025, -.01, -.005, -0.0025, -.001, 0.,
.001, .0025, .005, .01, .025, .05, .1, .25, .5, .75, 1., 2.5, 5., 10., 25., 50., 100.},
}, []string{"resource", "is_oom"},
}, []string{"update_mode", "resource", "is_oom"},
)
usageMissingRecommendationCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricsNamespace,
Name: "usage_sample_missing_recommendation_count",
Help: "Count of usage samples when a recommendation should be present but is missing.",
}, []string{"resource", "is_oom"},
Help: "Count of usage samples when a recommendation should be present but is missing",
}, []string{"update_mode", "resource", "is_oom"},
)
cpuRecommendationOverUsageDiff = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: metricsNamespace,
Name: "cpu_recommendation_over_usage_diffs_cores",
Help: "Absolute diffs between recommendation and usage for CPU when recommendation > usage",
Buckets: cpuBuckets,
}, []string{"update_mode", "recommendation_missing"},
)
memoryRecommendationOverUsageDiff = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: metricsNamespace,
Name: "mem_recommendation_over_usage_diffs_bytes",
Help: "Absolute diffs between recommendation and usage for memory when recommendation > usage",
Buckets: memoryBuckets,
}, []string{"update_mode", "recommendation_missing", "is_oom"},
)
cpuRecommendationLowerOrEqualUsageDiff = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: metricsNamespace,
Name: "cpu_recommendation_lower_equal_usage_diffs_cores",
Help: "Absolute diffs between recommendation and usage for CPU when recommendation <= usage",
Buckets: cpuBuckets,
}, []string{"update_mode", "recommendation_missing"},
)
memoryRecommendationLowerOrEqualUsageDiff = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: metricsNamespace,
Name: "mem_recommendation_lower_equal_usage_diffs_bytes",
Help: "Absolute diffs between recommendation and usage for memory when recommendation <= usage",
Buckets: memoryBuckets,
}, []string{"update_mode", "recommendation_missing", "is_oom"},
)
cpuRecommendations = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: metricsNamespace,
Name: "cpu_recommendations_cores",
Help: "CPU recommendation values as observed on recorded usage sample",
Buckets: cpuBuckets,
}, []string{"update_mode"},
)
memoryRecommendations = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: metricsNamespace,
Name: "mem_recommendations_bytes",
Help: "Memory recommendation values as observed on recorded usage sample",
Buckets: memoryBuckets,
}, []string{"update_mode", "is_oom"},
)
)
@ -53,17 +111,82 @@ var (
func Register() {
prometheus.MustRegister(usageRecommendationRelativeDiff)
prometheus.MustRegister(usageMissingRecommendationCounter)
prometheus.MustRegister(cpuRecommendationOverUsageDiff)
prometheus.MustRegister(memoryRecommendationOverUsageDiff)
prometheus.MustRegister(cpuRecommendationLowerOrEqualUsageDiff)
prometheus.MustRegister(memoryRecommendationLowerOrEqualUsageDiff)
prometheus.MustRegister(cpuRecommendations)
prometheus.MustRegister(memoryRecommendations)
}
// ObserveUsageRecommendationRelativeDiff records relative diff between usage and
// observeUsageRecommendationRelativeDiff records relative diff between usage and
// recommendation if recommendation has a positive value.
func ObserveUsageRecommendationRelativeDiff(usage, recommendation float64, isOOM bool, resource corev1.ResourceName) {
func observeUsageRecommendationRelativeDiff(usage, recommendation float64, isOOM bool, resource corev1.ResourceName, updateMode *vpa_types.UpdateMode) {
if recommendation > 0 {
usageRecommendationRelativeDiff.WithLabelValues(string(resource), strconv.FormatBool(isOOM)).Observe((usage - recommendation) / recommendation)
usageRecommendationRelativeDiff.WithLabelValues(updateModeToString(updateMode), string(resource), strconv.FormatBool(isOOM)).Observe((usage - recommendation) / recommendation)
}
}
// ObserveMissingRecommendation counts usage samples with missing recommendations.
func ObserveMissingRecommendation(isOOM bool, resource corev1.ResourceName) {
usageMissingRecommendationCounter.WithLabelValues(string(resource), strconv.FormatBool(isOOM)).Inc()
// observeMissingRecommendation counts usage samples with missing recommendations.
func observeMissingRecommendation(isOOM bool, resource corev1.ResourceName, updateMode *vpa_types.UpdateMode) {
usageMissingRecommendationCounter.WithLabelValues(updateModeToString(updateMode), string(resource), strconv.FormatBool(isOOM)).Inc()
}
// observeUsageRecommendationDiff records absolute diff between usage and
// recommendation.
func observeUsageRecommendationDiff(usage, recommendation float64, isRecommendationMissing, isOOM bool, resource corev1.ResourceName, updateMode *vpa_types.UpdateMode) {
recommendationOverUsage := recommendation > usage
diff := math.Abs(usage - recommendation)
switch resource {
case corev1.ResourceCPU:
if recommendationOverUsage {
cpuRecommendationOverUsageDiff.WithLabelValues(updateModeToString(updateMode),
strconv.FormatBool(isRecommendationMissing)).Observe(diff)
} else {
cpuRecommendationLowerOrEqualUsageDiff.WithLabelValues(updateModeToString(updateMode),
strconv.FormatBool(isRecommendationMissing)).Observe(diff)
}
case corev1.ResourceMemory:
if recommendationOverUsage {
memoryRecommendationOverUsageDiff.WithLabelValues(updateModeToString(updateMode),
strconv.FormatBool(isRecommendationMissing), strconv.FormatBool(isOOM)).Observe(diff)
} else {
memoryRecommendationLowerOrEqualUsageDiff.WithLabelValues(updateModeToString(updateMode),
strconv.FormatBool(isRecommendationMissing), strconv.FormatBool(isOOM)).Observe(diff)
}
default:
klog.Warningf("Unknown resource: %v", resource)
}
}
// observeRecommendation records the value of recommendation.
func observeRecommendation(recommendation float64, isOOM bool, resource corev1.ResourceName, updateMode *vpa_types.UpdateMode) {
switch resource {
case corev1.ResourceCPU:
cpuRecommendations.WithLabelValues(updateModeToString(updateMode)).Observe(recommendation)
case corev1.ResourceMemory:
memoryRecommendations.WithLabelValues(updateModeToString(updateMode), strconv.FormatBool(isOOM)).Observe(recommendation)
default:
klog.Warningf("Unknown resource: %v", resource)
}
}
// ObserveQualityMetrics records all quality metrics that we can derive from recommendation and usage.
func ObserveQualityMetrics(usage, recommendation float64, isOOM bool, resource corev1.ResourceName, updateMode *vpa_types.UpdateMode) {
observeRecommendation(recommendation, isOOM, resource, updateMode)
observeUsageRecommendationDiff(usage, recommendation, false, isOOM, resource, updateMode)
observeUsageRecommendationRelativeDiff(usage, recommendation, isOOM, resource, updateMode)
}
// ObserveQualityMetricsRecommendationMissing records all quality metrics that we can derive from usage when recommendation is missing.
func ObserveQualityMetricsRecommendationMissing(usage float64, isOOM bool, resource corev1.ResourceName, updateMode *vpa_types.UpdateMode) {
observeMissingRecommendation(isOOM, resource, updateMode)
observeUsageRecommendationDiff(usage, 0, true, isOOM, resource, updateMode)
}
func updateModeToString(updateMode *vpa_types.UpdateMode) string {
if updateMode == nil {
return ""
}
return string(*updateMode)
}