Merge pull request #5275 from navinjoy/oom-params

VPA: make parameters oomBumpUpRatio and oomMinBumpUp configurable
This commit is contained in:
Kubernetes Prow Robot 2023-01-30 04:46:51 -08:00 committed by GitHub
commit 65c098b6f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 36 additions and 7 deletions

View File

@ -304,6 +304,22 @@ Please note the usage of the following arguments to override default names and p
You can then choose which recommender to use by setting `recommenders` inside the `VerticalPodAutoscaler` spec.
### Custom memory bump-up after OOMKill
After an OOMKill event is observed, VPA increases the memory recommendation based on the memory usage observed in the event according to this formula: `recommendation = max(memory-usage-in-oomkill-event + oom-min-bump-up-bytes, memory-usage-in-oomkill-event * oom-bump-up-ratio)`.
You can configure the minimum bump-up as well as the multiplier by specifying startup arguments for the recommender:
`oom-bump-up-ratio` specifies the memory bump-up ratio applied when an OOM occurs; the default is `1.2`. This means memory will be increased by 20% after an OOMKill event.
`oom-min-bump-up-bytes` specifies the minimal increase of memory after observing an OOM. Defaults to `100 * 1024 * 1024` (=100MiB).
Usage in the recommender deployment:
```
containers:
- name: recommender
args:
- --oom-bump-up-ratio=2.0
- --oom-min-bump-up-bytes=524288000
```
### Using CPU management with static policy
If you are using the [CPU management with static policy](https://kubernetes.io/docs/tasks/administer-cluster/cpu-management-policies/#static-policy) for some containers,
@ -314,7 +330,7 @@ The annotation format is the following:
```
vpa-post-processor.kubernetes.io/{containerName}_integerCPU=true
```
# Known limitations
* Whenever VPA updates the pod resources, the pod is recreated, which causes all

View File

@ -18,9 +18,10 @@ package main
import (
"flag"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/input"
"time"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/input"
apiv1 "k8s.io/api/core/v1"
"k8s.io/autoscaler/vertical-pod-autoscaler/common"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/input/history"
@ -65,6 +66,8 @@ var (
memoryAggregationIntervalCount = flag.Int64("memory-aggregation-interval-count", model.DefaultMemoryAggregationIntervalCount, `The number of consecutive memory-aggregation-intervals which make up the MemoryAggregationWindowLength which in turn is the period for memory usage aggregation by VPA. In other words, MemoryAggregationWindowLength = memory-aggregation-interval * memory-aggregation-interval-count.`)
memoryHistogramDecayHalfLife = flag.Duration("memory-histogram-decay-half-life", model.DefaultMemoryHistogramDecayHalfLife, `The amount of time it takes a historical memory usage sample to lose half of its weight. In other words, a fresh usage sample is twice as 'important' as one with age equal to the half life period.`)
cpuHistogramDecayHalfLife = flag.Duration("cpu-histogram-decay-half-life", model.DefaultCPUHistogramDecayHalfLife, `The amount of time it takes a historical CPU usage sample to lose half of its weight.`)
oomBumpUpRatio = flag.Float64("oom-bump-up-ratio", model.DefaultOOMBumpUpRatio, `The memory bump up ratio when OOM occurred, default is 1.2.`)
oomMinBumpUp = flag.Float64("oom-min-bump-up-bytes", model.DefaultOOMMinBumpUp, `The minimal increase of memory when OOM occurred in bytes, default is 100 * 1024 * 1024`)
)
// Post processors flags
@ -80,7 +83,7 @@ func main() {
config := common.CreateKubeConfigOrDie(*kubeconfig, float32(*kubeApiQps), int(*kubeApiBurst))
model.InitializeAggregationsConfig(model.NewAggregationsConfig(*memoryAggregationInterval, *memoryAggregationIntervalCount, *memoryHistogramDecayHalfLife, *cpuHistogramDecayHalfLife))
model.InitializeAggregationsConfig(model.NewAggregationsConfig(*memoryAggregationInterval, *memoryAggregationIntervalCount, *memoryHistogramDecayHalfLife, *cpuHistogramDecayHalfLife, *oomBumpUpRatio, *oomMinBumpUp))
healthCheck := metrics.NewHealthCheck(*metricsFetcherInterval*5, true)
metrics.Initialize(*address, healthCheck)

View File

@ -51,6 +51,10 @@ type AggregationsConfig struct {
// CPUHistogramDecayHalfLife is the amount of time it takes a historical
// CPU usage sample to lose half of its weight.
CPUHistogramDecayHalfLife time.Duration
// OOMBumpUpRatio specifies the memory bump up ratio when OOM occurred.
OOMBumpUpRatio float64
// OOMMinBumpUp specifies the minimal increase of memory when OOM occurred in bytes.
OOMMinBumpUp float64
}
const (
@ -71,6 +75,10 @@ const (
// DefaultCPUHistogramDecayHalfLife is the default value for CPUHistogramDecayHalfLife.
// CPU usage sample to lose half of its weight.
DefaultCPUHistogramDecayHalfLife = time.Hour * 24
// DefaultOOMBumpUpRatio is the default value for OOMBumpUpRatio.
DefaultOOMBumpUpRatio float64 = 1.2 // Memory is increased by 20% after an OOMKill.
// DefaultOOMMinBumpUp is the default value for OOMMinBumpUp.
DefaultOOMMinBumpUp float64 = 100 * 1024 * 1024 // Memory is increased by at least 100MB after an OOMKill.
)
// GetMemoryAggregationWindowLength returns the total length of the memory usage history aggregated by VPA.
@ -103,13 +111,15 @@ func (a *AggregationsConfig) memoryHistogramOptions() util.HistogramOptions {
}
// NewAggregationsConfig creates a new AggregationsConfig based on the supplied parameters and default values.
func NewAggregationsConfig(memoryAggregationInterval time.Duration, memoryAggregationIntervalCount int64, memoryHistogramDecayHalfLife, cpuHistogramDecayHalfLife time.Duration) *AggregationsConfig {
func NewAggregationsConfig(memoryAggregationInterval time.Duration, memoryAggregationIntervalCount int64, memoryHistogramDecayHalfLife, cpuHistogramDecayHalfLife time.Duration, oomBumpUpRatio float64, oomMinBumpUp float64) *AggregationsConfig {
a := &AggregationsConfig{
MemoryAggregationInterval: memoryAggregationInterval,
MemoryAggregationIntervalCount: memoryAggregationIntervalCount,
HistogramBucketSizeGrowth: DefaultHistogramBucketSizeGrowth,
MemoryHistogramDecayHalfLife: memoryHistogramDecayHalfLife,
CPUHistogramDecayHalfLife: cpuHistogramDecayHalfLife,
OOMBumpUpRatio: oomBumpUpRatio,
OOMMinBumpUp: oomMinBumpUp,
}
a.CPUHistogramOptions = a.cpuHistogramOptions()
a.MemoryHistogramOptions = a.memoryHistogramOptions()
@ -121,7 +131,7 @@ var aggregationsConfig *AggregationsConfig
// GetAggregationsConfig gets the aggregations config. Initializes to default values if not initialized already.
func GetAggregationsConfig() *AggregationsConfig {
if aggregationsConfig == nil {
aggregationsConfig = NewAggregationsConfig(DefaultMemoryAggregationInterval, DefaultMemoryAggregationIntervalCount, DefaultMemoryHistogramDecayHalfLife, DefaultCPUHistogramDecayHalfLife)
aggregationsConfig = NewAggregationsConfig(DefaultMemoryAggregationInterval, DefaultMemoryAggregationIntervalCount, DefaultMemoryHistogramDecayHalfLife, DefaultCPUHistogramDecayHalfLife, DefaultOOMBumpUpRatio, DefaultOOMMinBumpUp)
}
return aggregationsConfig

View File

@ -199,8 +199,8 @@ func (container *ContainerState) RecordOOM(timestamp time.Time, requestedMemory
// Get max of the request and the recent usage-based memory peak.
// Omitting oomPeak here to protect against recommendation running too high on subsequent OOMs.
memoryUsed := ResourceAmountMax(requestedMemory, container.memoryPeak)
memoryNeeded := ResourceAmountMax(memoryUsed+MemoryAmountFromBytes(OOMMinBumpUp),
ScaleResource(memoryUsed, OOMBumpUpRatio))
memoryNeeded := ResourceAmountMax(memoryUsed+MemoryAmountFromBytes(GetAggregationsConfig().OOMMinBumpUp),
ScaleResource(memoryUsed, GetAggregationsConfig().OOMBumpUpRatio))
oomMemorySample := ContainerUsageSample{
MeasureStart: timestamp,