Merge pull request #354 from electronicarts/feature/mtcode/scale-down-delay-flags
Introduce new flags to control scale down behavior

commit 012909d64e
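
In short: this commit replaces the single --scale-down-delay flag (time after a scale up before scale down is considered) and the --scale-down-trial-interval flag (spacing between scale down attempts) with three independent cooldowns: --scale-down-delay-after-add (default 10 minutes), --scale-down-delay-after-delete (defaults to the scan interval), and --scale-down-delay-after-failure (default 3 minutes). The StaticAutoscaler now tracks the last node deletion and the last failed scale down separately, and scale down evaluation is skipped while any of the three windows is still open.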

@@ -100,10 +100,12 @@ type AutoscalingOptions struct {
 	NodeGroups []string
 	// ScaleDownEnabled is used to allow CA to scale down the cluster
 	ScaleDownEnabled bool
-	// ScaleDownDelay sets the duration from the last scale up to the time when CA starts to check scale down options
-	ScaleDownDelay time.Duration
-	// ScaleDownTrialInterval sets how often scale down possibility is check
-	ScaleDownTrialInterval time.Duration
+	// ScaleDownDelayAfterAdd sets the duration from the last scale up to the time when CA starts to check scale down options
+	ScaleDownDelayAfterAdd time.Duration
+	// ScaleDownDelayAfterDelete sets the duration between scale down attempts if scale down removes one or more nodes
+	ScaleDownDelayAfterDelete time.Duration
+	// ScaleDownDelayAfterFailure sets the duration before the next scale down attempt if scale down results in an error
+	ScaleDownDelayAfterFailure time.Duration
 	// ScaleDownNonEmptyCandidatesCount is the maximum number of non empty nodes
 	// considered at once as candidates for scale down.
 	ScaleDownNonEmptyCandidatesCount int
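
For reference, a trimmed sketch of the resulting configuration surface, assuming a local copy of just these fields (the real AutoscalingOptions struct carries many more settings; defaults are taken from the flag definitions later in this commit, and the 10s delete delay assumes the default scan interval):

    package main

    import (
    	"fmt"
    	"time"
    )

    // Options mirrors only the scale-down fields added in the hunk above.
    type Options struct {
    	ScaleDownEnabled           bool
    	ScaleDownDelayAfterAdd     time.Duration
    	ScaleDownDelayAfterDelete  time.Duration
    	ScaleDownDelayAfterFailure time.Duration
    }

    func main() {
    	opts := Options{
    		ScaleDownEnabled:           true,
    		ScaleDownDelayAfterAdd:     10 * time.Minute,
    		ScaleDownDelayAfterDelete:  10 * time.Second, // assumed scan-interval default
    		ScaleDownDelayAfterFailure: 3 * time.Minute,
    	}
    	fmt.Printf("%+v\n", opts)
    }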

@@ -36,9 +36,10 @@ type StaticAutoscaler struct {
 	// AutoscalingContext consists of validated settings and options for this autoscaler
 	*AutoscalingContext
 	kube_util.ListerRegistry
 	lastScaleUpTime time.Time
-	lastScaleDownFailedTrial time.Time
+	lastScaleDownDeleteTime time.Time
+	lastScaleDownFailTime time.Time
 	scaleDown *ScaleDown
 }

 // NewStaticAutoscaler creates an instance of Autoscaler filled with provided parameters

@@ -59,11 +60,12 @@ func NewStaticAutoscaler(opts AutoscalingOptions, predicateChecker *simulator.Pr
 	scaleDown := NewScaleDown(autoscalingContext)

 	return &StaticAutoscaler{
 		AutoscalingContext: autoscalingContext,
 		ListerRegistry: listerRegistry,
 		lastScaleUpTime: time.Now(),
-		lastScaleDownFailedTrial: time.Now(),
+		lastScaleDownDeleteTime: time.Now(),
+		lastScaleDownFailTime: time.Now(),
 		scaleDown: scaleDown,
 	}, nil
 }
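
Note that all three "last event" timestamps are seeded with time.Now() at construction, so every cooldown window is open at startup; with the defaults, scale down cannot begin until 10 minutes after the autoscaler boots. A small illustration of the earliest-possible-scale-down computation (durations are the flag defaults from this commit; the 10s delete delay assumes the default scan interval):

    package main

    import (
    	"fmt"
    	"time"
    )

    func main() {
    	// NewStaticAutoscaler seeds every "last event" timestamp with the
    	// startup time, so each cooldown window is open at boot.
    	start := time.Now()

    	// The longest window determines the earliest possible scale down;
    	// with the defaults that is the 10m add delay.
    	windows := []time.Duration{
    		10 * time.Minute, // scale-down-delay-after-add
    		3 * time.Minute,  // scale-down-delay-after-failure
    		10 * time.Second, // scale-down-delay-after-delete (assumed scan interval)
    	}
    	earliest := start
    	for _, w := range windows {
    		if t := start.Add(w); t.After(earliest) {
    			earliest = t
    		}
    	}
    	fmt.Println("earliest scale down after boot:", earliest)
    }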

@@ -278,14 +280,16 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
 	}

 	// In dry run only utilization is updated
-	calculateUnneededOnly := a.lastScaleUpTime.Add(a.ScaleDownDelay).After(currentTime) ||
-		a.lastScaleDownFailedTrial.Add(a.ScaleDownTrialInterval).After(currentTime) ||
+	calculateUnneededOnly := a.lastScaleUpTime.Add(a.ScaleDownDelayAfterAdd).After(currentTime) ||
+		a.lastScaleDownFailTime.Add(a.ScaleDownDelayAfterFailure).After(currentTime) ||
+		a.lastScaleDownDeleteTime.Add(a.ScaleDownDelayAfterDelete).After(currentTime) ||
 		schedulablePodsPresent ||
 		scaleDown.nodeDeleteStatus.IsDeleteInProgress()

 	glog.V(4).Infof("Scale down status: unneededOnly=%v lastScaleUpTime=%s "+
-		"lastScaleDownFailedTrail=%s schedulablePodsPresent=%v", calculateUnneededOnly,
-		a.lastScaleUpTime, a.lastScaleDownFailedTrial, schedulablePodsPresent)
+		"lastScaleDownDeleteTime=%v lastScaleDownFailTime=%s schedulablePodsPresent=%v isDeleteInProgress=%v",
+		calculateUnneededOnly, a.lastScaleUpTime, a.lastScaleDownDeleteTime, a.lastScaleDownFailTime,
+		schedulablePodsPresent, scaleDown.nodeDeleteStatus.IsDeleteInProgress())

 	if !calculateUnneededOnly {
 		glog.V(4).Infof("Starting scale down")
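
The expression reads as: skip scale down while any cooldown window is still open, while unschedulable pods may still fit, or while a previous node deletion is in flight. A minimal, self-contained sketch of the same gating logic, with the struct fields passed in explicitly (the helper name and flat signature are illustrative, not part of the commit):

    package main

    import (
    	"fmt"
    	"time"
    )

    // scaleDownGated mirrors the calculateUnneededOnly expression above: it
    // reports whether scale down evaluation should be skipped because some
    // cooldown window is still open or other work is in flight.
    func scaleDownGated(now, lastAdd, lastFail, lastDelete time.Time,
    	afterAdd, afterFail, afterDelete time.Duration,
    	schedulablePodsPresent, deleteInProgress bool) bool {
    	return lastAdd.Add(afterAdd).After(now) ||
    		lastFail.Add(afterFail).After(now) ||
    		lastDelete.Add(afterDelete).After(now) ||
    		schedulablePodsPresent ||
    		deleteInProgress
    }

    func main() {
    	now := time.Now()
    	// The last scale up was 5 minutes ago: with a 10 minute add delay
    	// the gate is still closed even though the other windows expired.
    	gated := scaleDownGated(now,
    		now.Add(-5*time.Minute),  // last scale up
    		now.Add(-30*time.Minute), // last scale down failure
    		now.Add(-30*time.Minute), // last node deletion
    		10*time.Minute, 3*time.Minute, 10*time.Second,
    		false, false)
    	fmt.Println("scale down gated:", gated) // true
    }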

@@ -310,7 +314,9 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
 				return typedErr
 			}
 			if result == ScaleDownError {
-				a.lastScaleDownFailedTrial = currentTime
+				a.lastScaleDownFailTime = currentTime
+			} else if result == ScaleDownNodeDeleted {
+				a.lastScaleDownDeleteTime = currentTime
 			}
 		}
 	}
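
After each attempt, the timestamp matching the outcome is refreshed so that only the relevant cooldown restarts: a failure does not delay scale down after a successful deletion, and vice versa. A small sketch of that bookkeeping, using a stand-in result type instead of the real ScaleDown status constants:

    package main

    import (
    	"fmt"
    	"time"
    )

    // result stands in for the scale down status constants used above.
    type result int

    const (
    	scaleDownError result = iota
    	scaleDownNodeDeleted
    )

    func main() {
    	var lastFailTime, lastDeleteTime time.Time

    	// Mirrors the branch in RunOnce: a failure restarts only the
    	// failure cooldown; a deletion restarts only the delete cooldown.
    	record := func(r result, now time.Time) {
    		if r == scaleDownError {
    			lastFailTime = now
    		} else if r == scaleDownNodeDeleted {
    			lastDeleteTime = now
    		}
    	}

    	record(scaleDownNodeDeleted, time.Now())
    	fmt.Println("lastFailTime:", lastFailTime) // still zero
    	fmt.Println("lastDeleteTime:", lastDeleteTime)
    }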

@@ -188,10 +188,10 @@ func TestStaticAutoscalerRunOnce(t *testing.T) {
 	sd := NewScaleDown(context)

 	autoscaler := &StaticAutoscaler{AutoscalingContext: context,
 		ListerRegistry: listerRegistry,
 		lastScaleUpTime: time.Now(),
-		lastScaleDownFailedTrial: time.Now(),
+		lastScaleDownFailTime: time.Now(),
 		scaleDown: sd}

 	// MaxNodesTotal reached.
 	readyNodeListerMock.On("List").Return([]*apiv1.Node{n1}, nil).Once()

@@ -360,10 +360,10 @@ func TestStaticAutoscalerRunOnceWithAutoprovisionedEnabled(t *testing.T) {
 	sd := NewScaleDown(context)

 	autoscaler := &StaticAutoscaler{AutoscalingContext: context,
 		ListerRegistry: listerRegistry,
 		lastScaleUpTime: time.Now(),
-		lastScaleDownFailedTrial: time.Now(),
+		lastScaleDownFailTime: time.Now(),
 		scaleDown: sd}

 	// Scale up.
 	readyNodeListerMock.On("List").Return([]*apiv1.Node{n1}, nil).Once()

@@ -75,16 +75,18 @@ var (
 	namespace = flag.String("namespace", "kube-system", "Namespace in which cluster-autoscaler run. If a --configmap flag is also provided, ensure that the configmap exists in this namespace before CA runs.")
 	nodeGroupAutoDiscovery = flag.String("node-group-auto-discovery", "", "One or more definition(s) of node group auto-discovery. A definition is expressed `<name of discoverer per cloud provider>:[<key>[=<value>]]`. Only the `aws` cloud provider is currently supported. The only valid discoverer for it is `asg` and the valid key is `tag`. For example, specifying `--cloud-provider aws` and `--node-group-auto-discovery asg:tag=cluster-autoscaler/auto-discovery/enabled,kubernetes.io/cluster/<YOUR CLUSTER NAME>` results in ASGs tagged with `cluster-autoscaler/auto-discovery/enabled` and `kubernetes.io/cluster/<YOUR CLUSTER NAME>` to be considered as target node groups")
 	scaleDownEnabled = flag.Bool("scale-down-enabled", true, "Should CA scale down the cluster")
-	scaleDownDelay = flag.Duration("scale-down-delay", 10*time.Minute,
-		"Duration from the last scale up to the time when CA starts to check scale down options")
+	scaleDownDelayAfterAdd = flag.Duration("scale-down-delay-after-add", 10*time.Minute,
+		"How long after scale up that scale down evaluation resumes")
+	scaleDownDelayAfterDelete = flag.Duration("scale-down-delay-after-delete", *scanInterval,
+		"How long after node deletion that scale down evaluation resumes, defaults to scanInterval")
+	scaleDownDelayAfterFailure = flag.Duration("scale-down-delay-after-failure", 3*time.Minute,
+		"How long after scale down failure that scale down evaluation resumes")
 	scaleDownUnneededTime = flag.Duration("scale-down-unneeded-time", 10*time.Minute,
 		"How long a node should be unneeded before it is eligible for scale down")
 	scaleDownUnreadyTime = flag.Duration("scale-down-unready-time", 20*time.Minute,
 		"How long an unready node should be unneeded before it is eligible for scale down")
 	scaleDownUtilizationThreshold = flag.Float64("scale-down-utilization-threshold", 0.5,
 		"Node utilization level, defined as sum of requested resources divided by capacity, below which a node can be considered for scale down")
-	scaleDownTrialInterval = flag.Duration("scale-down-trial-interval", 1*time.Minute,
-		"How often scale down possiblity is check")
 	scaleDownNonEmptyCandidatesCount = flag.Int("scale-down-non-empty-candidates-count", 30,
 		"Maximum number of non empty nodes considered in one iteration as candidates for scale down with drain."+
 			"Lower value means better CA responsiveness but possible slower scale down latency."+
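
One subtlety in the flag block above: *scanInterval is dereferenced during package-variable initialization, before flag.Parse runs, so the default for --scale-down-delay-after-delete is scan-interval's compiled-in default, not any user-supplied --scan-interval value. A minimal reproduction of that pattern (the 10s default and the usage strings here are assumed for illustration):

    package main

    import (
    	"flag"
    	"fmt"
    	"time"
    )

    var (
    	// Stand-in for the real scan-interval flag; 10s is illustrative.
    	scanInterval = flag.Duration("scan-interval", 10*time.Second,
    		"How often the cluster is reevaluated")
    	// *scanInterval is read here, during package initialization and
    	// hence before flag.Parse, so this flag's default stays pinned to
    	// the compiled-in 10s even if the user passes --scan-interval=30s.
    	delayAfterDelete = flag.Duration("scale-down-delay-after-delete", *scanInterval,
    		"How long after node deletion that scale down evaluation resumes")
    )

    func main() {
    	flag.Parse()
    	fmt.Println("scan-interval:", *scanInterval)
    	fmt.Println("scale-down-delay-after-delete:", *delayAfterDelete)
    }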

@@ -158,9 +160,10 @@ func createAutoscalerOptions() core.AutoscalerOptions {
 		MinMemoryTotal: minMemoryTotal,
 		NodeGroups: nodeGroupsFlag,
 		UnregisteredNodeRemovalTime: *unregisteredNodeRemovalTime,
-		ScaleDownDelay: *scaleDownDelay,
+		ScaleDownDelayAfterAdd: *scaleDownDelayAfterAdd,
+		ScaleDownDelayAfterDelete: *scaleDownDelayAfterDelete,
+		ScaleDownDelayAfterFailure: *scaleDownDelayAfterFailure,
 		ScaleDownEnabled: *scaleDownEnabled,
-		ScaleDownTrialInterval: *scaleDownTrialInterval,
 		ScaleDownUnneededTime: *scaleDownUnneededTime,
 		ScaleDownUnreadyTime: *scaleDownUnreadyTime,
 		ScaleDownUtilizationThreshold: *scaleDownUtilizationThreshold,