Add a configurable minimum pod age before pods are considered for scale-up

- This is intended to address the issue described in https://github.com/kubernetes/autoscaler/issues/923
  - the delay is configurable via a CLI option
  - in production (on AWS) we set this to 2m
  - depending on your workload and environment, the delay could likely be set as low as 30s and still be effective
  - the default of 0 leaves the CA's behavior unchanged (the sketch below shows how these values are interpreted)
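For reference, a minimal standalone sketch (standard library only, hypothetical program, not part of this change) showing that values such as "2m" and "30s" are ordinary Go duration strings, which is the format the new CLI option accepts:

package main

import (
	"fmt"
	"time"
)

func main() {
	// The values used in the deployment notes above parse as Go durations.
	for _, v := range []string{"2m", "30s"} {
		d, err := time.ParseDuration(v)
		fmt.Println(v, "->", d, err)
	}
}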

Change-Id: I7e3f36bb48641faaf8a392cca01a12b07fb0ee35
Steve Scaffidi 2018-09-14 13:55:09 -04:00
parent 4b3357df41
commit 33b93cbc5f
3 changed files with 24 additions and 0 deletions


@@ -115,4 +115,6 @@ type AutoscalingOptions struct {
	ExpendablePodsPriorityCutoff int
	// Regional tells whether the cluster is regional.
	Regional bool
	// Pods newer than this will not be considered as unschedulable for scale-up.
	NewPodScaleUpBuffer time.Duration
}
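As a hedged sketch of the new field in use (the import path is an assumption inferred from the config package referenced in createAutoscalingOptions below; in practice the value is populated from the CLI flag added by this change):

package main

import (
	"time"

	// import path assumed; the diff only confirms the package name "config"
	"k8s.io/autoscaler/cluster-autoscaler/config"
)

func main() {
	// Minimal sketch: only the field introduced by this change is set; every
	// other option keeps its zero value.
	opts := config.AutoscalingOptions{
		NewPodScaleUpBuffer: 2 * time.Minute, // the production value mentioned in the commit message
	}
	_ = opts
}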


@@ -247,6 +247,9 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
		glog.V(4).Info("No schedulable pods")
	}
	// finally, filter out pods that are too "young" to safely be considered for a scale-up (delay is configurable)
	unschedulablePodsToHelp = a.filterOutYoungPods(unschedulablePodsToHelp, currentTime)
	if len(unschedulablePodsToHelp) == 0 {
		glog.V(1).Info("No unschedulable pods")
	} else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal {
@@ -355,6 +358,23 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
	return nil
}

// don't consider pods newer than newPodScaleUpBuffer as unschedulable
func (a *StaticAutoscaler) filterOutYoungPods(allUnschedulablePods []*apiv1.Pod, currentTime time.Time) []*apiv1.Pod {
	// only consider unschedulable pods older than the configured buffer
	var oldUnschedulablePods []*apiv1.Pod
	newPodScaleUpBuffer := a.AutoscalingOptions.NewPodScaleUpBuffer
	for _, pod := range allUnschedulablePods {
		podAge := currentTime.Sub(pod.CreationTimestamp.Time)
		if podAge > newPodScaleUpBuffer {
			oldUnschedulablePods = append(oldUnschedulablePods, pod)
		} else {
			glog.V(3).Infof("Pod %s is %.3f seconds old, too new to consider unschedulable", pod.Name, podAge.Seconds())
		}
	}
	return oldUnschedulablePods
}

// ExitCleanUp performs all necessary clean-ups when the autoscaler's exiting.
func (a *StaticAutoscaler) ExitCleanUp() {
	a.processors.CleanUp()
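Below is a self-contained sketch (hypothetical names, standard library only, not part of the commit) that mirrors the filtering decision above, including the boundary case: because the comparison is a strict '>', a pod exactly as old as the buffer is still held back:

package main

import (
	"fmt"
	"time"
)

// fakePod is a stand-in for *apiv1.Pod used only for this illustration; it
// models just the fields the filter reads.
type fakePod struct {
	name    string
	created time.Time
}

// keepForScaleUp mirrors the check in filterOutYoungPods: keep a pod only if
// it is strictly older than the configured buffer.
func keepForScaleUp(p fakePod, now time.Time, buffer time.Duration) bool {
	return now.Sub(p.created) > buffer
}

func main() {
	now := time.Now()
	buffer := 2 * time.Minute

	pods := []fakePod{
		{"fresh", now.Add(-30 * time.Second)},   // 30s old: held back
		{"boundary", now.Add(-2 * time.Minute)}, // exactly 2m old: strict '>' still holds it back
		{"old", now.Add(-5 * time.Minute)},      // old enough to be considered for scale-up
	}
	for _, p := range pods {
		fmt.Printf("%-8s considered for scale-up: %v\n", p.name, keepForScaleUp(p, now, buffer))
	}
}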


@@ -148,6 +148,7 @@ var (
	unremovableNodeRecheckTimeout = flag.Duration("unremovable-node-recheck-timeout", 5*time.Minute, "The timeout before we check again a node that couldn't be removed before")
	expendablePodsPriorityCutoff = flag.Int("expendable-pods-priority-cutoff", -10, "Pods with priority below cutoff will be expendable. They can be killed without any consideration during scale down and they don't cause scale up. Pods with null priority (PodPriority disabled) are non expendable.")
	regional = flag.Bool("regional", false, "Cluster is regional.")
	newPodScaleUpBuffer = flag.Duration("new-pod-scale-up-buffer", 0*time.Second, "Pods newer than this will not be considered for scale-up.")
)
func createAutoscalingOptions() config.AutoscalingOptions {
@@ -205,6 +206,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
		UnremovableNodeRecheckTimeout: *unremovableNodeRecheckTimeout,
		ExpendablePodsPriorityCutoff: *expendablePodsPriorityCutoff,
		Regional: *regional,
		NewPodScaleUpBuffer: *newPodScaleUpBuffer,
	}
}
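Finally, a minimal standalone sketch (hypothetical program, throwaway flag set; the flag name and zero default come from the registration above) showing that when the flag is not passed, the default of 0 leaves every unschedulable pod eligible, as before:

package main

import (
	"flag"
	"fmt"
	"time"
)

func main() {
	// Register the option on a separate flag set the same way main.go does.
	fs := flag.NewFlagSet("cluster-autoscaler", flag.ContinueOnError)
	buffer := fs.Duration("new-pod-scale-up-buffer", 0, "Pods newer than this will not be considered for scale-up.")

	// No arguments given: the default of 0 applies.
	if err := fs.Parse([]string{}); err != nil {
		fmt.Println("parse error:", err)
		return
	}
	fmt.Println("buffer:", *buffer) // prints 0s

	// With a zero buffer, any pod older than the current instant passes the
	// age check, so the filter changes nothing.
	podAge := 10 * time.Second
	fmt.Println("considered for scale-up:", podAge > *buffer) // true
}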