Add a configurable minimum pod age before pods are considered for scale-up

- This is intended to address the issue described in https://github.com/kubernetes/autoscaler/issues/923
  - the delay is configurable via a CLI option
  - in production (on AWS) we set this to 2m
  - depending on your workload and environment, the delay could likely be set as low as 30s and still be effective
  - the default of 0 leaves the CA's behavior unchanged (the sketch below shows how these values are interpreted)
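For reference, a minimal standalone sketch (standard library only, hypothetical program, not part of this change) showing that values such as "2m" and "30s" are ordinary Go duration strings, which is the format the new CLI option accepts:

package main

import (
	"fmt"
	"time"
)

func main() {
	// The values used in the deployment notes above parse as Go durations.
	for _, v := range []string{"2m", "30s"} {
		d, err := time.ParseDuration(v)
		fmt.Println(v, "->", d, err)
	}
}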

Change-Id: I7e3f36bb48641faaf8a392cca01a12b07fb0ee35
Steve Scaffidi 2018-09-14 13:55:09 -04:00
parent 4b3357df41
commit 33b93cbc5f
3 changed files with 24 additions and 0 deletions


@@ -115,4 +115,6 @@ type AutoscalingOptions struct {
	ExpendablePodsPriorityCutoff int
	// Regional tells whether the cluster is regional.
	Regional bool
	// Pods newer than this will not be considered as unschedulable for scale-up.
	NewPodScaleUpBuffer time.Duration
}
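As a hedged sketch of the new field in use (the import path is an assumption inferred from the config package referenced in createAutoscalingOptions below; in practice the value is populated from the CLI flag added by this change):

package main

import (
	"time"

	// import path assumed; the diff only confirms the package name "config"
	"k8s.io/autoscaler/cluster-autoscaler/config"
)

func main() {
	// Minimal sketch: only the field introduced by this change is set; every
	// other option keeps its zero value.
	opts := config.AutoscalingOptions{
		NewPodScaleUpBuffer: 2 * time.Minute, // the production value mentioned in the commit message
	}
	_ = opts
}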


@@ -247,6 +247,9 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
		glog.V(4).Info("No schedulable pods")
	}
	// finally, filter out pods that are too "young" to safely be considered for a scale-up (delay is configurable)
	unschedulablePodsToHelp = a.filterOutYoungPods(unschedulablePodsToHelp, currentTime)
	if len(unschedulablePodsToHelp) == 0 {
		glog.V(1).Info("No unschedulable pods")
	} else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal {
@@ -355,6 +358,23 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
	return nil
}

// don't consider pods newer than newPodScaleUpBuffer as unschedulable
func (a *StaticAutoscaler) filterOutYoungPods(allUnschedulablePods []*apiv1.Pod, currentTime time.Time) []*apiv1.Pod {
	// only consider unschedulable pods older than the configured buffer
	var oldUnschedulablePods []*apiv1.Pod
	newPodScaleUpBuffer := a.AutoscalingOptions.NewPodScaleUpBuffer
	for _, pod := range allUnschedulablePods {
		podAge := currentTime.Sub(pod.CreationTimestamp.Time)
		if podAge > newPodScaleUpBuffer {
			oldUnschedulablePods = append(oldUnschedulablePods, pod)
		} else {
			glog.V(3).Infof("Pod %s is %.3f seconds old, too new to consider unschedulable", pod.Name, podAge.Seconds())
		}
	}
	return oldUnschedulablePods
}

// ExitCleanUp performs all necessary clean-ups when the autoscaler's exiting.
func (a *StaticAutoscaler) ExitCleanUp() {
	a.processors.CleanUp()
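Below is a self-contained sketch (hypothetical names, standard library only, not part of the commit) that mirrors the filtering decision above, including the boundary case: because the comparison is a strict '>', a pod exactly as old as the buffer is still held back:

package main

import (
	"fmt"
	"time"
)

// fakePod is a stand-in for *apiv1.Pod used only for this illustration; it
// models just the fields the filter reads.
type fakePod struct {
	name    string
	created time.Time
}

// keepForScaleUp mirrors the check in filterOutYoungPods: keep a pod only if
// it is strictly older than the configured buffer.
func keepForScaleUp(p fakePod, now time.Time, buffer time.Duration) bool {
	return now.Sub(p.created) > buffer
}

func main() {
	now := time.Now()
	buffer := 2 * time.Minute

	pods := []fakePod{
		{"fresh", now.Add(-30 * time.Second)},   // 30s old: held back
		{"boundary", now.Add(-2 * time.Minute)}, // exactly 2m old: strict '>' still holds it back
		{"old", now.Add(-5 * time.Minute)},      // old enough to be considered for scale-up
	}
	for _, p := range pods {
		fmt.Printf("%-8s considered for scale-up: %v\n", p.name, keepForScaleUp(p, now, buffer))
	}
}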


@@ -148,6 +148,7 @@ var (
	unremovableNodeRecheckTimeout = flag.Duration("unremovable-node-recheck-timeout", 5*time.Minute, "The timeout before we check again a node that couldn't be removed before")
	expendablePodsPriorityCutoff = flag.Int("expendable-pods-priority-cutoff", -10, "Pods with priority below cutoff will be expendable. They can be killed without any consideration during scale down and they don't cause scale up. Pods with null priority (PodPriority disabled) are non expendable.")
	regional = flag.Bool("regional", false, "Cluster is regional.")
	newPodScaleUpBuffer = flag.Duration("new-pod-scale-up-buffer", 0*time.Second, "Pods newer than this will not be considered for scale-up.")
)
func createAutoscalingOptions() config.AutoscalingOptions {
@@ -205,6 +206,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
		UnremovableNodeRecheckTimeout: *unremovableNodeRecheckTimeout,
		ExpendablePodsPriorityCutoff: *expendablePodsPriorityCutoff,
		Regional: *regional,
		NewPodScaleUpBuffer: *newPodScaleUpBuffer,
	}
}
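Finally, a minimal standalone sketch (hypothetical program, throwaway flag set; the flag name and zero default come from the registration above) showing that when the flag is not passed, the default of 0 leaves every unschedulable pod eligible, as before:

package main

import (
	"flag"
	"fmt"
	"time"
)

func main() {
	// Register the option on a separate flag set the same way main.go does.
	fs := flag.NewFlagSet("cluster-autoscaler", flag.ContinueOnError)
	buffer := fs.Duration("new-pod-scale-up-buffer", 0, "Pods newer than this will not be considered for scale-up.")

	// No arguments given: the default of 0 applies.
	if err := fs.Parse([]string{}); err != nil {
		fmt.Println("parse error:", err)
		return
	}
	fmt.Println("buffer:", *buffer) // prints 0s

	// With a zero buffer, any pod older than the current instant passes the
	// age check, so the filter changes nothing.
	podAge := 10 * time.Second
	fmt.Println("considered for scale-up:", podAge > *buffer) // true
}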