From 15b10c8f6760f3afc213af11c9e5ad4898c0fd0c Mon Sep 17 00:00:00 2001 From: Marcin Wielgus Date: Wed, 20 Dec 2017 21:19:10 +0100 Subject: [PATCH] Skip iteration if pending pods are too new --- cluster-autoscaler/core/static_autoscaler.go | 17 ++++++++++++++++- cluster-autoscaler/core/utils.go | 10 ++++++++++ cluster-autoscaler/core/utils_test.go | 12 ++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go index 7cc1ad47ba..719849c854 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -32,6 +32,11 @@ import ( "github.com/golang/glog" ) +const ( + // How old the oldest unschedulable pod should be before starting scale up. + unschedulablePodTimeBuffer = 2 * time.Second +) + // StaticAutoscaler is an autoscaler which has all the core functionality of a CA but without the reconfiguration feature type StaticAutoscaler struct { // AutoscalingContext consists of validated settings and options for this autoscaler @@ -240,10 +245,19 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError glog.V(4).Info("No schedulable pods") } + // If all pending pods are new we may want to skip a real scale down (just like if the pods were handled). + allPendingPodsToHelpAreNew := false + if len(unschedulablePodsToHelp) == 0 { glog.V(1).Info("No unschedulable pods") } else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal { glog.V(1).Info("Max total nodes in cluster reached") + } else if getOldestCreateTime(unschedulablePodsToHelp).Add(unschedulablePodTimeBuffer).After(currentTime) { + // The assumption here is that these pods have been created very recently and probably there + // are more pods to come. In theory we could check the newest pod time but then if pods were created + // slowly but at the pace of 1 every 2 seconds then no scale up would be triggered for a long time.
+ allPendingPodsToHelpAreNew = true + glog.V(1).Info("Unschedulable pods are very new, waiting one iteration for more") } else { daemonsets, err := a.ListerRegistry.DaemonSetLister().List() if err != nil { @@ -301,7 +315,8 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError a.lastScaleDownFailTime.Add(a.ScaleDownDelayAfterFailure).After(currentTime) || a.lastScaleDownDeleteTime.Add(a.ScaleDownDelayAfterDelete).After(currentTime) || schedulablePodsPresent || - scaleDown.nodeDeleteStatus.IsDeleteInProgress() + scaleDown.nodeDeleteStatus.IsDeleteInProgress() || + allPendingPodsToHelpAreNew glog.V(4).Infof("Scale down status: unneededOnly=%v lastScaleUpTime=%s "+ "lastScaleDownDeleteTime=%v lastScaleDownFailTime=%s schedulablePodsPresent=%v isDeleteInProgress=%v", diff --git a/cluster-autoscaler/core/utils.go b/cluster-autoscaler/core/utils.go index 8c2c9ccf9d..9aeb1301df 100644 --- a/cluster-autoscaler/core/utils.go +++ b/cluster-autoscaler/core/utils.go @@ -491,3 +491,13 @@ func UpdateClusterStateMetrics(csr *clusterstate.ClusterStateRegistry) { readiness := csr.GetClusterReadiness() metrics.UpdateNodesCount(readiness.Ready, readiness.Unready+readiness.LongNotStarted, readiness.NotStarted) } + +func getOldestCreateTime(pods []*apiv1.Pod) time.Time { + oldest := time.Now() + for _, pod := range pods { + if oldest.After(pod.CreationTimestamp.Time) { + oldest = pod.CreationTimestamp.Time + } + } + return oldest +} diff --git a/cluster-autoscaler/core/utils_test.go b/cluster-autoscaler/core/utils_test.go index 8bb1a74fe2..d596d2c08a 100644 --- a/cluster-autoscaler/core/utils_test.go +++ b/cluster-autoscaler/core/utils_test.go @@ -527,3 +527,15 @@ func TestGetNodeCoresAndMemory(t *testing.T) { _, _, err = getNodeCoresAndMemory(node) assert.Error(t, err) } + +func TestGetOldestPod(t *testing.T) { + p1 := BuildTestPod("p1", 500, 1000) + p1.CreationTimestamp = metav1.NewTime(time.Now().Add(-1 * time.Minute)) + p2 := BuildTestPod("p2", 500, 
1000) + p2.CreationTimestamp = metav1.NewTime(time.Now().Add(+1 * time.Minute)) + p3 := BuildTestPod("p3", 500, 1000) + p3.CreationTimestamp = metav1.NewTime(time.Now()) + + assert.Equal(t, p1.CreationTimestamp.Time, getOldestCreateTime([]*apiv1.Pod{p1, p2, p3})) + assert.Equal(t, p1.CreationTimestamp.Time, getOldestCreateTime([]*apiv1.Pod{p3, p2, p1})) +}