Skip iteration if pending pods are too new

Marcin Wielgus 2017-12-20 21:19:10 +01:00
parent 8225983307
commit 15b10c8f67
3 changed files with 38 additions and 1 deletion

View File

@@ -32,6 +32,11 @@ import (
"github.com/golang/glog"
)
const (
// How old the oldest unschedulable pod should be before starting scale up.
unschedulablePodTimeBuffer = 2 * time.Second
)
// StaticAutoscaler is an autoscaler which has all the core functionality of a CA but without the reconfiguration feature
type StaticAutoscaler struct {
// AutoscalingContext consists of validated settings and options for this autoscaler
@@ -240,10 +245,19 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
glog.V(4).Info("No schedulable pods")
}
// If all pending pods are new, we may want to skip a real scale down (just as if the pods had been handled).
allPendingPodsToHelpAreNew := false
if len(unschedulablePodsToHelp) == 0 {
glog.V(1).Info("No unschedulable pods")
} else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal {
glog.V(1).Info("Max total nodes in cluster reached")
} else if getOldestCreateTime(unschedulablePodsToHelp).Add(unschedulablePodTimeBuffer).After(currentTime) {
// The assumption here is that these pods have been created very recently and there are probably
// more pods to come. In theory we could check the newest pod time, but then if pods were created
// slowly, at a pace of one every 2 seconds, no scale up would be triggered for a long time.
allPendingPodsToHelpAreNew = true
glog.V(1).Info("Unschedulable pods are very new, waiting one iteration for more")
} else {
daemonsets, err := a.ListerRegistry.DaemonSetLister().List()
if err != nil {
@@ -301,7 +315,8 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
a.lastScaleDownFailTime.Add(a.ScaleDownDelayAfterFailure).After(currentTime) ||
a.lastScaleDownDeleteTime.Add(a.ScaleDownDelayAfterDelete).After(currentTime) ||
schedulablePodsPresent ||
scaleDown.nodeDeleteStatus.IsDeleteInProgress()
scaleDown.nodeDeleteStatus.IsDeleteInProgress() ||
allPendingPodsToHelpAreNew
glog.V(4).Infof("Scale down status: unneededOnly=%v lastScaleUpTime=%s "+
"lastScaleDownDeleteTime=%v lastScaleDownFailTime=%s schedulablePodsPresent=%v isDeleteInProgress=%v",

View File

@@ -491,3 +491,13 @@ func UpdateClusterStateMetrics(csr *clusterstate.ClusterStateRegistry) {
readiness := csr.GetClusterReadiness()
metrics.UpdateNodesCount(readiness.Ready, readiness.Unready+readiness.LongNotStarted, readiness.NotStarted)
}
// getOldestCreateTime returns the oldest creation timestamp among the given pods;
// for an empty list it returns time.Now().
func getOldestCreateTime(pods []*apiv1.Pod) time.Time {
oldest := time.Now()
for _, pod := range pods {
if oldest.After(pod.CreationTimestamp.Time) {
oldest = pod.CreationTimestamp.Time
}
}
return oldest
}

View File

@@ -527,3 +527,15 @@ func TestGetNodeCoresAndMemory(t *testing.T) {
_, _, err = getNodeCoresAndMemory(node)
assert.Error(t, err)
}
func TestGetOldestPod(t *testing.T) {
p1 := BuildTestPod("p1", 500, 1000)
p1.CreationTimestamp = metav1.NewTime(time.Now().Add(-1 * time.Minute))
p2 := BuildTestPod("p2", 500, 1000)
p2.CreationTimestamp = metav1.NewTime(time.Now().Add(+1 * time.Minute))
p3 := BuildTestPod("p3", 500, 1000)
p3.CreationTimestamp = metav1.NewTime(time.Now())
assert.Equal(t, p1.CreationTimestamp.Time, getOldestCreateTime([]*apiv1.Pod{p1, p2, p3}))
assert.Equal(t, p1.CreationTimestamp.Time, getOldestCreateTime([]*apiv1.Pod{p3, p2, p1}))
}