Break node categorization in scale down planner on timeout.
parent 170cf0f2aa
commit bae587d20c
@@ -119,6 +119,9 @@ type AutoscalingOptions struct {
 	// The formula to calculate additional candidates number is following:
 	// max(#nodes * ScaleDownCandidatesPoolRatio, ScaleDownCandidatesPoolMinCount)
 	ScaleDownCandidatesPoolMinCount int
+	// ScaleDownSimulationTimeout defines the maximum time that can be
+	// spent on scale down simulation.
+	ScaleDownSimulationTimeout time.Duration
 	// NodeDeletionDelayTimeout is maximum time CA waits for removing delay-deletion.cluster-autoscaler.kubernetes.io/ annotations before deleting the node.
 	NodeDeletionDelayTimeout time.Duration
 	// WriteStatusConfigMap tells if the status information should be written to a ConfigMap
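
For illustration, the new field sits next to the existing duration options and is set the same way; a minimal sketch of constructing the options (the surrounding field values are placeholders, and the k8s.io/autoscaler/cluster-autoscaler/config import path is assumed):

package main

import (
	"fmt"
	"time"

	"k8s.io/autoscaler/cluster-autoscaler/config"
)

func main() {
	// Placeholder values; only ScaleDownSimulationTimeout is new in this commit.
	opts := config.AutoscalingOptions{
		ScaleDownCandidatesPoolMinCount: 50,
		ScaleDownSimulationTimeout:      5 * time.Minute,
		NodeDeletionDelayTimeout:        2 * time.Minute,
	}
	fmt.Println("simulation timeout:", opts.ScaleDownSimulationTimeout)
}
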
@@ -285,10 +285,15 @@ func (p *Planner) categorizeNodes(podDestinations map[string]bool, scaleDownCand
 		p.unremovableNodes.Add(n)
 	}
 	p.nodeUtilizationMap = utilizationMap
-	// TODO(x13n): break on timeout. Figure out how to handle nodes
-	// identified as unneeded in previous iteration, but now
-	// skipped due to timeout.
-	for _, node := range currentlyUnneededNodeNames {
+	timer := time.NewTimer(p.context.ScaleDownSimulationTimeout)
+simulationLoop:
+	for i, node := range currentlyUnneededNodeNames {
+		select {
+		case <-timer.C:
+			klog.Warningf("%d out of %d nodes skipped in scale down simulation due to timeout.", len(currentlyUnneededNodeNames)-i, len(currentlyUnneededNodeNames))
+			break simulationLoop
+		default:
+		}
 		removable, unremovable := p.rs.SimulateNodeRemoval(node, podDestinations, p.latestUpdate, pdbs)
 		if unremovable != nil {
 			unremovableCount += 1
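
The pattern above (arm a timer once before the loop, then poll it each iteration with a non-blocking select) can be exercised in isolation. A minimal sketch with hypothetical node names and per-node work; note the labeled break, which is required because a bare break inside select exits only the select statement, not the surrounding loop:

package main

import (
	"fmt"
	"time"
)

// simulate stands in for the per-node removal simulation; the name and
// the sleep duration are made up for this sketch.
func simulate(node string) {
	time.Sleep(10 * time.Millisecond)
	fmt.Println("simulated removal of", node)
}

func main() {
	nodes := []string{"node-1", "node-2", "node-3", "node-4", "node-5"}
	// Arm the timeout once, before the loop, mirroring the planner change.
	timer := time.NewTimer(25 * time.Millisecond)
	defer timer.Stop()

simulation:
	for i, node := range nodes {
		select {
		case <-timer.C:
			// Timer fired: report how much work is being skipped and stop.
			// A bare break here would only exit the select statement and
			// the loop would keep running.
			fmt.Printf("%d out of %d nodes skipped due to timeout\n", len(nodes)-i, len(nodes))
			break simulation
		default:
			// Timer has not expired yet; fall through and process this node.
		}
		simulate(node)
	}
}

Because the select has a default clause, the timeout check itself never blocks an iteration; the loop pays only the cost of a channel poll per node.
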
@@ -395,7 +395,7 @@ func TestUpdateClusterState(t *testing.T) {
 			assert.NoError(t, err)
 			registry := kube_util.NewListerRegistry(nil, nil, nil, nil, nil, nil, nil, nil, rsLister, nil)
 			provider := testprovider.NewTestCloudProvider(nil, nil)
-			context, err := NewScaleTestAutoscalingContext(config.AutoscalingOptions{}, &fake.Clientset{}, registry, provider, nil, nil)
+			context, err := NewScaleTestAutoscalingContext(config.AutoscalingOptions{ScaleDownSimulationTimeout: 5 * time.Minute}, &fake.Clientset{}, registry, provider, nil, nil)
 			assert.NoError(t, err)
 			clustersnapshot.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, tc.nodes, tc.pods)
 			deleteOptions := simulator.NodeDeleteOptions{}
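
A plausible reason the test can no longer pass a zero-valued AutoscalingOptions (this is an inference from the timer semantics, not stated in the commit): with a zero ScaleDownSimulationTimeout, time.NewTimer(0) fires essentially immediately, so the simulation loop would skip every candidate. A standalone sketch:

package main

import (
	"fmt"
	"time"
)

func main() {
	// A zero duration arms a timer that fires as soon as possible, which
	// is presumably why the test pins the timeout to a generous 5 minutes.
	timer := time.NewTimer(0)
	<-timer.C // returns almost immediately
	fmt.Println("a zero timeout has already expired before any work starts")
}
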
@@ -211,6 +211,7 @@ var (
 	skipNodesWithLocalStorage = flag.Bool("skip-nodes-with-local-storage", true, "If true cluster autoscaler will never delete nodes with pods with local storage, e.g. EmptyDir or HostPath")
 	minReplicaCount = flag.Int("min-replica-count", 0, "Minimum number or replicas that a replica set or replication controller should have to allow their pods deletion in scale down")
 	nodeDeleteDelayAfterTaint = flag.Duration("node-delete-delay-after-taint", 5*time.Second, "How long to wait before deleting a node after tainting it")
+	scaleDownSimulationTimeout = flag.Duration("scale-down-simulation-timeout", 5*time.Minute, "How long should we run scale down simulation.")
 )

 func createAutoscalingOptions() config.AutoscalingOptions {
@@ -307,6 +308,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
 		SkipNodesWithLocalStorage: *skipNodesWithLocalStorage,
 		MinReplicaCount: *minReplicaCount,
 		NodeDeleteDelayAfterTaint: *nodeDeleteDelayAfterTaint,
+		ScaleDownSimulationTimeout: *scaleDownSimulationTimeout,
 	}
 }
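
The flag name and default above come straight from the diff; flag.Duration accepts any value that time.ParseDuration understands, so the timeout can be overridden at startup. A standalone sketch of how the flag parses (the surrounding program is illustrative, not the autoscaler's main):

package main

import (
	"flag"
	"fmt"
	"time"
)

// Flag name, default, and help text mirror the diff above.
var scaleDownSimulationTimeout = flag.Duration("scale-down-simulation-timeout", 5*time.Minute, "How long should we run scale down simulation.")

func main() {
	// Accepts anything time.ParseDuration understands,
	// e.g. --scale-down-simulation-timeout=2m30s
	flag.Parse()
	fmt.Println("scale down simulation timeout:", *scaleDownSimulationTimeout)
}
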