Break node categorization in scale down planner on timeout.
parent 170cf0f2aa
commit bae587d20c
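Introduces a ScaleDownSimulationTimeout option (flag --scale-down-simulation-timeout, default 5m) and checks it in the planner's node categorization loop, logging how many candidate nodes were skipped when the timeout fires.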
@@ -119,6 +119,9 @@ type AutoscalingOptions struct {
 	// The formula to calculate additional candidates number is following:
 	// max(#nodes * ScaleDownCandidatesPoolRatio, ScaleDownCandidatesPoolMinCount)
 	ScaleDownCandidatesPoolMinCount int
+	// ScaleDownSimulationTimeout defines the maximum time that can be
+	// spent on scale down simulation.
+	ScaleDownSimulationTimeout time.Duration
 	// NodeDeletionDelayTimeout is maximum time CA waits for removing delay-deletion.cluster-autoscaler.kubernetes.io/ annotations before deleting the node.
 	NodeDeletionDelayTimeout time.Duration
 	// WriteStatusConfigMap tells if the status information should be written to a ConfigMap
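As a side note on the context lines above, the additional-candidates formula works out as in this small sketch (illustrative only: the function name and the sample ratio/count values are assumptions, not autoscaler code):

package main

import "fmt"

// additionalCandidates mirrors the formula quoted in the comment above:
// max(#nodes * ScaleDownCandidatesPoolRatio, ScaleDownCandidatesPoolMinCount)
func additionalCandidates(nodeCount int, poolRatio float64, poolMinCount int) int {
	n := int(float64(nodeCount) * poolRatio)
	if n < poolMinCount {
		return poolMinCount
	}
	return n
}

func main() {
	fmt.Println(additionalCandidates(1000, 0.1, 50)) // ratio dominates: 100
	fmt.Println(additionalCandidates(100, 0.1, 50))  // min count dominates: 50
}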
@@ -285,10 +285,14 @@ func (p *Planner) categorizeNodes(podDestinations map[string]bool, scaleDownCand
 		p.unremovableNodes.Add(n)
 	}
 	p.nodeUtilizationMap = utilizationMap
-	for _, node := range currentlyUnneededNodeNames {
-		// TODO(x13n): break on timeout. Figure out how to handle nodes
-		// identified as unneeded in previous iteration, but now
-		// skipped due to timeout.
+	timer := time.NewTimer(p.context.ScaleDownSimulationTimeout)
+	for i, node := range currentlyUnneededNodeNames {
+		select {
+		case <-timer.C:
+			klog.Warningf("%d out of %d nodes skipped in scale down simulation due to timeout.", len(currentlyUnneededNodeNames)-i, len(currentlyUnneededNodeNames))
+			break
+		default:
+		}
 		removable, unremovable := p.rs.SimulateNodeRemoval(node, podDestinations, p.latestUpdate, pdbs)
 		if unremovable != nil {
 			unremovableCount += 1
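A Go subtlety in the hunk above: a plain break inside a select terminates only the select statement, not the enclosing for loop, so once the timer fires this loop logs the warning once and then keeps simulating the remaining nodes. Exiting the loop itself requires a labeled break. A minimal, self-contained sketch of that early-exit pattern (the simLoop label and simulate helper are illustrative names, not autoscaler code):

package main

import (
	"fmt"
	"time"
)

// simulate stands in for the per-node simulation step
// (SimulateNodeRemoval in the hunk above); purely illustrative.
func simulate(node string) {
	time.Sleep(20 * time.Millisecond)
	fmt.Println("simulated removal of", node)
}

func main() {
	nodes := []string{"n1", "n2", "n3", "n4"}
	timer := time.NewTimer(50 * time.Millisecond)

simLoop:
	for i, node := range nodes {
		select {
		case <-timer.C:
			// The labeled break leaves the loop. A plain break here
			// would only terminate the select, and the remaining
			// nodes would still be simulated.
			fmt.Printf("%d out of %d nodes skipped due to timeout\n", len(nodes)-i, len(nodes))
			break simLoop
		default:
		}
		simulate(node)
	}
}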
@@ -395,7 +395,7 @@ func TestUpdateClusterState(t *testing.T) {
 	assert.NoError(t, err)
 	registry := kube_util.NewListerRegistry(nil, nil, nil, nil, nil, nil, nil, nil, rsLister, nil)
 	provider := testprovider.NewTestCloudProvider(nil, nil)
-	context, err := NewScaleTestAutoscalingContext(config.AutoscalingOptions{}, &fake.Clientset{}, registry, provider, nil, nil)
+	context, err := NewScaleTestAutoscalingContext(config.AutoscalingOptions{ScaleDownSimulationTimeout: 5 * time.Minute}, &fake.Clientset{}, registry, provider, nil, nil)
 	assert.NoError(t, err)
 	clustersnapshot.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, tc.nodes, tc.pods)
 	deleteOptions := simulator.NodeDeleteOptions{}
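The test now has to set ScaleDownSimulationTimeout explicitly: with the zero-value config.AutoscalingOptions{} used before, time.NewTimer(0) in the new categorization loop would fire almost immediately, hitting the timeout branch from the very first candidate.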
@@ -211,6 +211,7 @@ var (
 	skipNodesWithLocalStorage = flag.Bool("skip-nodes-with-local-storage", true, "If true cluster autoscaler will never delete nodes with pods with local storage, e.g. EmptyDir or HostPath")
 	minReplicaCount = flag.Int("min-replica-count", 0, "Minimum number or replicas that a replica set or replication controller should have to allow their pods deletion in scale down")
 	nodeDeleteDelayAfterTaint = flag.Duration("node-delete-delay-after-taint", 5*time.Second, "How long to wait before deleting a node after tainting it")
+	scaleDownSimulationTimeout = flag.Duration("scale-down-simulation-timeout", 5*time.Minute, "How long should we run scale down simulation.")
 )
 
 func createAutoscalingOptions() config.AutoscalingOptions {
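The flag defaults to 5 minutes, matching the value wired into the test above; it can be tuned on the command line, e.g. --scale-down-simulation-timeout=2m.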
@@ -307,6 +308,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
 		SkipNodesWithLocalStorage: *skipNodesWithLocalStorage,
 		MinReplicaCount: *minReplicaCount,
 		NodeDeleteDelayAfterTaint: *nodeDeleteDelayAfterTaint,
+		ScaleDownSimulationTimeout: *scaleDownSimulationTimeout,
 	}
 }