cluster-autoscaler: Add --max-pod-eviction-time flag

Add a flag to allow the user to configure the MaxPodEvictionTime to values
other than the default 2m. This is needed in cases where a pod takes more than
2 minutes to be evicted.

Signed-off-by: Grigoris Thanasoulas <gregth@arrikto.com>
This commit is contained in:
Grigoris Thanasoulas 2021-08-25 12:38:32 +03:00
parent 561a9da9e4
commit 719a53e8d7
3 changed files with 5 additions and 3 deletions

View File

@ -147,6 +147,8 @@ type AutoscalingOptions struct {
MaxBulkSoftTaintCount int
// MaxBulkSoftTaintTime sets the maximum duration of single run of PreferNoSchedule tainting.
MaxBulkSoftTaintTime time.Duration
// MaxPodEvictionTime sets the maximum time CA tries to evict a pod before giving up.
MaxPodEvictionTime time.Duration
// IgnoredTaints is a list of taints to ignore when considering a node template for scheduling.
IgnoredTaints []string
// BalancingExtraIgnoredLabels is a list of labels to additionally ignore when comparing if two node groups are similar.

View File

@ -68,8 +68,6 @@ const (
MaxKubernetesEmptyNodeDeletionTime = 3 * time.Minute
// MaxCloudProviderNodeDeletionTime is the maximum time needed by cloud provider to delete a node.
MaxCloudProviderNodeDeletionTime = 5 * time.Minute
// MaxPodEvictionTime is the maximum time CA tries to evict a pod before giving up.
MaxPodEvictionTime = 2 * time.Minute
// EvictionRetryTime is the time after CA retries failed pod eviction.
EvictionRetryTime = 10 * time.Second
// PodEvictionHeadroom is the extra time we wait to catch situations when the pod is ignoring SIGTERM and
@ -1118,7 +1116,7 @@ func (sd *ScaleDown) deleteNode(node *apiv1.Node, pods []*apiv1.Pod, daemonSetPo
daemonSetPods = daemonset.PodsToEvict(daemonSetPods, sd.context.DaemonSetEvictionForOccupiedNodes)
// attempt drain
evictionResults, err := drainNode(node, pods, daemonSetPods, sd.context.ClientSet, sd.context.Recorder, sd.context.MaxGracefulTerminationSec, MaxPodEvictionTime, EvictionRetryTime, PodEvictionHeadroom)
evictionResults, err := drainNode(node, pods, daemonSetPods, sd.context.ClientSet, sd.context.Recorder, sd.context.MaxGracefulTerminationSec, sd.context.AutoscalingOptions.MaxPodEvictionTime, EvictionRetryTime, PodEvictionHeadroom)
if err != nil {
return status.NodeDeleteResult{ResultType: status.NodeDeleteErrorFailedToEvictPods, Err: err, PodEvictionResults: evictionResults}
}

View File

@ -140,6 +140,7 @@ var (
okTotalUnreadyCount = flag.Int("ok-total-unready-count", 3, "Number of allowed unready nodes, irrespective of max-total-unready-percentage")
scaleUpFromZero = flag.Bool("scale-up-from-zero", true, "Should CA scale up when there 0 ready nodes.")
maxNodeProvisionTime = flag.Duration("max-node-provision-time", 15*time.Minute, "Maximum time CA waits for node to be provisioned")
maxPodEvictionTime = flag.Duration("max-pod-eviction-time", 2*time.Minute, "Maximum time CA tries to evict a pod before giving up")
nodeGroupsFlag = multiStringFlag(
"nodes",
"sets min,max size and other configuration data for a node group in a format accepted by cloud provider. Can be used multiple times. Format: <min>:<max>:<other...>")
@ -234,6 +235,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
MaxEmptyBulkDelete: *maxEmptyBulkDeleteFlag,
MaxGracefulTerminationSec: *maxGracefulTerminationFlag,
MaxNodeProvisionTime: *maxNodeProvisionTime,
MaxPodEvictionTime: *maxPodEvictionTime,
MaxNodesTotal: *maxNodesTotal,
MaxCoresTotal: maxCoresTotal,
MinCoresTotal: minCoresTotal,