cluster-autoscaler: Add --max-pod-eviction-time flag

Add a flag that allows the user to configure MaxPodEvictionTime to a value other than the default 2m. This is needed in cases where a pod takes more than 2 minutes to be evicted. Signed-off-by: Grigoris Thanasoulas <gregth@arrikto.com>
2021-08-25 12:38:32 +03:00 · 2021-08-25 12:38:32 +03:00 · 719a53e8d7
parent 561a9da9e4
commit 719a53e8d7
3 changed files with 5 additions and 3 deletions
--- a/cluster-autoscaler/config/autoscaling_options.go
+++ b/cluster-autoscaler/config/autoscaling_options.go
@ -147,6 +147,8 @@ type AutoscalingOptions struct {
 	MaxBulkSoftTaintCount int
 	// MaxBulkSoftTaintTime sets the maximum duration of single run of PreferNoSchedule tainting.
 	MaxBulkSoftTaintTime time.Duration
+	// MaxPodEvictionTime sets the maximum time CA tries to evict a pod before giving up.
+	MaxPodEvictionTime time.Duration
 	// IgnoredTaints is a list of taints to ignore when considering a node template for scheduling.
 	IgnoredTaints []string
 	// BalancingExtraIgnoredLabels is a list of labels to additionally ignore when comparing if two node groups are similar.
--- a/cluster-autoscaler/core/scaledown/legacy/legacy.go
+++ b/cluster-autoscaler/core/scaledown/legacy/legacy.go
@ -68,8 +68,6 @@ const (
 	MaxKubernetesEmptyNodeDeletionTime = 3 * time.Minute
 	// MaxCloudProviderNodeDeletionTime is the maximum time needed by cloud provider to delete a node.
 	MaxCloudProviderNodeDeletionTime = 5 * time.Minute
-	// MaxPodEvictionTime is the maximum time CA tries to evict a pod before giving up.
-	MaxPodEvictionTime = 2 * time.Minute
 	// EvictionRetryTime is the time after CA retries failed pod eviction.
 	EvictionRetryTime = 10 * time.Second
 	// PodEvictionHeadroom is the extra time we wait to catch situations when the pod is ignoring SIGTERM and
@ -1118,7 +1116,7 @@ func (sd *ScaleDown) deleteNode(node *apiv1.Node, pods []*apiv1.Pod, daemonSetPo
 	daemonSetPods = daemonset.PodsToEvict(daemonSetPods, sd.context.DaemonSetEvictionForOccupiedNodes)

 	// attempt drain
-	evictionResults, err := drainNode(node, pods, daemonSetPods, sd.context.ClientSet, sd.context.Recorder, sd.context.MaxGracefulTerminationSec, MaxPodEvictionTime, EvictionRetryTime, PodEvictionHeadroom)
+	evictionResults, err := drainNode(node, pods, daemonSetPods, sd.context.ClientSet, sd.context.Recorder, sd.context.MaxGracefulTerminationSec, sd.context.AutoscalingOptions.MaxPodEvictionTime, EvictionRetryTime, PodEvictionHeadroom)
 	if err != nil {
 		return status.NodeDeleteResult{ResultType: status.NodeDeleteErrorFailedToEvictPods, Err: err, PodEvictionResults: evictionResults}
 	}
--- a/cluster-autoscaler/main.go
+++ b/cluster-autoscaler/main.go
@ -140,6 +140,7 @@ var (
 	okTotalUnreadyCount        = flag.Int("ok-total-unready-count", 3, "Number of allowed unready nodes, irrespective of max-total-unready-percentage")
 	scaleUpFromZero            = flag.Bool("scale-up-from-zero", true, "Should CA scale up when there 0 ready nodes.")
 	maxNodeProvisionTime       = flag.Duration("max-node-provision-time", 15*time.Minute, "Maximum time CA waits for node to be provisioned")
+	maxPodEvictionTime         = flag.Duration("max-pod-eviction-time", 2*time.Minute, "Maximum time CA tries to evict a pod before giving up")
 	nodeGroupsFlag             = multiStringFlag(
 		"nodes",
 		"sets min,max size and other configuration data for a node group in a format accepted by cloud provider. Can be used multiple times. Format: <min>:<max>:<other...>")
@ -234,6 +235,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
 		MaxEmptyBulkDelete:                 *maxEmptyBulkDeleteFlag,
 		MaxGracefulTerminationSec:          *maxGracefulTerminationFlag,
 		MaxNodeProvisionTime:               *maxNodeProvisionTime,
+		MaxPodEvictionTime:                 *maxPodEvictionTime,
 		MaxNodesTotal:                      *maxNodesTotal,
 		MaxCoresTotal:                      maxCoresTotal,
 		MinCoresTotal:                      minCoresTotal,