cluster-autoscaler: Add --max-pod-eviction-time flag
Add a flag to allow the user to configure the MaxPodEvictionTime to values other than the default 2m. This is needed in cases where a pod takes more than 2 minutes to be evicted. Signed-off-by: Grigoris Thanasoulas <gregth@arrikto.com>
This commit is contained in:
parent
561a9da9e4
commit
719a53e8d7
|
|
@ -147,6 +147,8 @@ type AutoscalingOptions struct {
|
||||||
MaxBulkSoftTaintCount int
|
MaxBulkSoftTaintCount int
|
||||||
// MaxBulkSoftTaintTime sets the maximum duration of single run of PreferNoSchedule tainting.
|
// MaxBulkSoftTaintTime sets the maximum duration of single run of PreferNoSchedule tainting.
|
||||||
MaxBulkSoftTaintTime time.Duration
|
MaxBulkSoftTaintTime time.Duration
|
||||||
|
// MaxPodEvictionTime sets the maximum time CA tries to evict a pod before giving up.
|
||||||
|
MaxPodEvictionTime time.Duration
|
||||||
// IgnoredTaints is a list of taints to ignore when considering a node template for scheduling.
|
// IgnoredTaints is a list of taints to ignore when considering a node template for scheduling.
|
||||||
IgnoredTaints []string
|
IgnoredTaints []string
|
||||||
// BalancingExtraIgnoredLabels is a list of labels to additionally ignore when comparing if two node groups are similar.
|
// BalancingExtraIgnoredLabels is a list of labels to additionally ignore when comparing if two node groups are similar.
|
||||||
|
|
|
||||||
|
|
@ -68,8 +68,6 @@ const (
|
||||||
MaxKubernetesEmptyNodeDeletionTime = 3 * time.Minute
|
MaxKubernetesEmptyNodeDeletionTime = 3 * time.Minute
|
||||||
// MaxCloudProviderNodeDeletionTime is the maximum time needed by cloud provider to delete a node.
|
// MaxCloudProviderNodeDeletionTime is the maximum time needed by cloud provider to delete a node.
|
||||||
MaxCloudProviderNodeDeletionTime = 5 * time.Minute
|
MaxCloudProviderNodeDeletionTime = 5 * time.Minute
|
||||||
// MaxPodEvictionTime is the maximum time CA tries to evict a pod before giving up.
|
|
||||||
MaxPodEvictionTime = 2 * time.Minute
|
|
||||||
// EvictionRetryTime is the time after CA retries failed pod eviction.
|
// EvictionRetryTime is the time after CA retries failed pod eviction.
|
||||||
EvictionRetryTime = 10 * time.Second
|
EvictionRetryTime = 10 * time.Second
|
||||||
// PodEvictionHeadroom is the extra time we wait to catch situations when the pod is ignoring SIGTERM and
|
// PodEvictionHeadroom is the extra time we wait to catch situations when the pod is ignoring SIGTERM and
|
||||||
|
|
@ -1118,7 +1116,7 @@ func (sd *ScaleDown) deleteNode(node *apiv1.Node, pods []*apiv1.Pod, daemonSetPo
|
||||||
daemonSetPods = daemonset.PodsToEvict(daemonSetPods, sd.context.DaemonSetEvictionForOccupiedNodes)
|
daemonSetPods = daemonset.PodsToEvict(daemonSetPods, sd.context.DaemonSetEvictionForOccupiedNodes)
|
||||||
|
|
||||||
// attempt drain
|
// attempt drain
|
||||||
evictionResults, err := drainNode(node, pods, daemonSetPods, sd.context.ClientSet, sd.context.Recorder, sd.context.MaxGracefulTerminationSec, MaxPodEvictionTime, EvictionRetryTime, PodEvictionHeadroom)
|
evictionResults, err := drainNode(node, pods, daemonSetPods, sd.context.ClientSet, sd.context.Recorder, sd.context.MaxGracefulTerminationSec, sd.context.AutoscalingOptions.MaxPodEvictionTime, EvictionRetryTime, PodEvictionHeadroom)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return status.NodeDeleteResult{ResultType: status.NodeDeleteErrorFailedToEvictPods, Err: err, PodEvictionResults: evictionResults}
|
return status.NodeDeleteResult{ResultType: status.NodeDeleteErrorFailedToEvictPods, Err: err, PodEvictionResults: evictionResults}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -140,6 +140,7 @@ var (
|
||||||
okTotalUnreadyCount = flag.Int("ok-total-unready-count", 3, "Number of allowed unready nodes, irrespective of max-total-unready-percentage")
|
okTotalUnreadyCount = flag.Int("ok-total-unready-count", 3, "Number of allowed unready nodes, irrespective of max-total-unready-percentage")
|
||||||
scaleUpFromZero = flag.Bool("scale-up-from-zero", true, "Should CA scale up when there 0 ready nodes.")
|
scaleUpFromZero = flag.Bool("scale-up-from-zero", true, "Should CA scale up when there 0 ready nodes.")
|
||||||
maxNodeProvisionTime = flag.Duration("max-node-provision-time", 15*time.Minute, "Maximum time CA waits for node to be provisioned")
|
maxNodeProvisionTime = flag.Duration("max-node-provision-time", 15*time.Minute, "Maximum time CA waits for node to be provisioned")
|
||||||
|
maxPodEvictionTime = flag.Duration("max-pod-eviction-time", 2*time.Minute, "Maximum time CA tries to evict a pod before giving up")
|
||||||
nodeGroupsFlag = multiStringFlag(
|
nodeGroupsFlag = multiStringFlag(
|
||||||
"nodes",
|
"nodes",
|
||||||
"sets min,max size and other configuration data for a node group in a format accepted by cloud provider. Can be used multiple times. Format: <min>:<max>:<other...>")
|
"sets min,max size and other configuration data for a node group in a format accepted by cloud provider. Can be used multiple times. Format: <min>:<max>:<other...>")
|
||||||
|
|
@ -234,6 +235,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
|
||||||
MaxEmptyBulkDelete: *maxEmptyBulkDeleteFlag,
|
MaxEmptyBulkDelete: *maxEmptyBulkDeleteFlag,
|
||||||
MaxGracefulTerminationSec: *maxGracefulTerminationFlag,
|
MaxGracefulTerminationSec: *maxGracefulTerminationFlag,
|
||||||
MaxNodeProvisionTime: *maxNodeProvisionTime,
|
MaxNodeProvisionTime: *maxNodeProvisionTime,
|
||||||
|
MaxPodEvictionTime: *maxPodEvictionTime,
|
||||||
MaxNodesTotal: *maxNodesTotal,
|
MaxNodesTotal: *maxNodesTotal,
|
||||||
MaxCoresTotal: maxCoresTotal,
|
MaxCoresTotal: maxCoresTotal,
|
||||||
MinCoresTotal: minCoresTotal,
|
MinCoresTotal: minCoresTotal,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue