diff --git a/cluster-autoscaler/clusterstate/clusterstate.go b/cluster-autoscaler/clusterstate/clusterstate.go index 2240d3f228..c51b287a8c 100644 --- a/cluster-autoscaler/clusterstate/clusterstate.go +++ b/cluster-autoscaler/clusterstate/clusterstate.go @@ -86,6 +86,8 @@ type ClusterStateRegistryConfig struct { // Minimum number of nodes that must be unready for MaxTotalUnreadyPercentage to apply. // This is to ensure that in very small clusters (e.g. 2 nodes) a single node's failure doesn't disable autoscaling. OkTotalUnreadyCount int + // NodeGroupKeepBackoffOutOfResources is whether a backoff is kept until its expiration (i.e. not removed early on a successful scale-up) when the backoff was caused by the cloud provider being out of resources. + NodeGroupKeepBackoffOutOfResources bool } // IncorrectNodeGroupSize contains information about how much the current size of the node group @@ -264,7 +266,11 @@ func (csr *ClusterStateRegistry) updateScaleRequests(currentTime time.Time) { // scale-out finished successfully // remove it and reset node group backoff delete(csr.scaleUpRequests, nodeGroupName) - csr.backoff.RemoveBackoff(scaleUpRequest.NodeGroup, csr.nodeInfosForGroups[scaleUpRequest.NodeGroup.Id()]) + shouldKeepBackoff := csr.config.NodeGroupKeepBackoffOutOfResources && csr.backoff.IsNodeGroupOutOfResources(scaleUpRequest.NodeGroup) + if !shouldKeepBackoff { + klog.V(4).Infof("Removing backoff for node group %v", scaleUpRequest.NodeGroup.Id()) + csr.backoff.RemoveBackoff(scaleUpRequest.NodeGroup, csr.nodeInfosForGroups[scaleUpRequest.NodeGroup.Id()]) + } klog.V(4).Infof("Scale up in group %v finished successfully in %v", nodeGroupName, currentTime.Sub(scaleUpRequest.Time)) continue diff --git a/cluster-autoscaler/config/autoscaling_options.go b/cluster-autoscaler/config/autoscaling_options.go index b335c05dcb..3f12020bb8 100644 --- a/cluster-autoscaler/config/autoscaling_options.go +++ b/cluster-autoscaler/config/autoscaling_options.go @@ -249,6 +249,8 @@ type AutoscalingOptions struct { 
MaxNodeGroupBackoffDuration time.Duration // NodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset. NodeGroupBackoffResetTimeout time.Duration + // NodeGroupKeepBackoffOutOfResources is whether a backoff is kept until its expiration (i.e. not removed early on a successful scale-up) when the backoff was caused by the cloud provider being out of resources. + NodeGroupKeepBackoffOutOfResources bool // MaxScaleDownParallelism is the maximum number of nodes (both empty and needing drain) that can be deleted in parallel. MaxScaleDownParallelism int // MaxDrainParallelism is the maximum number of nodes needing drain, that can be drained and deleted in parallel. diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index d0480eb1f9..8464cda729 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -231,6 +231,7 @@ var ( "maxNodeGroupBackoffDuration is the maximum backoff duration for a NodeGroup after new nodes failed to start.") nodeGroupBackoffResetTimeout = flag.Duration("node-group-backoff-reset-timeout", 3*time.Hour, "nodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset.") + nodeGroupKeepBackoffOutOfResources = flag.Bool("node-group-keep-backoff-out-of-resources", false, "Prevents removal of backoff before expiration when a scale-up fails due to the cloud provider being out of resources.") maxScaleDownParallelismFlag = flag.Int("max-scale-down-parallelism", 10, "Maximum number of nodes (both empty and needing drain) that can be deleted in parallel.") maxDrainParallelismFlag = flag.Int("max-drain-parallelism", 1, "Maximum number of nodes needing drain, that can be drained and deleted in parallel.") recordDuplicatedEvents = flag.Bool("record-duplicated-events", false, "enable duplication of similar events within a 5 minute window.") @@ -406,6 +407,7 @@ func createAutoscalingOptions() config.AutoscalingOptions { InitialNodeGroupBackoffDuration: *initialNodeGroupBackoffDuration, 
MaxNodeGroupBackoffDuration: *maxNodeGroupBackoffDuration, NodeGroupBackoffResetTimeout: *nodeGroupBackoffResetTimeout, + NodeGroupKeepBackoffOutOfResources: *nodeGroupKeepBackoffOutOfResources, MaxScaleDownParallelism: *maxScaleDownParallelismFlag, MaxDrainParallelism: *maxDrainParallelismFlag, RecordDuplicatedEvents: *recordDuplicatedEvents, diff --git a/cluster-autoscaler/utils/backoff/backoff.go b/cluster-autoscaler/utils/backoff/backoff.go index a4409d2f99..85da73556c 100644 --- a/cluster-autoscaler/utils/backoff/backoff.go +++ b/cluster-autoscaler/utils/backoff/backoff.go @@ -39,4 +39,6 @@ type Backoff interface { RemoveBackoff(nodeGroup cloudprovider.NodeGroup, nodeInfo *schedulerframework.NodeInfo) // RemoveStaleBackoffData removes stale backoff data. RemoveStaleBackoffData(currentTime time.Time) + // IsNodeGroupOutOfResources returns true if the given node group's current backoff was caused by the cloud provider being out of resources. + IsNodeGroupOutOfResources(nodeGroup cloudprovider.NodeGroup) bool } diff --git a/cluster-autoscaler/utils/backoff/exponential_backoff.go b/cluster-autoscaler/utils/backoff/exponential_backoff.go index a65b9c323d..eafca64e55 100644 --- a/cluster-autoscaler/utils/backoff/exponential_backoff.go +++ b/cluster-autoscaler/utils/backoff/exponential_backoff.go @@ -38,6 +38,7 @@ type exponentialBackoffInfo struct { backoffUntil time.Time lastFailedExecution time.Time errorInfo cloudprovider.InstanceErrorInfo + errorClass cloudprovider.InstanceErrorClass } // NewExponentialBackoff creates an instance of exponential backoff. @@ -89,6 +90,7 @@ func (b *exponentialBackoff) Backoff(nodeGroup cloudprovider.NodeGroup, nodeInfo backoffUntil: backoffUntil, lastFailedExecution: currentTime, errorInfo: errorInfo, + errorClass: errorClass, } return backoffUntil } @@ -118,3 +120,9 @@ func (b *exponentialBackoff) RemoveStaleBackoffData(currentTime time.Time) { } } } + +// IsNodeGroupOutOfResources returns true if the given node group's current backoff was caused by the cloud provider being out of resources. 
+func (b *exponentialBackoff) IsNodeGroupOutOfResources(nodeGroup cloudprovider.NodeGroup) bool { + backoffInfo, found := b.backoffInfo[b.nodeGroupKey(nodeGroup)] + return found && backoffInfo.errorClass == cloudprovider.OutOfResourcesErrorClass +}