cluster-autoscaler: Add option to disable scale down of unready nodes

Add flag '--scale-down-unready-enabled' to enable or disable scale-down
of unready nodes. The default value is true for backwards compatibility
(i.e., scale-down of unready nodes remains allowed).
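
For example, to keep unready nodes from being scaled down, the new flag can
be passed alongside the existing scale-down flag (illustrative invocation
only; the rest of the command line depends on the deployment):

    cluster-autoscaler --scale-down-enabled=true --scale-down-unready-enabled=false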

Signed-off-by: Grigoris Thanasoulas <gregth@arrikto.com>
Grigoris Thanasoulas 2023-02-05 18:33:58 +02:00
parent 60bda22e64
commit 6cf8c329da
6 changed files with 60 additions and 21 deletions

View File

@@ -128,6 +128,8 @@ type AutoscalingOptions struct {
EnforceNodeGroupMinSize bool
// ScaleDownEnabled is used to allow CA to scale down the cluster
ScaleDownEnabled bool
// ScaleDownUnreadyEnabled is used to allow CA to scale down unready nodes of the cluster
ScaleDownUnreadyEnabled bool
// ScaleDownDelayAfterAdd sets the duration from the last scale up to the time when CA starts to check scale down options
ScaleDownDelayAfterAdd time.Duration
// ScaleDownDelayAfterDelete sets the duration between scale down attempts if scale down removes one or more nodes

View File

@@ -30,6 +30,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/utils/klogx"
apiv1 "k8s.io/api/core/v1"
kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
klog "k8s.io/klog/v2"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
)
@@ -135,6 +136,15 @@ func (c *Checker) unremovableReasonAndNodeUtilization(context *context.Autoscali
return simulator.NotAutoscaled, nil
}
// If scale down of unready nodes is disabled, skip the node if it is unready
if !context.ScaleDownUnreadyEnabled {
ready, _, _ := kube_util.GetReadinessState(node)
if !ready {
klog.V(4).Infof("Skipping unready node %s from delete consideration - scale-down of unready nodes is disabled", node.Name)
return simulator.ScaleDownUnreadyDisabled, nil
}
}
underutilized, err := c.isNodeBelowUtilizationThreshold(context, node, nodeGroup, utilInfo)
if err != nil {
klog.Warningf("Failed to check utilization thresholds for %s: %v", node.Name, err)

View File

@@ -50,6 +50,9 @@ func TestFilterOutUnremovable(t *testing.T) {
noScaleDownNode.Annotations = map[string]string{ScaleDownDisabledKey: "true"}
SetNodeReadyState(noScaleDownNode, true, time.Time{})
unreadyNode := BuildTestNode("unready", 1000, 10)
SetNodeReadyState(unreadyNode, false, time.Time{})
bigPod := BuildTestPod("bigPod", 600, 0)
bigPod.Spec.NodeName = "regular"
@@ -57,37 +60,55 @@
smallPod.Spec.NodeName = "regular"
testCases := []struct {
desc string
nodes []*apiv1.Node
pods []*apiv1.Pod
want []string
desc string
nodes []*apiv1.Node
pods []*apiv1.Pod
want []string
scaleDownUnready bool
}{
{
desc: "regular node stays",
nodes: []*apiv1.Node{regularNode},
want: []string{"regular"},
desc: "regular node stays",
nodes: []*apiv1.Node{regularNode},
want: []string{"regular"},
scaleDownUnready: true,
},
{
desc: "recently deleted node is filtered out",
nodes: []*apiv1.Node{regularNode, justDeletedNode},
want: []string{"regular"},
desc: "recently deleted node is filtered out",
nodes: []*apiv1.Node{regularNode, justDeletedNode},
want: []string{"regular"},
scaleDownUnready: true,
},
{
desc: "marked no scale down is filtered out",
nodes: []*apiv1.Node{noScaleDownNode, regularNode},
want: []string{"regular"},
desc: "marked no scale down is filtered out",
nodes: []*apiv1.Node{noScaleDownNode, regularNode},
want: []string{"regular"},
scaleDownUnready: true,
},
{
desc: "highly utilized node is filtered out",
nodes: []*apiv1.Node{regularNode},
pods: []*apiv1.Pod{bigPod},
want: []string{},
desc: "highly utilized node is filtered out",
nodes: []*apiv1.Node{regularNode},
pods: []*apiv1.Pod{bigPod},
want: []string{},
scaleDownUnready: true,
},
{
desc: "underutilized node stays",
nodes: []*apiv1.Node{regularNode},
pods: []*apiv1.Pod{smallPod},
want: []string{"regular"},
desc: "underutilized node stays",
nodes: []*apiv1.Node{regularNode},
pods: []*apiv1.Pod{smallPod},
want: []string{"regular"},
scaleDownUnready: true,
},
{
desc: "unready node stays",
nodes: []*apiv1.Node{unreadyNode},
want: []string{"unready"},
scaleDownUnready: true,
},
{
desc: "unready node is filtered oud when scale-down of unready is disabled",
nodes: []*apiv1.Node{unreadyNode},
want: []string{},
scaleDownUnready: false,
},
}
for _, tc := range testCases {
@@ -97,6 +118,7 @@ func TestFilterOutUnremovable(t *testing.T) {
c := NewChecker(&staticThresholdGetter{0.5})
options := config.AutoscalingOptions{
UnremovableNodeRecheckTimeout: 5 * time.Minute,
ScaleDownUnreadyEnabled: tc.scaleDownUnready,
}
provider := testprovider.NewTestCloudProvider(nil, nil)
provider.AddNodeGroup("ng1", 1, 10, 2)

View File

@@ -1110,6 +1110,7 @@ func TestNoScaleDownUnready(t *testing.T) {
ScaleDownUnreadyTime: time.Hour,
},
MaxGracefulTerminationSec: 60,
ScaleDownUnreadyEnabled: true,
}
podLister := kube_util.NewTestPodLister([]*apiv1.Pod{p2})

View File

@@ -98,6 +98,7 @@ var (
namespace = flag.String("namespace", "kube-system", "Namespace in which cluster-autoscaler run.")
enforceNodeGroupMinSize = flag.Bool("enforce-node-group-min-size", false, "Should CA scale up the node group to the configured min size if needed.")
scaleDownEnabled = flag.Bool("scale-down-enabled", true, "Should CA scale down the cluster")
scaleDownUnreadyEnabled = flag.Bool("scale-down-unready-enabled", true, "Should CA scale down unready nodes of the cluster")
scaleDownDelayAfterAdd = flag.Duration("scale-down-delay-after-add", 10*time.Minute,
"How long after scale up that scale down evaluation resumes")
scaleDownDelayAfterDelete = flag.Duration("scale-down-delay-after-delete", 0,
@@ -279,6 +280,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
ScaleDownDelayAfterDelete: *scaleDownDelayAfterDelete,
ScaleDownDelayAfterFailure: *scaleDownDelayAfterFailure,
ScaleDownEnabled: *scaleDownEnabled,
ScaleDownUnreadyEnabled: *scaleDownUnreadyEnabled,
ScaleDownNonEmptyCandidatesCount: *scaleDownNonEmptyCandidatesCount,
ScaleDownCandidatesPoolRatio: *scaleDownCandidatesPoolRatio,
ScaleDownCandidatesPoolMinCount: *scaleDownCandidatesPoolMinCount,

View File

@@ -61,6 +61,8 @@ const (
NoReason UnremovableReason = iota
// ScaleDownDisabledAnnotation - node can't be removed because it has a "scale down disabled" annotation.
ScaleDownDisabledAnnotation
// ScaleDownUnreadyDisabled - node can't be removed because it is unready and scale down is disabled for unready nodes.
ScaleDownUnreadyDisabled
// NotAutoscaled - node can't be removed because it doesn't belong to an autoscaled node group.
NotAutoscaled
// NotUnneededLongEnough - node can't be removed because it wasn't unneeded for long enough.