cluster-autoscaler: Add option to disable scale down of unready nodes
Add flag '--scale-down-unready-enabled' to enable or disable scale-down of unready nodes. The default is true for backwards compatibility (i.e., unready nodes may still be scaled down).

Signed-off-by: Grigoris Thanasoulas <gregth@arrikto.com>
parent 60bda22e64
commit 6cf8c329da
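As a quick illustration of the new flag's semantics, here is a minimal standalone sketch (not part of this change; only the flag name, default, and help text are taken from the diff below):

package main

import (
	"flag"
	"fmt"
)

func main() {
	// Mirrors the registration added in main.go: the default of true keeps the
	// pre-existing behaviour of allowing unready nodes to be scaled down.
	scaleDownUnreadyEnabled := flag.Bool("scale-down-unready-enabled", true,
		"Should CA scale down unready nodes of the cluster")
	flag.Parse()

	// Running with --scale-down-unready-enabled=false prints "false" here,
	// i.e. unready nodes are kept out of scale-down consideration.
	fmt.Println("scale-down of unready nodes enabled:", *scaleDownUnreadyEnabled)
}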
@@ -128,6 +128,8 @@ type AutoscalingOptions struct {
 	EnforceNodeGroupMinSize bool
 	// ScaleDownEnabled is used to allow CA to scale down the cluster
 	ScaleDownEnabled bool
+	// ScaleDownUnreadyEnabled is used to allow CA to scale down unready nodes of the cluster
+	ScaleDownUnreadyEnabled bool
 	// ScaleDownDelayAfterAdd sets the duration from the last scale up to the time when CA starts to check scale down options
 	ScaleDownDelayAfterAdd time.Duration
 	// ScaleDownDelayAfterDelete sets the duration between scale down attempts if scale down removes one or more nodes
@@ -30,6 +30,7 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/utils/klogx"
 
 	apiv1 "k8s.io/api/core/v1"
+	kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
 	klog "k8s.io/klog/v2"
 	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
 )
@@ -135,6 +136,15 @@ func (c *Checker) unremovableReasonAndNodeUtilization(context *context.Autoscali
 		return simulator.NotAutoscaled, nil
 	}
 
+	// If scale down of unready nodes is disabled, skip the node if it is unready
+	if !context.ScaleDownUnreadyEnabled {
+		ready, _, _ := kube_util.GetReadinessState(node)
+		if !ready {
+			klog.V(4).Infof("Skipping unready node %s from delete consideration - scale-down of unready nodes is disabled", node.Name)
+			return simulator.ScaleDownUnreadyDisabled, nil
+		}
+	}
+
 	underutilized, err := c.isNodeBelowUtilizationThreshold(context, node, nodeGroup, utilInfo)
 	if err != nil {
 		klog.Warningf("Failed to check utilization thresholds for %s: %v", node.Name, err)
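For context on the guard above: kube_util.GetReadinessState derives a node's readiness from its status conditions. A rough standalone sketch of that idea follows (illustrative only; isNodeReady is a hypothetical helper, and the real GetReadinessState applies additional checks beyond the NodeReady condition):

package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
)

// isNodeReady approximates the readiness check: a node counts as ready
// only if its NodeReady condition reports ConditionTrue.
func isNodeReady(node *apiv1.Node) bool {
	for _, cond := range node.Status.Conditions {
		if cond.Type == apiv1.NodeReady {
			return cond.Status == apiv1.ConditionTrue
		}
	}
	return false
}

func main() {
	unready := &apiv1.Node{}
	unready.Status.Conditions = []apiv1.NodeCondition{
		{Type: apiv1.NodeReady, Status: apiv1.ConditionFalse},
	}
	// With --scale-down-unready-enabled=false, a node like this is reported as
	// simulator.ScaleDownUnreadyDisabled instead of being considered for removal.
	fmt.Println("ready:", isNodeReady(unready)) // ready: false
}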
@@ -50,6 +50,9 @@ func TestFilterOutUnremovable(t *testing.T) {
 	noScaleDownNode.Annotations = map[string]string{ScaleDownDisabledKey: "true"}
 	SetNodeReadyState(noScaleDownNode, true, time.Time{})
 
+	unreadyNode := BuildTestNode("unready", 1000, 10)
+	SetNodeReadyState(unreadyNode, false, time.Time{})
+
 	bigPod := BuildTestPod("bigPod", 600, 0)
 	bigPod.Spec.NodeName = "regular"
 
@@ -57,37 +60,55 @@ func TestFilterOutUnremovable(t *testing.T) {
 	smallPod.Spec.NodeName = "regular"
 
 	testCases := []struct {
-		desc  string
-		nodes []*apiv1.Node
-		pods  []*apiv1.Pod
-		want  []string
+		desc             string
+		nodes            []*apiv1.Node
+		pods             []*apiv1.Pod
+		want             []string
+		scaleDownUnready bool
 	}{
 		{
-			desc:  "regular node stays",
-			nodes: []*apiv1.Node{regularNode},
-			want:  []string{"regular"},
+			desc:             "regular node stays",
+			nodes:            []*apiv1.Node{regularNode},
+			want:             []string{"regular"},
+			scaleDownUnready: true,
 		},
 		{
-			desc:  "recently deleted node is filtered out",
-			nodes: []*apiv1.Node{regularNode, justDeletedNode},
-			want:  []string{"regular"},
+			desc:             "recently deleted node is filtered out",
+			nodes:            []*apiv1.Node{regularNode, justDeletedNode},
+			want:             []string{"regular"},
+			scaleDownUnready: true,
 		},
 		{
-			desc:  "marked no scale down is filtered out",
-			nodes: []*apiv1.Node{noScaleDownNode, regularNode},
-			want:  []string{"regular"},
+			desc:             "marked no scale down is filtered out",
+			nodes:            []*apiv1.Node{noScaleDownNode, regularNode},
+			want:             []string{"regular"},
+			scaleDownUnready: true,
 		},
 		{
-			desc:  "highly utilized node is filtered out",
-			nodes: []*apiv1.Node{regularNode},
-			pods:  []*apiv1.Pod{bigPod},
-			want:  []string{},
+			desc:             "highly utilized node is filtered out",
+			nodes:            []*apiv1.Node{regularNode},
+			pods:             []*apiv1.Pod{bigPod},
+			want:             []string{},
+			scaleDownUnready: true,
 		},
 		{
-			desc:  "underutilized node stays",
-			nodes: []*apiv1.Node{regularNode},
-			pods:  []*apiv1.Pod{smallPod},
-			want:  []string{"regular"},
+			desc:             "underutilized node stays",
+			nodes:            []*apiv1.Node{regularNode},
+			pods:             []*apiv1.Pod{smallPod},
+			want:             []string{"regular"},
+			scaleDownUnready: true,
+		},
+		{
+			desc:             "unready node stays",
+			nodes:            []*apiv1.Node{unreadyNode},
+			want:             []string{"unready"},
+			scaleDownUnready: true,
+		},
+		{
+			desc:             "unready node is filtered out when scale-down of unready is disabled",
+			nodes:            []*apiv1.Node{unreadyNode},
+			want:             []string{},
+			scaleDownUnready: false,
 		},
 	}
 	for _, tc := range testCases {
@@ -97,6 +118,7 @@ func TestFilterOutUnremovable(t *testing.T) {
 			c := NewChecker(&staticThresholdGetter{0.5})
 			options := config.AutoscalingOptions{
 				UnremovableNodeRecheckTimeout: 5 * time.Minute,
+				ScaleDownUnreadyEnabled:       tc.scaleDownUnready,
 			}
 			provider := testprovider.NewTestCloudProvider(nil, nil)
 			provider.AddNodeGroup("ng1", 1, 10, 2)
@@ -1110,6 +1110,7 @@ func TestNoScaleDownUnready(t *testing.T) {
 			ScaleDownUnreadyTime: time.Hour,
 		},
 		MaxGracefulTerminationSec: 60,
+		ScaleDownUnreadyEnabled:   true,
 	}
 
 	podLister := kube_util.NewTestPodLister([]*apiv1.Pod{p2})
@@ -98,6 +98,7 @@ var (
 	namespace                  = flag.String("namespace", "kube-system", "Namespace in which cluster-autoscaler run.")
 	enforceNodeGroupMinSize    = flag.Bool("enforce-node-group-min-size", false, "Should CA scale up the node group to the configured min size if needed.")
 	scaleDownEnabled           = flag.Bool("scale-down-enabled", true, "Should CA scale down the cluster")
+	scaleDownUnreadyEnabled    = flag.Bool("scale-down-unready-enabled", true, "Should CA scale down unready nodes of the cluster")
 	scaleDownDelayAfterAdd     = flag.Duration("scale-down-delay-after-add", 10*time.Minute,
 		"How long after scale up that scale down evaluation resumes")
 	scaleDownDelayAfterDelete  = flag.Duration("scale-down-delay-after-delete", 0,
@@ -279,6 +280,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
 		ScaleDownDelayAfterDelete:        *scaleDownDelayAfterDelete,
 		ScaleDownDelayAfterFailure:       *scaleDownDelayAfterFailure,
 		ScaleDownEnabled:                 *scaleDownEnabled,
+		ScaleDownUnreadyEnabled:          *scaleDownUnreadyEnabled,
 		ScaleDownNonEmptyCandidatesCount: *scaleDownNonEmptyCandidatesCount,
 		ScaleDownCandidatesPoolRatio:     *scaleDownCandidatesPoolRatio,
 		ScaleDownCandidatesPoolMinCount:  *scaleDownCandidatesPoolMinCount,
@@ -61,6 +61,8 @@ const (
 	NoReason UnremovableReason = iota
 	// ScaleDownDisabledAnnotation - node can't be removed because it has a "scale down disabled" annotation.
 	ScaleDownDisabledAnnotation
+	// ScaleDownUnreadyDisabled - node can't be removed because it is unready and scale down is disabled for unready nodes.
+	ScaleDownUnreadyDisabled
 	// NotAutoscaled - node can't be removed because it doesn't belong to an autoscaled node group.
 	NotAutoscaled
 	// NotUnneededLongEnough - node can't be removed because it wasn't unneeded for long enough.