cluster-autoscaler: Add option to disable scale down of unready nodes
Add flag '--scale-down-unready-enabled' to enable or disable scale-down of
unready nodes. The default is true for backwards compatibility (i.e.,
scale-down of unready nodes remains allowed).

Signed-off-by: Grigoris Thanasoulas <gregth@arrikto.com>
parent 60bda22e64
commit 6cf8c329da
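To make the semantics concrete before the hunks, here is a minimal, self-contained Go sketch of the gate this flag introduces; passesUnreadyGate is a hypothetical name used for illustration only, not code from this change. A node stays a scale-down candidate unless it is unready and the flag is disabled, so the default (true) preserves the previous behaviour.

// Hypothetical, illustrative-only sketch of the gate added by this commit:
// a node passes when it is ready, or when scale-down of unready nodes is
// enabled. Only ready=false with enabled=false blocks a candidate.
package main

import "fmt"

func passesUnreadyGate(nodeReady, scaleDownUnreadyEnabled bool) bool {
	return nodeReady || scaleDownUnreadyEnabled
}

func main() {
	for _, ready := range []bool{true, false} {
		for _, enabled := range []bool{true, false} {
			fmt.Printf("ready=%v enabled=%v -> candidate=%v\n",
				ready, enabled, passesUnreadyGate(ready, enabled))
		}
	}
}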
@@ -128,6 +128,8 @@ type AutoscalingOptions struct {
 	EnforceNodeGroupMinSize bool
 	// ScaleDownEnabled is used to allow CA to scale down the cluster
 	ScaleDownEnabled bool
+	// ScaleDownUnreadyEnabled is used to allow CA to scale down unready nodes of the cluster
+	ScaleDownUnreadyEnabled bool
 	// ScaleDownDelayAfterAdd sets the duration from the last scale up to the time when CA starts to check scale down options
 	ScaleDownDelayAfterAdd time.Duration
 	// ScaleDownDelayAfterDelete sets the duration between scale down attempts if scale down removes one or more nodes
@@ -30,6 +30,7 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/utils/klogx"

 	apiv1 "k8s.io/api/core/v1"
+	kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
 	klog "k8s.io/klog/v2"
 	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
 )
@@ -135,6 +136,15 @@ func (c *Checker) unremovableReasonAndNodeUtilization(context *context.Autoscali
 		return simulator.NotAutoscaled, nil
 	}

+	// If scale down of unready nodes is disabled, skip the node if it is unready
+	if !context.ScaleDownUnreadyEnabled {
+		ready, _, _ := kube_util.GetReadinessState(node)
+		if !ready {
+			klog.V(4).Infof("Skipping unready node %s from delete consideration - scale-down of unready nodes is disabled", node.Name)
+			return simulator.ScaleDownUnreadyDisabled, nil
+		}
+	}
+
 	underutilized, err := c.isNodeBelowUtilizationThreshold(context, node, nodeGroup, utilInfo)
 	if err != nil {
 		klog.Warningf("Failed to check utilization thresholds for %s: %v", node.Name, err)
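The new check relies on kube_util.GetReadinessState. As a rough, hedged approximation, assuming the helper derives readiness from the node's Ready condition (the project's real helper may consult additional signals), it behaves like the self-contained sketch below; all names here are local to the example.

// Simplified, illustrative stand-in for kube_util.GetReadinessState,
// assuming readiness comes from the node's Ready condition.
package main

import (
	"fmt"
	"time"

	apiv1 "k8s.io/api/core/v1"
)

func getReadinessState(node *apiv1.Node) (ready bool, lastTransition time.Time, err error) {
	for _, cond := range node.Status.Conditions {
		if cond.Type == apiv1.NodeReady {
			return cond.Status == apiv1.ConditionTrue, cond.LastTransitionTime.Time, nil
		}
	}
	return false, time.Time{}, fmt.Errorf("node %s reports no Ready condition", node.Name)
}

func main() {
	node := &apiv1.Node{}
	node.Name = "unready"
	node.Status.Conditions = []apiv1.NodeCondition{
		{Type: apiv1.NodeReady, Status: apiv1.ConditionFalse},
	}
	ready, _, _ := getReadinessState(node)
	fmt.Println(ready) // false: with the flag disabled, this node is skipped
}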
@@ -50,6 +50,9 @@ func TestFilterOutUnremovable(t *testing.T) {
 	noScaleDownNode.Annotations = map[string]string{ScaleDownDisabledKey: "true"}
 	SetNodeReadyState(noScaleDownNode, true, time.Time{})

+	unreadyNode := BuildTestNode("unready", 1000, 10)
+	SetNodeReadyState(unreadyNode, false, time.Time{})
+
 	bigPod := BuildTestPod("bigPod", 600, 0)
 	bigPod.Spec.NodeName = "regular"

@@ -57,37 +60,55 @@ func TestFilterOutUnremovable(t *testing.T) {
 	smallPod.Spec.NodeName = "regular"

 	testCases := []struct {
 		desc  string
 		nodes []*apiv1.Node
 		pods  []*apiv1.Pod
 		want  []string
+		scaleDownUnready bool
 	}{
 		{
 			desc:  "regular node stays",
 			nodes: []*apiv1.Node{regularNode},
 			want:  []string{"regular"},
+			scaleDownUnready: true,
 		},
 		{
 			desc:  "recently deleted node is filtered out",
 			nodes: []*apiv1.Node{regularNode, justDeletedNode},
 			want:  []string{"regular"},
+			scaleDownUnready: true,
 		},
 		{
 			desc:  "marked no scale down is filtered out",
 			nodes: []*apiv1.Node{noScaleDownNode, regularNode},
 			want:  []string{"regular"},
+			scaleDownUnready: true,
 		},
 		{
 			desc:  "highly utilized node is filtered out",
 			nodes: []*apiv1.Node{regularNode},
 			pods:  []*apiv1.Pod{bigPod},
 			want:  []string{},
+			scaleDownUnready: true,
 		},
 		{
 			desc:  "underutilized node stays",
 			nodes: []*apiv1.Node{regularNode},
 			pods:  []*apiv1.Pod{smallPod},
 			want:  []string{"regular"},
+			scaleDownUnready: true,
+		},
+		{
+			desc:  "unready node stays",
+			nodes: []*apiv1.Node{unreadyNode},
+			want:  []string{"unready"},
+			scaleDownUnready: true,
+		},
+		{
+			desc:  "unready node is filtered out when scale-down of unready is disabled",
+			nodes: []*apiv1.Node{unreadyNode},
+			want:  []string{},
+			scaleDownUnready: false,
 		},
 	}
 	for _, tc := range testCases {
@@ -97,6 +118,7 @@ func TestFilterOutUnremovable(t *testing.T) {
 			c := NewChecker(&staticThresholdGetter{0.5})
 			options := config.AutoscalingOptions{
 				UnremovableNodeRecheckTimeout: 5 * time.Minute,
+				ScaleDownUnreadyEnabled:       tc.scaleDownUnready,
 			}
 			provider := testprovider.NewTestCloudProvider(nil, nil)
 			provider.AddNodeGroup("ng1", 1, 10, 2)
@@ -1110,6 +1110,7 @@ func TestNoScaleDownUnready(t *testing.T) {
 			ScaleDownUnreadyTime: time.Hour,
 		},
 		MaxGracefulTerminationSec: 60,
+		ScaleDownUnreadyEnabled:   true,
 	}

 	podLister := kube_util.NewTestPodLister([]*apiv1.Pod{p2})
@@ -98,6 +98,7 @@ var (
 	namespace = flag.String("namespace", "kube-system", "Namespace in which cluster-autoscaler run.")
 	enforceNodeGroupMinSize = flag.Bool("enforce-node-group-min-size", false, "Should CA scale up the node group to the configured min size if needed.")
 	scaleDownEnabled = flag.Bool("scale-down-enabled", true, "Should CA scale down the cluster")
+	scaleDownUnreadyEnabled = flag.Bool("scale-down-unready-enabled", true, "Should CA scale down unready nodes of the cluster")
 	scaleDownDelayAfterAdd = flag.Duration("scale-down-delay-after-add", 10*time.Minute,
 		"How long after scale up that scale down evaluation resumes")
 	scaleDownDelayAfterDelete = flag.Duration("scale-down-delay-after-delete", 0,
@@ -279,6 +280,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
 		ScaleDownDelayAfterDelete: *scaleDownDelayAfterDelete,
 		ScaleDownDelayAfterFailure: *scaleDownDelayAfterFailure,
 		ScaleDownEnabled: *scaleDownEnabled,
+		ScaleDownUnreadyEnabled: *scaleDownUnreadyEnabled,
 		ScaleDownNonEmptyCandidatesCount: *scaleDownNonEmptyCandidatesCount,
 		ScaleDownCandidatesPoolRatio: *scaleDownCandidatesPoolRatio,
 		ScaleDownCandidatesPoolMinCount: *scaleDownCandidatesPoolMinCount,
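The two hunks above are the binary's standard flag-to-options plumbing: define the flag in the var block, then copy its parsed value into config.AutoscalingOptions. Below is a self-contained sketch reduced to just this flag; the struct and variable names are stand-ins local to the example, not the real config type.

// Self-contained sketch of the flag-to-options plumbing shown above,
// with only the one new flag (the real binary defines dozens).
package main

import (
	"flag"
	"fmt"
)

type autoscalingOptions struct {
	ScaleDownUnreadyEnabled bool
}

var scaleDownUnreadyEnabled = flag.Bool("scale-down-unready-enabled", true,
	"Should CA scale down unready nodes of the cluster")

func main() {
	flag.Parse() // e.g. run with --scale-down-unready-enabled=false
	opts := autoscalingOptions{ScaleDownUnreadyEnabled: *scaleDownUnreadyEnabled}
	fmt.Printf("%+v\n", opts)
}

Running the sketch with --scale-down-unready-enabled=false prints {ScaleDownUnreadyEnabled:false}, matching the default-true, opt-out design described in the commit message.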
@@ -61,6 +61,8 @@ const (
 	NoReason UnremovableReason = iota
 	// ScaleDownDisabledAnnotation - node can't be removed because it has a "scale down disabled" annotation.
 	ScaleDownDisabledAnnotation
+	// ScaleDownUnreadyDisabled - node can't be removed because it is unready and scale down is disabled for unready nodes.
+	ScaleDownUnreadyDisabled
 	// NotAutoscaled - node can't be removed because it doesn't belong to an autoscaled node group.
 	NotAutoscaled
 	// NotUnneededLongEnough - node can't be removed because it wasn't unneeded for long enough.
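One point worth noting about the enum hunk: UnremovableReason is an iota sequence, so inserting ScaleDownUnreadyDisabled mid-list shifts the numeric value of every constant after it. That is safe as long as the values are only compared within one process. An isolated sketch, with the constant set truncated for brevity:

// Isolated sketch of the iota enum extended above; only a few of the
// real constants are reproduced here.
package main

import "fmt"

type UnremovableReason int

const (
	NoReason UnremovableReason = iota // 0
	ScaleDownDisabledAnnotation      // 1
	ScaleDownUnreadyDisabled         // 2 (new)
	NotAutoscaled                    // 3 (was 2 before this change)
)

func main() {
	fmt.Println(ScaleDownUnreadyDisabled, NotAutoscaled) // 2 3
}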