From d2fe118db94c8717df8efef158cc99b414c24f9f Mon Sep 17 00:00:00 2001 From: Mahmoud Atwa Date: Fri, 22 Sep 2023 20:51:34 +0000 Subject: [PATCH] Add startup taint flag, prefix & add status taint prefix --- cluster-autoscaler/main.go | 3 +- cluster-autoscaler/utils/taints/taints.go | 29 +++++++++++--- .../utils/taints/taints_test.go | 40 +++++++++++++++++-- 3 files changed, 62 insertions(+), 10 deletions(-) diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index ab21e8f0fa..123b9ecfb2 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -198,6 +198,7 @@ var ( newPodScaleUpDelay = flag.Duration("new-pod-scale-up-delay", 0*time.Second, "Pods less than this old will not be considered for scale-up. Can be increased for individual pods through annotation 'cluster-autoscaler.kubernetes.io/pod-scale-up-delay'.") ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group") + startupTaintFlag = multiStringFlag("startup-taint", "Specifies a taint to ignore in node templates when considering to scale a node group (Equivalent to ignore-taint)") balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar") balancingLabelsFlag = multiStringFlag("balancing-label", "Specifies a label to use for comparing if two node groups are similar, rather than the built in heuristics. Setting this flag disables all other comparison logic, and cannot be combined with --balancing-ignore-label.") awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. 
AWS only") @@ -346,7 +347,7 @@ func createAutoscalingOptions() config.AutoscalingOptions { ExpendablePodsPriorityCutoff: *expendablePodsPriorityCutoff, Regional: *regional, NewPodScaleUpDelay: *newPodScaleUpDelay, - IgnoredTaints: *ignoreTaintsFlag, + IgnoredTaints: append(*ignoreTaintsFlag, *startupTaintFlag...), BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag, BalancingLabels: *balancingLabelsFlag, KubeConfigPath: *kubeConfigFile, diff --git a/cluster-autoscaler/utils/taints/taints.go b/cluster-autoscaler/utils/taints/taints.go index 1f0a1ac348..07381e6f9b 100644 --- a/cluster-autoscaler/utils/taints/taints.go +++ b/cluster-autoscaler/utils/taints/taints.go @@ -44,6 +44,12 @@ const ( // IgnoreTaintPrefix any taint starting with it will be filtered out from autoscaler template node. IgnoreTaintPrefix = "ignore-taint.cluster-autoscaler.kubernetes.io/" + // StartupTaintPrefix (Same as IgnoreTaintPrefix) any taint starting with it will be filtered out from autoscaler template node. + StartupTaintPrefix = "startup-taint.cluster-autoscaler.kubernetes.io/" + + // DefaultStatusTaintPrefix any taint starting with it will be treated as a status taint and filtered out from autoscaler template node. 
+ DefaultStatusTaintPrefix = "status-taint.cluster-autoscaler.kubernetes.io/" + gkeNodeTerminationHandlerTaint = "cloud.google.com/impending-node-termination" // AWS: Indicates that a node has volumes stuck in attaching state and hence it is not fit for scheduling more pods @@ -55,8 +61,9 @@ type TaintKeySet map[string]bool // TaintConfig is a config of taints that require special handling type TaintConfig struct { - IgnoredTaints TaintKeySet - StatusTaints TaintKeySet + IgnoredTaints TaintKeySet + StatusTaints TaintKeySet + StatusTaintPrefix string } // NewTaintConfig returns the taint config extracted from options @@ -74,8 +81,9 @@ func NewTaintConfig(opts config.AutoscalingOptions) TaintConfig { } return TaintConfig{ - IgnoredTaints: ignoredTaints, - StatusTaints: statusTaints, + IgnoredTaints: ignoredTaints, + StatusTaints: statusTaints, + StatusTaintPrefix: DefaultStatusTaintPrefix, } } @@ -352,6 +360,15 @@ func SanitizeTaints(taints []apiv1.Taint, taintConfig TaintConfig) []apiv1.Taint continue } + if strings.HasPrefix(taint.Key, StartupTaintPrefix) { + klog.V(4).Infof("Removing taint %s based on prefix, when creation template from node", taint.Key) + continue + } + if strings.HasPrefix(taint.Key, DefaultStatusTaintPrefix) { + klog.V(4).Infof("Removing status taint %s, when creating template from node", taint.Key) + continue + } + if _, exists := taintConfig.StatusTaints[taint.Key]; exists { klog.V(4).Infof("Removing status taint %s, when creating template from node", taint.Key) continue @@ -376,10 +393,10 @@ func FilterOutNodesWithIgnoredTaints(ignoredTaints TaintKeySet, allNodes, readyN ready := true for _, t := range node.Spec.Taints { _, hasIgnoredTaint := ignoredTaints[t.Key] - if hasIgnoredTaint || strings.HasPrefix(t.Key, IgnoreTaintPrefix) { + if hasIgnoredTaint || strings.HasPrefix(t.Key, IgnoreTaintPrefix) || strings.HasPrefix(t.Key, StartupTaintPrefix) { ready = false nodesWithIgnoredTaints[node.Name] = kubernetes.GetUnreadyNodeCopy(node, 
kubernetes.IgnoreTaint) - klog.V(3).Infof("Overriding status of node %v, which seems to have ignored taint %q", node.Name, t.Key) + klog.V(3).Infof("Overriding status of node %v, which seems to have ignored or startup taint %q", node.Name, t.Key) break } } diff --git a/cluster-autoscaler/utils/taints/taints_test.go b/cluster-autoscaler/utils/taints/taints_test.go index 276cada33e..78f0953d26 100644 --- a/cluster-autoscaler/utils/taints/taints_test.go +++ b/cluster-autoscaler/utils/taints/taints_test.go @@ -388,7 +388,7 @@ func TestFilterOutNodesWithIgnoredTaints(t *testing.T) { }, }, }, - "no ignored taint, one unready prefixed tainted node": { + "no ignored taint, one node unready prefixed with ignore taint": { readyNodes: 0, allNodes: 1, ignoredTaints: map[string]bool{}, @@ -411,6 +411,29 @@ func TestFilterOutNodesWithIgnoredTaints(t *testing.T) { }, }, }, + "no ignored taint, one node unready prefixed with startup taint": { + readyNodes: 0, + allNodes: 1, + ignoredTaints: map[string]bool{}, + node: &apiv1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "notReadyTainted", + CreationTimestamp: metav1.NewTime(time.Now()), + }, + Spec: apiv1.NodeSpec{ + Taints: []apiv1.Taint{ + { + Key: StartupTaintPrefix + "another-taint", + Value: "myValue", + Effect: apiv1.TaintEffectNoSchedule, + }, + }, + }, + Status: apiv1.NodeStatus{ + Conditions: []apiv1.NodeCondition{readyCondition}, + }, + }, + }, "no ignored taint, two taints": { readyNodes: 1, allNodes: 1, @@ -485,6 +508,16 @@ func TestSanitizeTaints(t *testing.T) { Value: "myValue", Effect: apiv1.TaintEffectNoSchedule, }, + { + Key: DefaultStatusTaintPrefix + "some-taint", + Value: "myValue", + Effect: apiv1.TaintEffectNoSchedule, + }, + { + Key: StartupTaintPrefix + "some-taint", + Value: "myValue", + Effect: apiv1.TaintEffectNoSchedule, + }, { Key: "test-taint", Value: "test2", @@ -522,8 +555,9 @@ func TestSanitizeTaints(t *testing.T) { }, } taintConfig := TaintConfig{ - IgnoredTaints: map[string]bool{"ignore-me": 
true}, - StatusTaints: map[string]bool{"status-me": true}, + IgnoredTaints: map[string]bool{"ignore-me": true}, + StatusTaints: map[string]bool{"status-me": true}, + StatusTaintPrefix: DefaultStatusTaintPrefix, } newTaints := SanitizeTaints(node.Spec.Taints, taintConfig)