feat: set `IgnoreDaemonSetsUtilization` per nodegroup

Signed-off-by: vadasambar <surajrbanakar@gmail.com>

fix: test cases failing for actuator and scaledown/eligibility
- abstract default values into `config`
Signed-off-by: vadasambar <surajrbanakar@gmail.com>

refactor: rename global `IgnoreDaemonSetsUtilization` -> `GlobalIgnoreDaemonSetsUtilization` in code
- there is no change in the flag name
- rename `thresholdGetter` -> `configGetter` and tweak it to accommodate `GetIgnoreDaemonSetsUtilization`
Signed-off-by: vadasambar <surajrbanakar@gmail.com>

refactor: reset help text for `ignore-daemonsets-utilization` flag
- because the per-nodegroup override is currently supported only via AWS ASG tags
Signed-off-by: vadasambar <surajrbanakar@gmail.com>

docs: add info about overriding `--ignore-daemonsets-utilization` per ASG
- in AWS cloud provider README
Signed-off-by: vadasambar <surajrbanakar@gmail.com>

refactor: use a limiting interface in actuator in place of `NodeGroupConfigProcessor` interface
- to limit the functions that can be used
- since we need it only for `GetIgnoreDaemonSetsUtilization`
Signed-off-by: vadasambar <surajrbanakar@gmail.com>

fix: tests failing for actuator
- rename `staticNodeGroupConfigProcessor` -> `MockNodeGroupConfigGetter`
- move `MockNodeGroupConfigGetter` to test/common so that it can be used in different tests
Signed-off-by: vadasambar <surajrbanakar@gmail.com>

fix: go lint errors for `MockNodeGroupConfigGetter`
Signed-off-by: vadasambar <surajrbanakar@gmail.com>

test: add tests for `IgnoreDaemonSetsUtilization` in cloud provider dir
Signed-off-by: vadasambar <surajrbanakar@gmail.com>

test: update node group config processor tests for `IgnoreDaemonSetsUtilization`
Signed-off-by: vadasambar <surajrbanakar@gmail.com>

test: update eligibility test cases for `IgnoreDaemonSetsUtilization`
Signed-off-by: vadasambar <surajrbanakar@gmail.com>

test: run actuation tests for 2 node groups
- one with `IgnoreDaemonSetsUtilization`: `false`
- one with `IgnoreDaemonSetsUtilization`: `true`
Signed-off-by: vadasambar <surajrbanakar@gmail.com>

test: add tests for `IgnoreDaemonSetsUtilization` in actuator
- add helper to generate multiple ds pods dynamically
- get rid of mock config processor because it is not required
Signed-off-by: vadasambar <surajrbanakar@gmail.com>

test: fix failing tests for actuator
Signed-off-by: vadasambar <surajrbanakar@gmail.com>

refactor: remove `GlobalIgnoreDaemonSetsUtilization` autoscaling option
- not required
Signed-off-by: vadasambar <surajrbanakar@gmail.com>

fix: warn message `DefaultScaleDownUnreadyTimeKey` -> `DefaultIgnoreDaemonSetsUtilizationKey`
Signed-off-by: vadasambar <surajrbanakar@gmail.com>

refactor: use `generateDsPods` instead of `generateDsPod`
Signed-off-by: vadasambar <surajrbanakar@gmail.com>

refactor: `globaIgnoreDaemonSetsUtilization` -> `ignoreDaemonSetsUtilization`
Signed-off-by: vadasambar <surajrbanakar@gmail.com>
Author: vadasambar
Date: 2023-04-10 11:20:00 +05:30
Parent: b569db410f
Commit: 7941bab214
15 changed files with 205 additions and 60 deletions

View File

@@ -246,6 +246,8 @@ as string). Currently supported autoscaling options (and example values) are:
(overrides `--scale-down-unneeded-time` value for that specific ASG)
* `k8s.io/cluster-autoscaler/node-template/autoscaling-options/scaledownunreadytime`: `20m0s`
(overrides `--scale-down-unready-time` value for that specific ASG)
* `k8s.io/cluster-autoscaler/node-template/autoscaling-options/ignoredaemonsetsutilization`: `true`
(overrides `--ignore-daemonsets-utilization` value for that specific ASG)
**NOTE:** It is your responsibility to ensure such labels and/or taints are
applied via the node's kubelet configuration at startup. Cluster Autoscaler will not set the node taints for you.
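As a worked example of the new option tag above, the override can be applied by tagging the ASG directly. A minimal sketch using aws-sdk-go v1 (the ASG name and region are placeholders; tagging via the AWS CLI or console works equally well):

```go
package main

import (
	"log"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/autoscaling"
)

func main() {
	sess := session.Must(session.NewSession(&aws.Config{Region: aws.String("us-east-1")}))
	svc := autoscaling.New(sess)

	// Tag the ASG so Cluster Autoscaler picks up the per-ASG override.
	// The tag is read from the ASG itself, so it does not need to
	// propagate to instances at launch.
	_, err := svc.CreateOrUpdateTags(&autoscaling.CreateOrUpdateTagsInput{
		Tags: []*autoscaling.Tag{{
			ResourceId:        aws.String("my-asg"),
			ResourceType:      aws.String("auto-scaling-group"),
			Key:               aws.String("k8s.io/cluster-autoscaler/node-template/autoscaling-options/ignoredaemonsetsutilization"),
			Value:             aws.String("true"),
			PropagateAtLaunch: aws.Bool(false),
		}},
	})
	if err != nil {
		log.Fatalf("failed to tag ASG: %v", err)
	}
}
```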

View File

@@ -245,6 +245,15 @@ func (m *AwsManager) GetAsgOptions(asg asg, defaults config.NodeGroupAutoscaling
}
}
if stringOpt, found := options[config.DefaultIgnoreDaemonSetsUtilizationKey]; found {
if opt, err := strconv.ParseBool(stringOpt); err != nil {
klog.Warningf("failed to convert asg %s %s tag to bool: %v",
asg.Name, config.DefaultIgnoreDaemonSetsUtilizationKey, err)
} else {
defaults.IgnoreDaemonSetsUtilization = opt
}
}
return &defaults
}
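The override pattern above is deliberately forgiving: a malformed tag value is logged and ignored rather than failing the whole options lookup. A standalone sketch of that behavior (helper name and tag map are illustrative, not autoscaler API):

```go
package main

import (
	"fmt"
	"strconv"
)

// applyBoolOverride mirrors the pattern in GetAsgOptions: if the tag is
// present and parses as a bool, it overrides the default; a malformed
// value keeps the default.
func applyBoolOverride(tags map[string]string, key string, def bool) bool {
	if raw, found := tags[key]; found {
		if v, err := strconv.ParseBool(raw); err == nil {
			return v
		}
	}
	return def
}

func main() {
	key := "ignoredaemonsetsutilization"
	fmt.Println(applyBoolOverride(map[string]string{key: "true"}, key, false))       // true: override applied
	fmt.Println(applyBoolOverride(map[string]string{key: "not-a-bool"}, key, false)) // false: default kept
}
```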

View File

@@ -130,6 +130,7 @@ func TestGetAsgOptions(t *testing.T) {
ScaleDownGpuUtilizationThreshold: 0.2,
ScaleDownUnneededTime: time.Second,
ScaleDownUnreadyTime: time.Minute,
IgnoreDaemonSetsUtilization: false,
}
tests := []struct {
@@ -145,39 +146,60 @@
{
description: "keep defaults on invalid tags values",
tags: map[string]string{
"scaledownutilizationthreshold": "not-a-float",
"scaledownunneededtime": "not-a-duration",
"ScaleDownUnreadyTime": "",
config.DefaultScaleDownUtilizationThresholdKey: "not-a-float",
config.DefaultScaleDownUnneededTimeKey: "not-a-duration",
"ScaleDownUnreadyTime": "",
config.DefaultIgnoreDaemonSetsUtilizationKey: "not-a-bool",
},
expected: &defaultOptions,
},
{
description: "use provided tags and fill missing with defaults",
tags: map[string]string{
"scaledownutilizationthreshold": "0.42",
"scaledownunneededtime": "1h",
config.DefaultScaleDownUtilizationThresholdKey: "0.42",
config.DefaultScaleDownUnneededTimeKey: "1h",
config.DefaultIgnoreDaemonSetsUtilizationKey: "true",
},
expected: &config.NodeGroupAutoscalingOptions{
ScaleDownUtilizationThreshold: 0.42,
ScaleDownGpuUtilizationThreshold: defaultOptions.ScaleDownGpuUtilizationThreshold,
ScaleDownUnneededTime: time.Hour,
ScaleDownUnreadyTime: defaultOptions.ScaleDownUnreadyTime,
IgnoreDaemonSetsUtilization: true,
},
},
{
description: "use provided tags (happy path)",
tags: map[string]string{
config.DefaultScaleDownUtilizationThresholdKey: "0.42",
config.DefaultScaleDownUnneededTimeKey: "1h",
config.DefaultScaleDownGpuUtilizationThresholdKey: "0.7",
config.DefaultScaleDownUnreadyTimeKey: "25m",
config.DefaultIgnoreDaemonSetsUtilizationKey: "true",
},
expected: &config.NodeGroupAutoscalingOptions{
ScaleDownUtilizationThreshold: 0.42,
ScaleDownGpuUtilizationThreshold: 0.7,
ScaleDownUnneededTime: time.Hour,
ScaleDownUnreadyTime: 25 * time.Minute,
IgnoreDaemonSetsUtilization: true,
},
},
{
description: "ignore unknown tags",
tags: map[string]string{
"scaledownutilizationthreshold": "0.6",
"scaledowngpuutilizationthreshold": "0.7",
"scaledownunneededtime": "1m",
"scaledownunreadytime": "1h",
"notyetspecified": "42",
config.DefaultScaleDownUtilizationThresholdKey: "0.6",
config.DefaultScaleDownGpuUtilizationThresholdKey: "0.7",
config.DefaultScaleDownUnneededTimeKey: "1m",
config.DefaultScaleDownUnreadyTimeKey: "1h",
"notyetspecified": "42",
},
expected: &config.NodeGroupAutoscalingOptions{
ScaleDownUtilizationThreshold: 0.6,
ScaleDownGpuUtilizationThreshold: 0.7,
ScaleDownUnneededTime: time.Minute,
ScaleDownUnreadyTime: time.Hour,
IgnoreDaemonSetsUtilization: false,
},
},
}

View File

@@ -48,6 +48,8 @@ type NodeGroupAutoscalingOptions struct {
MaxNodeProvisionTime time.Duration
// ZeroOrMaxNodeScaling means that a node group should be scaled up to maximum size or down to zero nodes all at once instead of one-by-one.
ZeroOrMaxNodeScaling bool
// IgnoreDaemonSetsUtilization is whether CA will ignore DaemonSet pods when calculating resource utilization for scaling down
IgnoreDaemonSetsUtilization bool
}
// GCEOptions contain autoscaling options specific to GCE cloud provider.
@@ -117,8 +119,6 @@ type AutoscalingOptions struct {
GRPCExpanderCert string
// GRPCExpanderURL is the url of the gRPC server when using the gRPC expander
GRPCExpanderURL string
// IgnoreDaemonSetsUtilization is whether CA will ignore DaemonSet pods when calculating resource utilization for scaling down
IgnoreDaemonSetsUtilization bool
// IgnoreMirrorPodsUtilization is whether CA will ignore Mirror pods when calculating resource utilization for scaling down
IgnoreMirrorPodsUtilization bool
// MaxGracefulTerminationSec is maximum number of seconds scale down waits for pods to terminate before

View File

@@ -16,6 +16,8 @@ limitations under the License.
package config
import "time"
const (
// DefaultMaxClusterCores is the default maximum number of cores in the cluster.
DefaultMaxClusterCores = 5000 * 64
@@ -32,4 +34,14 @@ const (
DefaultScaleDownUnreadyTimeKey = "scaledownunreadytime"
// DefaultMaxNodeProvisionTimeKey identifies MaxNodeProvisionTime autoscaling option
DefaultMaxNodeProvisionTimeKey = "maxnodeprovisiontime"
// DefaultIgnoreDaemonSetsUtilizationKey identifies IgnoreDaemonSetsUtilization autoscaling option
DefaultIgnoreDaemonSetsUtilizationKey = "ignoredaemonsetsutilization"
// DefaultScaleDownUnneededTime is the default value of the ScaleDownUnneededTime autoscaling option
DefaultScaleDownUnneededTime = 10 * time.Minute
// DefaultScaleDownUnreadyTime is the default value of the ScaleDownUnreadyTime autoscaling option
DefaultScaleDownUnreadyTime = 20 * time.Minute
// DefaultScaleDownUtilizationThreshold is the default value of the ScaleDownUtilizationThreshold autoscaling option
DefaultScaleDownUtilizationThreshold = 0.5
// DefaultScaleDownGpuUtilizationThreshold is the default value of the ScaleDownGpuUtilizationThreshold autoscaling option
DefaultScaleDownGpuUtilizationThreshold = 0.5
)

View File

@@ -33,6 +33,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/status"
"k8s.io/autoscaler/cluster-autoscaler/core/utils"
"k8s.io/autoscaler/cluster-autoscaler/metrics"
"k8s.io/autoscaler/cluster-autoscaler/processors"
"k8s.io/autoscaler/cluster-autoscaler/simulator"
"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
"k8s.io/autoscaler/cluster-autoscaler/simulator/utilization"
@@ -52,10 +53,18 @@ type Actuator struct {
// This is a larger change to the code structure which impacts some existing actuator unit tests
// as well as Cluster Autoscaler implementations that may override ScaleDownSetProcessor
budgetProcessor *budgets.ScaleDownBudgetProcessor
configGetter actuatorNodeGroupConfigGetter
}
// actuatorNodeGroupConfigGetter is an interface to limit the functions that can be used
// from the NodeGroupConfigProcessor interface
type actuatorNodeGroupConfigGetter interface {
// GetIgnoreDaemonSetsUtilization returns IgnoreDaemonSetsUtilization value that should be used for a given NodeGroup.
GetIgnoreDaemonSetsUtilization(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (bool, error)
}
// NewActuator returns a new instance of Actuator.
func NewActuator(ctx *context.AutoscalingContext, csr *clusterstate.ClusterStateRegistry, ndt *deletiontracker.NodeDeletionTracker, deleteOptions simulator.NodeDeleteOptions) *Actuator {
func NewActuator(ctx *context.AutoscalingContext, csr *clusterstate.ClusterStateRegistry, ndt *deletiontracker.NodeDeletionTracker, deleteOptions simulator.NodeDeleteOptions, processors *processors.AutoscalingProcessors) *Actuator {
ndb := NewNodeDeletionBatcher(ctx, csr, ndt, ctx.NodeDeletionBatcherInterval)
return &Actuator{
ctx: ctx,
@@ -64,6 +73,7 @@ func NewActuator(ctx *context.AutoscalingContext, csr *clusterstate.ClusterState
nodeDeletionScheduler: NewGroupDeletionScheduler(ctx, ndt, ndb, NewDefaultEvictor(deleteOptions, ndt)),
budgetProcessor: budgets.NewScaleDownBudgetProcessor(ctx),
deleteOptions: deleteOptions,
configGetter: processors.NodeGroupConfigProcessor,
}
}
@@ -263,8 +273,14 @@ func (a *Actuator) scaleDownNodeToReport(node *apiv1.Node, drain bool) (*status.
if err != nil {
return nil, err
}
ignoreDaemonSetsUtilization, err := a.configGetter.GetIgnoreDaemonSetsUtilization(a.ctx, nodeGroup)
if err != nil {
return nil, err
}
gpuConfig := a.ctx.CloudProvider.GetNodeGpuConfig(node)
utilInfo, err := utilization.Calculate(nodeInfo, a.ctx.IgnoreDaemonSetsUtilization, a.ctx.IgnoreMirrorPodsUtilization, gpuConfig, time.Now())
utilInfo, err := utilization.Calculate(nodeInfo, ignoreDaemonSetsUtilization, a.ctx.IgnoreMirrorPodsUtilization, gpuConfig, time.Now())
if err != nil {
return nil, err
}
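The `actuatorNodeGroupConfigGetter` interface above is a consumer-side narrowing of `NodeGroupConfigProcessor`: the full processor still satisfies it implicitly, but the actuator can only reach the one method it declares. A simplified sketch of the pattern (signatures reduced for illustration; the real methods take an `*context.AutoscalingContext` and a `cloudprovider.NodeGroup`):

```go
package main

import "fmt"

// Narrow interface declared where it is consumed: only the method the
// actuator actually needs.
type ignoreDSGetter interface {
	GetIgnoreDaemonSetsUtilization(nodeGroup string) (bool, error)
}

// fullProcessor stands in for the much larger NodeGroupConfigProcessor.
type fullProcessor struct{}

func (fullProcessor) GetIgnoreDaemonSetsUtilization(nodeGroup string) (bool, error) {
	return true, nil
}

// Extra methods exist on the concrete type but stay invisible through
// the narrow interface.
func (fullProcessor) GetScaleDownUnneededTime(nodeGroup string) (string, error) {
	return "10m", nil
}

func main() {
	var g ignoreDSGetter = fullProcessor{} // implicit satisfaction
	v, _ := g.GetIgnoreDaemonSetsUtilization("ng-1")
	fmt.Println(v) // true
}
```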

View File

@@ -41,20 +41,22 @@ const (
// Checker is responsible for deciding which nodes pass the criteria for scale down.
type Checker struct {
thresholdGetter utilizationThresholdGetter
configGetter nodeGroupConfigGetter
}
type utilizationThresholdGetter interface {
type nodeGroupConfigGetter interface {
// GetScaleDownUtilizationThreshold returns ScaleDownUtilizationThreshold value that should be used for a given NodeGroup.
GetScaleDownUtilizationThreshold(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (float64, error)
// GetScaleDownGpuUtilizationThreshold returns ScaleDownGpuUtilizationThreshold value that should be used for a given NodeGroup.
GetScaleDownGpuUtilizationThreshold(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (float64, error)
// GetIgnoreDaemonSetsUtilization returns IgnoreDaemonSetsUtilization value that should be used for a given NodeGroup.
GetIgnoreDaemonSetsUtilization(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (bool, error)
}
// NewChecker creates a new Checker object.
func NewChecker(thresholdGetter utilizationThresholdGetter) *Checker {
func NewChecker(configGetter nodeGroupConfigGetter) *Checker {
return &Checker{
thresholdGetter: thresholdGetter,
configGetter: configGetter,
}
}
@@ -118,12 +120,6 @@ func (c *Checker) unremovableReasonAndNodeUtilization(context *context.Autoscali
return simulator.ScaleDownDisabledAnnotation, nil
}
gpuConfig := context.CloudProvider.GetNodeGpuConfig(node)
utilInfo, err := utilization.Calculate(nodeInfo, context.IgnoreDaemonSetsUtilization, context.IgnoreMirrorPodsUtilization, gpuConfig, timestamp)
if err != nil {
klog.Warningf("Failed to calculate utilization for %s: %v", node.Name, err)
}
nodeGroup, err := context.CloudProvider.NodeGroupForNode(node)
if err != nil {
klog.Warningf("Node group not found for node %v: %v", node.Name, err)
@@ -136,6 +132,18 @@
return simulator.NotAutoscaled, nil
}
ignoreDaemonSetsUtilization, err := c.configGetter.GetIgnoreDaemonSetsUtilization(context, nodeGroup)
if err != nil {
klog.Warningf("Couldn't retrieve `IgnoreDaemonSetsUtilization` option for node %v: %v", node.Name, err)
return simulator.UnexpectedError, nil
}
gpuConfig := context.CloudProvider.GetNodeGpuConfig(node)
utilInfo, err := utilization.Calculate(nodeInfo, ignoreDaemonSetsUtilization, context.IgnoreMirrorPodsUtilization, gpuConfig, timestamp)
if err != nil {
klog.Warningf("Failed to calculate utilization for %s: %v", node.Name, err)
}
// If scale down of unready nodes is disabled, skip the node if it is unready
if !context.ScaleDownUnreadyEnabled {
ready, _, _ := kube_util.GetReadinessState(node)
@@ -166,12 +174,12 @@ func (c *Checker) isNodeBelowUtilizationThreshold(context *context.AutoscalingCo
var err error
gpuConfig := context.CloudProvider.GetNodeGpuConfig(node)
if gpuConfig != nil {
threshold, err = c.thresholdGetter.GetScaleDownGpuUtilizationThreshold(context, nodeGroup)
threshold, err = c.configGetter.GetScaleDownGpuUtilizationThreshold(context, nodeGroup)
if err != nil {
return false, err
}
} else {
threshold, err = c.thresholdGetter.GetScaleDownUtilizationThreshold(context, nodeGroup)
threshold, err = c.configGetter.GetScaleDownUtilizationThreshold(context, nodeGroup)
if err != nil {
return false, err
}

View File

@@ -21,12 +21,11 @@ import (
"testing"
"time"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/context"
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/unremovable"
. "k8s.io/autoscaler/cluster-autoscaler/core/test"
"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupconfig"
"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
@@ -36,6 +35,15 @@
"k8s.io/client-go/kubernetes/fake"
)
type testCase struct {
desc string
nodes []*apiv1.Node
pods []*apiv1.Pod
want []string
scaleDownUnready bool
ignoreDaemonSetsUtilization bool
}
func TestFilterOutUnremovable(t *testing.T) {
now := time.Now()
@@ -59,13 +67,10 @@ func TestFilterOutUnremovable(t *testing.T) {
smallPod := BuildTestPod("smallPod", 100, 0)
smallPod.Spec.NodeName = "regular"
testCases := []struct {
desc string
nodes []*apiv1.Node
pods []*apiv1.Pod
want []string
scaleDownUnready bool
}{
dsPod := BuildDSTestPod("dsPod", 500, 0)
dsPod.Spec.NodeName = "regular"
testCases := []testCase{
{
desc: "regular node stays",
nodes: []*apiv1.Node{regularNode},
@@ -111,14 +116,53 @@
scaleDownUnready: false,
},
}
allTestCases := testCases
// run all test cases again with `IgnoreDaemonSetsUtilization` set to true
for _, tc := range testCases {
tc := tc // shallow copy; avoids shadowing the *testing.T parameter with a testCase named t
tc.ignoreDaemonSetsUtilization = true
allTestCases = append(allTestCases, tc)
}
ignoreDsCases := []testCase{
{
desc: "high utilization daemonsets node is filtered out",
nodes: []*apiv1.Node{regularNode},
pods: []*apiv1.Pod{smallPod, dsPod},
want: []string{},
scaleDownUnready: true,
ignoreDaemonSetsUtilization: false,
},
{
desc: "high utilization daemonsets node stays",
nodes: []*apiv1.Node{regularNode},
pods: []*apiv1.Pod{smallPod, dsPod},
want: []string{"regular"},
scaleDownUnready: true,
ignoreDaemonSetsUtilization: true,
},
}
allTestCases = append(allTestCases, ignoreDsCases...)
for _, tc := range allTestCases {
tc := tc
t.Run(tc.desc, func(t *testing.T) {
t.Parallel()
c := NewChecker(&staticThresholdGetter{0.5})
s := nodegroupconfig.DelegatingNodeGroupConfigProcessor{}
c := NewChecker(&s)
options := config.AutoscalingOptions{
UnremovableNodeRecheckTimeout: 5 * time.Minute,
ScaleDownUnreadyEnabled: tc.scaleDownUnready,
NodeGroupDefaults: config.NodeGroupAutoscalingOptions{
ScaleDownUtilizationThreshold: config.DefaultScaleDownUtilizationThreshold,
ScaleDownGpuUtilizationThreshold: config.DefaultScaleDownGpuUtilizationThreshold,
ScaleDownUnneededTime: config.DefaultScaleDownUnneededTime,
ScaleDownUnreadyTime: config.DefaultScaleDownUnreadyTime,
IgnoreDaemonSetsUtilization: tc.ignoreDaemonSetsUtilization,
},
}
provider := testprovider.NewTestCloudProvider(nil, nil)
provider.AddNodeGroup("ng1", 1, 10, 2)
@@ -136,15 +180,3 @@
})
}
}
type staticThresholdGetter struct {
threshold float64
}
func (s *staticThresholdGetter) GetScaleDownUtilizationThreshold(_ *context.AutoscalingContext, _ cloudprovider.NodeGroup) (float64, error) {
return s.threshold, nil
}
func (s *staticThresholdGetter) GetScaleDownGpuUtilizationThreshold(_ *context.AutoscalingContext, _ cloudprovider.NodeGroup) (float64, error) {
return s.threshold, nil
}

View File

@@ -1303,7 +1303,8 @@ func newWrapperForTesting(ctx *context.AutoscalingContext, clusterStateRegistry
MinReplicaCount: 0,
SkipNodesWithCustomControllerPods: true,
}
sd := NewScaleDown(ctx, NewTestProcessors(ctx), ndt, deleteOptions)
actuator := actuation.NewActuator(ctx, clusterStateRegistry, ndt, deleteOptions)
processors := NewTestProcessors(ctx)
sd := NewScaleDown(ctx, processors, ndt, deleteOptions)
actuator := actuation.NewActuator(ctx, clusterStateRegistry, ndt, deleteOptions, processors)
return NewScaleDownWrapper(sd, actuator)
}

View File

@@ -172,7 +172,7 @@ func NewStaticAutoscaler(
// during the struct creation rather than here.
ndt := deletiontracker.NewNodeDeletionTracker(0 * time.Second)
scaleDown := legacy.NewScaleDown(autoscalingContext, processors, ndt, deleteOptions)
actuator := actuation.NewActuator(autoscalingContext, clusterStateRegistry, ndt, deleteOptions)
actuator := actuation.NewActuator(autoscalingContext, clusterStateRegistry, ndt, deleteOptions, processors)
autoscalingContext.ScaleDownActuator = actuator
var scaleDownPlanner scaledown.Planner

View File

@@ -159,7 +159,7 @@ func (m *onNodeGroupDeleteMock) Delete(id string) error {
func setUpScaleDownActuator(ctx *context.AutoscalingContext, options config.AutoscalingOptions) {
deleteOptions := simulator.NewNodeDeleteOptions(options)
ctx.ScaleDownActuator = actuation.NewActuator(ctx, nil, deletiontracker.NewNodeDeletionTracker(0*time.Second), deleteOptions)
ctx.ScaleDownActuator = actuation.NewActuator(ctx, nil, deletiontracker.NewNodeDeletionTracker(0*time.Second), deleteOptions, NewTestProcessors(ctx))
}
func TestStaticAutoscalerRunOnce(t *testing.T) {
@@ -1433,7 +1433,7 @@ func TestStaticAutoscalerUpcomingScaleDownCandidates(t *testing.T) {
csr.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))
// Setting the Actuator is necessary for testing any scale-down logic, it shouldn't have anything to do in this test.
actuator := actuation.NewActuator(&ctx, csr, deletiontracker.NewNodeDeletionTracker(0*time.Second), simulator.NodeDeleteOptions{})
actuator := actuation.NewActuator(&ctx, csr, deletiontracker.NewNodeDeletionTracker(0*time.Second), simulator.NodeDeleteOptions{}, NewTestProcessors(&ctx))
ctx.ScaleDownActuator = actuator
// Fake planner that keeps track of the scale-down candidates passed to UpdateClusterState.
@@ -1761,7 +1761,7 @@ func newScaleDownPlannerAndActuator(t *testing.T, ctx *context.AutoscalingContex
}
ndt := deletiontracker.NewNodeDeletionTracker(0 * time.Second)
sd := legacy.NewScaleDown(ctx, p, ndt, deleteOptions)
actuator := actuation.NewActuator(ctx, cs, ndt, deleteOptions)
actuator := actuation.NewActuator(ctx, cs, ndt, deleteOptions, p)
wrapper := legacy.NewScaleDownWrapper(sd, actuator)
return wrapper, wrapper
}

View File

@@ -107,13 +107,13 @@ var (
"How long after node deletion that scale down evaluation resumes, defaults to scanInterval")
scaleDownDelayAfterFailure = flag.Duration("scale-down-delay-after-failure", 3*time.Minute,
"How long after scale down failure that scale down evaluation resumes")
scaleDownUnneededTime = flag.Duration("scale-down-unneeded-time", 10*time.Minute,
scaleDownUnneededTime = flag.Duration("scale-down-unneeded-time", config.DefaultScaleDownUnneededTime,
"How long a node should be unneeded before it is eligible for scale down")
scaleDownUnreadyTime = flag.Duration("scale-down-unready-time", 20*time.Minute,
scaleDownUnreadyTime = flag.Duration("scale-down-unready-time", config.DefaultScaleDownUnreadyTime,
"How long an unready node should be unneeded before it is eligible for scale down")
scaleDownUtilizationThreshold = flag.Float64("scale-down-utilization-threshold", 0.5,
scaleDownUtilizationThreshold = flag.Float64("scale-down-utilization-threshold", config.DefaultScaleDownUtilizationThreshold,
"Sum of cpu or memory of all pods running on the node divided by node's corresponding allocatable resource, below which a node can be considered for scale down")
scaleDownGpuUtilizationThreshold = flag.Float64("scale-down-gpu-utilization-threshold", 0.5,
scaleDownGpuUtilizationThreshold = flag.Float64("scale-down-gpu-utilization-threshold", config.DefaultScaleDownGpuUtilizationThreshold,
"Sum of gpu requests of all pods running on the node divided by node's allocatable resource, below which a node can be considered for scale down."+
"Utilization calculation only cares about gpu resource for accelerator node. cpu and memory utilization will be ignored.")
scaleDownNonEmptyCandidatesCount = flag.Int("scale-down-non-empty-candidates-count", 30,
@@ -258,7 +258,9 @@ func createAutoscalingOptions() config.AutoscalingOptions {
ScaleDownGpuUtilizationThreshold: *scaleDownGpuUtilizationThreshold,
ScaleDownUnneededTime: *scaleDownUnneededTime,
ScaleDownUnreadyTime: *scaleDownUnreadyTime,
MaxNodeProvisionTime: *maxNodeProvisionTime,
// per-nodegroup setting, defaulted to the global --ignore-daemonsets-utilization flag value
IgnoreDaemonSetsUtilization: *ignoreDaemonSetsUtilization,
MaxNodeProvisionTime: *maxNodeProvisionTime,
},
CloudConfig: *cloudConfig,
CloudProviderName: *cloudProviderFlag,
@@ -271,7 +273,6 @@ func createAutoscalingOptions() config.AutoscalingOptions {
ExpanderNames: *expanderFlag,
GRPCExpanderCert: *grpcExpanderCert,
GRPCExpanderURL: *grpcExpanderURL,
IgnoreDaemonSetsUtilization: *ignoreDaemonSetsUtilization,
IgnoreMirrorPodsUtilization: *ignoreMirrorPodsUtilization,
MaxBulkSoftTaintCount: *maxBulkSoftTaintCount,
MaxBulkSoftTaintTime: *maxBulkSoftTaintTime,

View File

@@ -35,6 +35,8 @@ type NodeGroupConfigProcessor interface {
GetScaleDownGpuUtilizationThreshold(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (float64, error)
// GetMaxNodeProvisionTime return MaxNodeProvisionTime value that should be used for a given NodeGroup.
GetMaxNodeProvisionTime(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (time.Duration, error)
// GetIgnoreDaemonSetsUtilization returns IgnoreDaemonSetsUtilization value that should be used for a given NodeGroup.
GetIgnoreDaemonSetsUtilization(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (bool, error)
// CleanUp cleans up processor's internal structures.
CleanUp()
}
@@ -105,6 +107,18 @@ func (p *DelegatingNodeGroupConfigProcessor) GetMaxNodeProvisionTime(context *co
return ngConfig.MaxNodeProvisionTime, nil
}
// GetIgnoreDaemonSetsUtilization returns IgnoreDaemonSetsUtilization value that should be used for a given NodeGroup.
func (p *DelegatingNodeGroupConfigProcessor) GetIgnoreDaemonSetsUtilization(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (bool, error) {
ngConfig, err := nodeGroup.GetOptions(context.NodeGroupDefaults)
if err != nil && err != cloudprovider.ErrNotImplemented {
return false, err
}
if ngConfig == nil || err == cloudprovider.ErrNotImplemented {
return context.NodeGroupDefaults.IgnoreDaemonSetsUtilization, nil
}
return ngConfig.IgnoreDaemonSetsUtilization, nil
}
// CleanUp cleans up processor's internal structures.
func (p *DelegatingNodeGroupConfigProcessor) CleanUp() {
}
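The delegation above resolves in three steps: a real error propagates, a provider that returns `nil` options or `ErrNotImplemented` falls back to the global `NodeGroupDefaults`, and otherwise the per-nodegroup value wins. A self-contained sketch of that resolution order (types simplified; `errNotImplemented` stands in for `cloudprovider.ErrNotImplemented`):

```go
package main

import (
	"errors"
	"fmt"
)

var errNotImplemented = errors.New("not implemented")

type ngOptions struct{ IgnoreDaemonSetsUtilization bool }

// resolveIgnoreDS mirrors GetIgnoreDaemonSetsUtilization with plain values.
func resolveIgnoreDS(getOptions func() (*ngOptions, error), globalDefault bool) (bool, error) {
	opts, err := getOptions()
	if err != nil && err != errNotImplemented {
		return false, err // real error: propagate
	}
	if opts == nil || err == errNotImplemented {
		return globalDefault, nil // no per-group opinion: fall back
	}
	return opts.IgnoreDaemonSetsUtilization, nil // per-group override wins
}

func main() {
	// Provider without per-group options: the global default (true) wins.
	v, _ := resolveIgnoreDS(func() (*ngOptions, error) { return nil, errNotImplemented }, true)
	fmt.Println(v) // true

	// Provider with per-group options: the override (false) wins.
	v, _ = resolveIgnoreDS(func() (*ngOptions, error) {
		return &ngOptions{IgnoreDaemonSetsUtilization: false}, nil
	}, true)
	fmt.Println(v) // false
}
```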

View File

@@ -49,6 +49,7 @@ func TestDelegatingNodeGroupConfigProcessor(t *testing.T) {
ScaleDownGpuUtilizationThreshold: 0.6,
ScaleDownUtilizationThreshold: 0.5,
MaxNodeProvisionTime: 15 * time.Minute,
IgnoreDaemonSetsUtilization: true,
}
ngOpts := &config.NodeGroupAutoscalingOptions{
ScaleDownUnneededTime: 10 * time.Minute,
@@ -56,6 +57,7 @@
ScaleDownGpuUtilizationThreshold: 0.85,
ScaleDownUtilizationThreshold: 0.75,
MaxNodeProvisionTime: 60 * time.Minute,
IgnoreDaemonSetsUtilization: false,
}
testUnneededTime := func(t *testing.T, p DelegatingNodeGroupConfigProcessor, c *context.AutoscalingContext, ng cloudprovider.NodeGroup, w Want, we error) {
@@ -109,18 +111,32 @@
assert.Equal(t, res, results[w])
}
// for IgnoreDaemonSetsUtilization
testIgnoreDSUtilization := func(t *testing.T, p DelegatingNodeGroupConfigProcessor, c *context.AutoscalingContext, ng cloudprovider.NodeGroup, w Want, we error) {
res, err := p.GetIgnoreDaemonSetsUtilization(c, ng)
assert.Equal(t, err, we)
results := map[Want]bool{
NIL: false,
GLOBAL: true,
NG: false,
}
assert.Equal(t, res, results[w])
}
funcs := map[string]func(*testing.T, DelegatingNodeGroupConfigProcessor, *context.AutoscalingContext, cloudprovider.NodeGroup, Want, error){
"ScaleDownUnneededTime": testUnneededTime,
"ScaleDownUnreadyTime": testUnreadyTime,
"ScaleDownUtilizationThreshold": testUtilizationThreshold,
"ScaleDownGpuUtilizationThreshold": testGpuThreshold,
"MaxNodeProvisionTime": testMaxNodeProvisionTime,
"IgnoreDaemonSetsUtilization": testIgnoreDSUtilization,
"MultipleOptions": func(t *testing.T, p DelegatingNodeGroupConfigProcessor, c *context.AutoscalingContext, ng cloudprovider.NodeGroup, w Want, we error) {
testUnneededTime(t, p, c, ng, w, we)
testUnreadyTime(t, p, c, ng, w, we)
testUtilizationThreshold(t, p, c, ng, w, we)
testGpuThreshold(t, p, c, ng, w, we)
testMaxNodeProvisionTime(t, p, c, ng, w, we)
testIgnoreDSUtilization(t, p, c, ng, w, we)
},
"RepeatingTheSameCallGivesConsistentResults": func(t *testing.T, p DelegatingNodeGroupConfigProcessor, c *context.AutoscalingContext, ng cloudprovider.NodeGroup, w Want, we error) {
testUnneededTime(t, p, c, ng, w, we)
@@ -128,6 +144,9 @@
// throw in a different call
testGpuThreshold(t, p, c, ng, w, we)
testUnneededTime(t, p, c, ng, w, we)
// throw in another different call
testIgnoreDSUtilization(t, p, c, ng, w, we)
testUnneededTime(t, p, c, ng, w, we)
},
}

View File

@@ -67,6 +67,15 @@ func BuildTestPod(name string, cpu int64, mem int64) *apiv1.Pod {
return pod
}
// BuildDSTestPod creates a DaemonSet pod with cpu and memory.
func BuildDSTestPod(name string, cpu int64, mem int64) *apiv1.Pod {
pod := BuildTestPod(name, cpu, mem)
pod.OwnerReferences = GenerateOwnerReferences("ds", "DaemonSet", "apps/v1", "some-uid")
return pod
}
// BuildTestPodWithEphemeralStorage creates a pod with cpu, memory and ephemeral storage resources.
func BuildTestPodWithEphemeralStorage(name string, cpu, mem, ephemeralStorage int64) *apiv1.Pod {
startTime := metav1.Unix(0, 0)
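
The commit messages above also mention a `generateDsPods` helper built on top of the `BuildDSTestPod` helper shown in this file. A possible shape for it (hypothetical signature; the helper in the actuator tests may differ, and this assumes the surrounding test package's `fmt` and `apiv1` imports):

```go
// generateDsPods builds count DaemonSet-owned pods, all pinned to the
// same node, for utilization tests.
func generateDsPods(count int, nodeName string, cpu, mem int64) []*apiv1.Pod {
	pods := make([]*apiv1.Pod, 0, count)
	for i := 0; i < count; i++ {
		pod := BuildDSTestPod(fmt.Sprintf("ds-pod-%d", i), cpu, mem)
		pod.Spec.NodeName = nodeName
		pods = append(pods, pod)
	}
	return pods
}
```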