Merge pull request #5672 from vadasambar/feat/5399/ignore-daemonsets-utilization-per-nodegroup

feat: set `IgnoreDaemonSetsUtilization` per nodegroup for AWS
Kubernetes Prow Robot 2023-07-12 07:43:12 -07:00 committed by GitHub
commit c6893e9e28
16 changed files with 623 additions and 322 deletions

View File

@@ -246,6 +246,8 @@ as string). Currently supported autoscaling options (and example values) are:
  (overrides `--scale-down-unneeded-time` value for that specific ASG)
* `k8s.io/cluster-autoscaler/node-template/autoscaling-options/scaledownunreadytime`: `20m0s`
  (overrides `--scale-down-unready-time` value for that specific ASG)
+* `k8s.io/cluster-autoscaler/node-template/autoscaling-options/ignoredaemonsetsutilization`: `true`
+  (overrides `--ignore-daemonsets-utilization` value for that specific ASG)

**NOTE:** It is your responsibility to ensure such labels and/or taints are
applied via the node's kubelet configuration at startup. Cluster Autoscaler will not set the node taints for you.
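
For context outside the diff: below is a minimal sketch of how such an override tag could be applied to an ASG with aws-sdk-go so that Cluster Autoscaler picks up a per-ASG `IgnoreDaemonSetsUtilization` value. The ASG name "my-asg", the session setup, and the program layout are illustrative assumptions and not part of this change; only the tag key and value come from the documentation above.

// Illustrative sketch (not part of this PR): tag an ASG with the new
// per-ASG autoscaling option. "my-asg" is a placeholder name.
package main

import (
  "log"

  "github.com/aws/aws-sdk-go/aws"
  "github.com/aws/aws-sdk-go/aws/session"
  "github.com/aws/aws-sdk-go/service/autoscaling"
)

func main() {
  sess := session.Must(session.NewSession())
  svc := autoscaling.New(sess)

  _, err := svc.CreateOrUpdateTags(&autoscaling.CreateOrUpdateTagsInput{
    Tags: []*autoscaling.Tag{
      {
        ResourceId:   aws.String("my-asg"), // placeholder ASG name
        ResourceType: aws.String("auto-scaling-group"),
        Key:          aws.String("k8s.io/cluster-autoscaler/node-template/autoscaling-options/ignoredaemonsetsutilization"),
        Value:        aws.String("true"),
        // Cluster Autoscaler reads autoscaling-options tags from the ASG itself,
        // so propagating the tag to instances is not required.
        PropagateAtLaunch: aws.Bool(false),
      },
    },
  })
  if err != nil {
    log.Fatalf("failed to tag ASG: %v", err)
  }
}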

View File

@@ -245,6 +245,15 @@ func (m *AwsManager) GetAsgOptions(asg asg, defaults config.NodeGroupAutoscaling
    }
  }

+  if stringOpt, found := options[config.DefaultIgnoreDaemonSetsUtilizationKey]; found {
+    if opt, err := strconv.ParseBool(stringOpt); err != nil {
+      klog.Warningf("failed to convert asg %s %s tag to bool: %v",
+        asg.Name, config.DefaultIgnoreDaemonSetsUtilizationKey, err)
+    } else {
+      defaults.IgnoreDaemonSetsUtilization = opt
+    }
+  }
+
  return &defaults
}

View File

@@ -130,6 +130,7 @@ func TestGetAsgOptions(t *testing.T) {
    ScaleDownGpuUtilizationThreshold: 0.2,
    ScaleDownUnneededTime: time.Second,
    ScaleDownUnreadyTime: time.Minute,
+   IgnoreDaemonSetsUtilization: false,
  }

  tests := []struct {
@@ -145,32 +146,52 @@ func TestGetAsgOptions(t *testing.T) {
    {
      description: "keep defaults on invalid tags values",
      tags: map[string]string{
-       "scaledownutilizationthreshold": "not-a-float",
-       "scaledownunneededtime": "not-a-duration",
+       config.DefaultScaleDownUtilizationThresholdKey: "not-a-float",
+       config.DefaultScaleDownUnneededTimeKey: "not-a-duration",
        "ScaleDownUnreadyTime": "",
+       config.DefaultIgnoreDaemonSetsUtilizationKey: "not-a-bool",
      },
      expected: &defaultOptions,
    },
    {
      description: "use provided tags and fill missing with defaults",
      tags: map[string]string{
-       "scaledownutilizationthreshold": "0.42",
-       "scaledownunneededtime": "1h",
+       config.DefaultScaleDownUtilizationThresholdKey: "0.42",
+       config.DefaultScaleDownUnneededTimeKey: "1h",
+       config.DefaultIgnoreDaemonSetsUtilizationKey: "true",
      },
      expected: &config.NodeGroupAutoscalingOptions{
        ScaleDownUtilizationThreshold: 0.42,
        ScaleDownGpuUtilizationThreshold: defaultOptions.ScaleDownGpuUtilizationThreshold,
        ScaleDownUnneededTime: time.Hour,
        ScaleDownUnreadyTime: defaultOptions.ScaleDownUnreadyTime,
+       IgnoreDaemonSetsUtilization: true,
+     },
+   },
+   {
+     description: "use provided tags (happy path)",
+     tags: map[string]string{
+       config.DefaultScaleDownUtilizationThresholdKey: "0.42",
+       config.DefaultScaleDownUnneededTimeKey: "1h",
+       config.DefaultScaleDownGpuUtilizationThresholdKey: "0.7",
+       config.DefaultScaleDownUnreadyTimeKey: "25m",
+       config.DefaultIgnoreDaemonSetsUtilizationKey: "true",
+     },
+     expected: &config.NodeGroupAutoscalingOptions{
+       ScaleDownUtilizationThreshold: 0.42,
+       ScaleDownGpuUtilizationThreshold: 0.7,
+       ScaleDownUnneededTime: time.Hour,
+       ScaleDownUnreadyTime: 25 * time.Minute,
+       IgnoreDaemonSetsUtilization: true,
      },
    },
    {
      description: "ignore unknown tags",
      tags: map[string]string{
-       "scaledownutilizationthreshold": "0.6",
-       "scaledowngpuutilizationthreshold": "0.7",
-       "scaledownunneededtime": "1m",
-       "scaledownunreadytime": "1h",
+       config.DefaultScaleDownUtilizationThresholdKey: "0.6",
+       config.DefaultScaleDownGpuUtilizationThresholdKey: "0.7",
+       config.DefaultScaleDownUnneededTimeKey: "1m",
+       config.DefaultScaleDownUnreadyTimeKey: "1h",
        "notyetspecified": "42",
      },
      expected: &config.NodeGroupAutoscalingOptions{
@@ -178,6 +199,7 @@ func TestGetAsgOptions(t *testing.T) {
        ScaleDownGpuUtilizationThreshold: 0.7,
        ScaleDownUnneededTime: time.Minute,
        ScaleDownUnreadyTime: time.Hour,
+       IgnoreDaemonSetsUtilization: false,
      },
    },
  }

View File

@@ -48,6 +48,8 @@ type NodeGroupAutoscalingOptions struct {
  MaxNodeProvisionTime time.Duration
  // ZeroOrMaxNodeScaling means that a node group should be scaled up to maximum size or down to zero nodes all at once instead of one-by-one.
  ZeroOrMaxNodeScaling bool
+ // IgnoreDaemonSetsUtilization sets if daemonsets utilization should be considered during node scale-down
+ IgnoreDaemonSetsUtilization bool
}

// GCEOptions contain autoscaling options specific to GCE cloud provider.
@@ -117,8 +119,6 @@ type AutoscalingOptions struct {
  GRPCExpanderCert string
  // GRPCExpanderURL is the url of the gRPC server when using the gRPC expander
  GRPCExpanderURL string
- // IgnoreDaemonSetsUtilization is whether CA will ignore DaemonSet pods when calculating resource utilization for scaling down
- IgnoreDaemonSetsUtilization bool
  // IgnoreMirrorPodsUtilization is whether CA will ignore Mirror pods when calculating resource utilization for scaling down
  IgnoreMirrorPodsUtilization bool
  // MaxGracefulTerminationSec is maximum number of seconds scale down waits for pods to terminate before

View File

@@ -16,6 +16,8 @@ limitations under the License.

package config

+import "time"
+
const (
  // DefaultMaxClusterCores is the default maximum number of cores in the cluster.
  DefaultMaxClusterCores = 5000 * 64
@@ -32,4 +34,14 @@
  DefaultScaleDownUnreadyTimeKey = "scaledownunreadytime"
  // DefaultMaxNodeProvisionTimeKey identifies MaxNodeProvisionTime autoscaling option
  DefaultMaxNodeProvisionTimeKey = "maxnodeprovisiontime"
+ // DefaultIgnoreDaemonSetsUtilizationKey identifies IgnoreDaemonSetsUtilization autoscaling option
+ DefaultIgnoreDaemonSetsUtilizationKey = "ignoredaemonsetsutilization"
+ // DefaultScaleDownUnneededTime identifies ScaleDownUnneededTime autoscaling option
+ DefaultScaleDownUnneededTime = 10 * time.Minute
+ // DefaultScaleDownUnreadyTime identifies ScaleDownUnreadyTime autoscaling option
+ DefaultScaleDownUnreadyTime = 20 * time.Minute
+ // DefaultScaleDownUtilizationThreshold identifies ScaleDownUtilizationThreshold autoscaling option
+ DefaultScaleDownUtilizationThreshold = 0.5
+ // DefaultScaleDownGpuUtilizationThreshold identifies ScaleDownGpuUtilizationThreshold autoscaling option
+ DefaultScaleDownGpuUtilizationThreshold = 0.5
)

View File

@@ -52,10 +52,18 @@ type Actuator struct {
  // This is a larger change to the code structure which impacts some existing actuator unit tests
  // as well as Cluster Autoscaler implementations that may override ScaleDownSetProcessor
  budgetProcessor *budgets.ScaleDownBudgetProcessor
+ configGetter actuatorNodeGroupConfigGetter
+}
+
+// actuatorNodeGroupConfigGetter is an interface to limit the functions that can be used
+// from NodeGroupConfigProcessor interface
+type actuatorNodeGroupConfigGetter interface {
+ // GetIgnoreDaemonSetsUtilization returns IgnoreDaemonSetsUtilization value that should be used for a given NodeGroup.
+ GetIgnoreDaemonSetsUtilization(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (bool, error)
}

// NewActuator returns a new instance of Actuator.
-func NewActuator(ctx *context.AutoscalingContext, csr *clusterstate.ClusterStateRegistry, ndt *deletiontracker.NodeDeletionTracker, deleteOptions simulator.NodeDeleteOptions) *Actuator {
+func NewActuator(ctx *context.AutoscalingContext, csr *clusterstate.ClusterStateRegistry, ndt *deletiontracker.NodeDeletionTracker, deleteOptions simulator.NodeDeleteOptions, configGetter actuatorNodeGroupConfigGetter) *Actuator {
  ndb := NewNodeDeletionBatcher(ctx, csr, ndt, ctx.NodeDeletionBatcherInterval)
  return &Actuator{
    ctx: ctx,
@@ -64,6 +72,7 @@ func NewActuator(ctx *context.AutoscalingContext, csr *clusterstate.ClusterState
    nodeDeletionScheduler: NewGroupDeletionScheduler(ctx, ndt, ndb, NewDefaultEvictor(deleteOptions, ndt)),
    budgetProcessor: budgets.NewScaleDownBudgetProcessor(ctx),
    deleteOptions: deleteOptions,
+   configGetter: configGetter,
  }
}

@@ -263,8 +272,14 @@ func (a *Actuator) scaleDownNodeToReport(node *apiv1.Node, drain bool) (*status.
  if err != nil {
    return nil, err
  }
+
+ ignoreDaemonSetsUtilization, err := a.configGetter.GetIgnoreDaemonSetsUtilization(a.ctx, nodeGroup)
+ if err != nil {
+   return nil, err
+ }
+
  gpuConfig := a.ctx.CloudProvider.GetNodeGpuConfig(node)
- utilInfo, err := utilization.Calculate(nodeInfo, a.ctx.IgnoreDaemonSetsUtilization, a.ctx.IgnoreMirrorPodsUtilization, gpuConfig, time.Now())
+ utilInfo, err := utilization.Calculate(nodeInfo, ignoreDaemonSetsUtilization, a.ctx.IgnoreMirrorPodsUtilization, gpuConfig, time.Now())
  if err != nil {
    return nil, err
  }

View File

@@ -43,19 +43,14 @@ import (
  "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/deletiontracker"
  "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/status"
  . "k8s.io/autoscaler/cluster-autoscaler/core/test"
+ "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupconfig"
  "k8s.io/autoscaler/cluster-autoscaler/simulator/utilization"
  kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
  "k8s.io/autoscaler/cluster-autoscaler/utils/taints"
  . "k8s.io/autoscaler/cluster-autoscaler/utils/test"
)

-func TestStartDeletion(t *testing.T) {
- testNg := testprovider.NewTestNodeGroup("test", 0, 100, 3, true, false, "n1-standard-2", nil, nil)
- atomic2 := sizedNodeGroup("atomic-2", 2, true)
- atomic4 := sizedNodeGroup("atomic-4", 4, true)
- toBeDeletedTaint := apiv1.Taint{Key: taints.ToBeDeletedTaint, Effect: apiv1.TaintEffectNoSchedule}
- for tn, tc := range map[string]struct {
+type startDeletionTestCase struct {
  emptyNodes []*budgets.NodeGroupView
  drainNodes []*budgets.NodeGroupView
  pods map[string][]*apiv1.Pod
@@ -68,7 +63,20 @@ func TestStartDeletion(t *testing.T) {
  wantDeletedNodes []string
  wantTaintUpdates map[string][][]apiv1.Taint
  wantNodeDeleteResults map[string]status.NodeDeleteResult
- }{
+}
+
+func getStartDeletionTestCases(testNg *testprovider.TestNodeGroup, ignoreDaemonSetsUtilization bool, suffix string) map[string]startDeletionTestCase {
+ toBeDeletedTaint := apiv1.Taint{Key: taints.ToBeDeletedTaint, Effect: apiv1.TaintEffectNoSchedule}
+
+ dsUtilInfo := generateUtilInfo(2./8., 2./8.)
+ if ignoreDaemonSetsUtilization {
+   dsUtilInfo = generateUtilInfo(0./8., 0./8.)
+ }
+
+ atomic2 := sizedNodeGroup("atomic-2", 2, true)
+ atomic4 := sizedNodeGroup("atomic-4", 4, true)
+
+ testCases := map[string]startDeletionTestCase{
    "nothing to delete": {
      emptyNodes: nil,
      drainNodes: nil,
@@ -528,8 +536,8 @@ func TestStartDeletion(t *testing.T) {
    "DS pods are evicted from empty nodes, but don't block deletion on error": {
      emptyNodes: generateNodeGroupViewList(testNg, 0, 2),
      pods: map[string][]*apiv1.Pod{
-       "test-node-0": {generateDsPod("test-node-0-ds-pod-0", "test-node-0"), generateDsPod("test-node-0-ds-pod-1", "test-node-0")},
-       "test-node-1": {generateDsPod("test-node-1-ds-pod-0", "test-node-1"), generateDsPod("test-node-1-ds-pod-1", "test-node-1")},
+       "test-node-0": generateDsPods(2, "test-node-0"),
+       "test-node-1": generateDsPods(2, "test-node-1"),
      },
      failedPodDrain: map[string]bool{"test-node-1-ds-pod-0": true},
      wantStatus: &status.ScaleDownStatus{
@@ -539,13 +547,13 @@ func TestStartDeletion(t *testing.T) {
          Node: generateNode("test-node-0"),
          NodeGroup: testNg,
          EvictedPods: nil,
-         UtilInfo: generateUtilInfo(2./8., 2./8.),
+         UtilInfo: dsUtilInfo,
        },
        {
          Node: generateNode("test-node-1"),
          NodeGroup: testNg,
          EvictedPods: nil,
-         UtilInfo: generateUtilInfo(2./8., 2./8.),
+         UtilInfo: dsUtilInfo,
        },
      },
    },
@@ -564,6 +572,111 @@ func TestStartDeletion(t *testing.T) {
        "test-node-1": {ResultType: status.NodeDeleteOk},
      },
    },
+   "DS pods and deletion with drain": {
+     drainNodes: generateNodeGroupViewList(testNg, 0, 2),
+     pods: map[string][]*apiv1.Pod{
+       "test-node-0": generateDsPods(2, "test-node-0"),
+       "test-node-1": generateDsPods(2, "test-node-1"),
+     },
+     wantStatus: &status.ScaleDownStatus{
+       Result: status.ScaleDownNodeDeleteStarted,
+       ScaledDownNodes: []*status.ScaleDownNode{
+         {
+           Node: generateNode("test-node-0"),
+           NodeGroup: testNg,
+           // this is nil because DaemonSetEvictionForOccupiedNodes is
+           // not enabled for drained nodes in this test suite
+           EvictedPods: nil,
+           UtilInfo: dsUtilInfo,
+         },
+         {
+           Node: generateNode("test-node-1"),
+           NodeGroup: testNg,
+           // this is nil because DaemonSetEvictionForOccupiedNodes is
+           // not enabled for drained nodes in this test suite
+           EvictedPods: nil,
+           UtilInfo: dsUtilInfo,
+         },
+       },
+     },
+     wantDeletedNodes: []string{"test-node-0", "test-node-1"},
+     // same as evicted pods
+     wantDeletedPods: nil,
+     wantTaintUpdates: map[string][][]apiv1.Taint{
+       "test-node-0": {
+         {toBeDeletedTaint},
+       },
+       "test-node-1": {
+         {toBeDeletedTaint},
+       },
+     },
+     wantNodeDeleteResults: map[string]status.NodeDeleteResult{
+       "test-node-0": {ResultType: status.NodeDeleteOk},
+       "test-node-1": {ResultType: status.NodeDeleteOk},
+     },
+   },
+   "DS pods and empty and drain deletion work correctly together": {
+     emptyNodes: generateNodeGroupViewList(testNg, 0, 2),
+     drainNodes: generateNodeGroupViewList(testNg, 2, 4),
+     pods: map[string][]*apiv1.Pod{
+       "test-node-2": removablePods(2, "test-node-2"),
+       "test-node-3": generateDsPods(2, "test-node-3"),
+     },
+     wantStatus: &status.ScaleDownStatus{
+       Result: status.ScaleDownNodeDeleteStarted,
+       ScaledDownNodes: []*status.ScaleDownNode{
+         {
+           Node: generateNode("test-node-0"),
+           NodeGroup: testNg,
+           EvictedPods: nil,
+           UtilInfo: generateUtilInfo(0, 0),
+         },
+         {
+           Node: generateNode("test-node-1"),
+           NodeGroup: testNg,
+           EvictedPods: nil,
+           UtilInfo: generateUtilInfo(0, 0),
+         },
+         {
+           Node: generateNode("test-node-2"),
+           NodeGroup: testNg,
+           EvictedPods: removablePods(2, "test-node-2"),
+           UtilInfo: generateUtilInfo(2./8., 2./8.),
+         },
+         {
+           Node: generateNode("test-node-3"),
+           NodeGroup: testNg,
+           // this is nil because DaemonSetEvictionForOccupiedNodes is
+           // not enabled for drained nodes in this test suite
+           EvictedPods: nil,
+           UtilInfo: dsUtilInfo,
+         },
+       },
+     },
+     wantDeletedNodes: []string{"test-node-0", "test-node-1", "test-node-2", "test-node-3"},
+     // same as evicted pods
+     wantDeletedPods: nil,
+     wantTaintUpdates: map[string][][]apiv1.Taint{
+       "test-node-0": {
+         {toBeDeletedTaint},
+       },
+       "test-node-1": {
+         {toBeDeletedTaint},
+       },
+       "test-node-2": {
+         {toBeDeletedTaint},
+       },
+       "test-node-3": {
+         {toBeDeletedTaint},
+       },
+     },
+     wantNodeDeleteResults: map[string]status.NodeDeleteResult{
+       "test-node-0": {ResultType: status.NodeDeleteOk},
+       "test-node-1": {ResultType: status.NodeDeleteOk},
+       "test-node-2": {ResultType: status.NodeDeleteOk},
+       "test-node-3": {ResultType: status.NodeDeleteOk},
+     },
+   },
    "nodes with pods are not deleted if the node is passed as empty": {
      emptyNodes: generateNodeGroupViewList(testNg, 0, 2),
      pods: map[string][]*apiv1.Pod{
@@ -668,7 +781,37 @@ func TestStartDeletion(t *testing.T) {
        "atomic-2-node-1": {ResultType: status.NodeDeleteErrorInternal, Err: cmpopts.AnyError},
      },
    },
- } {
+ }
+
+ testCasesWithNGNames := map[string]startDeletionTestCase{}
+ for k, v := range testCases {
+   testCasesWithNGNames[k+" "+suffix] = v
+ }
+
+ return testCasesWithNGNames
+}
+
+func TestStartDeletion(t *testing.T) {
+ testNg1 := testprovider.NewTestNodeGroup("test", 100, 0, 3, true, false, "n1-standard-2", nil, nil)
+ opts1 := &config.NodeGroupAutoscalingOptions{
+   IgnoreDaemonSetsUtilization: false,
+ }
+ testNg1.SetOptions(opts1)
+ testNg2 := testprovider.NewTestNodeGroup("test", 100, 0, 3, true, false, "n1-standard-2", nil, nil)
+ opts2 := &config.NodeGroupAutoscalingOptions{
+   IgnoreDaemonSetsUtilization: true,
+ }
+ testNg2.SetOptions(opts2)
+
+ testSets := []map[string]startDeletionTestCase{
+   // IgnoreDaemonSetsUtilization is false
+   getStartDeletionTestCases(testNg1, opts1.IgnoreDaemonSetsUtilization, "testNg1"),
+   // IgnoreDaemonSetsUtilization is true
+   getStartDeletionTestCases(testNg2, opts2.IgnoreDaemonSetsUtilization, "testNg2"),
+ }
+
+ for _, testSet := range testSets {
+   for tn, tc := range testSet {
      t.Run(tn, func(t *testing.T) {
        // This is needed because the tested code starts goroutines that can technically live longer than the execution
        // of a single test case, and the goroutines eventually access tc in fakeClient hooks below.
@@ -836,6 +979,7 @@ func TestStartDeletion(t *testing.T) {
        ctx: &ctx, clusterState: csr, nodeDeletionTracker: ndt,
        nodeDeletionScheduler: NewGroupDeletionScheduler(&ctx, ndt, ndb, evictor),
        budgetProcessor: budgets.NewScaleDownBudgetProcessor(&ctx),
+       configGetter: nodegroupconfig.NewDefaultNodeGroupConfigProcessor(),
      }
      gotStatus, gotErr := actuator.StartDeletion(allEmptyNodes, allDrainNodes)
      if diff := cmp.Diff(tc.wantErr, gotErr, cmpopts.EquateErrors()); diff != "" {
@@ -925,6 +1069,7 @@ func TestStartDeletion(t *testing.T) {
      }
    })
  }
+ }
}

func TestStartDeletionInBatchBasic(t *testing.T) {
@@ -1181,8 +1326,18 @@ func removablePod(name string, node string) *apiv1.Pod {
  }
}

+func generateDsPods(count int, node string) []*apiv1.Pod {
+ var result []*apiv1.Pod
+ for i := 0; i < count; i++ {
+   name := fmt.Sprintf("ds-pod-%d", i)
+   result = append(result, generateDsPod(name, node))
+ }
+ return result
+}
+
func generateDsPod(name string, node string) *apiv1.Pod {
- pod := removablePod(name, node)
+ pod := removablePod(fmt.Sprintf("%s-%s", node, name), node)
  pod.OwnerReferences = GenerateOwnerReferences("ds", "DaemonSet", "apps/v1", "some-uid")
  return pod
}

View File

@@ -41,20 +41,22 @@ const (
// Checker is responsible for deciding which nodes pass the criteria for scale down.
type Checker struct {
- thresholdGetter utilizationThresholdGetter
+ configGetter nodeGroupConfigGetter
}

-type utilizationThresholdGetter interface {
+type nodeGroupConfigGetter interface {
  // GetScaleDownUtilizationThreshold returns ScaleDownUtilizationThreshold value that should be used for a given NodeGroup.
  GetScaleDownUtilizationThreshold(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (float64, error)
  // GetScaleDownGpuUtilizationThreshold returns ScaleDownGpuUtilizationThreshold value that should be used for a given NodeGroup.
  GetScaleDownGpuUtilizationThreshold(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (float64, error)
+ // GetIgnoreDaemonSetsUtilization returns IgnoreDaemonSetsUtilization value that should be used for a given NodeGroup.
+ GetIgnoreDaemonSetsUtilization(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (bool, error)
}

// NewChecker creates a new Checker object.
-func NewChecker(thresholdGetter utilizationThresholdGetter) *Checker {
+func NewChecker(configGetter nodeGroupConfigGetter) *Checker {
  return &Checker{
-   thresholdGetter: thresholdGetter,
+   configGetter: configGetter,
  }
}

@@ -118,12 +120,6 @@ func (c *Checker) unremovableReasonAndNodeUtilization(context *context.Autoscali
    return simulator.ScaleDownDisabledAnnotation, nil
  }

- gpuConfig := context.CloudProvider.GetNodeGpuConfig(node)
- utilInfo, err := utilization.Calculate(nodeInfo, context.IgnoreDaemonSetsUtilization, context.IgnoreMirrorPodsUtilization, gpuConfig, timestamp)
- if err != nil {
-   klog.Warningf("Failed to calculate utilization for %s: %v", node.Name, err)
- }
-
  nodeGroup, err := context.CloudProvider.NodeGroupForNode(node)
  if err != nil {
    klog.Warning("Node group not found for node %v: %v", node.Name, err)
@@ -136,6 +132,18 @@ func (c *Checker) unremovableReasonAndNodeUtilization(context *context.Autoscali
    return simulator.NotAutoscaled, nil
  }

+ ignoreDaemonSetsUtilization, err := c.configGetter.GetIgnoreDaemonSetsUtilization(context, nodeGroup)
+ if err != nil {
+   klog.Warningf("Couldn't retrieve `IgnoreDaemonSetsUtilization` option for node %v: %v", node.Name, err)
+   return simulator.UnexpectedError, nil
+ }
+
+ gpuConfig := context.CloudProvider.GetNodeGpuConfig(node)
+ utilInfo, err := utilization.Calculate(nodeInfo, ignoreDaemonSetsUtilization, context.IgnoreMirrorPodsUtilization, gpuConfig, timestamp)
+ if err != nil {
+   klog.Warningf("Failed to calculate utilization for %s: %v", node.Name, err)
+ }
+
  // If scale down of unready nodes is disabled, skip the node if it is unready
  if !context.ScaleDownUnreadyEnabled {
    ready, _, _ := kube_util.GetReadinessState(node)
@@ -166,12 +174,12 @@ func (c *Checker) isNodeBelowUtilizationThreshold(context *context.AutoscalingCo
  var err error
  gpuConfig := context.CloudProvider.GetNodeGpuConfig(node)
  if gpuConfig != nil {
-   threshold, err = c.thresholdGetter.GetScaleDownGpuUtilizationThreshold(context, nodeGroup)
+   threshold, err = c.configGetter.GetScaleDownGpuUtilizationThreshold(context, nodeGroup)
    if err != nil {
      return false, err
    }
  } else {
-   threshold, err = c.thresholdGetter.GetScaleDownUtilizationThreshold(context, nodeGroup)
+   threshold, err = c.configGetter.GetScaleDownUtilizationThreshold(context, nodeGroup)
    if err != nil {
      return false, err
    }

View File

@@ -21,12 +21,11 @@ import (
  "testing"
  "time"

- "k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
  testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
  "k8s.io/autoscaler/cluster-autoscaler/config"
- "k8s.io/autoscaler/cluster-autoscaler/context"
  "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/unremovable"
  . "k8s.io/autoscaler/cluster-autoscaler/core/test"
+ "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupconfig"
  "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
  "k8s.io/autoscaler/cluster-autoscaler/utils/taints"
  . "k8s.io/autoscaler/cluster-autoscaler/utils/test"
@@ -36,8 +35,16 @@ import (
  "k8s.io/client-go/kubernetes/fake"
)

-func TestFilterOutUnremovable(t *testing.T) {
- now := time.Now()
+type testCase struct {
+ desc string
+ nodes []*apiv1.Node
+ pods []*apiv1.Pod
+ want []string
+ scaleDownUnready bool
+ ignoreDaemonSetsUtilization bool
+}
+
+func getTestCases(ignoreDaemonSetsUtilization bool, suffix string, now time.Time) []testCase {
  regularNode := BuildTestNode("regular", 1000, 10)
  SetNodeReadyState(regularNode, true, time.Time{})
@@ -59,13 +66,10 @@ func TestFilterOutUnremovable(t *testing.T) {
  smallPod := BuildTestPod("smallPod", 100, 0)
  smallPod.Spec.NodeName = "regular"

- testCases := []struct {
-   desc string
-   nodes []*apiv1.Node
-   pods []*apiv1.Pod
-   want []string
-   scaleDownUnready bool
- }{
+ dsPod := BuildDSTestPod("dsPod", 500, 0)
+ dsPod.Spec.NodeName = "regular"
+
+ testCases := []testCase{
    {
      desc: "regular node stays",
      nodes: []*apiv1.Node{regularNode},
@@ -111,14 +115,57 @@ func TestFilterOutUnremovable(t *testing.T) {
      scaleDownUnready: false,
    },
  }
+
+ finalTestCases := []testCase{}
  for _, tc := range testCases {
+   tc.desc = tc.desc + " " + suffix
+   if ignoreDaemonSetsUtilization {
+     tc.ignoreDaemonSetsUtilization = true
+   }
+   finalTestCases = append(finalTestCases, tc)
+ }
+
+ if ignoreDaemonSetsUtilization {
+   finalTestCases = append(testCases, testCase{
+     desc: "high utilization daemonsets node is filtered out",
+     nodes: []*apiv1.Node{regularNode},
+     pods: []*apiv1.Pod{smallPod, dsPod},
+     want: []string{},
+     scaleDownUnready: true,
+     ignoreDaemonSetsUtilization: false,
+   },
+     testCase{
+       desc: "high utilization daemonsets node stays",
+       nodes: []*apiv1.Node{regularNode},
+       pods: []*apiv1.Pod{smallPod, dsPod},
+       want: []string{"regular"},
+       scaleDownUnready: true,
+       ignoreDaemonSetsUtilization: true,
+     })
+ }
+
+ return finalTestCases
+}
+
+func TestFilterOutUnremovable(t *testing.T) {
+ now := time.Now()
+
+ for _, tc := range append(getTestCases(false, "IgnoreDaemonSetUtilization=false", now),
+   getTestCases(true, "IgnoreDaemonsetUtilization=true", now)...) {
    tc := tc
    t.Run(tc.desc, func(t *testing.T) {
      t.Parallel()
-     c := NewChecker(&staticThresholdGetter{0.5})
+     s := nodegroupconfig.DelegatingNodeGroupConfigProcessor{}
+     c := NewChecker(&s)
      options := config.AutoscalingOptions{
        UnremovableNodeRecheckTimeout: 5 * time.Minute,
        ScaleDownUnreadyEnabled: tc.scaleDownUnready,
+       NodeGroupDefaults: config.NodeGroupAutoscalingOptions{
+         ScaleDownUtilizationThreshold: config.DefaultScaleDownUtilizationThreshold,
+         ScaleDownGpuUtilizationThreshold: config.DefaultScaleDownGpuUtilizationThreshold,
+         ScaleDownUnneededTime: config.DefaultScaleDownUnneededTime,
+         ScaleDownUnreadyTime: config.DefaultScaleDownUnreadyTime,
+         IgnoreDaemonSetsUtilization: tc.ignoreDaemonSetsUtilization,
+       },
      }
      provider := testprovider.NewTestCloudProvider(nil, nil)
      provider.AddNodeGroup("ng1", 1, 10, 2)
@@ -136,15 +183,3 @@ func TestFilterOutUnremovable(t *testing.T) {
    })
  }
}
-
-type staticThresholdGetter struct {
- threshold float64
-}
-
-func (s *staticThresholdGetter) GetScaleDownUtilizationThreshold(_ *context.AutoscalingContext, _ cloudprovider.NodeGroup) (float64, error) {
- return s.threshold, nil
-}
-
-func (s *staticThresholdGetter) GetScaleDownGpuUtilizationThreshold(_ *context.AutoscalingContext, _ cloudprovider.NodeGroup) (float64, error) {
- return s.threshold, nil
-}

View File

@@ -1303,7 +1303,8 @@ func newWrapperForTesting(ctx *context.AutoscalingContext, clusterStateRegistry
    MinReplicaCount: 0,
    SkipNodesWithCustomControllerPods: true,
  }
- sd := NewScaleDown(ctx, NewTestProcessors(ctx), ndt, deleteOptions)
- actuator := actuation.NewActuator(ctx, clusterStateRegistry, ndt, deleteOptions)
+ processors := NewTestProcessors(ctx)
+ sd := NewScaleDown(ctx, processors, ndt, deleteOptions)
+ actuator := actuation.NewActuator(ctx, clusterStateRegistry, ndt, deleteOptions, processors.NodeGroupConfigProcessor)
  return NewScaleDownWrapper(sd, actuator)
}

View File

@@ -171,7 +171,7 @@ func NewStaticAutoscaler(
  // during the struct creation rather than here.
  ndt := deletiontracker.NewNodeDeletionTracker(0 * time.Second)
  scaleDown := legacy.NewScaleDown(autoscalingContext, processors, ndt, deleteOptions)
- actuator := actuation.NewActuator(autoscalingContext, clusterStateRegistry, ndt, deleteOptions)
+ actuator := actuation.NewActuator(autoscalingContext, clusterStateRegistry, ndt, deleteOptions, processors.NodeGroupConfigProcessor)
  autoscalingContext.ScaleDownActuator = actuator

  var scaleDownPlanner scaledown.Planner

View File

@@ -159,7 +159,7 @@ func (m *onNodeGroupDeleteMock) Delete(id string) error {

func setUpScaleDownActuator(ctx *context.AutoscalingContext, options config.AutoscalingOptions) {
  deleteOptions := simulator.NewNodeDeleteOptions(options)
- ctx.ScaleDownActuator = actuation.NewActuator(ctx, nil, deletiontracker.NewNodeDeletionTracker(0*time.Second), deleteOptions)
+ ctx.ScaleDownActuator = actuation.NewActuator(ctx, nil, deletiontracker.NewNodeDeletionTracker(0*time.Second), deleteOptions, NewTestProcessors(ctx).NodeGroupConfigProcessor)
}

func TestStaticAutoscalerRunOnce(t *testing.T) {
@@ -1433,7 +1433,7 @@ func TestStaticAutoscalerUpcomingScaleDownCandidates(t *testing.T) {
  csr.RegisterProviders(clusterstate.NewMockMaxNodeProvisionTimeProvider(15 * time.Minute))

  // Setting the Actuator is necessary for testing any scale-down logic, it shouldn't have anything to do in this test.
- actuator := actuation.NewActuator(&ctx, csr, deletiontracker.NewNodeDeletionTracker(0*time.Second), simulator.NodeDeleteOptions{})
+ actuator := actuation.NewActuator(&ctx, csr, deletiontracker.NewNodeDeletionTracker(0*time.Second), simulator.NodeDeleteOptions{}, NewTestProcessors(&ctx).NodeGroupConfigProcessor)
  ctx.ScaleDownActuator = actuator

  // Fake planner that keeps track of the scale-down candidates passed to UpdateClusterState.
@@ -1761,7 +1761,7 @@ func newScaleDownPlannerAndActuator(t *testing.T, ctx *context.AutoscalingContex
  }
  ndt := deletiontracker.NewNodeDeletionTracker(0 * time.Second)
  sd := legacy.NewScaleDown(ctx, p, ndt, deleteOptions)
- actuator := actuation.NewActuator(ctx, cs, ndt, deleteOptions)
+ actuator := actuation.NewActuator(ctx, cs, ndt, deleteOptions, p.NodeGroupConfigProcessor)
  wrapper := legacy.NewScaleDownWrapper(sd, actuator)
  return wrapper, wrapper
}

View File

@@ -108,13 +108,13 @@ var (
    "How long after node deletion that scale down evaluation resumes, defaults to scanInterval")
  scaleDownDelayAfterFailure = flag.Duration("scale-down-delay-after-failure", 3*time.Minute,
    "How long after scale down failure that scale down evaluation resumes")
- scaleDownUnneededTime = flag.Duration("scale-down-unneeded-time", 10*time.Minute,
+ scaleDownUnneededTime = flag.Duration("scale-down-unneeded-time", config.DefaultScaleDownUnneededTime,
    "How long a node should be unneeded before it is eligible for scale down")
- scaleDownUnreadyTime = flag.Duration("scale-down-unready-time", 20*time.Minute,
+ scaleDownUnreadyTime = flag.Duration("scale-down-unready-time", config.DefaultScaleDownUnreadyTime,
    "How long an unready node should be unneeded before it is eligible for scale down")
- scaleDownUtilizationThreshold = flag.Float64("scale-down-utilization-threshold", 0.5,
+ scaleDownUtilizationThreshold = flag.Float64("scale-down-utilization-threshold", config.DefaultScaleDownUtilizationThreshold,
    "Sum of cpu or memory of all pods running on the node divided by node's corresponding allocatable resource, below which a node can be considered for scale down")
- scaleDownGpuUtilizationThreshold = flag.Float64("scale-down-gpu-utilization-threshold", 0.5,
+ scaleDownGpuUtilizationThreshold = flag.Float64("scale-down-gpu-utilization-threshold", config.DefaultScaleDownGpuUtilizationThreshold,
    "Sum of gpu requests of all pods running on the node divided by node's allocatable resource, below which a node can be considered for scale down."+
      "Utilization calculation only cares about gpu resource for accelerator node. cpu and memory utilization will be ignored.")
  scaleDownNonEmptyCandidatesCount = flag.Int("scale-down-non-empty-candidates-count", 30,
@@ -259,6 +259,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
      ScaleDownGpuUtilizationThreshold: *scaleDownGpuUtilizationThreshold,
      ScaleDownUnneededTime: *scaleDownUnneededTime,
      ScaleDownUnreadyTime: *scaleDownUnreadyTime,
+     IgnoreDaemonSetsUtilization: *ignoreDaemonSetsUtilization,
      MaxNodeProvisionTime: *maxNodeProvisionTime,
    },
    CloudConfig: *cloudConfig,
@@ -272,7 +273,6 @@ func createAutoscalingOptions() config.AutoscalingOptions {
    ExpanderNames: *expanderFlag,
    GRPCExpanderCert: *grpcExpanderCert,
    GRPCExpanderURL: *grpcExpanderURL,
-   IgnoreDaemonSetsUtilization: *ignoreDaemonSetsUtilization,
    IgnoreMirrorPodsUtilization: *ignoreMirrorPodsUtilization,
    MaxBulkSoftTaintCount: *maxBulkSoftTaintCount,
    MaxBulkSoftTaintTime: *maxBulkSoftTaintTime,

View File

@@ -35,6 +35,8 @@ type NodeGroupConfigProcessor interface {
  GetScaleDownGpuUtilizationThreshold(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (float64, error)
  // GetMaxNodeProvisionTime return MaxNodeProvisionTime value that should be used for a given NodeGroup.
  GetMaxNodeProvisionTime(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (time.Duration, error)
+ // GetIgnoreDaemonSetsUtilization returns IgnoreDaemonSetsUtilization value that should be used for a given NodeGroup.
+ GetIgnoreDaemonSetsUtilization(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (bool, error)
  // CleanUp cleans up processor's internal structures.
  CleanUp()
}
@@ -105,6 +107,18 @@ func (p *DelegatingNodeGroupConfigProcessor) GetMaxNodeProvisionTime(context *co
  return ngConfig.MaxNodeProvisionTime, nil
}

+// GetIgnoreDaemonSetsUtilization returns IgnoreDaemonSetsUtilization value that should be used for a given NodeGroup.
+func (p *DelegatingNodeGroupConfigProcessor) GetIgnoreDaemonSetsUtilization(context *context.AutoscalingContext, nodeGroup cloudprovider.NodeGroup) (bool, error) {
+ ngConfig, err := nodeGroup.GetOptions(context.NodeGroupDefaults)
+ if err != nil && err != cloudprovider.ErrNotImplemented {
+   return false, err
+ }
+ if ngConfig == nil || err == cloudprovider.ErrNotImplemented {
+   return context.NodeGroupDefaults.IgnoreDaemonSetsUtilization, nil
+ }
+ return ngConfig.IgnoreDaemonSetsUtilization, nil
+}
+
// CleanUp cleans up processor's internal structures.
func (p *DelegatingNodeGroupConfigProcessor) CleanUp() {
}
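
To spell out the fallback order the new processor method implements — the per-node-group value wins when the cloud provider returns options, otherwise the global NodeGroupDefaults apply — here is a condensed, standalone restatement. The helper name resolveIgnoreDaemonSetsUtilization and the package name are hypothetical; the GetOptions call and the ErrNotImplemented check mirror the code in the hunk above.

// Hypothetical helper mirroring DelegatingNodeGroupConfigProcessor.GetIgnoreDaemonSetsUtilization.
package example

import (
  "k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
  "k8s.io/autoscaler/cluster-autoscaler/config"
)

// resolveIgnoreDaemonSetsUtilization returns the per-node-group override when the
// provider supplies one, and falls back to the global --ignore-daemonsets-utilization
// default otherwise. The name is illustrative and not part of the PR.
func resolveIgnoreDaemonSetsUtilization(ng cloudprovider.NodeGroup, defaults config.NodeGroupAutoscalingOptions) (bool, error) {
  perGroup, err := ng.GetOptions(defaults)
  if err != nil && err != cloudprovider.ErrNotImplemented {
    return false, err // real provider errors are surfaced to the caller
  }
  if perGroup == nil || err == cloudprovider.ErrNotImplemented {
    // No per-node-group options: use the cluster-wide default.
    return defaults.IgnoreDaemonSetsUtilization, nil
  }
  return perGroup.IgnoreDaemonSetsUtilization, nil
}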

View File

@@ -49,6 +49,7 @@ func TestDelegatingNodeGroupConfigProcessor(t *testing.T) {
    ScaleDownGpuUtilizationThreshold: 0.6,
    ScaleDownUtilizationThreshold: 0.5,
    MaxNodeProvisionTime: 15 * time.Minute,
+   IgnoreDaemonSetsUtilization: true,
  }
  ngOpts := &config.NodeGroupAutoscalingOptions{
    ScaleDownUnneededTime: 10 * time.Minute,
@@ -56,6 +57,7 @@ func TestDelegatingNodeGroupConfigProcessor(t *testing.T) {
    ScaleDownGpuUtilizationThreshold: 0.85,
    ScaleDownUtilizationThreshold: 0.75,
    MaxNodeProvisionTime: 60 * time.Minute,
+   IgnoreDaemonSetsUtilization: false,
  }

  testUnneededTime := func(t *testing.T, p DelegatingNodeGroupConfigProcessor, c *context.AutoscalingContext, ng cloudprovider.NodeGroup, w Want, we error) {
@@ -109,18 +111,32 @@ func TestDelegatingNodeGroupConfigProcessor(t *testing.T) {
    assert.Equal(t, res, results[w])
  }

+ // for IgnoreDaemonSetsUtilization
+ testIgnoreDSUtilization := func(t *testing.T, p DelegatingNodeGroupConfigProcessor, c *context.AutoscalingContext, ng cloudprovider.NodeGroup, w Want, we error) {
+   res, err := p.GetIgnoreDaemonSetsUtilization(c, ng)
+   assert.Equal(t, err, we)
+   results := map[Want]bool{
+     NIL: false,
+     GLOBAL: true,
+     NG: false,
+   }
+   assert.Equal(t, res, results[w])
+ }
+
  funcs := map[string]func(*testing.T, DelegatingNodeGroupConfigProcessor, *context.AutoscalingContext, cloudprovider.NodeGroup, Want, error){
    "ScaleDownUnneededTime": testUnneededTime,
    "ScaleDownUnreadyTime": testUnreadyTime,
    "ScaleDownUtilizationThreshold": testUtilizationThreshold,
    "ScaleDownGpuUtilizationThreshold": testGpuThreshold,
    "MaxNodeProvisionTime": testMaxNodeProvisionTime,
+   "IgnoreDaemonSetsUtilization": testIgnoreDSUtilization,
    "MultipleOptions": func(t *testing.T, p DelegatingNodeGroupConfigProcessor, c *context.AutoscalingContext, ng cloudprovider.NodeGroup, w Want, we error) {
      testUnneededTime(t, p, c, ng, w, we)
      testUnreadyTime(t, p, c, ng, w, we)
      testUtilizationThreshold(t, p, c, ng, w, we)
      testGpuThreshold(t, p, c, ng, w, we)
      testMaxNodeProvisionTime(t, p, c, ng, w, we)
+     testIgnoreDSUtilization(t, p, c, ng, w, we)
    },
    "RepeatingTheSameCallGivesConsistentResults": func(t *testing.T, p DelegatingNodeGroupConfigProcessor, c *context.AutoscalingContext, ng cloudprovider.NodeGroup, w Want, we error) {
      testUnneededTime(t, p, c, ng, w, we)
@@ -128,6 +144,9 @@ func TestDelegatingNodeGroupConfigProcessor(t *testing.T) {
      // throw in a different call
      testGpuThreshold(t, p, c, ng, w, we)
      testUnneededTime(t, p, c, ng, w, we)
+     // throw in another different call
+     testIgnoreDSUtilization(t, p, c, ng, w, we)
+     testUnneededTime(t, p, c, ng, w, we)
    },
  }

View File

@@ -67,6 +67,15 @@ func BuildTestPod(name string, cpu int64, mem int64) *apiv1.Pod {
  return pod
}

+// BuildDSTestPod creates a DaemonSet pod with cpu and memory.
+func BuildDSTestPod(name string, cpu int64, mem int64) *apiv1.Pod {
+ pod := BuildTestPod(name, cpu, mem)
+ pod.OwnerReferences = GenerateOwnerReferences("ds", "DaemonSet", "apps/v1", "some-uid")
+ return pod
+}
+
// BuildTestPodWithEphemeralStorage creates a pod with cpu, memory and ephemeral storage resources.
func BuildTestPodWithEphemeralStorage(name string, cpu, mem, ephemeralStorage int64) *apiv1.Pod {
  startTime := metav1.Unix(0, 0)