From 189d25bc89968cd4067e6470411d23acce60fced Mon Sep 17 00:00:00 2001 From: Xuan Liu Date: Tue, 16 Sep 2025 16:50:03 -0400 Subject: [PATCH] Add Unit Tests --- .../clusterapi/clusterapi_nodegroup_test.go | 4 ++ .../clusterstate/clusterstate_test.go | 39 +++++++++++++++++-- .../core/static_autoscaler_test.go | 2 +- .../node_group_config_processor.go | 4 +- .../node_group_config_processor_test.go | 15 +++++++ 5 files changed, 57 insertions(+), 7 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go index 2999f8d02d..4a75edbec4 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go @@ -1703,6 +1703,7 @@ func TestNodeGroupGetOptions(t *testing.T) { ScaleDownUnneededTime: time.Second, ScaleDownUnreadyTime: time.Minute, MaxNodeProvisionTime: 15 * time.Minute, + MaxNodeStartupTime: 35 * time.Minute, } cases := []struct { @@ -1723,6 +1724,7 @@ func TestNodeGroupGetOptions(t *testing.T) { config.DefaultScaleDownUnneededTimeKey: "1h", config.DefaultScaleDownUnreadyTimeKey: "30m", config.DefaultMaxNodeProvisionTimeKey: "60m", + config.DefaultMaxNodeStartupTimeKey: "35m", }, expected: &config.NodeGroupAutoscalingOptions{ ScaleDownGpuUtilizationThreshold: 0.6, @@ -1730,6 +1732,7 @@ func TestNodeGroupGetOptions(t *testing.T) { ScaleDownUnneededTime: time.Hour, ScaleDownUnreadyTime: 30 * time.Minute, MaxNodeProvisionTime: 60 * time.Minute, + MaxNodeStartupTime: 35 * time.Minute, }, }, { @@ -1744,6 +1747,7 @@ func TestNodeGroupGetOptions(t *testing.T) { ScaleDownUnneededTime: time.Minute, ScaleDownUnreadyTime: defaultOptions.ScaleDownUnreadyTime, MaxNodeProvisionTime: 15 * time.Minute, + MaxNodeStartupTime: 35 * time.Minute, }, }, { diff --git a/cluster-autoscaler/clusterstate/clusterstate_test.go b/cluster-autoscaler/clusterstate/clusterstate_test.go index 62f8b97b46..f5e610480a 100644 --- a/cluster-autoscaler/clusterstate/clusterstate_test.go +++ b/cluster-autoscaler/clusterstate/clusterstate_test.go @@ -423,7 +423,7 @@ func TestTooManyUnready(t *testing.T) { clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{ MaxTotalUnreadyPercentage: 10, OkTotalUnreadyCount: 1, - }, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) + }, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute, MaxNodeStartupTime: 35 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now) assert.NoError(t, err) assert.False(t, clusterstate.IsClusterHealthy()) @@ -462,6 +462,37 @@ func TestUnreadyLongAfterCreation(t *testing.T) { assert.Empty(t, upcomingRegistered["ng1"]) } +func TestUnreadyAfterCreationWithIncreasedStartupTime(t *testing.T) { + now := time.Now() + + ng1_1 := BuildTestNode("ng1-1", 1000, 1000) + SetNodeReadyState(ng1_1, true, now.Add(-time.Minute)) + ng2_1 := BuildTestNode("ng2-1", 1000, 1000) + SetNodeReadyState(ng2_1, false, now.Add(-time.Minute)) + ng2_1.CreationTimestamp = metav1.Time{Time: now.Add(-30 * time.Minute)} + + provider := testprovider.NewTestCloudProviderBuilder().Build() + provider.AddNodeGroup("ng1", 1, 10, 1) + provider.AddNodeGroup("ng2", 1, 10, 1) + provider.AddNode("ng1", ng1_1) + provider.AddNode("ng2", ng2_1) + + assert.NotNil(t, provider) + fakeClient := &fake.Clientset{} + fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "some-map") + clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{ + MaxTotalUnreadyPercentage: 10, + OkTotalUnreadyCount: 1, + }, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute, MaxNodeStartupTime: 35 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) + err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now) + assert.NoError(t, err) + assert.Equal(t, 0, len(clusterstate.GetClusterReadiness().Unready)) + assert.Equal(t, 1, len(clusterstate.GetClusterReadiness().NotStarted)) + upcoming, upcomingRegistered := clusterstate.GetUpcomingNodes() + assert.Equal(t, 0, upcoming["ng1"]) + assert.Empty(t, upcomingRegistered["ng1"]) +} + func TestNotStarted(t *testing.T) { now := time.Now() @@ -484,7 +515,7 @@ func TestNotStarted(t *testing.T) { clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{ MaxTotalUnreadyPercentage: 10, OkTotalUnreadyCount: 1, - }, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) + }, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute, MaxNodeStartupTime: 35 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now) assert.NoError(t, err) assert.Equal(t, 1, len(clusterstate.GetClusterReadiness().NotStarted)) @@ -546,7 +577,7 @@ func TestRegisterScaleDown(t *testing.T) { clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{ MaxTotalUnreadyPercentage: 10, OkTotalUnreadyCount: 1, - }, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) + }, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute, MaxNodeStartupTime: 35 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) now := time.Now() clusterstate.RegisterScaleDown(provider.GetNodeGroup("ng1"), "ng1-1", now.Add(time.Minute), now) assert.Equal(t, 1, len(clusterstate.scaleDownRequests)) @@ -639,7 +670,7 @@ func TestUpcomingNodes(t *testing.T) { clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{ MaxTotalUnreadyPercentage: 10, OkTotalUnreadyCount: 1, - }, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) + }, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute, MaxNodeStartupTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker()) err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1, ng3_1, ng4_1, ng5_1, ng5_2}, nil, now) assert.NoError(t, err) assert.Empty(t, clusterstate.GetScaleUpFailures()) diff --git a/cluster-autoscaler/core/static_autoscaler_test.go b/cluster-autoscaler/core/static_autoscaler_test.go index f78acf386f..905edcf147 100644 --- a/cluster-autoscaler/core/static_autoscaler_test.go +++ b/cluster-autoscaler/core/static_autoscaler_test.go @@ -2257,7 +2257,7 @@ func TestStaticAutoscalerUpcomingScaleDownCandidates(t *testing.T) { // Create CSR with unhealthy cluster protection effectively disabled, to guarantee we reach the tested logic. csrConfig := clusterstate.ClusterStateRegistryConfig{OkTotalUnreadyCount: nodeGroupCount * unreadyNodesCount} - csr := clusterstate.NewClusterStateRegistry(provider, csrConfig, ctx.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), processors.AsyncNodeGroupStateChecker) + csr := clusterstate.NewClusterStateRegistry(provider, csrConfig, ctx.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute, MaxNodeStartupTime: 15 * time.Minute}), processors.AsyncNodeGroupStateChecker) // Setting the Actuator is necessary for testing any scale-down logic, it shouldn't have anything to do in this test. actuator := actuation.NewActuator(&ctx, csr, deletiontracker.NewNodeDeletionTracker(0*time.Second), options.NodeDeleteOptions{}, nil, processorstest.NewTestProcessors(&ctx).NodeGroupConfigProcessor) diff --git a/cluster-autoscaler/processors/nodegroupconfig/node_group_config_processor.go b/cluster-autoscaler/processors/nodegroupconfig/node_group_config_processor.go index 385d218988..4542343ca6 100644 --- a/cluster-autoscaler/processors/nodegroupconfig/node_group_config_processor.go +++ b/cluster-autoscaler/processors/nodegroupconfig/node_group_config_processor.go @@ -110,11 +110,11 @@ func (p *DelegatingNodeGroupConfigProcessor) GetMaxNodeProvisionTime(nodeGroup c return ngConfig.MaxNodeProvisionTime, nil } -// GetMaxNodeProvisionTime returns MaxNodeStartupTime value that should be used for a given NodeGroup. +// GetMaxNodeStartupTime returns MaxNodeStartupTime value that should be used for a given NodeGroup. func (p *DelegatingNodeGroupConfigProcessor) GetMaxNodeStartupTime(nodeGroup cloudprovider.NodeGroup) (time.Duration, error) { ngConfig, err := nodeGroup.GetOptions(p.nodeGroupDefaults) if err != nil && err != cloudprovider.ErrNotImplemented { - return time.Duration(0), err + return 15 * time.Minute, err } if ngConfig == nil || err == cloudprovider.ErrNotImplemented { return p.nodeGroupDefaults.MaxNodeStartupTime, nil diff --git a/cluster-autoscaler/processors/nodegroupconfig/node_group_config_processor_test.go b/cluster-autoscaler/processors/nodegroupconfig/node_group_config_processor_test.go index 101d538a81..16fe11c247 100644 --- a/cluster-autoscaler/processors/nodegroupconfig/node_group_config_processor_test.go +++ b/cluster-autoscaler/processors/nodegroupconfig/node_group_config_processor_test.go @@ -47,6 +47,7 @@ func TestDelegatingNodeGroupConfigProcessor(t *testing.T) { ScaleDownGpuUtilizationThreshold: 0.6, ScaleDownUtilizationThreshold: 0.5, MaxNodeProvisionTime: 15 * time.Minute, + MaxNodeStartupTime: 15 * time.Minute, IgnoreDaemonSetsUtilization: true, } ngOpts := &config.NodeGroupAutoscalingOptions{ @@ -55,6 +56,7 @@ func TestDelegatingNodeGroupConfigProcessor(t *testing.T) { ScaleDownGpuUtilizationThreshold: 0.85, ScaleDownUtilizationThreshold: 0.75, MaxNodeProvisionTime: 60 * time.Minute, + MaxNodeStartupTime: 35 * time.Minute, IgnoreDaemonSetsUtilization: false, } @@ -109,6 +111,17 @@ func TestDelegatingNodeGroupConfigProcessor(t *testing.T) { assert.Equal(t, res, results[w]) } + testMaxNodeStartupTime := func(t *testing.T, p NodeGroupConfigProcessor, ng cloudprovider.NodeGroup, w Want, we error) { + res, err := p.GetMaxNodeStartupTime(ng) + assert.Equal(t, err, we) + results := map[Want]time.Duration{ + NIL: 15 * time.Minute, + GLOBAL: 15 * time.Minute, + NG: 35 * time.Minute, + } + assert.Equal(t, res, results[w]) + } + // for IgnoreDaemonSetsUtilization testIgnoreDSUtilization := func(t *testing.T, p NodeGroupConfigProcessor, ng cloudprovider.NodeGroup, w Want, we error) { res, err := p.GetIgnoreDaemonSetsUtilization(ng) @@ -127,6 +140,7 @@ func TestDelegatingNodeGroupConfigProcessor(t *testing.T) { "ScaleDownUtilizationThreshold": testUtilizationThreshold, "ScaleDownGpuUtilizationThreshold": testGpuThreshold, "MaxNodeProvisionTime": testMaxNodeProvisionTime, + "MaxNodeStartupTime": testMaxNodeStartupTime, "IgnoreDaemonSetsUtilization": testIgnoreDSUtilization, "MultipleOptions": func(t *testing.T, p NodeGroupConfigProcessor, ng cloudprovider.NodeGroup, w Want, we error) { testUnneededTime(t, p, ng, w, we) @@ -134,6 +148,7 @@ func TestDelegatingNodeGroupConfigProcessor(t *testing.T) { testUtilizationThreshold(t, p, ng, w, we) testGpuThreshold(t, p, ng, w, we) testMaxNodeProvisionTime(t, p, ng, w, we) + testMaxNodeStartupTime(t, p, ng, w, we) testIgnoreDSUtilization(t, p, ng, w, we) }, "RepeatingTheSameCallGivesConsistentResults": func(t *testing.T, p NodeGroupConfigProcessor, ng cloudprovider.NodeGroup, w Want, we error) {