// autoscaler/cluster-autoscaler/clusterstate/clusterstate_test.go
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package clusterstate
import (
"fmt"
"testing"
"time"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
"k8s.io/autoscaler/cluster-autoscaler/clusterstate/api"
"k8s.io/autoscaler/cluster-autoscaler/clusterstate/utils"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/metrics"
"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupconfig"
"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroups/asyncnodegroups"
"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
"k8s.io/client-go/kubernetes/fake"
kube_record "k8s.io/client-go/tools/record"
"github.com/stretchr/testify/assert"
"k8s.io/autoscaler/cluster-autoscaler/utils/backoff"
)
// GetCloudProviderDeletedNodeNames returns the names of nodes that have been
// removed from the cloud provider but are still registered in Kubernetes.
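// A typical pattern in these tests is to call UpdateNodes first, so that the
// registry refreshes csr.deletedNodes, and only then read the returned names
// (see TestCloudProviderDeletedNodes below).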
func GetCloudProviderDeletedNodeNames(csr *ClusterStateRegistry) []string {
csr.Lock()
defer csr.Unlock()
result := make([]string, 0, len(csr.deletedNodes))
for nodeName := range csr.deletedNodes {
result = append(result, nodeName)
}
return result
}
func TestOKWithScaleUp(t *testing.T) {
now := time.Now()
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
SetNodeReadyState(ng2_1, true, now.Add(-time.Minute))
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 1, 10, 5)
provider.AddNodeGroup("ng2", 1, 10, 1)
provider.AddNode("ng1", ng1_1)
provider.AddNode("ng2", ng2_1)
assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
clusterstate.RegisterScaleUp(provider.GetNodeGroup("ng1"), 4, time.Now())
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
assert.NoError(t, err)
assert.True(t, clusterstate.IsClusterHealthy())
assert.Empty(t, clusterstate.GetScaleUpFailures())
status := clusterstate.GetStatus(now)
assert.Equal(t, api.ClusterAutoscalerInProgress, status.ClusterWide.ScaleUp.Status)
assert.Equal(t, 2, len(status.NodeGroups))
ng1Checked := false
ng2Checked := false
for _, nodeGroupStatus := range status.NodeGroups {
if nodeGroupStatus.Name == "ng1" {
assert.Equal(t, api.ClusterAutoscalerInProgress, nodeGroupStatus.ScaleUp.Status)
ng1Checked = true
}
if nodeGroupStatus.Name == "ng2" {
assert.Equal(t, api.ClusterAutoscalerNoActivity, nodeGroupStatus.ScaleUp.Status)
ng2Checked = true
}
}
assert.True(t, ng1Checked)
assert.True(t, ng2Checked)
}
func TestEmptyOK(t *testing.T) {
now := time.Now()
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 0, 10, 0)
assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
err := clusterstate.UpdateNodes([]*apiv1.Node{}, nil, now.Add(-5*time.Second))
assert.NoError(t, err)
assert.True(t, clusterstate.IsClusterHealthy())
assert.Empty(t, clusterstate.GetScaleUpFailures())
assert.True(t, clusterstate.IsNodeGroupHealthy("ng1"))
assert.False(t, clusterstate.IsNodeGroupScalingUp("ng1"))
assert.False(t, clusterstate.HasNodeGroupStartedScaleUp("ng1"))
provider.AddNodeGroup("ng1", 0, 10, 3)
clusterstate.RegisterScaleUp(provider.GetNodeGroup("ng1"), 3, now.Add(-3*time.Second))
// clusterstate.scaleUpRequests["ng1"].Time = now.Add(-3 * time.Second)
// clusterstate.scaleUpRequests["ng1"].ExpectedAddTime = now.Add(1 * time.Minute)
err = clusterstate.UpdateNodes([]*apiv1.Node{}, nil, now)
assert.NoError(t, err)
assert.True(t, clusterstate.IsClusterHealthy())
assert.True(t, clusterstate.IsNodeGroupHealthy("ng1"))
assert.True(t, clusterstate.IsNodeGroupScalingUp("ng1"))
assert.True(t, clusterstate.HasNodeGroupStartedScaleUp("ng1"))
}
func TestHasNodeGroupStartedScaleUp(t *testing.T) {
tests := map[string]struct {
initialSize int
delta int
}{
"Target size reverts back to zero": {
initialSize: 0,
delta: 3,
},
}
for tn, tc := range tests {
t.Run(tn, func(t *testing.T) {
now := time.Now()
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 0, 5, tc.initialSize)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
err := clusterstate.UpdateNodes([]*apiv1.Node{}, nil, now.Add(-5*time.Second))
assert.NoError(t, err)
assert.False(t, clusterstate.IsNodeGroupScalingUp("ng1"))
assert.False(t, clusterstate.HasNodeGroupStartedScaleUp("ng1"))
provider.AddNodeGroup("ng1", 0, 5, tc.initialSize+tc.delta)
clusterstate.RegisterScaleUp(provider.GetNodeGroup("ng1"), tc.delta, now.Add(-3*time.Second))
err = clusterstate.UpdateNodes([]*apiv1.Node{}, nil, now)
assert.NoError(t, err)
assert.True(t, clusterstate.IsNodeGroupScalingUp("ng1"))
assert.True(t, clusterstate.HasNodeGroupStartedScaleUp("ng1"))
provider.AddNodeGroup("ng1", 0, 5, tc.initialSize)
clusterstate.Recalculate()
assert.False(t, clusterstate.IsNodeGroupScalingUp("ng1"))
assert.True(t, clusterstate.HasNodeGroupStartedScaleUp("ng1"))
})
}
}
// TestRecalculateStateAfterNodeGroupSizeChanged checks that Recalculate updates state correctly after
// a node group's size has changed. We verify that acceptable ranges are updated accordingly
// and that the upcoming node counts reflect the node group size change (important for recalculating state after
// deleting scale-up nodes that failed to create).
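// For example, in the "failed scale up by 3 nodes" case below, the target drops
// from 4 back to 1 while only one node is Ready, so after Recalculate the
// acceptable range collapses to {MinNodes: 1, CurrentTarget: 1, MaxNodes: 1}
// and no nodes are reported as upcoming.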
func TestRecalculateStateAfterNodeGroupSizeChanged(t *testing.T) {
ngName := "ng1"
testCases := []struct {
name string
acceptableRange AcceptableRange
readiness Readiness
newTarget int
scaleUpRequest *ScaleUpRequest
wantAcceptableRange AcceptableRange
wantUpcoming int
}{
{
name: "failed scale up by 3 nodes",
acceptableRange: AcceptableRange{MinNodes: 1, CurrentTarget: 4, MaxNodes: 4},
readiness: Readiness{Ready: make([]string, 1)},
newTarget: 1,
wantAcceptableRange: AcceptableRange{MinNodes: 1, CurrentTarget: 1, MaxNodes: 1},
wantUpcoming: 0,
}, {
name: "partially failed scale up",
acceptableRange: AcceptableRange{MinNodes: 5, CurrentTarget: 7, MaxNodes: 8},
readiness: Readiness{Ready: make([]string, 5)},
newTarget: 6,
wantAcceptableRange: AcceptableRange{MinNodes: 5, CurrentTarget: 6, MaxNodes: 6},
scaleUpRequest: &ScaleUpRequest{Increase: 1},
wantUpcoming: 1,
}, {
name: "scale up ongoing, no change",
acceptableRange: AcceptableRange{MinNodes: 1, CurrentTarget: 4, MaxNodes: 4},
readiness: Readiness{Ready: make([]string, 1)},
newTarget: 4,
wantAcceptableRange: AcceptableRange{MinNodes: 1, CurrentTarget: 4, MaxNodes: 4},
scaleUpRequest: &ScaleUpRequest{Increase: 3},
wantUpcoming: 3,
}, {
name: "no scale up, no change",
acceptableRange: AcceptableRange{MinNodes: 4, CurrentTarget: 4, MaxNodes: 4},
readiness: Readiness{Ready: make([]string, 4)},
newTarget: 4,
wantAcceptableRange: AcceptableRange{MinNodes: 4, CurrentTarget: 4, MaxNodes: 4},
wantUpcoming: 0,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup(ngName, 0, 1000, tc.newTarget)
fakeLogRecorder, _ := utils.NewStatusMapRecorder(&fake.Clientset{}, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterState := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{}, fakeLogRecorder,
newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
clusterState.acceptableRanges = map[string]AcceptableRange{ngName: tc.acceptableRange}
clusterState.perNodeGroupReadiness = map[string]Readiness{ngName: tc.readiness}
if tc.scaleUpRequest != nil {
clusterState.scaleUpRequests = map[string]*ScaleUpRequest{ngName: tc.scaleUpRequest}
}
clusterState.Recalculate()
assert.Equal(t, tc.wantAcceptableRange, clusterState.acceptableRanges[ngName])
upcomingCounts, _ := clusterState.GetUpcomingNodes()
if upcoming, found := upcomingCounts[ngName]; found {
assert.Equal(t, tc.wantUpcoming, upcoming, "Unexpected upcoming nodes count, want: %d got: %d", tc.wantUpcoming, upcomingCounts[ngName])
}
})
}
}
func TestOKOneUnreadyNode(t *testing.T) {
now := time.Now()
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
SetNodeReadyState(ng2_1, false, now.Add(-time.Minute))
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 1, 10, 1)
provider.AddNodeGroup("ng2", 1, 10, 1)
provider.AddNode("ng1", ng1_1)
provider.AddNode("ng2", ng2_1)
assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
assert.NoError(t, err)
assert.True(t, clusterstate.IsClusterHealthy())
assert.Empty(t, clusterstate.GetScaleUpFailures())
assert.True(t, clusterstate.IsNodeGroupHealthy("ng1"))
status := clusterstate.GetStatus(now)
assert.Equal(t, api.ClusterAutoscalerHealthy, status.ClusterWide.Health.Status)
assert.Equal(t, api.ClusterAutoscalerNoActivity, status.ClusterWide.ScaleUp.Status)
assert.Equal(t, 2, len(status.NodeGroups))
ng1Checked := false
for _, nodeGroupStatus := range status.NodeGroups {
if nodeGroupStatus.Name == "ng1" {
assert.Equal(t, api.ClusterAutoscalerHealthy, nodeGroupStatus.Health.Status)
ng1Checked = true
}
}
assert.True(t, ng1Checked)
}
func TestNodeWithoutNodeGroupDontCrash(t *testing.T) {
now := time.Now()
noNgNode := BuildTestNode("no_ng", 1000, 1000)
SetNodeReadyState(noNgNode, true, now.Add(-time.Minute))
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNode("no_ng", noNgNode)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
err := clusterstate.UpdateNodes([]*apiv1.Node{noNgNode}, nil, now)
assert.NoError(t, err)
assert.Empty(t, clusterstate.GetScaleUpFailures())
clusterstate.UpdateScaleDownCandidates([]*apiv1.Node{noNgNode}, now)
}
func TestOKOneUnreadyNodeWithScaleDownCandidate(t *testing.T) {
now := time.Now()
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
SetNodeReadyState(ng2_1, false, now.Add(-time.Minute))
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 1, 10, 1)
provider.AddNodeGroup("ng2", 1, 10, 1)
provider.AddNode("ng1", ng1_1)
provider.AddNode("ng2", ng2_1)
assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
clusterstate.UpdateScaleDownCandidates([]*apiv1.Node{ng1_1}, now)
assert.NoError(t, err)
assert.True(t, clusterstate.IsClusterHealthy())
assert.Empty(t, clusterstate.GetScaleUpFailures())
assert.True(t, clusterstate.IsNodeGroupHealthy("ng1"))
status := clusterstate.GetStatus(now)
assert.Equal(t, api.ClusterAutoscalerHealthy, status.ClusterWide.Health.Status)
assert.Equal(t, api.ClusterAutoscalerNoActivity, status.ClusterWide.ScaleUp.Status)
assert.Equal(t, api.ClusterAutoscalerCandidatesPresent, status.ClusterWide.ScaleDown.Status)
assert.Equal(t, 2, len(status.NodeGroups))
ng1Checked := false
ng2Checked := false
for _, nodeGroupStatus := range status.NodeGroups {
if nodeGroupStatus.Name == "ng1" {
assert.Equal(t, api.ClusterAutoscalerHealthy, nodeGroupStatus.Health.Status)
assert.Equal(t, api.ClusterAutoscalerCandidatesPresent, nodeGroupStatus.ScaleDown.Status)
ng1Checked = true
}
if nodeGroupStatus.Name == "ng2" {
assert.Equal(t, api.ClusterAutoscalerHealthy, nodeGroupStatus.Health.Status)
assert.Equal(t, api.ClusterAutoscalerNoCandidates, nodeGroupStatus.ScaleDown.Status)
ng2Checked = true
}
}
assert.True(t, ng1Checked)
assert.True(t, ng2Checked)
}
func TestMissingNodes(t *testing.T) {
now := time.Now()
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
SetNodeReadyState(ng2_1, true, now.Add(-time.Minute))
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 1, 10, 5)
provider.AddNodeGroup("ng2", 1, 10, 1)
provider.AddNode("ng1", ng1_1)
provider.AddNode("ng2", ng2_1)
assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
assert.NoError(t, err)
assert.True(t, clusterstate.IsClusterHealthy())
assert.Empty(t, clusterstate.GetScaleUpFailures())
assert.False(t, clusterstate.IsNodeGroupHealthy("ng1"))
status := clusterstate.GetStatus(now)
assert.Equal(t, api.ClusterAutoscalerHealthy, status.ClusterWide.Health.Status)
assert.Equal(t, 2, len(status.NodeGroups))
ng1Checked := false
for _, nodeGroupStatus := range status.NodeGroups {
if nodeGroupStatus.Name == "ng1" {
assert.Equal(t, api.ClusterAutoscalerUnhealthy, nodeGroupStatus.Health.Status)
ng1Checked = true
}
}
assert.True(t, ng1Checked)
}
func TestTooManyUnready(t *testing.T) {
now := time.Now()
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
SetNodeReadyState(ng1_1, false, now.Add(-time.Minute))
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
SetNodeReadyState(ng2_1, false, now.Add(-time.Minute))
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 1, 10, 1)
provider.AddNodeGroup("ng2", 1, 10, 1)
provider.AddNode("ng1", ng1_1)
provider.AddNode("ng2", ng2_1)
assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
assert.NoError(t, err)
assert.False(t, clusterstate.IsClusterHealthy())
assert.Empty(t, clusterstate.GetScaleUpFailures())
assert.True(t, clusterstate.IsNodeGroupHealthy("ng1"))
}
func TestUnreadyLongAfterCreation(t *testing.T) {
now := time.Now()
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
SetNodeReadyState(ng2_1, false, now.Add(-time.Minute))
ng2_1.CreationTimestamp = metav1.Time{Time: now.Add(-30 * time.Minute)}
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 1, 10, 1)
provider.AddNodeGroup("ng2", 1, 10, 1)
provider.AddNode("ng1", ng1_1)
provider.AddNode("ng2", ng2_1)
assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "some-map")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 1, len(clusterstate.GetClusterReadiness().Unready))
assert.Equal(t, 0, len(clusterstate.GetClusterReadiness().NotStarted))
upcoming, upcomingRegistered := clusterstate.GetUpcomingNodes()
assert.Equal(t, 0, upcoming["ng1"])
assert.Empty(t, upcomingRegistered["ng1"])
}
func TestNotStarted(t *testing.T) {
now := time.Now()
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
SetNodeReadyState(ng2_1, false, now.Add(-4*time.Minute))
SetNodeNotReadyTaint(ng2_1)
ng2_1.CreationTimestamp = metav1.Time{Time: now.Add(-10 * time.Minute)}
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 1, 10, 1)
provider.AddNodeGroup("ng2", 1, 10, 1)
provider.AddNode("ng1", ng1_1)
provider.AddNode("ng2", ng2_1)
assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "some-map")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 1, len(clusterstate.GetClusterReadiness().NotStarted))
assert.Equal(t, 1, len(clusterstate.GetClusterReadiness().Ready))
// Node ng2_1's Ready condition flips to true, but the not-ready taint is still
// present, so it is still counted as NotStarted.
SetNodeReadyState(ng2_1, true, now.Add(-4*time.Minute))
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 1, len(clusterstate.GetClusterReadiness().NotStarted))
assert.Equal(t, 1, len(clusterstate.GetClusterReadiness().Ready))
// Node ng2_1 no longer has the taint, so it now counts as Ready.
RemoveNodeNotReadyTaint(ng2_1)
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 0, len(clusterstate.GetClusterReadiness().NotStarted))
assert.Equal(t, 2, len(clusterstate.GetClusterReadiness().Ready))
}
func TestExpiredScaleUp(t *testing.T) {
now := time.Now()
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 1, 10, 5)
provider.AddNode("ng1", ng1_1)
assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 2 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
clusterstate.RegisterScaleUp(provider.GetNodeGroup("ng1"), 4, now.Add(-3*time.Minute))
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1}, nil, now)
assert.NoError(t, err)
assert.True(t, clusterstate.IsClusterHealthy())
assert.False(t, clusterstate.IsNodeGroupHealthy("ng1"))
assert.Equal(t, clusterstate.GetScaleUpFailures(), map[string][]ScaleUpFailure{
"ng1": {
{NodeGroup: provider.GetNodeGroup("ng1"), Time: now, Reason: metrics.Timeout},
},
})
}
func TestRegisterScaleDown(t *testing.T) {
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 1, 10, 1)
provider.AddNode("ng1", ng1_1)
assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
now := time.Now()
clusterstate.RegisterScaleDown(provider.GetNodeGroup("ng1"), "ng1-1", now.Add(time.Minute), now)
assert.Equal(t, 1, len(clusterstate.scaleDownRequests))
clusterstate.updateScaleRequests(now.Add(5 * time.Minute))
assert.Equal(t, 0, len(clusterstate.scaleDownRequests))
assert.Empty(t, clusterstate.GetScaleUpFailures())
}
func TestUpcomingNodes(t *testing.T) {
provider := testprovider.NewTestCloudProviderBuilder().Build()
now := time.Now()
// 6 nodes are expected to come.
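// (The target size is 7 and only one node is registered, so 7-1 = 6 upcoming.)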
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
provider.AddNodeGroup("ng1", 1, 10, 7)
provider.AddNode("ng1", ng1_1)
// One node is expected to come. One node has been unready for a long time,
// but this should not make any difference.
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
SetNodeReadyState(ng2_1, false, now.Add(-time.Minute))
provider.AddNodeGroup("ng2", 1, 10, 2)
provider.AddNode("ng2", ng2_1)
// Two nodes are expected to come. One is just being started for the first time,
// the other one is not there yet.
ng3_1 := BuildTestNode("ng3-1", 1000, 1000)
SetNodeReadyState(ng3_1, false, now.Add(-time.Minute))
ng3_1.CreationTimestamp = metav1.Time{Time: now.Add(-time.Minute)}
provider.AddNodeGroup("ng3", 1, 10, 2)
provider.AddNode("ng3", ng3_1)
// Nothing should be added here.
ng4_1 := BuildTestNode("ng4-1", 1000, 1000)
SetNodeReadyState(ng4_1, false, now.Add(-time.Minute))
provider.AddNodeGroup("ng4", 1, 10, 1)
provider.AddNode("ng4", ng4_1)
// One node is already there; for a second node, deletion / draining has already started.
ng5_1 := BuildTestNode("ng5-1", 1000, 1000)
SetNodeReadyState(ng5_1, true, now.Add(-time.Minute))
ng5_2 := BuildTestNode("ng5-2", 1000, 1000)
SetNodeReadyState(ng5_2, true, now.Add(-time.Minute))
ng5_2.Spec.Taints = []apiv1.Taint{
{
Key: taints.ToBeDeletedTaint,
Value: fmt.Sprint(time.Now().Unix()),
Effect: apiv1.TaintEffectNoSchedule,
},
}
provider.AddNodeGroup("ng5", 1, 10, 2)
provider.AddNode("ng5", ng5_1)
provider.AddNode("ng5", ng5_2)
assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1, ng3_1, ng4_1, ng5_1, ng5_2}, nil, now)
assert.NoError(t, err)
assert.Empty(t, clusterstate.GetScaleUpFailures())
upcomingNodes, upcomingRegistered := clusterstate.GetUpcomingNodes()
assert.Equal(t, 6, upcomingNodes["ng1"])
assert.Empty(t, upcomingRegistered["ng1"]) // Only unregistered.
assert.Equal(t, 1, upcomingNodes["ng2"])
assert.Empty(t, upcomingRegistered["ng2"]) // Only unregistered.
assert.Equal(t, 2, upcomingNodes["ng3"])
assert.Equal(t, []string{"ng3-1"}, upcomingRegistered["ng3"]) // 1 registered, 1 unregistered.
assert.NotContains(t, upcomingNodes, "ng4")
assert.NotContains(t, upcomingRegistered, "ng4")
assert.Equal(t, 0, upcomingNodes["ng5"])
assert.Empty(t, upcomingRegistered["ng5"])
}
func TestTaintBasedNodeDeletion(t *testing.T) {
// Create a new cloud provider that does not implement the HasInstance check;
// it will return the ErrNotImplemented error instead.
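// With HasInstance unimplemented, the ToBeDeleted taint on ng1-2 is what marks
// that node as being deleted, which is why only one of the two target nodes is
// reported as upcoming below.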
provider := testprovider.NewTestCloudProviderBuilder().WithHasInstance(func(string) (bool, error) { return false, cloudprovider.ErrNotImplemented }).Build()
now := time.Now()
// One node is already there; for a second node, deletion / draining has already started.
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
ng1_2 := BuildTestNode("ng1-2", 1000, 1000)
SetNodeReadyState(ng1_2, true, now.Add(-time.Minute))
ng1_2.Spec.Taints = []apiv1.Taint{
{
Key: taints.ToBeDeletedTaint,
Value: fmt.Sprint(time.Now().Unix()),
Effect: apiv1.TaintEffectNoSchedule,
},
}
provider.AddNodeGroup("ng1", 1, 10, 2)
provider.AddNode("ng1", ng1_1)
provider.AddNode("ng1", ng1_2)
assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_2}, nil, now)
assert.NoError(t, err)
assert.Empty(t, clusterstate.GetScaleUpFailures())
upcomingNodes, upcomingRegistered := clusterstate.GetUpcomingNodes()
assert.Equal(t, 1, upcomingNodes["ng1"])
assert.Empty(t, upcomingRegistered["ng1"]) // Only unregistered.
}
func TestIncorrectSize(t *testing.T) {
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 1, 10, 5)
provider.AddNode("ng1", ng1_1)
assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
now := time.Now()
clusterstate.UpdateNodes([]*apiv1.Node{ng1_1}, nil, now.Add(-5*time.Minute))
incorrect := clusterstate.incorrectNodeGroupSizes["ng1"]
assert.Equal(t, 5, incorrect.ExpectedSize)
assert.Equal(t, 1, incorrect.CurrentSize)
assert.Equal(t, now.Add(-5*time.Minute), incorrect.FirstObserved)
clusterstate.UpdateNodes([]*apiv1.Node{ng1_1}, nil, now.Add(-4*time.Minute))
incorrect = clusterstate.incorrectNodeGroupSizes["ng1"]
assert.Equal(t, 5, incorrect.ExpectedSize)
assert.Equal(t, 1, incorrect.CurrentSize)
assert.Equal(t, now.Add(-5*time.Minute), incorrect.FirstObserved)
clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_1}, nil, now.Add(-3*time.Minute))
incorrect = clusterstate.incorrectNodeGroupSizes["ng1"]
assert.Equal(t, 5, incorrect.ExpectedSize)
assert.Equal(t, 2, incorrect.CurrentSize)
assert.Equal(t, now.Add(-3*time.Minute), incorrect.FirstObserved)
}
func TestUnregisteredNodes(t *testing.T) {
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
ng1_1.Spec.ProviderID = "ng1-1"
ng1_2 := BuildTestNode("ng1-2", 1000, 1000)
ng1_2.Spec.ProviderID = "ng1-2"
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 1, 10, 2)
provider.AddNode("ng1", ng1_1)
provider.AddNode("ng1", ng1_2)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 10 * time.Second}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1}, nil, time.Now().Add(-time.Minute))
assert.NoError(t, err)
assert.Equal(t, 1, len(clusterstate.GetUnregisteredNodes()))
assert.Equal(t, "ng1-2", clusterstate.GetUnregisteredNodes()[0].Node.Name)
upcomingNodes, upcomingRegistered := clusterstate.GetUpcomingNodes()
assert.Equal(t, 1, upcomingNodes["ng1"])
assert.Empty(t, upcomingRegistered["ng1"]) // Unregistered only.
// The node didn't come up within MaxNodeProvisionTime, so it should no longer be
// counted as upcoming (but it is still an unregistered node).
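// (MaxNodeProvisionTime is only 10 seconds here, and this second UpdateNodes call
// runs roughly two minutes after the first one.)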
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1}, nil, time.Now().Add(time.Minute))
assert.NoError(t, err)
assert.Equal(t, 1, len(clusterstate.GetUnregisteredNodes()))
assert.Equal(t, "ng1-2", clusterstate.GetUnregisteredNodes()[0].Node.Name)
upcomingNodes, upcomingRegistered = clusterstate.GetUpcomingNodes()
assert.Equal(t, 0, len(upcomingNodes))
assert.Empty(t, upcomingRegistered["ng1"])
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_2}, nil, time.Now().Add(time.Minute))
assert.NoError(t, err)
assert.Equal(t, 0, len(clusterstate.GetUnregisteredNodes()))
}
func TestCloudProviderDeletedNodes(t *testing.T) {
now := time.Now()
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
ng1_1.Spec.ProviderID = "ng1-1"
ng1_2 := BuildTestNode("ng1-2", 1000, 1000)
SetNodeReadyState(ng1_2, true, now.Add(-time.Minute))
ng1_2.Spec.ProviderID = "ng1-2"
// No Node Group - Not Autoscaled Node
noNgNode := BuildTestNode("no-ng", 1000, 1000)
SetNodeReadyState(noNgNode, true, now.Add(-time.Minute))
noNgNode.Spec.ProviderID = "no-ng"
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 1, 10, 2)
provider.AddNode("ng1", ng1_1)
provider.AddNode("ng1", ng1_2)
provider.AddNode("no_ng", noNgNode)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 10 * time.Second}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
now = now.Add(time.Minute)
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_2, noNgNode}, nil, now)
// Nodes are registered correctly between Kubernetes and cloud provider.
assert.NoError(t, err)
assert.Equal(t, 0, len(GetCloudProviderDeletedNodeNames(clusterstate)))
// The node was removed from the cloud provider and
// should be counted as Deleted by the cluster state.
nodeGroup, err := provider.NodeGroupForNode(ng1_2)
assert.NoError(t, err)
provider.DeleteNode(ng1_2)
clusterstate.InvalidateNodeInstancesCacheEntry(nodeGroup)
now = now.Add(time.Minute)
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_2, noNgNode}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 1, len(GetCloudProviderDeletedNodeNames(clusterstate)))
assert.Equal(t, "ng1-2", GetCloudProviderDeletedNodeNames(clusterstate)[0])
assert.Equal(t, 1, len(clusterstate.GetClusterReadiness().Deleted))
// The node is removed from Kubernetes
now = now.Add(time.Minute)
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, noNgNode}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 0, len(GetCloudProviderDeletedNodeNames(clusterstate)))
// A new node is added afterwards.
ng1_3 := BuildTestNode("ng1-3", 1000, 1000)
SetNodeReadyState(ng1_3, true, now.Add(-time.Minute))
ng1_3.Spec.ProviderID = "ng1-3"
provider.AddNode("ng1", ng1_3)
clusterstate.InvalidateNodeInstancesCacheEntry(nodeGroup)
now = now.Add(time.Minute)
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_3, noNgNode}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 0, len(GetCloudProviderDeletedNodeNames(clusterstate)))
// The newly added node is removed from the cloud provider and
// should be counted as Deleted by the cluster state.
nodeGroup, err = provider.NodeGroupForNode(ng1_3)
assert.NoError(t, err)
provider.DeleteNode(ng1_3)
clusterstate.InvalidateNodeInstancesCacheEntry(nodeGroup)
now = now.Add(time.Minute)
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, noNgNode, ng1_3}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 1, len(GetCloudProviderDeletedNodeNames(clusterstate)))
assert.Equal(t, "ng1-3", GetCloudProviderDeletedNodeNames(clusterstate)[0])
assert.Equal(t, 1, len(clusterstate.GetClusterReadiness().Deleted))
// Confirm that previously identified deleted cloud provider nodes are still included
// until they are removed from Kubernetes.
now = now.Add(time.Minute)
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, noNgNode, ng1_3}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 1, len(GetCloudProviderDeletedNodeNames(clusterstate)))
assert.Equal(t, "ng1-3", GetCloudProviderDeletedNodeNames(clusterstate)[0])
assert.Equal(t, 1, len(clusterstate.GetClusterReadiness().Deleted))
// The node is removed from Kubernetes
now = now.Add(time.Minute)
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, noNgNode}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 0, len(GetCloudProviderDeletedNodeNames(clusterstate)))
}
func TestScaleUpBackoff(t *testing.T) {
now := time.Now()
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
ng1_2 := BuildTestNode("ng1-2", 1000, 1000)
SetNodeReadyState(ng1_2, true, now.Add(-time.Minute))
ng1_3 := BuildTestNode("ng1-3", 1000, 1000)
SetNodeReadyState(ng1_3, true, now.Add(-time.Minute))
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 1, 10, 4)
ng1 := provider.GetNodeGroup("ng1")
provider.AddNode("ng1", ng1_1)
provider.AddNode("ng1", ng1_2)
provider.AddNode("ng1", ng1_3)
assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(
provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 120 * time.Second}),
asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
// After a failed scale-up, the node group should still be healthy, but should back off from further scale-ups.
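// The scale-up below is registered as having started 180s ago while MaxNodeProvisionTime
// is 120s, so it is treated as timed out; the backoff error message reports "after 3m0s",
// i.e. the time elapsed since the scale-up request started.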
clusterstate.RegisterScaleUp(provider.GetNodeGroup("ng1"), 1, now.Add(-180*time.Second))
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_2, ng1_3}, nil, now)
assert.NoError(t, err)
assert.True(t, clusterstate.IsClusterHealthy())
assert.True(t, clusterstate.IsNodeGroupHealthy("ng1"))
assert.Equal(t, NodeGroupScalingSafety{
SafeToScale: false,
Healthy: true,
BackoffStatus: backoff.Status{
IsBackedOff: true,
ErrorInfo: cloudprovider.InstanceErrorInfo{
ErrorClass: cloudprovider.OtherErrorClass,
ErrorCode: "timeout",
ErrorMessage: "Scale-up timed out for node group ng1 after 3m0s",
},
},
}, clusterstate.NodeGroupScaleUpSafety(ng1, now))
assert.Equal(t, backoff.Status{
IsBackedOff: true,
ErrorInfo: cloudprovider.InstanceErrorInfo{
ErrorClass: cloudprovider.OtherErrorClass,
ErrorCode: "timeout",
ErrorMessage: "Scale-up timed out for node group ng1 after 3m0s",
}}, clusterstate.backoff.BackoffStatus(ng1, nil, now))
// Backoff should expire after timeout
now = now.Add(5 * time.Minute /*InitialNodeGroupBackoffDuration*/).Add(time.Second)
assert.True(t, clusterstate.IsClusterHealthy())
assert.True(t, clusterstate.IsNodeGroupHealthy("ng1"))
assert.Equal(t, NodeGroupScalingSafety{SafeToScale: true, Healthy: true}, clusterstate.NodeGroupScaleUpSafety(ng1, now))
// Another failed scale-up should cause a longer backoff.
clusterstate.RegisterScaleUp(provider.GetNodeGroup("ng1"), 1, now.Add(-121*time.Second))
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_2, ng1_3}, nil, now)
assert.NoError(t, err)
assert.True(t, clusterstate.IsClusterHealthy())
assert.True(t, clusterstate.IsNodeGroupHealthy("ng1"))
assert.Equal(t, NodeGroupScalingSafety{
SafeToScale: false,
Healthy: true,
BackoffStatus: backoff.Status{
IsBackedOff: true,
ErrorInfo: cloudprovider.InstanceErrorInfo{
ErrorClass: cloudprovider.OtherErrorClass,
ErrorCode: "timeout",
ErrorMessage: "Scale-up timed out for node group ng1 after 2m1s",
},
},
}, clusterstate.NodeGroupScaleUpSafety(ng1, now))
now = now.Add(5 * time.Minute /*InitialNodeGroupBackoffDuration*/).Add(time.Second)
assert.Equal(t, NodeGroupScalingSafety{
SafeToScale: false,
Healthy: true,
BackoffStatus: backoff.Status{
IsBackedOff: true,
ErrorInfo: cloudprovider.InstanceErrorInfo{
ErrorClass: cloudprovider.OtherErrorClass,
ErrorCode: "timeout",
ErrorMessage: "Scale-up timed out for node group ng1 after 2m1s",
},
},
}, clusterstate.NodeGroupScaleUpSafety(ng1, now))
// After a successful scale-up, the node group should still be backed off.
clusterstate.RegisterScaleUp(provider.GetNodeGroup("ng1"), 1, now)
ng1_4 := BuildTestNode("ng1-4", 1000, 1000)
SetNodeReadyState(ng1_4, true, now.Add(-1*time.Minute))
provider.AddNode("ng1", ng1_4)
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_2, ng1_3, ng1_4}, nil, now)
assert.NoError(t, err)
assert.True(t, clusterstate.IsClusterHealthy())
assert.True(t, clusterstate.IsNodeGroupHealthy("ng1"))
assert.Equal(t, NodeGroupScalingSafety{
SafeToScale: false,
Healthy: true,
BackoffStatus: backoff.Status{
IsBackedOff: true,
ErrorInfo: cloudprovider.InstanceErrorInfo{
ErrorClass: cloudprovider.OtherErrorClass,
ErrorCode: "timeout",
ErrorMessage: "Scale-up timed out for node group ng1 after 2m1s",
},
},
}, clusterstate.NodeGroupScaleUpSafety(ng1, now))
assert.Equal(t, backoff.Status{
IsBackedOff: true,
ErrorInfo: cloudprovider.InstanceErrorInfo{
ErrorClass: cloudprovider.OtherErrorClass,
ErrorCode: "timeout",
ErrorMessage: "Scale-up timed out for node group ng1 after 2m1s",
}}, clusterstate.backoff.BackoffStatus(ng1, nil, now))
}
func TestGetClusterSize(t *testing.T) {
now := time.Now()
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
SetNodeReadyState(ng2_1, true, now.Add(-time.Minute))
notAutoscaledNode := BuildTestNode("notAutoscaledNode", 1000, 1000)
SetNodeReadyState(notAutoscaledNode, true, now.Add(-time.Minute))
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 1, 10, 5)
provider.AddNodeGroup("ng2", 1, 10, 1)
provider.AddNode("ng1", ng1_1)
provider.AddNode("ng2", ng2_1)
// Add a node not belonging to any autoscaled node group. This is to make sure that GetAutoscaledNodesCount doesn't
// take nodes from non-autoscaled node groups into account.
provider.AddNode("notAutoscaledNode", notAutoscaledNode)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
// There are 2 actual nodes in 2 node groups with target sizes of 5 and 1.
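// GetAutoscaledNodesCount should therefore report current=2 and target=5+1=6;
// the non-autoscaled node is ignored in both counts.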
clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1, notAutoscaledNode}, nil, now)
currentSize, targetSize := clusterstate.GetAutoscaledNodesCount()
assert.Equal(t, 2, currentSize)
assert.Equal(t, 6, targetSize)
// Current size should increase after a new node is added.
clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_1, notAutoscaledNode, ng2_1}, nil, now.Add(time.Minute))
currentSize, targetSize = clusterstate.GetAutoscaledNodesCount()
assert.Equal(t, 3, currentSize)
assert.Equal(t, 6, targetSize)
// Target size should increase after a new node group is added.
provider.AddNodeGroup("ng3", 1, 10, 1)
clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_1, notAutoscaledNode, ng2_1}, nil, now.Add(2*time.Minute))
currentSize, targetSize = clusterstate.GetAutoscaledNodesCount()
assert.Equal(t, 3, currentSize)
assert.Equal(t, 7, targetSize)
// Target size should change after a node group changes its target size.
for _, ng := range provider.NodeGroups() {
ng.(*testprovider.TestNodeGroup).SetTargetSize(10)
}
clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_1, notAutoscaledNode, ng2_1}, nil, now.Add(3*time.Minute))
currentSize, targetSize = clusterstate.GetAutoscaledNodesCount()
assert.Equal(t, 3, currentSize)
assert.Equal(t, 30, targetSize)
}
func TestUpdateScaleUp(t *testing.T) {
now := time.Now()
later := now.Add(time.Minute)
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 1, 10, 5)
provider.AddNodeGroup("ng2", 1, 10, 5)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(
provider,
ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
},
fakeLogRecorder,
newBackoff(),
nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 10 * time.Second}),
asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker(),
)
// Test cases for `RegisterScaleUp`
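// As the assertions below show, RegisterScaleUp accumulates deltas: a positive delta
// increases Increase and refreshes Time/ExpectedAddTime, a negative delta only shrinks
// Increase, and the request is removed once the increase drops below zero.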
clusterstate.RegisterScaleUp(provider.GetNodeGroup("ng1"), 100, now)
assert.Equal(t, clusterstate.scaleUpRequests["ng1"].Increase, 100)
assert.Equal(t, clusterstate.scaleUpRequests["ng1"].Time, now)
assert.Equal(t, clusterstate.scaleUpRequests["ng1"].ExpectedAddTime, now.Add(10*time.Second))
// expect no change of times on negative delta
clusterstate.RegisterScaleUp(provider.GetNodeGroup("ng1"), -20, later)
assert.Equal(t, clusterstate.scaleUpRequests["ng1"].Increase, 80)
assert.Equal(t, clusterstate.scaleUpRequests["ng1"].Time, now)
assert.Equal(t, clusterstate.scaleUpRequests["ng1"].ExpectedAddTime, now.Add(10*time.Second))
// update times on positive delta
clusterstate.RegisterScaleUp(provider.GetNodeGroup("ng1"), 30, later)
assert.Equal(t, clusterstate.scaleUpRequests["ng1"].Increase, 110)
assert.Equal(t, clusterstate.scaleUpRequests["ng1"].Time, later)
assert.Equal(t, clusterstate.scaleUpRequests["ng1"].ExpectedAddTime, later.Add(10*time.Second))
// If the accumulated increase drops below zero, the scale-up request is deleted.
clusterstate.RegisterScaleUp(provider.GetNodeGroup("ng1"), -200, now)
assert.Nil(t, clusterstate.scaleUpRequests["ng1"])
// If a new scale-up is registered with a negative delta, nothing should happen.
clusterstate.RegisterScaleUp(provider.GetNodeGroup("ng1"), -200, now)
assert.Nil(t, clusterstate.scaleUpRequests["ng1"])
}
func TestScaleUpFailures(t *testing.T) {
now := time.Now()
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 0, 10, 0)
provider.AddNodeGroup("ng2", 0, 10, 0)
assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{}, fakeLogRecorder, newBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}), asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker())
clusterstate.RegisterFailedScaleUp(provider.GetNodeGroup("ng1"), string(metrics.Timeout), "", "", "", now)
clusterstate.RegisterFailedScaleUp(provider.GetNodeGroup("ng2"), string(metrics.Timeout), "", "", "", now)
clusterstate.RegisterFailedScaleUp(provider.GetNodeGroup("ng1"), string(metrics.APIError), "", "", "", now.Add(time.Minute))
failures := clusterstate.GetScaleUpFailures()
assert.Equal(t, map[string][]ScaleUpFailure{
"ng1": {
{NodeGroup: provider.GetNodeGroup("ng1"), Reason: metrics.Timeout, Time: now},
{NodeGroup: provider.GetNodeGroup("ng1"), Reason: metrics.APIError, Time: now.Add(time.Minute)},
},
"ng2": {
{NodeGroup: provider.GetNodeGroup("ng2"), Reason: metrics.Timeout, Time: now},
},
}, failures)
clusterstate.clearScaleUpFailures()
assert.Empty(t, clusterstate.GetScaleUpFailures())
}
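// newBackoff returns the exponential backoff used throughout these tests. Assuming the
// default doubling behaviour of NewIdBasedExponentialBackoff, consecutive failures back
// off for roughly 5m, 10m, 20m, ... capped at 30m, and the backoff resets after 3h
// without new failures.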
func newBackoff() backoff.Backoff {
return backoff.NewIdBasedExponentialBackoff(5*time.Minute, /*InitialNodeGroupBackoffDuration*/
30*time.Minute /*MaxNodeGroupBackoffDuration*/, 3*time.Hour /*NodeGroupBackoffResetTimeout*/)
}
func TestUpdateAcceptableRanges(t *testing.T) {
testCases := []struct {
name string
targetSizes map[string]int
readiness map[string]Readiness
scaleUpRequests map[string]*ScaleUpRequest
scaledDownGroups []string
wantAcceptableRanges map[string]AcceptableRange
}{
{
name: "No scale-ups/scale-downs",
targetSizes: map[string]int{
"ng1": 10,
"ng2": 20,
},
readiness: map[string]Readiness{
"ng1": {Ready: make([]string, 10)},
"ng2": {Ready: make([]string, 20)},
},
wantAcceptableRanges: map[string]AcceptableRange{
"ng1": {MinNodes: 10, MaxNodes: 10, CurrentTarget: 10},
"ng2": {MinNodes: 20, MaxNodes: 20, CurrentTarget: 20},
},
},
{
name: "Ongoing scale-ups",
targetSizes: map[string]int{
"ng1": 10,
"ng2": 20,
},
readiness: map[string]Readiness{
"ng1": {Ready: make([]string, 10)},
"ng2": {Ready: make([]string, 20)},
},
scaleUpRequests: map[string]*ScaleUpRequest{
"ng1": {Increase: 3},
"ng2": {Increase: 5},
},
wantAcceptableRanges: map[string]AcceptableRange{
"ng1": {MinNodes: 7, MaxNodes: 10, CurrentTarget: 10},
"ng2": {MinNodes: 15, MaxNodes: 20, CurrentTarget: 20},
},
},
{
name: "Ongoing scale-downs",
targetSizes: map[string]int{
"ng1": 10,
"ng2": 20,
},
readiness: map[string]Readiness{
"ng1": {Ready: make([]string, 10)},
"ng2": {Ready: make([]string, 20)},
},
scaledDownGroups: []string{"ng1", "ng1", "ng2", "ng2", "ng2"},
wantAcceptableRanges: map[string]AcceptableRange{
"ng1": {MinNodes: 10, MaxNodes: 12, CurrentTarget: 10},
"ng2": {MinNodes: 20, MaxNodes: 23, CurrentTarget: 20},
},
},
{
name: "Some short unregistered nodes",
targetSizes: map[string]int{
"ng1": 10,
"ng2": 20,
},
readiness: map[string]Readiness{
"ng1": {Ready: make([]string, 8), Unregistered: make([]string, 2)},
"ng2": {Ready: make([]string, 17), Unregistered: make([]string, 3)},
},
wantAcceptableRanges: map[string]AcceptableRange{
"ng1": {MinNodes: 10, MaxNodes: 10, CurrentTarget: 10},
"ng2": {MinNodes: 20, MaxNodes: 20, CurrentTarget: 20},
},
},
{
name: "Some long unregistered nodes",
targetSizes: map[string]int{
"ng1": 10,
"ng2": 20,
},
readiness: map[string]Readiness{
"ng1": {Ready: make([]string, 8), LongUnregistered: make([]string, 2)},
"ng2": {Ready: make([]string, 17), LongUnregistered: make([]string, 3)},
},
wantAcceptableRanges: map[string]AcceptableRange{
"ng1": {MinNodes: 8, MaxNodes: 10, CurrentTarget: 10},
"ng2": {MinNodes: 17, MaxNodes: 20, CurrentTarget: 20},
},
},
{
name: "Everything together",
targetSizes: map[string]int{
"ng1": 10,
"ng2": 20,
},
readiness: map[string]Readiness{
"ng1": {Ready: make([]string, 8), Unregistered: make([]string, 1), LongUnregistered: make([]string, 2)},
"ng2": {Ready: make([]string, 17), Unregistered: make([]string, 3), LongUnregistered: make([]string, 4)},
},
scaleUpRequests: map[string]*ScaleUpRequest{
"ng1": {Increase: 3},
"ng2": {Increase: 5},
},
scaledDownGroups: []string{"ng1", "ng1", "ng2", "ng2", "ng2"},
wantAcceptableRanges: map[string]AcceptableRange{
"ng1": {MinNodes: 5, MaxNodes: 12, CurrentTarget: 10},
"ng2": {MinNodes: 11, MaxNodes: 23, CurrentTarget: 20},
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
provider := testprovider.NewTestCloudProviderBuilder().Build()
for nodeGroupName, targetSize := range tc.targetSizes {
provider.AddNodeGroup(nodeGroupName, 0, 1000, targetSize)
}
var scaleDownRequests []*ScaleDownRequest
for _, nodeGroupName := range tc.scaledDownGroups {
scaleDownRequests = append(scaleDownRequests, &ScaleDownRequest{
NodeGroup: provider.GetNodeGroup(nodeGroupName),
})
}
clusterState := &ClusterStateRegistry{
cloudProvider: provider,
perNodeGroupReadiness: tc.readiness,
scaleUpRequests: tc.scaleUpRequests,
scaleDownRequests: scaleDownRequests,
asyncNodeGroupStateChecker: asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker(),
}
clusterState.updateAcceptableRanges(tc.targetSizes)
assert.Equal(t, tc.wantAcceptableRanges, clusterState.acceptableRanges)
})
}
}
func TestUpdateIncorrectNodeGroupSizes(t *testing.T) {
timeNow := time.Now()
testCases := []struct {
name string
acceptableRanges map[string]AcceptableRange
readiness map[string]Readiness
incorrectSizes map[string]IncorrectNodeGroupSize
wantIncorrectSizes map[string]IncorrectNodeGroupSize
}{
{
name: "node groups with correct sizes",
acceptableRanges: map[string]AcceptableRange{
"ng1": {CurrentTarget: 10},
"ng2": {CurrentTarget: 20},
},
readiness: map[string]Readiness{
"ng1": {Registered: make([]string, 10)},
"ng2": {Registered: make([]string, 20)},
},
incorrectSizes: map[string]IncorrectNodeGroupSize{},
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{},
},
{
name: "node groups with correct sizes after not being correct sized",
acceptableRanges: map[string]AcceptableRange{
"ng1": {CurrentTarget: 10},
"ng2": {CurrentTarget: 20},
},
readiness: map[string]Readiness{
"ng1": {Registered: make([]string, 10)},
"ng2": {Registered: make([]string, 20)},
},
incorrectSizes: map[string]IncorrectNodeGroupSize{
"ng1": {CurrentSize: 8, ExpectedSize: 10, FirstObserved: timeNow.Add(-time.Hour)},
"ng2": {CurrentSize: 15, ExpectedSize: 20, FirstObserved: timeNow.Add(-time.Minute)},
},
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{},
},
{
name: "node groups below the target size",
acceptableRanges: map[string]AcceptableRange{
"ng1": {CurrentTarget: 10},
"ng2": {CurrentTarget: 20},
},
readiness: map[string]Readiness{
"ng1": {Registered: make([]string, 8)},
"ng2": {Registered: make([]string, 15)},
},
incorrectSizes: map[string]IncorrectNodeGroupSize{},
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{
"ng1": {CurrentSize: 8, ExpectedSize: 10, FirstObserved: timeNow},
"ng2": {CurrentSize: 15, ExpectedSize: 20, FirstObserved: timeNow},
},
},
{
name: "node groups above the target size",
acceptableRanges: map[string]AcceptableRange{
"ng1": {CurrentTarget: 10},
"ng2": {CurrentTarget: 20},
},
readiness: map[string]Readiness{
"ng1": {Registered: make([]string, 12)},
"ng2": {Registered: make([]string, 25)},
},
incorrectSizes: map[string]IncorrectNodeGroupSize{},
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{
"ng1": {CurrentSize: 12, ExpectedSize: 10, FirstObserved: timeNow},
"ng2": {CurrentSize: 25, ExpectedSize: 20, FirstObserved: timeNow},
},
},
{
name: "node groups below the target size with changed delta",
acceptableRanges: map[string]AcceptableRange{
"ng1": {CurrentTarget: 10},
"ng2": {CurrentTarget: 20},
},
readiness: map[string]Readiness{
"ng1": {Registered: make([]string, 8)},
"ng2": {Registered: make([]string, 15)},
},
incorrectSizes: map[string]IncorrectNodeGroupSize{
"ng1": {CurrentSize: 7, ExpectedSize: 10, FirstObserved: timeNow.Add(-time.Hour)},
"ng2": {CurrentSize: 14, ExpectedSize: 20, FirstObserved: timeNow.Add(-time.Minute)},
},
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{
"ng1": {CurrentSize: 8, ExpectedSize: 10, FirstObserved: timeNow},
"ng2": {CurrentSize: 15, ExpectedSize: 20, FirstObserved: timeNow},
},
},
{
name: "node groups below the target size with the same delta",
acceptableRanges: map[string]AcceptableRange{
"ng1": {CurrentTarget: 10},
"ng2": {CurrentTarget: 20},
},
readiness: map[string]Readiness{
"ng1": {Registered: make([]string, 8)},
"ng2": {Registered: make([]string, 15)},
},
incorrectSizes: map[string]IncorrectNodeGroupSize{
"ng1": {CurrentSize: 8, ExpectedSize: 10, FirstObserved: timeNow.Add(-time.Hour)},
"ng2": {CurrentSize: 15, ExpectedSize: 20, FirstObserved: timeNow.Add(-time.Minute)},
},
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{
"ng1": {CurrentSize: 8, ExpectedSize: 10, FirstObserved: timeNow.Add(-time.Hour)},
"ng2": {CurrentSize: 15, ExpectedSize: 20, FirstObserved: timeNow.Add(-time.Minute)},
},
},
{
name: "node groups below the target size with short unregistered nodes",
acceptableRanges: map[string]AcceptableRange{
"ng1": {CurrentTarget: 10},
"ng2": {CurrentTarget: 20},
},
readiness: map[string]Readiness{
"ng1": {Registered: make([]string, 8), Unregistered: make([]string, 2)},
"ng2": {Registered: make([]string, 15), Unregistered: make([]string, 3)},
},
incorrectSizes: map[string]IncorrectNodeGroupSize{},
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{
"ng2": {CurrentSize: 15, ExpectedSize: 20, FirstObserved: timeNow},
},
},
{
name: "node groups below the target size with long unregistered nodes",
acceptableRanges: map[string]AcceptableRange{
"ng1": {CurrentTarget: 10},
"ng2": {CurrentTarget: 20},
},
readiness: map[string]Readiness{
"ng1": {Registered: make([]string, 8), LongUnregistered: make([]string, 2)},
"ng2": {Registered: make([]string, 15), LongUnregistered: make([]string, 3)},
},
incorrectSizes: map[string]IncorrectNodeGroupSize{},
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{
"ng2": {CurrentSize: 15, ExpectedSize: 20, FirstObserved: timeNow},
},
},
{
name: "node groups below the target size with various unregistered nodes",
acceptableRanges: map[string]AcceptableRange{
"ng1": {CurrentTarget: 10},
"ng2": {CurrentTarget: 20},
},
readiness: map[string]Readiness{
"ng1": {Registered: make([]string, 8), Unregistered: make([]string, 1), LongUnregistered: make([]string, 1)},
"ng2": {Registered: make([]string, 15), Unregistered: make([]string, 2), LongUnregistered: make([]string, 2)},
},
incorrectSizes: map[string]IncorrectNodeGroupSize{},
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{
"ng2": {CurrentSize: 15, ExpectedSize: 20, FirstObserved: timeNow},
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
provider := testprovider.NewTestCloudProviderBuilder().Build()
for nodeGroupName, acceptableRange := range tc.acceptableRanges {
provider.AddNodeGroup(nodeGroupName, 0, 1000, acceptableRange.CurrentTarget)
}
clusterState := &ClusterStateRegistry{
cloudProvider: provider,
acceptableRanges: tc.acceptableRanges,
perNodeGroupReadiness: tc.readiness,
incorrectNodeGroupSizes: tc.incorrectSizes,
asyncNodeGroupStateChecker: asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker(),
}
clusterState.updateIncorrectNodeGroupSizes(timeNow)
assert.Equal(t, tc.wantIncorrectSizes, clusterState.incorrectNodeGroupSizes)
})
}
}
func TestTruncateIfExceedMaxSize(t *testing.T) {
testCases := []struct {
name string
message string
maxSize int
wantMessage string
}{
{
name: "Message doesn't exceed maxSize",
message: "Some message",
maxSize: len("Some message"),
wantMessage: "Some message",
},
{
name: "Message exceeds maxSize",
message: "Some long message",
maxSize: len("Some long message") - 1,
wantMessage: "Some <truncated>",
},
{
name: "Message doesn't exceed maxSize and maxSize is smaller than truncatedMessageSuffix length",
message: "msg",
maxSize: len("msg"),
wantMessage: "msg",
},
{
name: "Message exceeds maxSize and maxSize is smaller than truncatedMessageSuffix length",
message: "msg",
maxSize: 2,
wantMessage: "ms",
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
got := truncateIfExceedMaxLength(tc.message, tc.maxSize)
assert.Equal(t, tc.wantMessage, got)
})
}
}
func TestIsNodeGroupRegistered(t *testing.T) {
provider := testprovider.NewTestCloudProviderBuilder().Build()
registeredNodeGroupName := "registered-node-group"
provider.AddNodeGroup(registeredNodeGroupName, 1, 10, 1)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "some-map")
clusterstate := NewClusterStateRegistry(
provider,
ClusterStateRegistryConfig{MaxTotalUnreadyPercentage: 10, OkTotalUnreadyCount: 1},
fakeLogRecorder,
newBackoff(),
nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}),
asyncnodegroups.NewDefaultAsyncNodeGroupStateChecker(),
)
clusterstate.Recalculate()
testCases := []struct {
nodeGroupName string
want bool
}{
{
nodeGroupName: registeredNodeGroupName,
want: true,
},
{
nodeGroupName: "unregistered-node-group",
want: false,
},
}
for _, tc := range testCases {
t.Run(tc.nodeGroupName, func(t *testing.T) {
registered := clusterstate.IsNodeGroupRegistered(tc.nodeGroupName)
assert.Equal(t, tc.want, registered)
})
}
}
func TestUpcomingNodesFromUpcomingNodeGroups(t *testing.T) {
testCases := []struct {
isUpcomingMockMap map[string]bool
nodeGroups map[string]int
expectedGroupsUpcomingNodesNumber map[string]int
updateNodes bool
}{
{
isUpcomingMockMap: map[string]bool{"ng": true},
nodeGroups: map[string]int{"ng": 2},
expectedGroupsUpcomingNodesNumber: map[string]int{"ng": 2},
updateNodes: false,
},
{
isUpcomingMockMap: map[string]bool{"ng": true, "ng2": true},
nodeGroups: map[string]int{"ng": 2, "ng2": 3},
expectedGroupsUpcomingNodesNumber: map[string]int{"ng": 2, "ng2": 3},
updateNodes: false,
},
{
isUpcomingMockMap: map[string]bool{},
nodeGroups: map[string]int{"ng": 2},
expectedGroupsUpcomingNodesNumber: map[string]int{"ng": 2},
updateNodes: true,
},
{
isUpcomingMockMap: map[string]bool{"ng": true},
nodeGroups: map[string]int{"ng": 2, "ng2": 1},
expectedGroupsUpcomingNodesNumber: map[string]int{"ng": 2, "ng2": 1},
updateNodes: true,
},
{
isUpcomingMockMap: map[string]bool{"ng": true},
nodeGroups: map[string]int{"ng": 2, "ng2": 1},
expectedGroupsUpcomingNodesNumber: map[string]int{"ng": 2, "ng2": 0},
updateNodes: false,
},
}
for _, tc := range testCases {
now := time.Now()
provider := testprovider.NewTestCloudProviderBuilder().Build()
for groupName, groupSize := range tc.nodeGroups {
provider.AddUpcomingNodeGroup(groupName, 1, 10, groupSize)
}
assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "some-map")
clusterstate := NewClusterStateRegistry(
provider,
ClusterStateRegistryConfig{MaxTotalUnreadyPercentage: 10, OkTotalUnreadyCount: 1},
fakeLogRecorder,
newBackoff(),
nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute}),
&asyncnodegroups.MockAsyncNodeGroupStateChecker{IsUpcomingNodeGroup: tc.isUpcomingMockMap},
)
if tc.updateNodes {
err := clusterstate.UpdateNodes([]*apiv1.Node{}, nil, now)
assert.NoError(t, err)
}
assert.Equal(t, 0, len(clusterstate.GetClusterReadiness().Unready))
assert.Equal(t, 0, len(clusterstate.GetClusterReadiness().NotStarted))
upcoming, upcomingRegistered := clusterstate.GetUpcomingNodes()
for groupName, groupSize := range tc.expectedGroupsUpcomingNodesNumber {
assert.Equal(t, groupSize, upcoming[groupName])
assert.Empty(t, upcomingRegistered[groupName])
}
}
}