Updating error messaging and fallback behavior of hasCloudProviderInstance. Changing deletedNodes to store empty struct instead of node values, and modifying the helper function to utilize that information for tests.

This commit is contained in:
Clint Fooken 2022-12-05 12:44:39 -08:00
parent ab4fff6307
commit 1198fbcd90
2 changed files with 21 additions and 37 deletions

View File

@ -122,7 +122,7 @@ type ClusterStateRegistry struct {
acceptableRanges map[string]AcceptableRange
incorrectNodeGroupSizes map[string]IncorrectNodeGroupSize
unregisteredNodes map[string]UnregisteredNode
deletedNodes map[string]*apiv1.Node
deletedNodes map[string]struct{}
candidatesForScaleDown map[string][]string
backoff backoff.Backoff
lastStatus *api.ClusterAutoscalerStatus
@ -155,7 +155,7 @@ func NewClusterStateRegistry(cloudProvider cloudprovider.CloudProvider, config C
acceptableRanges: make(map[string]AcceptableRange),
incorrectNodeGroupSizes: make(map[string]IncorrectNodeGroupSize),
unregisteredNodes: make(map[string]UnregisteredNode),
deletedNodes: make(map[string]*apiv1.Node),
deletedNodes: make(map[string]struct{}),
candidatesForScaleDown: make(map[string][]string),
backoff: backoff,
lastStatus: emptyStatus,
@ -675,29 +675,13 @@ func (csr *ClusterStateRegistry) GetUnregisteredNodes() []UnregisteredNode {
}
func (csr *ClusterStateRegistry) updateCloudProviderDeletedNodes(deletedNodes []*apiv1.Node) {
result := make(map[string]*apiv1.Node, len(deletedNodes)+len(csr.deletedNodes))
result := make(map[string]struct{}, len(deletedNodes))
for _, deleted := range deletedNodes {
if prev, found := csr.deletedNodes[deleted.Name]; found {
result[deleted.Name] = prev
} else {
result[deleted.Name] = deleted
}
result[deleted.Name] = struct{}{}
}
csr.deletedNodes = result
}
// GetCloudProviderDeletedNodes returns a list of all nodes removed from cloud provider but registered in Kubernetes.
func (csr *ClusterStateRegistry) GetCloudProviderDeletedNodes() []*apiv1.Node {
csr.Lock()
defer csr.Unlock()
result := make([]*apiv1.Node, 0, len(csr.deletedNodes))
for _, deleted := range csr.deletedNodes {
result = append(result, deleted)
}
return result
}
// UpdateScaleDownCandidates updates scale down candidates
func (csr *ClusterStateRegistry) UpdateScaleDownCandidates(nodes []*apiv1.Node, now time.Time) {
result := make(map[string][]string)
@ -1004,8 +988,7 @@ func (csr *ClusterStateRegistry) hasCloudProviderInstance(node *apiv1.Node) bool
return exists
}
if !errors.Is(err, cloudprovider.ErrNotImplemented) {
klog.Warningf("Failed to check whether node has cloud instance for %s: %v", node.Name, err)
return exists
klog.Warningf("Failed to check cloud provider has instance for %s: %v", node.Name, err)
}
return !deletetaint.HasToBeDeletedTaint(node)
}

View File

@ -38,14 +38,15 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/utils/backoff"
)
// GetCloudProviderDeletedNodes returns a list of all nodes removed from cloud provider but registered in Kubernetes.
func GetCloudProviderDeletedNodes(csr *ClusterStateRegistry) []*apiv1.Node {
// GetCloudProviderDeletedNodeNames returns a list of the names of nodes removed
// from cloud provider but registered in Kubernetes.
func GetCloudProviderDeletedNodeNames(csr *ClusterStateRegistry) []string {
csr.Lock()
defer csr.Unlock()
result := make([]*apiv1.Node, 0, len(csr.deletedNodes))
for _, deleted := range csr.deletedNodes {
result = append(result, deleted)
result := make([]string, 0, len(csr.deletedNodes))
for nodeName, _ := range csr.deletedNodes {
result = append(result, nodeName)
}
return result
}
@ -671,7 +672,7 @@ func TestCloudProviderDeletedNodes(t *testing.T) {
// Nodes are registered correctly between Kubernetes and cloud provider.
assert.NoError(t, err)
assert.Equal(t, 0, len(GetCloudProviderDeletedNodes(clusterstate)))
assert.Equal(t, 0, len(GetCloudProviderDeletedNodeNames(clusterstate)))
// The node was removed from Cloud Provider
// should be counted as Deleted by cluster state
@ -683,8 +684,8 @@ func TestCloudProviderDeletedNodes(t *testing.T) {
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_2, noNgNode}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 1, len(GetCloudProviderDeletedNodes(clusterstate)))
assert.Equal(t, "ng1-2", GetCloudProviderDeletedNodes(clusterstate)[0].Name)
assert.Equal(t, 1, len(GetCloudProviderDeletedNodeNames(clusterstate)))
assert.Equal(t, "ng1-2", GetCloudProviderDeletedNodeNames(clusterstate)[0])
assert.Equal(t, 1, clusterstate.GetClusterReadiness().Deleted)
// The node is removed from Kubernetes
@ -692,7 +693,7 @@ func TestCloudProviderDeletedNodes(t *testing.T) {
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, noNgNode}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 0, len(GetCloudProviderDeletedNodes(clusterstate)))
assert.Equal(t, 0, len(GetCloudProviderDeletedNodeNames(clusterstate)))
// New Node is added afterwards
ng1_3 := BuildTestNode("ng1-3", 1000, 1000)
@ -704,7 +705,7 @@ func TestCloudProviderDeletedNodes(t *testing.T) {
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_3, noNgNode}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 0, len(GetCloudProviderDeletedNodes(clusterstate)))
assert.Equal(t, 0, len(GetCloudProviderDeletedNodeNames(clusterstate)))
// Newly added node is removed from Cloud Provider
// should be counted as Deleted by cluster state
@ -716,8 +717,8 @@ func TestCloudProviderDeletedNodes(t *testing.T) {
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, noNgNode, ng1_3}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 1, len(GetCloudProviderDeletedNodes(clusterstate)))
assert.Equal(t, "ng1-3", GetCloudProviderDeletedNodes(clusterstate)[0].Name)
assert.Equal(t, 1, len(GetCloudProviderDeletedNodeNames(clusterstate)))
assert.Equal(t, "ng1-3", GetCloudProviderDeletedNodeNames(clusterstate)[0])
assert.Equal(t, 1, clusterstate.GetClusterReadiness().Deleted)
// Confirm that previously identified deleted Cloud Provider nodes are still included
@ -726,8 +727,8 @@ func TestCloudProviderDeletedNodes(t *testing.T) {
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, noNgNode, ng1_3}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 1, len(GetCloudProviderDeletedNodes(clusterstate)))
assert.Equal(t, "ng1-3", GetCloudProviderDeletedNodes(clusterstate)[0].Name)
assert.Equal(t, 1, len(GetCloudProviderDeletedNodeNames(clusterstate)))
assert.Equal(t, "ng1-3", GetCloudProviderDeletedNodeNames(clusterstate)[0])
assert.Equal(t, 1, clusterstate.GetClusterReadiness().Deleted)
// The node is removed from Kubernetes
@ -735,7 +736,7 @@ func TestCloudProviderDeletedNodes(t *testing.T) {
err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, noNgNode}, nil, now)
assert.NoError(t, err)
assert.Equal(t, 0, len(GetCloudProviderDeletedNodes(clusterstate)))
assert.Equal(t, 0, len(GetCloudProviderDeletedNodeNames(clusterstate)))
}
func TestUpdateLastTransitionTimes(t *testing.T) {