Merge pull request #4143 from BigDarkClown/master

Skip iteration loop if node creation failed
This commit is contained in:
Kubernetes Prow Robot 2021-06-16 07:55:59 -07:00 committed by GitHub
commit 7d7df8c48c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 5 deletions

View File

@ -346,7 +346,10 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
return nil
}
a.deleteCreatedNodesWithErrors()
if a.deleteCreatedNodesWithErrors() {
klog.V(0).Infof("Some nodes that failed to create were removed, skipping iteration")
return nil
}
// Check if there has been a constant difference between the number of nodes in k8s and
// the number of nodes on the cloud provider side.
@ -635,7 +638,7 @@ func removeOldUnregisteredNodes(unregisteredNodes []clusterstate.UnregisteredNod
return removedAny, nil
}
func (a *StaticAutoscaler) deleteCreatedNodesWithErrors() {
func (a *StaticAutoscaler) deleteCreatedNodesWithErrors() bool {
// We always schedule deleting of incoming erroneous nodes
// TODO[lukaszos] Consider adding logic to not retry delete every loop iteration
nodes := a.clusterStateRegistry.GetCreatedNodesWithErrors()
@ -656,6 +659,8 @@ func (a *StaticAutoscaler) deleteCreatedNodesWithErrors() {
nodesToBeDeletedByNodeGroupId[nodeGroup.Id()] = append(nodesToBeDeletedByNodeGroupId[nodeGroup.Id()], node)
}
deletedAny := false
for nodeGroupId, nodesToBeDeleted := range nodesToBeDeletedByNodeGroupId {
var err error
klog.V(1).Infof("Deleting %v from %v node group because of create errors", len(nodesToBeDeleted), nodeGroupId)
@ -671,8 +676,11 @@ func (a *StaticAutoscaler) deleteCreatedNodesWithErrors() {
klog.Warningf("Error while trying to delete nodes from %v: %v", nodeGroupId, err)
}
deletedAny = deletedAny || err == nil
a.clusterStateRegistry.InvalidateNodeInstancesCacheEntry(nodeGroup)
}
return deletedAny
}
func (a *StaticAutoscaler) nodeGroupsById() map[string]cloudprovider.NodeGroup {

View File

@ -1058,7 +1058,7 @@ func TestStaticAutoscalerInstaceCreationErrors(t *testing.T) {
clusterState.UpdateNodes([]*apiv1.Node{}, nil, now)
// delete nodes with create errors
autoscaler.deleteCreatedNodesWithErrors()
assert.True(t, autoscaler.deleteCreatedNodesWithErrors())
// check delete was called on correct nodes
nodeGroupA.AssertCalled(t, "DeleteNodes", mock.MatchedBy(
@ -1082,7 +1082,7 @@ func TestStaticAutoscalerInstaceCreationErrors(t *testing.T) {
clusterState.UpdateNodes([]*apiv1.Node{}, nil, now)
// delete nodes with create errors
autoscaler.deleteCreatedNodesWithErrors()
assert.True(t, autoscaler.deleteCreatedNodesWithErrors())
// nodes should be deleted again
nodeGroupA.AssertCalled(t, "DeleteNodes", mock.MatchedBy(
@ -1145,7 +1145,7 @@ func TestStaticAutoscalerInstaceCreationErrors(t *testing.T) {
clusterState.UpdateNodes([]*apiv1.Node{}, nil, now)
// delete nodes with create errors
autoscaler.deleteCreatedNodesWithErrors()
assert.False(t, autoscaler.deleteCreatedNodesWithErrors())
// we expect no more Delete Nodes
nodeGroupA.AssertNumberOfCalls(t, "DeleteNodes", 2)