Merge pull request #4143 from BigDarkClown/master
Skip iteration loop if node creation failed
This commit is contained in:
commit
7d7df8c48c
|
|
@ -346,7 +346,10 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
|
|||
return nil
|
||||
}
|
||||
|
||||
a.deleteCreatedNodesWithErrors()
|
||||
if a.deleteCreatedNodesWithErrors() {
|
||||
klog.V(0).Infof("Some nodes that failed to create were removed, skipping iteration")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if there has been a constant difference between the number of nodes in k8s and
|
||||
// the number of nodes on the cloud provider side.
|
||||
|
|
@ -635,7 +638,7 @@ func removeOldUnregisteredNodes(unregisteredNodes []clusterstate.UnregisteredNod
|
|||
return removedAny, nil
|
||||
}
|
||||
|
||||
func (a *StaticAutoscaler) deleteCreatedNodesWithErrors() {
|
||||
func (a *StaticAutoscaler) deleteCreatedNodesWithErrors() bool {
|
||||
// We always schedule deleting of incoming errornous nodes
|
||||
// TODO[lukaszos] Consider adding logic to not retry delete every loop iteration
|
||||
nodes := a.clusterStateRegistry.GetCreatedNodesWithErrors()
|
||||
|
|
@ -656,6 +659,8 @@ func (a *StaticAutoscaler) deleteCreatedNodesWithErrors() {
|
|||
nodesToBeDeletedByNodeGroupId[nodeGroup.Id()] = append(nodesToBeDeletedByNodeGroupId[nodeGroup.Id()], node)
|
||||
}
|
||||
|
||||
deletedAny := false
|
||||
|
||||
for nodeGroupId, nodesToBeDeleted := range nodesToBeDeletedByNodeGroupId {
|
||||
var err error
|
||||
klog.V(1).Infof("Deleting %v from %v node group because of create errors", len(nodesToBeDeleted), nodeGroupId)
|
||||
|
|
@ -671,8 +676,11 @@ func (a *StaticAutoscaler) deleteCreatedNodesWithErrors() {
|
|||
klog.Warningf("Error while trying to delete nodes from %v: %v", nodeGroupId, err)
|
||||
}
|
||||
|
||||
deletedAny = deletedAny || err == nil
|
||||
a.clusterStateRegistry.InvalidateNodeInstancesCacheEntry(nodeGroup)
|
||||
}
|
||||
|
||||
return deletedAny
|
||||
}
|
||||
|
||||
func (a *StaticAutoscaler) nodeGroupsById() map[string]cloudprovider.NodeGroup {
|
||||
|
|
|
|||
|
|
@ -1058,7 +1058,7 @@ func TestStaticAutoscalerInstaceCreationErrors(t *testing.T) {
|
|||
clusterState.UpdateNodes([]*apiv1.Node{}, nil, now)
|
||||
|
||||
// delete nodes with create errors
|
||||
autoscaler.deleteCreatedNodesWithErrors()
|
||||
assert.True(t, autoscaler.deleteCreatedNodesWithErrors())
|
||||
|
||||
// check delete was called on correct nodes
|
||||
nodeGroupA.AssertCalled(t, "DeleteNodes", mock.MatchedBy(
|
||||
|
|
@ -1082,7 +1082,7 @@ func TestStaticAutoscalerInstaceCreationErrors(t *testing.T) {
|
|||
clusterState.UpdateNodes([]*apiv1.Node{}, nil, now)
|
||||
|
||||
// delete nodes with create errors
|
||||
autoscaler.deleteCreatedNodesWithErrors()
|
||||
assert.True(t, autoscaler.deleteCreatedNodesWithErrors())
|
||||
|
||||
// nodes should be deleted again
|
||||
nodeGroupA.AssertCalled(t, "DeleteNodes", mock.MatchedBy(
|
||||
|
|
@ -1145,7 +1145,7 @@ func TestStaticAutoscalerInstaceCreationErrors(t *testing.T) {
|
|||
clusterState.UpdateNodes([]*apiv1.Node{}, nil, now)
|
||||
|
||||
// delete nodes with create errors
|
||||
autoscaler.deleteCreatedNodesWithErrors()
|
||||
assert.False(t, autoscaler.deleteCreatedNodesWithErrors())
|
||||
|
||||
// we expect no more Delete Nodes
|
||||
nodeGroupA.AssertNumberOfCalls(t, "DeleteNodes", 2)
|
||||
|
|
|
|||
Loading…
Reference in New Issue