diff --git a/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go b/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
index ffeae2b7c0..7cff4e1af6 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
@@ -679,6 +679,13 @@ func instanceStatusFromProvisioningState(provisioningState *string) *cloudprovid
 		status.State = cloudprovider.InstanceDeleting
 	case string(compute.ProvisioningStateCreating):
 		status.State = cloudprovider.InstanceCreating
+	case string(compute.ProvisioningStateFailed):
+		status.State = cloudprovider.InstanceCreating
+		status.ErrorInfo = &cloudprovider.InstanceErrorInfo{
+			ErrorClass:   cloudprovider.OutOfResourcesErrorClass,
+			ErrorCode:    "provisioning-state-failed",
+			ErrorMessage: "Azure failed to provision a node for this node group",
+		}
 	default:
 		status.State = cloudprovider.InstanceRunning
 	}
diff --git a/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go b/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go
index cf5a4f6d0c..0897fe25b2 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go
@@ -225,6 +225,70 @@ func TestIncreaseSize(t *testing.T) {
 	}
 }
 
+// TestIncreaseSizeOnVMProvisioningFailed verifies that a scale set VM whose
+// provisioning state is Failed is returned by Nodes() in the InstanceCreating
+// state with an OutOfResourcesErrorClass error attached.
+func TestIncreaseSizeOnVMProvisioningFailed(t *testing.T) {
+	ctrl := gomock.NewController(t)
+	defer ctrl.Finish()
+
+	manager := newTestAzureManager(t)
+	vmssName := "vmss-failed-upscale"
+
+	expectedScaleSets := newTestVMSSList(3, vmssName, "eastus", compute.Uniform)
+	expectedVMSSVMs := newTestVMSSVMList(3)
+	// The last VM in the mocked listing is the one reported as failed.
+	expectedVMSSVMs[2].ProvisioningState = to.StringPtr(string(compute.ProvisioningStateFailed))
+
+	mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
+	mockVMSSClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return(expectedScaleSets, nil)
+	mockVMSSClient.EXPECT().CreateOrUpdateAsync(gomock.Any(), manager.config.ResourceGroup, vmssName, gomock.Any()).Return(nil, nil)
+	mockVMSSClient.EXPECT().WaitForCreateOrUpdateResult(gomock.Any(), gomock.Any(), manager.config.ResourceGroup).Return(&http.Response{StatusCode: http.StatusOK}, nil).AnyTimes()
+	manager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
+	mockVMSSVMClient := mockvmssvmclient.NewMockInterface(ctrl)
+	mockVMSSVMClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup, vmssName, gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes()
+	manager.azClient.virtualMachineScaleSetVMsClient = mockVMSSVMClient
+	manager.explicitlyConfigured[vmssName] = true
+	registered := manager.RegisterNodeGroup(newTestScaleSet(manager, vmssName))
+	assert.True(t, registered)
+	manager.Refresh()
+
+	provider, err := BuildAzureCloudProvider(manager, nil)
+	if !assert.NoError(t, err) {
+		return
+	}
+
+	nodeGroups := provider.NodeGroups()
+	if !assert.NotEmpty(t, nodeGroups) {
+		return
+	}
+	scaleSet, ok := nodeGroups[0].(*ScaleSet)
+	if !assert.True(t, ok) {
+		return
+	}
+
+	// Increase size by one, but the new node fails provisioning
+	err = scaleSet.IncreaseSize(1)
+	assert.NoError(t, err)
+
+	nodes, err := scaleSet.Nodes()
+	assert.NoError(t, err)
+
+	// Stop here on a length mismatch: indexing nodes[2] below would panic
+	// instead of reporting a test failure.
+	if !assert.Equal(t, 3, len(nodes)) {
+		return
+	}
+	failed := nodes[2]
+	if !assert.NotNil(t, failed.Status) {
+		return
+	}
+	assert.Equal(t, cloudprovider.InstanceCreating, failed.Status.State)
+	if assert.NotNil(t, failed.Status.ErrorInfo) {
+		assert.Equal(t, cloudprovider.OutOfResourcesErrorClass, failed.Status.ErrorInfo.ErrorClass)
+	}
+}
+
 func TestIncreaseSizeOnVMSSUpdating(t *testing.T) {
 	ctrl := gomock.NewController(t)
 	defer ctrl.Finish()