Merge pull request #5548 from DataDog/azure-fast-backoff
Azure: Fast nodegroup backoff on failed provisioning
This commit is contained in:
commit
0142a57730
|
|
@ -679,6 +679,13 @@ func instanceStatusFromProvisioningState(provisioningState *string) *cloudprovid
|
||||||
status.State = cloudprovider.InstanceDeleting
|
status.State = cloudprovider.InstanceDeleting
|
||||||
case string(compute.ProvisioningStateCreating):
|
case string(compute.ProvisioningStateCreating):
|
||||||
status.State = cloudprovider.InstanceCreating
|
status.State = cloudprovider.InstanceCreating
|
||||||
|
case string(compute.ProvisioningStateFailed):
|
||||||
|
status.State = cloudprovider.InstanceCreating
|
||||||
|
status.ErrorInfo = &cloudprovider.InstanceErrorInfo{
|
||||||
|
ErrorClass: cloudprovider.OutOfResourcesErrorClass,
|
||||||
|
ErrorCode: "provisioning-state-failed",
|
||||||
|
ErrorMessage: "Azure failed to provision a node for this node group",
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
status.State = cloudprovider.InstanceRunning
|
status.State = cloudprovider.InstanceRunning
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -225,6 +225,48 @@ func TestIncreaseSize(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestIncreaseSizeOnVMProvisioningFailed(t *testing.T) {
|
||||||
|
ctrl := gomock.NewController(t)
|
||||||
|
defer ctrl.Finish()
|
||||||
|
|
||||||
|
manager := newTestAzureManager(t)
|
||||||
|
vmssName := "vmss-failed-upscale"
|
||||||
|
|
||||||
|
expectedScaleSets := newTestVMSSList(3, "vmss-failed-upscale", "eastus", compute.Uniform)
|
||||||
|
expectedVMSSVMs := newTestVMSSVMList(3)
|
||||||
|
expectedVMSSVMs[2].ProvisioningState = to.StringPtr(string(compute.ProvisioningStateFailed))
|
||||||
|
|
||||||
|
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
|
||||||
|
mockVMSSClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return(expectedScaleSets, nil)
|
||||||
|
mockVMSSClient.EXPECT().CreateOrUpdateAsync(gomock.Any(), manager.config.ResourceGroup, vmssName, gomock.Any()).Return(nil, nil)
|
||||||
|
mockVMSSClient.EXPECT().WaitForCreateOrUpdateResult(gomock.Any(), gomock.Any(), manager.config.ResourceGroup).Return(&http.Response{StatusCode: http.StatusOK}, nil).AnyTimes()
|
||||||
|
manager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
|
||||||
|
mockVMSSVMClient := mockvmssvmclient.NewMockInterface(ctrl)
|
||||||
|
mockVMSSVMClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup, "vmss-failed-upscale", gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes()
|
||||||
|
manager.azClient.virtualMachineScaleSetVMsClient = mockVMSSVMClient
|
||||||
|
manager.explicitlyConfigured["vmss-failed-upscale"] = true
|
||||||
|
registered := manager.RegisterNodeGroup(newTestScaleSet(manager, vmssName))
|
||||||
|
assert.True(t, registered)
|
||||||
|
manager.Refresh()
|
||||||
|
|
||||||
|
provider, err := BuildAzureCloudProvider(manager, nil)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
scaleSet, ok := provider.NodeGroups()[0].(*ScaleSet)
|
||||||
|
assert.True(t, ok)
|
||||||
|
|
||||||
|
// Increase size by one, but the new node fails provisioning
|
||||||
|
err = scaleSet.IncreaseSize(1)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
nodes, err := scaleSet.Nodes()
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
assert.Equal(t, 3, len(nodes))
|
||||||
|
assert.Equal(t, cloudprovider.InstanceCreating, nodes[2].Status.State)
|
||||||
|
assert.Equal(t, cloudprovider.OutOfResourcesErrorClass, nodes[2].Status.ErrorInfo.ErrorClass)
|
||||||
|
}
|
||||||
|
|
||||||
func TestIncreaseSizeOnVMSSUpdating(t *testing.T) {
|
func TestIncreaseSizeOnVMSSUpdating(t *testing.T) {
|
||||||
ctrl := gomock.NewController(t)
|
ctrl := gomock.NewController(t)
|
||||||
defer ctrl.Finish()
|
defer ctrl.Finish()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue