From 7993d648671e46b49e9b6e17c1c9bbfc4c99c386 Mon Sep 17 00:00:00 2001 From: Maximiliano Uribe Falcon <90781524+MaximilianoUribe@users.noreply.github.com> Date: Wed, 20 Aug 2025 18:45:32 +0000 Subject: [PATCH 1/3] adding support for force delete in azure --- .../cloudprovider/azure/azure_agent_pool.go | 14 +++++----- .../cloudprovider/azure/azure_scale_set.go | 26 +++++-------------- 2 files changed, 13 insertions(+), 27 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go b/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go index 9d3731f5d1..cdf925592c 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go @@ -446,6 +446,11 @@ func (as *AgentPool) DeleteNodes(nodes []*apiv1.Node) error { return fmt.Errorf("min size reached, nodes will not be deleted") } + return as.ForceDeleteNodes(nodes) +} + +// ForceDeleteNodes deletes nodes from the group regardless of constraints. +func (as *AgentPool) ForceDeleteNodes(nodes []*apiv1.Node) error { refs := make([]*azureRef, 0, len(nodes)) for _, node := range nodes { belongs, err := as.Belongs(node) @@ -463,19 +468,14 @@ func (as *AgentPool) DeleteNodes(nodes []*apiv1.Node) error { refs = append(refs, ref) } - err = as.deleteOutdatedDeployments() + err := as.deleteOutdatedDeployments() if err != nil { - klog.Warningf("DeleteNodes: failed to cleanup outdated deployments with err: %v.", err) + klog.Warningf("ForceDeleteNodes: failed to cleanup outdated deployments with err: %v.", err) } return as.DeleteInstances(refs) } -// ForceDeleteNodes deletes nodes from the group regardless of constraints. -func (as *AgentPool) ForceDeleteNodes(nodes []*apiv1.Node) error { - return cloudprovider.ErrNotImplemented -} - // Debug returns a debug string for the agent pool. func (as *AgentPool) Debug() string { return fmt.Sprintf("%s (%d:%d)", as.Name, as.MinSize(), as.MaxSize()) diff --git a/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go b/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go index e5bb1591e1..de874bbb0c 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go @@ -598,12 +598,14 @@ func (scaleSet *ScaleSet) DeleteNodes(nodes []*apiv1.Node) error { if int(size) <= scaleSet.MinSize() { return fmt.Errorf("min size reached, nodes will not be deleted") } + return scaleSet.ForceDeleteNodes(nodes) +} - // Distinguish between unregistered node deletion and normal node deletion +// ForceDeleteNodes deletes nodes from the group regardless of constraints. +func (scaleSet *ScaleSet) ForceDeleteNodes(nodes []*apiv1.Node) error { + klog.V(8).Infof("Delete nodes requested: %q\n", nodes) refs := make([]*azureRef, 0, len(nodes)) hasUnregisteredNodes := false - unregisteredRefs := make([]*azureRef, 0, len(nodes)) - for _, node := range nodes { belongs, err := scaleSet.Belongs(node) if err != nil { @@ -620,28 +622,12 @@ func (scaleSet *ScaleSet) DeleteNodes(nodes []*apiv1.Node) error { ref := &azureRef{ Name: node.Spec.ProviderID, } - - if node.Annotations[cloudprovider.FakeNodeReasonAnnotation] == cloudprovider.FakeNodeUnregistered { - klog.V(5).Infof("Node: %s type is unregistered..Appending to the unregistered list", node.Name) - unregisteredRefs = append(unregisteredRefs, ref) - } else { - refs = append(refs, ref) - } - } - - if len(unregisteredRefs) > 0 { - klog.V(3).Infof("Removing unregisteredNodes: %v", unregisteredRefs) - return scaleSet.DeleteInstances(unregisteredRefs, true) + refs = append(refs, ref) } return scaleSet.DeleteInstances(refs, hasUnregisteredNodes) } -// ForceDeleteNodes deletes nodes from the group regardless of constraints. -func (scaleSet *ScaleSet) ForceDeleteNodes(nodes []*apiv1.Node) error { - return cloudprovider.ErrNotImplemented -} - // Id returns ScaleSet id. func (scaleSet *ScaleSet) Id() string { return scaleSet.Name From 8e1b388f1466b2cc97aedc28a487d8e13133b0ac Mon Sep 17 00:00:00 2001 From: Maximiliano Uribe Falcon <90781524+MaximilianoUribe@users.noreply.github.com> Date: Thu, 21 Aug 2025 09:29:33 -0700 Subject: [PATCH 2/3] Update cluster-autoscaler/cloudprovider/azure/azure_scale_set.go Co-authored-by: Bryce Soghigian <49734722+Bryce-Soghigian@users.noreply.github.com> --- cluster-autoscaler/cloudprovider/azure/azure_scale_set.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go b/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go index de874bbb0c..2ec1ddffd6 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go @@ -603,7 +603,7 @@ func (scaleSet *ScaleSet) DeleteNodes(nodes []*apiv1.Node) error { // ForceDeleteNodes deletes nodes from the group regardless of constraints. func (scaleSet *ScaleSet) ForceDeleteNodes(nodes []*apiv1.Node) error { - klog.V(8).Infof("Delete nodes requested: %q\n", nodes) + klog.V(5).Infof("Delete nodes requested: %q\n", nodes) refs := make([]*azureRef, 0, len(nodes)) hasUnregisteredNodes := false for _, node := range nodes { From d88d1ab9c15df1362ce9e921393e78999aa0c51f Mon Sep 17 00:00:00 2001 From: Maximiliano Uribe Falcon <90781524+MaximilianoUribe@users.noreply.github.com> Date: Fri, 22 Aug 2025 19:56:36 +0000 Subject: [PATCH 3/3] adding ut and changing logging level --- .../cloudprovider/azure/azure_agent_pool.go | 4 +- .../azure/azure_agent_pool_test.go | 40 +++++++++++++++++++ .../cloudprovider/azure/azure_scale_set.go | 4 +- 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go b/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go index cdf925592c..f6f7d4d02a 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go @@ -429,14 +429,14 @@ func (as *AgentPool) DeleteInstances(instances []*azureRef) error { } } - klog.V(6).Infof("DeleteInstances: invalidating cache") + klog.V(3).Infof("DeleteInstances: invalidating cache") as.manager.invalidateCache() return nil } // DeleteNodes deletes the nodes from the group. func (as *AgentPool) DeleteNodes(nodes []*apiv1.Node) error { - klog.V(6).Infof("Delete nodes requested: %v\n", nodes) + klog.V(3).Infof("Delete nodes requested: %v\n", nodes) indexes, _, err := as.GetVMIndexes() if err != nil { return err diff --git a/cluster-autoscaler/cloudprovider/azure/azure_agent_pool_test.go b/cluster-autoscaler/cloudprovider/azure/azure_agent_pool_test.go index 3ba87aa634..1f3b8872c2 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_agent_pool_test.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_agent_pool_test.go @@ -410,6 +410,46 @@ func TestDeleteInstances(t *testing.T) { assert.Equal(t, expectedErr, err) } +func TestForceDeleteNodes(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + as := newTestAgentPool(newTestAzureManager(t), "as") + as1 := newTestAgentPool(newTestAzureManager(t), "as1") + as.manager.azureCache.instanceToNodeGroup[azureRef{Name: testValidProviderID0}] = as + as.manager.azureCache.instanceToNodeGroup[azureRef{Name: testValidProviderID1}] = as1 + as.manager.azureCache.instanceToNodeGroup[azureRef{Name: testInvalidProviderID}] = as + + mockVMClient := mockvmclient.NewMockInterface(ctrl) + as.manager.azClient.virtualMachinesClient = mockVMClient + + mockSAClient := mockstorageaccountclient.NewMockInterface(ctrl) + as.manager.azClient.storageAccountsClient = mockSAClient + + err := as.ForceDeleteNodes([]*apiv1.Node{}) + assert.NoError(t, err) + + nodes := []*apiv1.Node{ + { + Spec: apiv1.NodeSpec{ProviderID: testInvalidProviderID}, + ObjectMeta: v1.ObjectMeta{Name: "node"}, + }, + } + err = as.ForceDeleteNodes(nodes) + expectedErr := fmt.Errorf("resource name was missing from identifier") + assert.Equal(t, expectedErr, err) + + nodes = []*apiv1.Node{ + { + Spec: apiv1.NodeSpec{ProviderID: testValidProviderID1}, + ObjectMeta: v1.ObjectMeta{Name: "node1"}, + }, + } + err = as.ForceDeleteNodes(nodes) + expectedErr = fmt.Errorf("node1 belongs to a different asg than as") + assert.Equal(t, expectedErr, err) +} + func TestAgentPoolDeleteNodes(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() diff --git a/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go b/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go index 2ec1ddffd6..ee76e9b33a 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go @@ -589,7 +589,7 @@ func (scaleSet *ScaleSet) waitForDeleteInstances(future *azure.Future, requiredI // DeleteNodes deletes the nodes from the group. func (scaleSet *ScaleSet) DeleteNodes(nodes []*apiv1.Node) error { - klog.V(8).Infof("Delete nodes requested: %q\n", nodes) + klog.V(3).Infof("Delete nodes requested: %q\n", nodes) size, err := scaleSet.getScaleSetSize() if err != nil { return err @@ -603,7 +603,7 @@ func (scaleSet *ScaleSet) DeleteNodes(nodes []*apiv1.Node) error { // ForceDeleteNodes deletes nodes from the group regardless of constraints. func (scaleSet *ScaleSet) ForceDeleteNodes(nodes []*apiv1.Node) error { - klog.V(5).Infof("Delete nodes requested: %q\n", nodes) + klog.V(3).Infof("Delete nodes requested: %q\n", nodes) refs := make([]*azureRef, 0, len(nodes)) hasUnregisteredNodes := false for _, node := range nodes {