refactor: share validation between NodeGroupForNode and HasInstance; clean up tests

This commit is contained in:
Bryce Soghigian 2024-07-23 04:39:25 -07:00
parent 39d0882893
commit e48a743079
4 changed files with 139 additions and 98 deletions

View File

@ -319,7 +319,6 @@ func (m *azureCache) Register(nodeGroup cloudprovider.NodeGroup) bool {
// Node group is already registered and min/max size haven't changed, no action required.
return false
}
m.registeredNodeGroups[i] = nodeGroup
klog.V(4).Infof("Node group %q updated", nodeGroup.Id())
m.invalidateUnownedInstanceCache()
@ -328,6 +327,7 @@ func (m *azureCache) Register(nodeGroup cloudprovider.NodeGroup) bool {
}
klog.V(4).Infof("Registering Node Group %q", nodeGroup.Id())
m.registeredNodeGroups = append(m.registeredNodeGroups, nodeGroup)
m.invalidateUnownedInstanceCache()
return true
@ -398,6 +398,7 @@ func (m *azureCache) getAutoscalingOptions(ref azureRef) map[string]string {
// HasInstance returns if a given instance exists in the azure cache
func (m *azureCache) HasInstance(instance *azureRef) (bool, error) {
vmsPoolSet := m.getVMsPoolSet()
m.mutex.Lock()
defer m.mutex.Unlock()
resourceID, err := convertResourceGroupNameToLower(instance.Name)
@ -408,24 +409,32 @@ func (m *azureCache) HasInstance(instance *azureRef) (bool, error) {
return false, err
}
inst := azureRef{Name: resourceID}
// Check if resource id belongs to an asg
// TODO: Validate how unownedInstances gets its state,
// is there a better way for us to check if an instance id belongs to
// an autoscaling group?
klog.V(4).Infof("HasInstance: Checking instance %s", inst.Name)
if m.unownedInstances[inst] {
// We want to fall back onto the to be deleted taint
// if we do not own the instance
klog.V(4).Infof("HasInstance: Instance %s is unowned", inst.Name)
return false, cloudprovider.ErrNotImplemented
}
}
if unowned, err := m.isUnownedInstance(inst, ""); unowned || err != nil {
klog.V(4).Infof("HasInstance: Instance %s is unowned or error occurred: %v", inst.Name, err)
return false, cloudprovider.ErrNotImplemented
}
if invalid, err := m.isInvalidVMType(inst, m.vmType, vmsPoolSet); invalid || err != nil {
klog.V(4).Infof("HasInstance: Instance %s is invalid VM type or error occurred: %v", inst.Name, err)
return false, cloudprovider.ErrNotImplemented
}
if nodeGroup := m.getInstanceFromCache(inst.Name); nodeGroup != nil {
klog.V(4).Infof("FindForInstance: found node group %q in cache", nodeGroup.Id())
return true, nil
}
// couldn't find instance in the cache, assume it's deleted
klog.V(4).Infof("HasInstance: Couldn't find instance %s in cache, assuming it's deleted", inst.Name)
return false, nil
}
// FindForInstance returns node group of the given Instance
func (m *azureCache) FindForInstance(instance *azureRef, vmType string) (cloudprovider.NodeGroup, error) {
vmsPoolSet := m.getVMsPoolSet()
@ -439,32 +448,13 @@ func (m *azureCache) FindForInstance(instance *azureRef, vmType string) (cloudpr
return nil, err
}
inst := azureRef{Name: resourceID}
if m.unownedInstances[inst] {
// We already know we don't own this instance. Return early and avoid
// additional calls.
klog.V(4).Infof("FindForInstance: Couldn't find NodeGroup of instance %q", inst)
return nil, nil
if unowned, err := m.isUnownedInstance(inst, ""); unowned || err != nil {
return nil, err
}
// cluster with vmss pool only
if vmType == vmTypeVMSS && len(vmsPoolSet) == 0 {
if m.areAllScaleSetsUniform() {
// Omit virtual machines not managed by vmss only in case of uniform scale set.
if ok := virtualMachineRE.Match([]byte(inst.Name)); ok {
klog.V(3).Infof("Instance %q is not managed by vmss, omit it in autoscaler", instance.Name)
m.unownedInstances[inst] = true
return nil, nil
}
}
}
if vmType == vmTypeStandard {
// Omit virtual machines with providerID not in Azure resource ID format.
if ok := virtualMachineRE.Match([]byte(inst.Name)); !ok {
klog.V(3).Infof("Instance %q is not in Azure resource ID format, omit it in autoscaler", instance.Name)
m.unownedInstances[inst] = true
return nil, nil
}
if invalid, err := m.isInvalidVMType(inst, vmType, vmsPoolSet); invalid || err != nil {
return nil, err
}
// Look up caches for the instance.
@ -477,6 +467,43 @@ func (m *azureCache) FindForInstance(instance *azureRef, vmType string) (cloudpr
return nil, nil
}
// isUnownedInstance reports whether inst is not owned by any registered node
// group. If inst is already recorded in the unowned-instance cache, it returns
// true immediately, avoiding additional lookups. If a non-empty reason is
// supplied, the caller has determined inst is unowned; the instance is then
// recorded so future calls short-circuit.
//
// The error result is always nil; it is kept for signature symmetry with
// isInvalidVMType so callers can handle both helpers uniformly.
//
// NOTE(review): this reads and writes m.unownedInstances, so callers are
// presumably expected to hold m.mutex — confirm against call sites.
func (m *azureCache) isUnownedInstance(inst azureRef, reason string) (bool, error) {
	if m.unownedInstances[inst] {
		// We already know we don't own this instance. Return early and avoid
		// additional calls. (Log prefix fixed: this helper is shared by both
		// FindForInstance and HasInstance, so don't claim a specific caller.)
		klog.V(4).Infof("isUnownedInstance: instance %q is known to be unowned", inst.Name)
		return true, nil
	}
	if reason != "" {
		klog.V(3).Infof("Instance %q %s, omit it in autoscaler", inst.Name, reason)
		m.unownedInstances[inst] = true
		return true, nil
	}
	return false, nil
}
// isInvalidVMType reports whether inst should be omitted by the autoscaler
// given the configured vmType and the set of VMs pools. An instance deemed
// invalid is recorded as unowned via isUnownedInstance, so subsequent lookups
// return early without re-validating.
func (m *azureCache) isInvalidVMType(inst azureRef, vmType string, vmsPoolSet map[string]struct{}) (bool, error) {
	// Cluster with vmss pool only: omit virtual machines not managed by vmss,
	// but only when every monitored scale set uses the Uniform orchestration
	// mode.
	if vmType == vmTypeVMSS && len(vmsPoolSet) == 0 && m.areAllScaleSetsUniform() {
		// MatchString avoids the []byte copy that Match([]byte(...)) incurs.
		if virtualMachineRE.MatchString(inst.Name) {
			return m.isUnownedInstance(inst, "is not managed by vmss")
		}
	}
	// TODO: unify providerID validation
	// Omit virtual machines with a providerID not in Azure resource ID format.
	if vmType == vmTypeStandard && !virtualMachineRE.MatchString(inst.Name) {
		return m.isUnownedInstance(inst, "is not in Azure resource ID format")
	}
	return false, nil
}
// areAllScaleSetsUniform reports whether every scale set the autoscaler is monitoring uses the Uniform orchestration mode.
func (m *azureCache) areAllScaleSetsUniform() bool {
for _, scaleSet := range m.scaleSets {

View File

@ -132,7 +132,6 @@ func (azure *AzureCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
if !strings.HasPrefix(node.Spec.ProviderID, "azure://") {
return false, fmt.Errorf("invalid azure ProviderID prefix for node: %v, skipped", node.Name)
}
instance := &azureRef{Name: node.Spec.ProviderID}
return azure.azureManager.azureCache.HasInstance(instance)
}

View File

@ -17,6 +17,7 @@ limitations under the License.
package azure
import (
"fmt"
"testing"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute"
@ -133,10 +134,42 @@ func TestNodeGroups(t *testing.T) {
assert.Equal(t, len(provider.NodeGroups()), 2)
}
func TestStaticVMSSNodesAreNotCountedTowardBeingDeleted(t *testing.T){}
func TestHasInstanceNodesOutsideOfUnownedInstancesThatAreNonASG(t *testing.T) {}
// func TestStaticVMSSNodesAreNotCountedTowardBeingDeleted(t *testing.T) {
// // VMSS Instances that belong to a VMSS on the cluster but do not belong to a registered ASG
// // should return err unimplemented for HasInstance
// ctrl := gomock.NewController(t)
// defer ctrl.Finish()
//
// provider := newTestProvider(t)
// mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
// mockVMClient := mockvmclient.NewMockInterface(ctrl)
// mockVMSSVMClient := mockvmssvmclient.NewMockInterface(ctrl)
// provider.azureManager.azClient.virtualMachinesClient = mockVMClient
// provider.azureManager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
// provider.azureManager.azClient.virtualMachineScaleSetVMsClient = mockVMSSVMClient
//
// // Simulate VMSS instances
// unregisteredVMSSInstance := &apiv1.Node{
// ObjectMeta: metav1.ObjectMeta{
// Name: "unregistered-vmss-node",
// },
// Spec: apiv1.NodeSpec{
// ProviderID: "azure:///subscriptions/sub/resourceGroups/rg/providers/Microsoft.Compute/virtualMachineScaleSets/unregistered-vmss-instance-id/virtualMachines/0",
// },
// }
//
// // Mock responses to simulate that the instance belongs to a VMSS not in any registered ASG
// expectedVMSSVMs := newTestVMSSVMList(1)
// mockVMSSVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup, "unregistered-vmss-instance-id", gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes()
//
// // Call HasInstance and check the result
// inst := &azureRef{Name: unregisteredVMSSInstance.Spec.ProviderID}
// hasInstance, err := provider.azureManager.azureCache.HasInstance(inst)
// assert.False(t, hasInstance)
// assert.Equal(t, cloudprovider.ErrNotImplemented, err)
// }
func TestHasInstance(t *testing.T) {
func TestHasInstance(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
@ -280,77 +313,60 @@ func TestMixedNodeGroups(t *testing.T) {
assert.Equal(t, group.MaxSize(), 10)
}
func TestNodeGroupForNode(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
orchestrationModes := [2]compute.OrchestrationMode{compute.Uniform, compute.Flexible}
orchestrationModes := []compute.OrchestrationMode{compute.Uniform, compute.Flexible}
expectedVMSSVMs := newTestVMSSVMList(3)
expectedVMs := newTestVMList(3)
for _, orchMode := range orchestrationModes {
expectedScaleSets := newTestVMSSList(3, "test-asg", "eastus", orchMode)
provider := newTestProvider(t)
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedScaleSets, nil)
provider.azureManager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
mockVMClient := mockvmclient.NewMockInterface(ctrl)
provider.azureManager.azClient.virtualMachinesClient = mockVMClient
mockVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedVMs, nil).AnyTimes()
t.Run(fmt.Sprintf("OrchestrationMode_%v", orchMode), func(t *testing.T) {
expectedScaleSets := newTestVMSSList(3, "test-asg", "eastus", orchMode)
provider := newTestProvider(t)
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedScaleSets, nil)
provider.azureManager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
mockVMClient := mockvmclient.NewMockInterface(ctrl)
provider.azureManager.azClient.virtualMachinesClient = mockVMClient
mockVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup).Return(expectedVMs, nil).AnyTimes()
if orchMode == compute.Uniform {
mockVMSSVMClient := mockvmssvmclient.NewMockInterface(ctrl)
mockVMSSVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup, "test-asg", gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes()
provider.azureManager.azClient.virtualMachineScaleSetVMsClient = mockVMSSVMClient
} else {
if orchMode == compute.Uniform {
mockVMSSVMClient := mockvmssvmclient.NewMockInterface(ctrl)
mockVMSSVMClient.EXPECT().List(gomock.Any(), provider.azureManager.config.ResourceGroup, "test-asg", gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes()
provider.azureManager.azClient.virtualMachineScaleSetVMsClient = mockVMSSVMClient
} else {
provider.azureManager.config.EnableVmssFlex = true
mockVMClient.EXPECT().ListVmssFlexVMsWithoutInstanceView(gomock.Any(), "test-asg").Return(expectedVMs, nil).AnyTimes()
}
provider.azureManager.config.EnableVmssFlex = true
mockVMClient.EXPECT().ListVmssFlexVMsWithoutInstanceView(gomock.Any(), "test-asg").Return(expectedVMs, nil).AnyTimes()
registered := provider.azureManager.RegisterNodeGroup(
newTestScaleSet(provider.azureManager, "test-asg"))
provider.azureManager.explicitlyConfigured["test-asg"] = true
assert.True(t, registered)
assert.Equal(t, len(provider.NodeGroups()), 1)
}
node := newApiNode(orchMode, 0)
// refresh cache
provider.azureManager.forceRefresh()
group, err := provider.NodeGroupForNode(node)
assert.NoError(t, err)
assert.NotNil(t, group, "Group should not be nil")
assert.Equal(t, group.Id(), "test-asg")
assert.Equal(t, group.MinSize(), 1)
assert.Equal(t, group.MaxSize(), 5)
registered := provider.azureManager.RegisterNodeGroup(
newTestScaleSet(provider.azureManager, testASG))
provider.azureManager.explicitlyConfigured[testASG] = true
assert.True(t, registered)
assert.Equal(t, len(provider.NodeGroups()), 1)
node := newApiNode(orchMode, 0)
// refresh cache
provider.azureManager.forceRefresh()
group, err := provider.NodeGroupForNode(node)
assert.NoError(t, err)
assert.NotNil(t, group, "Group should not be nil")
assert.Equal(t, group.Id(), testASG)
assert.Equal(t, group.MinSize(), 1)
assert.Equal(t, group.MaxSize(), 5)
// test node in cluster that is not in a group managed by cluster autoscaler
nodeNotInGroup := &apiv1.Node{
Spec: apiv1.NodeSpec{
ProviderID: azurePrefix + "/subscriptions/subscripion/resourceGroups/test-resource-group/providers/Microsoft.Compute/virtualMachines/test-instance-id-not-in-group",
},
}
group, err = provider.NodeGroupForNode(nodeNotInGroup)
assert.NoError(t, err)
assert.Nil(t, group)
// test node in cluster that is not in a group managed by cluster autoscaler
nodeNotInGroup := &apiv1.Node{
Spec: apiv1.NodeSpec{
ProviderID: "azure:///subscriptions/subscription/resourceGroups/test-resource-group/providers/Microsoft.Compute/virtualMachines/test-instance-id-not-in-group",
},
}
group, err = provider.NodeGroupForNode(nodeNotInGroup)
assert.NoError(t, err)
assert.Nil(t, group)
})
}
}
// TestNodeGroupForNodeWithNoProviderId verifies that a node with an empty
// ProviderID is matched to no node group and produces no error.
func TestNodeGroupForNodeWithNoProviderId(t *testing.T) {
	provider := newTestProvider(t)
	registered := provider.azureManager.RegisterNodeGroup(
		newTestScaleSet(provider.azureManager, "test-asg"))
	assert.True(t, registered)
	assert.Equal(t, len(provider.NodeGroups()), 1)

	node := &apiv1.Node{
		Spec: apiv1.NodeSpec{
			ProviderID: "",
		},
	}
	group, err := provider.NodeGroupForNode(node)
	assert.NoError(t, err)
	// assert.Nil correctly handles typed-nil interface values, unlike the
	// previous assert.Equal(t, group, nil) (which also had the expected and
	// actual arguments reversed).
	assert.Nil(t, group)
}

View File

@ -173,7 +173,6 @@ func (m *AzureManager) buildNodeGroupFromSpec(spec string) (cloudprovider.NodeGr
if err != nil {
return nil, fmt.Errorf("failed to parse node group spec: %v", err)
}
vmsPoolSet := m.azureCache.getVMsPoolSet()
if _, ok := vmsPoolSet[s.Name]; ok {
return NewVMsPool(s, m), nil