Merge a81913f3b9 into a9292351c3
				
					
				
			This commit is contained in:
		
						commit
						791b1845dd
					
				|  | @ -175,6 +175,11 @@ func (ali *aliCloudProvider) Cleanup() error { | |||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (ali *aliCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // AliRef contains a reference to an ECS instance.
 | ||||
| type AliRef struct { | ||||
| 	ID     string | ||||
|  |  | |||
|  | @ -193,6 +193,11 @@ func (aws *awsCloudProvider) Refresh() error { | |||
| 	return aws.awsManager.Refresh() | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (aws *awsCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // AwsRef contains a reference to some entity in AWS world.
 | ||||
| type AwsRef struct { | ||||
| 	Name string | ||||
|  |  | |||
|  | @ -174,6 +174,11 @@ func (azure *AzureCloudProvider) Refresh() error { | |||
| 	return azure.azureManager.Refresh() | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (azure *AzureCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // azureRef contains a reference to some entity in Azure world.
 | ||||
| type azureRef struct { | ||||
| 	Name string | ||||
|  |  | |||
|  | @ -228,6 +228,11 @@ func (baiducloud *baiducloudCloudProvider) Refresh() error { | |||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (baiducloud *baiducloudCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // BaiducloudRef contains a reference to some entity in baiducloud world.
 | ||||
| type BaiducloudRef struct { | ||||
| 	Name string | ||||
|  |  | |||
|  | @ -172,6 +172,11 @@ func (d *bizflycloudCloudProvider) Refresh() error { | |||
| 	return d.manager.Refresh() | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (d *bizflycloudCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // BuildBizflyCloud builds the Bizflycloud cloud provider.
 | ||||
| func BuildBizflyCloud( | ||||
| 	opts config.AutoscalingOptions, | ||||
|  |  | |||
|  | @ -219,6 +219,11 @@ func (b *brightboxCloudProvider) Cleanup() error { | |||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (b *brightboxCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // BuildBrightbox builds the Brightbox provider
 | ||||
| func BuildBrightbox( | ||||
| 	opts config.AutoscalingOptions, | ||||
|  |  | |||
|  | @ -170,6 +170,11 @@ func (ccp *cherryCloudProvider) Cleanup() error { | |||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (ccp *cherryCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // BuildCherry is called by the autoscaler to build a Cherry Servers cloud provider.
 | ||||
| //
 | ||||
| // The cherryManager is created here, and the node groups are created
 | ||||
|  |  | |||
|  | @ -167,6 +167,11 @@ func (d *civoCloudProvider) Refresh() error { | |||
| 	return d.manager.Refresh() | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (d *civoCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // BuildCivo builds the Civo cloud provider.
 | ||||
| func BuildCivo( | ||||
| 	opts config.AutoscalingOptions, | ||||
|  |  | |||
|  | @ -154,6 +154,14 @@ type CloudProvider interface { | |||
| 	// Refresh is called before every main loop and can be used to dynamically update cloud provider state.
 | ||||
| 	// In particular the list of node groups returned by NodeGroups can change as a result of CloudProvider.Refresh().
 | ||||
| 	Refresh() error | ||||
| 
 | ||||
| 	// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This function
 | ||||
| 	// will be called during prefiltering of nodes for scaledown to allow cloud providers the opportunity
 | ||||
| 	// to reject a node for scale down. This may be used in cases where nodes are undergoing upgrades or other
 | ||||
| 	// cloud-specific behavior where the cluster autoscaler should not begin cordoning, draining, and tainting
 | ||||
| 	// the node.
 | ||||
| 	// Returns true if the node can be safely scaled down or false otherwise.
 | ||||
| 	IsNodeCandidateForScaleDown(*apiv1.Node) (bool, error) | ||||
| } | ||||
| 
 | ||||
| // ErrNotImplemented is returned if a method is not implemented.
 | ||||
|  |  | |||
|  | @ -115,6 +115,11 @@ func (provider *cloudStackCloudProvider) Pricing() (cloudprovider.PricingModel, | |||
| 	return nil, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (provider *cloudStackCloudProvider) IsNodeCandidateForScaleDown(node *v1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically
 | ||||
| // created on the cloud provider side. The node group is not returned by NodeGroups() until it is created.
 | ||||
| func (provider *cloudStackCloudProvider) NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string, taints []v1.Taint, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) { | ||||
|  |  | |||
|  | @ -849,6 +849,27 @@ func (c *machineController) listMachinesForScalableResource(r *unstructured.Unst | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| func (c *machineController) listMachineSetsForMachineDeployment(r *unstructured.Unstructured) ([]*unstructured.Unstructured, error) { | ||||
| 	selector := labels.SelectorFromSet(map[string]string{ | ||||
| 		machineDeploymentNameLabel: r.GetName(), | ||||
| 	}) | ||||
| 	objs, err := c.machineSetInformer.Lister().ByNamespace(r.GetNamespace()).List(selector) | ||||
| 	if err != nil { | ||||
| 		return nil, fmt.Errorf("unable to list MachineSets for MachineDeployment %s: %w", r.GetName(), err) | ||||
| 	} | ||||
| 
 | ||||
| 	results := make([]*unstructured.Unstructured, 0, len(objs)) | ||||
| 	for _, x := range objs { | ||||
| 		u, ok := x.(*unstructured.Unstructured) | ||||
| 		if !ok { | ||||
| 			return nil, fmt.Errorf("expected unstructured resource from lister, not %T", x) | ||||
| 		} | ||||
| 		results = append(results, u.DeepCopy()) | ||||
| 	} | ||||
| 
 | ||||
| 	return results, nil | ||||
| } | ||||
| 
 | ||||
| func (c *machineController) listScalableResources() ([]*unstructured.Unstructured, error) { | ||||
| 	scalableResources, err := c.listResources(c.machineSetInformer.Lister()) | ||||
| 	if err != nil { | ||||
|  |  | |||
|  | @ -458,6 +458,68 @@ func (ng *nodegroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*c | |||
| 	return &defaults, nil | ||||
| } | ||||
| 
 | ||||
| func (ng *nodegroup) IsMachineDeploymentAndRollingOut() (bool, error) { | ||||
| 	if ng.scalableResource.Kind() != machineDeploymentKind { | ||||
| 		// Not a MachineDeployment.
 | ||||
| 		return false, nil | ||||
| 	} | ||||
| 
 | ||||
| 	machineSets, err := ng.machineController.listMachineSetsForMachineDeployment(ng.scalableResource.unstructured) | ||||
| 	if err != nil { | ||||
| 		return false, err | ||||
| 	} | ||||
| 
 | ||||
| 	if len(machineSets) == 0 { | ||||
| 		// No MachineSets => MD is not rolling out.
 | ||||
| 		return false, nil | ||||
| 	} | ||||
| 
 | ||||
| 	// Find the latest revision, the MachineSet with the latest revision is the MachineSet that
 | ||||
| 	// matches the MachineDeployment spec.
 | ||||
| 	var latestMSRevisionInt int64 | ||||
| 	for _, ms := range machineSets { | ||||
| 		msRevision, ok := ms.GetAnnotations()[machineDeploymentRevisionAnnotation] | ||||
| 		if !ok { | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		msRevisionInt, err := strconv.ParseInt(msRevision, 10, 64) | ||||
| 		if err != nil { | ||||
| 			return false, errors.Wrapf(err, "failed to parse current revision on MachineSet %s", klog.KObj(ms)) | ||||
| 		} | ||||
| 		latestMSRevisionInt = max(latestMSRevisionInt, msRevisionInt) | ||||
| 	} | ||||
| 	maxMSRevision := strconv.FormatInt(latestMSRevisionInt, 10) | ||||
| 
 | ||||
| 	for _, ms := range machineSets { | ||||
| 		if ms.GetAnnotations()[machineDeploymentRevisionAnnotation] == maxMSRevision { | ||||
| 			// Ignore the MachineSet with the latest revision
 | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		// Check if any of the old MachineSets still have replicas
 | ||||
| 		replicas, found, err := unstructured.NestedInt64(ms.UnstructuredContent(), "spec", "replicas") | ||||
| 		if err != nil { | ||||
| 			return false, errors.Wrapf(err, "failed to find spec replicas on MachineSet %s", klog.KObj(ms)) | ||||
| 		} | ||||
| 		if found && replicas > 0 { | ||||
| 			// Found old MachineSets that still has replicas => MD is still rolling out.
 | ||||
| 			return true, nil | ||||
| 		} | ||||
| 		replicas, found, err = unstructured.NestedInt64(ms.UnstructuredContent(), "status", "replicas") | ||||
| 		if err != nil { | ||||
| 			return false, errors.Wrapf(err, "failed to find status replicas on MachineSet %s", klog.KObj(ms)) | ||||
| 		} | ||||
| 		if found && replicas > 0 { | ||||
| 			// Found old MachineSets that still has replicas => MD is still rolling out.
 | ||||
| 			return true, nil | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	// Didn't find any old MachineSets that still have replicas => MD is not rolling out.
 | ||||
| 	return false, nil | ||||
| } | ||||
| 
 | ||||
| func newNodeGroupFromScalableResource(controller *machineController, unstructuredScalableResource *unstructured.Unstructured) (*nodegroup, error) { | ||||
| 	// Ensure that the resulting node group would be allowed based on the autodiscovery specs if defined
 | ||||
| 	if !controller.allowedByAutoDiscoverySpecs(unstructuredScalableResource) { | ||||
|  |  | |||
|  | @ -21,6 +21,7 @@ import ( | |||
| 	"path" | ||||
| 	"reflect" | ||||
| 
 | ||||
| 	"github.com/pkg/errors" | ||||
| 	corev1 "k8s.io/api/core/v1" | ||||
| 	"k8s.io/apimachinery/pkg/api/resource" | ||||
| 	"k8s.io/client-go/discovery" | ||||
|  | @ -34,7 +35,7 @@ import ( | |||
| 
 | ||||
| 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider" | ||||
| 	"k8s.io/autoscaler/cluster-autoscaler/config" | ||||
| 	"k8s.io/autoscaler/cluster-autoscaler/utils/errors" | ||||
| 	caserrors "k8s.io/autoscaler/cluster-autoscaler/utils/errors" | ||||
| 	"k8s.io/autoscaler/cluster-autoscaler/utils/gpu" | ||||
| ) | ||||
| 
 | ||||
|  | @ -92,7 +93,7 @@ func (p *provider) HasInstance(node *corev1.Node) (bool, error) { | |||
| 	return false, fmt.Errorf("machine not found for node %s: %v", node.Name, err) | ||||
| } | ||||
| 
 | ||||
| func (*provider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) { | ||||
| func (*provider) Pricing() (cloudprovider.PricingModel, caserrors.AutoscalerError) { | ||||
| 	return nil, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
|  | @ -140,6 +141,24 @@ func (p *provider) GetNodeGpuConfig(node *corev1.Node) *cloudprovider.GpuConfig | |||
| 	return gpu.GetNodeGPUFromCloudProvider(p, node) | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
 | ||||
| func (p *provider) IsNodeCandidateForScaleDown(node *corev1.Node) (bool, error) { | ||||
| 	ng, err := p.controller.nodeGroupForNode(node) | ||||
| 	if err != nil { | ||||
| 		return false, errors.Wrapf(err, "failed to determine node group for node %s", klog.KObj(node)) | ||||
| 	} | ||||
| 	if ng == nil { | ||||
| 		klog.V(5).Infof("node %s is not part of a node group", klog.KObj(node)) | ||||
| 		return false, nil | ||||
| 	} | ||||
| 	rollingout, err := ng.IsMachineDeploymentAndRollingOut() | ||||
| 	if err != nil { | ||||
| 		return false, errors.Wrapf(err, "failed to determine rolling out status for MachineDeployment %s", ng.scalableResource.ID()) | ||||
| 	} | ||||
| 	// A node is a good candidate for scale down if it is not currently part of a MachineDeployment that is rolling out.
 | ||||
| 	return !rollingout, nil | ||||
| } | ||||
| 
 | ||||
| func newProvider( | ||||
| 	name string, | ||||
| 	rl *cloudprovider.ResourceLimiter, | ||||
|  |  | |||
|  | @ -32,15 +32,17 @@ import ( | |||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	cpuKey          = "capacity.cluster-autoscaler.kubernetes.io/cpu" | ||||
| 	memoryKey       = "capacity.cluster-autoscaler.kubernetes.io/memory" | ||||
| 	diskCapacityKey = "capacity.cluster-autoscaler.kubernetes.io/ephemeral-disk" | ||||
| 	gpuTypeKey      = "capacity.cluster-autoscaler.kubernetes.io/gpu-type" | ||||
| 	gpuCountKey     = "capacity.cluster-autoscaler.kubernetes.io/gpu-count" | ||||
| 	maxPodsKey      = "capacity.cluster-autoscaler.kubernetes.io/maxPods" | ||||
| 	taintsKey       = "capacity.cluster-autoscaler.kubernetes.io/taints" | ||||
| 	labelsKey       = "capacity.cluster-autoscaler.kubernetes.io/labels" | ||||
| 	draDriverKey    = "capacity.cluster-autoscaler.kubernetes.io/dra-driver" | ||||
| 	cpuKey                              = "capacity.cluster-autoscaler.kubernetes.io/cpu" | ||||
| 	memoryKey                           = "capacity.cluster-autoscaler.kubernetes.io/memory" | ||||
| 	diskCapacityKey                     = "capacity.cluster-autoscaler.kubernetes.io/ephemeral-disk" | ||||
| 	gpuTypeKey                          = "capacity.cluster-autoscaler.kubernetes.io/gpu-type" | ||||
| 	gpuCountKey                         = "capacity.cluster-autoscaler.kubernetes.io/gpu-count" | ||||
| 	maxPodsKey                          = "capacity.cluster-autoscaler.kubernetes.io/maxPods" | ||||
| 	taintsKey                           = "capacity.cluster-autoscaler.kubernetes.io/taints" | ||||
| 	labelsKey                           = "capacity.cluster-autoscaler.kubernetes.io/labels" | ||||
| 	draDriverKey                        = "capacity.cluster-autoscaler.kubernetes.io/dra-driver" | ||||
| 	machineDeploymentRevisionAnnotation = "machinedeployment.clusters.x-k8s.io/revision" | ||||
| 	machineDeploymentNameLabel          = "cluster.x-k8s.io/deployment-name" | ||||
| 	// UnknownArch is used if the Architecture is Unknown
 | ||||
| 	UnknownArch SystemArchitecture = "" | ||||
| 	// Amd64 is used if the Architecture is x86_64
 | ||||
|  |  | |||
|  | @ -181,6 +181,11 @@ func (c *CoreWeaveCloudProvider) Refresh() error { | |||
| 	return c.manager.Refresh() | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (c *CoreWeaveCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // BuildCoreWeave builds the CoreWeave cloud provider with the given options and returns it.
 | ||||
| func BuildCoreWeave(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider { | ||||
| 	klog.V(4).Infof("Building CoreWeave cloud provider with options: %+v", opts) | ||||
|  |  | |||
|  | @ -169,6 +169,11 @@ func (d *digitaloceanCloudProvider) Refresh() error { | |||
| 	return d.manager.Refresh() | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (d *digitaloceanCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // BuildDigitalOcean builds the DigitalOcean cloud provider.
 | ||||
| func BuildDigitalOcean( | ||||
| 	opts config.AutoscalingOptions, | ||||
|  |  | |||
|  | @ -176,6 +176,11 @@ func (pcp *equinixMetalCloudProvider) Cleanup() error { | |||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (pcp *equinixMetalCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // BuildCloudProvider is called by the autoscaler to build an Equinix Metal cloud provider.
 | ||||
| //
 | ||||
| // The equinixMetalManager is created here, and the node groups are created
 | ||||
|  |  | |||
|  | @ -224,6 +224,11 @@ func (e *exoscaleCloudProvider) Refresh() error { | |||
| 	return e.manager.Refresh() | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (e *exoscaleCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // BuildExoscale builds the Exoscale cloud provider.
 | ||||
| func BuildExoscale(_ config.AutoscalingOptions, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider { | ||||
| 	manager, err := newManager(discoveryOpts) | ||||
|  |  | |||
|  | @ -316,6 +316,11 @@ func (e *externalGrpcCloudProvider) Refresh() error { | |||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (e *externalGrpcCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // BuildExternalGrpc builds the externalgrpc cloud provider.
 | ||||
| func BuildExternalGrpc( | ||||
| 	opts config.AutoscalingOptions, | ||||
|  |  | |||
|  | @ -148,6 +148,11 @@ func (gce *GceCloudProvider) Refresh() error { | |||
| 	return gce.gceManager.Refresh() | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (gce *GceCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // GceRef contains a reference to some entity in GCE world.
 | ||||
| type GceRef struct { | ||||
| 	Project string | ||||
|  |  | |||
|  | @ -182,6 +182,11 @@ func (d *HetznerCloudProvider) Refresh() error { | |||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (d *HetznerCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // BuildHetzner builds the Hetzner cloud provider.
 | ||||
| func BuildHetzner(_ config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider { | ||||
| 	manager, err := newManager() | ||||
|  |  | |||
|  | @ -179,6 +179,11 @@ func (hcp *huaweicloudCloudProvider) Refresh() error { | |||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (hcp *huaweicloudCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| func (hcp *huaweicloudCloudProvider) buildAsgs(specs []string) error { | ||||
| 	asgs, err := hcp.cloudServiceManager.ListScalingGroups() | ||||
| 	if err != nil { | ||||
|  |  | |||
|  | @ -308,6 +308,11 @@ func (ic *IonosCloudCloudProvider) Refresh() error { | |||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (ic *IonosCloudCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // BuildIonosCloud builds the IonosCloud cloud provider.
 | ||||
| func BuildIonosCloud( | ||||
| 	opts config.AutoscalingOptions, | ||||
|  |  | |||
|  | @ -129,6 +129,11 @@ func (k *kamateraCloudProvider) Refresh() error { | |||
| 	return k.manager.refresh() | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (k *kamateraCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // BuildKamatera builds the Kamatera cloud provider.
 | ||||
| func BuildKamatera( | ||||
| 	opts config.AutoscalingOptions, | ||||
|  |  | |||
|  | @ -180,6 +180,11 @@ func (kubemark *KubemarkCloudProvider) Cleanup() error { | |||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (kubemark *KubemarkCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // NodeGroup implements NodeGroup interface.
 | ||||
| type NodeGroup struct { | ||||
| 	Name               string | ||||
|  |  | |||
|  | @ -170,6 +170,11 @@ func (kwok *KwokCloudProvider) Cleanup() error { | |||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (kwok *KwokCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // BuildKwok builds kwok cloud provider.
 | ||||
| func BuildKwok(opts config.AutoscalingOptions, | ||||
| 	do cloudprovider.NodeGroupDiscoveryOptions, | ||||
|  |  | |||
|  | @ -151,6 +151,11 @@ func (l *linodeCloudProvider) Refresh() error { | |||
| 	return l.manager.refresh() | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (l *linodeCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| func newLinodeCloudProvider(config io.Reader, rl *cloudprovider.ResourceLimiter) (cloudprovider.CloudProvider, error) { | ||||
| 	m, err := newManager(config) | ||||
| 	if err != nil { | ||||
|  |  | |||
|  | @ -203,6 +203,11 @@ func (mcp *magnumCloudProvider) Cleanup() error { | |||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (mcp *magnumCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // refreshNodeGroups gets the list of node groups which meet the requirements for autoscaling,
 | ||||
| // creates magnumNodeGroups for any that do not exist in the cloud provider,
 | ||||
| // and drops any node groups which are present in the cloud provider but not in the
 | ||||
|  |  | |||
|  | @ -276,3 +276,8 @@ func (_m *CloudProvider) Refresh() error { | |||
| 
 | ||||
| 	return r0 | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This implementation performs no check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (_m *CloudProvider) IsNodeCandidateForScaleDown(node *v1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
|  |  | |||
|  | @ -147,6 +147,11 @@ func (ocp *OciCloudProvider) Refresh() error { | |||
| 	return ocp.poolManager.Refresh() | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (ocp *OciCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // BuildOCI constructs the OciCloudProvider object that implements the cloud provider interface (InstancePoolManager).
 | ||||
| func BuildOCI(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider { | ||||
| 	ocidType, err := ocicommon.GetAllPoolTypes(opts.NodeGroups) | ||||
|  |  | |||
|  | @ -149,3 +149,8 @@ func (ocp *OciCloudProvider) Cleanup() error { | |||
| func (ocp *OciCloudProvider) Refresh() error { | ||||
| 	return ocp.manager.Refresh() | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (ocp *OciCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
|  |  | |||
|  | @ -318,3 +318,8 @@ func (provider *OVHCloudProvider) Refresh() error { | |||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (provider *OVHCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
|  |  | |||
|  | @ -219,6 +219,11 @@ func (provider *RancherCloudProvider) Cleanup() error { | |||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (provider *RancherCloudProvider) IsNodeCandidateForScaleDown(node *corev1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| func (provider *RancherCloudProvider) scalableNodeGroups() ([]*nodeGroup, error) { | ||||
| 	var result []*nodeGroup | ||||
| 
 | ||||
|  |  | |||
|  | @ -277,3 +277,8 @@ func (scw *scalewayCloudProvider) Refresh() error { | |||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (scw *scalewayCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
|  |  | |||
|  | @ -173,6 +173,11 @@ func (tencentcloud *tencentCloudProvider) Refresh() error { | |||
| 	return tencentcloud.tencentcloudManager.Refresh() | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (tencentcloud *tencentCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // BuildTencentcloud returns tencentcloud provider
 | ||||
| func BuildTencentcloud(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider { | ||||
| 	var config io.ReadCloser | ||||
|  |  | |||
|  | @ -45,20 +45,25 @@ type OnNodeGroupDeleteFunc func(string) error | |||
| // HasInstance is a function called to determine if a node has been removed from the cloud provider.
 | ||||
| type HasInstance func(string) (bool, error) | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown is a function called to determine if a cloud provider considers
 | ||||
| // a node a good candidate for scaling down. It is the handler type accepted by TestCloudProviderBuilder.WithIsNodeCandidateForScaleDown.
 | ||||
| type IsNodeCandidateForScaleDown func(*apiv1.Node) (bool, error) | ||||
| 
 | ||||
| // TestCloudProvider is a dummy cloud provider to be used in tests.
 | ||||
| type TestCloudProvider struct { | ||||
| 	sync.Mutex | ||||
| 	nodes             map[string]string | ||||
| 	groups            map[string]cloudprovider.NodeGroup | ||||
| 	onScaleUp         func(string, int) error | ||||
| 	onScaleDown       func(string, string) error | ||||
| 	onNodeGroupCreate func(string) error | ||||
| 	onNodeGroupDelete func(string) error | ||||
| 	hasInstance       func(string) (bool, error) | ||||
| 	machineTypes      []string | ||||
| 	machineTemplates  map[string]*framework.NodeInfo | ||||
| 	priceModel        cloudprovider.PricingModel | ||||
| 	resourceLimiter   *cloudprovider.ResourceLimiter | ||||
| 	nodes                       map[string]string | ||||
| 	groups                      map[string]cloudprovider.NodeGroup | ||||
| 	onScaleUp                   func(string, int) error | ||||
| 	onScaleDown                 func(string, string) error | ||||
| 	onNodeGroupCreate           func(string) error | ||||
| 	onNodeGroupDelete           func(string) error | ||||
| 	hasInstance                 func(string) (bool, error) | ||||
| 	isNodeCandidateForScaleDown func(*apiv1.Node) (bool, error) | ||||
| 	machineTypes                []string | ||||
| 	machineTemplates            map[string]*framework.NodeInfo | ||||
| 	priceModel                  cloudprovider.PricingModel | ||||
| 	resourceLimiter             *cloudprovider.ResourceLimiter | ||||
| } | ||||
| 
 | ||||
| // TestCloudProviderBuilder is used to create CloudProvider
 | ||||
|  | @ -127,6 +132,14 @@ func (b *TestCloudProviderBuilder) WithHasInstance(hasInstance HasInstance) *Tes | |||
| 	return b | ||||
| } | ||||
| 
 | ||||
| // WithIsNodeCandidateForScaleDown adds an IsNodeCandidateForScaleDown handler to provider.
 | ||||
| func (b *TestCloudProviderBuilder) WithIsNodeCandidateForScaleDown(isNodeCandidateForScaleDown IsNodeCandidateForScaleDown) *TestCloudProviderBuilder { | ||||
| 	b.builders = append(b.builders, func(p *TestCloudProvider) { | ||||
| 		p.isNodeCandidateForScaleDown = isNodeCandidateForScaleDown | ||||
| 	}) | ||||
| 	return b | ||||
| } | ||||
| 
 | ||||
| // Build returns a built test cloud provider
 | ||||
| func (b *TestCloudProviderBuilder) Build() *TestCloudProvider { | ||||
| 	p := &TestCloudProvider{ | ||||
|  | @ -352,6 +365,15 @@ func (tcp *TestCloudProvider) Refresh() error { | |||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
 | ||||
| func (tcp *TestCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	if tcp.isNodeCandidateForScaleDown == nil { | ||||
| 		return true, cloudprovider.ErrNotImplemented | ||||
| 	} | ||||
| 
 | ||||
| 	return tcp.isNodeCandidateForScaleDown(node) | ||||
| } | ||||
| 
 | ||||
| // TestNodeGroup is a node group used by TestCloudProvider.
 | ||||
| type TestNodeGroup struct { | ||||
| 	sync.Mutex | ||||
|  |  | |||
|  | @ -162,6 +162,11 @@ func (u *uthoCloudProvider) Refresh() error { | |||
| 	return u.manager.Refresh() | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This provider has no provider-specific check: the node argument is ignored and the result is always true accompanied by cloudprovider.ErrNotImplemented.
 | ||||
| func (u *uthoCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // BuildUtho builds the Utho cloud provider.
 | ||||
| func BuildUtho( | ||||
| 	opts config.AutoscalingOptions, | ||||
|  |  | |||
|  | @ -120,6 +120,11 @@ func (v *volcengineCloudProvider) Refresh() error { | |||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
 | ||||
| func (v *volcengineCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // GetNodeGpuConfig returns the label, type and resource name for the GPU added to node. If node doesn't have
 | ||||
| // any GPUs, it returns nil.
 | ||||
| func (v *volcengineCloudProvider) GetNodeGpuConfig(node *apiv1.Node) *cloudprovider.GpuConfig { | ||||
|  |  | |||
|  | @ -140,6 +140,11 @@ func (v *vultrCloudProvider) Refresh() error { | |||
| 	return v.manager.Refresh() | ||||
| } | ||||
| 
 | ||||
| // IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
 | ||||
| func (v *vultrCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) { | ||||
| 	return true, cloudprovider.ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // toProviderID returns a provider ID from the given node ID.
 | ||||
| func toProviderID(nodeID string) string { | ||||
| 	return fmt.Sprintf("%s%s", vultrProviderIDPrefix, nodeID) | ||||
|  |  | |||
|  | @ -22,6 +22,7 @@ import ( | |||
| 	apiv1 "k8s.io/api/core/v1" | ||||
| 	klog "k8s.io/klog/v2" | ||||
| 
 | ||||
| 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider" | ||||
| 	"k8s.io/autoscaler/cluster-autoscaler/context" | ||||
| 	"k8s.io/autoscaler/cluster-autoscaler/utils" | ||||
| 	"k8s.io/autoscaler/cluster-autoscaler/utils/errors" | ||||
|  | @ -48,6 +49,15 @@ func (n *PreFilteringScaleDownNodeProcessor) GetScaleDownCandidates(ctx *context | |||
| 	nodeGroupSize := utils.GetNodeGroupSizeMap(ctx.CloudProvider) | ||||
| 
 | ||||
| 	for _, node := range nodes { | ||||
| 		if candidate, err := ctx.CloudProvider.IsNodeCandidateForScaleDown(node); err != nil { | ||||
| 			if err != cloudprovider.ErrNotImplemented { | ||||
| 				klog.Warningf("Error while checking if node is a candidate for deletion %s: %v", node.Name, err) | ||||
| 				continue | ||||
| 			} | ||||
| 		} else if !candidate { | ||||
| 			klog.V(5).Infof("Node %s is not a candidate for deletion (cloud provider determined)", node.Name) | ||||
| 			continue | ||||
| 		} | ||||
| 		nodeGroup, err := ctx.CloudProvider.NodeGroupForNode(node) | ||||
| 		if err != nil { | ||||
| 			klog.Warningf("Error while checking node group for %s: %v", node.Name, err) | ||||
|  |  | |||
|  | @ -17,6 +17,7 @@ limitations under the License. | |||
| package nodes | ||||
| 
 | ||||
| import ( | ||||
| 	"strings" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/stretchr/testify/assert" | ||||
|  | @ -44,23 +45,75 @@ func TestPreFilteringScaleDownNodeProcessor_GetScaleDownCandidateNodes(t *testin | |||
| 	ng1_1 := BuildTestNode("ng1-1", 1000, 1000) | ||||
| 	ng1_2 := BuildTestNode("ng1-2", 1000, 1000) | ||||
| 	ng2_1 := BuildTestNode("ng2-1", 1000, 1000) | ||||
| 	ng2_2 := BuildTestNode("ng2-2", 1000, 1000) | ||||
| 	noNg := BuildTestNode("no-ng", 1000, 1000) | ||||
| 	provider := testprovider.NewTestCloudProviderBuilder().Build() | ||||
| 	provider.AddNodeGroup("ng1", 1, 10, 2) | ||||
| 	provider.AddNodeGroup("ng2", 1, 10, 1) | ||||
| 	provider.AddNode("ng1", ng1_1) | ||||
| 	provider.AddNode("ng1", ng1_2) | ||||
| 	provider.AddNode("ng2", ng2_1) | ||||
| 
 | ||||
| 	ctx := &context.AutoscalingContext{ | ||||
| 		CloudProvider: provider, | ||||
| 	testCases := map[string]struct { | ||||
| 		buildProvider     func() *testprovider.TestCloudProvider | ||||
| 		configureProvider func(p *testprovider.TestCloudProvider) | ||||
| 		expectedNodes     []*apiv1.Node | ||||
| 		inputNodes        []*apiv1.Node | ||||
| 	}{ | ||||
| 		// Expectation: only nodes belonging to node groups above their minimum size should be candidates.
 | ||||
| 		"1 scale down candidate, 1 node group at minimum size, 1 node with no node group, 1 node group above minimum size.": { | ||||
| 			configureProvider: func(p *testprovider.TestCloudProvider) { | ||||
| 				p.AddNodeGroup("ng1", 1, 10, 2) | ||||
| 				p.AddNodeGroup("ng2", 1, 10, 1) | ||||
| 				p.AddNode("ng1", ng1_1) | ||||
| 				p.AddNode("ng1", ng1_2) | ||||
| 				p.AddNode("ng2", ng2_1) | ||||
| 			}, | ||||
| 			expectedNodes: []*apiv1.Node{ng1_1, ng1_2}, | ||||
| 			inputNodes:    []*apiv1.Node{ng1_1, ng1_2, ng2_1, noNg}, | ||||
| 		}, | ||||
| 		// Expectation: only nodes that the cloud provider considers candidates for deletion should be returned, even when their node group is above its minimum size.
 | ||||
| 		"1 scale down candidate, 1 node group with nodes that are not candidates for deletion, 1 node group above minimum size.": { | ||||
| 			buildProvider: func() *testprovider.TestCloudProvider { | ||||
| 				provider := testprovider. | ||||
| 					NewTestCloudProviderBuilder(). | ||||
| 					WithIsNodeCandidateForScaleDown(func(n *apiv1.Node) (bool, error) { | ||||
| 						if strings.HasPrefix(n.Name, "ng2") { | ||||
| 							return false, nil | ||||
| 						} | ||||
| 						return true, nil | ||||
| 					}). | ||||
| 					Build() | ||||
| 				return provider | ||||
| 			}, | ||||
| 			configureProvider: func(p *testprovider.TestCloudProvider) { | ||||
| 				p.AddNodeGroup("ng1", 1, 10, 2) | ||||
| 				p.AddNodeGroup("ng2", 1, 10, 2) | ||||
| 				p.AddNode("ng1", ng1_1) | ||||
| 				p.AddNode("ng1", ng1_2) | ||||
| 				p.AddNode("ng2", ng2_1) | ||||
| 				p.AddNode("ng2", ng2_2) | ||||
| 			}, | ||||
| 			expectedNodes: []*apiv1.Node{ng1_1, ng1_2}, | ||||
| 			inputNodes:    []*apiv1.Node{ng1_1, ng1_2, ng2_1, ng2_2}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	expectedNodes := []*apiv1.Node{ng1_1, ng1_2} | ||||
| 	defaultProcessor := NewPreFilteringScaleDownNodeProcessor() | ||||
| 	inputNodes := []*apiv1.Node{ng1_1, ng1_2, ng2_1, noNg} | ||||
| 	result, err := defaultProcessor.GetScaleDownCandidates(ctx, inputNodes) | ||||
| 	for description, testCase := range testCases { | ||||
| 		t.Run(description, func(t *testing.T) { | ||||
| 			var provider *testprovider.TestCloudProvider | ||||
| 			if testCase.buildProvider == nil { | ||||
| 				provider = testprovider.NewTestCloudProviderBuilder().Build() | ||||
| 			} else { | ||||
| 				provider = testCase.buildProvider() | ||||
| 			} | ||||
| 			assert.NotNil(t, provider) | ||||
| 
 | ||||
| 	assert.NoError(t, err) | ||||
| 	assert.Equal(t, result, expectedNodes) | ||||
| 			testCase.configureProvider(provider) | ||||
| 
 | ||||
| 			ctx := &context.AutoscalingContext{ | ||||
| 				CloudProvider: provider, | ||||
| 			} | ||||
| 
 | ||||
| 			defaultProcessor := NewPreFilteringScaleDownNodeProcessor() | ||||
| 			result, err := defaultProcessor.GetScaleDownCandidates(ctx, testCase.inputNodes) | ||||
| 
 | ||||
| 			assert.NoError(t, err) | ||||
| 			assert.Equal(t, result, testCase.expectedNodes) | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue