This commit is contained in:
Michael McCune 2025-09-18 13:33:50 +02:00 committed by GitHub
commit 791b1845dd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
41 changed files with 398 additions and 36 deletions

View File

@ -175,6 +175,11 @@ func (ali *aliCloudProvider) Cleanup() error {
return nil
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (ali *aliCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// AliRef contains a reference to an ECS instance.
type AliRef struct {
ID string

View File

@ -193,6 +193,11 @@ func (aws *awsCloudProvider) Refresh() error {
return aws.awsManager.Refresh()
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (aws *awsCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// AwsRef contains a reference to some entity in AWS world.
type AwsRef struct {
Name string

View File

@ -174,6 +174,11 @@ func (azure *AzureCloudProvider) Refresh() error {
return azure.azureManager.Refresh()
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (azure *AzureCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// azureRef contains a reference to some entity in Azure world.
type azureRef struct {
Name string

View File

@ -228,6 +228,11 @@ func (baiducloud *baiducloudCloudProvider) Refresh() error {
return nil
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (baiducloud *baiducloudCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// BaiducloudRef contains a reference to some entity in baiducloud world.
type BaiducloudRef struct {
Name string

View File

@ -172,6 +172,11 @@ func (d *bizflycloudCloudProvider) Refresh() error {
return d.manager.Refresh()
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (d *bizflycloudCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// BuildBizflyCloud builds the Bizflycloud cloud provider.
func BuildBizflyCloud(
opts config.AutoscalingOptions,

View File

@ -219,6 +219,11 @@ func (b *brightboxCloudProvider) Cleanup() error {
return nil
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (b *brightboxCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// BuildBrightbox builds the Brightbox provider
func BuildBrightbox(
opts config.AutoscalingOptions,

View File

@ -170,6 +170,11 @@ func (ccp *cherryCloudProvider) Cleanup() error {
return nil
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (ccp *cherryCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// BuildCherry is called by the autoscaler to build a Cherry Servers cloud provider.
//
// The cherryManager is created here, and the node groups are created

View File

@ -167,6 +167,11 @@ func (d *civoCloudProvider) Refresh() error {
return d.manager.Refresh()
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (d *civoCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// BuildCivo builds the Civo cloud provider.
func BuildCivo(
opts config.AutoscalingOptions,

View File

@ -154,6 +154,14 @@ type CloudProvider interface {
// Refresh is called before every main loop and can be used to dynamically update cloud provider state.
// In particular the list of node groups returned by NodeGroups can change as a result of CloudProvider.Refresh().
Refresh() error
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This function
// will be called during prefiltering of nodes for scaledown to allow cloud providers the opportunity
// to reject a node for scale down. This may be used in cases where nodes are undergoing upgrades or other
// cloud-specific behavior where the cluster autoscaler should not begin cordoning, draining, and tainting
// the node.
// Returns true if the node can be safely scaled down or false otherwise.
IsNodeCandidateForScaleDown(*apiv1.Node) (bool, error)
}
// ErrNotImplemented is returned if a method is not implemented.

View File

@ -115,6 +115,11 @@ func (provider *cloudStackCloudProvider) Pricing() (cloudprovider.PricingModel,
return nil, cloudprovider.ErrNotImplemented
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (provider *cloudStackCloudProvider) IsNodeCandidateForScaleDown(node *v1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically
// created on the cloud provider side. The node group is not returned by NodeGroups() until it is created.
func (provider *cloudStackCloudProvider) NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string, taints []v1.Taint, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {

View File

@ -849,6 +849,27 @@ func (c *machineController) listMachinesForScalableResource(r *unstructured.Unst
}
}
// listMachineSetsForMachineDeployment lists the MachineSets in the namespace of the
// MachineDeployment r whose machineDeploymentNameLabel label equals r's name.
//
// Every returned object is a deep copy of the object handed back by the lister. An
// error is returned when the lister fails or yields an object that is not an
// *unstructured.Unstructured.
func (c *machineController) listMachineSetsForMachineDeployment(r *unstructured.Unstructured) ([]*unstructured.Unstructured, error) {
	byDeploymentName := labels.SelectorFromSet(map[string]string{
		machineDeploymentNameLabel: r.GetName(),
	})

	namespacedLister := c.machineSetInformer.Lister().ByNamespace(r.GetNamespace())
	listed, err := namespacedLister.List(byDeploymentName)
	if err != nil {
		return nil, fmt.Errorf("unable to list MachineSets for MachineDeployment %s: %w", r.GetName(), err)
	}

	machineSets := make([]*unstructured.Unstructured, 0, len(listed))
	for i := range listed {
		ms, isUnstructured := listed[i].(*unstructured.Unstructured)
		if !isUnstructured {
			return nil, fmt.Errorf("expected unstructured resource from lister, not %T", listed[i])
		}
		// Copy so callers never hold a reference to the lister's own object.
		machineSets = append(machineSets, ms.DeepCopy())
	}
	return machineSets, nil
}
func (c *machineController) listScalableResources() ([]*unstructured.Unstructured, error) {
scalableResources, err := c.listResources(c.machineSetInformer.Lister())
if err != nil {

View File

@ -458,6 +458,68 @@ func (ng *nodegroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*c
return &defaults, nil
}
// IsMachineDeploymentAndRollingOut reports whether the node group's scalable resource
// is a MachineDeployment that is currently rolling out.
//
// A rollout is considered in progress when any MachineSet listed for the
// MachineDeployment, other than those carrying the highest revision annotation, still
// has spec or status replicas greater than zero. MachineSets without a revision
// annotation are treated as old.
//
// It returns false, nil when the scalable resource is not a MachineDeployment or has no
// MachineSets. It returns an error when the MachineSets cannot be listed, when a
// revision annotation is not a base-10 integer, or when a replicas field cannot be read.
func (ng *nodegroup) IsMachineDeploymentAndRollingOut() (bool, error) {
	if ng.scalableResource.Kind() != machineDeploymentKind {
		// Not a MachineDeployment.
		return false, nil
	}

	machineSets, err := ng.machineController.listMachineSetsForMachineDeployment(ng.scalableResource.unstructured)
	if err != nil {
		return false, err
	}

	if len(machineSets) == 0 {
		// No MachineSets => MD is not rolling out.
		return false, nil
	}

	// Find the latest revision, the MachineSet with the latest revision is the MachineSet that
	// matches the MachineDeployment spec. Parsed revisions are remembered per MachineSet so the
	// comparison below is numeric: strconv.ParseInt accepts forms such as "+3" or "03" that do
	// not survive a round trip through strconv.FormatInt, so comparing strings could misclassify
	// the latest MachineSet as an old one.
	revisions := make([]int64, len(machineSets))
	hasRevision := make([]bool, len(machineSets))
	var latestMSRevisionInt int64
	for i, ms := range machineSets {
		msRevision, ok := ms.GetAnnotations()[machineDeploymentRevisionAnnotation]
		if !ok {
			continue
		}

		msRevisionInt, err := strconv.ParseInt(msRevision, 10, 64)
		if err != nil {
			return false, errors.Wrapf(err, "failed to parse current revision on MachineSet %s", klog.KObj(ms))
		}
		revisions[i], hasRevision[i] = msRevisionInt, true
		latestMSRevisionInt = max(latestMSRevisionInt, msRevisionInt)
	}

	for i, ms := range machineSets {
		if hasRevision[i] && revisions[i] == latestMSRevisionInt {
			// Ignore the MachineSet with the latest revision
			continue
		}

		// Check if any of the old MachineSets still have replicas, first in spec and then in status.
		for _, section := range []string{"spec", "status"} {
			replicas, found, err := unstructured.NestedInt64(ms.UnstructuredContent(), section, "replicas")
			if err != nil {
				return false, errors.Wrapf(err, "failed to find %s replicas on MachineSet %s", section, klog.KObj(ms))
			}
			if found && replicas > 0 {
				// Found an old MachineSet that still has replicas => MD is still rolling out.
				return true, nil
			}
		}
	}

	// Didn't find any old MachineSets that still have replicas => MD is not rolling out.
	return false, nil
}
func newNodeGroupFromScalableResource(controller *machineController, unstructuredScalableResource *unstructured.Unstructured) (*nodegroup, error) {
// Ensure that the resulting node group would be allowed based on the autodiscovery specs if defined
if !controller.allowedByAutoDiscoverySpecs(unstructuredScalableResource) {

View File

@ -21,6 +21,7 @@ import (
"path"
"reflect"
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/client-go/discovery"
@ -34,7 +35,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
caserrors "k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
)
@ -92,7 +93,7 @@ func (p *provider) HasInstance(node *corev1.Node) (bool, error) {
return false, fmt.Errorf("machine not found for node %s: %v", node.Name, err)
}
func (*provider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
func (*provider) Pricing() (cloudprovider.PricingModel, caserrors.AutoscalerError) {
return nil, cloudprovider.ErrNotImplemented
}
@ -140,6 +141,24 @@ func (p *provider) GetNodeGpuConfig(node *corev1.Node) *cloudprovider.GpuConfig
return gpu.GetNodeGPUFromCloudProvider(p, node)
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
//
// A node that is not part of any node group is reported as not a candidate. Otherwise the
// node is a candidate unless its node group is a MachineDeployment that is rolling out.
// Errors from the node group lookup or the rollout check are wrapped and returned with false.
func (p *provider) IsNodeCandidateForScaleDown(node *corev1.Node) (bool, error) {
	group, lookupErr := p.controller.nodeGroupForNode(node)
	switch {
	case lookupErr != nil:
		return false, errors.Wrapf(lookupErr, "failed to determine node group for node %s", klog.KObj(node))
	case group == nil:
		klog.V(5).Infof("node %s is not part of a node group", klog.KObj(node))
		return false, nil
	}

	inRollout, rolloutErr := group.IsMachineDeploymentAndRollingOut()
	if rolloutErr != nil {
		return false, errors.Wrapf(rolloutErr, "failed to determine rolling out status for MachineDeployment %s", group.scalableResource.ID())
	}

	// Scale down is acceptable only while the owning MachineDeployment is not rolling out.
	return !inRollout, nil
}
func newProvider(
name string,
rl *cloudprovider.ResourceLimiter,

View File

@ -32,15 +32,17 @@ import (
)
const (
cpuKey = "capacity.cluster-autoscaler.kubernetes.io/cpu"
memoryKey = "capacity.cluster-autoscaler.kubernetes.io/memory"
diskCapacityKey = "capacity.cluster-autoscaler.kubernetes.io/ephemeral-disk"
gpuTypeKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-type"
gpuCountKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-count"
maxPodsKey = "capacity.cluster-autoscaler.kubernetes.io/maxPods"
taintsKey = "capacity.cluster-autoscaler.kubernetes.io/taints"
labelsKey = "capacity.cluster-autoscaler.kubernetes.io/labels"
draDriverKey = "capacity.cluster-autoscaler.kubernetes.io/dra-driver"
cpuKey = "capacity.cluster-autoscaler.kubernetes.io/cpu"
memoryKey = "capacity.cluster-autoscaler.kubernetes.io/memory"
diskCapacityKey = "capacity.cluster-autoscaler.kubernetes.io/ephemeral-disk"
gpuTypeKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-type"
gpuCountKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-count"
maxPodsKey = "capacity.cluster-autoscaler.kubernetes.io/maxPods"
taintsKey = "capacity.cluster-autoscaler.kubernetes.io/taints"
labelsKey = "capacity.cluster-autoscaler.kubernetes.io/labels"
draDriverKey = "capacity.cluster-autoscaler.kubernetes.io/dra-driver"
machineDeploymentRevisionAnnotation = "machinedeployment.clusters.x-k8s.io/revision"
machineDeploymentNameLabel = "cluster.x-k8s.io/deployment-name"
// UnknownArch is used if the Architecture is Unknown
UnknownArch SystemArchitecture = ""
// Amd64 is used if the Architecture is x86_64

View File

@ -181,6 +181,11 @@ func (c *CoreWeaveCloudProvider) Refresh() error {
return c.manager.Refresh()
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (c *CoreWeaveCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// BuildCoreWeave builds the CoreWeave cloud provider with the given options and returns it.
func BuildCoreWeave(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
klog.V(4).Infof("Building CoreWeave cloud provider with options: %+v", opts)

View File

@ -169,6 +169,11 @@ func (d *digitaloceanCloudProvider) Refresh() error {
return d.manager.Refresh()
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (d *digitaloceanCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// BuildDigitalOcean builds the DigitalOcean cloud provider.
func BuildDigitalOcean(
opts config.AutoscalingOptions,

View File

@ -176,6 +176,11 @@ func (pcp *equinixMetalCloudProvider) Cleanup() error {
return nil
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (pcp *equinixMetalCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// BuildCloudProvider is called by the autoscaler to build an Equinix Metal cloud provider.
//
// The equinixMetalManager is created here, and the node groups are created

View File

@ -224,6 +224,11 @@ func (e *exoscaleCloudProvider) Refresh() error {
return e.manager.Refresh()
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (e *exoscaleCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// BuildExoscale builds the Exoscale cloud provider.
func BuildExoscale(_ config.AutoscalingOptions, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
manager, err := newManager(discoveryOpts)

View File

@ -316,6 +316,11 @@ func (e *externalGrpcCloudProvider) Refresh() error {
return nil
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (e *externalGrpcCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// BuildExternalGrpc builds the externalgrpc cloud provider.
func BuildExternalGrpc(
opts config.AutoscalingOptions,

View File

@ -148,6 +148,11 @@ func (gce *GceCloudProvider) Refresh() error {
return gce.gceManager.Refresh()
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (gce *GceCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// GceRef contains a reference to some entity in GCE world.
type GceRef struct {
Project string

View File

@ -182,6 +182,11 @@ func (d *HetznerCloudProvider) Refresh() error {
return nil
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (d *HetznerCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// BuildHetzner builds the Hetzner cloud provider.
func BuildHetzner(_ config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
manager, err := newManager()

View File

@ -179,6 +179,11 @@ func (hcp *huaweicloudCloudProvider) Refresh() error {
return nil
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (hcp *huaweicloudCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
func (hcp *huaweicloudCloudProvider) buildAsgs(specs []string) error {
asgs, err := hcp.cloudServiceManager.ListScalingGroups()
if err != nil {

View File

@ -308,6 +308,11 @@ func (ic *IonosCloudCloudProvider) Refresh() error {
return nil
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (ic *IonosCloudCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// BuildIonosCloud builds the IonosCloud cloud provider.
func BuildIonosCloud(
opts config.AutoscalingOptions,

View File

@ -129,6 +129,11 @@ func (k *kamateraCloudProvider) Refresh() error {
return k.manager.refresh()
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (k *kamateraCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// BuildKamatera builds the Kamatera cloud provider.
func BuildKamatera(
opts config.AutoscalingOptions,

View File

@ -180,6 +180,11 @@ func (kubemark *KubemarkCloudProvider) Cleanup() error {
return nil
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (kubemark *KubemarkCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// NodeGroup implements NodeGroup interface.
type NodeGroup struct {
Name string

View File

@ -170,6 +170,11 @@ func (kwok *KwokCloudProvider) Cleanup() error {
return nil
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (kwok *KwokCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// BuildKwok builds kwok cloud provider.
func BuildKwok(opts config.AutoscalingOptions,
do cloudprovider.NodeGroupDiscoveryOptions,

View File

@ -151,6 +151,11 @@ func (l *linodeCloudProvider) Refresh() error {
return l.manager.refresh()
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (l *linodeCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
func newLinodeCloudProvider(config io.Reader, rl *cloudprovider.ResourceLimiter) (cloudprovider.CloudProvider, error) {
m, err := newManager(config)
if err != nil {

View File

@ -203,6 +203,11 @@ func (mcp *magnumCloudProvider) Cleanup() error {
return nil
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (mcp *magnumCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// refreshNodeGroups gets the list of node groups which meet the requirements for autoscaling,
// creates magnumNodeGroups for any that do not exist in the cloud provider,
// and drops any node groups which are present in the cloud provider but not in the

View File

@ -276,3 +276,8 @@ func (_m *CloudProvider) Refresh() error {
return r0
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no check: it does not inspect node and always returns true
// together with cloudprovider.ErrNotImplemented.
func (_m *CloudProvider) IsNodeCandidateForScaleDown(node *v1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

View File

@ -147,6 +147,11 @@ func (ocp *OciCloudProvider) Refresh() error {
return ocp.poolManager.Refresh()
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (ocp *OciCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// BuildOCI constructs the OciCloudProvider object that implements the cloud provider interface (InstancePoolManager).
func BuildOCI(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
ocidType, err := ocicommon.GetAllPoolTypes(opts.NodeGroups)

View File

@ -149,3 +149,8 @@ func (ocp *OciCloudProvider) Cleanup() error {
func (ocp *OciCloudProvider) Refresh() error {
return ocp.manager.Refresh()
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (ocp *OciCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

View File

@ -318,3 +318,8 @@ func (provider *OVHCloudProvider) Refresh() error {
return nil
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (provider *OVHCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

View File

@ -219,6 +219,11 @@ func (provider *RancherCloudProvider) Cleanup() error {
return nil
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (provider *RancherCloudProvider) IsNodeCandidateForScaleDown(node *corev1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
func (provider *RancherCloudProvider) scalableNodeGroups() ([]*nodeGroup, error) {
var result []*nodeGroup

View File

@ -277,3 +277,8 @@ func (scw *scalewayCloudProvider) Refresh() error {
return nil
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (scw *scalewayCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}

View File

@ -173,6 +173,11 @@ func (tencentcloud *tencentCloudProvider) Refresh() error {
return tencentcloud.tencentcloudManager.Refresh()
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (tencentcloud *tencentCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// BuildTencentcloud returns tencentcloud provider
func BuildTencentcloud(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
var config io.ReadCloser

View File

@ -45,20 +45,25 @@ type OnNodeGroupDeleteFunc func(string) error
// HasInstance is a function called to determine if a node has been removed from the cloud provider.
type HasInstance func(string) (bool, error)
// IsNodeCandidateForScaleDown is a function called to determine if a cloud provider considers
// a node a good candidate for scaling down. It receives the node under consideration and
// returns the verdict together with any error encountered while deciding.
type IsNodeCandidateForScaleDown func(*apiv1.Node) (bool, error)
// TestCloudProvider is a dummy cloud provider to be used in tests.
type TestCloudProvider struct {
sync.Mutex
nodes map[string]string
groups map[string]cloudprovider.NodeGroup
onScaleUp func(string, int) error
onScaleDown func(string, string) error
onNodeGroupCreate func(string) error
onNodeGroupDelete func(string) error
hasInstance func(string) (bool, error)
machineTypes []string
machineTemplates map[string]*framework.NodeInfo
priceModel cloudprovider.PricingModel
resourceLimiter *cloudprovider.ResourceLimiter
nodes map[string]string
groups map[string]cloudprovider.NodeGroup
onScaleUp func(string, int) error
onScaleDown func(string, string) error
onNodeGroupCreate func(string) error
onNodeGroupDelete func(string) error
hasInstance func(string) (bool, error)
isNodeCandidateForScaleDown func(*apiv1.Node) (bool, error)
machineTypes []string
machineTemplates map[string]*framework.NodeInfo
priceModel cloudprovider.PricingModel
resourceLimiter *cloudprovider.ResourceLimiter
}
// TestCloudProviderBuilder is used to create CloudProvider
@ -127,6 +132,14 @@ func (b *TestCloudProviderBuilder) WithHasInstance(hasInstance HasInstance) *Tes
return b
}
// WithIsNodeCandidateForScaleDown queues a build step that installs the given
// IsNodeCandidateForScaleDown handler on the provider, and returns the builder so that
// calls can be chained.
func (b *TestCloudProviderBuilder) WithIsNodeCandidateForScaleDown(isNodeCandidateForScaleDown IsNodeCandidateForScaleDown) *TestCloudProviderBuilder {
	installHandler := func(p *TestCloudProvider) {
		p.isNodeCandidateForScaleDown = isNodeCandidateForScaleDown
	}
	b.builders = append(b.builders, installHandler)
	return b
}
// Build returns a built test cloud provider
func (b *TestCloudProviderBuilder) Build() *TestCloudProvider {
p := &TestCloudProvider{
@ -352,6 +365,15 @@ func (tcp *TestCloudProvider) Refresh() error {
return nil
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The decision is delegated to the configured handler when one is set; without a handler
// it returns true together with cloudprovider.ErrNotImplemented.
func (tcp *TestCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
	if handler := tcp.isNodeCandidateForScaleDown; handler != nil {
		return handler(node)
	}
	return true, cloudprovider.ErrNotImplemented
}
// TestNodeGroup is a node group used by TestCloudProvider.
type TestNodeGroup struct {
sync.Mutex

View File

@ -162,6 +162,11 @@ func (u *uthoCloudProvider) Refresh() error {
return u.manager.Refresh()
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (u *uthoCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// BuildUtho builds the Utho cloud provider.
func BuildUtho(
opts config.AutoscalingOptions,

View File

@ -120,6 +120,11 @@ func (v *volcengineCloudProvider) Refresh() error {
return nil
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (v *volcengineCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// GetNodeGpuConfig returns the label, type and resource name for the GPU added to node. If node doesn't have
// any GPUs, it returns nil.
func (v *volcengineCloudProvider) GetNodeGpuConfig(node *apiv1.Node) *cloudprovider.GpuConfig {

View File

@ -140,6 +140,11 @@ func (v *vultrCloudProvider) Refresh() error {
return v.manager.Refresh()
}
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// This implementation performs no provider-specific check: it does not inspect node and
// always returns true together with cloudprovider.ErrNotImplemented.
func (v *vultrCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// toProviderID returns a provider ID from the given node ID.
func toProviderID(nodeID string) string {
return fmt.Sprintf("%s%s", vultrProviderIDPrefix, nodeID)

View File

@ -22,6 +22,7 @@ import (
apiv1 "k8s.io/api/core/v1"
klog "k8s.io/klog/v2"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/context"
"k8s.io/autoscaler/cluster-autoscaler/utils"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
@ -48,6 +49,15 @@ func (n *PreFilteringScaleDownNodeProcessor) GetScaleDownCandidates(ctx *context
nodeGroupSize := utils.GetNodeGroupSizeMap(ctx.CloudProvider)
for _, node := range nodes {
if candidate, err := ctx.CloudProvider.IsNodeCandidateForScaleDown(node); err != nil {
if err != cloudprovider.ErrNotImplemented {
klog.Warningf("Error while checking if node is a candidate for deletion %s: %v", node.Name, err)
continue
}
} else if !candidate {
klog.V(5).Infof("Node %s is not a candidate for deletion (cloud provider determined)", node.Name)
continue
}
nodeGroup, err := ctx.CloudProvider.NodeGroupForNode(node)
if err != nil {
klog.Warningf("Error while checking node group for %s: %v", node.Name, err)

View File

@ -17,6 +17,7 @@ limitations under the License.
package nodes
import (
"strings"
"testing"
"github.com/stretchr/testify/assert"
@ -44,23 +45,75 @@ func TestPreFilteringScaleDownNodeProcessor_GetScaleDownCandidateNodes(t *testin
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
ng1_2 := BuildTestNode("ng1-2", 1000, 1000)
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
ng2_2 := BuildTestNode("ng2-2", 1000, 1000)
noNg := BuildTestNode("no-ng", 1000, 1000)
provider := testprovider.NewTestCloudProviderBuilder().Build()
provider.AddNodeGroup("ng1", 1, 10, 2)
provider.AddNodeGroup("ng2", 1, 10, 1)
provider.AddNode("ng1", ng1_1)
provider.AddNode("ng1", ng1_2)
provider.AddNode("ng2", ng2_1)
ctx := &context.AutoscalingContext{
CloudProvider: provider,
testCases := map[string]struct {
buildProvider func() *testprovider.TestCloudProvider
configureProvider func(p *testprovider.TestCloudProvider)
expectedNodes []*apiv1.Node
inputNodes []*apiv1.Node
}{
// Expectation: only node groups not at minimum size should be candidates.
"1 scale down candidate, 1 node group at minimum size, 1 node with no node group, 1 node group above minimum size.": {
configureProvider: func(p *testprovider.TestCloudProvider) {
p.AddNodeGroup("ng1", 1, 10, 2)
p.AddNodeGroup("ng2", 1, 10, 1)
p.AddNode("ng1", ng1_1)
p.AddNode("ng1", ng1_2)
p.AddNode("ng2", ng2_1)
},
expectedNodes: []*apiv1.Node{ng1_1, ng1_2},
inputNodes: []*apiv1.Node{ng1_1, ng1_2, ng2_1, noNg},
},
// Expectation: only node groups that contain nodes the cloud provider considers candidates for deletion should be candidates.
"1 scale down candidate, 1 node group with nodes that are not candidates for deletion, 1 node group above minimum size.": {
buildProvider: func() *testprovider.TestCloudProvider {
provider := testprovider.
NewTestCloudProviderBuilder().
WithIsNodeCandidateForScaleDown(func(n *apiv1.Node) (bool, error) {
if strings.HasPrefix(n.Name, "ng2") {
return false, nil
}
return true, nil
}).
Build()
return provider
},
configureProvider: func(p *testprovider.TestCloudProvider) {
p.AddNodeGroup("ng1", 1, 10, 2)
p.AddNodeGroup("ng2", 1, 10, 2)
p.AddNode("ng1", ng1_1)
p.AddNode("ng1", ng1_2)
p.AddNode("ng2", ng2_1)
p.AddNode("ng2", ng2_2)
},
expectedNodes: []*apiv1.Node{ng1_1, ng1_2},
inputNodes: []*apiv1.Node{ng1_1, ng1_2, ng2_1, ng2_2},
},
}
expectedNodes := []*apiv1.Node{ng1_1, ng1_2}
defaultProcessor := NewPreFilteringScaleDownNodeProcessor()
inputNodes := []*apiv1.Node{ng1_1, ng1_2, ng2_1, noNg}
result, err := defaultProcessor.GetScaleDownCandidates(ctx, inputNodes)
for description, testCase := range testCases {
t.Run(description, func(t *testing.T) {
var provider *testprovider.TestCloudProvider
if testCase.buildProvider == nil {
provider = testprovider.NewTestCloudProviderBuilder().Build()
} else {
provider = testCase.buildProvider()
}
assert.NotNil(t, provider)
assert.NoError(t, err)
assert.Equal(t, result, expectedNodes)
testCase.configureProvider(provider)
ctx := &context.AutoscalingContext{
CloudProvider: provider,
}
defaultProcessor := NewPreFilteringScaleDownNodeProcessor()
result, err := defaultProcessor.GetScaleDownCandidates(ctx, testCase.inputNodes)
assert.NoError(t, err)
assert.Equal(t, result, testCase.expectedNodes)
})
}
}