Merge a81913f3b9 into a9292351c3
This commit is contained in:
commit
791b1845dd
|
|
@ -175,6 +175,11 @@ func (ali *aliCloudProvider) Cleanup() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (ali *aliCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// AliRef contains a reference to an ECS instance.
|
||||
type AliRef struct {
|
||||
ID string
|
||||
|
|
|
|||
|
|
@ -193,6 +193,11 @@ func (aws *awsCloudProvider) Refresh() error {
|
|||
return aws.awsManager.Refresh()
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (aws *awsCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// AwsRef contains a reference to some entity in AWS world.
|
||||
type AwsRef struct {
|
||||
Name string
|
||||
|
|
|
|||
|
|
@ -174,6 +174,11 @@ func (azure *AzureCloudProvider) Refresh() error {
|
|||
return azure.azureManager.Refresh()
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (azure *AzureCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// azureRef contains a reference to some entity in Azure world.
|
||||
type azureRef struct {
|
||||
Name string
|
||||
|
|
|
|||
|
|
@ -228,6 +228,11 @@ func (baiducloud *baiducloudCloudProvider) Refresh() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (baiducloud *baiducloudCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// BaiducloudRef contains a reference to some entity in baiducloud world.
|
||||
type BaiducloudRef struct {
|
||||
Name string
|
||||
|
|
|
|||
|
|
@ -172,6 +172,11 @@ func (d *bizflycloudCloudProvider) Refresh() error {
|
|||
return d.manager.Refresh()
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (d *bizflycloudCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// BuildBizflyCloud builds the Bizflycloud cloud provider.
|
||||
func BuildBizflyCloud(
|
||||
opts config.AutoscalingOptions,
|
||||
|
|
|
|||
|
|
@ -219,6 +219,11 @@ func (b *brightboxCloudProvider) Cleanup() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (b *brightboxCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// BuildBrightbox builds the Brightbox provider
|
||||
func BuildBrightbox(
|
||||
opts config.AutoscalingOptions,
|
||||
|
|
|
|||
|
|
@ -170,6 +170,11 @@ func (ccp *cherryCloudProvider) Cleanup() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (ccp *cherryCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// BuildCherry is called by the autoscaler to build a Cherry Servers cloud provider.
|
||||
//
|
||||
// The cherryManager is created here, and the node groups are created
|
||||
|
|
|
|||
|
|
@ -167,6 +167,11 @@ func (d *civoCloudProvider) Refresh() error {
|
|||
return d.manager.Refresh()
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (d *civoCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// BuildCivo builds the Civo cloud provider.
|
||||
func BuildCivo(
|
||||
opts config.AutoscalingOptions,
|
||||
|
|
|
|||
|
|
@ -154,6 +154,14 @@ type CloudProvider interface {
|
|||
// Refresh is called before every main loop and can be used to dynamically update cloud provider state.
|
||||
// In particular the list of node groups returned by NodeGroups can change as a result of CloudProvider.Refresh().
|
||||
Refresh() error
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down. This function
|
||||
// will be called during prefiltering of nodes for scaledown to allow cloud providers the opportunity
|
||||
// to reject a node for scale down. This may be used in cases where nodes are undergoing upgrades or other
|
||||
// cloud-specific behavior where the cluster autoscaler should not begin cordoning, draining, and tainting
|
||||
// the node.
|
||||
// Returns true if the node can be safely scaled down or false otherwise.
|
||||
IsNodeCandidateForScaleDown(*apiv1.Node) (bool, error)
|
||||
}
|
||||
|
||||
// ErrNotImplemented is returned if a method is not implemented.
|
||||
|
|
|
|||
|
|
@ -115,6 +115,11 @@ func (provider *cloudStackCloudProvider) Pricing() (cloudprovider.PricingModel,
|
|||
return nil, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (provider *cloudStackCloudProvider) IsNodeCandidateForScaleDown(node *v1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically
|
||||
// created on the cloud provider side. The node group is not returned by NodeGroups() until it is created.
|
||||
func (provider *cloudStackCloudProvider) NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string, taints []v1.Taint, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
|
||||
|
|
|
|||
|
|
@ -849,6 +849,27 @@ func (c *machineController) listMachinesForScalableResource(r *unstructured.Unst
|
|||
}
|
||||
}
|
||||
|
||||
func (c *machineController) listMachineSetsForMachineDeployment(r *unstructured.Unstructured) ([]*unstructured.Unstructured, error) {
|
||||
selector := labels.SelectorFromSet(map[string]string{
|
||||
machineDeploymentNameLabel: r.GetName(),
|
||||
})
|
||||
objs, err := c.machineSetInformer.Lister().ByNamespace(r.GetNamespace()).List(selector)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to list MachineSets for MachineDeployment %s: %w", r.GetName(), err)
|
||||
}
|
||||
|
||||
results := make([]*unstructured.Unstructured, 0, len(objs))
|
||||
for _, x := range objs {
|
||||
u, ok := x.(*unstructured.Unstructured)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("expected unstructured resource from lister, not %T", x)
|
||||
}
|
||||
results = append(results, u.DeepCopy())
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (c *machineController) listScalableResources() ([]*unstructured.Unstructured, error) {
|
||||
scalableResources, err := c.listResources(c.machineSetInformer.Lister())
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@ -458,6 +458,68 @@ func (ng *nodegroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*c
|
|||
return &defaults, nil
|
||||
}
|
||||
|
||||
func (ng *nodegroup) IsMachineDeploymentAndRollingOut() (bool, error) {
|
||||
if ng.scalableResource.Kind() != machineDeploymentKind {
|
||||
// Not a MachineDeployment.
|
||||
return false, nil
|
||||
}
|
||||
|
||||
machineSets, err := ng.machineController.listMachineSetsForMachineDeployment(ng.scalableResource.unstructured)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if len(machineSets) == 0 {
|
||||
// No MachineSets => MD is not rolling out.
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Find the latest revision, the MachineSet with the latest revision is the MachineSet that
|
||||
// matches the MachineDeployment spec.
|
||||
var latestMSRevisionInt int64
|
||||
for _, ms := range machineSets {
|
||||
msRevision, ok := ms.GetAnnotations()[machineDeploymentRevisionAnnotation]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
msRevisionInt, err := strconv.ParseInt(msRevision, 10, 64)
|
||||
if err != nil {
|
||||
return false, errors.Wrapf(err, "failed to parse current revision on MachineSet %s", klog.KObj(ms))
|
||||
}
|
||||
latestMSRevisionInt = max(latestMSRevisionInt, msRevisionInt)
|
||||
}
|
||||
maxMSRevision := strconv.FormatInt(latestMSRevisionInt, 10)
|
||||
|
||||
for _, ms := range machineSets {
|
||||
if ms.GetAnnotations()[machineDeploymentRevisionAnnotation] == maxMSRevision {
|
||||
// Ignore the MachineSet with the latest revision
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if any of the old MachineSets still have replicas
|
||||
replicas, found, err := unstructured.NestedInt64(ms.UnstructuredContent(), "spec", "replicas")
|
||||
if err != nil {
|
||||
return false, errors.Wrapf(err, "failed to find spec replicas on MachineSet %s", klog.KObj(ms))
|
||||
}
|
||||
if found && replicas > 0 {
|
||||
// Found old MachineSets that still has replicas => MD is still rolling out.
|
||||
return true, nil
|
||||
}
|
||||
replicas, found, err = unstructured.NestedInt64(ms.UnstructuredContent(), "status", "replicas")
|
||||
if err != nil {
|
||||
return false, errors.Wrapf(err, "failed to find status replicas on MachineSet %s", klog.KObj(ms))
|
||||
}
|
||||
if found && replicas > 0 {
|
||||
// Found old MachineSets that still has replicas => MD is still rolling out.
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Didn't find any old MachineSets that still have replicas => MD is not rolling out.
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func newNodeGroupFromScalableResource(controller *machineController, unstructuredScalableResource *unstructured.Unstructured) (*nodegroup, error) {
|
||||
// Ensure that the resulting node group would be allowed based on the autodiscovery specs if defined
|
||||
if !controller.allowedByAutoDiscoverySpecs(unstructuredScalableResource) {
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ import (
|
|||
"path"
|
||||
"reflect"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/client-go/discovery"
|
||||
|
|
@ -34,7 +35,7 @@ import (
|
|||
|
||||
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/config"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
|
||||
caserrors "k8s.io/autoscaler/cluster-autoscaler/utils/errors"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
|
||||
)
|
||||
|
||||
|
|
@ -92,7 +93,7 @@ func (p *provider) HasInstance(node *corev1.Node) (bool, error) {
|
|||
return false, fmt.Errorf("machine not found for node %s: %v", node.Name, err)
|
||||
}
|
||||
|
||||
func (*provider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
|
||||
func (*provider) Pricing() (cloudprovider.PricingModel, caserrors.AutoscalerError) {
|
||||
return nil, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
|
|
@ -140,6 +141,24 @@ func (p *provider) GetNodeGpuConfig(node *corev1.Node) *cloudprovider.GpuConfig
|
|||
return gpu.GetNodeGPUFromCloudProvider(p, node)
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
|
||||
func (p *provider) IsNodeCandidateForScaleDown(node *corev1.Node) (bool, error) {
|
||||
ng, err := p.controller.nodeGroupForNode(node)
|
||||
if err != nil {
|
||||
return false, errors.Wrapf(err, "failed to determine node group for node %s", klog.KObj(node))
|
||||
}
|
||||
if ng == nil {
|
||||
klog.V(5).Infof("node %s is not part of a node group", klog.KObj(node))
|
||||
return false, nil
|
||||
}
|
||||
rollingout, err := ng.IsMachineDeploymentAndRollingOut()
|
||||
if err != nil {
|
||||
return false, errors.Wrapf(err, "failed to determine rolling out status for MachineDeployment %s", ng.scalableResource.ID())
|
||||
}
|
||||
// A node is a good candidate for scale down if it is not currently part of a MachineDeployment that is rolling out.
|
||||
return !rollingout, nil
|
||||
}
|
||||
|
||||
func newProvider(
|
||||
name string,
|
||||
rl *cloudprovider.ResourceLimiter,
|
||||
|
|
|
|||
|
|
@ -32,15 +32,17 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
cpuKey = "capacity.cluster-autoscaler.kubernetes.io/cpu"
|
||||
memoryKey = "capacity.cluster-autoscaler.kubernetes.io/memory"
|
||||
diskCapacityKey = "capacity.cluster-autoscaler.kubernetes.io/ephemeral-disk"
|
||||
gpuTypeKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-type"
|
||||
gpuCountKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-count"
|
||||
maxPodsKey = "capacity.cluster-autoscaler.kubernetes.io/maxPods"
|
||||
taintsKey = "capacity.cluster-autoscaler.kubernetes.io/taints"
|
||||
labelsKey = "capacity.cluster-autoscaler.kubernetes.io/labels"
|
||||
draDriverKey = "capacity.cluster-autoscaler.kubernetes.io/dra-driver"
|
||||
cpuKey = "capacity.cluster-autoscaler.kubernetes.io/cpu"
|
||||
memoryKey = "capacity.cluster-autoscaler.kubernetes.io/memory"
|
||||
diskCapacityKey = "capacity.cluster-autoscaler.kubernetes.io/ephemeral-disk"
|
||||
gpuTypeKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-type"
|
||||
gpuCountKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-count"
|
||||
maxPodsKey = "capacity.cluster-autoscaler.kubernetes.io/maxPods"
|
||||
taintsKey = "capacity.cluster-autoscaler.kubernetes.io/taints"
|
||||
labelsKey = "capacity.cluster-autoscaler.kubernetes.io/labels"
|
||||
draDriverKey = "capacity.cluster-autoscaler.kubernetes.io/dra-driver"
|
||||
machineDeploymentRevisionAnnotation = "machinedeployment.clusters.x-k8s.io/revision"
|
||||
machineDeploymentNameLabel = "cluster.x-k8s.io/deployment-name"
|
||||
// UnknownArch is used if the Architecture is Unknown
|
||||
UnknownArch SystemArchitecture = ""
|
||||
// Amd64 is used if the Architecture is x86_64
|
||||
|
|
|
|||
|
|
@ -181,6 +181,11 @@ func (c *CoreWeaveCloudProvider) Refresh() error {
|
|||
return c.manager.Refresh()
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (c *CoreWeaveCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// BuildCoreWeave builds the CoreWeave cloud provider with the given options and returns it.
|
||||
func BuildCoreWeave(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
|
||||
klog.V(4).Infof("Building CoreWeave cloud provider with options: %+v", opts)
|
||||
|
|
|
|||
|
|
@ -169,6 +169,11 @@ func (d *digitaloceanCloudProvider) Refresh() error {
|
|||
return d.manager.Refresh()
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (d *digitaloceanCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// BuildDigitalOcean builds the DigitalOcean cloud provider.
|
||||
func BuildDigitalOcean(
|
||||
opts config.AutoscalingOptions,
|
||||
|
|
|
|||
|
|
@ -176,6 +176,11 @@ func (pcp *equinixMetalCloudProvider) Cleanup() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (pcp *equinixMetalCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// BuildCloudProvider is called by the autoscaler to build an Equinix Metal cloud provider.
|
||||
//
|
||||
// The equinixMetalManager is created here, and the node groups are created
|
||||
|
|
|
|||
|
|
@ -224,6 +224,11 @@ func (e *exoscaleCloudProvider) Refresh() error {
|
|||
return e.manager.Refresh()
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (e *exoscaleCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// BuildExoscale builds the Exoscale cloud provider.
|
||||
func BuildExoscale(_ config.AutoscalingOptions, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
|
||||
manager, err := newManager(discoveryOpts)
|
||||
|
|
|
|||
|
|
@ -316,6 +316,11 @@ func (e *externalGrpcCloudProvider) Refresh() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (e *externalGrpcCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// BuildExternalGrpc builds the externalgrpc cloud provider.
|
||||
func BuildExternalGrpc(
|
||||
opts config.AutoscalingOptions,
|
||||
|
|
|
|||
|
|
@ -148,6 +148,11 @@ func (gce *GceCloudProvider) Refresh() error {
|
|||
return gce.gceManager.Refresh()
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (gce *GceCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// GceRef contains a reference to some entity in GCE world.
|
||||
type GceRef struct {
|
||||
Project string
|
||||
|
|
|
|||
|
|
@ -182,6 +182,11 @@ func (d *HetznerCloudProvider) Refresh() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (d *HetznerCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// BuildHetzner builds the Hetzner cloud provider.
|
||||
func BuildHetzner(_ config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
|
||||
manager, err := newManager()
|
||||
|
|
|
|||
|
|
@ -179,6 +179,11 @@ func (hcp *huaweicloudCloudProvider) Refresh() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (hcp *huaweicloudCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
func (hcp *huaweicloudCloudProvider) buildAsgs(specs []string) error {
|
||||
asgs, err := hcp.cloudServiceManager.ListScalingGroups()
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@ -308,6 +308,11 @@ func (ic *IonosCloudCloudProvider) Refresh() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (ic *IonosCloudCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// BuildIonosCloud builds the IonosCloud cloud provider.
|
||||
func BuildIonosCloud(
|
||||
opts config.AutoscalingOptions,
|
||||
|
|
|
|||
|
|
@ -129,6 +129,11 @@ func (k *kamateraCloudProvider) Refresh() error {
|
|||
return k.manager.refresh()
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (k *kamateraCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// BuildKamatera builds the Kamatera cloud provider.
|
||||
func BuildKamatera(
|
||||
opts config.AutoscalingOptions,
|
||||
|
|
|
|||
|
|
@ -180,6 +180,11 @@ func (kubemark *KubemarkCloudProvider) Cleanup() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (kubemark *KubemarkCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// NodeGroup implements NodeGroup interface.
|
||||
type NodeGroup struct {
|
||||
Name string
|
||||
|
|
|
|||
|
|
@ -170,6 +170,11 @@ func (kwok *KwokCloudProvider) Cleanup() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (kwok *KwokCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// BuildKwok builds kwok cloud provider.
|
||||
func BuildKwok(opts config.AutoscalingOptions,
|
||||
do cloudprovider.NodeGroupDiscoveryOptions,
|
||||
|
|
|
|||
|
|
@ -151,6 +151,11 @@ func (l *linodeCloudProvider) Refresh() error {
|
|||
return l.manager.refresh()
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (l *linodeCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
func newLinodeCloudProvider(config io.Reader, rl *cloudprovider.ResourceLimiter) (cloudprovider.CloudProvider, error) {
|
||||
m, err := newManager(config)
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@ -203,6 +203,11 @@ func (mcp *magnumCloudProvider) Cleanup() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (mcp *magnumCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// refreshNodeGroups gets the list of node groups which meet the requirements for autoscaling,
|
||||
// creates magnumNodeGroups for any that do not exist in the cloud provider,
|
||||
// and drops any node groups which are present in the cloud provider but not in the
|
||||
|
|
|
|||
|
|
@ -276,3 +276,8 @@ func (_m *CloudProvider) Refresh() error {
|
|||
|
||||
return r0
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented here: the method always returns true together with
// cloudprovider.ErrNotImplemented.
|
||||
func (_m *CloudProvider) IsNodeCandidateForScaleDown(node *v1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
|
|
|||
|
|
@ -147,6 +147,11 @@ func (ocp *OciCloudProvider) Refresh() error {
|
|||
return ocp.poolManager.Refresh()
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (ocp *OciCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// BuildOCI constructs the OciCloudProvider object that implements the cloud provider interface (InstancePoolManager).
|
||||
func BuildOCI(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
|
||||
ocidType, err := ocicommon.GetAllPoolTypes(opts.NodeGroups)
|
||||
|
|
|
|||
|
|
@ -149,3 +149,8 @@ func (ocp *OciCloudProvider) Cleanup() error {
|
|||
func (ocp *OciCloudProvider) Refresh() error {
|
||||
return ocp.manager.Refresh()
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (ocp *OciCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
|
|
|||
|
|
@ -318,3 +318,8 @@ func (provider *OVHCloudProvider) Refresh() error {
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (provider *OVHCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
|
|
|||
|
|
@ -219,6 +219,11 @@ func (provider *RancherCloudProvider) Cleanup() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (provider *RancherCloudProvider) IsNodeCandidateForScaleDown(node *corev1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
func (provider *RancherCloudProvider) scalableNodeGroups() ([]*nodeGroup, error) {
|
||||
var result []*nodeGroup
|
||||
|
||||
|
|
|
|||
|
|
@ -277,3 +277,8 @@ func (scw *scalewayCloudProvider) Refresh() error {
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (scw *scalewayCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
|
|
|||
|
|
@ -173,6 +173,11 @@ func (tencentcloud *tencentCloudProvider) Refresh() error {
|
|||
return tencentcloud.tencentcloudManager.Refresh()
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (tencentcloud *tencentCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// BuildTencentcloud returns tencentcloud provider
|
||||
func BuildTencentcloud(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
|
||||
var config io.ReadCloser
|
||||
|
|
|
|||
|
|
@ -45,20 +45,25 @@ type OnNodeGroupDeleteFunc func(string) error
|
|||
// HasInstance is a function called to determine if a node has been removed from the cloud provider.
|
||||
type HasInstance func(string) (bool, error)
|
||||
|
||||
// IsNodeCandidateForScaleDown is a function called to determine if a cloud provider considers
|
||||
// a node a good candidate for scaling down.
// It receives the node under consideration and returns the verdict along with an error.
|
||||
type IsNodeCandidateForScaleDown func(*apiv1.Node) (bool, error)
|
||||
|
||||
// TestCloudProvider is a dummy cloud provider to be used in tests.
|
||||
type TestCloudProvider struct {
|
||||
sync.Mutex
|
||||
nodes map[string]string
|
||||
groups map[string]cloudprovider.NodeGroup
|
||||
onScaleUp func(string, int) error
|
||||
onScaleDown func(string, string) error
|
||||
onNodeGroupCreate func(string) error
|
||||
onNodeGroupDelete func(string) error
|
||||
hasInstance func(string) (bool, error)
|
||||
machineTypes []string
|
||||
machineTemplates map[string]*framework.NodeInfo
|
||||
priceModel cloudprovider.PricingModel
|
||||
resourceLimiter *cloudprovider.ResourceLimiter
|
||||
nodes map[string]string
|
||||
groups map[string]cloudprovider.NodeGroup
|
||||
onScaleUp func(string, int) error
|
||||
onScaleDown func(string, string) error
|
||||
onNodeGroupCreate func(string) error
|
||||
onNodeGroupDelete func(string) error
|
||||
hasInstance func(string) (bool, error)
|
||||
isNodeCandidateForScaleDown func(*apiv1.Node) (bool, error)
|
||||
machineTypes []string
|
||||
machineTemplates map[string]*framework.NodeInfo
|
||||
priceModel cloudprovider.PricingModel
|
||||
resourceLimiter *cloudprovider.ResourceLimiter
|
||||
}
|
||||
|
||||
// TestCloudProviderBuilder is used to create CloudProvider
|
||||
|
|
@ -127,6 +132,14 @@ func (b *TestCloudProviderBuilder) WithHasInstance(hasInstance HasInstance) *Tes
|
|||
return b
|
||||
}
|
||||
|
||||
// WithIsNodeCandidateForScaleDown adds an IsNodeCandidateForScaleDown handler to provider.
|
||||
func (b *TestCloudProviderBuilder) WithIsNodeCandidateForScaleDown(isNodeCandidateForScaleDown IsNodeCandidateForScaleDown) *TestCloudProviderBuilder {
|
||||
b.builders = append(b.builders, func(p *TestCloudProvider) {
|
||||
p.isNodeCandidateForScaleDown = isNodeCandidateForScaleDown
|
||||
})
|
||||
return b
|
||||
}
|
||||
|
||||
// Build returns a built test cloud provider
|
||||
func (b *TestCloudProviderBuilder) Build() *TestCloudProvider {
|
||||
p := &TestCloudProvider{
|
||||
|
|
@ -352,6 +365,15 @@ func (tcp *TestCloudProvider) Refresh() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
|
||||
func (tcp *TestCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
if tcp.isNodeCandidateForScaleDown == nil {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
return tcp.isNodeCandidateForScaleDown(node)
|
||||
}
|
||||
|
||||
// TestNodeGroup is a node group used by TestCloudProvider.
|
||||
type TestNodeGroup struct {
|
||||
sync.Mutex
|
||||
|
|
|
|||
|
|
@ -162,6 +162,11 @@ func (u *uthoCloudProvider) Refresh() error {
|
|||
return u.manager.Refresh()
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (u *uthoCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// BuildUtho builds the Utho cloud provider.
|
||||
func BuildUtho(
|
||||
opts config.AutoscalingOptions,
|
||||
|
|
|
|||
|
|
@ -120,6 +120,11 @@ func (v *volcengineCloudProvider) Refresh() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (v *volcengineCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// GetNodeGpuConfig returns the label, type and resource name for the GPU added to node. If node doesn't have
|
||||
// any GPUs, it returns nil.
|
||||
func (v *volcengineCloudProvider) GetNodeGpuConfig(node *apiv1.Node) *cloudprovider.GpuConfig {
|
||||
|
|
|
|||
|
|
@ -140,6 +140,11 @@ func (v *vultrCloudProvider) Refresh() error {
|
|||
return v.manager.Refresh()
|
||||
}
|
||||
|
||||
// IsNodeCandidateForScaleDown returns whether the node is a good candidate for scaling down.
// The check is not implemented for this provider: the method always returns true together
// with cloudprovider.ErrNotImplemented.
|
||||
func (v *vultrCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
|
||||
return true, cloudprovider.ErrNotImplemented
|
||||
}
|
||||
|
||||
// toProviderID returns a provider ID from the given node ID.
|
||||
func toProviderID(nodeID string) string {
|
||||
return fmt.Sprintf("%s%s", vultrProviderIDPrefix, nodeID)
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ import (
|
|||
apiv1 "k8s.io/api/core/v1"
|
||||
klog "k8s.io/klog/v2"
|
||||
|
||||
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/context"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/utils"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
|
||||
|
|
@ -48,6 +49,15 @@ func (n *PreFilteringScaleDownNodeProcessor) GetScaleDownCandidates(ctx *context
|
|||
nodeGroupSize := utils.GetNodeGroupSizeMap(ctx.CloudProvider)
|
||||
|
||||
for _, node := range nodes {
|
||||
if candidate, err := ctx.CloudProvider.IsNodeCandidateForScaleDown(node); err != nil {
|
||||
if err != cloudprovider.ErrNotImplemented {
|
||||
klog.Warningf("Error while checking if node is a candidate for deletion %s: %v", node.Name, err)
|
||||
continue
|
||||
}
|
||||
} else if !candidate {
|
||||
klog.V(5).Infof("Node %s is not a candidate for deletion (cloud provider determined)", node.Name)
|
||||
continue
|
||||
}
|
||||
nodeGroup, err := ctx.CloudProvider.NodeGroupForNode(node)
|
||||
if err != nil {
|
||||
klog.Warningf("Error while checking node group for %s: %v", node.Name, err)
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ limitations under the License.
|
|||
package nodes
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
|
@ -44,23 +45,75 @@ func TestPreFilteringScaleDownNodeProcessor_GetScaleDownCandidateNodes(t *testin
|
|||
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
|
||||
ng1_2 := BuildTestNode("ng1-2", 1000, 1000)
|
||||
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
|
||||
ng2_2 := BuildTestNode("ng2-2", 1000, 1000)
|
||||
noNg := BuildTestNode("no-ng", 1000, 1000)
|
||||
provider := testprovider.NewTestCloudProviderBuilder().Build()
|
||||
provider.AddNodeGroup("ng1", 1, 10, 2)
|
||||
provider.AddNodeGroup("ng2", 1, 10, 1)
|
||||
provider.AddNode("ng1", ng1_1)
|
||||
provider.AddNode("ng1", ng1_2)
|
||||
provider.AddNode("ng2", ng2_1)
|
||||
|
||||
ctx := &context.AutoscalingContext{
|
||||
CloudProvider: provider,
|
||||
testCases := map[string]struct {
|
||||
buildProvider func() *testprovider.TestCloudProvider
|
||||
configureProvider func(p *testprovider.TestCloudProvider)
|
||||
expectedNodes []*apiv1.Node
|
||||
inputNodes []*apiv1.Node
|
||||
}{
|
||||
// Expectation: only node groups not at minimum size should be candidates.
|
||||
"1 scale down candidate, 1 node group at minimum size, 1 node with no node group, 1 node group above minimum size.": {
|
||||
configureProvider: func(p *testprovider.TestCloudProvider) {
|
||||
p.AddNodeGroup("ng1", 1, 10, 2)
|
||||
p.AddNodeGroup("ng2", 1, 10, 1)
|
||||
p.AddNode("ng1", ng1_1)
|
||||
p.AddNode("ng1", ng1_2)
|
||||
p.AddNode("ng2", ng2_1)
|
||||
},
|
||||
expectedNodes: []*apiv1.Node{ng1_1, ng1_2},
|
||||
inputNodes: []*apiv1.Node{ng1_1, ng1_2, ng2_1, noNg},
|
||||
},
|
||||
// Expectation: only node groups that contain nodes the cloud provider considers candidates for deletion should be candidates.
|
||||
"1 scale down candidate, 1 node group with nodes that are not candidates for deletion, 1 node group above minimum size.": {
|
||||
buildProvider: func() *testprovider.TestCloudProvider {
|
||||
provider := testprovider.
|
||||
NewTestCloudProviderBuilder().
|
||||
WithIsNodeCandidateForScaleDown(func(n *apiv1.Node) (bool, error) {
|
||||
if strings.HasPrefix(n.Name, "ng2") {
|
||||
return false, nil
|
||||
}
|
||||
return true, nil
|
||||
}).
|
||||
Build()
|
||||
return provider
|
||||
},
|
||||
configureProvider: func(p *testprovider.TestCloudProvider) {
|
||||
p.AddNodeGroup("ng1", 1, 10, 2)
|
||||
p.AddNodeGroup("ng2", 1, 10, 2)
|
||||
p.AddNode("ng1", ng1_1)
|
||||
p.AddNode("ng1", ng1_2)
|
||||
p.AddNode("ng2", ng2_1)
|
||||
p.AddNode("ng2", ng2_2)
|
||||
},
|
||||
expectedNodes: []*apiv1.Node{ng1_1, ng1_2},
|
||||
inputNodes: []*apiv1.Node{ng1_1, ng1_2, ng2_1, ng2_2},
|
||||
},
|
||||
}
|
||||
|
||||
expectedNodes := []*apiv1.Node{ng1_1, ng1_2}
|
||||
defaultProcessor := NewPreFilteringScaleDownNodeProcessor()
|
||||
inputNodes := []*apiv1.Node{ng1_1, ng1_2, ng2_1, noNg}
|
||||
result, err := defaultProcessor.GetScaleDownCandidates(ctx, inputNodes)
|
||||
for description, testCase := range testCases {
|
||||
t.Run(description, func(t *testing.T) {
|
||||
var provider *testprovider.TestCloudProvider
|
||||
if testCase.buildProvider == nil {
|
||||
provider = testprovider.NewTestCloudProviderBuilder().Build()
|
||||
} else {
|
||||
provider = testCase.buildProvider()
|
||||
}
|
||||
assert.NotNil(t, provider)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, result, expectedNodes)
|
||||
testCase.configureProvider(provider)
|
||||
|
||||
ctx := &context.AutoscalingContext{
|
||||
CloudProvider: provider,
|
||||
}
|
||||
|
||||
defaultProcessor := NewPreFilteringScaleDownNodeProcessor()
|
||||
result, err := defaultProcessor.GetScaleDownCandidates(ctx, testCase.inputNodes)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, result, testCase.expectedNodes)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue