Allow balancing by labels exclusively
Adds a new flag `--balance-label` which allows users to balance between node groups exclusively via labels. This gives users the flexibility to specify the similarity logic themselves when --balance-similar-node-groups is in use.
This commit is contained in:
parent
0fcbac8a8f
commit
1b98b3823a
|
|
@ -753,6 +753,7 @@ The following startup parameters are supported for cluster autoscaler:
|
|||
| `max-failing-time` | Maximum time from last recorded successful autoscaler run before automatic restart | 15 minutes
|
||||
| `balance-similar-node-groups` | Detect similar node groups and balance the number of nodes between them | false
|
||||
| `balancing-ignore-label` | Define a node label that should be ignored when considering node group similarity. One label per flag occurrence. | ""
|
||||
| `balancing-label` | Define a node label to use when comparing node group similarity. If set, all other comparison logic is disabled, and only labels are considered when comparing groups. One label per flag occurrence. | ""
|
||||
| `node-autoprovisioning-enabled` | Should CA autoprovision node groups when needed | false
|
||||
| `max-autoprovisioned-node-group-count` | The maximum number of autoprovisioned groups in the cluster | 15
|
||||
| `unremovable-node-recheck-timeout` | The timeout before we check again a node that couldn't be removed before | 5 minutes
|
||||
|
|
|
|||
|
|
@ -362,6 +362,10 @@ spec:
|
|||
- i3.2xlarge
|
||||
```
|
||||
|
||||
Similarly, if using the `balancing-label` flag, you should only choose labels which have the same value for all nodes in
|
||||
the node group. Otherwise you may get unexpected results, as the flag values will vary based on the nodes created by
|
||||
the ASG.
|
||||
|
||||
### Example usage:
|
||||
|
||||
- Create a [Launch
|
||||
|
|
|
|||
|
|
@ -154,6 +154,9 @@ type AutoscalingOptions struct {
|
|||
// BalancingExtraIgnoredLabels is a list of labels to additionally ignore when comparing if two node groups are similar.
|
||||
// Labels in BasicIgnoredLabels and the cloud provider-specific ignored labels are always ignored.
|
||||
BalancingExtraIgnoredLabels []string
|
||||
// BalancingLabels is a list of labels to use when comparing if two node groups are similar.
|
||||
// If this is set, only labels are used to compare node groups. It is mutually exclusive with BalancingExtraIgnoredLabels.
|
||||
BalancingLabels []string
|
||||
// AWSUseStaticInstanceList tells if AWS cloud provider use static instance type list or dynamically fetch from remote APIs.
|
||||
AWSUseStaticInstanceList bool
|
||||
// ConcurrentGceRefreshes is the maximum number of concurrently refreshed instance groups or instance templates.
|
||||
|
|
|
|||
|
|
@ -180,6 +180,7 @@ var (
|
|||
|
||||
ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group")
|
||||
balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar")
|
||||
balancingLabelsFlag = multiStringFlag("balancing-label", "Specifies a label to use for comparing if two node groups are similar, rather than the built in heuristics. Setting this flag disables all other comparison logic, and cannot be combined with --balancing-ignore-label.")
|
||||
awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only")
|
||||
concurrentGceRefreshes = flag.Int("gce-concurrent-refreshes", 1, "Maximum number of concurrent refreshes per cloud object type.")
|
||||
enableProfiling = flag.Bool("profiling", false, "Is debug/pprof endpoint enabled")
|
||||
|
|
@ -275,6 +276,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
|
|||
NewPodScaleUpDelay: *newPodScaleUpDelay,
|
||||
IgnoredTaints: *ignoreTaintsFlag,
|
||||
BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag,
|
||||
BalancingLabels: *balancingLabelsFlag,
|
||||
KubeConfigPath: *kubeConfigFile,
|
||||
NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout,
|
||||
AWSUseStaticInstanceList: *awsUseStaticInstanceList,
|
||||
|
|
@ -356,20 +358,26 @@ func buildAutoscaler(debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter
|
|||
opts.Processors.TemplateNodeInfoProvider = nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nodeInfoCacheExpireTime)
|
||||
opts.Processors.PodListProcessor = filteroutschedulable.NewFilterOutSchedulablePodListProcessor()
|
||||
|
||||
nodeInfoComparatorBuilder := nodegroupset.CreateGenericNodeInfoComparator
|
||||
if autoscalingOptions.CloudProviderName == cloudprovider.AzureProviderName {
|
||||
nodeInfoComparatorBuilder = nodegroupset.CreateAzureNodeInfoComparator
|
||||
} else if autoscalingOptions.CloudProviderName == cloudprovider.AwsProviderName {
|
||||
nodeInfoComparatorBuilder = nodegroupset.CreateAwsNodeInfoComparator
|
||||
} else if autoscalingOptions.CloudProviderName == cloudprovider.GceProviderName {
|
||||
nodeInfoComparatorBuilder = nodegroupset.CreateGceNodeInfoComparator
|
||||
opts.Processors.TemplateNodeInfoProvider = nodeinfosprovider.NewAnnotationNodeInfoProvider(nodeInfoCacheExpireTime)
|
||||
} else if autoscalingOptions.CloudProviderName == cloudprovider.ClusterAPIProviderName {
|
||||
nodeInfoComparatorBuilder = nodegroupset.CreateClusterAPINodeInfoComparator
|
||||
var nodeInfoComparator nodegroupset.NodeInfoComparator
|
||||
if len(autoscalingOptions.BalancingLabels) > 0 {
|
||||
nodeInfoComparator = nodegroupset.CreateLabelNodeInfoComparator(autoscalingOptions.BalancingLabels)
|
||||
} else {
|
||||
nodeInfoComparatorBuilder := nodegroupset.CreateGenericNodeInfoComparator
|
||||
if autoscalingOptions.CloudProviderName == cloudprovider.AzureProviderName {
|
||||
nodeInfoComparatorBuilder = nodegroupset.CreateAzureNodeInfoComparator
|
||||
} else if autoscalingOptions.CloudProviderName == cloudprovider.AwsProviderName {
|
||||
nodeInfoComparatorBuilder = nodegroupset.CreateAwsNodeInfoComparator
|
||||
} else if autoscalingOptions.CloudProviderName == cloudprovider.GceProviderName {
|
||||
nodeInfoComparatorBuilder = nodegroupset.CreateGceNodeInfoComparator
|
||||
opts.Processors.TemplateNodeInfoProvider = nodeinfosprovider.NewAnnotationNodeInfoProvider(nodeInfoCacheExpireTime)
|
||||
} else if autoscalingOptions.CloudProviderName == cloudprovider.ClusterAPIProviderName {
|
||||
nodeInfoComparatorBuilder = nodegroupset.CreateClusterAPINodeInfoComparator
|
||||
}
|
||||
nodeInfoComparator = nodeInfoComparatorBuilder(autoscalingOptions.BalancingExtraIgnoredLabels)
|
||||
}
|
||||
|
||||
opts.Processors.NodeGroupSetProcessor = &nodegroupset.BalancingNodeGroupSetProcessor{
|
||||
Comparator: nodeInfoComparatorBuilder(autoscalingOptions.BalancingExtraIgnoredLabels),
|
||||
Comparator: nodeInfoComparator,
|
||||
}
|
||||
|
||||
// These metrics should be published only once.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
Copyright 2021 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package nodegroupset
|
||||
|
||||
import (
|
||||
klog "k8s.io/klog/v2"
|
||||
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
)
|
||||
|
||||
// CreateLabelNodeInfoComparator returns a comparator that checks for node group similarity using the given labels.
|
||||
func CreateLabelNodeInfoComparator(labels []string) NodeInfoComparator {
|
||||
return func(n1, n2 *schedulerframework.NodeInfo) bool {
|
||||
return areLabelsSame(n1, n2, labels)
|
||||
}
|
||||
}
|
||||
|
||||
func areLabelsSame(n1, n2 *schedulerframework.NodeInfo, labels []string) bool {
|
||||
for _, label := range labels {
|
||||
val1, exists := n1.Node().ObjectMeta.Labels[label]
|
||||
if !exists {
|
||||
klog.V(8).Infof("%s label not present on %s", label, n1.Node().Name)
|
||||
return false
|
||||
}
|
||||
val2, exists := n2.Node().ObjectMeta.Labels[label]
|
||||
if !exists {
|
||||
klog.V(8).Infof("%s label not present on %s", label, n1.Node().Name)
|
||||
return false
|
||||
}
|
||||
if val1 != val2 {
|
||||
klog.V(8).Infof("%s label did not match. %s: %s, %s: %s", label, n1.Node().Name, val1, n2.Node().Name, val2)
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
/*
|
||||
Copyright 2021 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package nodegroupset
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
|
||||
)
|
||||
|
||||
func TestNodeLabelComparison(t *testing.T) {
|
||||
labels := []string{"node.kubernetes.io/instance-type", "kubernetes.io/arch"}
|
||||
comparator := CreateLabelNodeInfoComparator(labels)
|
||||
node1 := BuildTestNode("node1", 1000, 2000)
|
||||
node2 := BuildTestNode("node2", 1000, 2000)
|
||||
|
||||
for _, tc := range []struct {
|
||||
description string
|
||||
labels1 map[string]string
|
||||
labels2 map[string]string
|
||||
isSimilar bool
|
||||
}{
|
||||
{
|
||||
description: "both labels match",
|
||||
labels1: map[string]string{"node.kubernetes.io/instance-type": "m5.4xlarge", "kubernetes.io/arch": "amd64"},
|
||||
labels2: map[string]string{"node.kubernetes.io/instance-type": "m5.4xlarge", "kubernetes.io/arch": "amd64"},
|
||||
isSimilar: true,
|
||||
},
|
||||
{
|
||||
description: "one label doesn't match",
|
||||
labels1: map[string]string{"node.kubernetes.io/instance-type": "m5.4xlarge", "kubernetes.io/arch": "amd64"},
|
||||
labels2: map[string]string{"node.kubernetes.io/instance-type": "m5.4xlarge", "kubernetes.io/arch": "i386"},
|
||||
isSimilar: false,
|
||||
},
|
||||
{
|
||||
description: "unspecified labels are not considered",
|
||||
labels1: map[string]string{"node.kubernetes.io/instance-type": "m5.4xlarge", "kubernetes.io/arch": "amd64", "unspecified-label": "eu-west1"},
|
||||
labels2: map[string]string{"node.kubernetes.io/instance-type": "m5.4xlarge", "kubernetes.io/arch": "amd64", "unspecified-label": "eu-west2"},
|
||||
isSimilar: true,
|
||||
},
|
||||
{
|
||||
description: "no labels are set",
|
||||
labels1: map[string]string{},
|
||||
labels2: map[string]string{},
|
||||
isSimilar: false,
|
||||
},
|
||||
{
|
||||
description: "single label matches, label is unset on second group",
|
||||
labels1: map[string]string{"node.kubernetes.io/instance-type": "m5.4xlarge", "kubernetes.io/arch": "amd64"},
|
||||
labels2: map[string]string{"kubernetes.io/arch": "amd64"},
|
||||
isSimilar: false,
|
||||
},
|
||||
{
|
||||
description: "single label matches, label is unset on first group",
|
||||
labels1: map[string]string{"kubernetes.io/arch": "amd64"},
|
||||
labels2: map[string]string{"node.kubernetes.io/instance-type": "m5.4xlarge", "kubernetes.io/arch": "amd64"},
|
||||
isSimilar: false,
|
||||
},
|
||||
{
|
||||
description: "labels are explicitly set to be empty",
|
||||
labels1: map[string]string{"node.kubernetes.io/instance-type": "", "kubernetes.io/arch": ""},
|
||||
labels2: map[string]string{"node.kubernetes.io/instance-type": "", "kubernetes.io/arch": ""},
|
||||
isSimilar: true,
|
||||
},
|
||||
{
|
||||
description: "one labels is explicitly set to be empty",
|
||||
labels1: map[string]string{"node.kubernetes.io/instance-type": "", "kubernetes.io/arch": "amd64"},
|
||||
labels2: map[string]string{"node.kubernetes.io/instance-type": "", "kubernetes.io/arch": "amd64"},
|
||||
isSimilar: true,
|
||||
},
|
||||
} {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
node1.ObjectMeta.Labels = tc.labels1
|
||||
node2.ObjectMeta.Labels = tc.labels2
|
||||
checkNodesSimilar(t, node1, node2, comparator, tc.isSimilar)
|
||||
})
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue