Allow balancing by labels exclusively
Adds a new flag `--balancing-label` which allows users to balance between node groups exclusively via labels. This gives users the flexibility to specify the similarity logic themselves when `--balance-similar-node-groups` is in use.
This commit is contained in:
parent
0fcbac8a8f
commit
1b98b3823a
|
|
@ -753,6 +753,7 @@ The following startup parameters are supported for cluster autoscaler:
|
||||||
| `max-failing-time` | Maximum time from last recorded successful autoscaler run before automatic restart | 15 minutes
|
| `max-failing-time` | Maximum time from last recorded successful autoscaler run before automatic restart | 15 minutes
|
||||||
| `balance-similar-node-groups` | Detect similar node groups and balance the number of nodes between them | false
|
| `balance-similar-node-groups` | Detect similar node groups and balance the number of nodes between them | false
|
||||||
| `balancing-ignore-label` | Define a node label that should be ignored when considering node group similarity. One label per flag occurrence. | ""
|
| `balancing-ignore-label` | Define a node label that should be ignored when considering node group similarity. One label per flag occurrence. | ""
|
||||||
|
| `balancing-label` | Define a node label to use when comparing node group similarity. If set, all other comparison logic is disabled, and only labels are considered when comparing groups. One label per flag occurrence. | ""
|
||||||
| `node-autoprovisioning-enabled` | Should CA autoprovision node groups when needed | false
|
| `node-autoprovisioning-enabled` | Should CA autoprovision node groups when needed | false
|
||||||
| `max-autoprovisioned-node-group-count` | The maximum number of autoprovisioned groups in the cluster | 15
|
| `max-autoprovisioned-node-group-count` | The maximum number of autoprovisioned groups in the cluster | 15
|
||||||
| `unremovable-node-recheck-timeout` | The timeout before we check again a node that couldn't be removed before | 5 minutes
|
| `unremovable-node-recheck-timeout` | The timeout before we check again a node that couldn't be removed before | 5 minutes
|
||||||
|
|
|
||||||
|
|
@ -362,6 +362,10 @@ spec:
|
||||||
- i3.2xlarge
|
- i3.2xlarge
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Similarly, if using the `balancing-label` flag, you should only choose labels which have the same value for all nodes in
|
||||||
|
the node group. Otherwise you may get unexpected results, as the label values will vary based on the nodes created by
|
||||||
|
the ASG.
|
||||||
|
|
||||||
### Example usage:
|
### Example usage:
|
||||||
|
|
||||||
- Create a [Launch
|
- Create a [Launch
|
||||||
|
|
|
||||||
|
|
@ -154,6 +154,9 @@ type AutoscalingOptions struct {
|
||||||
// BalancingExtraIgnoredLabels is a list of labels to additionally ignore when comparing if two node groups are similar.
|
// BalancingExtraIgnoredLabels is a list of labels to additionally ignore when comparing if two node groups are similar.
|
||||||
// Labels in BasicIgnoredLabels and the cloud provider-specific ignored labels are always ignored.
|
// Labels in BasicIgnoredLabels and the cloud provider-specific ignored labels are always ignored.
|
||||||
BalancingExtraIgnoredLabels []string
|
BalancingExtraIgnoredLabels []string
|
||||||
|
// BalancingLabels is a list of labels to use when comparing if two node groups are similar.
|
||||||
|
// If this is set, only labels are used to compare node groups. It is mutually exclusive with BalancingExtraIgnoredLabels.
|
||||||
|
BalancingLabels []string
|
||||||
// AWSUseStaticInstanceList tells if AWS cloud provider use static instance type list or dynamically fetch from remote APIs.
|
// AWSUseStaticInstanceList tells if AWS cloud provider use static instance type list or dynamically fetch from remote APIs.
|
||||||
AWSUseStaticInstanceList bool
|
AWSUseStaticInstanceList bool
|
||||||
// ConcurrentGceRefreshes is the maximum number of concurrently refreshed instance groups or instance templates.
|
// ConcurrentGceRefreshes is the maximum number of concurrently refreshed instance groups or instance templates.
|
||||||
|
|
|
||||||
|
|
@ -180,6 +180,7 @@ var (
|
||||||
|
|
||||||
ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group")
|
ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group")
|
||||||
balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar")
|
balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar")
|
||||||
|
balancingLabelsFlag = multiStringFlag("balancing-label", "Specifies a label to use for comparing if two node groups are similar, rather than the built in heuristics. Setting this flag disables all other comparison logic, and cannot be combined with --balancing-ignore-label.")
|
||||||
awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only")
|
awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only")
|
||||||
concurrentGceRefreshes = flag.Int("gce-concurrent-refreshes", 1, "Maximum number of concurrent refreshes per cloud object type.")
|
concurrentGceRefreshes = flag.Int("gce-concurrent-refreshes", 1, "Maximum number of concurrent refreshes per cloud object type.")
|
||||||
enableProfiling = flag.Bool("profiling", false, "Is debug/pprof endpoint enabled")
|
enableProfiling = flag.Bool("profiling", false, "Is debug/pprof endpoint enabled")
|
||||||
|
|
@ -275,6 +276,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
|
||||||
NewPodScaleUpDelay: *newPodScaleUpDelay,
|
NewPodScaleUpDelay: *newPodScaleUpDelay,
|
||||||
IgnoredTaints: *ignoreTaintsFlag,
|
IgnoredTaints: *ignoreTaintsFlag,
|
||||||
BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag,
|
BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag,
|
||||||
|
BalancingLabels: *balancingLabelsFlag,
|
||||||
KubeConfigPath: *kubeConfigFile,
|
KubeConfigPath: *kubeConfigFile,
|
||||||
NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout,
|
NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout,
|
||||||
AWSUseStaticInstanceList: *awsUseStaticInstanceList,
|
AWSUseStaticInstanceList: *awsUseStaticInstanceList,
|
||||||
|
|
@ -356,6 +358,10 @@ func buildAutoscaler(debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter
|
||||||
opts.Processors.TemplateNodeInfoProvider = nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nodeInfoCacheExpireTime)
|
opts.Processors.TemplateNodeInfoProvider = nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nodeInfoCacheExpireTime)
|
||||||
opts.Processors.PodListProcessor = filteroutschedulable.NewFilterOutSchedulablePodListProcessor()
|
opts.Processors.PodListProcessor = filteroutschedulable.NewFilterOutSchedulablePodListProcessor()
|
||||||
|
|
||||||
|
var nodeInfoComparator nodegroupset.NodeInfoComparator
|
||||||
|
if len(autoscalingOptions.BalancingLabels) > 0 {
|
||||||
|
nodeInfoComparator = nodegroupset.CreateLabelNodeInfoComparator(autoscalingOptions.BalancingLabels)
|
||||||
|
} else {
|
||||||
nodeInfoComparatorBuilder := nodegroupset.CreateGenericNodeInfoComparator
|
nodeInfoComparatorBuilder := nodegroupset.CreateGenericNodeInfoComparator
|
||||||
if autoscalingOptions.CloudProviderName == cloudprovider.AzureProviderName {
|
if autoscalingOptions.CloudProviderName == cloudprovider.AzureProviderName {
|
||||||
nodeInfoComparatorBuilder = nodegroupset.CreateAzureNodeInfoComparator
|
nodeInfoComparatorBuilder = nodegroupset.CreateAzureNodeInfoComparator
|
||||||
|
|
@ -367,9 +373,11 @@ func buildAutoscaler(debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter
|
||||||
} else if autoscalingOptions.CloudProviderName == cloudprovider.ClusterAPIProviderName {
|
} else if autoscalingOptions.CloudProviderName == cloudprovider.ClusterAPIProviderName {
|
||||||
nodeInfoComparatorBuilder = nodegroupset.CreateClusterAPINodeInfoComparator
|
nodeInfoComparatorBuilder = nodegroupset.CreateClusterAPINodeInfoComparator
|
||||||
}
|
}
|
||||||
|
nodeInfoComparator = nodeInfoComparatorBuilder(autoscalingOptions.BalancingExtraIgnoredLabels)
|
||||||
|
}
|
||||||
|
|
||||||
opts.Processors.NodeGroupSetProcessor = &nodegroupset.BalancingNodeGroupSetProcessor{
|
opts.Processors.NodeGroupSetProcessor = &nodegroupset.BalancingNodeGroupSetProcessor{
|
||||||
Comparator: nodeInfoComparatorBuilder(autoscalingOptions.BalancingExtraIgnoredLabels),
|
Comparator: nodeInfoComparator,
|
||||||
}
|
}
|
||||||
|
|
||||||
// These metrics should be published only once.
|
// These metrics should be published only once.
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,49 @@
|
||||||
|
/*
|
||||||
|
Copyright 2021 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package nodegroupset
|
||||||
|
|
||||||
|
import (
|
||||||
|
klog "k8s.io/klog/v2"
|
||||||
|
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
|
||||||
|
)
|
||||||
|
|
||||||
|
// CreateLabelNodeInfoComparator returns a comparator that checks for node group similarity using the given labels.
|
||||||
|
func CreateLabelNodeInfoComparator(labels []string) NodeInfoComparator {
|
||||||
|
return func(n1, n2 *schedulerframework.NodeInfo) bool {
|
||||||
|
return areLabelsSame(n1, n2, labels)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func areLabelsSame(n1, n2 *schedulerframework.NodeInfo, labels []string) bool {
|
||||||
|
for _, label := range labels {
|
||||||
|
val1, exists := n1.Node().ObjectMeta.Labels[label]
|
||||||
|
if !exists {
|
||||||
|
klog.V(8).Infof("%s label not present on %s", label, n1.Node().Name)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
val2, exists := n2.Node().ObjectMeta.Labels[label]
|
||||||
|
if !exists {
|
||||||
|
klog.V(8).Infof("%s label not present on %s", label, n1.Node().Name)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if val1 != val2 {
|
||||||
|
klog.V(8).Infof("%s label did not match. %s: %s, %s: %s", label, n1.Node().Name, val1, n2.Node().Name, val2)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,92 @@
|
||||||
|
/*
|
||||||
|
Copyright 2021 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package nodegroupset
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNodeLabelComparison(t *testing.T) {
|
||||||
|
labels := []string{"node.kubernetes.io/instance-type", "kubernetes.io/arch"}
|
||||||
|
comparator := CreateLabelNodeInfoComparator(labels)
|
||||||
|
node1 := BuildTestNode("node1", 1000, 2000)
|
||||||
|
node2 := BuildTestNode("node2", 1000, 2000)
|
||||||
|
|
||||||
|
for _, tc := range []struct {
|
||||||
|
description string
|
||||||
|
labels1 map[string]string
|
||||||
|
labels2 map[string]string
|
||||||
|
isSimilar bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
description: "both labels match",
|
||||||
|
labels1: map[string]string{"node.kubernetes.io/instance-type": "m5.4xlarge", "kubernetes.io/arch": "amd64"},
|
||||||
|
labels2: map[string]string{"node.kubernetes.io/instance-type": "m5.4xlarge", "kubernetes.io/arch": "amd64"},
|
||||||
|
isSimilar: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "one label doesn't match",
|
||||||
|
labels1: map[string]string{"node.kubernetes.io/instance-type": "m5.4xlarge", "kubernetes.io/arch": "amd64"},
|
||||||
|
labels2: map[string]string{"node.kubernetes.io/instance-type": "m5.4xlarge", "kubernetes.io/arch": "i386"},
|
||||||
|
isSimilar: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "unspecified labels are not considered",
|
||||||
|
labels1: map[string]string{"node.kubernetes.io/instance-type": "m5.4xlarge", "kubernetes.io/arch": "amd64", "unspecified-label": "eu-west1"},
|
||||||
|
labels2: map[string]string{"node.kubernetes.io/instance-type": "m5.4xlarge", "kubernetes.io/arch": "amd64", "unspecified-label": "eu-west2"},
|
||||||
|
isSimilar: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "no labels are set",
|
||||||
|
labels1: map[string]string{},
|
||||||
|
labels2: map[string]string{},
|
||||||
|
isSimilar: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "single label matches, label is unset on second group",
|
||||||
|
labels1: map[string]string{"node.kubernetes.io/instance-type": "m5.4xlarge", "kubernetes.io/arch": "amd64"},
|
||||||
|
labels2: map[string]string{"kubernetes.io/arch": "amd64"},
|
||||||
|
isSimilar: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "single label matches, label is unset on first group",
|
||||||
|
labels1: map[string]string{"kubernetes.io/arch": "amd64"},
|
||||||
|
labels2: map[string]string{"node.kubernetes.io/instance-type": "m5.4xlarge", "kubernetes.io/arch": "amd64"},
|
||||||
|
isSimilar: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "labels are explicitly set to be empty",
|
||||||
|
labels1: map[string]string{"node.kubernetes.io/instance-type": "", "kubernetes.io/arch": ""},
|
||||||
|
labels2: map[string]string{"node.kubernetes.io/instance-type": "", "kubernetes.io/arch": ""},
|
||||||
|
isSimilar: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "one labels is explicitly set to be empty",
|
||||||
|
labels1: map[string]string{"node.kubernetes.io/instance-type": "", "kubernetes.io/arch": "amd64"},
|
||||||
|
labels2: map[string]string{"node.kubernetes.io/instance-type": "", "kubernetes.io/arch": "amd64"},
|
||||||
|
isSimilar: true,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
|
node1.ObjectMeta.Labels = tc.labels1
|
||||||
|
node2.ObjectMeta.Labels = tc.labels2
|
||||||
|
checkNodesSimilar(t, node1, node2, comparator, tc.isSimilar)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue