From 01a56a8d73f537b8fb84c0892bb49534e7f4d7d4 Mon Sep 17 00:00:00 2001 From: Maciej Pytel Date: Thu, 25 Oct 2018 18:50:17 +0200 Subject: [PATCH] Add GKE-specific NodeGroupSet processor Also refactor Balancing processor a bit to make it easily extensible. --- cluster-autoscaler/main.go | 9 ++ .../nodegroupset/balancing_processor.go | 7 +- .../nodegroupset/balancing_processor_test.go | 8 +- .../nodegroupset/compare_nodegroups.go | 4 + .../nodegroupset/compare_nodegroups_test.go | 36 +++--- .../nodegroupset/gke_compare_nodegroups.go | 40 +++++++ .../gke_compare_nodegroups_test.go | 103 ++++++++++++++++++ 7 files changed, 186 insertions(+), 21 deletions(-) create mode 100644 cluster-autoscaler/processors/nodegroupset/gke_compare_nodegroups.go create mode 100644 cluster-autoscaler/processors/nodegroupset/gke_compare_nodegroups_test.go diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index 0feaace760..c16ea0c548 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -38,6 +38,8 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/estimator" "k8s.io/autoscaler/cluster-autoscaler/expander" "k8s.io/autoscaler/cluster-autoscaler/metrics" + ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" "k8s.io/autoscaler/cluster-autoscaler/utils/units" @@ -256,9 +258,16 @@ func buildAutoscaler() (core.Autoscaler, error) { // Create basic config from flags. autoscalingOptions := createAutoscalingOptions() kubeClient := createKubeClient(getKubeConfig()) + processors := ca_processors.DefaultProcessors() + if autoscalingOptions.CloudProviderName == "gke" { + processors.NodeGroupSetProcessor = &nodegroupset.BalancingNodeGroupSetProcessor{ + Comparator: nodegroupset.IsGkeNodeInfoSimilar} + + } opts := core.AutoscalerOptions{ AutoscalingOptions: autoscalingOptions, KubeClient: kubeClient, + Processors: processors, } // This metric should be published only once. diff --git a/cluster-autoscaler/processors/nodegroupset/balancing_processor.go b/cluster-autoscaler/processors/nodegroupset/balancing_processor.go index 43d7226452..4796e9d078 100644 --- a/cluster-autoscaler/processors/nodegroupset/balancing_processor.go +++ b/cluster-autoscaler/processors/nodegroupset/balancing_processor.go @@ -29,6 +29,7 @@ import ( // BalancingNodeGroupSetProcessor tries to keep similar node groups balanced on scale-up. type BalancingNodeGroupSetProcessor struct { + Comparator NodeInfoComparator } // FindSimilarNodeGroups returns a list of NodeGroups similar to the given one. @@ -55,7 +56,11 @@ func (b *BalancingNodeGroupSetProcessor) FindSimilarNodeGroups(context *context. glog.Warningf("Failed to find nodeInfo for group %v", ngId) continue } - if IsNodeInfoSimilar(nodeInfo, ngNodeInfo) { + comparator := b.Comparator + if comparator == nil { + comparator = IsNodeInfoSimilar + } + if comparator(nodeInfo, ngNodeInfo) { result = append(result, ng) } } diff --git a/cluster-autoscaler/processors/nodegroupset/balancing_processor_test.go b/cluster-autoscaler/processors/nodegroupset/balancing_processor_test.go index 8e7a87cc81..902822f8ba 100644 --- a/cluster-autoscaler/processors/nodegroupset/balancing_processor_test.go +++ b/cluster-autoscaler/processors/nodegroupset/balancing_processor_test.go @@ -28,8 +28,7 @@ import ( "github.com/stretchr/testify/assert" ) -func TestFindSimilarNodeGroups(t *testing.T) { - processor := &BalancingNodeGroupSetProcessor{} +func basicSimilarNodeGroupsTest(t *testing.T, processor NodeGroupSetProcessor) { context := &context.AutoscalingContext{} n1 := BuildTestNode("n1", 1000, 1000) @@ -72,6 +71,11 @@ func TestFindSimilarNodeGroups(t *testing.T) { assert.Equal(t, similar, []cloudprovider.NodeGroup{}) } +func TestFindSimilarNodeGroups(t *testing.T) { + processor := &BalancingNodeGroupSetProcessor{} + basicSimilarNodeGroupsTest(t, processor) +} + func TestBalanceSingleGroup(t *testing.T) { processor := &BalancingNodeGroupSetProcessor{} context := &context.AutoscalingContext{} diff --git a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go index 95ee93e869..c408069dae 100644 --- a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go +++ b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go @@ -34,6 +34,10 @@ const ( MaxFreeDifferenceRatio = 0.05 ) +// NodeInfoComparator is a function that tells if two nodes are from NodeGroups +// similar enough to be considered a part of a single NodeGroupSet. +type NodeInfoComparator func(n1, n2 *schedulercache.NodeInfo) bool + func compareResourceMapsWithTolerance(resources map[apiv1.ResourceName][]resource.Quantity, maxDifferenceRatio float64) bool { for _, qtyList := range resources { diff --git a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go index 782da9178d..6f15c8a26e 100644 --- a/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go +++ b/cluster-autoscaler/processors/nodegroupset/compare_nodegroups_test.go @@ -29,22 +29,22 @@ import ( "github.com/stretchr/testify/assert" ) -func checkNodesSimilar(t *testing.T, n1, n2 *apiv1.Node, shouldEqual bool) { - checkNodesSimilarWithPods(t, n1, n2, []*apiv1.Pod{}, []*apiv1.Pod{}, shouldEqual) +func checkNodesSimilar(t *testing.T, n1, n2 *apiv1.Node, comparator NodeInfoComparator, shouldEqual bool) { + checkNodesSimilarWithPods(t, n1, n2, []*apiv1.Pod{}, []*apiv1.Pod{}, comparator, shouldEqual) } -func checkNodesSimilarWithPods(t *testing.T, n1, n2 *apiv1.Node, pods1, pods2 []*apiv1.Pod, shouldEqual bool) { +func checkNodesSimilarWithPods(t *testing.T, n1, n2 *apiv1.Node, pods1, pods2 []*apiv1.Pod, comparator NodeInfoComparator, shouldEqual bool) { ni1 := schedulercache.NewNodeInfo(pods1...) ni1.SetNode(n1) ni2 := schedulercache.NewNodeInfo(pods2...) ni2.SetNode(n2) - assert.Equal(t, shouldEqual, IsNodeInfoSimilar(ni1, ni2)) + assert.Equal(t, shouldEqual, comparator(ni1, ni2)) } func TestIdenticalNodesSimilar(t *testing.T) { n1 := BuildTestNode("node1", 1000, 2000) n2 := BuildTestNode("node2", 1000, 2000) - checkNodesSimilar(t, n1, n2, true) + checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true) } func TestNodesSimilarVariousRequirements(t *testing.T) { @@ -53,23 +53,23 @@ func TestNodesSimilarVariousRequirements(t *testing.T) { // Different CPU capacity n2 := BuildTestNode("node2", 1000, 2000) n2.Status.Capacity[apiv1.ResourceCPU] = *resource.NewMilliQuantity(1001, resource.DecimalSI) - checkNodesSimilar(t, n1, n2, false) + checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, false) // Same CPU capacity, but slightly different allocatable n3 := BuildTestNode("node3", 1000, 2000) n3.Status.Allocatable[apiv1.ResourceCPU] = *resource.NewMilliQuantity(999, resource.DecimalSI) - checkNodesSimilar(t, n1, n3, true) + checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, true) // Same CPU capacity, significantly different allocatable n4 := BuildTestNode("node4", 1000, 2000) n4.Status.Allocatable[apiv1.ResourceCPU] = *resource.NewMilliQuantity(500, resource.DecimalSI) - checkNodesSimilar(t, n1, n4, false) + checkNodesSimilar(t, n1, n4, IsNodeInfoSimilar, false) // One with GPU, one without n5 := BuildTestNode("node5", 1000, 2000) n5.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(1, resource.DecimalSI) n5.Status.Allocatable[gpu.ResourceNvidiaGPU] = n5.Status.Capacity[gpu.ResourceNvidiaGPU] - checkNodesSimilar(t, n1, n5, false) + checkNodesSimilar(t, n1, n5, IsNodeInfoSimilar, false) } func TestNodesSimilarVariousRequirementsAndPods(t *testing.T) { @@ -81,20 +81,20 @@ func TestNodesSimilarVariousRequirementsAndPods(t *testing.T) { n2 := BuildTestNode("node2", 1000, 2000) n2.Status.Allocatable[apiv1.ResourceCPU] = *resource.NewMilliQuantity(500, resource.DecimalSI) n2.Status.Allocatable[apiv1.ResourceMemory] = *resource.NewQuantity(1000, resource.DecimalSI) - checkNodesSimilarWithPods(t, n1, n2, []*apiv1.Pod{p1}, []*apiv1.Pod{}, false) + checkNodesSimilarWithPods(t, n1, n2, []*apiv1.Pod{p1}, []*apiv1.Pod{}, IsNodeInfoSimilar, false) // Same requests of pods n3 := BuildTestNode("node3", 1000, 2000) p3 := BuildTestPod("pod3", 500, 1000) p3.Spec.NodeName = "node3" - checkNodesSimilarWithPods(t, n1, n3, []*apiv1.Pod{p1}, []*apiv1.Pod{p3}, true) + checkNodesSimilarWithPods(t, n1, n3, []*apiv1.Pod{p1}, []*apiv1.Pod{p3}, IsNodeInfoSimilar, true) // Similar allocatable, similar pods n4 := BuildTestNode("node4", 1000, 2000) n4.Status.Allocatable[apiv1.ResourceCPU] = *resource.NewMilliQuantity(999, resource.DecimalSI) p4 := BuildTestPod("pod4", 501, 1001) p4.Spec.NodeName = "node4" - checkNodesSimilarWithPods(t, n1, n4, []*apiv1.Pod{p1}, []*apiv1.Pod{p4}, true) + checkNodesSimilarWithPods(t, n1, n4, []*apiv1.Pod{p1}, []*apiv1.Pod{p4}, IsNodeInfoSimilar, true) } func TestNodesSimilarVariousLabels(t *testing.T) { @@ -106,27 +106,27 @@ func TestNodesSimilarVariousLabels(t *testing.T) { n2.ObjectMeta.Labels["test-label"] = "test-value" // Missing character label - checkNodesSimilar(t, n1, n2, false) + checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, false) n2.ObjectMeta.Labels["character"] = "winnie the pooh" - checkNodesSimilar(t, n1, n2, true) + checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true) // Different hostname labels shouldn't matter n1.ObjectMeta.Labels[kubeletapis.LabelHostname] = "node1" n2.ObjectMeta.Labels[kubeletapis.LabelHostname] = "node2" - checkNodesSimilar(t, n1, n2, true) + checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true) // Different zone shouldn't matter either n1.ObjectMeta.Labels[kubeletapis.LabelZoneFailureDomain] = "mars-olympus-mons1-b" n2.ObjectMeta.Labels[kubeletapis.LabelZoneFailureDomain] = "us-houston1-a" - checkNodesSimilar(t, n1, n2, true) + checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true) // Different beta.kubernetes.io/fluentd-ds-ready should not matter n1.ObjectMeta.Labels["beta.kubernetes.io/fluentd-ds-ready"] = "true" n2.ObjectMeta.Labels["beta.kubernetes.io/fluentd-ds-ready"] = "false" - checkNodesSimilar(t, n1, n2, true) + checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true) n1.ObjectMeta.Labels["beta.kubernetes.io/fluentd-ds-ready"] = "true" delete(n2.ObjectMeta.Labels, "beta.kubernetes.io/fluentd-ds-ready") - checkNodesSimilar(t, n1, n2, true) + checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true) } diff --git a/cluster-autoscaler/processors/nodegroupset/gke_compare_nodegroups.go b/cluster-autoscaler/processors/nodegroupset/gke_compare_nodegroups.go new file mode 100644 index 0000000000..be1ffbbb7a --- /dev/null +++ b/cluster-autoscaler/processors/nodegroupset/gke_compare_nodegroups.go @@ -0,0 +1,40 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodegroupset + +import ( + schedulercache "k8s.io/kubernetes/pkg/scheduler/cache" +) + +// GkeNodepoolLabel is a label specifying GKE node pool particular node belongs to. +const GkeNodepoolLabel = "cloud.google.com/gke-nodepool" + +func nodesFromSameGkeNodePool(n1, n2 *schedulercache.NodeInfo) bool { + n1GkeNodePool := n1.Node().Labels[GkeNodepoolLabel] + n2GkeNodePool := n2.Node().Labels[GkeNodepoolLabel] + return n1GkeNodePool != "" && n1GkeNodePool == n2GkeNodePool +} + +// IsGkeNodeInfoSimilar compares if two nodes should be considered part of the +// same NodeGroupSet. This is true if they either belong to the same GKE nodepool +// or match usual conditions checked by IsNodeInfoSimilar. +func IsGkeNodeInfoSimilar(n1, n2 *schedulercache.NodeInfo) bool { + if nodesFromSameGkeNodePool(n1, n2) { + return true + } + return IsNodeInfoSimilar(n1, n2) +} diff --git a/cluster-autoscaler/processors/nodegroupset/gke_compare_nodegroups_test.go b/cluster-autoscaler/processors/nodegroupset/gke_compare_nodegroups_test.go new file mode 100644 index 0000000000..7245c978b5 --- /dev/null +++ b/cluster-autoscaler/processors/nodegroupset/gke_compare_nodegroups_test.go @@ -0,0 +1,103 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodegroupset + +import ( + "testing" + + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test" + "k8s.io/autoscaler/cluster-autoscaler/context" + . "k8s.io/autoscaler/cluster-autoscaler/utils/test" + schedulercache "k8s.io/kubernetes/pkg/scheduler/cache" + + "github.com/stretchr/testify/assert" +) + +func TestIsGkeNodeInfoSimilar(t *testing.T) { + n1 := BuildTestNode("node1", 1000, 2000) + n1.ObjectMeta.Labels["test-label"] = "test-value" + n1.ObjectMeta.Labels["character"] = "winnie the pooh" + n2 := BuildTestNode("node2", 1000, 2000) + n2.ObjectMeta.Labels["test-label"] = "test-value" + // No node-pool labels. + checkNodesSimilar(t, n1, n2, IsGkeNodeInfoSimilar, false) + // Empty node-pool labels + n1.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "" + n2.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "" + checkNodesSimilar(t, n1, n2, IsGkeNodeInfoSimilar, false) + // Only one non empty + n1.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "" + n2.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "blah" + checkNodesSimilar(t, n1, n2, IsGkeNodeInfoSimilar, false) + // Only one present + delete(n1.ObjectMeta.Labels, "cloud.google.com/gke-nodepool") + n2.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "blah" + checkNodesSimilar(t, n1, n2, IsGkeNodeInfoSimilar, false) + // Different vales + n1.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "blah1" + n2.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "blah2" + checkNodesSimilar(t, n1, n2, IsGkeNodeInfoSimilar, false) + // Same values + n1.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "blah" + n2.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "blah" + checkNodesSimilar(t, n1, n2, IsGkeNodeInfoSimilar, true) +} + +func TestFindSimilarNodeGroupsGkeBasic(t *testing.T) { + processor := &BalancingNodeGroupSetProcessor{Comparator: IsGkeNodeInfoSimilar} + basicSimilarNodeGroupsTest(t, processor) +} + +func TestFindSimilarNodeGroupsGkeByLabel(t *testing.T) { + processor := &BalancingNodeGroupSetProcessor{Comparator: IsGkeNodeInfoSimilar} + context := &context.AutoscalingContext{} + + n1 := BuildTestNode("n1", 1000, 1000) + n2 := BuildTestNode("n2", 2000, 2000) + + provider := testprovider.NewTestCloudProvider(nil, nil) + provider.AddNodeGroup("ng1", 1, 10, 1) + provider.AddNodeGroup("ng2", 1, 10, 1) + provider.AddNode("ng1", n1) + provider.AddNode("ng2", n2) + + ni1 := schedulercache.NewNodeInfo() + ni1.SetNode(n1) + ni2 := schedulercache.NewNodeInfo() + ni2.SetNode(n2) + + nodeInfosForGroups := map[string]*schedulercache.NodeInfo{ + "ng1": ni1, "ng2": ni2, + } + + ng1, _ := provider.NodeGroupForNode(n1) + ng2, _ := provider.NodeGroupForNode(n2) + context.CloudProvider = provider + + // Groups with different cpu and mem are not similar + similar, err := processor.FindSimilarNodeGroups(context, ng1, nodeInfosForGroups) + assert.NoError(t, err) + assert.Equal(t, similar, []cloudprovider.NodeGroup{}) + + // Unless we give them nodepool label + n1.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "blah" + n2.ObjectMeta.Labels["cloud.google.com/gke-nodepool"] = "blah" + similar, err = processor.FindSimilarNodeGroups(context, ng1, nodeInfosForGroups) + assert.NoError(t, err) + assert.Equal(t, similar, []cloudprovider.NodeGroup{ng2}) +}