From 0561d0ea84cbcdfafeff14ed01f6520bf936201e Mon Sep 17 00:00:00 2001 From: Marcin Wielgus Date: Tue, 13 Sep 2016 22:58:28 +0200 Subject: [PATCH] ClusterAutoscaler: first fit decreasing estimate algorithm --- .../estimator/binpacking_estimator.go | 122 ++++++++++++++++++ .../estimator/binpacking_estimator_test.go | 114 ++++++++++++++++ 2 files changed, 236 insertions(+) create mode 100644 cluster-autoscaler/estimator/binpacking_estimator.go create mode 100644 cluster-autoscaler/estimator/binpacking_estimator_test.go diff --git a/cluster-autoscaler/estimator/binpacking_estimator.go b/cluster-autoscaler/estimator/binpacking_estimator.go new file mode 100644 index 0000000000..79a92ee651 --- /dev/null +++ b/cluster-autoscaler/estimator/binpacking_estimator.go @@ -0,0 +1,122 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package estimator + +import ( + "sort" + + "k8s.io/contrib/cluster-autoscaler/simulator" + kube_api "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/api/resource" + "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" +) + +// podInfo contains Pod and score that corresponds to how important it is to handle the pod first. +type podInfo struct { + score float64 + pod *kube_api.Pod +} + +type byScoreDesc []*podInfo + +func (a byScoreDesc) Len() int { return len(a) } +func (a byScoreDesc) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a byScoreDesc) Less(i, j int) bool { return a[i].score > a[j].score } + +// BinpackingNodeEstimator estimates the number of needed nodes to handle the given amount of pods. +type BinpackingNodeEstimator struct { + predicateChecker *simulator.PredicateChecker +} + +// NewBinpackingNodeEstimator builds a new BinpackingNodeEstimator. +func NewBinpackingNodeEstimator(predicateChecker *simulator.PredicateChecker) *BinpackingNodeEstimator { + return &BinpackingNodeEstimator{ + predicateChecker: predicateChecker, + } +} + +// Estimate implements First Fit Decreasing bin-packing approximation algorithm. +// See https://en.wikipedia.org/wiki/Bin_packing_problem for more details. +// While it is a multi-dimensional bin packing (cpu, mem, ports) in most cases the main dimension +// will be cpu thus the estimated overprovisioning of 11/9 * optimal + 6/9 should be +// still be maintained. +// It is assumed that all pods from the given list can fit to nodeTemplate. +// Returns the number of nodes needed to accommodate all pods from the list. +func (estimator *BinpackingNodeEstimator) Estimate(pods []*kube_api.Pod, nodeTemplate *schedulercache.NodeInfo) int { + + podInfos := calculatePodScore(pods, nodeTemplate) + sort.Sort(byScoreDesc(podInfos)) + + // nodeWithPod function returns NodeInfo, which is a copy of nodeInfo argument with an additional pod scheduled on it. + nodeWithPod := func(nodeInfo *schedulercache.NodeInfo, pod *kube_api.Pod) *schedulercache.NodeInfo { + podsOnNode := nodeInfo.Pods() + podsOnNode = append(podsOnNode, pod) + newNodeInfo := schedulercache.NewNodeInfo(podsOnNode...) + newNodeInfo.SetNode(nodeInfo.Node()) + return newNodeInfo + } + + newNodes := make([]*schedulercache.NodeInfo, 0) + for _, podInfo := range podInfos { + found := false + for i, nodeInfo := range newNodes { + if err := estimator.predicateChecker.CheckPredicates(podInfo.pod, nodeInfo); err == nil { + found = true + newNodes[i] = nodeWithPod(nodeInfo, podInfo.pod) + break + } + } + if !found { + newNodes = append(newNodes, nodeWithPod(nodeTemplate, podInfo.pod)) + } + } + return len(newNodes) +} + +// Calculates score for all pods and returns podInfo structure. +// Score is defined as cpu_sum/node_capacity + mem_sum/node_capacity. +// Pods that have bigger requirements should be processed first, thus have higher scores. +func calculatePodScore(pods []*kube_api.Pod, nodeTemplate *schedulercache.NodeInfo) []*podInfo { + podInfos := make([]*podInfo, 0, len(pods)) + + for _, pod := range pods { + cpuSum := resource.Quantity{} + memorySum := resource.Quantity{} + + for _, container := range pod.Spec.Containers { + if request, ok := container.Resources.Requests[kube_api.ResourceCPU]; ok { + cpuSum.Add(request) + } + if request, ok := container.Resources.Requests[kube_api.ResourceMemory]; ok { + memorySum.Add(request) + } + } + score := float64(0) + if cpuAllocatable, ok := nodeTemplate.Node().Status.Allocatable[kube_api.ResourceCPU]; ok && cpuAllocatable.MilliValue() > 0 { + score += float64(cpuSum.MilliValue()) / float64(cpuAllocatable.MilliValue()) + } + if memAllocatable, ok := nodeTemplate.Node().Status.Allocatable[kube_api.ResourceMemory]; ok && memAllocatable.Value() > 0 { + score += float64(memorySum.Value()) / float64(memAllocatable.Value()) + } + + podInfos = append(podInfos, &podInfo{ + score: score, + pod: pod, + }) + } + return podInfos +} diff --git a/cluster-autoscaler/estimator/binpacking_estimator_test.go b/cluster-autoscaler/estimator/binpacking_estimator_test.go new file mode 100644 index 0000000000..5b0c71c65b --- /dev/null +++ b/cluster-autoscaler/estimator/binpacking_estimator_test.go @@ -0,0 +1,114 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package estimator + +import ( + "testing" + + "k8s.io/contrib/cluster-autoscaler/simulator" + kube_api "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/api/resource" + "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" + + "github.com/stretchr/testify/assert" +) + +func TestBinpackingEstimate(t *testing.T) { + estimator := NewBinpackingNodeEstimator(simulator.NewTestPredicateChecker()) + + cpuPerPod := int64(350) + memoryPerPod := int64(1000 * 1024 * 1024) + pod := &kube_api.Pod{ + Spec: kube_api.PodSpec{ + Containers: []kube_api.Container{ + { + Resources: kube_api.ResourceRequirements{ + Requests: kube_api.ResourceList{ + kube_api.ResourceCPU: *resource.NewMilliQuantity(cpuPerPod, resource.DecimalSI), + kube_api.ResourceMemory: *resource.NewQuantity(memoryPerPod, resource.DecimalSI), + }, + }, + }, + }, + }, + } + + pods := make([]*kube_api.Pod, 0) + for i := 0; i < 10; i++ { + pods = append(pods, pod) + } + node := &kube_api.Node{ + Status: kube_api.NodeStatus{ + Capacity: kube_api.ResourceList{ + kube_api.ResourceCPU: *resource.NewMilliQuantity(cpuPerPod*3-50, resource.DecimalSI), + kube_api.ResourceMemory: *resource.NewQuantity(2*memoryPerPod, resource.DecimalSI), + kube_api.ResourcePods: *resource.NewQuantity(10, resource.DecimalSI), + }, + }, + } + node.Status.Allocatable = node.Status.Capacity + + nodeInfo := schedulercache.NewNodeInfo() + nodeInfo.SetNode(node) + estimate := estimator.Estimate(pods, nodeInfo) + assert.Equal(t, 5, estimate) +} + +func TestBinpackingEstimateWithPorts(t *testing.T) { + estimator := NewBinpackingNodeEstimator(simulator.NewTestPredicateChecker()) + + cpuPerPod := int64(200) + memoryPerPod := int64(1000 * 1024 * 1024) + pod := &kube_api.Pod{ + Spec: kube_api.PodSpec{ + Containers: []kube_api.Container{ + { + Resources: kube_api.ResourceRequirements{ + Requests: kube_api.ResourceList{ + kube_api.ResourceCPU: *resource.NewMilliQuantity(cpuPerPod, resource.DecimalSI), + kube_api.ResourceMemory: *resource.NewQuantity(memoryPerPod, resource.DecimalSI), + }, + }, + Ports: []kube_api.ContainerPort{ + { + HostPort: 5555, + }, + }, + }, + }, + }, + } + pods := make([]*kube_api.Pod, 0) + for i := 0; i < 8; i++ { + pods = append(pods, pod) + } + node := &kube_api.Node{ + Status: kube_api.NodeStatus{ + Capacity: kube_api.ResourceList{ + kube_api.ResourceCPU: *resource.NewMilliQuantity(5*cpuPerPod, resource.DecimalSI), + kube_api.ResourceMemory: *resource.NewQuantity(5*memoryPerPod, resource.DecimalSI), + kube_api.ResourcePods: *resource.NewQuantity(10, resource.DecimalSI), + }, + }, + } + node.Status.Allocatable = node.Status.Capacity + + nodeInfo := schedulercache.NewNodeInfo() + nodeInfo.SetNode(node) + estimate := estimator.Estimate(pods, nodeInfo) + assert.Equal(t, 8, estimate) +}