ClusterAutoscaler: first fit decreasing estimate algorithm
parent 771c92e989
commit 0561d0ea84

@@ -0,0 +1,122 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package estimator

import (
	"sort"

	"k8s.io/contrib/cluster-autoscaler/simulator"
	kube_api "k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/resource"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

// podInfo contains a Pod and a score that corresponds to how important it is to handle the pod first.
type podInfo struct {
	score float64
	pod   *kube_api.Pod
}

type byScoreDesc []*podInfo

func (a byScoreDesc) Len() int           { return len(a) }
func (a byScoreDesc) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a byScoreDesc) Less(i, j int) bool { return a[i].score > a[j].score }

// BinpackingNodeEstimator estimates the number of nodes needed to handle the given set of pods.
type BinpackingNodeEstimator struct {
	predicateChecker *simulator.PredicateChecker
}

// NewBinpackingNodeEstimator builds a new BinpackingNodeEstimator.
func NewBinpackingNodeEstimator(predicateChecker *simulator.PredicateChecker) *BinpackingNodeEstimator {
	return &BinpackingNodeEstimator{
		predicateChecker: predicateChecker,
	}
}

// Estimate implements the First Fit Decreasing bin-packing approximation algorithm.
// See https://en.wikipedia.org/wiki/Bin_packing_problem for more details.
// While this is multi-dimensional bin packing (cpu, mem, ports), in most cases the main
// dimension will be cpu, so the estimated overprovisioning bound of 11/9 * optimal + 6/9
// should still hold.
// It is assumed that all pods from the given list can fit on nodeTemplate.
// Returns the number of nodes needed to accommodate all pods from the list.
func (estimator *BinpackingNodeEstimator) Estimate(pods []*kube_api.Pod, nodeTemplate *schedulercache.NodeInfo) int {
	podInfos := calculatePodScore(pods, nodeTemplate)
	sort.Sort(byScoreDesc(podInfos))

	// nodeWithPod returns a copy of nodeInfo with an additional pod scheduled on it.
	nodeWithPod := func(nodeInfo *schedulercache.NodeInfo, pod *kube_api.Pod) *schedulercache.NodeInfo {
		podsOnNode := nodeInfo.Pods()
		podsOnNode = append(podsOnNode, pod)
		newNodeInfo := schedulercache.NewNodeInfo(podsOnNode...)
		newNodeInfo.SetNode(nodeInfo.Node())
		return newNodeInfo
	}

	newNodes := make([]*schedulercache.NodeInfo, 0)
	for _, podInfo := range podInfos {
		found := false
		for i, nodeInfo := range newNodes {
			if err := estimator.predicateChecker.CheckPredicates(podInfo.pod, nodeInfo); err == nil {
				found = true
				newNodes[i] = nodeWithPod(nodeInfo, podInfo.pod)
				break
			}
		}
		if !found {
			newNodes = append(newNodes, nodeWithPod(nodeTemplate, podInfo.pod))
		}
	}
	return len(newNodes)
}
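
// Worked example (illustrative numbers, assuming the resource-fit predicate is the
// binding constraint): with a node template offering 1000m allocatable CPU and pods
// each requesting 350m, CheckPredicates admits at most two pods per simulated node,
// so packing ten such pods yields an estimate of 5 nodes.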

// calculatePodScore calculates a score for every pod and returns the result as a slice of podInfo.
// Score is defined as cpu_sum/node_capacity + mem_sum/node_capacity.
// Pods with bigger requirements should be processed first, and therefore get higher scores.
func calculatePodScore(pods []*kube_api.Pod, nodeTemplate *schedulercache.NodeInfo) []*podInfo {
	podInfos := make([]*podInfo, 0, len(pods))

	for _, pod := range pods {
		cpuSum := resource.Quantity{}
		memorySum := resource.Quantity{}

		for _, container := range pod.Spec.Containers {
			if request, ok := container.Resources.Requests[kube_api.ResourceCPU]; ok {
				cpuSum.Add(request)
			}
			if request, ok := container.Resources.Requests[kube_api.ResourceMemory]; ok {
				memorySum.Add(request)
			}
		}
		score := float64(0)
		if cpuAllocatable, ok := nodeTemplate.Node().Status.Allocatable[kube_api.ResourceCPU]; ok && cpuAllocatable.MilliValue() > 0 {
			score += float64(cpuSum.MilliValue()) / float64(cpuAllocatable.MilliValue())
		}
		if memAllocatable, ok := nodeTemplate.Node().Status.Allocatable[kube_api.ResourceMemory]; ok && memAllocatable.Value() > 0 {
			score += float64(memorySum.Value()) / float64(memAllocatable.Value())
		}

		podInfos = append(podInfos, &podInfo{
			score: score,
			pod:   pod,
		})
	}
	return podInfos
}
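
// Numeric illustration (assumed values): a pod requesting 350m CPU and 1000Mi memory,
// scored against a node template with 1000m CPU and 2000Mi memory allocatable, gets
// 350/1000 + 1000/2000 = 0.85; a pod requesting twice as much scores 1.7 and is
// therefore packed first.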

@@ -0,0 +1,114 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package estimator

import (
	"testing"

	"k8s.io/contrib/cluster-autoscaler/simulator"
	kube_api "k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/resource"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"

	"github.com/stretchr/testify/assert"
)

func TestBinpackingEstimate(t *testing.T) {
	estimator := NewBinpackingNodeEstimator(simulator.NewTestPredicateChecker())

	cpuPerPod := int64(350)
	memoryPerPod := int64(1000 * 1024 * 1024)
	pod := &kube_api.Pod{
		Spec: kube_api.PodSpec{
			Containers: []kube_api.Container{
				{
					Resources: kube_api.ResourceRequirements{
						Requests: kube_api.ResourceList{
							kube_api.ResourceCPU:    *resource.NewMilliQuantity(cpuPerPod, resource.DecimalSI),
							kube_api.ResourceMemory: *resource.NewQuantity(memoryPerPod, resource.DecimalSI),
						},
					},
				},
			},
		},
	}

	pods := make([]*kube_api.Pod, 0)
	for i := 0; i < 10; i++ {
		pods = append(pods, pod)
	}
	node := &kube_api.Node{
		Status: kube_api.NodeStatus{
			Capacity: kube_api.ResourceList{
				kube_api.ResourceCPU:    *resource.NewMilliQuantity(cpuPerPod*3-50, resource.DecimalSI),
				kube_api.ResourceMemory: *resource.NewQuantity(2*memoryPerPod, resource.DecimalSI),
				kube_api.ResourcePods:   *resource.NewQuantity(10, resource.DecimalSI),
			},
		},
	}
	node.Status.Allocatable = node.Status.Capacity

	nodeInfo := schedulercache.NewNodeInfo()
	nodeInfo.SetNode(node)
	estimate := estimator.Estimate(pods, nodeInfo)
	assert.Equal(t, 5, estimate)
}

func TestBinpackingEstimateWithPorts(t *testing.T) {
	estimator := NewBinpackingNodeEstimator(simulator.NewTestPredicateChecker())

	cpuPerPod := int64(200)
	memoryPerPod := int64(1000 * 1024 * 1024)
	pod := &kube_api.Pod{
		Spec: kube_api.PodSpec{
			Containers: []kube_api.Container{
				{
					Resources: kube_api.ResourceRequirements{
						Requests: kube_api.ResourceList{
							kube_api.ResourceCPU:    *resource.NewMilliQuantity(cpuPerPod, resource.DecimalSI),
							kube_api.ResourceMemory: *resource.NewQuantity(memoryPerPod, resource.DecimalSI),
						},
					},
					Ports: []kube_api.ContainerPort{
						{
							HostPort: 5555,
						},
					},
				},
			},
		},
	}
	pods := make([]*kube_api.Pod, 0)
	for i := 0; i < 8; i++ {
		pods = append(pods, pod)
	}
	node := &kube_api.Node{
		Status: kube_api.NodeStatus{
			Capacity: kube_api.ResourceList{
				kube_api.ResourceCPU:    *resource.NewMilliQuantity(5*cpuPerPod, resource.DecimalSI),
				kube_api.ResourceMemory: *resource.NewQuantity(5*memoryPerPod, resource.DecimalSI),
				kube_api.ResourcePods:   *resource.NewQuantity(10, resource.DecimalSI),
			},
		},
	}
	node.Status.Allocatable = node.Status.Capacity

	nodeInfo := schedulercache.NewNodeInfo()
	nodeInfo.SetNode(node)
	estimate := estimator.Estimate(pods, nodeInfo)
	assert.Equal(t, 8, estimate)
}