Cluster-autoscaler scale down impl
parent c998f90c49
commit 2caae1647a
@@ -0,0 +1,134 @@
/*
Copyright 2016 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package simulator

import (
    "fmt"

    kube_api "k8s.io/kubernetes/pkg/api"
    "k8s.io/kubernetes/pkg/api/resource"
    kube_client "k8s.io/kubernetes/pkg/client/unversioned"
    cmd "k8s.io/kubernetes/pkg/kubectl/cmd"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"

    "github.com/golang/glog"
)

const (
    // Nodes with utilization below this are considered unused and may be subject to scale down.
    unusedThreshold = float64(0.5)
)

// FindNodeToRemove finds a node that can be removed.
func FindNodeToRemove(nodes []*kube_api.Node, pods []*kube_api.Pod, client *kube_client.Client) (*kube_api.Node, error) {
    nodeNameToNodeInfo := schedulercache.CreateNodeNameToInfoMap(pods)

    // TODO: Iterate over underutilized nodes first.
    for _, node := range nodes {
        nodeInfo, found := nodeNameToNodeInfo[node.Name]
        if !found {
            glog.Errorf("Node info for %s not found", node.Name)
            continue
        }

        // TODO: Use other resources as well.
        reservation, err := calculateReservation(node, nodeInfo, kube_api.ResourceCPU)
        if err != nil {
            glog.Warningf("Failed to calculate reservation for %s: %v", node.Name, err)
        }
        glog.V(4).Infof("Node %s - reservation %f", node.Name, reservation)

        if reservation > unusedThreshold {
            glog.Infof("Node %s is not suitable for removal - reservation too big (%f)", node.Name, reservation)
            continue
        }
        // Let's try to remove this one.
        glog.V(2).Infof("Considering %s for removal", node.Name)

        podsToRemoveList, _, _, err := cmd.GetPodsForDeletionOnNodeDrain(client, node.Name,
            kube_api.Codecs.UniversalDecoder(), false, true)
        if err != nil {
            glog.V(1).Infof("Node %s cannot be removed: %v", node.Name, err)
            continue
        }

        // Check whether the pods scheduled for eviction would fit on the remaining nodes.
        tempNodeNameToNodeInfo := schedulercache.CreateNodeNameToInfoMap(pods)
        delete(tempNodeNameToNodeInfo, node.Name)
        ptrPodsToRemove := make([]*kube_api.Pod, 0, len(podsToRemoveList))
        for i := range podsToRemoveList {
            ptrPodsToRemove = append(ptrPodsToRemove, &podsToRemoveList[i])
        }

        findProblems := findPlaceFor(ptrPodsToRemove, nodes, tempNodeNameToNodeInfo)
        if findProblems == nil {
            return node, nil
        }
        glog.Infof("Node %s is not suitable for removal: %v", node.Name, findProblems)
    }
    return nil, nil
}

func calculateReservation(node *kube_api.Node, nodeInfo *schedulercache.NodeInfo, resourceName kube_api.ResourceName) (float64, error) {
    nodeCapacity, found := node.Status.Capacity[resourceName]
    if !found {
        return 0, fmt.Errorf("failed to get %v from %s", resourceName, node.Name)
    }
    if nodeCapacity.MilliValue() == 0 {
        return 0, fmt.Errorf("%v is 0 at %s", resourceName, node.Name)
    }
    podsRequest := resource.MustParse("0")
    for _, pod := range nodeInfo.Pods() {
        for _, container := range pod.Spec.Containers {
            if resourceValue, found := container.Resources.Requests[resourceName]; found {
                podsRequest.Add(resourceValue)
            }
        }
    }
    return float64(podsRequest.MilliValue()) / float64(nodeCapacity.MilliValue()), nil
}

func findPlaceFor(pods []*kube_api.Pod, nodes []*kube_api.Node, nodeInfos map[string]*schedulercache.NodeInfo) error {
    predicateChecker := NewPredicateChecker()
    for _, pod := range pods {
        foundPlace := false
        glog.V(4).Infof("Looking for place for %s/%s", pod.Namespace, pod.Name)

        // TODO: Sort nodes by reservation
    nodeloop:
        for _, node := range nodes {
            // The simulation treats capacity as allocatable.
            node.Status.Allocatable = node.Status.Capacity
            if nodeInfo, found := nodeInfos[node.Name]; found {
                err := predicateChecker.CheckPredicates(pod, node, nodeInfo)
                glog.V(4).Infof("Evaluation %s for %s/%s -> %v", node.Name, pod.Namespace, pod.Name, err)
                if err == nil {
                    foundPlace = true
                    // TODO(mwielgus): Optimize it.
                    podsOnNode := nodeInfo.Pods()
                    podsOnNode = append(podsOnNode, pod)
                    nodeInfos[node.Name] = schedulercache.NewNodeInfo(podsOnNode...)
                    break nodeloop
                }
            }
        }
        if !foundPlace {
            return fmt.Errorf("failed to find place for %s/%s", pod.Namespace, pod.Name)
        }
    }
    return nil
}
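
For orientation, here is a minimal sketch of how FindNodeToRemove could be driven from a periodic scale-down loop. It is not part of this commit: the package name, the k8s.io/contrib/cluster-autoscaler/simulator import path, and the listNodes/listPods/removeNode helpers are all assumptions supplied for illustration.

package scaledown // hypothetical caller package, not in this commit

import (
    "time"

    kube_api "k8s.io/kubernetes/pkg/api"
    kube_client "k8s.io/kubernetes/pkg/client/unversioned"

    // Assumed import path for the simulator package added in this commit.
    "k8s.io/contrib/cluster-autoscaler/simulator"

    "github.com/golang/glog"
)

// scaleDownLoop periodically asks the simulator for a removable node.
// listNodes, listPods and removeNode are hypothetical helpers provided by the caller.
func scaleDownLoop(client *kube_client.Client,
    listNodes func() ([]*kube_api.Node, error),
    listPods func() ([]*kube_api.Pod, error),
    removeNode func(*kube_api.Node) error) {

    for range time.Tick(time.Minute) {
        nodes, err := listNodes()
        if err != nil {
            glog.Errorf("Failed to list nodes: %v", err)
            continue
        }
        pods, err := listPods()
        if err != nil {
            glog.Errorf("Failed to list pods: %v", err)
            continue
        }

        // FindNodeToRemove returns (nil, nil) when no node is both underutilized and drainable.
        node, err := simulator.FindNodeToRemove(nodes, pods, client)
        if err != nil {
            glog.Errorf("Error while looking for a node to remove: %v", err)
            continue
        }
        if node == nil {
            glog.V(2).Info("No node is suitable for removal")
            continue
        }
        if err := removeNode(node); err != nil {
            glog.Errorf("Failed to remove node %s: %v", node.Name, err)
        }
    }
}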
@@ -0,0 +1,150 @@
/*
Copyright 2016 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package simulator

import (
    "testing"

    kube_api "k8s.io/kubernetes/pkg/api"
    "k8s.io/kubernetes/pkg/api/resource"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"

    "github.com/stretchr/testify/assert"
)

func TestReservation(t *testing.T) {
    pod := buildPod("p1", 100, 200000)
    pod2 := &kube_api.Pod{
        Spec: kube_api.PodSpec{
            Containers: []kube_api.Container{
                {
                    Resources: kube_api.ResourceRequirements{
                        Requests: kube_api.ResourceList{},
                    },
                },
            },
        },
    }
    nodeInfo := schedulercache.NewNodeInfo(pod, pod, pod2)

    node := &kube_api.Node{
        Status: kube_api.NodeStatus{
            Capacity: kube_api.ResourceList{
                kube_api.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
            },
        },
    }

    reservation, err := calculateReservation(node, nodeInfo, kube_api.ResourceCPU)
    assert.NoError(t, err)
    assert.InEpsilon(t, 1.0/10, reservation, 0.01)

    _, err = calculateReservation(node, nodeInfo, kube_api.ResourceMemory)
    assert.Error(t, err)
}

func TestFindPlaceAllOk(t *testing.T) {
    pod1 := buildPod("p1", 300, 500000)
    new1 := buildPod("p2", 600, 500000)
    new2 := buildPod("p3", 500, 500000)

    nodeInfos := map[string]*schedulercache.NodeInfo{
        "n1": schedulercache.NewNodeInfo(pod1),
        "n2": schedulercache.NewNodeInfo(),
    }
    node1 := buildNode("n1", 1000, 2000000)
    node2 := buildNode("n2", 1000, 2000000)

    err := findPlaceFor(
        []*kube_api.Pod{new1, new2},
        []*kube_api.Node{node1, node2},
        nodeInfos)
    assert.NoError(t, err)
}

func TestFindPlaceAllBad(t *testing.T) {
    pod1 := buildPod("p1", 300, 500000)
    new1 := buildPod("p2", 600, 500000)
    new2 := buildPod("p3", 500, 500000)
    new3 := buildPod("p4", 700, 500000)

    nodeInfos := map[string]*schedulercache.NodeInfo{
        "n1": schedulercache.NewNodeInfo(pod1),
        "n2": schedulercache.NewNodeInfo(),
    }
    node1 := buildNode("n1", 1000, 2000000)
    node2 := buildNode("n2", 1000, 2000000)

    err := findPlaceFor(
        []*kube_api.Pod{new1, new2, new3},
        []*kube_api.Node{node1, node2},
        nodeInfos)
    assert.Error(t, err)
}

func TestFindNone(t *testing.T) {
    pod1 := buildPod("p1", 300, 500000)

    nodeInfos := map[string]*schedulercache.NodeInfo{
        "n1": schedulercache.NewNodeInfo(pod1),
        "n2": schedulercache.NewNodeInfo(),
    }
    node1 := buildNode("n1", 1000, 2000000)
    node2 := buildNode("n2", 1000, 2000000)

    err := findPlaceFor(
        []*kube_api.Pod{},
        []*kube_api.Node{node1, node2},
        nodeInfos)
    assert.NoError(t, err)
}

func buildPod(name string, cpu int64, mem int64) *kube_api.Pod {
    return &kube_api.Pod{
        ObjectMeta: kube_api.ObjectMeta{
            Namespace: "default",
            Name:      name,
        },
        Spec: kube_api.PodSpec{
            Containers: []kube_api.Container{
                {
                    Resources: kube_api.ResourceRequirements{
                        Requests: kube_api.ResourceList{
                            kube_api.ResourceCPU:    *resource.NewMilliQuantity(cpu, resource.DecimalSI),
                            kube_api.ResourceMemory: *resource.NewQuantity(mem, resource.DecimalSI),
                        },
                    },
                },
            },
        },
    }
}

func buildNode(name string, cpu int64, mem int64) *kube_api.Node {
    return &kube_api.Node{
        ObjectMeta: kube_api.ObjectMeta{
            Name: name,
        },
        Status: kube_api.NodeStatus{
            Capacity: kube_api.ResourceList{
                kube_api.ResourceCPU:    *resource.NewMilliQuantity(cpu, resource.DecimalSI),
                kube_api.ResourceMemory: *resource.NewQuantity(mem, resource.DecimalSI),
                kube_api.ResourcePods:   *resource.NewQuantity(100, resource.DecimalSI),
            },
        },
    }
}
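
As an illustration only (not part of this commit), the same helpers can express the flip side of TestReservation: a node whose reservation exceeds unusedThreshold. The test name is hypothetical; it reuses buildPod, buildNode and calculateReservation from the code above.

package simulator

import (
    "testing"

    kube_api "k8s.io/kubernetes/pkg/api"
    "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"

    "github.com/stretchr/testify/assert"
)

// TestReservationAboveThreshold is a hypothetical companion test: two pods
// requesting 300m CPU each on a 1000m node give 600m/1000m = 0.6, which is
// above unusedThreshold (0.5), so such a node would not be picked for removal.
func TestReservationAboveThreshold(t *testing.T) {
    pod := buildPod("p1", 300, 500000)
    node := buildNode("n1", 1000, 2000000)
    nodeInfo := schedulercache.NewNodeInfo(pod, pod)

    reservation, err := calculateReservation(node, nodeInfo, kube_api.ResourceCPU)
    assert.NoError(t, err)
    assert.InEpsilon(t, 0.6, reservation, 0.01)
    assert.True(t, reservation > unusedThreshold)
}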
@@ -58,6 +58,31 @@ func NewUnscheduledPodLister(kubeClient *kube_client.Client) *UnscheduledPodList
    }
}

// ScheduledPodLister lists scheduled pods.
type ScheduledPodLister struct {
    podLister *cache.StoreToPodLister
}

// List returns all scheduled pods.
func (lister *ScheduledPodLister) List() ([]*kube_api.Pod, error) {
    return lister.podLister.List(labels.Everything())
}

// NewScheduledPodLister builds ScheduledPodLister.
func NewScheduledPodLister(kubeClient *kube_client.Client) *ScheduledPodLister {
    // watch scheduled (already bound) pods that have not finished
    selector := fields.ParseSelectorOrDie("spec.nodeName!=" + "" + ",status.phase!=" +
        string(kube_api.PodSucceeded) + ",status.phase!=" + string(kube_api.PodFailed))
    podListWatch := cache.NewListWatchFromClient(kubeClient, "pods", kube_api.NamespaceAll, selector)
    podLister := &cache.StoreToPodLister{Store: cache.NewStore(cache.MetaNamespaceKeyFunc)}
    podReflector := cache.NewReflector(podListWatch, &kube_api.Pod{}, podLister.Store, time.Hour)
    podReflector.Run()

    return &ScheduledPodLister{
        podLister: podLister,
    }
}

// ReadyNodeLister lists ready nodes.
type ReadyNodeLister struct {
    nodeLister *cache.StoreToNodeLister
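
A short usage sketch for the new lister, illustrative and not part of this commit. It is assumed to sit in the same file/package as the code above (so kube_client and NewScheduledPodLister are in scope) and assumes glog is imported there; the helper name is hypothetical.

// logScheduledPods shows the intended call pattern: construct the lister once
// (NewScheduledPodLister starts a reflector in the background) and call List()
// on each iteration of the control loop.
func logScheduledPods(kubeClient *kube_client.Client) {
    scheduledPodLister := NewScheduledPodLister(kubeClient)

    pods, err := scheduledPodLister.List()
    if err != nil {
        glog.Errorf("Failed to list scheduled pods: %v", err)
        return
    }
    for _, pod := range pods {
        glog.V(4).Infof("Scheduled pod %s/%s is bound to %s", pod.Namespace, pod.Name, pod.Spec.NodeName)
    }
}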