Cluster-autoscaler: support unready nodes in scale down

Marcin Wielgus 2017-01-02 17:24:40 +01:00
parent 6ea981c46c
commit 949cf37465
7 changed files with 132 additions and 37 deletions

View File

@@ -87,7 +87,9 @@ var (
scaleDownDelay = flag.Duration("scale-down-delay", 10*time.Minute,
"Duration from the last scale up to the time when CA starts to check scale down options")
scaleDownUnneededTime = flag.Duration("scale-down-unneeded-time", 10*time.Minute,
"How long the node should be unneeded before it is eligible for scale down")
"How long a node should be unneeded before it is eligible for scale down")
scaleDownUnreadyTime = flag.Duration("scale-down-unready-time", 20*time.Minute,
"How long an unready node should be unneeded before it is eligible for scale down")
scaleDownUtilizationThreshold = flag.Float64("scale-down-utilization-threshold", 0.5,
"Node utilization level, defined as sum of requested resources divided by capacity, below which a node can be considered for scale down")
scaleDownTrialInterval = flag.Duration("scale-down-trial-interval", 1*time.Minute,
@@ -214,6 +216,7 @@ func run(_ <-chan struct{}) {
MaxEmptyBulkDelete: *maxEmptyBulkDeleteFlag,
ScaleDownUtilizationThreshold: *scaleDownUtilizationThreshold,
ScaleDownUnneededTime: *scaleDownUnneededTime,
ScaleDownUnreadyTime: *scaleDownUnreadyTime,
MaxNodesTotal: *maxNodesTotal,
EstimatorName: *estimatorFlag,
ExpanderStrategy: expanderStrategy,

View File

@@ -288,7 +288,7 @@ func (csr *ClusterStateRegistry) calculateReadinessStats(currentTime time.Time)
for _, node := range csr.nodes {
nodeGroup, errNg := csr.cloudProvider.NodeGroupForNode(node)
ready, _, errReady := getReadinessState(node)
ready, _, errReady := GetReadinessState(node)
// Node is most likely not autoscaled; however, check the errors.
if reflect.ValueOf(nodeGroup).IsNil() {
@@ -306,11 +306,10 @@ func (csr *ClusterStateRegistry) calculateReadinessStats(currentTime time.Time)
return perNodeGroup, total
}
// getReadinessState gets readiness state for the node
func getReadinessState(node *apiv1.Node) (isNodeReady bool, lastTransitionTime time.Time, err error) {
// GetReadinessState gets readiness state for the node
func GetReadinessState(node *apiv1.Node) (isNodeReady bool, lastTransitionTime time.Time, err error) {
for _, condition := range node.Status.Conditions {
if condition.Type == apiv1.NodeReady {
if condition.Status == apiv1.ConditionTrue {
return true, condition.LastTransitionTime.Time, nil
}
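The hunk above is truncated after the ConditionTrue branch. Since this change exports the helper for use by the scale-down code, here is a minimal sketch of a caller for orientation; the error path, taken to mean that no NodeReady condition was found, is an assumption and not part of this diff:

```go
// Sketch only, not part of this commit: logging a node's readiness via the
// newly exported clusterstate.GetReadinessState helper.
package example

import (
	"github.com/golang/glog"

	"k8s.io/contrib/cluster-autoscaler/clusterstate"
	apiv1 "k8s.io/kubernetes/pkg/api/v1"
)

func logReadiness(node *apiv1.Node) {
	// The error is assumed to mean that no NodeReady condition was found on the node.
	ready, lastTransition, err := clusterstate.GetReadinessState(node)
	if err != nil {
		glog.Warningf("Failed to determine readiness of %s: %v", node.Name, err)
		return
	}
	glog.V(4).Infof("Node %s ready=%v, last transition at %v", node.Name, ready, lastTransition)
}
```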

View File

@@ -32,9 +32,9 @@ func TestOKWithScaleUp(t *testing.T) {
now := time.Now()
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
setReadyState(ng1_1, true, now.Add(-time.Minute))
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
setReadyState(ng2_1, true, now.Add(-time.Minute))
SetNodeReadyState(ng2_1, true, now.Add(-time.Minute))
provider := testprovider.NewTestCloudProvider(nil, nil)
provider.AddNodeGroup("ng1", 1, 10, 5)
@@ -63,9 +63,9 @@ func TestOKOneUnreadyNode(t *testing.T) {
now := time.Now()
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
setReadyState(ng1_1, true, now.Add(-time.Minute))
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
setReadyState(ng2_1, false, now.Add(-time.Minute))
SetNodeReadyState(ng2_1, false, now.Add(-time.Minute))
provider := testprovider.NewTestCloudProvider(nil, nil)
provider.AddNodeGroup("ng1", 1, 10, 1)
@@ -88,9 +88,9 @@ func TestMissingNodes(t *testing.T) {
now := time.Now()
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
setReadyState(ng1_1, true, now.Add(-time.Minute))
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
setReadyState(ng2_1, true, now.Add(-time.Minute))
SetNodeReadyState(ng2_1, true, now.Add(-time.Minute))
provider := testprovider.NewTestCloudProvider(nil, nil)
provider.AddNodeGroup("ng1", 1, 10, 5)
@@ -113,9 +113,9 @@ func TestToManyUnready(t *testing.T) {
now := time.Now()
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
setReadyState(ng1_1, false, now.Add(-time.Minute))
SetNodeReadyState(ng1_1, false, now.Add(-time.Minute))
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
setReadyState(ng2_1, false, now.Add(-time.Minute))
SetNodeReadyState(ng2_1, false, now.Add(-time.Minute))
provider := testprovider.NewTestCloudProvider(nil, nil)
provider.AddNodeGroup("ng1", 1, 10, 1)
@@ -138,7 +138,7 @@ func TestExpiredScaleUp(t *testing.T) {
now := time.Now()
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
setReadyState(ng1_1, true, now.Add(-time.Minute))
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
provider := testprovider.NewTestCloudProvider(nil, nil)
provider.AddNodeGroup("ng1", 1, 10, 5)
@@ -161,24 +161,6 @@ func TestExpiredScaleUp(t *testing.T) {
assert.False(t, clusterstate.IsNodeGroupHealthy("ng1"))
}
func setReadyState(node *apiv1.Node, ready bool, lastTransition time.Time) {
if ready {
node.Status.Conditions = append(node.Status.Conditions,
apiv1.NodeCondition{
Type: apiv1.NodeReady,
Status: apiv1.ConditionTrue,
LastTransitionTime: metav1.Time{Time: lastTransition},
})
} else {
node.Status.Conditions = append(node.Status.Conditions,
apiv1.NodeCondition{
Type: apiv1.NodeReady,
Status: apiv1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: lastTransition},
})
}
}
func TestRegisterScaleDown(t *testing.T) {
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
provider := testprovider.NewTestCloudProvider(nil, nil)
@@ -210,28 +192,28 @@ func TestUpcomingNodes(t *testing.T) {
// 6 nodes are expected to come.
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
setReadyState(ng1_1, true, now.Add(-time.Minute))
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
provider.AddNodeGroup("ng1", 1, 10, 7)
provider.AddNode("ng1", ng1_1)
// One node is expected to come. One node is unready for a long time
// but this should not make any difference.
ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
setReadyState(ng2_1, false, now.Add(-time.Minute))
SetNodeReadyState(ng2_1, false, now.Add(-time.Minute))
provider.AddNodeGroup("ng2", 1, 10, 2)
provider.AddNode("ng2", ng2_1)
// Two nodes are expected to come. One is just being started for the first time,
// the other one is not there yet.
ng3_1 := BuildTestNode("ng3-1", 1000, 1000)
setReadyState(ng3_1, false, now.Add(-time.Minute))
SetNodeReadyState(ng3_1, false, now.Add(-time.Minute))
ng3_1.CreationTimestamp = metav1.Time{Time: now.Add(-time.Minute)}
provider.AddNodeGroup("ng3", 1, 10, 2)
provider.AddNode("ng3", ng3_1)
// Nothing should be added here.
ng4_1 := BuildTestNode("ng4-1", 1000, 1000)
setReadyState(ng4_1, false, now.Add(-time.Minute))
SetNodeReadyState(ng4_1, false, now.Add(-time.Minute))
provider.AddNodeGroup("ng4", 1, 10, 1)
provider.AddNode("ng4", ng4_1)

View File

@@ -23,6 +23,7 @@ import (
"time"
"k8s.io/contrib/cluster-autoscaler/cloudprovider"
"k8s.io/contrib/cluster-autoscaler/clusterstate"
"k8s.io/contrib/cluster-autoscaler/simulator"
"k8s.io/contrib/cluster-autoscaler/utils/deletetaint"
@@ -145,8 +146,15 @@ func (sd *ScaleDown) TryToScaleDown(nodes []*apiv1.Node, pods []*apiv1.Pod) (Sca
glog.V(2).Infof("%s was unneeded for %s", node.Name, now.Sub(val).String())
ready, _, _ := clusterstate.GetReadinessState(node)
// Check how long the node was underutilized.
if !val.Add(sd.context.ScaleDownUnneededTime).Before(now) {
if ready && !val.Add(sd.context.ScaleDownUnneededTime).Before(now) {
continue
}
// Unready nodes may be deleted after a different time than underutilized ones.
if !ready && !val.Add(sd.context.ScaleDownUnreadyTime).Before(now) {
continue
}
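Taken together, the two checks above implement a single rule: a ready node must have been unneeded for ScaleDownUnneededTime, while an unready node must have been unneeded for the (by default longer) ScaleDownUnreadyTime. A standalone restatement of that rule, as a sketch rather than code from this commit:

```go
// Sketch only: equivalent of the two inline checks in TryToScaleDown above.
package example

import "time"

// eligibleForScaleDown reports whether a node that has been unneeded since
// unneededSince may be scaled down at time now, given its readiness and the
// two configured grace periods.
func eligibleForScaleDown(ready bool, unneededSince, now time.Time,
	unneededTime, unreadyTime time.Duration) bool {
	gracePeriod := unneededTime
	if !ready {
		gracePeriod = unreadyTime
	}
	return unneededSince.Add(gracePeriod).Before(now)
}
```

With the flag defaults added in this commit, that means 10 minutes for ready nodes and 20 minutes for unready ones.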

View File

@@ -135,7 +135,9 @@ func TestScaleDown(t *testing.T) {
},
}
n1 := BuildTestNode("n1", 1000, 1000)
SetNodeReadyState(n1, true, time.Time{})
n2 := BuildTestNode("n2", 1000, 1000)
SetNodeReadyState(n2, true, time.Time{})
p1 := BuildTestPod("p1", 100, 0)
p1.Annotations = map[string]string{
"kubernetes.io/created-by": RefJSON(&job),
@@ -200,6 +202,76 @@ func TestScaleDown(t *testing.T) {
assert.Equal(t, n1.Name, getStringFromChan(updatedNodes))
}
func TestNoScaleDown(t *testing.T) {
fakeClient := &fake.Clientset{}
n1 := BuildTestNode("n1", 1000, 1000)
SetNodeReadyState(n1, false, time.Now().Add(-3*time.Minute))
n2 := BuildTestNode("n2", 1000, 1000)
SetNodeReadyState(n2, true, time.Time{})
p2 := BuildTestPod("p2", 800, 0)
p2.Spec.NodeName = "n2"
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &apiv1.PodList{Items: []apiv1.Pod{*p2}}, nil
})
fakeClient.Fake.AddReactor("get", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, nil, errors.NewNotFound(apiv1.Resource("pod"), "whatever")
})
fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
getAction := action.(core.GetAction)
switch getAction.GetName() {
case n1.Name:
return true, n1, nil
case n2.Name:
return true, n2, nil
}
return true, nil, fmt.Errorf("Wrong node: %v", getAction.GetName())
})
provider := testprovider.NewTestCloudProvider(nil, func(nodeGroup string, node string) error {
t.Fatalf("Unexpected deletion of %s", node)
return nil
})
provider.AddNodeGroup("ng1", 1, 10, 2)
provider.AddNode("ng1", n1)
provider.AddNode("ng1", n2)
context := &AutoscalingContext{
PredicateChecker: simulator.NewTestPredicateChecker(),
CloudProvider: provider,
ClientSet: fakeClient,
Recorder: createEventRecorder(fakeClient),
ScaleDownUtilizationThreshold: 0.5,
ScaleDownUnneededTime: time.Minute,
ScaleDownUnreadyTime: time.Hour,
MaxGratefulTerminationSec: 60,
}
scaleDown := NewScaleDown(context)
scaleDown.UpdateUnneededNodes([]*apiv1.Node{n1, n2}, []*apiv1.Pod{p2}, time.Now().Add(-5*time.Minute))
result, err := scaleDown.TryToScaleDown([]*apiv1.Node{n1, n2}, []*apiv1.Pod{p2})
assert.NoError(t, err)
assert.Equal(t, ScaleDownNoUnneeded, result)
deletedNodes := make(chan string, 10)
provider = testprovider.NewTestCloudProvider(nil, func(nodeGroup string, node string) error {
deletedNodes <- node
return nil
})
SetNodeReadyState(n1, false, time.Now().Add(-3*time.Hour))
provider.AddNodeGroup("ng1", 1, 10, 2)
provider.AddNode("ng1", n1)
provider.AddNode("ng1", n2)
context.CloudProvider = provider
scaleDown = NewScaleDown(context)
scaleDown.UpdateUnneededNodes([]*apiv1.Node{n1, n2}, []*apiv1.Pod{p2}, time.Now().Add(-2*time.Hour))
result, err = scaleDown.TryToScaleDown([]*apiv1.Node{n1, n2}, []*apiv1.Pod{p2})
assert.NoError(t, err)
assert.Equal(t, ScaleDownNodeDeleted, result)
assert.Equal(t, n1.Name, getStringFromChan(deletedNodes))
}
func getStringFromChan(c chan string) string {
select {
case val := <-c:

View File

@@ -53,6 +53,9 @@ type AutoscalingContext struct {
// ScaleDownUnneededTime sets the duration CA expects a node to be unneeded/eligible for removal
// before scaling down the node.
ScaleDownUnneededTime time.Duration
// ScaleDownUnreadyTime sets the duration CA expects an unready node to be unneeded/eligible for removal
// before scaling down the node.
ScaleDownUnreadyTime time.Duration
// MaxNodesTotal sets the maximum number of nodes in the whole cluster
MaxNodesTotal int
// EstimatorName is the estimator used to estimate the number of needed nodes in scale up.

View File

@@ -18,9 +18,11 @@ package test
import (
"fmt"
"time"
"k8s.io/kubernetes/pkg/api/resource"
apiv1 "k8s.io/kubernetes/pkg/api/v1"
metav1 "k8s.io/kubernetes/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/api/testapi"
"k8s.io/kubernetes/pkg/runtime"
@@ -81,6 +83,32 @@ func BuildTestNode(name string, cpu int64, mem int64) *apiv1.Node {
return node
}
// SetNodeReadyState sets node ready state.
func SetNodeReadyState(node *apiv1.Node, ready bool, lastTransition time.Time) {
for i := range node.Status.Conditions {
if node.Status.Conditions[i].Type == apiv1.NodeReady {
node.Status.Conditions[i].LastTransitionTime = metav1.Time{Time: lastTransition}
if ready {
node.Status.Conditions[i].Status = apiv1.ConditionTrue
} else {
node.Status.Conditions[i].Status = apiv1.ConditionFalse
}
return
}
}
condition := apiv1.NodeCondition{
Type: apiv1.NodeReady,
Status: apiv1.ConditionTrue,
LastTransitionTime: metav1.Time{Time: lastTransition},
}
if ready {
condition.Status = apiv1.ConditionTrue
} else {
condition.Status = apiv1.ConditionFalse
}
node.Status.Conditions = append(node.Status.Conditions, condition)
}
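For illustration, a typical call site for the new helper, consolidating the pattern the updated tests use. It assumes it sits next to the helper above in the same test utilities package; node names and durations are made up for the example, not taken from the commit:

```go
// Sketch only: building one ready node and one long-unready node in a test.
func buildExampleNodes() (readyNode, unreadyNode *apiv1.Node) {
	now := time.Now()
	readyNode = BuildTestNode("ready-node", 1000, 1000)
	SetNodeReadyState(readyNode, true, now.Add(-time.Minute))
	unreadyNode = BuildTestNode("unready-node", 1000, 1000)
	SetNodeReadyState(unreadyNode, false, now.Add(-time.Hour))
	return readyNode, unreadyNode
}
```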
// RefJSON builds a string reference to the given object.
func RefJSON(o runtime.Object) string {
ref, err := apiv1.GetReference(o)