Cluster-autoscaler: skip unready node groups in scale up

This commit is contained in:
Marcin Wielgus 2017-01-09 14:43:38 +01:00
parent 1573d4edd2
commit 5b8997fef9
2 changed files with 134 additions and 4 deletions

View File

@ -65,6 +65,11 @@ func ScaleUp(context *AutoscalingContext, unschedulablePods []*apiv1.Pod, nodes
for _, nodeGroup := range context.CloudProvider.NodeGroups() {
if !context.ClusterStateRegistry.IsNodeGroupHealthy(nodeGroup.Id()) {
glog.Warningf("Node group %s is unhealthy", nodeGroup.Id())
continue
}
currentSize, err := nodeGroup.TargetSize()
if err != nil {
glog.Errorf("Failed to get node group size: %v", err)

View File

@ -41,7 +41,10 @@ func TestScaleUpOK(t *testing.T) {
fakeClient := &fake.Clientset{}
n1 := BuildTestNode("n1", 100, 1000)
SetNodeReadyState(n1, true, time.Now())
n2 := BuildTestNode("n2", 1000, 1000)
SetNodeReadyState(n2, true, time.Now())
p1 := BuildTestPod("p1", 80, 0)
p2 := BuildTestPod("p2", 800, 0)
p1.Spec.NodeName = "n1"
@ -69,6 +72,8 @@ func TestScaleUpOK(t *testing.T) {
provider.AddNode("ng2", n2)
assert.NotNil(t, provider)
clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{})
clusterState.UpdateNodes([]*apiv1.Node{n1, n2}, time.Now())
context := &AutoscalingContext{
PredicateChecker: simulator.NewTestPredicateChecker(),
CloudProvider: provider,
@ -76,7 +81,7 @@ func TestScaleUpOK(t *testing.T) {
Recorder: createEventRecorder(fakeClient),
EstimatorName: BinpackingEstimatorName,
ExpanderStrategy: random.NewStrategy(),
ClusterStateRegistry: clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}),
ClusterStateRegistry: clusterState,
}
p3 := BuildTestPod("p-new", 500, 0)
@ -86,9 +91,12 @@ func TestScaleUpOK(t *testing.T) {
assert.Equal(t, "ng2-1", getStringFromChan(expandedGroups))
}
func TestScaleUpNodeComing(t *testing.T) {
func TestScaleUpNodeComingNoScale(t *testing.T) {
n1 := BuildTestNode("n1", 100, 1000)
SetNodeReadyState(n1, true, time.Now())
n2 := BuildTestNode("n2", 1000, 1000)
SetNodeReadyState(n2, true, time.Now())
p1 := BuildTestPod("p1", 80, 0)
p2 := BuildTestPod("p2", 800, 0)
p1.Spec.NodeName = "n1"
@ -134,7 +142,7 @@ func TestScaleUpNodeComing(t *testing.T) {
ExpanderStrategy: random.NewStrategy(),
ClusterStateRegistry: clusterState,
}
p3 := BuildTestPod("p-new", 500, 0)
p3 := BuildTestPod("p-new", 550, 0)
result, err := ScaleUp(context, []*apiv1.Pod{p3}, []*apiv1.Node{n1, n2})
assert.NoError(t, err)
@ -142,9 +150,124 @@ func TestScaleUpNodeComing(t *testing.T) {
assert.False(t, result)
}
func TestScaleUpNodeComingHasScale(t *testing.T) {
n1 := BuildTestNode("n1", 100, 1000)
SetNodeReadyState(n1, true, time.Now())
n2 := BuildTestNode("n2", 1000, 1000)
SetNodeReadyState(n2, true, time.Now())
p1 := BuildTestPod("p1", 80, 0)
p2 := BuildTestPod("p2", 800, 0)
p1.Spec.NodeName = "n1"
p2.Spec.NodeName = "n2"
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
list := action.(core.ListAction)
fieldstring := list.GetListRestrictions().Fields.String()
if strings.Contains(fieldstring, "n1") {
return true, &apiv1.PodList{Items: []apiv1.Pod{*p1}}, nil
}
if strings.Contains(fieldstring, "n2") {
return true, &apiv1.PodList{Items: []apiv1.Pod{*p2}}, nil
}
return true, nil, fmt.Errorf("Failed to list: %v", list)
})
expandedGroups := make(chan string, 10)
provider := testprovider.NewTestCloudProvider(func(nodeGroup string, increase int) error {
expandedGroups <- fmt.Sprintf("%s-%d", nodeGroup, increase)
return nil
}, nil)
provider.AddNodeGroup("ng1", 1, 10, 1)
provider.AddNodeGroup("ng2", 1, 10, 2)
provider.AddNode("ng1", n1)
provider.AddNode("ng2", n2)
clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{})
clusterState.RegisterScaleUp(&clusterstate.ScaleUpRequest{
NodeGroupName: "ng2",
Increase: 1,
Time: time.Now(),
ExpectedAddTime: time.Now().Add(5 * time.Minute),
})
clusterState.UpdateNodes([]*apiv1.Node{n1, n2}, time.Now())
context := &AutoscalingContext{
PredicateChecker: simulator.NewTestPredicateChecker(),
CloudProvider: provider,
ClientSet: fakeClient,
Recorder: createEventRecorder(fakeClient),
EstimatorName: BinpackingEstimatorName,
ExpanderStrategy: random.NewStrategy(),
ClusterStateRegistry: clusterState,
}
p3 := BuildTestPod("p-new", 550, 0)
result, err := ScaleUp(context, []*apiv1.Pod{p3, p3}, []*apiv1.Node{n1, n2})
assert.NoError(t, err)
// Twho nodes needed but one node is already coming, so it should increase by one.
assert.True(t, result)
assert.Equal(t, "ng2-1", getStringFromChan(expandedGroups))
}
func TestScaleUpUnhealthy(t *testing.T) {
n1 := BuildTestNode("n1", 100, 1000)
SetNodeReadyState(n1, true, time.Now())
n2 := BuildTestNode("n2", 1000, 1000)
SetNodeReadyState(n2, true, time.Now())
p1 := BuildTestPod("p1", 80, 0)
p2 := BuildTestPod("p2", 800, 0)
p1.Spec.NodeName = "n1"
p2.Spec.NodeName = "n2"
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
list := action.(core.ListAction)
fieldstring := list.GetListRestrictions().Fields.String()
if strings.Contains(fieldstring, "n1") {
return true, &apiv1.PodList{Items: []apiv1.Pod{*p1}}, nil
}
if strings.Contains(fieldstring, "n2") {
return true, &apiv1.PodList{Items: []apiv1.Pod{*p2}}, nil
}
return true, nil, fmt.Errorf("Failed to list: %v", list)
})
provider := testprovider.NewTestCloudProvider(func(nodeGroup string, increase int) error {
t.Fatalf("No expansion is expected, but increased %s by %d", nodeGroup, increase)
return nil
}, nil)
provider.AddNodeGroup("ng1", 1, 10, 1)
provider.AddNodeGroup("ng2", 1, 10, 5)
provider.AddNode("ng1", n1)
provider.AddNode("ng2", n2)
clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{})
clusterState.UpdateNodes([]*apiv1.Node{n1, n2}, time.Now())
context := &AutoscalingContext{
PredicateChecker: simulator.NewTestPredicateChecker(),
CloudProvider: provider,
ClientSet: fakeClient,
Recorder: createEventRecorder(fakeClient),
EstimatorName: BinpackingEstimatorName,
ExpanderStrategy: random.NewStrategy(),
ClusterStateRegistry: clusterState,
}
p3 := BuildTestPod("p-new", 550, 0)
result, err := ScaleUp(context, []*apiv1.Pod{p3}, []*apiv1.Node{n1, n2})
assert.NoError(t, err)
// Node group is unhealthy.
assert.False(t, result)
}
func TestScaleUpNoHelp(t *testing.T) {
fakeClient := &fake.Clientset{}
n1 := BuildTestNode("n1", 100, 1000)
SetNodeReadyState(n1, true, time.Now())
p1 := BuildTestPod("p1", 80, 0)
p1.Spec.NodeName = "n1"
@ -165,6 +288,8 @@ func TestScaleUpNoHelp(t *testing.T) {
provider.AddNode("ng1", n1)
assert.NotNil(t, provider)
clusterState := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{})
clusterState.UpdateNodes([]*apiv1.Node{n1}, time.Now())
context := &AutoscalingContext{
PredicateChecker: simulator.NewTestPredicateChecker(),
CloudProvider: provider,
@ -172,7 +297,7 @@ func TestScaleUpNoHelp(t *testing.T) {
Recorder: createEventRecorder(fakeClient),
EstimatorName: BinpackingEstimatorName,
ExpanderStrategy: random.NewStrategy(),
ClusterStateRegistry: clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}),
ClusterStateRegistry: clusterState,
}
p3 := BuildTestPod("p-new", 500, 0)