Don't calculate utilization and run scale down simulations for unmanaged nodes
parent 7b3a75c294
commit 2cd532ebfe
@@ -109,9 +109,11 @@ func (sd *ScaleDown) CleanUpUnneededNodes() {
 // UpdateUnneededNodes calculates which nodes are not needed, i.e. all pods can be scheduled somewhere else,
 // and updates unneededNodes map accordingly. It also computes information where pods can be rescheduled and
-// node utilization level. Timestamp is the current timestamp.
+// node utilization level. Timestamp is the current timestamp. The computations are made only for the nodes
+// managed by CA.
 func (sd *ScaleDown) UpdateUnneededNodes(
     nodes []*apiv1.Node,
+    managedNodes []*apiv1.Node,
     pods []*apiv1.Pod,
     timestamp time.Time,
     pdbs []*policyv1.PodDisruptionBudget) errors.AutoscalerError {

@@ -120,8 +122,9 @@ func (sd *ScaleDown) UpdateUnneededNodes(
     nodeNameToNodeInfo := schedulercache.CreateNodeNameToInfoMap(pods, nodes)
     utilizationMap := make(map[string]float64)
 
-    // Phase1 - look at the nodes utilization.
-    for _, node := range nodes {
+    // Phase1 - look at the nodes utilization. Calculate the utilization
+    // only for the managed nodes.
+    for _, node := range managedNodes {
 
         // Skip nodes marked to be deleted, if they were marked recently.
         // Old-time marked nodes are again eligible for deletion - something went wrong with them

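The calling convention this introduces is easiest to see outside the diff: callers still pass the full node list, which feeds the rescheduling simulation, plus the subset that belongs to a node group, which is now the only set the utilization phase walks. Below is a minimal, self-contained sketch of that split; the node, filterManaged, and updateUnneededNodes names are simplified stand-ins for illustration, not the real cluster-autoscaler types.

package main

import "fmt"

// node is a simplified stand-in for *apiv1.Node.
type node struct {
    name      string
    nodeGroup string // empty means the node is not managed by any node group
}

// filterManaged mirrors the role of getManagedNodes: keep only nodes with a node group.
func filterManaged(nodes []node) []node {
    managed := make([]node, 0, len(nodes))
    for _, n := range nodes {
        if n.nodeGroup == "" {
            continue // unmanaged: excluded from utilization and scale-down simulation
        }
        managed = append(managed, n)
    }
    return managed
}

// updateUnneededNodes mirrors the new two-slice signature: allNodes feeds the
// rescheduling simulation, managedNodes is the only set whose utilization is computed.
func updateUnneededNodes(allNodes, managedNodes []node) map[string]float64 {
    utilization := make(map[string]float64, len(managedNodes))
    for _, n := range managedNodes { // the "Phase 1" loop now ranges over managed nodes only
        utilization[n.name] = 0.42 // placeholder; the real code derives this from pod requests
    }
    return utilization
}

func main() {
    all := []node{{"n1", "ng1"}, {"n2", ""}, {"n3", "ng1"}}
    fmt.Println(updateUnneededNodes(all, filterManaged(all))) // n2 is never considered
}
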
@@ -86,7 +86,7 @@ func TestFindUnneededNodes(t *testing.T) {
         LogRecorder: fakeLogRecorder,
     }
     sd := NewScaleDown(&context)
-    sd.UpdateUnneededNodes([]*apiv1.Node{n1, n2, n3, n4}, []*apiv1.Pod{p1, p2, p3, p4}, time.Now(), nil)
+    sd.UpdateUnneededNodes([]*apiv1.Node{n1, n2, n3, n4}, []*apiv1.Node{n1, n2, n3, n4}, []*apiv1.Pod{p1, p2, p3, p4}, time.Now(), nil)
 
     assert.Equal(t, 1, len(sd.unneededNodes))
     addTime, found := sd.unneededNodes["n2"]

@@ -95,13 +95,16 @@ func TestFindUnneededNodes(t *testing.T) {
     assert.Equal(t, 4, len(sd.nodeUtilizationMap))
 
     sd.unneededNodes["n1"] = time.Now()
-    sd.UpdateUnneededNodes([]*apiv1.Node{n1, n2, n3, n4}, []*apiv1.Pod{p1, p2, p3, p4}, time.Now(), nil)
+    sd.UpdateUnneededNodes([]*apiv1.Node{n1, n2, n3, n4}, []*apiv1.Node{n1, n2, n3, n4}, []*apiv1.Pod{p1, p2, p3, p4}, time.Now(), nil)
 
     assert.Equal(t, 1, len(sd.unneededNodes))
     addTime2, found := sd.unneededNodes["n2"]
     assert.True(t, found)
     assert.Equal(t, addTime, addTime2)
     assert.Equal(t, 4, len(sd.nodeUtilizationMap))
+
+    sd.UpdateUnneededNodes([]*apiv1.Node{n1, n2, n3, n4}, []*apiv1.Node{n1, n3, n4}, []*apiv1.Pod{p1, p2, p3, p4}, time.Now(), nil)
+    assert.Equal(t, 0, len(sd.unneededNodes))
 }
 
 func TestDrainNode(t *testing.T) {

@@ -288,7 +291,8 @@ func TestScaleDown(t *testing.T) {
         LogRecorder: fakeLogRecorder,
     }
     scaleDown := NewScaleDown(context)
-    scaleDown.UpdateUnneededNodes([]*apiv1.Node{n1, n2}, []*apiv1.Pod{p1, p2}, time.Now().Add(-5*time.Minute), nil)
+    scaleDown.UpdateUnneededNodes([]*apiv1.Node{n1, n2},
+        []*apiv1.Node{n1, n2}, []*apiv1.Pod{p1, p2}, time.Now().Add(-5*time.Minute), nil)
     result, err := scaleDown.TryToScaleDown([]*apiv1.Node{n1, n2}, []*apiv1.Pod{p1, p2}, nil)
     assert.NoError(t, err)
     assert.Equal(t, ScaleDownNodeDeleted, result)

@@ -349,7 +353,8 @@ func TestNoScaleDownUnready(t *testing.T) {
 
     // N1 is unready so it requires a bigger unneeded time.
     scaleDown := NewScaleDown(context)
-    scaleDown.UpdateUnneededNodes([]*apiv1.Node{n1, n2}, []*apiv1.Pod{p2}, time.Now().Add(-5*time.Minute), nil)
+    scaleDown.UpdateUnneededNodes([]*apiv1.Node{n1, n2},
+        []*apiv1.Node{n1, n2}, []*apiv1.Pod{p2}, time.Now().Add(-5*time.Minute), nil)
     result, err := scaleDown.TryToScaleDown([]*apiv1.Node{n1, n2}, []*apiv1.Pod{p2}, nil)
     assert.NoError(t, err)
     assert.Equal(t, ScaleDownNoUnneeded, result)

@@ -368,7 +373,8 @@ func TestNoScaleDownUnready(t *testing.T) {
     // N1 has been unready for 2 hours, ok to delete.
     context.CloudProvider = provider
     scaleDown = NewScaleDown(context)
-    scaleDown.UpdateUnneededNodes([]*apiv1.Node{n1, n2}, []*apiv1.Pod{p2}, time.Now().Add(-2*time.Hour), nil)
+    scaleDown.UpdateUnneededNodes([]*apiv1.Node{n1, n2}, []*apiv1.Node{n1, n2},
+        []*apiv1.Pod{p2}, time.Now().Add(-2*time.Hour), nil)
     result, err = scaleDown.TryToScaleDown([]*apiv1.Node{n1, n2}, []*apiv1.Pod{p2}, nil)
     assert.NoError(t, err)
     assert.Equal(t, ScaleDownNodeDeleted, result)

@@ -451,7 +457,8 @@ func TestScaleDownNoMove(t *testing.T) {
         LogRecorder: fakeLogRecorder,
     }
     scaleDown := NewScaleDown(context)
-    scaleDown.UpdateUnneededNodes([]*apiv1.Node{n1, n2}, []*apiv1.Pod{p1, p2}, time.Now().Add(5*time.Minute), nil)
+    scaleDown.UpdateUnneededNodes([]*apiv1.Node{n1, n2}, []*apiv1.Node{n1, n2},
+        []*apiv1.Pod{p1, p2}, time.Now().Add(5*time.Minute), nil)
     result, err := scaleDown.TryToScaleDown([]*apiv1.Node{n1, n2}, []*apiv1.Pod{p1, p2}, nil)
     assert.NoError(t, err)
     assert.Equal(t, ScaleDownNoUnneeded, result)

@@ -267,7 +267,9 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
         glog.V(4).Infof("Calculating unneeded nodes")
 
         scaleDown.CleanUp(time.Now())
-        typedErr := scaleDown.UpdateUnneededNodes(allNodes, allScheduled, time.Now(), pdbs)
+        managedNodes := getManagedNodes(autoscalingContext, allNodes)
+
+        typedErr := scaleDown.UpdateUnneededNodes(allNodes, managedNodes, allScheduled, time.Now(), pdbs)
         if typedErr != nil {
             glog.Errorf("Failed to scale down: %v", typedErr)
             return typedErr

@@ -329,3 +329,21 @@ func fixNodeGroupSize(context *AutoscalingContext, currentTime time.Time) (bool,
     }
     return fixed, nil
 }
+
+// getManagedNodes returns the nodes managed by the cluster autoscaler.
+func getManagedNodes(context *AutoscalingContext, nodes []*apiv1.Node) []*apiv1.Node {
+    result := make([]*apiv1.Node, 0, len(nodes))
+    for _, node := range nodes {
+        nodeGroup, err := context.CloudProvider.NodeGroupForNode(node)
+        if err != nil {
+            glog.Warningf("Error while checking node group for %s: %v", node.Name, err)
+            continue
+        }
+        if nodeGroup == nil || reflect.ValueOf(nodeGroup).IsNil() {
+            glog.V(4).Infof("Skipping %s - no node group config", node.Name)
+            continue
+        }
+        result = append(result, node)
+    }
+    return result
+}
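
One non-obvious line in getManagedNodes is the reflect.ValueOf(nodeGroup).IsNil() check. NodeGroupForNode returns an interface value, and in Go an interface that wraps a typed nil pointer does not compare equal to nil, so nodeGroup == nil alone would miss providers that return a nil concrete type; the == nil half still matters because it guards the reflect call against a truly nil interface, which would otherwise panic. A small standalone sketch of the pitfall, using illustrative NodeGroup/fakeNodeGroup types rather than the real cloudprovider API:

package main

import (
    "fmt"
    "reflect"
)

// NodeGroup is an illustrative stand-in for the cloud provider's node-group interface.
type NodeGroup interface{ Id() string }

type fakeNodeGroup struct{ id string }

func (g *fakeNodeGroup) Id() string { return g.id }

// nodeGroupForNode mimics a provider that signals "unmanaged" by returning a typed nil pointer.
func nodeGroupForNode(name string) (NodeGroup, error) {
    var ng *fakeNodeGroup // nil pointer of a concrete type
    if name == "managed-node" {
        ng = &fakeNodeGroup{id: "ng-1"}
    }
    return ng, nil // the returned interface is non-nil even when ng is a nil pointer
}

func main() {
    ng, _ := nodeGroupForNode("unmanaged-node")
    fmt.Println(ng == nil)                                // false: the interface wraps a typed nil
    fmt.Println(ng == nil || reflect.ValueOf(ng).IsNil()) // true: the combined check catches it
}

Running this prints false and then true, which is why both halves of the condition are needed.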