diff --git a/cluster-autoscaler/core/scale_down.go b/cluster-autoscaler/core/scale_down.go index f54bb35c93..1e0d9ac271 100644 --- a/cluster-autoscaler/core/scale_down.go +++ b/cluster-autoscaler/core/scale_down.go @@ -144,7 +144,7 @@ func (sd *ScaleDown) CleanUpUnneededNodes() { // managed by CA. func (sd *ScaleDown) UpdateUnneededNodes( nodes []*apiv1.Node, - managedNodes []*apiv1.Node, + nodesToCheck []*apiv1.Node, pods []*apiv1.Pod, timestamp time.Time, pdbs []*policyv1.PodDisruptionBudget) errors.AutoscalerError { @@ -154,24 +154,24 @@ func (sd *ScaleDown) UpdateUnneededNodes( utilizationMap := make(map[string]float64) // Filter out nodes that were recently checked - nodesToCheck := make([]*apiv1.Node, 0) - for _, node := range managedNodes { + filteredNodesToCheck := make([]*apiv1.Node, 0) + for _, node := range nodesToCheck { if unremovableTimestamp, found := sd.unremovableNodes[node.Name]; found { if unremovableTimestamp.After(timestamp) { continue } delete(sd.unremovableNodes, node.Name) } - nodesToCheck = append(nodesToCheck, node) + filteredNodesToCheck = append(filteredNodesToCheck, node) } - skipped := len(managedNodes) - len(nodesToCheck) + skipped := len(nodesToCheck) - len(filteredNodesToCheck) if skipped > 0 { glog.V(1).Infof("Scale-down calculation: ignoring %v nodes, that were unremovable in the last %v", skipped, UnremovableNodeRecheckTimeout) } // Phase1 - look at the nodes utilization. Calculate the utilization // only for the managed nodes. - for _, node := range nodesToCheck { + for _, node := range filteredNodesToCheck { // Skip nodes marked to be deleted, if they were marked recently. // Old-time marked nodes are again eligible for deletion - something went wrong with them diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go index 379212d258..8512e2d4ec 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -266,9 +266,9 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError glog.V(4).Infof("Calculating unneeded nodes") scaleDown.CleanUp(time.Now()) - managedNodes := getManagedNodes(autoscalingContext, allNodes) + potentiallyUnneeded := getPotentiallyUnneededNodes(autoscalingContext, allNodes) - typedErr := scaleDown.UpdateUnneededNodes(allNodes, managedNodes, allScheduled, time.Now(), pdbs) + typedErr := scaleDown.UpdateUnneededNodes(allNodes, potentiallyUnneeded, allScheduled, time.Now(), pdbs) if typedErr != nil { glog.Errorf("Failed to scale down: %v", typedErr) return typedErr diff --git a/cluster-autoscaler/core/utils.go b/cluster-autoscaler/core/utils.go index cffcd00bc5..e2d2fc3043 100644 --- a/cluster-autoscaler/core/utils.go +++ b/cluster-autoscaler/core/utils.go @@ -359,8 +359,10 @@ func fixNodeGroupSize(context *AutoscalingContext, currentTime time.Time) (bool, return fixed, nil } -// getManagedNodes returns the nodes managed by the cluster autoscaler. -func getManagedNodes(context *AutoscalingContext, nodes []*apiv1.Node) []*apiv1.Node { +// getPotentiallyUnneededNodes returns nodes that are: +// - managed by the cluster autoscaler +// - in groups with size > min size +func getPotentiallyUnneededNodes(context *AutoscalingContext, nodes []*apiv1.Node) []*apiv1.Node { result := make([]*apiv1.Node, 0, len(nodes)) for _, node := range nodes { nodeGroup, err := context.CloudProvider.NodeGroupForNode(node) @@ -372,6 +374,15 @@ func getManagedNodes(context *AutoscalingContext, nodes []*apiv1.Node) []*apiv1. glog.V(4).Infof("Skipping %s - no node group config", node.Name) continue } + size, err := nodeGroup.TargetSize() + if err != nil { + glog.Errorf("Error while checking node group size %s: %v", nodeGroup.Id(), err) + continue + } + if size <= nodeGroup.MinSize() { + glog.V(1).Infof("Skipping %s - node group min size reached", node.Name) + continue + } result = append(result, node) } return result diff --git a/cluster-autoscaler/core/utils_test.go b/cluster-autoscaler/core/utils_test.go index 7935782ebd..81084e2104 100644 --- a/cluster-autoscaler/core/utils_test.go +++ b/cluster-autoscaler/core/utils_test.go @@ -256,3 +256,26 @@ func TestRemoveFixNodeTargetSize(t *testing.T) { change := getStringFromChan(sizeChanges) assert.Equal(t, "ng1/-2", change) } + +func TestGetPotentiallyUnneededNodes(t *testing.T) { + ng1_1 := BuildTestNode("ng1-1", 1000, 1000) + ng1_2 := BuildTestNode("ng1-2", 1000, 1000) + ng2_1 := BuildTestNode("ng2-1", 1000, 1000) + noNg := BuildTestNode("no-ng", 1000, 1000) + provider := testprovider.NewTestCloudProvider(nil, nil) + provider.AddNodeGroup("ng1", 1, 10, 2) + provider.AddNodeGroup("ng2", 1, 10, 1) + provider.AddNode("ng1", ng1_1) + provider.AddNode("ng1", ng1_2) + provider.AddNode("ng2", ng2_1) + + context := &AutoscalingContext{ + CloudProvider: provider, + } + + result := getPotentiallyUnneededNodes(context, []*apiv1.Node{ng1_1, ng1_2, ng2_1, noNg}) + assert.Equal(t, 2, len(result)) + ok1 := result[0].Name == "ng1-1" && result[1].Name == "ng1-2" + ok2 := result[1].Name == "ng1-1" && result[0].Name == "ng1-2" + assert.True(t, ok1 || ok2) +}