From a6c18b87d22e1cb51dedcec6e244c37de8babc66 Mon Sep 17 00:00:00 2001
From: Beata Skiba
Date: Thu, 31 Aug 2017 14:53:23 +0200
Subject: [PATCH] Only consider up to 10% of the nodes as additional candidates for scale down.

---
 .../core/autoscaling_context.go            | 10 ++++
 cluster-autoscaler/core/scale_down.go      | 15 ++++--
 cluster-autoscaler/core/scale_down_test.go | 49 +++++++++++++++++++
 cluster-autoscaler/main.go                 | 13 +++++
 4 files changed, 84 insertions(+), 3 deletions(-)

diff --git a/cluster-autoscaler/core/autoscaling_context.go b/cluster-autoscaler/core/autoscaling_context.go
index 02f3dc767e..d3c79bc1f3 100644
--- a/cluster-autoscaler/core/autoscaling_context.go
+++ b/cluster-autoscaler/core/autoscaling_context.go
@@ -99,6 +99,16 @@ type AutoscalingOptions struct {
     // ScaleDownNonEmptyCandidatesCount is the maximum number of non empty nodes
     // considered at once as candidates for scale down.
     ScaleDownNonEmptyCandidatesCount int
+    // ScaleDownCandidatesPoolRatio is a ratio of nodes that are considered
+    // as additional non empty candidates for scale down when some candidates from
+    // previous iteration are no longer valid.
+    ScaleDownCandidatesPoolRatio float64
+    // ScaleDownCandidatesPoolMinCount is the minimum number of nodes that are
+    // considered as additional non empty candidates for scale down when some
+    // candidates from previous iteration are no longer valid.
+    // The formula to calculate additional candidates number is following:
+    // max(#nodes * ScaleDownCandidatesPoolRatio, ScaleDownCandidatesPoolMinCount)
+    ScaleDownCandidatesPoolMinCount int
     // WriteStatusConfigMap tells if the status information should be written to a ConfigMap
     WriteStatusConfigMap bool
     // BalanceSimilarNodeGroups enables logic that identifies node groups with similar machines and tries to balance node count between them.
diff --git a/cluster-autoscaler/core/scale_down.go b/cluster-autoscaler/core/scale_down.go
index 1ad731ccb1..b1ea541686 100644
--- a/cluster-autoscaler/core/scale_down.go
+++ b/cluster-autoscaler/core/scale_down.go
@@ -18,6 +18,7 @@ package core
 
 import (
     "fmt"
+    "math"
     "reflect"
     "strings"
     "sync"
@@ -219,15 +220,23 @@ func (sd *ScaleDown) UpdateUnneededNodes(
         return sd.markSimulationError(simulatorErr, timestamp)
     }
 
-    additionalCandidatesCount := sd.context.AutoscalingOptions.ScaleDownNonEmptyCandidatesCount - len(nodesToRemove)
+    additionalCandidatesCount := sd.context.ScaleDownNonEmptyCandidatesCount - len(nodesToRemove)
     if additionalCandidatesCount > len(currentNonCandidates) {
         additionalCandidatesCount = len(currentNonCandidates)
     }
+    // Limit the additional candidates pool size for better performance.
+    additionalCandidatesPoolSize := int(math.Ceil(float64(len(nodes)) * sd.context.ScaleDownCandidatesPoolRatio))
+    if additionalCandidatesPoolSize < sd.context.ScaleDownCandidatesPoolMinCount {
+        additionalCandidatesPoolSize = sd.context.ScaleDownCandidatesPoolMinCount
+    }
+    if additionalCandidatesPoolSize > len(currentNonCandidates) {
+        additionalCandidatesPoolSize = len(currentNonCandidates)
+    }
     if additionalCandidatesCount > 0 {
         // Look for addidtional nodes to remove among the rest of nodes
         glog.V(3).Infof("Finding additional %v candidates for scale down.", additionalCandidatesCount)
         additionalNodesToRemove, additionalUnremovable, additionalNewHints, simulatorErr :=
-            simulator.FindNodesToRemove(currentNonCandidates, nodes, pods, nil,
+            simulator.FindNodesToRemove(currentNonCandidates[:additionalCandidatesPoolSize], nodes, pods, nil,
                 sd.context.PredicateChecker, additionalCandidatesCount, true,
                 sd.podLocationHints, sd.usageTracker, timestamp, pdbs)
         if simulatorErr != nil {
@@ -290,7 +299,7 @@ func (sd *ScaleDown) markSimulationError(simulatorErr errors.AutoscalerError,
 func (sd *ScaleDown) chooseCandidates(nodes []*apiv1.Node) ([]*apiv1.Node, []*apiv1.Node) {
     // Number of candidates should not be capped. We will look for nodes to remove
     // from the whole set of nodes.
-    if sd.context.AutoscalingOptions.ScaleDownNonEmptyCandidatesCount <= 0 {
+    if sd.context.ScaleDownNonEmptyCandidatesCount <= 0 {
         return nodes, []*apiv1.Node{}
     }
     currentCandidates := make([]*apiv1.Node, 0, len(sd.unneededNodesList))
diff --git a/cluster-autoscaler/core/scale_down_test.go b/cluster-autoscaler/core/scale_down_test.go
index 45f9702857..6190dde950 100644
--- a/cluster-autoscaler/core/scale_down_test.go
+++ b/cluster-autoscaler/core/scale_down_test.go
@@ -165,6 +165,8 @@ func TestFindUnneededMaxCandidates(t *testing.T) {
         AutoscalingOptions: AutoscalingOptions{
             ScaleDownUtilizationThreshold:    0.35,
             ScaleDownNonEmptyCandidatesCount: numCandidates,
+            ScaleDownCandidatesPoolRatio:     1,
+            ScaleDownCandidatesPoolMinCount:  1000,
         },
         ClusterStateRegistry: clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, fakeLogRecorder),
         PredicateChecker:     simulator.NewTestPredicateChecker(),
@@ -199,6 +201,53 @@
     assert.NotContains(t, sd.unneededNodes, deleted)
 }
 
+func TestFindUnneededAdditionalNodePool(t *testing.T) {
+    provider := testprovider.NewTestCloudProvider(nil, nil)
+    provider.AddNodeGroup("ng1", 1, 100, 2)
+
+    numNodes := 100
+    nodes := make([]*apiv1.Node, 0, numNodes)
+    for i := 0; i < numNodes; i++ {
+        n := BuildTestNode(fmt.Sprintf("n%v", i), 1000, 10)
+        SetNodeReadyState(n, true, time.Time{})
+        provider.AddNode("ng1", n)
+        nodes = append(nodes, n)
+    }
+
+    // shared owner reference
+    ownerRef := GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", "")
+
+    pods := make([]*apiv1.Pod, 0, numNodes)
+    for i := 0; i < numNodes; i++ {
+        p := BuildTestPod(fmt.Sprintf("p%v", i), 100, 0)
+        p.Spec.NodeName = fmt.Sprintf("n%v", i)
+        p.OwnerReferences = ownerRef
+        pods = append(pods, p)
+    }
+
+    fakeClient := &fake.Clientset{}
+    fakeRecorder := kube_util.CreateEventRecorder(fakeClient)
+    fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", fakeRecorder, false)
+
+    numCandidates := 30
+
+    context := AutoscalingContext{
+        AutoscalingOptions: AutoscalingOptions{
+            ScaleDownUtilizationThreshold:    0.35,
+            ScaleDownNonEmptyCandidatesCount: numCandidates,
+            ScaleDownCandidatesPoolRatio:     0.1,
+            ScaleDownCandidatesPoolMinCount:  10,
+        },
+        ClusterStateRegistry: clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, fakeLogRecorder),
+        PredicateChecker:     simulator.NewTestPredicateChecker(),
+        LogRecorder:          fakeLogRecorder,
+    }
+    sd := NewScaleDown(&context)
+
+    sd.UpdateUnneededNodes(nodes, nodes, pods, time.Now(), nil)
+    assert.NotEmpty(t, sd.unneededNodes)
+}
+
 func TestDrainNode(t *testing.T) {
     deletedPods := make(chan string, 10)
     updatedNodes := make(chan string, 10)
diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go
index 6eb8875896..8d7774d85c 100644
--- a/cluster-autoscaler/main.go
+++ b/cluster-autoscaler/main.go
@@ -88,6 +88,17 @@ var (
         "Lower value means better CA responsiveness but possible slower scale down latency."+
         "Higher value can affect CA performance with big clusters (hundreds of nodes)."+
         "Set to non posistive value to turn this heuristic off - CA will not limit the number of nodes it considers.")
+    scaleDownCandidatesPoolRatio = flag.Float64("scale-down-candidates-pool-ratio", 0.1,
+        "A ratio of nodes that are considered as additional non empty candidates for"+
+        "scale down when some candidates from previous iteration are no longer valid."+
+        "Lower value means better CA responsiveness but possible slower scale down latency."+
+        "Higher value can affect CA performance with big clusters (hundreds of nodes)."+
+        "Set to 1.0 to turn this heuristics off - CA will take all nodes as additional candidates.")
+    scaleDownCandidatesPoolMinCount = flag.Int("scale-down-candidates-pool-min-count", 50,
+        "Minimum number of nodes that are considered as additional non empty candidates"+
+        "for scale down when some candidates from previous iteration are no longer valid."+
+        "When calculating the pool size for additional candidates we take"+
+        "max(#nodes * scale-down-candidates-pool-ratio, scale-down-candidates-pool-min-count).")
     scanInterval = flag.Duration("scan-interval", 10*time.Second, "How often cluster is reevaluated for scale up or down")
     maxNodesTotal = flag.Int("max-nodes-total", 0, "Maximum number of nodes in all node groups. Cluster autoscaler will not grow the cluster beyond this number.")
     cloudProviderFlag = flag.String("cloud-provider", "gce", "Cloud provider type. Allowed values: gce, aws, kubemark")
@@ -134,6 +145,8 @@ func createAutoscalerOptions() core.AutoscalerOptions {
         ScaleDownUnreadyTime:             *scaleDownUnreadyTime,
         ScaleDownUtilizationThreshold:    *scaleDownUtilizationThreshold,
         ScaleDownNonEmptyCandidatesCount: *scaleDownNonEmptyCandidatesCount,
+        ScaleDownCandidatesPoolRatio:     *scaleDownCandidatesPoolRatio,
+        ScaleDownCandidatesPoolMinCount:  *scaleDownCandidatesPoolMinCount,
         WriteStatusConfigMap:             *writeStatusConfigMapFlag,
         BalanceSimilarNodeGroups:         *balanceSimilarNodeGroupsFlag,
         ConfigNamespace:                  *namespace,
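
For reference, the capping logic that the scale_down.go hunk adds can be illustrated outside the patch. The sketch below is a standalone program, not code from the autoscaler itself; the helper name additionalCandidatesPoolSize and the sample node counts are invented for clarity. It mirrors the bound applied before slicing currentNonCandidates: max(ceil(#nodes * ScaleDownCandidatesPoolRatio), ScaleDownCandidatesPoolMinCount), never exceeding the number of nodes that are not already scale down candidates.

package main

import (
	"fmt"
	"math"
)

// additionalCandidatesPoolSize mirrors the capping added in UpdateUnneededNodes:
// the pool of extra non empty candidates is max(ceil(totalNodes*ratio), minCount),
// but never larger than the number of nodes that are not already candidates.
// This helper and the sample numbers below are illustrative only.
func additionalCandidatesPoolSize(totalNodes, nonCandidates int, ratio float64, minCount int) int {
	poolSize := int(math.Ceil(float64(totalNodes) * ratio))
	if poolSize < minCount {
		poolSize = minCount
	}
	if poolSize > nonCandidates {
		poolSize = nonCandidates
	}
	return poolSize
}

func main() {
	// With the flag defaults (--scale-down-candidates-pool-ratio=0.1,
	// --scale-down-candidates-pool-min-count=50):
	fmt.Println(additionalCandidatesPoolSize(1000, 950, 0.1, 50)) // 100: 10% of a large cluster
	fmt.Println(additionalCandidatesPoolSize(100, 80, 0.1, 50))   // 50: floored by the min count
	// With the values used in TestFindUnneededAdditionalNodePool (ratio 0.1, min count 10):
	fmt.Println(additionalCandidatesPoolSize(100, 100, 0.1, 10)) // 10
}

Note that this pool only bounds how many non-candidate nodes are fed into the simulation; the number of nodes actually selected is still limited separately, via additionalCandidatesCount, by ScaleDownNonEmptyCandidatesCount minus the candidates already found.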