From a6c18b87d22e1cb51dedcec6e244c37de8babc66 Mon Sep 17 00:00:00 2001
From: Beata Skiba
Date: Thu, 31 Aug 2017 14:53:23 +0200
Subject: [PATCH] Only consider up to 10% of the nodes as additional candidates for scale down.

---
 .../core/autoscaling_context.go            | 10 ++++
 cluster-autoscaler/core/scale_down.go      | 15 ++++--
 cluster-autoscaler/core/scale_down_test.go | 49 +++++++++++++++++++
 cluster-autoscaler/main.go                 | 13 +++++
 4 files changed, 84 insertions(+), 3 deletions(-)

diff --git a/cluster-autoscaler/core/autoscaling_context.go b/cluster-autoscaler/core/autoscaling_context.go
index 02f3dc767e..d3c79bc1f3 100644
--- a/cluster-autoscaler/core/autoscaling_context.go
+++ b/cluster-autoscaler/core/autoscaling_context.go
@@ -99,6 +99,16 @@ type AutoscalingOptions struct {
     // ScaleDownNonEmptyCandidatesCount is the maximum number of non empty nodes
     // considered at once as candidates for scale down.
     ScaleDownNonEmptyCandidatesCount int
+    // ScaleDownCandidatesPoolRatio is a ratio of nodes that are considered
+    // as additional non empty candidates for scale down when some candidates from
+    // previous iteration are no longer valid.
+    ScaleDownCandidatesPoolRatio float64
+    // ScaleDownCandidatesPoolMinCount is the minimum number of nodes that are
+    // considered as additional non empty candidates for scale down when some
+    // candidates from previous iteration are no longer valid.
+    // The formula to calculate additional candidates number is following:
+    // max(#nodes * ScaleDownCandidatesPoolRatio, ScaleDownCandidatesPoolMinCount)
+    ScaleDownCandidatesPoolMinCount int
     // WriteStatusConfigMap tells if the status information should be written to a ConfigMap
     WriteStatusConfigMap bool
     // BalanceSimilarNodeGroups enables logic that identifies node groups with similar machines and tries to balance node count between them.
diff --git a/cluster-autoscaler/core/scale_down.go b/cluster-autoscaler/core/scale_down.go
index 1ad731ccb1..b1ea541686 100644
--- a/cluster-autoscaler/core/scale_down.go
+++ b/cluster-autoscaler/core/scale_down.go
@@ -18,6 +18,7 @@ package core
 
 import (
     "fmt"
+    "math"
     "reflect"
     "strings"
     "sync"
@@ -219,15 +220,23 @@ func (sd *ScaleDown) UpdateUnneededNodes(
         return sd.markSimulationError(simulatorErr, timestamp)
     }
 
-    additionalCandidatesCount := sd.context.AutoscalingOptions.ScaleDownNonEmptyCandidatesCount - len(nodesToRemove)
+    additionalCandidatesCount := sd.context.ScaleDownNonEmptyCandidatesCount - len(nodesToRemove)
     if additionalCandidatesCount > len(currentNonCandidates) {
         additionalCandidatesCount = len(currentNonCandidates)
     }
+    // Limit the additional candidates pool size for better performance.
+    additionalCandidatesPoolSize := int(math.Ceil(float64(len(nodes)) * sd.context.ScaleDownCandidatesPoolRatio))
+    if additionalCandidatesPoolSize < sd.context.ScaleDownCandidatesPoolMinCount {
+        additionalCandidatesPoolSize = sd.context.ScaleDownCandidatesPoolMinCount
+    }
+    if additionalCandidatesPoolSize > len(currentNonCandidates) {
+        additionalCandidatesPoolSize = len(currentNonCandidates)
+    }
     if additionalCandidatesCount > 0 {
         // Look for addidtional nodes to remove among the rest of nodes
         glog.V(3).Infof("Finding additional %v candidates for scale down.", additionalCandidatesCount)
         additionalNodesToRemove, additionalUnremovable, additionalNewHints, simulatorErr :=
-            simulator.FindNodesToRemove(currentNonCandidates, nodes, pods, nil,
+            simulator.FindNodesToRemove(currentNonCandidates[:additionalCandidatesPoolSize], nodes, pods, nil,
                 sd.context.PredicateChecker, additionalCandidatesCount, true,
                 sd.podLocationHints, sd.usageTracker, timestamp, pdbs)
         if simulatorErr != nil {
@@ -290,7 +299,7 @@ func (sd *ScaleDown) markSimulationError(simulatorErr errors.AutoscalerError,
 func (sd *ScaleDown) chooseCandidates(nodes []*apiv1.Node) ([]*apiv1.Node, []*apiv1.Node) {
     // Number of candidates should not be capped. We will look for nodes to remove
     // from the whole set of nodes.
-    if sd.context.AutoscalingOptions.ScaleDownNonEmptyCandidatesCount <= 0 {
+    if sd.context.ScaleDownNonEmptyCandidatesCount <= 0 {
         return nodes, []*apiv1.Node{}
     }
     currentCandidates := make([]*apiv1.Node, 0, len(sd.unneededNodesList))
diff --git a/cluster-autoscaler/core/scale_down_test.go b/cluster-autoscaler/core/scale_down_test.go
index 45f9702857..6190dde950 100644
--- a/cluster-autoscaler/core/scale_down_test.go
+++ b/cluster-autoscaler/core/scale_down_test.go
@@ -165,6 +165,8 @@ func TestFindUnneededMaxCandidates(t *testing.T) {
         AutoscalingOptions: AutoscalingOptions{
             ScaleDownUtilizationThreshold:    0.35,
             ScaleDownNonEmptyCandidatesCount: numCandidates,
+            ScaleDownCandidatesPoolRatio:     1,
+            ScaleDownCandidatesPoolMinCount:  1000,
         },
         ClusterStateRegistry: clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, fakeLogRecorder),
         PredicateChecker:     simulator.NewTestPredicateChecker(),
@@ -199,6 +201,53 @@
     assert.NotContains(t, sd.unneededNodes, deleted)
 }
 
+func TestFindUnneededAdditionalNodePool(t *testing.T) {
+    provider := testprovider.NewTestCloudProvider(nil, nil)
+    provider.AddNodeGroup("ng1", 1, 100, 2)
+
+    numNodes := 100
+    nodes := make([]*apiv1.Node, 0, numNodes)
+    for i := 0; i < numNodes; i++ {
+        n := BuildTestNode(fmt.Sprintf("n%v", i), 1000, 10)
+        SetNodeReadyState(n, true, time.Time{})
+        provider.AddNode("ng1", n)
+        nodes = append(nodes, n)
+    }
+
+    // shared owner reference
+    ownerRef := GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", "")
+
+    pods := make([]*apiv1.Pod, 0, numNodes)
+    for i := 0; i < numNodes; i++ {
+        p := BuildTestPod(fmt.Sprintf("p%v", i), 100, 0)
+        p.Spec.NodeName = fmt.Sprintf("n%v", i)
+        p.OwnerReferences = ownerRef
+        pods = append(pods, p)
+    }
+
+    fakeClient := &fake.Clientset{}
+    fakeRecorder := kube_util.CreateEventRecorder(fakeClient)
+    fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", fakeRecorder, false)
+
+    numCandidates := 30
+
+    context := AutoscalingContext{
+        AutoscalingOptions: AutoscalingOptions{
+            ScaleDownUtilizationThreshold:    0.35,
+            ScaleDownNonEmptyCandidatesCount: numCandidates,
+            ScaleDownCandidatesPoolRatio:     0.1,
+            ScaleDownCandidatesPoolMinCount:  10,
+        },
+        ClusterStateRegistry: clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, fakeLogRecorder),
+        PredicateChecker:     simulator.NewTestPredicateChecker(),
+        LogRecorder:          fakeLogRecorder,
+    }
+    sd := NewScaleDown(&context)
+
+    sd.UpdateUnneededNodes(nodes, nodes, pods, time.Now(), nil)
+    assert.NotEmpty(t, sd.unneededNodes)
+}
+
 func TestDrainNode(t *testing.T) {
     deletedPods := make(chan string, 10)
     updatedNodes := make(chan string, 10)
diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go
index 6eb8875896..8d7774d85c 100644
--- a/cluster-autoscaler/main.go
+++ b/cluster-autoscaler/main.go
@@ -88,6 +88,17 @@ var (
         "Lower value means better CA responsiveness but possible slower scale down latency."+
         "Higher value can affect CA performance with big clusters (hundreds of nodes)."+
         "Set to non posistive value to turn this heuristic off - CA will not limit the number of nodes it considers.")
+    scaleDownCandidatesPoolRatio = flag.Float64("scale-down-candidates-pool-ratio", 0.1,
+        "A ratio of nodes that are considered as additional non empty candidates for"+
+        "scale down when some candidates from previous iteration are no longer valid."+
+        "Lower value means better CA responsiveness but possible slower scale down latency."+
+        "Higher value can affect CA performance with big clusters (hundreds of nodes)."+
+        "Set to 1.0 to turn this heuristics off - CA will take all nodes as additional candidates.")
+    scaleDownCandidatesPoolMinCount = flag.Int("scale-down-candidates-pool-min-count", 50,
+        "Minimum number of nodes that are considered as additional non empty candidates"+
+        "for scale down when some candidates from previous iteration are no longer valid."+
+        "When calculating the pool size for additional candidates we take"+
+        "max(#nodes * scale-down-candidates-pool-ratio, scale-down-candidates-pool-min-count).")
     scanInterval = flag.Duration("scan-interval", 10*time.Second, "How often cluster is reevaluated for scale up or down")
     maxNodesTotal = flag.Int("max-nodes-total", 0, "Maximum number of nodes in all node groups. Cluster autoscaler will not grow the cluster beyond this number.")
     cloudProviderFlag = flag.String("cloud-provider", "gce", "Cloud provider type. Allowed values: gce, aws, kubemark")
@@ -134,6 +145,8 @@ func createAutoscalerOptions() core.AutoscalerOptions {
         ScaleDownUnreadyTime:             *scaleDownUnreadyTime,
         ScaleDownUtilizationThreshold:    *scaleDownUtilizationThreshold,
         ScaleDownNonEmptyCandidatesCount: *scaleDownNonEmptyCandidatesCount,
+        ScaleDownCandidatesPoolRatio:     *scaleDownCandidatesPoolRatio,
+        ScaleDownCandidatesPoolMinCount:  *scaleDownCandidatesPoolMinCount,
         WriteStatusConfigMap:             *writeStatusConfigMapFlag,
         BalanceSimilarNodeGroups:         *balanceSimilarNodeGroupsFlag,
         ConfigNamespace:                  *namespace,
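
For reference, the capping logic that the scale_down.go hunk adds can be illustrated outside the patch. The sketch below is a standalone program, not code from the autoscaler itself; the helper name additionalCandidatesPoolSize and the sample node counts are invented for clarity. It mirrors the bound applied before slicing currentNonCandidates: max(ceil(#nodes * ScaleDownCandidatesPoolRatio), ScaleDownCandidatesPoolMinCount), never exceeding the number of nodes that are not already scale down candidates.

package main

import (
	"fmt"
	"math"
)

// additionalCandidatesPoolSize mirrors the capping added in UpdateUnneededNodes:
// the pool of extra non empty candidates is max(ceil(totalNodes*ratio), minCount),
// but never larger than the number of nodes that are not already candidates.
// This helper and the sample numbers below are illustrative only.
func additionalCandidatesPoolSize(totalNodes, nonCandidates int, ratio float64, minCount int) int {
	poolSize := int(math.Ceil(float64(totalNodes) * ratio))
	if poolSize < minCount {
		poolSize = minCount
	}
	if poolSize > nonCandidates {
		poolSize = nonCandidates
	}
	return poolSize
}

func main() {
	// With the flag defaults (--scale-down-candidates-pool-ratio=0.1,
	// --scale-down-candidates-pool-min-count=50):
	fmt.Println(additionalCandidatesPoolSize(1000, 950, 0.1, 50)) // 100: 10% of a large cluster
	fmt.Println(additionalCandidatesPoolSize(100, 80, 0.1, 50))   // 50: floored by the min count
	// With the values used in TestFindUnneededAdditionalNodePool (ratio 0.1, min count 10):
	fmt.Println(additionalCandidatesPoolSize(100, 100, 0.1, 10)) // 10
}

Note that this pool only bounds how many non-candidate nodes are fed into the simulation; the number of nodes actually selected is still limited separately, via additionalCandidatesCount, by ScaleDownNonEmptyCandidatesCount minus the candidates already found.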