Merge pull request #297 from bskiba/additional-k
Only consider up to 10% of the nodes as additional candidates for scale down
Commit f9cabf3a1a
@@ -99,6 +99,16 @@ type AutoscalingOptions struct {
 	// ScaleDownNonEmptyCandidatesCount is the maximum number of non empty nodes
 	// considered at once as candidates for scale down.
 	ScaleDownNonEmptyCandidatesCount int
+	// ScaleDownCandidatesPoolRatio is a ratio of nodes that are considered
+	// as additional non empty candidates for scale down when some candidates from
+	// previous iteration are no longer valid.
+	ScaleDownCandidatesPoolRatio float64
+	// ScaleDownCandidatesPoolMinCount is the minimum number of nodes that are
+	// considered as additional non empty candidates for scale down when some
+	// candidates from previous iteration are no longer valid.
+	// The formula to calculate the number of additional candidates is:
+	// max(#nodes * ScaleDownCandidatesPoolRatio, ScaleDownCandidatesPoolMinCount)
+	ScaleDownCandidatesPoolMinCount int
 	// WriteStatusConfigMap tells if the status information should be written to a ConfigMap
 	WriteStatusConfigMap bool
 	// BalanceSimilarNodeGroups enables logic that identifies node groups with similar machines and tries to balance node count between them.
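The two new options above combine into a single bound on how many extra nodes are inspected. Below is a minimal sketch of that formula as a standalone helper; the helper name, its parameters, and the package wrapper are illustrative only, since the patch computes this value inline in UpdateUnneededNodes (see the scale-down changes further down).

package main

import (
	"fmt"
	"math"
)

// additionalCandidatesPoolSize applies max(#nodes * ratio, minCount) and then
// caps the result at the number of nodes that are not already candidates,
// mirroring the comment on ScaleDownCandidatesPoolMinCount.
func additionalCandidatesPoolSize(totalNodes, nonCandidates int, ratio float64, minCount int) int {
	size := int(math.Ceil(float64(totalNodes) * ratio))
	if size < minCount {
		size = minCount
	}
	if size > nonCandidates {
		size = nonCandidates
	}
	return size
}

func main() {
	// With the defaults introduced by this PR (ratio 0.1, min count 50),
	// a 1000-node cluster with 990 non-candidate nodes gets a pool of 100.
	fmt.Println(additionalCandidatesPoolSize(1000, 990, 0.1, 50)) // prints 100
}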
@@ -18,6 +18,7 @@ package core
 
 import (
 	"fmt"
+	"math"
 	"reflect"
 	"strings"
 	"sync"
@@ -219,15 +220,23 @@ func (sd *ScaleDown) UpdateUnneededNodes(
 		return sd.markSimulationError(simulatorErr, timestamp)
 	}
 
-	additionalCandidatesCount := sd.context.AutoscalingOptions.ScaleDownNonEmptyCandidatesCount - len(nodesToRemove)
+	additionalCandidatesCount := sd.context.ScaleDownNonEmptyCandidatesCount - len(nodesToRemove)
 	if additionalCandidatesCount > len(currentNonCandidates) {
 		additionalCandidatesCount = len(currentNonCandidates)
 	}
+	// Limit the additional candidates pool size for better performance.
+	additionalCandidatesPoolSize := int(math.Ceil(float64(len(nodes)) * sd.context.ScaleDownCandidatesPoolRatio))
+	if additionalCandidatesPoolSize < sd.context.ScaleDownCandidatesPoolMinCount {
+		additionalCandidatesPoolSize = sd.context.ScaleDownCandidatesPoolMinCount
+	}
+	if additionalCandidatesPoolSize > len(currentNonCandidates) {
+		additionalCandidatesPoolSize = len(currentNonCandidates)
+	}
 	if additionalCandidatesCount > 0 {
 		// Look for additional nodes to remove among the rest of nodes
 		glog.V(3).Infof("Finding additional %v candidates for scale down.", additionalCandidatesCount)
 		additionalNodesToRemove, additionalUnremovable, additionalNewHints, simulatorErr :=
-			simulator.FindNodesToRemove(currentNonCandidates, nodes, pods, nil,
+			simulator.FindNodesToRemove(currentNonCandidates[:additionalCandidatesPoolSize], nodes, pods, nil,
 				sd.context.PredicateChecker, additionalCandidatesCount, true,
 				sd.podLocationHints, sd.usageTracker, timestamp, pdbs)
 		if simulatorErr != nil {
@@ -290,7 +299,7 @@ func (sd *ScaleDown) markSimulationError(simulatorErr errors.AutoscalerError,
 func (sd *ScaleDown) chooseCandidates(nodes []*apiv1.Node) ([]*apiv1.Node, []*apiv1.Node) {
 	// Number of candidates should not be capped. We will look for nodes to remove
 	// from the whole set of nodes.
-	if sd.context.AutoscalingOptions.ScaleDownNonEmptyCandidatesCount <= 0 {
+	if sd.context.ScaleDownNonEmptyCandidatesCount <= 0 {
 		return nodes, []*apiv1.Node{}
 	}
 	currentCandidates := make([]*apiv1.Node, 0, len(sd.unneededNodesList))
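Note that the two limits play different roles: additionalCandidatesCount bounds how many extra nodes may be returned as scale-down candidates, while additionalCandidatesPoolSize bounds how many non-candidate nodes the simulator examines at all. As an illustrative example with assumed numbers (not taken from the patch): with 400 nodes, ScaleDownNonEmptyCandidatesCount of 30, 5 nodes already found removable, ratio 0.1, and min count 50, the count is 30 - 5 = 25 and the pool is max(ceil(400 * 0.1), 50) = 50, so FindNodesToRemove scans only the first 50 entries of currentNonCandidates and returns at most 25 of them.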
@@ -165,6 +165,8 @@ func TestFindUnneededMaxCandidates(t *testing.T) {
 		AutoscalingOptions: AutoscalingOptions{
 			ScaleDownUtilizationThreshold:    0.35,
 			ScaleDownNonEmptyCandidatesCount: numCandidates,
+			ScaleDownCandidatesPoolRatio:     1,
+			ScaleDownCandidatesPoolMinCount:  1000,
 		},
 		ClusterStateRegistry: clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, fakeLogRecorder),
 		PredicateChecker:     simulator.NewTestPredicateChecker(),
@@ -199,6 +201,53 @@ func TestFindUnneededMaxCandidates(t *testing.T) {
 	assert.NotContains(t, sd.unneededNodes, deleted)
 }
 
+func TestFindUnneededAdditionalNodePool(t *testing.T) {
+	provider := testprovider.NewTestCloudProvider(nil, nil)
+	provider.AddNodeGroup("ng1", 1, 100, 2)
+
+	numNodes := 100
+	nodes := make([]*apiv1.Node, 0, numNodes)
+	for i := 0; i < numNodes; i++ {
+		n := BuildTestNode(fmt.Sprintf("n%v", i), 1000, 10)
+		SetNodeReadyState(n, true, time.Time{})
+		provider.AddNode("ng1", n)
+		nodes = append(nodes, n)
+	}
+
+	// shared owner reference
+	ownerRef := GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", "")
+
+	pods := make([]*apiv1.Pod, 0, numNodes)
+	for i := 0; i < numNodes; i++ {
+		p := BuildTestPod(fmt.Sprintf("p%v", i), 100, 0)
+		p.Spec.NodeName = fmt.Sprintf("n%v", i)
+		p.OwnerReferences = ownerRef
+		pods = append(pods, p)
+	}
+
+	fakeClient := &fake.Clientset{}
+	fakeRecorder := kube_util.CreateEventRecorder(fakeClient)
+	fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", fakeRecorder, false)
+
+	numCandidates := 30
+
+	context := AutoscalingContext{
+		AutoscalingOptions: AutoscalingOptions{
+			ScaleDownUtilizationThreshold:    0.35,
+			ScaleDownNonEmptyCandidatesCount: numCandidates,
+			ScaleDownCandidatesPoolRatio:     0.1,
+			ScaleDownCandidatesPoolMinCount:  10,
+		},
+		ClusterStateRegistry: clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, fakeLogRecorder),
+		PredicateChecker:     simulator.NewTestPredicateChecker(),
+		LogRecorder:          fakeLogRecorder,
+	}
+	sd := NewScaleDown(&context)
+
+	sd.UpdateUnneededNodes(nodes, nodes, pods, time.Now(), nil)
+	assert.NotEmpty(t, sd.unneededNodes)
+}
+
 func TestDrainNode(t *testing.T) {
 	deletedPods := make(chan string, 10)
 	updatedNodes := make(chan string, 10)
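For reference, with the settings used in TestFindUnneededAdditionalNodePool (100 nodes, pool ratio 0.1, pool min count 10), the additional pool works out to max(ceil(100 * 0.1), 10) = 10 nodes, which is smaller than the 30 candidate slots, so the test exercises the currentNonCandidates[:additionalCandidatesPoolSize] truncation while still expecting some unneeded nodes to be found.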
@@ -88,6 +88,17 @@ var (
 			"Lower value means better CA responsiveness but possible slower scale down latency. "+
 			"Higher value can affect CA performance with big clusters (hundreds of nodes). "+
 			"Set to non positive value to turn this heuristic off - CA will not limit the number of nodes it considers.")
+	scaleDownCandidatesPoolRatio = flag.Float64("scale-down-candidates-pool-ratio", 0.1,
+		"A ratio of nodes that are considered as additional non empty candidates for "+
+			"scale down when some candidates from previous iteration are no longer valid. "+
+			"Lower value means better CA responsiveness but possible slower scale down latency. "+
+			"Higher value can affect CA performance with big clusters (hundreds of nodes). "+
+			"Set to 1.0 to turn this heuristic off - CA will take all nodes as additional candidates.")
+	scaleDownCandidatesPoolMinCount = flag.Int("scale-down-candidates-pool-min-count", 50,
+		"Minimum number of nodes that are considered as additional non empty candidates "+
+			"for scale down when some candidates from previous iteration are no longer valid. "+
+			"When calculating the pool size for additional candidates we take "+
+			"max(#nodes * scale-down-candidates-pool-ratio, scale-down-candidates-pool-min-count).")
 	scanInterval                = flag.Duration("scan-interval", 10*time.Second, "How often cluster is reevaluated for scale up or down")
 	maxNodesTotal               = flag.Int("max-nodes-total", 0, "Maximum number of nodes in all node groups. Cluster autoscaler will not grow the cluster beyond this number.")
 	cloudProviderFlag           = flag.String("cloud-provider", "gce", "Cloud provider type. Allowed values: gce, aws, kubemark")
@@ -134,6 +145,8 @@ func createAutoscalerOptions() core.AutoscalerOptions {
 		ScaleDownUnreadyTime:             *scaleDownUnreadyTime,
 		ScaleDownUtilizationThreshold:    *scaleDownUtilizationThreshold,
 		ScaleDownNonEmptyCandidatesCount: *scaleDownNonEmptyCandidatesCount,
+		ScaleDownCandidatesPoolRatio:     *scaleDownCandidatesPoolRatio,
+		ScaleDownCandidatesPoolMinCount:  *scaleDownCandidatesPoolMinCount,
 		WriteStatusConfigMap:             *writeStatusConfigMapFlag,
 		BalanceSimilarNodeGroups:         *balanceSimilarNodeGroupsFlag,
 		ConfigNamespace:                  *namespace,
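With the defaults registered here (--scale-down-candidates-pool-ratio=0.1 and --scale-down-candidates-pool-min-count=50), the 10% ratio from the PR title only takes effect once the cluster exceeds 500 nodes; smaller clusters fall back to the 50-node minimum, and setting the ratio to 1.0 keeps the previous behaviour of considering all remaining nodes as additional candidates.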