diff --git a/cluster-autoscaler/cluster_autoscaler.go b/cluster-autoscaler/cluster_autoscaler.go index 1a6b84637b..aa0265495f 100644 --- a/cluster-autoscaler/cluster_autoscaler.go +++ b/cluster-autoscaler/cluster_autoscaler.go @@ -71,7 +71,8 @@ var ( "Node utilization level, defined as sum of requested resources divided by capacity, below which a node can be considered for scale down") scaleDownTrialInterval = flag.Duration("scale-down-trial-interval", 1*time.Minute, "How often scale down possiblity is check") - scanInterval = flag.Duration("scan-interval", 10*time.Second, "How often cluster is reevaluated for scale up or down") + scanInterval = flag.Duration("scan-interval", 10*time.Second, "How often cluster is reevaluated for scale up or down") + maxNodesTotal = flag.Int("max-nodes-total", 0, "Maximum number of nodes in all node groups. Cluster autoscaler will not grow the cluster beyond this number.") cloudProviderFlag = flag.String("cloud-provider", "gce", "Cloud provider type. Allowed values: gce") ) @@ -211,10 +212,13 @@ func run(_ <-chan struct{}) { if len(unschedulablePodsToHelp) == 0 { glog.V(1).Info("No unschedulable pods") + } else if *maxNodesTotal > 0 && len(nodes) >= *maxNodesTotal { + glog.V(1).Info("Max total nodes in cluster reached") } else { scaleUpStart := time.Now() updateLastTime("scaleup") - scaledUp, err := ScaleUp(unschedulablePodsToHelp, nodes, cloudProvider, kubeClient, predicateChecker, recorder) + scaledUp, err := ScaleUp(unschedulablePodsToHelp, nodes, cloudProvider, kubeClient, predicateChecker, recorder, + *maxNodesTotal) updateDuration("scaleup", scaleUpStart) diff --git a/cluster-autoscaler/scale_up.go b/cluster-autoscaler/scale_up.go index ba0c663ad3..48d309994e 100644 --- a/cluster-autoscaler/scale_up.go +++ b/cluster-autoscaler/scale_up.go @@ -36,9 +36,10 @@ type ExpansionOption struct { } // ScaleUp tries to scale the cluster up. 
Return true if it found a way to increase the size, -// false if it didn't and error if an error occured. +// false if it didn't and error if an error occurred. Assumes that all nodes in the cluster are +// ready and in sync with instance groups. func ScaleUp(unschedulablePods []*kube_api.Pod, nodes []*kube_api.Node, cloudProvider cloudprovider.CloudProvider, kubeClient *kube_client.Client, - predicateChecker *simulator.PredicateChecker, recorder kube_record.EventRecorder) (bool, error) { + predicateChecker *simulator.PredicateChecker, recorder kube_record.EventRecorder, maxNodesTotal int) (bool, error) { // From now on we only care about unschedulable pods that were marked after the newest // node became available for the scheduler. @@ -122,6 +123,15 @@ func ScaleUp(unschedulablePods []*kube_api.Pod, nodes []*kube_api.Node, cloudPro glog.V(1).Infof("Capping size to MAX (%d)", bestOption.nodeGroup.MaxSize()) newSize = bestOption.nodeGroup.MaxSize() } + + if maxNodesTotal > 0 && len(nodes)+(newSize-currentSize) > maxNodesTotal { + glog.V(1).Infof("Capping size to max cluster total size (%d)", maxNodesTotal) + newSize = maxNodesTotal - len(nodes) + currentSize + if newSize < currentSize { + return false, fmt.Errorf("max node total count already reached") + } + } + glog.V(1).Infof("Setting %s size to %d", bestOption.nodeGroup.Id(), newSize) if err := bestOption.nodeGroup.IncreaseSize(newSize - currentSize); err != nil {