Merge pull request #284 from mwielgus/nap-5

Node autoprovisioning in scale up
2017-08-31 17:47:25 +05:30 · 2017-08-31 17:47:25 +05:30 · e9261a249c
parent 7c1aa113f8 22f856d4da
commit e9261a249c
11 changed files with 93 additions and 17 deletions
--- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
@ -153,7 +153,7 @@ func (aws *awsCloudProvider) GetAvilableMachineTypes() ([]string, error) {

 // NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically
 // created on the cloud provider side. The node group is not returned by NodeGroups() until it is created.
-func (aws *awsCloudProvider) NewNodeGroup(name string, machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
+func (aws *awsCloudProvider) NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
 	return nil, cloudprovider.ErrNotImplemented
 }

--- a/cluster-autoscaler/cloudprovider/cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/cloud_provider.go
@ -50,7 +50,7 @@ type CloudProvider interface {
 	// NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically
 	// created on the cloud provider side. The node group is not returned by NodeGroups() until it is created.
 	// Implementation optional.
-	NewNodeGroup(name string, machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (NodeGroup, error)
+	NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (NodeGroup, error)
 }

 // ErrNotImplemented is returned if a method is not implemented.
--- a/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go
@ -19,6 +19,7 @@ package gce
 import (
 	"fmt"
 	"strings"
+	"time"

 	apiv1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
@ -127,8 +128,31 @@ func (gce *GceCloudProvider) GetAvilableMachineTypes() ([]string, error) {

 // NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically
 // created on the cloud provider side. The node group is not returned by NodeGroups() until it is created.
-func (gce *GceCloudProvider) NewNodeGroup(name string, machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
-	return nil, cloudprovider.ErrNotImplemented
+func (gce *GceCloudProvider) NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
+	nodePoolName := fmt.Sprintf("%s-%s-%d", nodeAutoprovisioningPrefix, machineType, time.Now().Unix())
+	mig := &Mig{
+		autoprovisioned: true,
+		exist:           false,
+		nodePoolName:    nodePoolName,
+		GceRef: GceRef{
+			Project: gce.gceManager.projectId,
+			Zone:    gce.gceManager.zone,
+			Name:    nodePoolName + "-temporary-mig",
+		},
+		minSize: minAutoprovisionedSize,
+		maxSize: maxAutoprovisionedSize,
+		spec: &autoprovisioningSpec{
+			machineType:    machineType,
+			labels:         labels,
+			extraResources: extraResources,
+		},
+		gceManager: gce.gceManager,
+	}
+	_, err := gce.gceManager.templates.buildNodeFromAutoprovisioningSpec(mig)
+	if err != nil {
+		return nil, err
+	}
+	return mig, nil
 }

 // GceRef contains s reference to some entity in GCE/GKE world.
@ -158,7 +182,7 @@ func GceRefFromProviderId(id string) (*GceRef, error) {
 type autoprovisioningSpec struct {
 	machineType    string
 	labels         map[string]string
-	extraResources map[string]string
+	extraResources map[string]resource.Quantity
 }

 // Mig implements NodeGroup interfrace.
@ -299,7 +323,7 @@ func (mig *Mig) Exist() (bool, error) {
 // Create creates the node group on the cloud provider side.
 func (mig *Mig) Create() error {
 	if !mig.exist && mig.autoprovisioned {
-		return mig.gceManager.createNodePool(mig.spec)
+		return mig.gceManager.createNodePool(mig)
 	}
 	return fmt.Errorf("Cannot create non-autoprovisioned node group")
 }
--- a/cluster-autoscaler/cloudprovider/gce/gce_manager.go
+++ b/cluster-autoscaler/cloudprovider/gce/gce_manager.go
@ -285,24 +285,22 @@ func (m *GceManager) deleteNodePool(toBeRemoved *Mig) error {
 	return m.fetchAllNodePools()
 }

-func (m *GceManager) createNodePool(spec *autoprovisioningSpec) error {
+func (m *GceManager) createNodePool(mig *Mig) error {
 	m.assertGKE()

 	// TODO: handle preemptable
 	// TODO: handle ssd
 	// TODO: handle taints

-	nodePoolName := fmt.Sprintf("%s-%s-%d", nodeAutoprovisioningPrefix, spec.machineType, time.Now().Unix())
-
 	config := gke.NodeConfig{
-		MachineType: spec.machineType,
+		MachineType: mig.spec.machineType,
 		OauthScopes: defaultOAuthScopes,
-		Labels:      spec.labels,
+		Labels:      mig.spec.labels,
 	}

 	createRequest := gke.CreateNodePoolRequest{
 		NodePool: &gke.NodePool{
-			Name:             nodePoolName,
+			Name:             mig.nodePoolName,
 			InitialNodeCount: 0,
 			Config:           &config,
 		},
@ -317,7 +315,17 @@ func (m *GceManager) createNodePool(spec *autoprovisioningSpec) error {
 	if err != nil {
 		return err
 	}
-	return m.fetchAllNodePools()
+	err = m.fetchAllNodePools()
+	if err != nil {
+		return err
+	}
+	for _, existingMig := range m.getMigs() {
+		if existingMig.config.nodePoolName == mig.nodePoolName {
+			*mig = *existingMig.config
+			return nil
+		}
+	}
+	return fmt.Errorf("node pool %s not found", mig.nodePoolName)
 }

 // GetMigSize gets MIG size.
--- a/cluster-autoscaler/cloudprovider/kubemark/kubemark_linux.go
+++ b/cluster-autoscaler/cloudprovider/kubemark/kubemark_linux.go
@ -111,7 +111,7 @@ func (kubemark *KubemarkCloudProvider) GetAvilableMachineTypes() ([]string, erro
 }

 // NewNodeGroup builds a theoretical node group based on the node definition provided.
-func (kubemark *KubemarkCloudProvider) NewNodeGroup(name string, machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
+func (kubemark *KubemarkCloudProvider) NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
 	return nil, cloudprovider.ErrNotImplemented
 }

--- a/cluster-autoscaler/cloudprovider/kubemark/kubemark_other.go
+++ b/cluster-autoscaler/cloudprovider/kubemark/kubemark_other.go
@ -56,6 +56,6 @@ func (kubemark *KubemarkCloudProvider) GetAvilableMachineTypes() ([]string, erro
 	return []string{}, cloudprovider.ErrNotImplemented
 }

-func (kubemark *KubemarkCloudProvider) NewNodeGroup(name string, machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
+func (kubemark *KubemarkCloudProvider) NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
 	return nil, cloudprovider.ErrNotImplemented
 }
--- a/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go
@ -100,7 +100,7 @@ func (tcp *TestCloudProvider) GetAvilableMachineTypes() ([]string, error) {

 // NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically
 // created on the cloud provider side. The node group is not returned by NodeGroups() until it is created.
-func (tcp *TestCloudProvider) NewNodeGroup(name string, machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
+func (tcp *TestCloudProvider) NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
 	return nil, cloudprovider.ErrNotImplemented
 }

--- a/cluster-autoscaler/core/autoscaling_context.go
+++ b/cluster-autoscaler/core/autoscaling_context.go
@ -104,6 +104,8 @@ type AutoscalingOptions struct {
 	ConfigNamespace string
 	// ClusterName if available
 	ClusterName string
+	// NodeAutoprovisioningEnabled tells whether the node auto-provisioning is enabled for this cluster.
+	NodeAutoprovisioningEnabled bool
 }

 // NewAutoscalingContext returns an autoscaling context from all the necessary parameters passed via arguments
--- a/cluster-autoscaler/core/scale_up.go
+++ b/cluster-autoscaler/core/scale_up.go
@ -29,6 +29,7 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/metrics"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
+	"k8s.io/autoscaler/cluster-autoscaler/utils/labels"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/nodegroupset"
 	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"

@ -77,8 +78,13 @@ func ScaleUp(context *AutoscalingContext, unschedulablePods []*apiv1.Pod, nodes
 	podsRemainUnschedulable := make(map[*apiv1.Pod]bool)
 	expansionOptions := make([]expander.Option, 0)

-	for _, nodeGroup := range context.CloudProvider.NodeGroups() {
+	nodeGroups := context.CloudProvider.NodeGroups()

+	if context.AutoscalingOptions.NodeAutoprovisioningEnabled {
+		addAutoprovisionedCandidates(context, nodeGroups, unschedulablePods)
+	}
+
+	for _, nodeGroup := range nodeGroups {
 		if !context.ClusterStateRegistry.IsNodeGroupSafeToScaleUp(nodeGroup.Id(), now) {
 			glog.Warningf("Node group %s is not ready for scaleup", nodeGroup.Id())
 			continue
@ -175,6 +181,14 @@ func ScaleUp(context *AutoscalingContext, unschedulablePods []*apiv1.Pod, nodes
 					"max node total count already reached")
 			}
 		}
+		if context.AutoscalingOptions.NodeAutoprovisioningEnabled {
+			if exist, err := bestOption.NodeGroup.Exist(); err == nil && !exist {
+				err := bestOption.NodeGroup.Create()
+				if err != nil {
+					return false, errors.ToAutoscalerError(errors.CloudProviderError, err)
+				}
+			}
+		}

 		targetNodeGroups := []cloudprovider.NodeGroup{bestOption.NodeGroup}
 		if context.BalanceSimilarNodeGroups {
@ -279,3 +293,20 @@ func executeScaleUp(context *AutoscalingContext, info nodegroupset.ScaleUpInfo)
 		"Scale-up: group %s size set to %d", info.Group.Id(), info.NewSize)
 	return nil
 }
+
+func addAutoprovisionedCandidates(context *AutoscalingContext, nodeGroups []cloudprovider.NodeGroup, unschedulablePods []*apiv1.Pod) {
+	machines, err := context.CloudProvider.GetAvilableMachineTypes()
+	if err != nil {
+		glog.Warningf("Failed to get machine types: %v", err)
+	} else {
+		bestLabels := labels.BestLabelSet(unschedulablePods)
+		for _, machineType := range machines {
+			nodeGroup, err := context.CloudProvider.NewNodeGroup(machineType, bestLabels, nil)
+			if err != nil {
+				glog.Warningf("Unable to build temporary node group for %s: %v", machineType, err)
+			} else {
+				nodeGroups = append(nodeGroups, nodeGroup)
+			}
+		}
+	}
+}
--- a/cluster-autoscaler/expander/price/price.go
+++ b/cluster-autoscaler/expander/price/price.go
@ -51,6 +51,11 @@ var (
 	// priceStabilizationPod is the pod cost to stabilize node_cost/pod_cost ratio a bit.
 	// 0.5 cpu, 500 mb ram
 	priceStabilizationPod = buildPod("stabilize", 500, 500*1024*1024)
+
+	// Penalty given to node groups that are yet to be created.
+	// TODO: make it a flag
+	// TODO: investigate what a proper value should be
+	notExistCoeficient = 2.0
 )

 // NewStrategy returns an expansion strategy that picks nodes based on price and preferred node type.
@ -120,6 +125,10 @@ nextoption:

 		optionScore := supressedUnfitness * priceSubScore

+		if exist, err := option.NodeGroup.Exist(); err != nil && !exist {
+			optionScore *= notExistCoeficient
+		}
+
 		debug := fmt.Sprintf("all_nodes_price=%f pods_price=%f stabilized_ratio=%f unfitness=%f supressed=%f final_score=%f",
 			totalNodePrice,
 			totalPodPrice,
--- a/cluster-autoscaler/main.go
+++ b/cluster-autoscaler/main.go
@ -103,6 +103,7 @@ var (
 	maxInactivityTimeFlag        = flag.Duration("max-inactivity", 10*time.Minute, "Maximum time from last recorded autoscaler activity before automatic restart")
 	maxFailingTimeFlag           = flag.Duration("max-failing-time", 15*time.Minute, "Maximum time from last recorded successful autoscaler run before automatic restart")
 	balanceSimilarNodeGroupsFlag = flag.Bool("balance-similar-node-groups", false, "Detect similar node groups and balance the number of nodes between them")
+	nodeAutoprovisioningEnabled  = flag.Bool("node-autoprovisioning-enabled", false, "Should CA autoprovision node groups when needed")
 )

 func createAutoscalerOptions() core.AutoscalerOptions {
@ -130,6 +131,7 @@ func createAutoscalerOptions() core.AutoscalerOptions {
 		BalanceSimilarNodeGroups:      *balanceSimilarNodeGroupsFlag,
 		ConfigNamespace:               *namespace,
 		ClusterName:                   *clusterName,
+		NodeAutoprovisioningEnabled:   *nodeAutoprovisioningEnabled,
 	}

 	configFetcherOpts := dynamic.ConfigFetcherOptions{