diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go index f3ee393ff3..d3c417df47 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go @@ -153,7 +153,7 @@ func (aws *awsCloudProvider) GetAvilableMachineTypes() ([]string, error) { // NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically // created on the cloud provider side. The node group is not returned by NodeGroups() until it is created. -func (aws *awsCloudProvider) NewNodeGroup(name string, machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) { +func (aws *awsCloudProvider) NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) { return nil, cloudprovider.ErrNotImplemented } diff --git a/cluster-autoscaler/cloudprovider/cloud_provider.go b/cluster-autoscaler/cloudprovider/cloud_provider.go index 511a49e6c8..5e7bd79d8b 100644 --- a/cluster-autoscaler/cloudprovider/cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/cloud_provider.go @@ -50,7 +50,7 @@ type CloudProvider interface { // NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically // created on the cloud provider side. The node group is not returned by NodeGroups() until it is created. // Implementation optional. - NewNodeGroup(name string, machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (NodeGroup, error) + NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (NodeGroup, error) } // ErrNotImplemented is returned if a method is not implemented. 
diff --git a/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go b/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go index 863d093665..61675f48a1 100644 --- a/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go @@ -19,6 +19,7 @@ package gce import ( "fmt" "strings" + "time" apiv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -127,8 +128,31 @@ func (gce *GceCloudProvider) GetAvilableMachineTypes() ([]string, error) { // NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically // created on the cloud provider side. The node group is not returned by NodeGroups() until it is created. -func (gce *GceCloudProvider) NewNodeGroup(name string, machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) { - return nil, cloudprovider.ErrNotImplemented +func (gce *GceCloudProvider) NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) { + nodePoolName := fmt.Sprintf("%s-%s-%d", nodeAutoprovisioningPrefix, machineType, time.Now().Unix()) + mig := &Mig{ + autoprovisioned: true, + exist: false, + nodePoolName: nodePoolName, + GceRef: GceRef{ + Project: gce.gceManager.projectId, + Zone: gce.gceManager.zone, + Name: nodePoolName + "-temporary-mig", + }, + minSize: minAutoprovisionedSize, + maxSize: maxAutoprovisionedSize, + spec: &autoprovisioningSpec{ + machineType: machineType, + labels: labels, + extraResources: extraResources, + }, + gceManager: gce.gceManager, + } + _, err := gce.gceManager.templates.buildNodeFromAutoprovisioningSpec(mig) + if err != nil { + return nil, err + } + return mig, nil } // GceRef contains s reference to some entity in GCE/GKE world. 
@@ -158,7 +182,7 @@ func GceRefFromProviderId(id string) (*GceRef, error) { type autoprovisioningSpec struct { machineType string labels map[string]string - extraResources map[string]string + extraResources map[string]resource.Quantity } // Mig implements NodeGroup interfrace. @@ -299,7 +323,7 @@ func (mig *Mig) Exist() (bool, error) { // Create creates the node group on the cloud provider side. func (mig *Mig) Create() error { if !mig.exist && mig.autoprovisioned { - return mig.gceManager.createNodePool(mig.spec) + return mig.gceManager.createNodePool(mig) } return fmt.Errorf("Cannot create non-autoprovisioned node group") } diff --git a/cluster-autoscaler/cloudprovider/gce/gce_manager.go b/cluster-autoscaler/cloudprovider/gce/gce_manager.go index 77a2d60136..3ccd20b144 100644 --- a/cluster-autoscaler/cloudprovider/gce/gce_manager.go +++ b/cluster-autoscaler/cloudprovider/gce/gce_manager.go @@ -285,24 +285,22 @@ func (m *GceManager) deleteNodePool(toBeRemoved *Mig) error { return m.fetchAllNodePools() } -func (m *GceManager) createNodePool(spec *autoprovisioningSpec) error { +func (m *GceManager) createNodePool(mig *Mig) error { m.assertGKE() // TODO: handle preemptable // TODO: handle ssd // TODO: handle taints - nodePoolName := fmt.Sprintf("%s-%s-%d", nodeAutoprovisioningPrefix, spec.machineType, time.Now().Unix()) - config := gke.NodeConfig{ - MachineType: spec.machineType, + MachineType: mig.spec.machineType, OauthScopes: defaultOAuthScopes, - Labels: spec.labels, + Labels: mig.spec.labels, } createRequest := gke.CreateNodePoolRequest{ NodePool: &gke.NodePool{ - Name: nodePoolName, + Name: mig.nodePoolName, InitialNodeCount: 0, Config: &config, }, @@ -317,7 +315,17 @@ func (m *GceManager) createNodePool(spec *autoprovisioningSpec) error { if err != nil { return err } - return m.fetchAllNodePools() + err = m.fetchAllNodePools() + if err != nil { + return err + } + for _, existingMig := range m.getMigs() { + if existingMig.config.nodePoolName == mig.nodePoolName 
{ + *mig = *existingMig.config + return nil + } + } + return fmt.Errorf("node pool %s not found", mig.nodePoolName) } // GetMigSize gets MIG size. diff --git a/cluster-autoscaler/cloudprovider/kubemark/kubemark_linux.go b/cluster-autoscaler/cloudprovider/kubemark/kubemark_linux.go index 8656b9cd09..a33a3a6fec 100644 --- a/cluster-autoscaler/cloudprovider/kubemark/kubemark_linux.go +++ b/cluster-autoscaler/cloudprovider/kubemark/kubemark_linux.go @@ -111,7 +111,7 @@ func (kubemark *KubemarkCloudProvider) GetAvilableMachineTypes() ([]string, erro } // NewNodeGroup builds a theoretical node group based on the node definition provided. -func (kubemark *KubemarkCloudProvider) NewNodeGroup(name string, machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) { +func (kubemark *KubemarkCloudProvider) NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) { return nil, cloudprovider.ErrNotImplemented } diff --git a/cluster-autoscaler/cloudprovider/kubemark/kubemark_other.go b/cluster-autoscaler/cloudprovider/kubemark/kubemark_other.go index 092d1ebdef..8dc9725eca 100644 --- a/cluster-autoscaler/cloudprovider/kubemark/kubemark_other.go +++ b/cluster-autoscaler/cloudprovider/kubemark/kubemark_other.go @@ -56,6 +56,6 @@ func (kubemark *KubemarkCloudProvider) GetAvilableMachineTypes() ([]string, erro return []string{}, cloudprovider.ErrNotImplemented } -func (kubemark *KubemarkCloudProvider) NewNodeGroup(name string, machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) { +func (kubemark *KubemarkCloudProvider) NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) { return nil, cloudprovider.ErrNotImplemented } diff --git 
a/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go b/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go index f988cedb47..a839ad8a1d 100644 --- a/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go @@ -100,7 +100,7 @@ func (tcp *TestCloudProvider) GetAvilableMachineTypes() ([]string, error) { // NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically // created on the cloud provider side. The node group is not returned by NodeGroups() until it is created. -func (tcp *TestCloudProvider) NewNodeGroup(name string, machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) { +func (tcp *TestCloudProvider) NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) { return nil, cloudprovider.ErrNotImplemented } diff --git a/cluster-autoscaler/core/autoscaling_context.go b/cluster-autoscaler/core/autoscaling_context.go index a9fe36c01b..9c0c7cf92e 100644 --- a/cluster-autoscaler/core/autoscaling_context.go +++ b/cluster-autoscaler/core/autoscaling_context.go @@ -104,6 +104,8 @@ type AutoscalingOptions struct { ConfigNamespace string // ClusterName if available ClusterName string + // NodeAutoprovisioningEnabled tells whether the node auto-provisioning is enabled for this cluster. 
+ NodeAutoprovisioningEnabled bool } // NewAutoscalingContext returns an autoscaling context from all the necessary parameters passed via arguments diff --git a/cluster-autoscaler/core/scale_up.go b/cluster-autoscaler/core/scale_up.go index 4159be9efe..85fedd0b80 100644 --- a/cluster-autoscaler/core/scale_up.go +++ b/cluster-autoscaler/core/scale_up.go @@ -29,6 +29,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/metrics" "k8s.io/autoscaler/cluster-autoscaler/simulator" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" + "k8s.io/autoscaler/cluster-autoscaler/utils/labels" "k8s.io/autoscaler/cluster-autoscaler/utils/nodegroupset" "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" @@ -77,8 +78,13 @@ func ScaleUp(context *AutoscalingContext, unschedulablePods []*apiv1.Pod, nodes podsRemainUnschedulable := make(map[*apiv1.Pod]bool) expansionOptions := make([]expander.Option, 0) - for _, nodeGroup := range context.CloudProvider.NodeGroups() { + nodeGroups := context.CloudProvider.NodeGroups() + if context.AutoscalingOptions.NodeAutoprovisioningEnabled { + nodeGroups = addAutoprovisionedCandidates(context, nodeGroups, unschedulablePods) + } + + for _, nodeGroup := range nodeGroups { if !context.ClusterStateRegistry.IsNodeGroupSafeToScaleUp(nodeGroup.Id(), now) { glog.Warningf("Node group %s is not ready for scaleup", nodeGroup.Id()) continue @@ -175,6 +181,14 @@ func ScaleUp(context *AutoscalingContext, unschedulablePods []*apiv1.Pod, nodes "max node total count already reached") } } + if context.AutoscalingOptions.NodeAutoprovisioningEnabled { + if exist, err := bestOption.NodeGroup.Exist(); err == nil && !exist { + err := bestOption.NodeGroup.Create() + if err != nil { + return false, errors.ToAutoscalerError(errors.CloudProviderError, err) + } + } + } targetNodeGroups := []cloudprovider.NodeGroup{bestOption.NodeGroup} if context.BalanceSimilarNodeGroups { @@ -279,3 +293,21 @@ func executeScaleUp(context *AutoscalingContext, info nodegroupset.ScaleUpInfo) "Scale-up: group 
%s size set to %d", info.Group.Id(), info.NewSize) return nil } + +func addAutoprovisionedCandidates(context *AutoscalingContext, nodeGroups []cloudprovider.NodeGroup, unschedulablePods []*apiv1.Pod) []cloudprovider.NodeGroup { + machines, err := context.CloudProvider.GetAvilableMachineTypes() + if err != nil { + glog.Warningf("Failed to get machine types: %v", err) + } else { + bestLabels := labels.BestLabelSet(unschedulablePods) + for _, machineType := range machines { + nodeGroup, err := context.CloudProvider.NewNodeGroup(machineType, bestLabels, nil) + if err != nil { + glog.Warningf("Unable to build temporary node group for %s: %v", machineType, err) + } else { + nodeGroups = append(nodeGroups, nodeGroup) + } + } + } + return nodeGroups +} diff --git a/cluster-autoscaler/expander/price/price.go b/cluster-autoscaler/expander/price/price.go index 596a94f634..9d67c338d1 100644 --- a/cluster-autoscaler/expander/price/price.go +++ b/cluster-autoscaler/expander/price/price.go @@ -51,6 +51,11 @@ var ( // priceStabilizationPod is the pod cost to stabilize node_cost/pod_cost ratio a bit. // 0.5 cpu, 500 mb ram priceStabilizationPod = buildPod("stabilize", 500, 500*1024*1024) + + // Penalty given to node groups that are yet to be created. + // TODO: make it a flag + // TODO: investigate what a proper value should be + notExistCoeficient = 2.0 ) // NewStrategy returns an expansion strategy that picks nodes based on price and preferred node type. 
@@ -120,6 +125,10 @@ nextoption: optionScore := supressedUnfitness * priceSubScore + if exist, err := option.NodeGroup.Exist(); err == nil && !exist { + optionScore *= notExistCoeficient + } + debug := fmt.Sprintf("all_nodes_price=%f pods_price=%f stabilized_ratio=%f unfitness=%f supressed=%f final_score=%f", totalNodePrice, totalPodPrice, diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index be14431ac1..28d06e1c0f 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -103,6 +103,7 @@ var ( maxInactivityTimeFlag = flag.Duration("max-inactivity", 10*time.Minute, "Maximum time from last recorded autoscaler activity before automatic restart") maxFailingTimeFlag = flag.Duration("max-failing-time", 15*time.Minute, "Maximum time from last recorded successful autoscaler run before automatic restart") balanceSimilarNodeGroupsFlag = flag.Bool("balance-similar-node-groups", false, "Detect similar node groups and balance the number of nodes between them") + nodeAutoprovisioningEnabled = flag.Bool("node-autoprovisioning-enabled", false, "Should CA autoprovision node groups when needed") ) func createAutoscalerOptions() core.AutoscalerOptions { @@ -130,6 +131,7 @@ func createAutoscalerOptions() core.AutoscalerOptions { BalanceSimilarNodeGroups: *balanceSimilarNodeGroupsFlag, ConfigNamespace: *namespace, ClusterName: *clusterName, + NodeAutoprovisioningEnabled: *nodeAutoprovisioningEnabled, } configFetcherOpts := dynamic.ConfigFetcherOptions{