Create node groups with GPU in scale-up.go

This is still not implemented in cloudprovider. Extended NewNodeGroup inteface to have a way of passing parameters for more complex resources.
2017-11-16 11:27:39 +01:00 · 2017-11-16 11:27:39 +01:00 · b7f8622eb2
parent 6554919700
commit b7f8622eb2
10 changed files with 74 additions and 25 deletions
--- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
@ -162,7 +162,8 @@ func (aws *awsCloudProvider) GetAvailableMachineTypes() ([]string, error) {

 // NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically
 // created on the cloud provider side. The node group is not returned by NodeGroups() until it is created.
-func (aws *awsCloudProvider) NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
+func (aws *awsCloudProvider) NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string,
+	extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
 	return nil, cloudprovider.ErrNotImplemented
 }

--- a/cluster-autoscaler/cloudprovider/cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/cloud_provider.go
@ -53,7 +53,7 @@ type CloudProvider interface {
 	// NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically
 	// created on the cloud provider side. The node group is not returned by NodeGroups() until it is created.
 	// Implementation optional.
-	NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (NodeGroup, error)
+	NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string, extraResources map[string]resource.Quantity) (NodeGroup, error)

 	// GetResourceLimiter returns struct containing limits (max, min) for resources (cores, memory etc.).
 	GetResourceLimiter() (*ResourceLimiter, error)
@ -72,6 +72,10 @@ var ErrNotImplemented errors.AutoscalerError = errors.NewAutoscalerError(errors.
 // ErrAlreadyExist is returned if a method is not implemented.
 var ErrAlreadyExist errors.AutoscalerError = errors.NewAutoscalerError(errors.InternalError, "Already exist")

+// ErrIllegalConfiguration is returned when trying to create NewNodeGroup with
+// configuration that is not supported by cloudprovider.
+var ErrIllegalConfiguration errors.AutoscalerError = errors.NewAutoscalerError(errors.InternalError, "Configuration not allowed by cloud provider")
+
 // NodeGroup contains configuration info and functions to control a set
 // of nodes that have the same capacity and set of labels.
 type NodeGroup interface {
--- a/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go
@ -137,7 +137,8 @@ func (gce *GceCloudProvider) GetAvailableMachineTypes() ([]string, error) {

 // NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically
 // created on the cloud provider side. The node group is not returned by NodeGroups() until it is created.
-func (gce *GceCloudProvider) NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
+func (gce *GceCloudProvider) NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string,
+	extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
 	nodePoolName := fmt.Sprintf("%s-%s-%d", nodeAutoprovisioningPrefix, machineType, time.Now().Unix())
 	mig := &Mig{
 		autoprovisioned: true,
--- a/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider_test.go
+++ b/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider_test.go
@ -376,7 +376,7 @@ func TestMig(t *testing.T) {
 	gceManagerMock.On("getLocation").Return("us-central1-b").Once()
 	gceManagerMock.On("getTemplates").Return(templateBuilder).Once()
 	server.On("handle", "/project1/zones/us-central1-b/machineTypes/n1-standard-1").Return(getMachineTypeResponse).Once()
-	nodeGroup, err := gce.NewNodeGroup("n1-standard-1", nil, nil)
+	nodeGroup, err := gce.NewNodeGroup("n1-standard-1", nil, nil, nil)
 	assert.NoError(t, err)
 	assert.NotNil(t, nodeGroup)
 	mig1 := reflect.ValueOf(nodeGroup).Interface().(*Mig)
--- a/cluster-autoscaler/cloudprovider/kubemark/kubemark_linux.go
+++ b/cluster-autoscaler/cloudprovider/kubemark/kubemark_linux.go
@ -113,7 +113,8 @@ func (kubemark *KubemarkCloudProvider) GetAvailableMachineTypes() ([]string, err
 }

 // NewNodeGroup builds a theoretical node group based on the node definition provided.
-func (kubemark *KubemarkCloudProvider) NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
+func (kubemark *KubemarkCloudProvider) NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string,
+	extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
 	return nil, cloudprovider.ErrNotImplemented
 }

--- a/cluster-autoscaler/cloudprovider/kubemark/kubemark_other.go
+++ b/cluster-autoscaler/cloudprovider/kubemark/kubemark_other.go
@ -56,7 +56,8 @@ func (kubemark *KubemarkCloudProvider) GetAvailableMachineTypes() ([]string, err
 	return []string{}, cloudprovider.ErrNotImplemented
 }

-func (kubemark *KubemarkCloudProvider) NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
+func (kubemark *KubemarkCloudProvider) NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string,
+	extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
 	return nil, cloudprovider.ErrNotImplemented
 }

--- a/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/test/test_cloud_provider.go
@ -136,7 +136,8 @@ func (tcp *TestCloudProvider) GetAvailableMachineTypes() ([]string, error) {

 // NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically
 // created on the cloud provider side. The node group is not returned by NodeGroups() until it is created.
-func (tcp *TestCloudProvider) NewNodeGroup(machineType string, labels map[string]string, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
+func (tcp *TestCloudProvider) NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string,
+	extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
 	return &TestNodeGroup{
 		cloudProvider:   tcp,
 		id:              "autoprovisioned-" + machineType,
--- a/cluster-autoscaler/core/scale_up.go
+++ b/cluster-autoscaler/core/scale_up.go
@ -22,6 +22,7 @@ import (

 	apiv1 "k8s.io/api/core/v1"
 	extensionsv1 "k8s.io/api/extensions/v1beta1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
 	"k8s.io/autoscaler/cluster-autoscaler/clusterstate"
 	"k8s.io/autoscaler/cluster-autoscaler/estimator"
@ -29,6 +30,7 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/metrics"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
+	"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/labels"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/nodegroupset"
 	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
@ -371,15 +373,48 @@ func addAutoprovisionedCandidates(context *AutoscalingContext, nodeGroups []clou
 		return nodeGroups, nodeInfos
 	}

+	newGroupsCount := 0
+
+	newNodeGroups := addAllMachineTypesForConfig(context, map[string]string{}, map[string]resource.Quantity{},
+		nodeInfos, unschedulablePods)
+	newGroupsCount += len(newNodeGroups)
+	nodeGroups = append(nodeGroups, newNodeGroups...)
+
+	gpuRequests := gpu.GetGpuRequests(unschedulablePods)
+	for _, gpuRequestInfo := range gpuRequests {
+		glog.V(4).Info("Adding node groups using GPU to NAP simulations")
+		extraResources := map[string]resource.Quantity{
+			gpu.ResourceNvidiaGPU: gpuRequestInfo.MaxRequest,
+		}
+		newNodeGroups := addAllMachineTypesForConfig(context, gpuRequestInfo.SystemLabels, extraResources,
+			nodeInfos, gpuRequestInfo.Pods)
+		newGroupsCount += len(newNodeGroups)
+		nodeGroups = append(nodeGroups, newNodeGroups...)
+	}
+	glog.V(4).Infof("Considering %v potential node groups in NAP simulations", newGroupsCount)
+
+	return nodeGroups, nodeInfos
+}
+
+func addAllMachineTypesForConfig(context *AutoscalingContext, systemLabels map[string]string, extraResources map[string]resource.Quantity,
+	nodeInfos map[string]*schedulercache.NodeInfo, unschedulablePods []*apiv1.Pod) []cloudprovider.NodeGroup {
+
+	nodeGroups := make([]cloudprovider.NodeGroup, 0)
 	machines, err := context.CloudProvider.GetAvailableMachineTypes()
 	if err != nil {
 		glog.Warningf("Failed to get machine types: %v", err)
-	} else {
+		return nodeGroups
+	}
+
 	bestLabels := labels.BestLabelSet(unschedulablePods)
 	for _, machineType := range machines {
-			nodeGroup, err := context.CloudProvider.NewNodeGroup(machineType, bestLabels, nil)
+		nodeGroup, err := context.CloudProvider.NewNodeGroup(machineType, bestLabels, systemLabels, extraResources)
 		if err != nil {
+			// We don't check if a given node group setup is allowed.
+			// It's fine if it isn't, just don't consider it an option.
+			if err != cloudprovider.ErrIllegalConfiguration {
 				glog.Warningf("Unable to build temporary node group for %s: %v", machineType, err)
+			}
 			continue
 		}
 		nodeInfo, err := nodeGroup.TemplateNodeInfo()
@ -390,8 +425,7 @@ func addAutoprovisionedCandidates(context *AutoscalingContext, nodeGroups []clou
 		nodeInfos[nodeGroup.Id()] = nodeInfo
 		nodeGroups = append(nodeGroups, nodeGroup)
 	}
-	}
-	return nodeGroups, nodeInfos
+	return nodeGroups
 }

 func calculateClusterCoresMemoryTotal(nodeGroups []cloudprovider.NodeGroup, nodeInfos map[string]*schedulercache.NodeInfo) (int64, int64) {
--- a/cluster-autoscaler/expander/price/price_test.go
+++ b/cluster-autoscaler/expander/price/price_test.go
@ -73,7 +73,7 @@ func TestPriceExpander(t *testing.T) {
 	provider.AddNode("ng2", n2)
 	ng1, _ := provider.NodeGroupForNode(n1)
 	ng2, _ := provider.NodeGroupForNode(n2)
-	ng3, _ := provider.NewNodeGroup("MT1", nil, nil)
+	ng3, _ := provider.NewNodeGroup("MT1", nil, nil, nil)

 	ni1 := schedulercache.NewNodeInfo()
 	ni1.SetNode(n1)
--- a/cluster-autoscaler/utils/gpu/gpu.go
+++ b/cluster-autoscaler/utils/gpu/gpu.go
@ -98,6 +98,9 @@ type GpuRequestInfo struct {
 	MaxRequest resource.Quantity
 	// Pods is a list of pods requesting GPU
 	Pods []*apiv1.Pod
+	// SystemLabels is a set of system labels corresponding to selected GPU
+	// that needs to be passed to cloudprovider
+	SystemLabels map[string]string
 }

 // GetGpuRequests returns a GpuRequestInfo for each type of GPU requested by
@ -127,6 +130,9 @@ func GetGpuRequests(pods []*apiv1.Pod) map[string]GpuRequestInfo {
 			requestInfo = GpuRequestInfo{
 				MaxRequest: podGpu,
 				Pods:       make([]*apiv1.Pod, 0),
+				SystemLabels: map[string]string{
+					GPULabel: gpuType,
+				},
 			}
 		}
 		if podGpu.Cmp(requestInfo.MaxRequest) > 0 {