diff --git a/cluster-autoscaler/core/scale_down_test.go b/cluster-autoscaler/core/scale_down_test.go
index bd50f89b77..cdfd75258b 100644
--- a/cluster-autoscaler/core/scale_down_test.go
+++ b/cluster-autoscaler/core/scale_down_test.go
@@ -869,10 +869,10 @@ var defaultScaleDownOptions = context.AutoscalingOptions{
 func TestScaleDownEmptyMultipleNodeGroups(t *testing.T) {
 	config := &scaleTestConfig{
 		nodes: []nodeConfig{
-			{"n1_1", 1000, 1000, true, "ng1"},
-			{"n1_2", 1000, 1000, true, "ng1"},
-			{"n2_1", 1000, 1000, true, "ng2"},
-			{"n2_2", 1000, 1000, true, "ng2"},
+			{"n1_1", 1000, 1000, 0, true, "ng1"},
+			{"n1_2", 1000, 1000, 0, true, "ng1"},
+			{"n2_1", 1000, 1000, 0, true, "ng2"},
+			{"n2_2", 1000, 1000, 0, true, "ng2"},
 		},
 		options:            defaultScaleDownOptions,
 		expectedScaleDowns: []string{"n1_1", "n2_1"},
@@ -883,8 +883,8 @@ func TestScaleDownEmptyMultipleNodeGroups(t *testing.T) {
 func TestScaleDownEmptySingleNodeGroup(t *testing.T) {
 	config := &scaleTestConfig{
 		nodes: []nodeConfig{
-			{"n1", 1000, 1000, true, "ng1"},
-			{"n2", 1000, 1000, true, "ng1"},
+			{"n1", 1000, 1000, 0, true, "ng1"},
+			{"n2", 1000, 1000, 0, true, "ng1"},
 		},
 		options:            defaultScaleDownOptions,
 		expectedScaleDowns: []string{"n1"},
@@ -897,8 +897,8 @@ func TestScaleDownEmptyMinCoresLimitHit(t *testing.T) {
 	options.MinCoresTotal = 2
 	config := &scaleTestConfig{
 		nodes: []nodeConfig{
-			{"n1", 2000, 1000, true, "ng1"},
-			{"n2", 1000, 1000, true, "ng1"},
+			{"n1", 2000, 1000, 0, true, "ng1"},
+			{"n2", 1000, 1000, 0, true, "ng1"},
 		},
 		options:            options,
 		expectedScaleDowns: []string{"n2"},
@@ -911,10 +911,10 @@ func TestScaleDownEmptyMinMemoryLimitHit(t *testing.T) {
 	options.MinMemoryTotal = 4000
 	config := &scaleTestConfig{
 		nodes: []nodeConfig{
-			{"n1", 2000, 1000 * MB, true, "ng1"},
-			{"n2", 1000, 1000 * MB, true, "ng1"},
-			{"n3", 1000, 1000 * MB, true, "ng1"},
-			{"n4", 1000, 3000 * MB, true, "ng1"},
+			{"n1", 2000, 1000 * MB, 0, true, "ng1"},
+			{"n2", 1000, 1000 * MB, 0, true, "ng1"},
+			{"n3", 1000, 1000 * MB, 0, true, "ng1"},
+			{"n4", 1000, 3000 * MB, 0, true, "ng1"},
 		},
 		options:            options,
 		expectedScaleDowns: []string{"n1", "n2"},
@@ -926,7 +926,7 @@ func TestScaleDownEmptyMinGroupSizeLimitHit(t *testing.T) {
 	options := defaultScaleDownOptions
 	config := &scaleTestConfig{
 		nodes: []nodeConfig{
-			{"n1", 2000, 1000, true, "ng1"},
+			{"n1", 2000, 1000, 0, true, "ng1"},
 		},
 		options:            options,
 		expectedScaleDowns: []string{},
@@ -1279,13 +1279,13 @@ func TestCleanUpNodeAutoprovisionedGroups(t *testing.T) {
 
 func TestCalculateCoresAndMemoryTotal(t *testing.T) {
 	nodeConfigs := []nodeConfig{
-		{"n1", 2000, 7500 * MB, true, "ng1"},
-		{"n2", 2000, 7500 * MB, true, "ng1"},
-		{"n3", 2000, 7500 * MB, true, "ng1"},
-		{"n4", 12000, 8000 * MB, true, "ng1"},
-		{"n5", 16000, 7500 * MB, true, "ng1"},
-		{"n6", 8000, 6000 * MB, true, "ng1"},
-		{"n7", 6000, 16000 * MB, true, "ng1"},
+		{"n1", 2000, 7500 * MB, 0, true, "ng1"},
+		{"n2", 2000, 7500 * MB, 0, true, "ng1"},
+		{"n3", 2000, 7500 * MB, 0, true, "ng1"},
+		{"n4", 12000, 8000 * MB, 0, true, "ng1"},
+		{"n5", 16000, 7500 * MB, 0, true, "ng1"},
+		{"n6", 8000, 6000 * MB, 0, true, "ng1"},
+		{"n7", 6000, 16000 * MB, 0, true, "ng1"},
 	}
 	nodes := make([]*apiv1.Node, len(nodeConfigs))
 	for i, n := range nodeConfigs {
@@ -1310,13 +1310,13 @@ func TestCalculateCoresAndMemoryTotal(t *testing.T) {
 
 func TestFilterOutMasters(t *testing.T) {
 	nodeConfigs := []nodeConfig{
-		{"n1", 2000, 4000, false, "ng1"},
-		{"n2", 2000, 4000, true, "ng2"},
-		{"n3", 2000, 8000, true, ""}, // real master
-		{"n4", 1000, 2000, true, "ng3"},
-		{"n5", 1000, 2000, true, "ng3"},
-		{"n6", 2000, 8000, true, ""}, // same machine type, no node group, no api server
-		{"n7", 2000, 8000, true, ""}, // real master
+		{"n1", 2000, 4000, 0, false, "ng1"},
+		{"n2", 2000, 4000, 0, true, "ng2"},
+		{"n3", 2000, 8000, 0, true, ""}, // real master
+		{"n4", 1000, 2000, 0, true, "ng3"},
+		{"n5", 1000, 2000, 0, true, "ng3"},
+		{"n6", 2000, 8000, 0, true, ""}, // same machine type, no node group, no api server
+		{"n7", 2000, 8000, 0, true, ""}, // real master
 	}
 	nodes := make([]*apiv1.Node, len(nodeConfigs))
 	for i, n := range nodeConfigs {
diff --git a/cluster-autoscaler/core/scale_test_common.go b/cluster-autoscaler/core/scale_test_common.go
index 20aafe0e4b..69090c3090 100644
--- a/cluster-autoscaler/core/scale_test_common.go
+++ b/cluster-autoscaler/core/scale_test_common.go
@@ -24,6 +24,7 @@ type nodeConfig struct {
 	name   string
 	cpu    int64
 	memory int64
+	gpu    int64
 	ready  bool
 	group  string
 }
@@ -32,6 +33,7 @@ type podConfig struct {
 	name   string
 	cpu    int64
 	memory int64
+	gpu    int64
 	node   string
 }
 
diff --git a/cluster-autoscaler/core/scale_up_test.go b/cluster-autoscaler/core/scale_up_test.go
index 7c7e427f47..d0355c55fe 100644
--- a/cluster-autoscaler/core/scale_up_test.go
+++ b/cluster-autoscaler/core/scale_up_test.go
@@ -58,15 +58,15 @@ var defaultOptions = context.AutoscalingOptions{
 func TestScaleUpOK(t *testing.T) {
 	config := &scaleTestConfig{
 		nodes: []nodeConfig{
-			{"n1", 100, 100, true, "ng1"},
-			{"n2", 1000, 1000, true, "ng2"},
+			{"n1", 100, 100, 0, true, "ng1"},
+			{"n2", 1000, 1000, 0, true, "ng2"},
 		},
 		pods: []podConfig{
-			{"p1", 80, 0, "n1"},
-			{"p2", 800, 0, "n2"},
+			{"p1", 80, 0, 0, "n1"},
+			{"p2", 800, 0, 0, "n2"},
 		},
 		extraPods: []podConfig{
-			{"p-new", 500, 0, ""},
+			{"p-new", 500, 0, 0, ""},
 		},
 		scaleUpOptionToChoose: groupSizeChange{groupName: "ng2", sizeChange: 1},
 		expectedFinalScaleUp:  groupSizeChange{groupName: "ng2", sizeChange: 1},
@@ -81,16 +81,16 @@ func TestScaleUpMaxCoresLimitHit(t *testing.T) {
 	options.MaxCoresTotal = 9
 	config := &scaleTestConfig{
 		nodes: []nodeConfig{
-			{"n1", 2000, 100, true, "ng1"},
-			{"n2", 4000, 1000, true, "ng2"},
+			{"n1", 2000, 100, 0, true, "ng1"},
+			{"n2", 4000, 1000, 0, true, "ng2"},
 		},
 		pods: []podConfig{
-			{"p1", 1000, 0, "n1"},
-			{"p2", 3000, 0, "n2"},
+			{"p1", 1000, 0, 0, "n1"},
+			{"p2", 3000, 0, 0, "n2"},
 		},
 		extraPods: []podConfig{
-			{"p-new-1", 2000, 0, ""},
-			{"p-new-2", 2000, 0, ""},
+			{"p-new-1", 2000, 0, 0, ""},
+			{"p-new-2", 2000, 0, 0, ""},
 		},
 		scaleUpOptionToChoose: groupSizeChange{groupName: "ng1", sizeChange: 2},
 		expectedFinalScaleUp:  groupSizeChange{groupName: "ng1", sizeChange: 1},
@@ -107,17 +107,17 @@ func TestScaleUpMaxMemoryLimitHit(t *testing.T) {
 	options.MaxMemoryTotal = 1300 // set in mb
 	config := &scaleTestConfig{
 		nodes: []nodeConfig{
-			{"n1", 2000, 100 * MB, true, "ng1"},
-			{"n2", 4000, 1000 * MB, true, "ng2"},
+			{"n1", 2000, 100 * MB, 0, true, "ng1"},
+			{"n2", 4000, 1000 * MB, 0, true, "ng2"},
 		},
 		pods: []podConfig{
-			{"p1", 1000, 0, "n1"},
-			{"p2", 3000, 0, "n2"},
+			{"p1", 1000, 0, 0, "n1"},
+			{"p2", 3000, 0, 0, "n2"},
 		},
 		extraPods: []podConfig{
-			{"p-new-1", 2000, 100 * MB, ""},
-			{"p-new-2", 2000, 100 * MB, ""},
-			{"p-new-3", 2000, 100 * MB, ""},
+			{"p-new-1", 2000, 100 * MB, 0, ""},
+			{"p-new-2", 2000, 100 * MB, 0, ""},
+			{"p-new-3", 2000, 100 * MB, 0, ""},
 		},
 		scaleUpOptionToChoose: groupSizeChange{groupName: "ng1", sizeChange: 3},
 		expectedFinalScaleUp:  groupSizeChange{groupName: "ng1", sizeChange: 2},
@@ -132,17 +132,17 @@ func TestScaleUpCapToMaxTotalNodesLimit(t *testing.T) {
 	options.MaxNodesTotal = 3
 	config := &scaleTestConfig{
 		nodes: []nodeConfig{
-			{"n1", 2000, 100 * MB, true, "ng1"},
-			{"n2", 4000, 1000 * MB, true, "ng2"},
+			{"n1", 2000, 100 * MB, 0, true, "ng1"},
+			{"n2", 4000, 1000 * MB, 0, true, "ng2"},
 		},
 		pods: []podConfig{
-			{"p1", 1000, 0, "n1"},
-			{"p2", 3000, 0, "n2"},
+			{"p1", 1000, 0, 0, "n1"},
+			{"p2", 3000, 0, 0, "n2"},
 		},
 		extraPods: []podConfig{
-			{"p-new-1", 4000, 100 * MB, ""},
-			{"p-new-2", 4000, 100 * MB, ""},
-			{"p-new-3", 4000, 100 * MB, ""},
+			{"p-new-1", 4000, 100 * MB, 0, ""},
+			{"p-new-2", 4000, 100 * MB, 0, ""},
+			{"p-new-3", 4000, 100 * MB, 0, ""},
 		},
 		scaleUpOptionToChoose: groupSizeChange{groupName: "ng2", sizeChange: 3},
 		expectedFinalScaleUp:  groupSizeChange{groupName: "ng2", sizeChange: 1},
@@ -211,6 +211,9 @@ func simpleScaleUpTest(t *testing.T, config *scaleTestConfig) {
 	nodes := make([]*apiv1.Node, len(config.nodes))
 	for i, n := range config.nodes {
 		node := BuildTestNode(n.name, n.cpu, n.memory)
+		if n.gpu > 0 {
+			AddGpusToNode(node, n.gpu)
+		}
 		SetNodeReadyState(node, n.ready, time.Now())
 		nodes[i] = node
 		groups[n.group] = append(groups[n.group], node)
@@ -218,9 +221,8 @@
 
 	pods := make(map[string][]apiv1.Pod)
 	for _, p := range config.pods {
-		pod := *BuildTestPod(p.name, p.cpu, p.memory)
-		pod.Spec.NodeName = p.node
-		pods[p.node] = append(pods[p.node], pod)
+		pod := buildTestPod(p)
+		pods[p.node] = append(pods[p.node], *pod)
 	}
 
 	fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
@@ -276,7 +278,7 @@ func simpleScaleUpTest(t *testing.T, config *scaleTestConfig) {
 
 	extraPods := make([]*apiv1.Pod, len(config.extraPods))
 	for i, p := range config.extraPods {
-		pod := BuildTestPod(p.name, p.cpu, p.memory)
+		pod := buildTestPod(p)
 		extraPods[i] = pod
 	}
 
@@ -312,6 +314,17 @@ func getGroupSizeChangeFromChan(c chan groupSizeChange) *groupSizeChange {
 	}
 }
 
+func buildTestPod(p podConfig) *apiv1.Pod {
+	pod := BuildTestPod(p.name, p.cpu, p.memory)
+	if p.gpu > 0 {
+		RequestGpuForPod(pod, p.gpu)
+	}
+	if p.node != "" {
+		pod.Spec.NodeName = p.node
+	}
+	return pod
+}
+
 func TestScaleUpNodeComingNoScale(t *testing.T) {
 	n1 := BuildTestNode("n1", 100, 1000)
 	SetNodeReadyState(n1, true, time.Now())
diff --git a/cluster-autoscaler/utils/test/test_utils.go b/cluster-autoscaler/utils/test/test_utils.go
index c200860bd3..f0c9be8a12 100644
--- a/cluster-autoscaler/utils/test/test_utils.go
+++ b/cluster-autoscaler/utils/test/test_utils.go
@@ -64,6 +64,26 @@ func BuildTestPod(name string, cpu int64, mem int64) *apiv1.Pod {
 	return pod
 }
 
+const (
+	// cannot use constants from gpu module due to cyclic package import
+	resourceNvidiaGPU = "nvidia.com/gpu"
+	gpuLabel          = "cloud.google.com/gke-accelerator"
+	defaultGPUType    = "nvidia-tesla-k80"
+)
+
+// RequestGpuForPod modifies pod's resource requests by adding a number of GPUs to them.
+func RequestGpuForPod(pod *apiv1.Pod, gpusCount int64) {
+	if pod.Spec.Containers[0].Resources.Limits == nil {
+		pod.Spec.Containers[0].Resources.Limits = apiv1.ResourceList{}
+	}
+	pod.Spec.Containers[0].Resources.Limits[resourceNvidiaGPU] = *resource.NewQuantity(gpusCount, resource.DecimalSI)
+
+	if pod.Spec.Containers[0].Resources.Requests == nil {
+		pod.Spec.Containers[0].Resources.Requests = apiv1.ResourceList{}
+	}
+	pod.Spec.Containers[0].Resources.Requests[resourceNvidiaGPU] = *resource.NewQuantity(gpusCount, resource.DecimalSI)
+}
+
 // BuildTestNode creates a node with specified capacity.
 func BuildTestNode(name string, millicpu int64, mem int64) *apiv1.Node {
 	node := &apiv1.Node{
@@ -97,6 +117,20 @@ func BuildTestNode(name string, millicpu int64, mem int64) *apiv1.Node {
 	return node
 }
 
+// AddGpusToNode adds GPU capacity to given node. Default accelerator type is used.
+func AddGpusToNode(node *apiv1.Node, gpusCount int64) {
+	node.Spec.Taints = append(
+		node.Spec.Taints,
+		apiv1.Taint{
+			Key:    resourceNvidiaGPU,
+			Value:  "present",
+			Effect: "NoSchedule",
+		})
+	node.Status.Capacity[resourceNvidiaGPU] = *resource.NewQuantity(gpusCount, resource.DecimalSI)
+	node.Status.Allocatable[resourceNvidiaGPU] = *resource.NewQuantity(gpusCount, resource.DecimalSI)
+	node.Labels[gpuLabel] = defaultGPUType
+}
+
 // SetNodeReadyState sets node ready state.
 func SetNodeReadyState(node *apiv1.Node, ready bool, lastTransition time.Time) {
 	for i := range node.Status.Conditions {
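Usage sketch (not part of the patch): how the GPU-aware helpers introduced above are expected to be combined in a test. Only BuildTestNode, AddGpusToNode, BuildTestPod, RequestGpuForPod and SetNodeReadyState come from the code in this diff; the package clause, the k8s.io import paths, the test name, and the concrete node/pod names and resource values are illustrative assumptions.

package example

import (
	"testing"
	"time"

	apiv1 "k8s.io/api/core/v1"

	// assumed import path of the test utilities modified in this diff
	. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
)

func TestGpuHelpersSketch(t *testing.T) {
	// A ready 2-CPU node with one nvidia.com/gpu; AddGpusToNode also adds the
	// NoSchedule taint, the capacity/allocatable entries and the GKE
	// accelerator label, as defined in test_utils.go above.
	node := BuildTestNode("gpu-node-1", 2000, 1000)
	AddGpusToNode(node, 1)
	SetNodeReadyState(node, true, time.Now())

	// A pod asking for one GPU; RequestGpuForPod sets nvidia.com/gpu in both
	// the requests and the limits of the first container.
	pod := BuildTestPod("gpu-pod-1", 500, 0)
	RequestGpuForPod(pod, 1)

	gpusOnNode := node.Status.Allocatable[apiv1.ResourceName("nvidia.com/gpu")]
	gpusRequested := pod.Spec.Containers[0].Resources.Requests[apiv1.ResourceName("nvidia.com/gpu")]
	if gpusOnNode.Value() < gpusRequested.Value() {
		t.Errorf("expected the node to fit the pod's GPU request")
	}
}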