Cluster Autoscaler: GCE: Add support for Spot VMs

2021-09-10 23:36:08 +02:00 · 2021-09-10 23:36:08 +02:00 · 0427bb0f2b
parent 0f3e441c5a
commit 0427bb0f2b
2 changed files with 69 additions and 40 deletions
--- a/cluster-autoscaler/cloudprovider/gce/gce_price_model.go
+++ b/cluster-autoscaler/cloudprovider/gce/gce_price_model.go
@ -40,6 +40,7 @@ const (
 	gpuPricePerHour         = 0.700

 	preemptibleLabel = "cloud.google.com/gke-preemptible"
+	spotLabel        = "cloud.google.com/gke-spot"
 )

 var (
@ -355,14 +356,12 @@ var (
 func (model *GcePriceModel) NodePrice(node *apiv1.Node, startTime time.Time, endTime time.Time) (float64, error) {
 	price := 0.0
 	basePriceFound := false
-	isPreemptible := false

 	// Base instance price
 	if node.Labels != nil {
-		isPreemptible = node.Labels[preemptibleLabel] == "true"
 		if machineType, found := getInstanceTypeFromLabels(node.Labels); found {
 			priceMapToUse := instancePrices
-			if isPreemptible {
+			if hasPreemptiblePricing(node) {
 				priceMapToUse = preemptiblePrices
 			}
 			if basePricePerHour, found := priceMapToUse[machineType]; found {
@ -385,7 +384,7 @@ func (model *GcePriceModel) NodePrice(node *apiv1.Node, startTime time.Time, end
 		gpuPrice := gpuPricePerHour
 		if node.Labels != nil {
 			priceMapToUse := gpuPrices
-			if isPreemptible {
+			if hasPreemptiblePricing(node) {
 				priceMapToUse = preemptibleGpuPrices
 			}
 			if gpuType, found := node.Labels[GPULabel]; found {
@ -417,8 +416,20 @@ func isInstanceCustom(instanceType string) bool {
 	return strings.Contains(instanceType, "custom")
 }

+// hasPreemptiblePricing reports whether preemptible pricing should be used for a node, based on its labels. Spot VMs
+// have dynamic pricing, which differs from the static Preemptible VM pricing used here. However, it should be close
+// enough in practice, since prices are only ever compared with each other, and Spot VMs will always be cheaper than
+// the corresponding non-preemptible VMs. So for the purposes of pricing, Spot VMs are treated the same as
+// Preemptible VMs.
+func hasPreemptiblePricing(node *apiv1.Node) bool {
+	if node.Labels == nil {
+		return false
+	}
+	return node.Labels[preemptibleLabel] == "true" || node.Labels[spotLabel] == "true"
+}
+
 func getPreemptibleDiscount(node *apiv1.Node) float64 {
-	if node.Labels[preemptibleLabel] != "true" {
+	if !hasPreemptiblePricing(node) {
 		return 1.0
 	}
 	instanceType, found := getInstanceTypeFromLabels(node.Labels)
--- a/cluster-autoscaler/cloudprovider/gce/gce_price_model_test.go
+++ b/cluster-autoscaler/cloudprovider/gce/gce_price_model_test.go
@ -30,7 +30,7 @@ import (
 	"github.com/stretchr/testify/assert"
 )

-func testNode(t *testing.T, nodeName string, instanceType string, millicpu int64, mem int64, gpuType string, gpuCount int64, isPreemptible bool) *apiv1.Node {
+func testNode(t *testing.T, nodeName string, instanceType string, millicpu int64, mem int64, gpuType string, gpuCount int64, isPreemptible bool, isSpot bool) *apiv1.Node {
 	node := BuildTestNode(nodeName, millicpu, mem)
 	labels, err := BuildGenericLabels(GceRef{
 		Name:    "kubernetes-minion-group",
@ -43,6 +43,9 @@ func testNode(t *testing.T, nodeName string, instanceType string, millicpu int64
 	if isPreemptible {
 		labels[preemptibleLabel] = "true"
 	}
+	if isSpot {
+		labels[spotLabel] = "true"
+	}
 	if gpuCount > 0 {
 		node.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(gpuCount, resource.DecimalSI)
 		node.Status.Allocatable[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(gpuCount, resource.DecimalSI)
@ -64,93 +67,108 @@ func TestGetNodePrice(t *testing.T) {
 	}{
 		// instance types
 		"e2 is cheaper than n1": {
-			cheaperNode:                testNode(t, "e2", "e2-standard-8", 8000, 32*units.GiB, "", 0, false),
-			expensiveNode:              testNode(t, "n1", "n1-standard-8", 8000, 30*units.GiB, "", 0, false),
+			cheaperNode:                testNode(t, "e2", "e2-standard-8", 8000, 32*units.GiB, "", 0, false, false),
+			expensiveNode:              testNode(t, "n1", "n1-standard-8", 8000, 30*units.GiB, "", 0, false, false),
 			priceComparisonCoefficient: 1,
 		},
 		"custom nodes are more expensive than n1": {
-			cheaperNode:                testNode(t, "n1", "n1-standard-8", 8000, 30*units.GiB, "", 0, false),
-			expensiveNode:              testNode(t, "custom", "custom-8", 8000, 30*units.GiB, "", 0, false),
+			cheaperNode:                testNode(t, "n1", "n1-standard-8", 8000, 30*units.GiB, "", 0, false, false),
+			expensiveNode:              testNode(t, "custom", "custom-8", 8000, 30*units.GiB, "", 0, false, false),
 			priceComparisonCoefficient: 1,
 		},
 		"custom nodes are not extremely expensive": {
-			cheaperNode:                testNode(t, "custom", "custom-8", 8000, 30*units.GiB, "", 0, false),
-			expensiveNode:              testNode(t, "n1", "n1-standard-8", 8000, 30*units.GiB, "", 0, false),
+			cheaperNode:                testNode(t, "custom", "custom-8", 8000, 30*units.GiB, "", 0, false, false),
+			expensiveNode:              testNode(t, "n1", "n1-standard-8", 8000, 30*units.GiB, "", 0, false, false),
 			priceComparisonCoefficient: 1.2,
 		},
 		"custom node price scales linearly": {
-			cheaperNode:                testNode(t, "small_custom", "custom-1", 1000, 3.75*units.GiB, "", 0, false),
-			expensiveNode:              testNode(t, "large_custom", "custom-8", 8000, 30*units.GiB, "", 0, false),
+			cheaperNode:                testNode(t, "small_custom", "custom-1", 1000, 3.75*units.GiB, "", 0, false, false),
+			expensiveNode:              testNode(t, "large_custom", "custom-8", 8000, 30*units.GiB, "", 0, false, false),
 			priceComparisonCoefficient: 1.0 / 7.9,
 		},
 		"custom node price scales linearly 2": {
-			cheaperNode:                testNode(t, "large_custom", "custom-8", 8000, 30*units.GiB, "", 0, false),
-			expensiveNode:              testNode(t, "small_custom", "custom-1", 1000, 3.75*units.GiB, "", 0, false),
+			cheaperNode:                testNode(t, "large_custom", "custom-8", 8000, 30*units.GiB, "", 0, false, false),
+			expensiveNode:              testNode(t, "small_custom", "custom-1", 1000, 3.75*units.GiB, "", 0, false, false),
 			priceComparisonCoefficient: 8.1,
 		},
 		// GPUs
 		"accelerators are expensive": {
-			cheaperNode: testNode(t, "no_accelerators", "n1-standard-8", 8000, 30*units.GiB, "", 0, false),
+			cheaperNode: testNode(t, "no_accelerators", "n1-standard-8", 8000, 30*units.GiB, "", 0, false, false),
 			// #NotFunny
-			expensiveNode:              testNode(t, "large hadron collider", "n1-standard-8", 8000, 30*units.GiB, "nvidia-tesla-v100", 1, false),
+			expensiveNode:              testNode(t, "large hadron collider", "n1-standard-8", 8000, 30*units.GiB, "nvidia-tesla-v100", 1, false, false),
 			priceComparisonCoefficient: 0.5,
 		},
 		"GPUs of unknown type are still expensive": {
-			cheaperNode:                testNode(t, "no_accelerators", "n1-standard-8", 8000, 30*units.GiB, "", 0, false),
-			expensiveNode:              testNode(t, "cyclotron", "n1-standard-8", 8000, 30*units.GiB, "", 1, false),
+			cheaperNode:                testNode(t, "no_accelerators", "n1-standard-8", 8000, 30*units.GiB, "", 0, false, false),
+			expensiveNode:              testNode(t, "cyclotron", "n1-standard-8", 8000, 30*units.GiB, "", 1, false, false),
 			priceComparisonCoefficient: 0.5,
 		},
 		"different GPUs have different prices": {
-			cheaperNode:                testNode(t, "cheap gpu", "n1-standard-8", 8000, 30*units.GiB, "nvidia-tesla-t4", 1, false),
-			expensiveNode:              testNode(t, "large hadron collider", "n1-standard-8", 8000, 30*units.GiB, "nvidia-tesla-v100", 1, false),
+			cheaperNode:                testNode(t, "cheap gpu", "n1-standard-8", 8000, 30*units.GiB, "nvidia-tesla-t4", 1, false, false),
+			expensiveNode:              testNode(t, "large hadron collider", "n1-standard-8", 8000, 30*units.GiB, "nvidia-tesla-v100", 1, false, false),
 			priceComparisonCoefficient: 0.5,
 		},
 		"more GPUs is more expensive": {
-			cheaperNode:                testNode(t, "1 gpu", "n1-standard-8", 8000, 30*units.GiB, "nvidia-tesla-v100", 1, false),
-			expensiveNode:              testNode(t, "2 gpus", "n1-standard-8", 8000, 30*units.GiB, "nvidia-tesla-v100", 2, false),
+			cheaperNode:                testNode(t, "1 gpu", "n1-standard-8", 8000, 30*units.GiB, "nvidia-tesla-v100", 1, false, false),
+			expensiveNode:              testNode(t, "2 gpus", "n1-standard-8", 8000, 30*units.GiB, "nvidia-tesla-v100", 2, false, false),
 			priceComparisonCoefficient: 0.7,
 		},
 		"some instance types have fixed gpu count 1": {
-			cheaperNode:                testNode(t, "with partitioning", "a2-highgpu-2g", 12000, 85*units.GiB, "nvidia-tesla-a100", 10, false),
-			expensiveNode:              testNode(t, "without partitioning", "a2-highgpu-2g", 12000, 85*units.GiB, "nvidia-tesla-a100", 2, false),
+			cheaperNode:                testNode(t, "with partitioning", "a2-highgpu-2g", 12000, 85*units.GiB, "nvidia-tesla-a100", 10, false, false),
+			expensiveNode:              testNode(t, "without partitioning", "a2-highgpu-2g", 12000, 85*units.GiB, "nvidia-tesla-a100", 2, false, false),
 			priceComparisonCoefficient: 1.001,
 		},
 		"some instance types have fixed gpu count 2": {
-			cheaperNode:                testNode(t, "without partitioning", "a2-highgpu-2g", 12000, 85*units.GiB, "nvidia-tesla-a100", 2, false),
-			expensiveNode:              testNode(t, "with partitioning", "a2-highgpu-2g", 12000, 85*units.GiB, "nvidia-tesla-a100", 10, false),
+			cheaperNode:                testNode(t, "without partitioning", "a2-highgpu-2g", 12000, 85*units.GiB, "nvidia-tesla-a100", 2, false, false),
+			expensiveNode:              testNode(t, "with partitioning", "a2-highgpu-2g", 12000, 85*units.GiB, "nvidia-tesla-a100", 10, false, false),
 			priceComparisonCoefficient: 1.001,
 		},
 		// Preemptibles
 		"preemtpibles are cheap": {
-			cheaperNode:                testNode(t, "preempted_i_can_be", "n1-standard-8", 8000, 30*units.GiB, "", 0, true),
-			expensiveNode:              testNode(t, "ondemand", "n1-standard-8", 8000, 30*units.GiB, "", 0, false),
+			cheaperNode:                testNode(t, "preempted_i_can_be", "n1-standard-8", 8000, 30*units.GiB, "", 0, true, false),
+			expensiveNode:              testNode(t, "ondemand", "n1-standard-8", 8000, 30*units.GiB, "", 0, false, false),
 			priceComparisonCoefficient: 0.25,
 		},
 		"custom preemptibles are also cheap": {
-			cheaperNode:                testNode(t, "preempted_i_can_be", "custom-8", 8000, 30*units.GiB, "", 0, true),
-			expensiveNode:              testNode(t, "ondemand", "custom-8", 8000, 30*units.GiB, "", 0, false),
+			cheaperNode:                testNode(t, "preempted_i_can_be", "custom-8", 8000, 30*units.GiB, "", 0, true, false),
+			expensiveNode:              testNode(t, "ondemand", "custom-8", 8000, 30*units.GiB, "", 0, false, false),
 			priceComparisonCoefficient: 0.25,
 		},
 		"preemtpibles GPUs are (relatively) cheap": {
-			cheaperNode:                testNode(t, "preempted_i_can_be", "n1-standard-8", 8000, 30*units.GiB, "nvidia-tesla-v100", 2, true),
-			expensiveNode:              testNode(t, "ondemand", "n1-standard-8", 8000, 30*units.GiB, "nvidia-tesla-v100", 2, false),
+			cheaperNode:                testNode(t, "preempted_i_can_be", "n1-standard-8", 8000, 30*units.GiB, "nvidia-tesla-v100", 2, true, false),
+			expensiveNode:              testNode(t, "ondemand", "n1-standard-8", 8000, 30*units.GiB, "nvidia-tesla-v100", 2, false, false),
+			priceComparisonCoefficient: 0.5,
+		},
+		"spot vms are cheap": {
+			cheaperNode:                testNode(t, "spot", "n1-standard-8", 8000, 30*units.GiB, "", 0, false, true),
+			expensiveNode:              testNode(t, "ondemand", "n1-standard-8", 8000, 30*units.GiB, "", 0, false, false),
+			priceComparisonCoefficient: 0.25,
+		},
+		"custom spot vms are also cheap": {
+			cheaperNode:                testNode(t, "spot", "custom-8", 8000, 30*units.GiB, "", 0, false, true),
+			expensiveNode:              testNode(t, "ondemand", "custom-8", 8000, 30*units.GiB, "", 0, false, false),
+			priceComparisonCoefficient: 0.25,
+		},
+		"spot GPUs are (relatively) cheap": {
+			cheaperNode:                testNode(t, "spot", "n1-standard-8", 8000, 30*units.GiB, "nvidia-tesla-v100", 2, false, true),
+			expensiveNode:              testNode(t, "ondemand", "n1-standard-8", 8000, 30*units.GiB, "nvidia-tesla-v100", 2, false, false),
 			priceComparisonCoefficient: 0.5,
 		},
 		// Unknown instances
 		"unknown cost is similar to its node family": {
-			cheaperNode:                testNode(t, "unknown", "n1-unknown", 8000, 30*units.GiB, "", 0, false),
-			expensiveNode:              testNode(t, "known", "n1-standard-8", 8000, 30*units.GiB, "", 0, false),
+			cheaperNode:                testNode(t, "unknown", "n1-unknown", 8000, 30*units.GiB, "", 0, false, false),
+			expensiveNode:              testNode(t, "known", "n1-standard-8", 8000, 30*units.GiB, "", 0, false, false),
 			priceComparisonCoefficient: 1.001,
 		},
 		"unknown cost is similar to its node family 2": {
-			cheaperNode:                testNode(t, "unknown", "n1-standard-8", 8000, 30*units.GiB, "", 0, false),
-			expensiveNode:              testNode(t, "known", "n1-unknown", 8000, 30*units.GiB, "", 0, false),
+			cheaperNode:                testNode(t, "unknown", "n1-standard-8", 8000, 30*units.GiB, "", 0, false, false),
+			expensiveNode:              testNode(t, "known", "n1-unknown", 8000, 30*units.GiB, "", 0, false, false),
 			priceComparisonCoefficient: 1.001,
 		},
 		// Custom instances
 		"big custom from cheap family is cheaper than small custom from expensive family": {
-			cheaperNode:                testNode(t, "unknown", "e2-custom", 9000, 32*units.GiB, "", 0, false),
-			expensiveNode:              testNode(t, "known", "n1-custom", 8000, 30*units.GiB, "", 0, false),
+			cheaperNode:                testNode(t, "unknown", "e2-custom", 9000, 32*units.GiB, "", 0, false, false),
+			expensiveNode:              testNode(t, "known", "n1-custom", 8000, 30*units.GiB, "", 0, false, false),
 			priceComparisonCoefficient: 1.001,
 		},
 	}