Prevent populate ig from adding nvidia taint if it has already been set

This commit is contained in:
Ole Markus With 2022-02-17 10:42:21 +01:00
parent 61bcdd7d72
commit afcfd1b1e8
3 changed files with 56 additions and 1 deletions

View File

@ -33,6 +33,9 @@ func BuildMinimalCluster(clusterName string) *kops.Cluster {
{Name: "subnet-us-mock-1a", Zone: "us-mock-1a", CIDR: "172.20.1.0/24", Type: kops.SubnetTypePrivate},
}
c.Spec.ContainerRuntime = "containerd"
c.Spec.Containerd = &kops.ContainerdConfig{}
c.Spec.MasterPublicName = fmt.Sprintf("api.%v", clusterName)
c.Spec.MasterInternalName = fmt.Sprintf("internal.api.%v", clusterName)
c.Spec.KubernetesAPIAccess = []string{"0.0.0.0/0"}

View File

@ -18,6 +18,7 @@ package cloudup
import (
"fmt"
"strings"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/blang/semver/v4"
@ -186,7 +187,15 @@ func PopulateInstanceGroupSpec(cluster *kops.Cluster, input *kops.InstanceGroup,
ig.Spec.NodeLabels = make(map[string]string)
}
ig.Spec.NodeLabels["kops.k8s.io/gpu"] = "1"
ig.Spec.Taints = append(ig.Spec.Taints, "nvidia.com/gpu:NoSchedule")
hasNvidiaTaint := false
for _, taint := range ig.Spec.Taints {
if strings.HasPrefix(taint, "nvidia.com/gpu") {
hasNvidiaTaint = true
}
}
if !hasNvidiaTaint {
ig.Spec.Taints = append(ig.Spec.Taints, "nvidia.com/gpu:NoSchedule")
}
}
}
}

View File

@ -80,6 +80,49 @@ func TestPopulateInstanceGroup_Image_Required(t *testing.T) {
expectErrorFromPopulateInstanceGroup(t, cluster, g, channel, "unable to determine default image for InstanceGroup nodes")
}
// TestPopulateInstanceGroup_AddTaintsCollision verifies that
// PopulateInstanceGroupSpec does not add a second nvidia.com/gpu taint when
// the instance group already declares one and Nvidia GPU support is enabled
// on the cluster's containerd config.
func TestPopulateInstanceGroup_AddTaintsCollision(t *testing.T) {
	const nvidiaTaint = "nvidia.com/gpu:NoSchedule"

	_, cluster := buildMinimalCluster()
	input := buildMinimalNodeInstanceGroup()
	// Pre-populate the taint that PopulateInstanceGroupSpec would otherwise add.
	input.Spec.Taints = []string{nvidiaTaint}
	input.Spec.MachineType = "g4dn.xlarge"
	cluster.Spec.Containerd.NvidiaGPU = &kopsapi.NvidiaGPUConfig{Enabled: fi.Bool(true)}

	channel := &kopsapi.Channel{}

	cloud, err := BuildCloud(cluster)
	if err != nil {
		t.Fatalf("error from BuildCloud: %v", err)
	}

	output, err := PopulateInstanceGroupSpec(cluster, input, cloud, channel)
	if err != nil {
		t.Fatalf("error from PopulateInstanceGroupSpec: %v", err)
	}

	// Fatal rather than Error: the index access below requires exactly one entry.
	if len(output.Spec.Taints) != 1 {
		t.Fatalf("Expected only 1 taint, got %d", len(output.Spec.Taints))
	}
	// The count alone does not prove the user-supplied taint survived unchanged.
	if got := output.Spec.Taints[0]; got != nvidiaTaint {
		t.Errorf("Expected taint %q, got %q", nvidiaTaint, got)
	}
}
// TestPopulateInstanceGroup_AddTaints verifies that PopulateInstanceGroupSpec
// adds exactly one nvidia.com/gpu:NoSchedule taint when the instance group
// declares no taints of its own and Nvidia GPU support is enabled on the
// cluster's containerd config.
func TestPopulateInstanceGroup_AddTaints(t *testing.T) {
	const nvidiaTaint = "nvidia.com/gpu:NoSchedule"

	_, cluster := buildMinimalCluster()
	input := buildMinimalNodeInstanceGroup()
	input.Spec.MachineType = "g4dn.xlarge"
	cluster.Spec.Containerd.NvidiaGPU = &kopsapi.NvidiaGPUConfig{Enabled: fi.Bool(true)}

	channel := &kopsapi.Channel{}

	cloud, err := BuildCloud(cluster)
	if err != nil {
		t.Fatalf("error from BuildCloud: %v", err)
	}

	output, err := PopulateInstanceGroupSpec(cluster, input, cloud, channel)
	if err != nil {
		t.Fatalf("error from PopulateInstanceGroupSpec: %v", err)
	}

	// Fatal rather than Error: the index access below requires exactly one entry.
	if len(output.Spec.Taints) != 1 {
		t.Fatalf("Expected only 1 taint, got %d", len(output.Spec.Taints))
	}
	// Check the value too, so an unrelated single taint cannot satisfy the test.
	if got := output.Spec.Taints[0]; got != nvidiaTaint {
		t.Errorf("Expected taint %q, got %q", nvidiaTaint, got)
	}
}
func expectErrorFromPopulateInstanceGroup(t *testing.T, cluster *kopsapi.Cluster, g *kopsapi.InstanceGroup, channel *kopsapi.Channel, message string) {
cloud, err := BuildCloud(cluster)
if err != nil {