diff --git a/pkg/testutils/cluster.go b/pkg/testutils/cluster.go
index b1dee80e38..ec71d374e9 100644
--- a/pkg/testutils/cluster.go
+++ b/pkg/testutils/cluster.go
@@ -33,6 +33,9 @@ func BuildMinimalCluster(clusterName string) *kops.Cluster {
 		{Name: "subnet-us-mock-1a", Zone: "us-mock-1a", CIDR: "172.20.1.0/24", Type: kops.SubnetTypePrivate},
 	}
 
+	c.Spec.ContainerRuntime = "containerd"
+	c.Spec.Containerd = &kops.ContainerdConfig{}
+
 	c.Spec.MasterPublicName = fmt.Sprintf("api.%v", clusterName)
 	c.Spec.MasterInternalName = fmt.Sprintf("internal.api.%v", clusterName)
 	c.Spec.KubernetesAPIAccess = []string{"0.0.0.0/0"}
diff --git a/upup/pkg/fi/cloudup/populate_instancegroup_spec.go b/upup/pkg/fi/cloudup/populate_instancegroup_spec.go
index d5f5145add..d2e3f88ed5 100644
--- a/upup/pkg/fi/cloudup/populate_instancegroup_spec.go
+++ b/upup/pkg/fi/cloudup/populate_instancegroup_spec.go
@@ -18,6 +18,7 @@ package cloudup
 
 import (
 	"fmt"
+	"strings"
 
 	"github.com/aws/aws-sdk-go/service/ec2"
 	"github.com/blang/semver/v4"
@@ -186,7 +187,19 @@ func PopulateInstanceGroupSpec(cluster *kops.Cluster, input *kops.InstanceGroup,
 					ig.Spec.NodeLabels = make(map[string]string)
 				}
 				ig.Spec.NodeLabels["kops.k8s.io/gpu"] = "1"
-				ig.Spec.Taints = append(ig.Spec.Taints, "nvidia.com/gpu:NoSchedule")
+				// Append the GPU taint only when no existing taint already
+				// starts with the nvidia.com/gpu key, so a taint supplied on
+				// the instance group is not duplicated.
+				hasNvidiaTaint := false
+				for _, taint := range ig.Spec.Taints {
+					if strings.HasPrefix(taint, "nvidia.com/gpu") {
+						hasNvidiaTaint = true
+						break
+					}
+				}
+				if !hasNvidiaTaint {
+					ig.Spec.Taints = append(ig.Spec.Taints, "nvidia.com/gpu:NoSchedule")
+				}
 			}
 		}
 	}
diff --git a/upup/pkg/fi/cloudup/populate_instancegroup_spec_test.go b/upup/pkg/fi/cloudup/populate_instancegroup_spec_test.go
index 30b5da3f47..00d2e3eabf 100644
--- a/upup/pkg/fi/cloudup/populate_instancegroup_spec_test.go
+++ b/upup/pkg/fi/cloudup/populate_instancegroup_spec_test.go
@@ -80,6 +80,54 @@ func TestPopulateInstanceGroup_Image_Required(t *testing.T) {
 	expectErrorFromPopulateInstanceGroup(t, cluster, g, channel, "unable to determine default image for InstanceGroup nodes")
 }
 
+// TestPopulateInstanceGroup_AddTaintsCollision checks that an instance group
+// which already carries the nvidia.com/gpu:NoSchedule taint still has exactly
+// one taint after PopulateInstanceGroupSpec runs with NvidiaGPU enabled.
+func TestPopulateInstanceGroup_AddTaintsCollision(t *testing.T) {
+	_, cluster := buildMinimalCluster()
+	input := buildMinimalNodeInstanceGroup()
+	input.Spec.Taints = []string{"nvidia.com/gpu:NoSchedule"}
+	input.Spec.MachineType = "g4dn.xlarge"
+	cluster.Spec.Containerd.NvidiaGPU = &kopsapi.NvidiaGPUConfig{Enabled: fi.Bool(true)}
+
+	channel := &kopsapi.Channel{}
+
+	cloud, err := BuildCloud(cluster)
+	if err != nil {
+		t.Fatalf("error from BuildCloud: %v", err)
+	}
+	output, err := PopulateInstanceGroupSpec(cluster, input, cloud, channel)
+	if err != nil {
+		t.Fatalf("error from PopulateInstanceGroupSpec: %v", err)
+	}
+	if len(output.Spec.Taints) != 1 {
+		t.Errorf("Expected only 1 taint, got %d", len(output.Spec.Taints))
+	}
+}
+
+// TestPopulateInstanceGroup_AddTaints checks that an instance group with no
+// taints of its own ends up with exactly the nvidia.com/gpu:NoSchedule taint.
+func TestPopulateInstanceGroup_AddTaints(t *testing.T) {
+	_, cluster := buildMinimalCluster()
+	input := buildMinimalNodeInstanceGroup()
+	input.Spec.MachineType = "g4dn.xlarge"
+	cluster.Spec.Containerd.NvidiaGPU = &kopsapi.NvidiaGPUConfig{Enabled: fi.Bool(true)}
+
+	channel := &kopsapi.Channel{}
+
+	cloud, err := BuildCloud(cluster)
+	if err != nil {
+		t.Fatalf("error from BuildCloud: %v", err)
+	}
+	output, err := PopulateInstanceGroupSpec(cluster, input, cloud, channel)
+	if err != nil {
+		t.Fatalf("error from PopulateInstanceGroupSpec: %v", err)
+	}
+	if len(output.Spec.Taints) != 1 || output.Spec.Taints[0] != "nvidia.com/gpu:NoSchedule" {
+		t.Errorf("Expected only the nvidia.com/gpu:NoSchedule taint, got %v", output.Spec.Taints)
+	}
+}
+
 func expectErrorFromPopulateInstanceGroup(t *testing.T, cluster *kopsapi.Cluster, g *kopsapi.InstanceGroup, channel *kopsapi.Channel, message string) {
 	cloud, err := BuildCloud(cluster)
 	if err != nil {