diff --git a/docs/releases/1.24-NOTES.md b/docs/releases/1.24-NOTES.md index 6f3e7cf20c..803646bb2b 100644 --- a/docs/releases/1.24-NOTES.md +++ b/docs/releases/1.24-NOTES.md @@ -27,6 +27,8 @@ kOps will directly manage the Karpenter Provisioner resources. Read more about h * The minimum version for the Terraform AWS Provider has been bumped to 4.0.0 to address the deprecation of the aws_s3_bucket_object resource and its replacement with the aws_s3_object resource. Such resources will be destroyed and recreated without downtime when applying the changes. +* ARM64 support for the Nvidia device driver. Nvidia nodes on ARM64 require Ubuntu 22.04 AMIs. + # Breaking changes ## Control plane taints and labels diff --git a/pkg/apis/kops/containerdconfig.go b/pkg/apis/kops/containerdconfig.go index bb026d3820..61e472383e 100644 --- a/pkg/apis/kops/containerdconfig.go +++ b/pkg/apis/kops/containerdconfig.go @@ -17,7 +17,7 @@ limitations under the License. package kops // NvidiaDefaultDriverPackage is the nvidia driver default version -const NvidiaDefaultDriverPackage = "nvidia-headless-460-server" +const NvidiaDefaultDriverPackage = "nvidia-headless-510-server" // ContainerdConfig is the configuration for containerd type ContainerdConfig struct { @@ -45,7 +45,7 @@ type ContainerdConfig struct { type NvidiaGPUConfig struct { // Package is the name of the nvidia driver package that will be installed. - // Default is "nvidia-headless-460-server". + // Default is "nvidia-headless-510-server". DriverPackage string `json:"package,omitempty"` // Enabled determines if kOps will install the Nvidia GPU runtime and drivers. // They will only be installed on intances that has an Nvidia GPU. 
diff --git a/tests/integration/update_cluster/nvidia/cloudformation.json.extracted.yaml b/tests/integration/update_cluster/nvidia/cloudformation.json.extracted.yaml index e9bde6ded4..9092e24d08 100644 --- a/tests/integration/update_cluster/nvidia/cloudformation.json.extracted.yaml +++ b/tests/integration/update_cluster/nvidia/cloudformation.json.extracted.yaml @@ -133,7 +133,7 @@ Resources.AWSEC2LaunchTemplatemasterustest1amastersminimalexamplecom.Properties. logLevel: info nvidiaGPU: enabled: true - package: nvidia-headless-460-server + package: nvidia-headless-510-server version: 1.4.12 docker: skipInstall: true @@ -248,7 +248,7 @@ Resources.AWSEC2LaunchTemplatemasterustest1amastersminimalexamplecom.Properties. ConfigBase: memfs://clusters.example.com/minimal.example.com InstanceGroupName: master-us-test-1a InstanceGroupRole: Master - NodeupConfigHash: XfW6vDRNRfTHJPOGeKVjmi0X8oyJNK2bnzKsVZy00K8= + NodeupConfigHash: Pxds5Al5CozIY+FCKHuqBqRAPIeptslqIdIdX8lPnkE= __EOF_KUBE_ENV @@ -389,7 +389,7 @@ Resources.AWSEC2LaunchTemplatenodesminimalexamplecom.Properties.LaunchTemplateDa logLevel: info nvidiaGPU: enabled: true - package: nvidia-headless-460-server + package: nvidia-headless-510-server version: 1.4.12 docker: skipInstall: true @@ -422,7 +422,7 @@ Resources.AWSEC2LaunchTemplatenodesminimalexamplecom.Properties.LaunchTemplateDa ConfigBase: memfs://clusters.example.com/minimal.example.com InstanceGroupName: nodes InstanceGroupRole: Node - NodeupConfigHash: rXMJNOQF0/HDGwb7T9kem3+Ydeau/+I6oiA8ankZdEM= + NodeupConfigHash: me4mBnPl+0J3noivly6hq8f5geHok77lvs0noMjPxHQ= __EOF_KUBE_ENV diff --git a/tests/integration/update_cluster/nvidia/data/aws_launch_template_master-us-test-1a.masters.minimal.example.com_user_data b/tests/integration/update_cluster/nvidia/data/aws_launch_template_master-us-test-1a.masters.minimal.example.com_user_data index 6f16b2c374..6adc16a63d 100644 --- 
a/tests/integration/update_cluster/nvidia/data/aws_launch_template_master-us-test-1a.masters.minimal.example.com_user_data +++ b/tests/integration/update_cluster/nvidia/data/aws_launch_template_master-us-test-1a.masters.minimal.example.com_user_data @@ -132,7 +132,7 @@ containerd: logLevel: info nvidiaGPU: enabled: true - package: nvidia-headless-460-server + package: nvidia-headless-510-server version: 1.4.12 docker: skipInstall: true @@ -247,7 +247,7 @@ CloudProvider: aws ConfigBase: memfs://clusters.example.com/minimal.example.com InstanceGroupName: master-us-test-1a InstanceGroupRole: Master -NodeupConfigHash: XfW6vDRNRfTHJPOGeKVjmi0X8oyJNK2bnzKsVZy00K8= +NodeupConfigHash: Pxds5Al5CozIY+FCKHuqBqRAPIeptslqIdIdX8lPnkE= __EOF_KUBE_ENV diff --git a/tests/integration/update_cluster/nvidia/data/aws_launch_template_nodes.minimal.example.com_user_data b/tests/integration/update_cluster/nvidia/data/aws_launch_template_nodes.minimal.example.com_user_data index 162647d1a9..658aade637 100644 --- a/tests/integration/update_cluster/nvidia/data/aws_launch_template_nodes.minimal.example.com_user_data +++ b/tests/integration/update_cluster/nvidia/data/aws_launch_template_nodes.minimal.example.com_user_data @@ -132,7 +132,7 @@ containerd: logLevel: info nvidiaGPU: enabled: true - package: nvidia-headless-460-server + package: nvidia-headless-510-server version: 1.4.12 docker: skipInstall: true @@ -165,7 +165,7 @@ CloudProvider: aws ConfigBase: memfs://clusters.example.com/minimal.example.com InstanceGroupName: nodes InstanceGroupRole: Node -NodeupConfigHash: rXMJNOQF0/HDGwb7T9kem3+Ydeau/+I6oiA8ankZdEM= +NodeupConfigHash: me4mBnPl+0J3noivly6hq8f5geHok77lvs0noMjPxHQ= __EOF_KUBE_ENV diff --git a/tests/integration/update_cluster/nvidia/data/aws_s3_object_cluster-completed.spec_content b/tests/integration/update_cluster/nvidia/data/aws_s3_object_cluster-completed.spec_content index dc7c661fe4..4e921fc935 100644 --- 
a/tests/integration/update_cluster/nvidia/data/aws_s3_object_cluster-completed.spec_content +++ b/tests/integration/update_cluster/nvidia/data/aws_s3_object_cluster-completed.spec_content @@ -22,7 +22,7 @@ spec: logLevel: info nvidiaGPU: enabled: true - package: nvidia-headless-460-server + package: nvidia-headless-510-server version: 1.4.12 dnsZone: Z1AFAKE1ZON3YO docker: diff --git a/tests/integration/update_cluster/nvidia/data/aws_s3_object_minimal.example.com-addons-bootstrap_content b/tests/integration/update_cluster/nvidia/data/aws_s3_object_minimal.example.com-addons-bootstrap_content index 0435fbf604..eb0a372d34 100644 --- a/tests/integration/update_cluster/nvidia/data/aws_s3_object_minimal.example.com-addons-bootstrap_content +++ b/tests/integration/update_cluster/nvidia/data/aws_s3_object_minimal.example.com-addons-bootstrap_content @@ -47,7 +47,7 @@ spec: version: 9.99.0 - id: k8s-1.16 manifest: nvidia.addons.k8s.io/k8s-1.16.yaml - manifestHash: cee96cdd2bdf02a76bd8ed78e56ddac52339d9f03a5722184bf5716be4342e9d + manifestHash: 0b5963253c1cf13686226a8893b9b9e3ffd1373d5e2fb0699588d8714a4ba78a name: nvidia.addons.k8s.io selector: k8s-addon: nvidia.addons.k8s.io diff --git a/tests/integration/update_cluster/nvidia/data/aws_s3_object_minimal.example.com-addons-nvidia.addons.k8s.io-k8s-1.16_content b/tests/integration/update_cluster/nvidia/data/aws_s3_object_minimal.example.com-addons-nvidia.addons.k8s.io-k8s-1.16_content index bb3b9ab46a..a86c2be9e7 100644 --- a/tests/integration/update_cluster/nvidia/data/aws_s3_object_minimal.example.com-addons-nvidia.addons.k8s.io-k8s-1.16_content +++ b/tests/integration/update_cluster/nvidia/data/aws_s3_object_minimal.example.com-addons-nvidia.addons.k8s.io-k8s-1.16_content @@ -22,7 +22,7 @@ spec: containers: - args: - --fail-on-init-error=false - image: docker.io/nvidia/k8s-device-plugin:v0.10.0 + image: docker.io/nvidia/k8s-device-plugin:v0.11.0 name: nvidia-device-plugin-ctr securityContext: allowPrivilegeEscalation: 
false diff --git a/tests/integration/update_cluster/nvidia/data/aws_s3_object_nodeupconfig-master-us-test-1a_content b/tests/integration/update_cluster/nvidia/data/aws_s3_object_nodeupconfig-master-us-test-1a_content index e926223d5a..32aed8888b 100644 --- a/tests/integration/update_cluster/nvidia/data/aws_s3_object_nodeupconfig-master-us-test-1a_content +++ b/tests/integration/update_cluster/nvidia/data/aws_s3_object_nodeupconfig-master-us-test-1a_content @@ -255,7 +255,7 @@ KubeletConfig: shutdownGracePeriodCriticalPods: 10s NvidiaGPU: enabled: true - package: nvidia-headless-460-server + package: nvidia-headless-510-server UpdatePolicy: automatic channels: - memfs://clusters.example.com/minimal.example.com/addons/bootstrap-channel.yaml @@ -263,7 +263,7 @@ containerdConfig: logLevel: info nvidiaGPU: enabled: true - package: nvidia-headless-460-server + package: nvidia-headless-510-server version: 1.4.12 etcdManifests: - memfs://clusters.example.com/minimal.example.com/manifests/etcd/main.yaml diff --git a/tests/integration/update_cluster/nvidia/data/aws_s3_object_nodeupconfig-nodes_content b/tests/integration/update_cluster/nvidia/data/aws_s3_object_nodeupconfig-nodes_content index 76841844bb..1fe976d9cd 100644 --- a/tests/integration/update_cluster/nvidia/data/aws_s3_object_nodeupconfig-nodes_content +++ b/tests/integration/update_cluster/nvidia/data/aws_s3_object_nodeupconfig-nodes_content @@ -61,7 +61,7 @@ KubeletConfig: - nvidia.com/gpu:NoSchedule NvidiaGPU: enabled: true - package: nvidia-headless-460-server + package: nvidia-headless-510-server UpdatePolicy: automatic channels: - memfs://clusters.example.com/minimal.example.com/addons/bootstrap-channel.yaml @@ -69,5 +69,5 @@ containerdConfig: logLevel: info nvidiaGPU: enabled: true - package: nvidia-headless-460-server + package: nvidia-headless-510-server version: 1.4.12 diff --git a/upup/models/cloudup/resources/addons/nvidia.addons.k8s.io/k8s-1.16.yaml 
b/upup/models/cloudup/resources/addons/nvidia.addons.k8s.io/k8s-1.16.yaml index 7e088cd263..9a5944c499 100644 --- a/upup/models/cloudup/resources/addons/nvidia.addons.k8s.io/k8s-1.16.yaml +++ b/upup/models/cloudup/resources/addons/nvidia.addons.k8s.io/k8s-1.16.yaml @@ -17,7 +17,7 @@ spec: name: nvidia-device-plugin-ds spec: containers: - - image: docker.io/nvidia/k8s-device-plugin:v0.10.0 + - image: docker.io/nvidia/k8s-device-plugin:v0.11.0 name: nvidia-device-plugin-ctr args: ["--fail-on-init-error=false"] securityContext: