Bump nvidia device plugin to 0.11 and nvidia driver to 510

This commit is contained in:
Ole Markus With 2022-04-30 09:20:02 +02:00
parent 653aa6c606
commit e622736992
11 changed files with 20 additions and 18 deletions

View File

@ -27,6 +27,8 @@ kOps will directly manage the Karpenter Provisioner resources. Read more about h
* The minimum version for the Terraform AWS Provider has been bumped to 4.0.0 to address the deprecation of the aws_s3_bucket_object resource and its replacement with the aws_s3_object resource. Such resources will be destroyed and recreated without downtime when applying the changes.
* ARM64 support for the Nvidia device driver. Nvidia nodes on ARM64 require Ubuntu 22.04 AMIs.
# Breaking changes
## Control plane taints and labels

View File

@ -17,7 +17,7 @@ limitations under the License.
package kops
// NvidiaDefaultDriverPackage is the nvidia driver default version
const NvidiaDefaultDriverPackage = "nvidia-headless-460-server"
const NvidiaDefaultDriverPackage = "nvidia-headless-510-server"
// ContainerdConfig is the configuration for containerd
type ContainerdConfig struct {
@ -45,7 +45,7 @@ type ContainerdConfig struct {
type NvidiaGPUConfig struct {
// Package is the name of the nvidia driver package that will be installed.
// Default is "nvidia-headless-460-server".
// Default is "nvidia-headless-510-server".
DriverPackage string `json:"package,omitempty"`
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
// They will only be installed on instances that have an Nvidia GPU.

View File

@ -133,7 +133,7 @@ Resources.AWSEC2LaunchTemplatemasterustest1amastersminimalexamplecom.Properties.
logLevel: info
nvidiaGPU:
enabled: true
package: nvidia-headless-460-server
package: nvidia-headless-510-server
version: 1.4.12
docker:
skipInstall: true
@ -248,7 +248,7 @@ Resources.AWSEC2LaunchTemplatemasterustest1amastersminimalexamplecom.Properties.
ConfigBase: memfs://clusters.example.com/minimal.example.com
InstanceGroupName: master-us-test-1a
InstanceGroupRole: Master
NodeupConfigHash: XfW6vDRNRfTHJPOGeKVjmi0X8oyJNK2bnzKsVZy00K8=
NodeupConfigHash: Pxds5Al5CozIY+FCKHuqBqRAPIeptslqIdIdX8lPnkE=
__EOF_KUBE_ENV
@ -389,7 +389,7 @@ Resources.AWSEC2LaunchTemplatenodesminimalexamplecom.Properties.LaunchTemplateDa
logLevel: info
nvidiaGPU:
enabled: true
package: nvidia-headless-460-server
package: nvidia-headless-510-server
version: 1.4.12
docker:
skipInstall: true
@ -422,7 +422,7 @@ Resources.AWSEC2LaunchTemplatenodesminimalexamplecom.Properties.LaunchTemplateDa
ConfigBase: memfs://clusters.example.com/minimal.example.com
InstanceGroupName: nodes
InstanceGroupRole: Node
NodeupConfigHash: rXMJNOQF0/HDGwb7T9kem3+Ydeau/+I6oiA8ankZdEM=
NodeupConfigHash: me4mBnPl+0J3noivly6hq8f5geHok77lvs0noMjPxHQ=
__EOF_KUBE_ENV

View File

@ -132,7 +132,7 @@ containerd:
logLevel: info
nvidiaGPU:
enabled: true
package: nvidia-headless-460-server
package: nvidia-headless-510-server
version: 1.4.12
docker:
skipInstall: true
@ -247,7 +247,7 @@ CloudProvider: aws
ConfigBase: memfs://clusters.example.com/minimal.example.com
InstanceGroupName: master-us-test-1a
InstanceGroupRole: Master
NodeupConfigHash: XfW6vDRNRfTHJPOGeKVjmi0X8oyJNK2bnzKsVZy00K8=
NodeupConfigHash: Pxds5Al5CozIY+FCKHuqBqRAPIeptslqIdIdX8lPnkE=
__EOF_KUBE_ENV

View File

@ -132,7 +132,7 @@ containerd:
logLevel: info
nvidiaGPU:
enabled: true
package: nvidia-headless-460-server
package: nvidia-headless-510-server
version: 1.4.12
docker:
skipInstall: true
@ -165,7 +165,7 @@ CloudProvider: aws
ConfigBase: memfs://clusters.example.com/minimal.example.com
InstanceGroupName: nodes
InstanceGroupRole: Node
NodeupConfigHash: rXMJNOQF0/HDGwb7T9kem3+Ydeau/+I6oiA8ankZdEM=
NodeupConfigHash: me4mBnPl+0J3noivly6hq8f5geHok77lvs0noMjPxHQ=
__EOF_KUBE_ENV

View File

@ -22,7 +22,7 @@ spec:
logLevel: info
nvidiaGPU:
enabled: true
package: nvidia-headless-460-server
package: nvidia-headless-510-server
version: 1.4.12
dnsZone: Z1AFAKE1ZON3YO
docker:

View File

@ -47,7 +47,7 @@ spec:
version: 9.99.0
- id: k8s-1.16
manifest: nvidia.addons.k8s.io/k8s-1.16.yaml
manifestHash: cee96cdd2bdf02a76bd8ed78e56ddac52339d9f03a5722184bf5716be4342e9d
manifestHash: 0b5963253c1cf13686226a8893b9b9e3ffd1373d5e2fb0699588d8714a4ba78a
name: nvidia.addons.k8s.io
selector:
k8s-addon: nvidia.addons.k8s.io

View File

@ -22,7 +22,7 @@ spec:
containers:
- args:
- --fail-on-init-error=false
image: docker.io/nvidia/k8s-device-plugin:v0.10.0
image: docker.io/nvidia/k8s-device-plugin:v0.11.0
name: nvidia-device-plugin-ctr
securityContext:
allowPrivilegeEscalation: false

View File

@ -255,7 +255,7 @@ KubeletConfig:
shutdownGracePeriodCriticalPods: 10s
NvidiaGPU:
enabled: true
package: nvidia-headless-460-server
package: nvidia-headless-510-server
UpdatePolicy: automatic
channels:
- memfs://clusters.example.com/minimal.example.com/addons/bootstrap-channel.yaml
@ -263,7 +263,7 @@ containerdConfig:
logLevel: info
nvidiaGPU:
enabled: true
package: nvidia-headless-460-server
package: nvidia-headless-510-server
version: 1.4.12
etcdManifests:
- memfs://clusters.example.com/minimal.example.com/manifests/etcd/main.yaml

View File

@ -61,7 +61,7 @@ KubeletConfig:
- nvidia.com/gpu:NoSchedule
NvidiaGPU:
enabled: true
package: nvidia-headless-460-server
package: nvidia-headless-510-server
UpdatePolicy: automatic
channels:
- memfs://clusters.example.com/minimal.example.com/addons/bootstrap-channel.yaml
@ -69,5 +69,5 @@ containerdConfig:
logLevel: info
nvidiaGPU:
enabled: true
package: nvidia-headless-460-server
package: nvidia-headless-510-server
version: 1.4.12

View File

@ -17,7 +17,7 @@ spec:
name: nvidia-device-plugin-ds
spec:
containers:
- image: docker.io/nvidia/k8s-device-plugin:v0.10.0
- image: docker.io/nvidia/k8s-device-plugin:v0.11.0
name: nvidia-device-plugin-ctr
args: ["--fail-on-init-error=false"]
securityContext: