Merge pull request #17519 from sats-23/sup-image-tag

Add support for passing image for nvidia-k8s-device-plugin
This commit is contained in:
Kubernetes Prow Robot 2025-07-29 06:42:27 -07:00 committed by GitHub
commit f28b11cd4b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 48 additions and 15 deletions

View File

@ -932,10 +932,14 @@ spec:
Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
They will only be installed on instances that have an Nvidia GPU.
type: boolean
image:
description: Image defines the container image used to deploy
the Nvidia Kubernetes Device Plugin.
type: string
package:
description: |-
Package is the name of the nvidia driver package that will be installed.
Default is "nvidia-headless-460-server".
Default is "nvidia-driver-535-server".
type: string
type: object
packages:

View File

@ -168,10 +168,14 @@ spec:
Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
They will only be installed on instances that have an Nvidia GPU.
type: boolean
image:
description: Image defines the container image used to deploy
the Nvidia Kubernetes Device Plugin.
type: string
package:
description: |-
Package is the name of the nvidia driver package that will be installed.
Default is "nvidia-headless-460-server".
Default is "nvidia-driver-535-server".
type: string
type: object
packages:

View File

@ -21,8 +21,12 @@ import (
"k8s.io/apimachinery/pkg/util/intstr"
)
// NvidiaDefaultDriverPackage is the nvidia driver default version
const NvidiaDefaultDriverPackage = "nvidia-driver-535-server"
const (
// NvidiaDefaultDriverPackage is the nvidia driver default version
NvidiaDefaultDriverPackage = "nvidia-driver-535-server"
// NvidiaDevicePluginImage is the Nvidia K8s device plugin container image
NvidiaDevicePluginImage = "nvcr.io/nvidia/k8s-device-plugin:v0.17.3"
)
// ContainerdConfig is the configuration for containerd
type ContainerdConfig struct {
@ -69,11 +73,13 @@ type NRIConfig struct {
type NvidiaGPUConfig struct {
// Package is the name of the nvidia driver package that will be installed.
// Default is "nvidia-headless-510-server".
// Default is "nvidia-driver-535-server".
DriverPackage string `json:"package,omitempty"`
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
// They will only be installed on intances that has an Nvidia GPU.
// They will only be installed on instances that have an Nvidia GPU.
Enabled *bool `json:"enabled,omitempty"`
// Image defines the container image used to deploy the Nvidia Kubernetes Device Plugin.
DevicePluginImage string `json:"image,omitempty"`
// DCGMExporterConfig configures the DCGM exporter
DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"`
}

View File

@ -66,11 +66,13 @@ type NRIConfig struct {
type NvidiaGPUConfig struct {
// Package is the name of the nvidia driver package that will be installed.
// Default is "nvidia-headless-460-server".
// Default is "nvidia-driver-535-server".
DriverPackage string `json:"package,omitempty"`
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
// They will only be installed on instances that have an Nvidia GPU.
Enabled *bool `json:"enabled,omitempty"`
// Image defines the container image used to deploy the Nvidia Kubernetes Device Plugin.
DevicePluginImage string `json:"image,omitempty"`
// DCGMExporterConfig configures the DCGM exporter
DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"`
}

View File

@ -6777,6 +6777,7 @@ func Convert_kops_NodeTerminationHandlerSpec_To_v1alpha2_NodeTerminationHandlerS
func autoConvert_v1alpha2_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfig, out *kops.NvidiaGPUConfig, s conversion.Scope) error {
out.DriverPackage = in.DriverPackage
out.Enabled = in.Enabled
out.DevicePluginImage = in.DevicePluginImage
if in.DCGMExporter != nil {
in, out := &in.DCGMExporter, &out.DCGMExporter
*out = new(kops.DCGMExporterConfig)
@ -6797,6 +6798,7 @@ func Convert_v1alpha2_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfi
func autoConvert_kops_NvidiaGPUConfig_To_v1alpha2_NvidiaGPUConfig(in *kops.NvidiaGPUConfig, out *NvidiaGPUConfig, s conversion.Scope) error {
out.DriverPackage = in.DriverPackage
out.Enabled = in.Enabled
out.DevicePluginImage = in.DevicePluginImage
if in.DCGMExporter != nil {
in, out := &in.DCGMExporter, &out.DCGMExporter
*out = new(DCGMExporterConfig)

View File

@ -66,11 +66,13 @@ type NRIConfig struct {
type NvidiaGPUConfig struct {
// Package is the name of the nvidia driver package that will be installed.
// Default is "nvidia-headless-460-server".
// Default is "nvidia-driver-535-server".
DriverPackage string `json:"package,omitempty"`
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
// They will only be installed on instances that have an Nvidia GPU.
Enabled *bool `json:"enabled,omitempty"`
// Image defines the container image used to deploy the Nvidia Kubernetes Device Plugin.
DevicePluginImage string `json:"image,omitempty"`
// DCGMExporterConfig configures the DCGM exporter
DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"`
}

View File

@ -7034,6 +7034,7 @@ func Convert_kops_NodeTerminationHandlerSpec_To_v1alpha3_NodeTerminationHandlerS
func autoConvert_v1alpha3_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfig, out *kops.NvidiaGPUConfig, s conversion.Scope) error {
out.DriverPackage = in.DriverPackage
out.Enabled = in.Enabled
out.DevicePluginImage = in.DevicePluginImage
if in.DCGMExporter != nil {
in, out := &in.DCGMExporter, &out.DCGMExporter
*out = new(kops.DCGMExporterConfig)
@ -7054,6 +7055,7 @@ func Convert_v1alpha3_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfi
func autoConvert_kops_NvidiaGPUConfig_To_v1alpha3_NvidiaGPUConfig(in *kops.NvidiaGPUConfig, out *NvidiaGPUConfig, s conversion.Scope) error {
out.DriverPackage = in.DriverPackage
out.Enabled = in.Enabled
out.DevicePluginImage = in.DevicePluginImage
if in.DCGMExporter != nil {
in, out := &in.DCGMExporter, &out.DCGMExporter
*out = new(DCGMExporterConfig)

View File

@ -57,8 +57,14 @@ func (b *ContainerdOptionsBuilder) BuildOptions(o *kops.Cluster) error {
// Set default log level to INFO
containerd.LogLevel = fi.PtrTo("info")
if containerd.NvidiaGPU != nil && fi.ValueOf(containerd.NvidiaGPU.Enabled) && containerd.NvidiaGPU.DriverPackage == "" {
containerd.NvidiaGPU.DriverPackage = kops.NvidiaDefaultDriverPackage
if containerd.NvidiaGPU != nil && fi.ValueOf(containerd.NvidiaGPU.Enabled) {
if containerd.NvidiaGPU.DriverPackage == "" {
containerd.NvidiaGPU.DriverPackage = kops.NvidiaDefaultDriverPackage
}
if containerd.NvidiaGPU.DevicePluginImage == "" {
containerd.NvidiaGPU.DevicePluginImage = kops.NvidiaDevicePluginImage
}
}
return nil

View File

@ -130,7 +130,7 @@ ClusterName: minimal.example.com
ConfigBase: memfs://clusters.example.com/minimal.example.com
InstanceGroupName: master-us-test-1a
InstanceGroupRole: ControlPlane
NodeupConfigHash: erzIlDR9TAcOlxUIr7WouTREk7YdM0Ma4N2K9r5gk8w=
NodeupConfigHash: e8ACnEK0qWqIMgJoxl9ojRfcJMBGi+zNVdLBnTkE0Go=
__EOF_KUBE_ENV

View File

@ -153,7 +153,7 @@ ConfigServer:
- https://kops-controller.internal.minimal.example.com:3988/
InstanceGroupName: nodes
InstanceGroupRole: Node
NodeupConfigHash: y10GCJ5TES8qY2D0+M5IOOVd3YWvQdbJk1LQxxA37pY=
NodeupConfigHash: de+zCCygHUW6ieE/GHLXxzLD92pxl20/vODdPL2IbrA=
__EOF_KUBE_ENV

View File

@ -28,6 +28,7 @@ spec:
logLevel: info
nvidiaGPU:
enabled: true
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
package: nvidia-driver-535-server
runc:
version: 1.2.4

View File

@ -92,7 +92,7 @@ spec:
version: 9.99.0
- id: k8s-1.16
manifest: nvidia.addons.k8s.io/k8s-1.16.yaml
manifestHash: 5c2ea4e24c2272166a0bef02986a881160d23c39df8c7a5951a894702199d8a4
manifestHash: 6233f19a8b22836db6e6a25a121990ab77ea14c4e235a83004710afb515c8ff7
name: nvidia.addons.k8s.io
selector:
k8s-addon: nvidia.addons.k8s.io

View File

@ -22,7 +22,7 @@ spec:
containers:
- args:
- --fail-on-init-error=false
image: nvcr.io/nvidia/k8s-device-plugin:v0.12.2
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
name: nvidia-device-plugin-ctr
securityContext:
allowPrivilegeEscalation: false

View File

@ -308,6 +308,7 @@ Networking:
serviceClusterIPRange: 100.64.0.0/13
NvidiaGPU:
enabled: true
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
package: nvidia-driver-535-server
UpdatePolicy: automatic
channels:
@ -319,6 +320,7 @@ containerdConfig:
logLevel: info
nvidiaGPU:
enabled: true
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
package: nvidia-driver-535-server
runc:
version: 1.2.4

View File

@ -58,12 +58,14 @@ Networking:
serviceClusterIPRange: 100.64.0.0/13
NvidiaGPU:
enabled: true
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
package: nvidia-driver-535-server
UpdatePolicy: automatic
containerdConfig:
logLevel: info
nvidiaGPU:
enabled: true
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
package: nvidia-driver-535-server
runc:
version: 1.2.4

View File

@ -17,7 +17,7 @@ spec:
name: nvidia-device-plugin-ds
spec:
containers:
- image: nvcr.io/nvidia/k8s-device-plugin:v0.12.2
- image: {{ $.Containerd.NvidiaGPU.DevicePluginImage }}
name: nvidia-device-plugin-ctr
args: ["--fail-on-init-error=false"]
securityContext: