Merge pull request #17519 from sats-23/sup-image-tag

Add support for passing image for nvidia-k8s-device-plugin
This commit is contained in:
Kubernetes Prow Robot 2025-07-29 06:42:27 -07:00 committed by GitHub
commit f28b11cd4b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 48 additions and 15 deletions

View File

@ -932,10 +932,14 @@ spec:
Enabled determines if kOps will install the Nvidia GPU runtime and drivers. Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
They will only be installed on intances that has an Nvidia GPU. They will only be installed on instances that have an Nvidia GPU.
type: boolean type: boolean
image:
description: Image defines the container image used to deploy
the Nvidia Kubernetes Device Plugin.
type: string
package: package:
description: |- description: |-
Package is the name of the nvidia driver package that will be installed. Package is the name of the nvidia driver package that will be installed.
Default is "nvidia-headless-460-server". Default is "nvidia-driver-535-server".
type: string type: string
type: object type: object
packages: packages:

View File

@ -168,10 +168,14 @@ spec:
Enabled determines if kOps will install the Nvidia GPU runtime and drivers. Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
They will only be installed on intances that has an Nvidia GPU. They will only be installed on instances that have an Nvidia GPU.
type: boolean type: boolean
image:
description: Image defines the container image used to deploy
the Nvidia Kubernetes Device Plugin.
type: string
package: package:
description: |- description: |-
Package is the name of the nvidia driver package that will be installed. Package is the name of the nvidia driver package that will be installed.
Default is "nvidia-headless-460-server". Default is "nvidia-driver-535-server".
type: string type: string
type: object type: object
packages: packages:

View File

@ -21,8 +21,12 @@ import (
"k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/intstr"
) )
// NvidiaDefaultDriverPackage is the nvidia driver default version const (
const NvidiaDefaultDriverPackage = "nvidia-driver-535-server" // NvidiaDefaultDriverPackage is the nvidia driver default version
NvidiaDefaultDriverPackage = "nvidia-driver-535-server"
// NvidiaDevicePluginImage is the Nvidia K8s device plugin container image
NvidiaDevicePluginImage = "nvcr.io/nvidia/k8s-device-plugin:v0.17.3"
)
// ContainerdConfig is the configuration for containerd // ContainerdConfig is the configuration for containerd
type ContainerdConfig struct { type ContainerdConfig struct {
@ -69,11 +73,13 @@ type NRIConfig struct {
type NvidiaGPUConfig struct { type NvidiaGPUConfig struct {
// Package is the name of the nvidia driver package that will be installed. // Package is the name of the nvidia driver package that will be installed.
// Default is "nvidia-headless-510-server". // Default is "nvidia-driver-535-server".
DriverPackage string `json:"package,omitempty"` DriverPackage string `json:"package,omitempty"`
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers. // Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
// They will only be installed on intances that has an Nvidia GPU. // They will only be installed on instances that have an Nvidia GPU.
Enabled *bool `json:"enabled,omitempty"` Enabled *bool `json:"enabled,omitempty"`
// Image defines the container image used to deploy the Nvidia Kubernetes Device Plugin.
DevicePluginImage string `json:"image,omitempty"`
// DCGMExporterConfig configures the DCGM exporter // DCGMExporterConfig configures the DCGM exporter
DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"` DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"`
} }

View File

@ -66,11 +66,13 @@ type NRIConfig struct {
type NvidiaGPUConfig struct { type NvidiaGPUConfig struct {
// Package is the name of the nvidia driver package that will be installed. // Package is the name of the nvidia driver package that will be installed.
// Default is "nvidia-headless-460-server". // Default is "nvidia-driver-535-server".
DriverPackage string `json:"package,omitempty"` DriverPackage string `json:"package,omitempty"`
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers. // Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
// They will only be installed on intances that has an Nvidia GPU. // They will only be installed on instances that have an Nvidia GPU.
Enabled *bool `json:"enabled,omitempty"` Enabled *bool `json:"enabled,omitempty"`
// Image defines the container image used to deploy the Nvidia Kubernetes Device Plugin.
DevicePluginImage string `json:"image,omitempty"`
// DCGMExporterConfig configures the DCGM exporter // DCGMExporterConfig configures the DCGM exporter
DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"` DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"`
} }

View File

@ -6777,6 +6777,7 @@ func Convert_kops_NodeTerminationHandlerSpec_To_v1alpha2_NodeTerminationHandlerS
func autoConvert_v1alpha2_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfig, out *kops.NvidiaGPUConfig, s conversion.Scope) error { func autoConvert_v1alpha2_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfig, out *kops.NvidiaGPUConfig, s conversion.Scope) error {
out.DriverPackage = in.DriverPackage out.DriverPackage = in.DriverPackage
out.Enabled = in.Enabled out.Enabled = in.Enabled
out.DevicePluginImage = in.DevicePluginImage
if in.DCGMExporter != nil { if in.DCGMExporter != nil {
in, out := &in.DCGMExporter, &out.DCGMExporter in, out := &in.DCGMExporter, &out.DCGMExporter
*out = new(kops.DCGMExporterConfig) *out = new(kops.DCGMExporterConfig)
@ -6797,6 +6798,7 @@ func Convert_v1alpha2_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfi
func autoConvert_kops_NvidiaGPUConfig_To_v1alpha2_NvidiaGPUConfig(in *kops.NvidiaGPUConfig, out *NvidiaGPUConfig, s conversion.Scope) error { func autoConvert_kops_NvidiaGPUConfig_To_v1alpha2_NvidiaGPUConfig(in *kops.NvidiaGPUConfig, out *NvidiaGPUConfig, s conversion.Scope) error {
out.DriverPackage = in.DriverPackage out.DriverPackage = in.DriverPackage
out.Enabled = in.Enabled out.Enabled = in.Enabled
out.DevicePluginImage = in.DevicePluginImage
if in.DCGMExporter != nil { if in.DCGMExporter != nil {
in, out := &in.DCGMExporter, &out.DCGMExporter in, out := &in.DCGMExporter, &out.DCGMExporter
*out = new(DCGMExporterConfig) *out = new(DCGMExporterConfig)

View File

@ -66,11 +66,13 @@ type NRIConfig struct {
type NvidiaGPUConfig struct { type NvidiaGPUConfig struct {
// Package is the name of the nvidia driver package that will be installed. // Package is the name of the nvidia driver package that will be installed.
// Default is "nvidia-headless-460-server". // Default is "nvidia-driver-535-server".
DriverPackage string `json:"package,omitempty"` DriverPackage string `json:"package,omitempty"`
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers. // Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
// They will only be installed on intances that has an Nvidia GPU. // They will only be installed on instances that have an Nvidia GPU.
Enabled *bool `json:"enabled,omitempty"` Enabled *bool `json:"enabled,omitempty"`
// Image defines the container image used to deploy the Nvidia Kubernetes Device Plugin.
DevicePluginImage string `json:"image,omitempty"`
// DCGMExporterConfig configures the DCGM exporter // DCGMExporterConfig configures the DCGM exporter
DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"` DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"`
} }

View File

@ -7034,6 +7034,7 @@ func Convert_kops_NodeTerminationHandlerSpec_To_v1alpha3_NodeTerminationHandlerS
func autoConvert_v1alpha3_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfig, out *kops.NvidiaGPUConfig, s conversion.Scope) error { func autoConvert_v1alpha3_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfig, out *kops.NvidiaGPUConfig, s conversion.Scope) error {
out.DriverPackage = in.DriverPackage out.DriverPackage = in.DriverPackage
out.Enabled = in.Enabled out.Enabled = in.Enabled
out.DevicePluginImage = in.DevicePluginImage
if in.DCGMExporter != nil { if in.DCGMExporter != nil {
in, out := &in.DCGMExporter, &out.DCGMExporter in, out := &in.DCGMExporter, &out.DCGMExporter
*out = new(kops.DCGMExporterConfig) *out = new(kops.DCGMExporterConfig)
@ -7054,6 +7055,7 @@ func Convert_v1alpha3_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfi
func autoConvert_kops_NvidiaGPUConfig_To_v1alpha3_NvidiaGPUConfig(in *kops.NvidiaGPUConfig, out *NvidiaGPUConfig, s conversion.Scope) error { func autoConvert_kops_NvidiaGPUConfig_To_v1alpha3_NvidiaGPUConfig(in *kops.NvidiaGPUConfig, out *NvidiaGPUConfig, s conversion.Scope) error {
out.DriverPackage = in.DriverPackage out.DriverPackage = in.DriverPackage
out.Enabled = in.Enabled out.Enabled = in.Enabled
out.DevicePluginImage = in.DevicePluginImage
if in.DCGMExporter != nil { if in.DCGMExporter != nil {
in, out := &in.DCGMExporter, &out.DCGMExporter in, out := &in.DCGMExporter, &out.DCGMExporter
*out = new(DCGMExporterConfig) *out = new(DCGMExporterConfig)

View File

@ -57,8 +57,14 @@ func (b *ContainerdOptionsBuilder) BuildOptions(o *kops.Cluster) error {
// Set default log level to INFO // Set default log level to INFO
containerd.LogLevel = fi.PtrTo("info") containerd.LogLevel = fi.PtrTo("info")
if containerd.NvidiaGPU != nil && fi.ValueOf(containerd.NvidiaGPU.Enabled) && containerd.NvidiaGPU.DriverPackage == "" { if containerd.NvidiaGPU != nil && fi.ValueOf(containerd.NvidiaGPU.Enabled) {
containerd.NvidiaGPU.DriverPackage = kops.NvidiaDefaultDriverPackage if containerd.NvidiaGPU.DriverPackage == "" {
containerd.NvidiaGPU.DriverPackage = kops.NvidiaDefaultDriverPackage
}
if containerd.NvidiaGPU.DevicePluginImage == "" {
containerd.NvidiaGPU.DevicePluginImage = kops.NvidiaDevicePluginImage
}
} }
return nil return nil

View File

@ -130,7 +130,7 @@ ClusterName: minimal.example.com
ConfigBase: memfs://clusters.example.com/minimal.example.com ConfigBase: memfs://clusters.example.com/minimal.example.com
InstanceGroupName: master-us-test-1a InstanceGroupName: master-us-test-1a
InstanceGroupRole: ControlPlane InstanceGroupRole: ControlPlane
NodeupConfigHash: erzIlDR9TAcOlxUIr7WouTREk7YdM0Ma4N2K9r5gk8w= NodeupConfigHash: e8ACnEK0qWqIMgJoxl9ojRfcJMBGi+zNVdLBnTkE0Go=
__EOF_KUBE_ENV __EOF_KUBE_ENV

View File

@ -153,7 +153,7 @@ ConfigServer:
- https://kops-controller.internal.minimal.example.com:3988/ - https://kops-controller.internal.minimal.example.com:3988/
InstanceGroupName: nodes InstanceGroupName: nodes
InstanceGroupRole: Node InstanceGroupRole: Node
NodeupConfigHash: y10GCJ5TES8qY2D0+M5IOOVd3YWvQdbJk1LQxxA37pY= NodeupConfigHash: de+zCCygHUW6ieE/GHLXxzLD92pxl20/vODdPL2IbrA=
__EOF_KUBE_ENV __EOF_KUBE_ENV

View File

@ -28,6 +28,7 @@ spec:
logLevel: info logLevel: info
nvidiaGPU: nvidiaGPU:
enabled: true enabled: true
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
package: nvidia-driver-535-server package: nvidia-driver-535-server
runc: runc:
version: 1.2.4 version: 1.2.4

View File

@ -92,7 +92,7 @@ spec:
version: 9.99.0 version: 9.99.0
- id: k8s-1.16 - id: k8s-1.16
manifest: nvidia.addons.k8s.io/k8s-1.16.yaml manifest: nvidia.addons.k8s.io/k8s-1.16.yaml
manifestHash: 5c2ea4e24c2272166a0bef02986a881160d23c39df8c7a5951a894702199d8a4 manifestHash: 6233f19a8b22836db6e6a25a121990ab77ea14c4e235a83004710afb515c8ff7
name: nvidia.addons.k8s.io name: nvidia.addons.k8s.io
selector: selector:
k8s-addon: nvidia.addons.k8s.io k8s-addon: nvidia.addons.k8s.io

View File

@ -22,7 +22,7 @@ spec:
containers: containers:
- args: - args:
- --fail-on-init-error=false - --fail-on-init-error=false
image: nvcr.io/nvidia/k8s-device-plugin:v0.12.2 image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
name: nvidia-device-plugin-ctr name: nvidia-device-plugin-ctr
securityContext: securityContext:
allowPrivilegeEscalation: false allowPrivilegeEscalation: false

View File

@ -308,6 +308,7 @@ Networking:
serviceClusterIPRange: 100.64.0.0/13 serviceClusterIPRange: 100.64.0.0/13
NvidiaGPU: NvidiaGPU:
enabled: true enabled: true
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
package: nvidia-driver-535-server package: nvidia-driver-535-server
UpdatePolicy: automatic UpdatePolicy: automatic
channels: channels:
@ -319,6 +320,7 @@ containerdConfig:
logLevel: info logLevel: info
nvidiaGPU: nvidiaGPU:
enabled: true enabled: true
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
package: nvidia-driver-535-server package: nvidia-driver-535-server
runc: runc:
version: 1.2.4 version: 1.2.4

View File

@ -58,12 +58,14 @@ Networking:
serviceClusterIPRange: 100.64.0.0/13 serviceClusterIPRange: 100.64.0.0/13
NvidiaGPU: NvidiaGPU:
enabled: true enabled: true
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
package: nvidia-driver-535-server package: nvidia-driver-535-server
UpdatePolicy: automatic UpdatePolicy: automatic
containerdConfig: containerdConfig:
logLevel: info logLevel: info
nvidiaGPU: nvidiaGPU:
enabled: true enabled: true
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
package: nvidia-driver-535-server package: nvidia-driver-535-server
runc: runc:
version: 1.2.4 version: 1.2.4

View File

@ -17,7 +17,7 @@ spec:
name: nvidia-device-plugin-ds name: nvidia-device-plugin-ds
spec: spec:
containers: containers:
- image: nvcr.io/nvidia/k8s-device-plugin:v0.12.2 - image: {{ $.Containerd.NvidiaGPU.DevicePluginImage }}
name: nvidia-device-plugin-ctr name: nvidia-device-plugin-ctr
args: ["--fail-on-init-error=false"] args: ["--fail-on-init-error=false"]
securityContext: securityContext: