mirror of https://github.com/kubernetes/kops.git
Merge pull request #17519 from sats-23/sup-image-tag
Add support for passing image for nvidia-k8s-device-plugin
This commit is contained in:
commit
f28b11cd4b
|
@ -932,10 +932,14 @@ spec:
|
|||
Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
|
||||
They will only be installed on instances that have an Nvidia GPU.
|
||||
type: boolean
|
||||
image:
|
||||
description: Image defines the container image used to deploy
|
||||
the Nvidia Kubernetes Device Plugin.
|
||||
type: string
|
||||
package:
|
||||
description: |-
|
||||
Package is the name of the nvidia driver package that will be installed.
|
||||
Default is "nvidia-headless-460-server".
|
||||
Default is "nvidia-driver-535-server".
|
||||
type: string
|
||||
type: object
|
||||
packages:
|
||||
|
|
|
@ -168,10 +168,14 @@ spec:
|
|||
Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
|
||||
They will only be installed on instances that have an Nvidia GPU.
|
||||
type: boolean
|
||||
image:
|
||||
description: Image defines the container image used to deploy
|
||||
the Nvidia Kubernetes Device Plugin.
|
||||
type: string
|
||||
package:
|
||||
description: |-
|
||||
Package is the name of the nvidia driver package that will be installed.
|
||||
Default is "nvidia-headless-460-server".
|
||||
Default is "nvidia-driver-535-server".
|
||||
type: string
|
||||
type: object
|
||||
packages:
|
||||
|
|
|
@ -21,8 +21,12 @@ import (
|
|||
"k8s.io/apimachinery/pkg/util/intstr"
|
||||
)
|
||||
|
||||
const (
|
||||
// NvidiaDefaultDriverPackage is the nvidia driver default version
|
||||
const NvidiaDefaultDriverPackage = "nvidia-driver-535-server"
|
||||
NvidiaDefaultDriverPackage = "nvidia-driver-535-server"
|
||||
// NvidiaDevicePluginImage is the Nvidia K8s device plugin container image
|
||||
NvidiaDevicePluginImage = "nvcr.io/nvidia/k8s-device-plugin:v0.17.3"
|
||||
)
|
||||
|
||||
// ContainerdConfig is the configuration for containerd
|
||||
type ContainerdConfig struct {
|
||||
|
@ -69,11 +73,13 @@ type NRIConfig struct {
|
|||
|
||||
type NvidiaGPUConfig struct {
|
||||
// Package is the name of the nvidia driver package that will be installed.
|
||||
// Default is "nvidia-headless-510-server".
|
||||
// Default is "nvidia-driver-535-server".
|
||||
DriverPackage string `json:"package,omitempty"`
|
||||
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
|
||||
// They will only be installed on intances that has an Nvidia GPU.
|
||||
// They will only be installed on instances that have an Nvidia GPU.
|
||||
Enabled *bool `json:"enabled,omitempty"`
|
||||
// Image defines the container image used to deploy the Nvidia Kubernetes Device Plugin.
|
||||
DevicePluginImage string `json:"image,omitempty"`
|
||||
// DCGMExporterConfig configures the DCGM exporter
|
||||
DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"`
|
||||
}
|
||||
|
|
|
@ -66,11 +66,13 @@ type NRIConfig struct {
|
|||
|
||||
type NvidiaGPUConfig struct {
|
||||
// Package is the name of the nvidia driver package that will be installed.
|
||||
// Default is "nvidia-headless-460-server".
|
||||
// Default is "nvidia-driver-535-server".
|
||||
DriverPackage string `json:"package,omitempty"`
|
||||
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
|
||||
// They will only be installed on instances that have an Nvidia GPU.
|
||||
Enabled *bool `json:"enabled,omitempty"`
|
||||
// Image defines the container image used to deploy the Nvidia Kubernetes Device Plugin.
|
||||
DevicePluginImage string `json:"image,omitempty"`
|
||||
// DCGMExporterConfig configures the DCGM exporter
|
||||
DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"`
|
||||
}
|
||||
|
|
|
@ -6777,6 +6777,7 @@ func Convert_kops_NodeTerminationHandlerSpec_To_v1alpha2_NodeTerminationHandlerS
|
|||
func autoConvert_v1alpha2_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfig, out *kops.NvidiaGPUConfig, s conversion.Scope) error {
|
||||
out.DriverPackage = in.DriverPackage
|
||||
out.Enabled = in.Enabled
|
||||
out.DevicePluginImage = in.DevicePluginImage
|
||||
if in.DCGMExporter != nil {
|
||||
in, out := &in.DCGMExporter, &out.DCGMExporter
|
||||
*out = new(kops.DCGMExporterConfig)
|
||||
|
@ -6797,6 +6798,7 @@ func Convert_v1alpha2_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfi
|
|||
func autoConvert_kops_NvidiaGPUConfig_To_v1alpha2_NvidiaGPUConfig(in *kops.NvidiaGPUConfig, out *NvidiaGPUConfig, s conversion.Scope) error {
|
||||
out.DriverPackage = in.DriverPackage
|
||||
out.Enabled = in.Enabled
|
||||
out.DevicePluginImage = in.DevicePluginImage
|
||||
if in.DCGMExporter != nil {
|
||||
in, out := &in.DCGMExporter, &out.DCGMExporter
|
||||
*out = new(DCGMExporterConfig)
|
||||
|
|
|
@ -66,11 +66,13 @@ type NRIConfig struct {
|
|||
|
||||
type NvidiaGPUConfig struct {
|
||||
// Package is the name of the nvidia driver package that will be installed.
|
||||
// Default is "nvidia-headless-460-server".
|
||||
// Default is "nvidia-driver-535-server".
|
||||
DriverPackage string `json:"package,omitempty"`
|
||||
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
|
||||
// They will only be installed on instances that have an Nvidia GPU.
|
||||
Enabled *bool `json:"enabled,omitempty"`
|
||||
// Image defines the container image used to deploy the Nvidia Kubernetes Device Plugin.
|
||||
DevicePluginImage string `json:"image,omitempty"`
|
||||
// DCGMExporterConfig configures the DCGM exporter
|
||||
DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"`
|
||||
}
|
||||
|
|
|
@ -7034,6 +7034,7 @@ func Convert_kops_NodeTerminationHandlerSpec_To_v1alpha3_NodeTerminationHandlerS
|
|||
func autoConvert_v1alpha3_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfig, out *kops.NvidiaGPUConfig, s conversion.Scope) error {
|
||||
out.DriverPackage = in.DriverPackage
|
||||
out.Enabled = in.Enabled
|
||||
out.DevicePluginImage = in.DevicePluginImage
|
||||
if in.DCGMExporter != nil {
|
||||
in, out := &in.DCGMExporter, &out.DCGMExporter
|
||||
*out = new(kops.DCGMExporterConfig)
|
||||
|
@ -7054,6 +7055,7 @@ func Convert_v1alpha3_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfi
|
|||
func autoConvert_kops_NvidiaGPUConfig_To_v1alpha3_NvidiaGPUConfig(in *kops.NvidiaGPUConfig, out *NvidiaGPUConfig, s conversion.Scope) error {
|
||||
out.DriverPackage = in.DriverPackage
|
||||
out.Enabled = in.Enabled
|
||||
out.DevicePluginImage = in.DevicePluginImage
|
||||
if in.DCGMExporter != nil {
|
||||
in, out := &in.DCGMExporter, &out.DCGMExporter
|
||||
*out = new(DCGMExporterConfig)
|
||||
|
|
|
@ -57,9 +57,15 @@ func (b *ContainerdOptionsBuilder) BuildOptions(o *kops.Cluster) error {
|
|||
// Set default log level to INFO
|
||||
containerd.LogLevel = fi.PtrTo("info")
|
||||
|
||||
if containerd.NvidiaGPU != nil && fi.ValueOf(containerd.NvidiaGPU.Enabled) && containerd.NvidiaGPU.DriverPackage == "" {
|
||||
if containerd.NvidiaGPU != nil && fi.ValueOf(containerd.NvidiaGPU.Enabled) {
|
||||
if containerd.NvidiaGPU.DriverPackage == "" {
|
||||
containerd.NvidiaGPU.DriverPackage = kops.NvidiaDefaultDriverPackage
|
||||
}
|
||||
|
||||
if containerd.NvidiaGPU.DevicePluginImage == "" {
|
||||
containerd.NvidiaGPU.DevicePluginImage = kops.NvidiaDevicePluginImage
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -130,7 +130,7 @@ ClusterName: minimal.example.com
|
|||
ConfigBase: memfs://clusters.example.com/minimal.example.com
|
||||
InstanceGroupName: master-us-test-1a
|
||||
InstanceGroupRole: ControlPlane
|
||||
NodeupConfigHash: erzIlDR9TAcOlxUIr7WouTREk7YdM0Ma4N2K9r5gk8w=
|
||||
NodeupConfigHash: e8ACnEK0qWqIMgJoxl9ojRfcJMBGi+zNVdLBnTkE0Go=
|
||||
|
||||
__EOF_KUBE_ENV
|
||||
|
||||
|
|
|
@ -153,7 +153,7 @@ ConfigServer:
|
|||
- https://kops-controller.internal.minimal.example.com:3988/
|
||||
InstanceGroupName: nodes
|
||||
InstanceGroupRole: Node
|
||||
NodeupConfigHash: y10GCJ5TES8qY2D0+M5IOOVd3YWvQdbJk1LQxxA37pY=
|
||||
NodeupConfigHash: de+zCCygHUW6ieE/GHLXxzLD92pxl20/vODdPL2IbrA=
|
||||
|
||||
__EOF_KUBE_ENV
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@ spec:
|
|||
logLevel: info
|
||||
nvidiaGPU:
|
||||
enabled: true
|
||||
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
|
||||
package: nvidia-driver-535-server
|
||||
runc:
|
||||
version: 1.2.4
|
||||
|
|
|
@ -92,7 +92,7 @@ spec:
|
|||
version: 9.99.0
|
||||
- id: k8s-1.16
|
||||
manifest: nvidia.addons.k8s.io/k8s-1.16.yaml
|
||||
manifestHash: 5c2ea4e24c2272166a0bef02986a881160d23c39df8c7a5951a894702199d8a4
|
||||
manifestHash: 6233f19a8b22836db6e6a25a121990ab77ea14c4e235a83004710afb515c8ff7
|
||||
name: nvidia.addons.k8s.io
|
||||
selector:
|
||||
k8s-addon: nvidia.addons.k8s.io
|
||||
|
|
|
@ -22,7 +22,7 @@ spec:
|
|||
containers:
|
||||
- args:
|
||||
- --fail-on-init-error=false
|
||||
image: nvcr.io/nvidia/k8s-device-plugin:v0.12.2
|
||||
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
|
||||
name: nvidia-device-plugin-ctr
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
|
|
|
@ -308,6 +308,7 @@ Networking:
|
|||
serviceClusterIPRange: 100.64.0.0/13
|
||||
NvidiaGPU:
|
||||
enabled: true
|
||||
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
|
||||
package: nvidia-driver-535-server
|
||||
UpdatePolicy: automatic
|
||||
channels:
|
||||
|
@ -319,6 +320,7 @@ containerdConfig:
|
|||
logLevel: info
|
||||
nvidiaGPU:
|
||||
enabled: true
|
||||
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
|
||||
package: nvidia-driver-535-server
|
||||
runc:
|
||||
version: 1.2.4
|
||||
|
|
|
@ -58,12 +58,14 @@ Networking:
|
|||
serviceClusterIPRange: 100.64.0.0/13
|
||||
NvidiaGPU:
|
||||
enabled: true
|
||||
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
|
||||
package: nvidia-driver-535-server
|
||||
UpdatePolicy: automatic
|
||||
containerdConfig:
|
||||
logLevel: info
|
||||
nvidiaGPU:
|
||||
enabled: true
|
||||
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
|
||||
package: nvidia-driver-535-server
|
||||
runc:
|
||||
version: 1.2.4
|
||||
|
|
|
@ -17,7 +17,7 @@ spec:
|
|||
name: nvidia-device-plugin-ds
|
||||
spec:
|
||||
containers:
|
||||
- image: nvcr.io/nvidia/k8s-device-plugin:v0.12.2
|
||||
- image: {{ $.Containerd.NvidiaGPU.DevicePluginImage }}
|
||||
name: nvidia-device-plugin-ctr
|
||||
args: ["--fail-on-init-error=false"]
|
||||
securityContext:
|
||||
|
|
Loading…
Reference in New Issue