mirror of https://github.com/kubernetes/kops.git
Merge pull request #17519 from sats-23/sup-image-tag
Add support for passing image for nvidia-k8s-device-plugin
This commit is contained in:
commit
f28b11cd4b
|
@ -932,10 +932,14 @@ spec:
|
||||||
Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
|
Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
|
||||||
They will only be installed on instances that have an Nvidia GPU.
|
They will only be installed on instances that have an Nvidia GPU.
|
||||||
type: boolean
|
type: boolean
|
||||||
|
image:
|
||||||
|
description: Image defines the container image used to deploy
|
||||||
|
the Nvidia Kubernetes Device Plugin.
|
||||||
|
type: string
|
||||||
package:
|
package:
|
||||||
description: |-
|
description: |-
|
||||||
Package is the name of the nvidia driver package that will be installed.
|
Package is the name of the nvidia driver package that will be installed.
|
||||||
Default is "nvidia-headless-460-server".
|
Default is "nvidia-driver-535-server".
|
||||||
type: string
|
type: string
|
||||||
type: object
|
type: object
|
||||||
packages:
|
packages:
|
||||||
|
|
|
@ -168,10 +168,14 @@ spec:
|
||||||
Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
|
Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
|
||||||
They will only be installed on instances that have an Nvidia GPU.
|
They will only be installed on instances that have an Nvidia GPU.
|
||||||
type: boolean
|
type: boolean
|
||||||
|
image:
|
||||||
|
description: Image defines the container image used to deploy
|
||||||
|
the Nvidia Kubernetes Device Plugin.
|
||||||
|
type: string
|
||||||
package:
|
package:
|
||||||
description: |-
|
description: |-
|
||||||
Package is the name of the nvidia driver package that will be installed.
|
Package is the name of the nvidia driver package that will be installed.
|
||||||
Default is "nvidia-headless-460-server".
|
Default is "nvidia-driver-535-server".
|
||||||
type: string
|
type: string
|
||||||
type: object
|
type: object
|
||||||
packages:
|
packages:
|
||||||
|
|
|
@ -21,8 +21,12 @@ import (
|
||||||
"k8s.io/apimachinery/pkg/util/intstr"
|
"k8s.io/apimachinery/pkg/util/intstr"
|
||||||
)
|
)
|
||||||
|
|
||||||
// NvidiaDefaultDriverPackage is the nvidia driver default version
|
const (
|
||||||
const NvidiaDefaultDriverPackage = "nvidia-driver-535-server"
|
// NvidiaDefaultDriverPackage is the name of the Nvidia driver package installed by default
|
||||||
|
NvidiaDefaultDriverPackage = "nvidia-driver-535-server"
|
||||||
|
// NvidiaDevicePluginImage is the Nvidia K8s device plugin container image
|
||||||
|
NvidiaDevicePluginImage = "nvcr.io/nvidia/k8s-device-plugin:v0.17.3"
|
||||||
|
)
|
||||||
|
|
||||||
// ContainerdConfig is the configuration for containerd
|
// ContainerdConfig is the configuration for containerd
|
||||||
type ContainerdConfig struct {
|
type ContainerdConfig struct {
|
||||||
|
@ -69,11 +73,13 @@ type NRIConfig struct {
|
||||||
|
|
||||||
type NvidiaGPUConfig struct {
|
type NvidiaGPUConfig struct {
|
||||||
// Package is the name of the nvidia driver package that will be installed.
|
// Package is the name of the nvidia driver package that will be installed.
|
||||||
// Default is "nvidia-headless-510-server".
|
// Default is "nvidia-driver-535-server".
|
||||||
DriverPackage string `json:"package,omitempty"`
|
DriverPackage string `json:"package,omitempty"`
|
||||||
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
|
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
|
||||||
// They will only be installed on intances that has an Nvidia GPU.
|
// They will only be installed on instances that have an Nvidia GPU.
|
||||||
Enabled *bool `json:"enabled,omitempty"`
|
Enabled *bool `json:"enabled,omitempty"`
|
||||||
|
// Image defines the container image used to deploy the Nvidia Kubernetes Device Plugin.
|
||||||
|
DevicePluginImage string `json:"image,omitempty"`
|
||||||
// DCGMExporterConfig configures the DCGM exporter
|
// DCGMExporterConfig configures the DCGM exporter
|
||||||
DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"`
|
DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
|
@ -66,11 +66,13 @@ type NRIConfig struct {
|
||||||
|
|
||||||
type NvidiaGPUConfig struct {
|
type NvidiaGPUConfig struct {
|
||||||
// Package is the name of the nvidia driver package that will be installed.
|
// Package is the name of the nvidia driver package that will be installed.
|
||||||
// Default is "nvidia-headless-460-server".
|
// Default is "nvidia-driver-535-server".
|
||||||
DriverPackage string `json:"package,omitempty"`
|
DriverPackage string `json:"package,omitempty"`
|
||||||
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
|
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
|
||||||
// They will only be installed on instances that have an Nvidia GPU.
|
// They will only be installed on instances that have an Nvidia GPU.
|
||||||
Enabled *bool `json:"enabled,omitempty"`
|
Enabled *bool `json:"enabled,omitempty"`
|
||||||
|
// Image defines the container image used to deploy the Nvidia Kubernetes Device Plugin.
|
||||||
|
DevicePluginImage string `json:"image,omitempty"`
|
||||||
// DCGMExporterConfig configures the DCGM exporter
|
// DCGMExporterConfig configures the DCGM exporter
|
||||||
DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"`
|
DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
|
@ -6777,6 +6777,7 @@ func Convert_kops_NodeTerminationHandlerSpec_To_v1alpha2_NodeTerminationHandlerS
|
||||||
func autoConvert_v1alpha2_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfig, out *kops.NvidiaGPUConfig, s conversion.Scope) error {
|
func autoConvert_v1alpha2_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfig, out *kops.NvidiaGPUConfig, s conversion.Scope) error {
|
||||||
out.DriverPackage = in.DriverPackage
|
out.DriverPackage = in.DriverPackage
|
||||||
out.Enabled = in.Enabled
|
out.Enabled = in.Enabled
|
||||||
|
out.DevicePluginImage = in.DevicePluginImage
|
||||||
if in.DCGMExporter != nil {
|
if in.DCGMExporter != nil {
|
||||||
in, out := &in.DCGMExporter, &out.DCGMExporter
|
in, out := &in.DCGMExporter, &out.DCGMExporter
|
||||||
*out = new(kops.DCGMExporterConfig)
|
*out = new(kops.DCGMExporterConfig)
|
||||||
|
@ -6797,6 +6798,7 @@ func Convert_v1alpha2_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfi
|
||||||
func autoConvert_kops_NvidiaGPUConfig_To_v1alpha2_NvidiaGPUConfig(in *kops.NvidiaGPUConfig, out *NvidiaGPUConfig, s conversion.Scope) error {
|
func autoConvert_kops_NvidiaGPUConfig_To_v1alpha2_NvidiaGPUConfig(in *kops.NvidiaGPUConfig, out *NvidiaGPUConfig, s conversion.Scope) error {
|
||||||
out.DriverPackage = in.DriverPackage
|
out.DriverPackage = in.DriverPackage
|
||||||
out.Enabled = in.Enabled
|
out.Enabled = in.Enabled
|
||||||
|
out.DevicePluginImage = in.DevicePluginImage
|
||||||
if in.DCGMExporter != nil {
|
if in.DCGMExporter != nil {
|
||||||
in, out := &in.DCGMExporter, &out.DCGMExporter
|
in, out := &in.DCGMExporter, &out.DCGMExporter
|
||||||
*out = new(DCGMExporterConfig)
|
*out = new(DCGMExporterConfig)
|
||||||
|
|
|
@ -66,11 +66,13 @@ type NRIConfig struct {
|
||||||
|
|
||||||
type NvidiaGPUConfig struct {
|
type NvidiaGPUConfig struct {
|
||||||
// Package is the name of the nvidia driver package that will be installed.
|
// Package is the name of the nvidia driver package that will be installed.
|
||||||
// Default is "nvidia-headless-460-server".
|
// Default is "nvidia-driver-535-server".
|
||||||
DriverPackage string `json:"package,omitempty"`
|
DriverPackage string `json:"package,omitempty"`
|
||||||
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
|
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
|
||||||
// They will only be installed on instances that have an Nvidia GPU.
|
// They will only be installed on instances that have an Nvidia GPU.
|
||||||
Enabled *bool `json:"enabled,omitempty"`
|
Enabled *bool `json:"enabled,omitempty"`
|
||||||
|
// Image defines the container image used to deploy the Nvidia Kubernetes Device Plugin.
|
||||||
|
DevicePluginImage string `json:"image,omitempty"`
|
||||||
// DCGMExporterConfig configures the DCGM exporter
|
// DCGMExporterConfig configures the DCGM exporter
|
||||||
DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"`
|
DCGMExporter *DCGMExporterConfig `json:"dcgmExporter,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
|
@ -7034,6 +7034,7 @@ func Convert_kops_NodeTerminationHandlerSpec_To_v1alpha3_NodeTerminationHandlerS
|
||||||
func autoConvert_v1alpha3_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfig, out *kops.NvidiaGPUConfig, s conversion.Scope) error {
|
func autoConvert_v1alpha3_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfig, out *kops.NvidiaGPUConfig, s conversion.Scope) error {
|
||||||
out.DriverPackage = in.DriverPackage
|
out.DriverPackage = in.DriverPackage
|
||||||
out.Enabled = in.Enabled
|
out.Enabled = in.Enabled
|
||||||
|
out.DevicePluginImage = in.DevicePluginImage
|
||||||
if in.DCGMExporter != nil {
|
if in.DCGMExporter != nil {
|
||||||
in, out := &in.DCGMExporter, &out.DCGMExporter
|
in, out := &in.DCGMExporter, &out.DCGMExporter
|
||||||
*out = new(kops.DCGMExporterConfig)
|
*out = new(kops.DCGMExporterConfig)
|
||||||
|
@ -7054,6 +7055,7 @@ func Convert_v1alpha3_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfi
|
||||||
func autoConvert_kops_NvidiaGPUConfig_To_v1alpha3_NvidiaGPUConfig(in *kops.NvidiaGPUConfig, out *NvidiaGPUConfig, s conversion.Scope) error {
|
func autoConvert_kops_NvidiaGPUConfig_To_v1alpha3_NvidiaGPUConfig(in *kops.NvidiaGPUConfig, out *NvidiaGPUConfig, s conversion.Scope) error {
|
||||||
out.DriverPackage = in.DriverPackage
|
out.DriverPackage = in.DriverPackage
|
||||||
out.Enabled = in.Enabled
|
out.Enabled = in.Enabled
|
||||||
|
out.DevicePluginImage = in.DevicePluginImage
|
||||||
if in.DCGMExporter != nil {
|
if in.DCGMExporter != nil {
|
||||||
in, out := &in.DCGMExporter, &out.DCGMExporter
|
in, out := &in.DCGMExporter, &out.DCGMExporter
|
||||||
*out = new(DCGMExporterConfig)
|
*out = new(DCGMExporterConfig)
|
||||||
|
|
|
@ -57,8 +57,14 @@ func (b *ContainerdOptionsBuilder) BuildOptions(o *kops.Cluster) error {
|
||||||
// Set default log level to INFO
|
// Set default log level to INFO
|
||||||
containerd.LogLevel = fi.PtrTo("info")
|
containerd.LogLevel = fi.PtrTo("info")
|
||||||
|
|
||||||
if containerd.NvidiaGPU != nil && fi.ValueOf(containerd.NvidiaGPU.Enabled) && containerd.NvidiaGPU.DriverPackage == "" {
|
if containerd.NvidiaGPU != nil && fi.ValueOf(containerd.NvidiaGPU.Enabled) {
|
||||||
containerd.NvidiaGPU.DriverPackage = kops.NvidiaDefaultDriverPackage
|
if containerd.NvidiaGPU.DriverPackage == "" {
|
||||||
|
containerd.NvidiaGPU.DriverPackage = kops.NvidiaDefaultDriverPackage
|
||||||
|
}
|
||||||
|
|
||||||
|
if containerd.NvidiaGPU.DevicePluginImage == "" {
|
||||||
|
containerd.NvidiaGPU.DevicePluginImage = kops.NvidiaDevicePluginImage
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|
|
@ -130,7 +130,7 @@ ClusterName: minimal.example.com
|
||||||
ConfigBase: memfs://clusters.example.com/minimal.example.com
|
ConfigBase: memfs://clusters.example.com/minimal.example.com
|
||||||
InstanceGroupName: master-us-test-1a
|
InstanceGroupName: master-us-test-1a
|
||||||
InstanceGroupRole: ControlPlane
|
InstanceGroupRole: ControlPlane
|
||||||
NodeupConfigHash: erzIlDR9TAcOlxUIr7WouTREk7YdM0Ma4N2K9r5gk8w=
|
NodeupConfigHash: e8ACnEK0qWqIMgJoxl9ojRfcJMBGi+zNVdLBnTkE0Go=
|
||||||
|
|
||||||
__EOF_KUBE_ENV
|
__EOF_KUBE_ENV
|
||||||
|
|
||||||
|
|
|
@ -153,7 +153,7 @@ ConfigServer:
|
||||||
- https://kops-controller.internal.minimal.example.com:3988/
|
- https://kops-controller.internal.minimal.example.com:3988/
|
||||||
InstanceGroupName: nodes
|
InstanceGroupName: nodes
|
||||||
InstanceGroupRole: Node
|
InstanceGroupRole: Node
|
||||||
NodeupConfigHash: y10GCJ5TES8qY2D0+M5IOOVd3YWvQdbJk1LQxxA37pY=
|
NodeupConfigHash: de+zCCygHUW6ieE/GHLXxzLD92pxl20/vODdPL2IbrA=
|
||||||
|
|
||||||
__EOF_KUBE_ENV
|
__EOF_KUBE_ENV
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,7 @@ spec:
|
||||||
logLevel: info
|
logLevel: info
|
||||||
nvidiaGPU:
|
nvidiaGPU:
|
||||||
enabled: true
|
enabled: true
|
||||||
|
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
|
||||||
package: nvidia-driver-535-server
|
package: nvidia-driver-535-server
|
||||||
runc:
|
runc:
|
||||||
version: 1.2.4
|
version: 1.2.4
|
||||||
|
|
|
@ -92,7 +92,7 @@ spec:
|
||||||
version: 9.99.0
|
version: 9.99.0
|
||||||
- id: k8s-1.16
|
- id: k8s-1.16
|
||||||
manifest: nvidia.addons.k8s.io/k8s-1.16.yaml
|
manifest: nvidia.addons.k8s.io/k8s-1.16.yaml
|
||||||
manifestHash: 5c2ea4e24c2272166a0bef02986a881160d23c39df8c7a5951a894702199d8a4
|
manifestHash: 6233f19a8b22836db6e6a25a121990ab77ea14c4e235a83004710afb515c8ff7
|
||||||
name: nvidia.addons.k8s.io
|
name: nvidia.addons.k8s.io
|
||||||
selector:
|
selector:
|
||||||
k8s-addon: nvidia.addons.k8s.io
|
k8s-addon: nvidia.addons.k8s.io
|
||||||
|
|
|
@ -22,7 +22,7 @@ spec:
|
||||||
containers:
|
containers:
|
||||||
- args:
|
- args:
|
||||||
- --fail-on-init-error=false
|
- --fail-on-init-error=false
|
||||||
image: nvcr.io/nvidia/k8s-device-plugin:v0.12.2
|
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
|
||||||
name: nvidia-device-plugin-ctr
|
name: nvidia-device-plugin-ctr
|
||||||
securityContext:
|
securityContext:
|
||||||
allowPrivilegeEscalation: false
|
allowPrivilegeEscalation: false
|
||||||
|
|
|
@ -308,6 +308,7 @@ Networking:
|
||||||
serviceClusterIPRange: 100.64.0.0/13
|
serviceClusterIPRange: 100.64.0.0/13
|
||||||
NvidiaGPU:
|
NvidiaGPU:
|
||||||
enabled: true
|
enabled: true
|
||||||
|
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
|
||||||
package: nvidia-driver-535-server
|
package: nvidia-driver-535-server
|
||||||
UpdatePolicy: automatic
|
UpdatePolicy: automatic
|
||||||
channels:
|
channels:
|
||||||
|
@ -319,6 +320,7 @@ containerdConfig:
|
||||||
logLevel: info
|
logLevel: info
|
||||||
nvidiaGPU:
|
nvidiaGPU:
|
||||||
enabled: true
|
enabled: true
|
||||||
|
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
|
||||||
package: nvidia-driver-535-server
|
package: nvidia-driver-535-server
|
||||||
runc:
|
runc:
|
||||||
version: 1.2.4
|
version: 1.2.4
|
||||||
|
|
|
@ -58,12 +58,14 @@ Networking:
|
||||||
serviceClusterIPRange: 100.64.0.0/13
|
serviceClusterIPRange: 100.64.0.0/13
|
||||||
NvidiaGPU:
|
NvidiaGPU:
|
||||||
enabled: true
|
enabled: true
|
||||||
|
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
|
||||||
package: nvidia-driver-535-server
|
package: nvidia-driver-535-server
|
||||||
UpdatePolicy: automatic
|
UpdatePolicy: automatic
|
||||||
containerdConfig:
|
containerdConfig:
|
||||||
logLevel: info
|
logLevel: info
|
||||||
nvidiaGPU:
|
nvidiaGPU:
|
||||||
enabled: true
|
enabled: true
|
||||||
|
image: nvcr.io/nvidia/k8s-device-plugin:v0.17.3
|
||||||
package: nvidia-driver-535-server
|
package: nvidia-driver-535-server
|
||||||
runc:
|
runc:
|
||||||
version: 1.2.4
|
version: 1.2.4
|
||||||
|
|
|
@ -17,7 +17,7 @@ spec:
|
||||||
name: nvidia-device-plugin-ds
|
name: nvidia-device-plugin-ds
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- image: nvcr.io/nvidia/k8s-device-plugin:v0.12.2
|
- image: {{ $.Containerd.NvidiaGPU.DevicePluginImage }}
|
||||||
name: nvidia-device-plugin-ctr
|
name: nvidia-device-plugin-ctr
|
||||||
args: ["--fail-on-init-error=false"]
|
args: ["--fail-on-init-error=false"]
|
||||||
securityContext:
|
securityContext:
|
||||||
|
|
Loading…
Reference in New Issue