Move nvidia config under containerd

This commit is contained in:
Ole Markus With 2021-09-02 20:44:09 +02:00
parent dae4b123ba
commit f5fed2a08d
40 changed files with 262 additions and 204 deletions

View File

@ -184,7 +184,10 @@ func TestMinimal(t *testing.T) {
func TestNvidia(t *testing.T) {
newIntegrationTest("minimal.example.com", "nvidia").
withAddons("nvidia.addons.k8s.io-k8s-1.16").
withAddons(
dnsControllerAddon,
"nvidia.addons.k8s.io-k8s-1.16",
).
runTestTerraformAWS(t)
}

View File

@ -658,6 +658,19 @@ spec:
description: LogLevel controls the logging details [trace, debug,
info, warn, error, fatal, panic] (default "info").
type: string
nvidiaGPU:
description: NvidiaGPU configures the Nvidia GPU runtime.
properties:
enabled:
description: Enabled determines if kOps will install the Nvidia
GPU runtime and drivers. They will only be installed on
instances that have an Nvidia GPU.
type: boolean
package:
description: Package is the name of the nvidia driver package
that will be installed. Default is "nvidia-headless-460-server".
type: string
type: object
packages:
description: Packages overrides the URL and hash for the packages.
properties:
@ -4238,19 +4251,6 @@ spec:
to false.
type: boolean
type: object
nvidia:
description: NvidiaConfiguration configures the Nvidia GPU runtime.
properties:
enabled:
description: Enabled determines if kOps will install the Nvidia
GPU runtime and drivers. They will only be installed on intances
that has an Nvidia GPU.
type: boolean
package:
description: Package is the name of the nvidia driver package
that will be installed. Default is "nvidia-headless-460-server".
type: string
type: object
podCIDR:
description: PodCIDR is the CIDR from which we allocate IPs for pods
type: string

View File

@ -693,7 +693,7 @@ func (c *NodeupModelContext) CNIConfDir() string {
}
func (c *NodeupModelContext) InstallNvidiaRuntime() bool {
return c.NodeupConfig.Nvidia != nil &&
fi.BoolValue(c.NodeupConfig.Nvidia.Enabled) &&
return c.NodeupConfig.NvidiaGPU != nil &&
fi.BoolValue(c.NodeupConfig.NvidiaGPU.Enabled) &&
c.GPUVendor == architectures.GPUVendorNvidia
}

View File

@ -41,7 +41,7 @@ func (b *NvidiaBuilder) Build(c *fi.ModelBuilderContext) error {
},
})
c.AddTask(&nodetasks.Package{Name: "nvidia-container-runtime"})
c.AddTask(&nodetasks.Package{Name: b.NodeupConfig.Nvidia.DriverPackage})
c.AddTask(&nodetasks.Package{Name: b.NodeupConfig.NvidiaGPU.DriverPackage})
}
return nil
}

View File

@ -212,17 +212,6 @@ type ClusterSpec struct {
ServiceAccountIssuerDiscovery *ServiceAccountIssuerDiscoveryConfig `json:"serviceAccountIssuerDiscovery,omitempty"`
// SnapshotController defines the CSI Snapshot Controller configuration.
SnapshotController *SnapshotControllerConfig `json:"snapshotController,omitempty"`
// NvidiaConfiguration configures the Nvidia GPU runtime.
Nvidia *NvidiaConfig `json:"nvidia,omitempty"`
}
type NvidiaConfig struct {
// Package is the name of the nvidia driver package that will be installed.
// Default is "nvidia-headless-460-server".
DriverPackage string `json:"package,omitempty"`
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
// They will only be installed on intances that has an Nvidia GPU.
Enabled *bool `json:"enabled,omitempty"`
}
// ServiceAccountIssuerDiscoveryConfig configures an OIDC Issuer.

View File

@ -36,4 +36,15 @@ type ContainerdConfig struct {
State *string `json:"state,omitempty" flag:"state"`
// Version used to pick the containerd package.
Version *string `json:"version,omitempty"`
// NvidiaGPU configures the Nvidia GPU runtime.
NvidiaGPU *NvidiaGPUConfig `json:"nvidiaGPU,omitempty"`
}
// NvidiaGPUConfig configures the Nvidia GPU runtime.
type NvidiaGPUConfig struct {
// Package is the name of the nvidia driver package that will be installed.
// Default is "nvidia-headless-460-server".
DriverPackage string `json:"package,omitempty"`
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
// They will only be installed on instances that have an Nvidia GPU.
Enabled *bool `json:"enabled,omitempty"`
}

View File

@ -210,17 +210,6 @@ type ClusterSpec struct {
ServiceAccountIssuerDiscovery *ServiceAccountIssuerDiscoveryConfig `json:"serviceAccountIssuerDiscovery,omitempty"`
// SnapshotController defines the CSI Snapshot Controller configuration.
SnapshotController *SnapshotControllerConfig `json:"snapshotController,omitempty"`
// NvidiaConfiguration configures the Nvidia GPU runtime.
Nvidia *NvidiaConfig `json:"nvidia,omitempty"`
}
type NvidiaConfig struct {
// Package is the name of the nvidia driver package that will be installed.
// Default is "nvidia-headless-460-server".
DriverPackage string `json:"package,omitempty"`
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
// They will only be installed on intances that has an Nvidia GPU.
Enabled *bool `json:"enabled,omitempty"`
}
// ServiceAccountIssuerDiscoveryConfig configures an OIDC Issuer.

View File

@ -36,4 +36,15 @@ type ContainerdConfig struct {
State *string `json:"state,omitempty" flag:"state"`
// Version used to pick the containerd package.
Version *string `json:"version,omitempty"`
// NvidiaGPU configures the Nvidia GPU runtime.
NvidiaGPU *NvidiaGPUConfig `json:"nvidiaGPU,omitempty"`
}
// NvidiaGPUConfig configures the Nvidia GPU runtime.
type NvidiaGPUConfig struct {
// Package is the name of the nvidia driver package that will be installed.
// Default is "nvidia-headless-460-server".
DriverPackage string `json:"package,omitempty"`
// Enabled determines if kOps will install the Nvidia GPU runtime and drivers.
// They will only be installed on instances that have an Nvidia GPU.
Enabled *bool `json:"enabled,omitempty"`
}

View File

@ -883,13 +883,13 @@ func RegisterConversions(s *runtime.Scheme) error {
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*NvidiaConfig)(nil), (*kops.NvidiaConfig)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha2_NvidiaConfig_To_kops_NvidiaConfig(a.(*NvidiaConfig), b.(*kops.NvidiaConfig), scope)
if err := s.AddGeneratedConversionFunc((*NvidiaGPUConfig)(nil), (*kops.NvidiaGPUConfig)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha2_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(a.(*NvidiaGPUConfig), b.(*kops.NvidiaGPUConfig), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*kops.NvidiaConfig)(nil), (*NvidiaConfig)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_kops_NvidiaConfig_To_v1alpha2_NvidiaConfig(a.(*kops.NvidiaConfig), b.(*NvidiaConfig), scope)
if err := s.AddGeneratedConversionFunc((*kops.NvidiaGPUConfig)(nil), (*NvidiaGPUConfig)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_kops_NvidiaGPUConfig_To_v1alpha2_NvidiaGPUConfig(a.(*kops.NvidiaGPUConfig), b.(*NvidiaGPUConfig), scope)
}); err != nil {
return err
}
@ -2700,15 +2700,6 @@ func autoConvert_v1alpha2_ClusterSpec_To_kops_ClusterSpec(in *ClusterSpec, out *
} else {
out.SnapshotController = nil
}
if in.Nvidia != nil {
in, out := &in.Nvidia, &out.Nvidia
*out = new(kops.NvidiaConfig)
if err := Convert_v1alpha2_NvidiaConfig_To_kops_NvidiaConfig(*in, *out, s); err != nil {
return err
}
} else {
out.Nvidia = nil
}
return nil
}
@ -3121,15 +3112,6 @@ func autoConvert_kops_ClusterSpec_To_v1alpha2_ClusterSpec(in *kops.ClusterSpec,
} else {
out.SnapshotController = nil
}
if in.Nvidia != nil {
in, out := &in.Nvidia, &out.Nvidia
*out = new(NvidiaConfig)
if err := Convert_kops_NvidiaConfig_To_v1alpha2_NvidiaConfig(*in, *out, s); err != nil {
return err
}
} else {
out.Nvidia = nil
}
return nil
}
@ -3192,6 +3174,15 @@ func autoConvert_v1alpha2_ContainerdConfig_To_kops_ContainerdConfig(in *Containe
out.SkipInstall = in.SkipInstall
out.State = in.State
out.Version = in.Version
if in.NvidiaGPU != nil {
in, out := &in.NvidiaGPU, &out.NvidiaGPU
*out = new(kops.NvidiaGPUConfig)
if err := Convert_v1alpha2_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(*in, *out, s); err != nil {
return err
}
} else {
out.NvidiaGPU = nil
}
return nil
}
@ -3218,6 +3209,15 @@ func autoConvert_kops_ContainerdConfig_To_v1alpha2_ContainerdConfig(in *kops.Con
out.SkipInstall = in.SkipInstall
out.State = in.State
out.Version = in.Version
if in.NvidiaGPU != nil {
in, out := &in.NvidiaGPU, &out.NvidiaGPU
*out = new(NvidiaGPUConfig)
if err := Convert_kops_NvidiaGPUConfig_To_v1alpha2_NvidiaGPUConfig(*in, *out, s); err != nil {
return err
}
} else {
out.NvidiaGPU = nil
}
return nil
}
@ -6150,26 +6150,26 @@ func Convert_kops_NodeTerminationHandlerConfig_To_v1alpha2_NodeTerminationHandle
return autoConvert_kops_NodeTerminationHandlerConfig_To_v1alpha2_NodeTerminationHandlerConfig(in, out, s)
}
func autoConvert_v1alpha2_NvidiaConfig_To_kops_NvidiaConfig(in *NvidiaConfig, out *kops.NvidiaConfig, s conversion.Scope) error {
func autoConvert_v1alpha2_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfig, out *kops.NvidiaGPUConfig, s conversion.Scope) error {
out.DriverPackage = in.DriverPackage
out.Enabled = in.Enabled
return nil
}
// Convert_v1alpha2_NvidiaConfig_To_kops_NvidiaConfig is an autogenerated conversion function.
func Convert_v1alpha2_NvidiaConfig_To_kops_NvidiaConfig(in *NvidiaConfig, out *kops.NvidiaConfig, s conversion.Scope) error {
return autoConvert_v1alpha2_NvidiaConfig_To_kops_NvidiaConfig(in, out, s)
// Convert_v1alpha2_NvidiaGPUConfig_To_kops_NvidiaGPUConfig is an autogenerated conversion function.
func Convert_v1alpha2_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in *NvidiaGPUConfig, out *kops.NvidiaGPUConfig, s conversion.Scope) error {
return autoConvert_v1alpha2_NvidiaGPUConfig_To_kops_NvidiaGPUConfig(in, out, s)
}
func autoConvert_kops_NvidiaConfig_To_v1alpha2_NvidiaConfig(in *kops.NvidiaConfig, out *NvidiaConfig, s conversion.Scope) error {
func autoConvert_kops_NvidiaGPUConfig_To_v1alpha2_NvidiaGPUConfig(in *kops.NvidiaGPUConfig, out *NvidiaGPUConfig, s conversion.Scope) error {
out.DriverPackage = in.DriverPackage
out.Enabled = in.Enabled
return nil
}
// Convert_kops_NvidiaConfig_To_v1alpha2_NvidiaConfig is an autogenerated conversion function.
func Convert_kops_NvidiaConfig_To_v1alpha2_NvidiaConfig(in *kops.NvidiaConfig, out *NvidiaConfig, s conversion.Scope) error {
return autoConvert_kops_NvidiaConfig_To_v1alpha2_NvidiaConfig(in, out, s)
// Convert_kops_NvidiaGPUConfig_To_v1alpha2_NvidiaGPUConfig is an autogenerated conversion function.
func Convert_kops_NvidiaGPUConfig_To_v1alpha2_NvidiaGPUConfig(in *kops.NvidiaGPUConfig, out *NvidiaGPUConfig, s conversion.Scope) error {
return autoConvert_kops_NvidiaGPUConfig_To_v1alpha2_NvidiaGPUConfig(in, out, s)
}
func autoConvert_v1alpha2_OpenstackBlockStorageConfig_To_kops_OpenstackBlockStorageConfig(in *OpenstackBlockStorageConfig, out *kops.OpenstackBlockStorageConfig, s conversion.Scope) error {

View File

@ -1185,11 +1185,6 @@ func (in *ClusterSpec) DeepCopyInto(out *ClusterSpec) {
*out = new(SnapshotControllerConfig)
(*in).DeepCopyInto(*out)
}
if in.Nvidia != nil {
in, out := &in.Nvidia, &out.Nvidia
*out = new(NvidiaConfig)
(*in).DeepCopyInto(*out)
}
return
}
@ -1272,6 +1267,11 @@ func (in *ContainerdConfig) DeepCopyInto(out *ContainerdConfig) {
*out = new(string)
**out = **in
}
if in.NvidiaGPU != nil {
in, out := &in.NvidiaGPU, &out.NvidiaGPU
*out = new(NvidiaGPUConfig)
(*in).DeepCopyInto(*out)
}
return
}
@ -4137,7 +4137,7 @@ func (in *NodeTerminationHandlerConfig) DeepCopy() *NodeTerminationHandlerConfig
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *NvidiaConfig) DeepCopyInto(out *NvidiaConfig) {
func (in *NvidiaGPUConfig) DeepCopyInto(out *NvidiaGPUConfig) {
*out = *in
if in.Enabled != nil {
in, out := &in.Enabled, &out.Enabled
@ -4147,12 +4147,12 @@ func (in *NvidiaConfig) DeepCopyInto(out *NvidiaConfig) {
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NvidiaConfig.
func (in *NvidiaConfig) DeepCopy() *NvidiaConfig {
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NvidiaGPUConfig.
func (in *NvidiaGPUConfig) DeepCopy() *NvidiaGPUConfig {
if in == nil {
return nil
}
out := new(NvidiaConfig)
out := new(NvidiaGPUConfig)
in.DeepCopyInto(out)
return out
}

View File

@ -214,17 +214,13 @@ func validateClusterSpec(spec *kops.ClusterSpec, c *kops.Cluster, fieldPath *fie
}
if spec.Containerd != nil {
allErrs = append(allErrs, validateContainerdConfig(spec.Containerd, fieldPath.Child("containerd"))...)
allErrs = append(allErrs, validateContainerdConfig(spec, spec.Containerd, fieldPath.Child("containerd"))...)
}
if spec.Docker != nil {
allErrs = append(allErrs, validateDockerConfig(spec.Docker, fieldPath.Child("docker"))...)
}
if spec.Nvidia != nil {
allErrs = append(allErrs, validateNvidiaConfig(spec, fieldPath.Child("nvidia"))...)
}
if spec.Assets != nil {
if spec.Assets.ContainerProxy != nil && spec.Assets.ContainerRegistry != nil {
allErrs = append(allErrs, field.Forbidden(fieldPath.Child("assets", "containerProxy"), "containerProxy cannot be used in conjunction with containerRegistry"))
@ -1260,7 +1256,7 @@ func validateContainerRuntime(runtime *string, fldPath *field.Path) field.ErrorL
return allErrs
}
func validateContainerdConfig(config *kops.ContainerdConfig, fldPath *field.Path) field.ErrorList {
func validateContainerdConfig(spec *kops.ClusterSpec, config *kops.ContainerdConfig, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
if config.Version != nil {
@ -1317,6 +1313,10 @@ func validateContainerdConfig(config *kops.ContainerdConfig, fldPath *field.Path
}
}
if config.NvidiaGPU != nil {
allErrs = append(allErrs, validateNvidiaConfig(spec, config.NvidiaGPU, fldPath.Child("nvidia"))...)
}
return allErrs
}
@ -1391,8 +1391,7 @@ func validateDockerConfig(config *kops.DockerConfig, fldPath *field.Path) field.
return allErrs
}
func validateNvidiaConfig(spec *kops.ClusterSpec, fldPath *field.Path) (allErrs field.ErrorList) {
nvidia := spec.Nvidia
func validateNvidiaConfig(spec *kops.ClusterSpec, nvidia *kops.NvidiaGPUConfig, fldPath *field.Path) (allErrs field.ErrorList) {
if !fi.BoolValue(nvidia.Enabled) {
return allErrs
}

View File

@ -1320,8 +1320,10 @@ func Test_Validate_Nvdia(t *testing.T) {
}{
{
Input: kops.ClusterSpec{
Nvidia: &kops.NvidiaConfig{
Enabled: fi.Bool(true),
Containerd: &kops.ContainerdConfig{
NvidiaGPU: &kops.NvidiaGPUConfig{
Enabled: fi.Bool(true),
},
},
CloudProvider: "aws",
ContainerRuntime: "containerd",
@ -1329,27 +1331,31 @@ func Test_Validate_Nvdia(t *testing.T) {
},
{
Input: kops.ClusterSpec{
Nvidia: &kops.NvidiaConfig{
Enabled: fi.Bool(true),
Containerd: &kops.ContainerdConfig{
NvidiaGPU: &kops.NvidiaGPUConfig{
Enabled: fi.Bool(true),
},
},
CloudProvider: "gce",
ContainerRuntime: "containerd",
},
ExpectedErrors: []string{"Forbidden::nvidia"},
ExpectedErrors: []string{"Forbidden::containerd.nvidiaGPU"},
},
{
Input: kops.ClusterSpec{
Nvidia: &kops.NvidiaConfig{
Enabled: fi.Bool(true),
Containerd: &kops.ContainerdConfig{
NvidiaGPU: &kops.NvidiaGPUConfig{
Enabled: fi.Bool(true),
},
},
CloudProvider: "aws",
ContainerRuntime: "docker",
},
ExpectedErrors: []string{"Forbidden::nvidia"},
ExpectedErrors: []string{"Forbidden::containerd.nvidiaGPU"},
},
}
for _, g := range grid {
errs := validateNvidiaConfig(&g.Input, field.NewPath("nvidia"))
errs := validateNvidiaConfig(&g.Input, g.Input.Containerd.NvidiaGPU, field.NewPath("containerd", "nvidiaGPU"))
testErrors(t, g.Input, errs, g.ExpectedErrors)
}
}

View File

@ -1269,11 +1269,6 @@ func (in *ClusterSpec) DeepCopyInto(out *ClusterSpec) {
*out = new(SnapshotControllerConfig)
(*in).DeepCopyInto(*out)
}
if in.Nvidia != nil {
in, out := &in.Nvidia, &out.Nvidia
*out = new(NvidiaConfig)
(*in).DeepCopyInto(*out)
}
return
}
@ -1379,6 +1374,11 @@ func (in *ContainerdConfig) DeepCopyInto(out *ContainerdConfig) {
*out = new(string)
**out = **in
}
if in.NvidiaGPU != nil {
in, out := &in.NvidiaGPU, &out.NvidiaGPU
*out = new(NvidiaGPUConfig)
(*in).DeepCopyInto(*out)
}
return
}
@ -4319,7 +4319,7 @@ func (in *NodeTerminationHandlerConfig) DeepCopy() *NodeTerminationHandlerConfig
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *NvidiaConfig) DeepCopyInto(out *NvidiaConfig) {
func (in *NvidiaGPUConfig) DeepCopyInto(out *NvidiaGPUConfig) {
*out = *in
if in.Enabled != nil {
in, out := &in.Enabled, &out.Enabled
@ -4329,12 +4329,12 @@ func (in *NvidiaConfig) DeepCopyInto(out *NvidiaConfig) {
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NvidiaConfig.
func (in *NvidiaConfig) DeepCopy() *NvidiaConfig {
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NvidiaGPUConfig.
func (in *NvidiaGPUConfig) DeepCopy() *NvidiaGPUConfig {
if in == nil {
return nil
}
out := new(NvidiaConfig)
out := new(NvidiaGPUConfig)
in.DeepCopyInto(out)
return out
}

View File

@ -76,8 +76,8 @@ type Config struct {
// APIServerConfig is additional configuration for nodes running an APIServer.
APIServerConfig *APIServerConfig `json:",omitempty"`
// Nvidia contains the configuration for nvidia
Nvidia *kops.NvidiaConfig `json:",omitempty"`
// NvidiaGPU contains the configuration for nvidia
NvidiaGPU *kops.NvidiaGPUConfig `json:",omitempty"`
}
// BootConfig is the configuration for the nodeup binary that might be too big to fit in userdata.

View File

@ -83,5 +83,9 @@ func (b *ContainerdOptionsBuilder) BuildOptions(o interface{}) error {
containerd.SkipInstall = true
}
if containerd.NvidiaGPU != nil && fi.BoolValue(containerd.NvidiaGPU.Enabled) && containerd.NvidiaGPU.DriverPackage == "" {
containerd.NvidiaGPU.DriverPackage = "nvidia-headless-460-server"
}
return nil
}

View File

@ -18,7 +18,6 @@ package components
import (
"k8s.io/kops/pkg/apis/kops"
"k8s.io/kops/upup/pkg/fi"
"k8s.io/kops/upup/pkg/fi/loader"
)
@ -53,9 +52,5 @@ func (b *DefaultsOptionsBuilder) BuildOptions(o interface{}) error {
options.ExternalDNS.Provider = kops.ExternalDNSProviderDNSController
}
if options.Nvidia != nil && fi.BoolValue(options.Nvidia.Enabled) && options.Nvidia.DriverPackage == "" {
options.Nvidia.DriverPackage = "nvidia-headless-460-server"
}
return nil
}

View File

@ -148,6 +148,7 @@
"ec2:CreateSecurityGroup",
"ec2:CreateTags",
"ec2:DescribeAccountAttributes",
"ec2:DescribeInstanceTypes",
"ec2:DescribeInstances",
"ec2:DescribeRegions",
"ec2:DescribeRouteTables",

View File

@ -27,6 +27,7 @@
{
"Action": [
"autoscaling:DescribeAutoScalingInstances",
"ec2:DescribeInstanceTypes",
"ec2:DescribeInstances",
"iam:GetServerCertificate",
"iam:ListServerCertificates",

View File

@ -148,6 +148,7 @@
"ec2:CreateSecurityGroup",
"ec2:CreateTags",
"ec2:DescribeAccountAttributes",
"ec2:DescribeInstanceTypes",
"ec2:DescribeInstances",
"ec2:DescribeRegions",
"ec2:DescribeRouteTables",

View File

@ -27,6 +27,7 @@
{
"Action": [
"autoscaling:DescribeAutoScalingInstances",
"ec2:DescribeInstanceTypes",
"ec2:DescribeInstances",
"iam:GetServerCertificate",
"iam:ListServerCertificates",

View File

@ -117,6 +117,7 @@
"autoscaling:DescribeTags",
"ec2:CreateSecurityGroup",
"ec2:CreateTags",
"ec2:DescribeInstanceTypes",
"ec2:DescribeInstances",
"ec2:DescribeRegions",
"ec2:DescribeRouteTables",

View File

@ -27,6 +27,7 @@
{
"Action": [
"autoscaling:DescribeAutoScalingInstances",
"ec2:DescribeInstanceTypes",
"ec2:DescribeInstances",
"iam:GetServerCertificate",
"iam:ListServerCertificates",

View File

@ -13,42 +13,6 @@
"*"
]
},
{
"Action": "ec2:CreateTags",
"Condition": {
"StringEquals": {
"ec2:CreateAction": [
"CreateVolume",
"CreateSnapshot"
]
}
},
"Effect": "Allow",
"Resource": [
"arn:aws:ec2:*:*:volume/*",
"arn:aws:ec2:*:*:snapshot/*"
]
},
{
"Action": [
"elasticloadbalancing:CreateLoadBalancer",
"elasticloadbalancing:CreateLoadBalancerPolicy",
"elasticloadbalancing:CreateLoadBalancerListeners",
"ec2:CreateSecurityGroup",
"ec2:CreateVolume",
"elasticloadbalancing:CreateListener",
"elasticloadbalancing:CreateTargetGroup"
],
"Condition": {
"StringEquals": {
"aws:RequestTag/KubernetesCluster": "minimal.example.com"
}
},
"Effect": "Allow",
"Resource": [
"*"
]
},
{
"Action": [
"s3:Get*"
@ -122,7 +86,8 @@
},
{
"Action": [
"route53:ListHostedZones"
"route53:ListHostedZones",
"route53:ListTagsForResource"
],
"Effect": "Allow",
"Resource": [
@ -130,16 +95,20 @@
]
},
{
"Action": [
"ec2:CreateVolume"
],
"Action": "ec2:CreateTags",
"Condition": {
"StringEquals": {
"aws:RequestTag/KubernetesCluster": "minimal.example.com"
"ec2:CreateAction": [
"CreateVolume",
"CreateSnapshot"
]
}
},
"Effect": "Allow",
"Resource": "*"
"Resource": [
"arn:aws:ec2:*:*:volume/*",
"arn:aws:ec2:*:*:snapshot/*"
]
},
{
"Action": "ec2:CreateTags",
@ -242,6 +211,24 @@
},
"Effect": "Allow",
"Resource": "*"
},
{
"Action": [
"ec2:CreateSecurityGroup",
"ec2:CreateVolume",
"elasticloadbalancing:CreateListener",
"elasticloadbalancing:CreateLoadBalancer",
"elasticloadbalancing:CreateLoadBalancerListeners",
"elasticloadbalancing:CreateLoadBalancerPolicy",
"elasticloadbalancing:CreateTargetGroup"
],
"Condition": {
"StringEquals": {
"aws:RequestTag/KubernetesCluster": "minimal.example.com"
}
},
"Effect": "Allow",
"Resource": "*"
}
],
"Version": "2012-10-17"

View File

@ -9,7 +9,6 @@
"arn:aws:s3:::placeholder-read-bucket/clusters.example.com/minimal.example.com/addons/*",
"arn:aws:s3:::placeholder-read-bucket/clusters.example.com/minimal.example.com/cluster-completed.spec",
"arn:aws:s3:::placeholder-read-bucket/clusters.example.com/minimal.example.com/igconfig/node/*",
"arn:aws:s3:::placeholder-read-bucket/clusters.example.com/minimal.example.com/pki/ssh/*",
"arn:aws:s3:::placeholder-read-bucket/clusters.example.com/minimal.example.com/secrets/dockerconfig"
]
},

View File

@ -130,7 +130,10 @@ cloudConfig:
containerRuntime: containerd
containerd:
logLevel: info
version: 1.4.6
nvidiaGPU:
enabled: true
package: nvidia-headless-460-server
version: 1.4.9
docker:
skipInstall: true
encryptionConfig: null
@ -244,7 +247,7 @@ CloudProvider: aws
ConfigBase: memfs://clusters.example.com/minimal.example.com
InstanceGroupName: master-us-test-1a
InstanceGroupRole: Master
NodeupConfigHash: yGAfgK0NPZYEUOLMMMzSZkCbHknciWasTJdPN/4s2ZQ=
NodeupConfigHash: FzCpF+yF0cPmOxGl501tNmKOLgm6DaN78GDi94ewPMg=
__EOF_KUBE_ENV

View File

@ -130,7 +130,10 @@ cloudConfig:
containerRuntime: containerd
containerd:
logLevel: info
version: 1.4.6
nvidiaGPU:
enabled: true
package: nvidia-headless-460-server
version: 1.4.9
docker:
skipInstall: true
kubeProxy:
@ -162,7 +165,7 @@ CloudProvider: aws
ConfigBase: memfs://clusters.example.com/minimal.example.com
InstanceGroupName: nodes
InstanceGroupRole: Node
NodeupConfigHash: 6SVOFamP/Ej8sto4LpyakkBlhlXKfTDUttMpr2yW358=
NodeupConfigHash: MeubP7qtHsRda0sdIWd0HRS7HglCYd06CsDkZBoFCGY=
__EOF_KUBE_ENV

View File

@ -20,7 +20,10 @@ spec:
containerRuntime: containerd
containerd:
logLevel: info
version: 1.4.6
nvidiaGPU:
enabled: true
package: nvidia-headless-460-server
version: 1.4.9
dnsZone: Z1AFAKE1ZON3YO
docker:
skipInstall: true
@ -45,6 +48,8 @@ spec:
name: events
provider: Manager
version: 3.4.13
externalDns:
provider: dns-controller
iam:
legacy: false
keyStore: memfs://clusters.example.com/minimal.example.com/pki
@ -113,6 +118,7 @@ spec:
nodeLocalDNS:
cpuRequest: 25m
enabled: false
image: k8s.gcr.io/dns/k8s-dns-node-cache:1.20.0
memoryRequest: 5Mi
provider: CoreDNS
replicas: 2
@ -168,9 +174,6 @@ spec:
networking:
cni: {}
nonMasqueradeCIDR: 100.64.0.0/10
nvidia:
enabled: true
package: nvidia-headless-460-server
podCIDR: 100.96.0.0/11
secretStore: memfs://clusters.example.com/minimal.example.com/secrets
serviceClusterIPRange: 100.64.0.0/13

View File

@ -10,7 +10,7 @@ spec:
- --client-key=/secrets/client.key
command:
- /kube-apiserver-healthcheck
image: k8s.gcr.io/kops/kube-apiserver-healthcheck:1.22.0-alpha.2
image: k8s.gcr.io/kops/kube-apiserver-healthcheck:1.23.0-alpha.1
livenessProbe:
httpGet:
host: 127.0.0.1

View File

@ -6,48 +6,48 @@ spec:
addons:
- id: k8s-1.16
manifest: kops-controller.addons.k8s.io/k8s-1.16.yaml
manifestHash: 886d0565bb011313ee94e8497700550b415eea75
manifestHash: a4dfdc45efacbd78fb163ddae16ddf05f6fc2bcadde59724f2504ca9823b736d
name: kops-controller.addons.k8s.io
needsRollingUpdate: control-plane
selector:
k8s-addon: kops-controller.addons.k8s.io
- manifest: core.addons.k8s.io/v1.4.0.yaml
manifestHash: 9283cd74e74b10e441d3f1807c49c1bef8fac8c8
manifestHash: 18233793a8442224d052e44891e737c67ccfb4e051e95216392319653f4cb0e5
name: core.addons.k8s.io
selector:
k8s-addon: core.addons.k8s.io
- id: k8s-1.12
manifest: coredns.addons.k8s.io/k8s-1.12.yaml
manifestHash: 004bda4e250d9cec5d5f3e732056020b78b0ab88
manifestHash: 3bf8c29c45f0f7dbbb1671b577f302a19418b55d214f6847ff586f1ee9d1ba71
name: coredns.addons.k8s.io
selector:
k8s-addon: coredns.addons.k8s.io
- id: k8s-1.9
manifest: kubelet-api.rbac.addons.k8s.io/k8s-1.9.yaml
manifestHash: 8ee090e41be5e8bcd29ee799b1608edcd2dd8b65
manifestHash: 01c120e887bd98d82ef57983ad58a0b22bc85efb48108092a24c4b82e4c9ea81
name: kubelet-api.rbac.addons.k8s.io
selector:
k8s-addon: kubelet-api.rbac.addons.k8s.io
- manifest: limit-range.addons.k8s.io/v1.5.0.yaml
manifestHash: 6ed889ae6a8d83dd6e5b511f831b3ac65950cf9d
manifestHash: 2d55c3bc5e354e84a3730a65b42f39aba630a59dc8d32b30859fcce3d3178bc2
name: limit-range.addons.k8s.io
selector:
k8s-addon: limit-range.addons.k8s.io
- id: k8s-1.12
manifest: dns-controller.addons.k8s.io/k8s-1.12.yaml
manifestHash: 2096284cd9a5115cb2ea85c8f952d2a9a0cd2d7e
manifestHash: 3c65b7e57090250314919fe504c42463af8cbfca9a06d2e645a3c9f312f69fca
name: dns-controller.addons.k8s.io
selector:
k8s-addon: dns-controller.addons.k8s.io
- id: k8s-1.16
manifest: nvidia.addons.k8s.io/k8s-1.16.yaml
manifestHash: b7da0d26b6c91660a6279a3e35e528b66fbd3a5a
manifestHash: e1fc6effc77349a83fb33e39250433d6434f1606ffb16445d87ae4d0d660b30f
name: nvidia.addons.k8s.io
selector:
k8s-addon: nvidia.addons.k8s.io
- id: v1.15.0
manifest: storage-aws.addons.k8s.io/v1.15.0.yaml
manifestHash: d474dbcc9b9c5cd2e87b41a7755851811f5f48aa
manifestHash: 065ae832ddac8d0931e9992d6a76f43a33a36975a38003b34f4c5d86a7d42780
name: storage-aws.addons.k8s.io
selector:
k8s-addon: storage-aws.addons.k8s.io

View File

@ -7,3 +7,50 @@ metadata:
app.kubernetes.io/managed-by: kops
k8s-addon: core.addons.k8s.io
name: kube-system
---
apiVersion: v1
kind: ServiceAccount
metadata:
creationTimestamp: null
labels:
addon.kops.k8s.io/name: core.addons.k8s.io
app.kubernetes.io/managed-by: kops
k8s-addon: core.addons.k8s.io
name: kube-dns
namespace: kube-system
---
apiVersion: v1
kind: ServiceAccount
metadata:
creationTimestamp: null
labels:
addon.kops.k8s.io/name: core.addons.k8s.io
app.kubernetes.io/managed-by: kops
k8s-addon: core.addons.k8s.io
name: kube-proxy
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
creationTimestamp: null
labels:
addon.kops.k8s.io/name: core.addons.k8s.io
app.kubernetes.io/managed-by: kops
k8s-addon: core.addons.k8s.io
name: kubeadm:node-proxier
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:node-proxier
subjects:
- apiGroup: ""
kind: ServiceAccount
name: kube-proxy
namespace: kube-system

View File

@ -40,12 +40,6 @@ rules:
verbs:
- list
- watch
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
---
@ -90,10 +84,10 @@ data:
forward . /etc/resolv.conf {
max_concurrent 1000
}
loop
cache 30
loadbalance
loop
reload
loadbalance
}
kind: ConfigMap
metadata:
@ -151,7 +145,7 @@ spec:
- args:
- -conf
- /etc/coredns/Corefile
image: coredns/coredns:1.8.3
image: k8s.gcr.io/coredns/coredns:v1.8.4
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 5
@ -198,7 +192,7 @@ spec:
readOnly: true
dnsPolicy: Default
nodeSelector:
beta.kubernetes.io/os: linux
kubernetes.io/os: linux
priorityClassName: system-cluster-critical
serviceAccountName: coredns
tolerations:
@ -374,12 +368,14 @@ spec:
- --default-params={"linear":{"coresPerReplica":256,"nodesPerReplica":16,"preventSinglePointFailure":true}}
- --logtostderr=true
- --v=2
image: k8s.gcr.io/cpa/cluster-proportional-autoscaler:1.8.3
image: k8s.gcr.io/cpa/cluster-proportional-autoscaler:1.8.4
name: autoscaler
resources:
requests:
cpu: 20m
memory: 10Mi
nodeSelector:
kubernetes.io/os: linux
priorityClassName: system-cluster-critical
serviceAccountName: coredns-autoscaler
tolerations:

View File

@ -7,7 +7,7 @@ metadata:
app.kubernetes.io/managed-by: kops
k8s-addon: dns-controller.addons.k8s.io
k8s-app: dns-controller
version: v1.22.0-alpha.2
version: v1.23.0-alpha.1
name: dns-controller
namespace: kube-system
spec:
@ -24,7 +24,7 @@ spec:
labels:
k8s-addon: dns-controller.addons.k8s.io
k8s-app: dns-controller
version: v1.22.0-alpha.2
version: v1.23.0-alpha.1
spec:
containers:
- command:
@ -37,7 +37,7 @@ spec:
env:
- name: KUBERNETES_SERVICE_HOST
value: 127.0.0.1
image: k8s.gcr.io/kops/dns-controller:1.22.0-alpha.2
image: k8s.gcr.io/kops/dns-controller:1.23.0-alpha.1
name: dns-controller
resources:
requests:
@ -92,7 +92,7 @@ rules:
- list
- watch
- apiGroups:
- extensions
- networking
resources:
- ingresses
verbs:

View File

@ -23,7 +23,7 @@ metadata:
app.kubernetes.io/managed-by: kops
k8s-addon: kops-controller.addons.k8s.io
k8s-app: kops-controller
version: v1.22.0-alpha.2
version: v1.23.0-alpha.1
name: kops-controller
namespace: kube-system
spec:
@ -37,7 +37,7 @@ spec:
labels:
k8s-addon: kops-controller.addons.k8s.io
k8s-app: kops-controller
version: v1.22.0-alpha.2
version: v1.23.0-alpha.1
spec:
containers:
- command:
@ -47,7 +47,7 @@ spec:
env:
- name: KUBERNETES_SERVICE_HOST
value: 127.0.0.1
image: k8s.gcr.io/kops/kops-controller:1.22.0-alpha.2
image: k8s.gcr.io/kops/kops-controller:1.23.0-alpha.1
name: kops-controller
resources:
requests:

View File

@ -56,14 +56,14 @@ Assets:
- 681c81b7934ae2bf38b9f12d891683972d1fbbf6d7d97e50940a47b139d41b35@https://storage.googleapis.com/kubernetes-release/release/v1.21.0/bin/linux/amd64/kubelet
- 9f74f2fa7ee32ad07e17211725992248470310ca1988214518806b39b1dad9f0@https://storage.googleapis.com/kubernetes-release/release/v1.21.0/bin/linux/amd64/kubectl
- 977824932d5667c7a37aa6a3cbba40100a6873e7bd97e83e8be837e3e7afd0a8@https://storage.googleapis.com/k8s-artifacts-cni/release/v0.8.7/cni-plugins-linux-amd64-v0.8.7.tgz
- 6ae4763598c9583f8b50605f19d6c7e9ef93c216706465e73dfc84ee6b63a238@https://github.com/containerd/containerd/releases/download/v1.4.6/cri-containerd-cni-1.4.6-linux-amd64.tar.gz
- 9911479f86012d6eab7e0f532da8f807a8b0f555ee09ef89367d8c31243073bb@https://github.com/containerd/containerd/releases/download/v1.4.9/cri-containerd-cni-1.4.9-linux-amd64.tar.gz
- f90ed6dcef534e6d1ae17907dc7eb40614b8945ad4af7f0e98d2be7cde8165c6@https://artifacts.k8s.io/binaries/kops/1.21.0-alpha.1/linux/amd64/protokube,https://github.com/kubernetes/kops/releases/download/v1.21.0-alpha.1/protokube-linux-amd64
- 9992e7eb2a2e93f799e5a9e98eb718637433524bc65f630357201a79f49b13d0@https://artifacts.k8s.io/binaries/kops/1.21.0-alpha.1/linux/amd64/channels,https://github.com/kubernetes/kops/releases/download/v1.21.0-alpha.1/channels-linux-amd64
arm64:
- 17832b192be5ea314714f7e16efd5e5f65347974bbbf41def6b02f68931380c4@https://storage.googleapis.com/kubernetes-release/release/v1.21.0/bin/linux/arm64/kubelet
- a4dd7100f547a40d3e2f83850d0bab75c6ea5eb553f0a80adcf73155bef1fd0d@https://storage.googleapis.com/kubernetes-release/release/v1.21.0/bin/linux/arm64/kubectl
- ae13d7b5c05bd180ea9b5b68f44bdaa7bfb41034a2ef1d68fd8e1259797d642f@https://storage.googleapis.com/k8s-artifacts-cni/release/v0.8.7/cni-plugins-linux-arm64-v0.8.7.tgz
- be8c9a5a06ebec8fb1d36e867cd00fb5777746a9812a0cae2966778ff899c525@https://download.docker.com/linux/static/stable/aarch64/docker-20.10.7.tgz
- 4eb9d5e2adf718cd7ee59f6951715f3113c9c4ee49c75c9efb9747f2c3457b2b@https://download.docker.com/linux/static/stable/aarch64/docker-20.10.8.tgz
- 2f599c3d54f4c4bdbcc95aaf0c7b513a845d8f9503ec5b34c9f86aa1bc34fc0c@https://artifacts.k8s.io/binaries/kops/1.21.0-alpha.1/linux/arm64/protokube,https://github.com/kubernetes/kops/releases/download/v1.21.0-alpha.1/protokube-linux-arm64
- 9d842e3636a95de2315cdea2be7a282355aac0658ef0b86d5dc2449066538f13@https://artifacts.k8s.io/binaries/kops/1.21.0-alpha.1/linux/arm64/channels,https://github.com/kubernetes/kops/releases/download/v1.21.0-alpha.1/channels-linux-arm64
CAs:
@ -253,7 +253,7 @@ KubeletConfig:
nonMasqueradeCIDR: 100.64.0.0/10
podManifestPath: /etc/kubernetes/manifests
registerSchedulable: false
Nvidia:
NvidiaGPU:
enabled: true
package: nvidia-headless-460-server
UpdatePolicy: automatic
@ -261,7 +261,10 @@ channels:
- memfs://clusters.example.com/minimal.example.com/addons/bootstrap-channel.yaml
containerdConfig:
logLevel: info
version: 1.4.6
nvidiaGPU:
enabled: true
package: nvidia-headless-460-server
version: 1.4.9
etcdManifests:
- memfs://clusters.example.com/minimal.example.com/manifests/etcd/main.yaml
- memfs://clusters.example.com/minimal.example.com/manifests/etcd/events.yaml

View File

@ -3,12 +3,12 @@ Assets:
- 681c81b7934ae2bf38b9f12d891683972d1fbbf6d7d97e50940a47b139d41b35@https://storage.googleapis.com/kubernetes-release/release/v1.21.0/bin/linux/amd64/kubelet
- 9f74f2fa7ee32ad07e17211725992248470310ca1988214518806b39b1dad9f0@https://storage.googleapis.com/kubernetes-release/release/v1.21.0/bin/linux/amd64/kubectl
- 977824932d5667c7a37aa6a3cbba40100a6873e7bd97e83e8be837e3e7afd0a8@https://storage.googleapis.com/k8s-artifacts-cni/release/v0.8.7/cni-plugins-linux-amd64-v0.8.7.tgz
- 6ae4763598c9583f8b50605f19d6c7e9ef93c216706465e73dfc84ee6b63a238@https://github.com/containerd/containerd/releases/download/v1.4.6/cri-containerd-cni-1.4.6-linux-amd64.tar.gz
- 9911479f86012d6eab7e0f532da8f807a8b0f555ee09ef89367d8c31243073bb@https://github.com/containerd/containerd/releases/download/v1.4.9/cri-containerd-cni-1.4.9-linux-amd64.tar.gz
arm64:
- 17832b192be5ea314714f7e16efd5e5f65347974bbbf41def6b02f68931380c4@https://storage.googleapis.com/kubernetes-release/release/v1.21.0/bin/linux/arm64/kubelet
- a4dd7100f547a40d3e2f83850d0bab75c6ea5eb553f0a80adcf73155bef1fd0d@https://storage.googleapis.com/kubernetes-release/release/v1.21.0/bin/linux/arm64/kubectl
- ae13d7b5c05bd180ea9b5b68f44bdaa7bfb41034a2ef1d68fd8e1259797d642f@https://storage.googleapis.com/k8s-artifacts-cni/release/v0.8.7/cni-plugins-linux-arm64-v0.8.7.tgz
- be8c9a5a06ebec8fb1d36e867cd00fb5777746a9812a0cae2966778ff899c525@https://download.docker.com/linux/static/stable/aarch64/docker-20.10.7.tgz
- 4eb9d5e2adf718cd7ee59f6951715f3113c9c4ee49c75c9efb9747f2c3457b2b@https://download.docker.com/linux/static/stable/aarch64/docker-20.10.8.tgz
CAs:
kubernetes-ca: |
-----BEGIN CERTIFICATE-----
@ -58,7 +58,7 @@ KubeletConfig:
podManifestPath: /etc/kubernetes/manifests
taints:
- nvidia.com/gpu:NoSchedule
Nvidia:
NvidiaGPU:
enabled: true
package: nvidia-headless-460-server
UpdatePolicy: automatic
@ -66,4 +66,7 @@ channels:
- memfs://clusters.example.com/minimal.example.com/addons/bootstrap-channel.yaml
containerdConfig:
logLevel: info
version: 1.4.6
nvidiaGPU:
enabled: true
package: nvidia-headless-460-server
version: 1.4.9

View File

@ -9,6 +9,9 @@ spec:
channel: stable
cloudProvider: aws
configBase: memfs://clusters.example.com/minimal.example.com
containerd:
nvidiaGPU:
enabled: true
containerRuntime: "containerd"
etcdClusters:
- etcdMembers:
@ -29,8 +32,6 @@ spec:
networking:
cni: {}
nonMasqueradeCIDR: 100.64.0.0/10
nvidia:
enabled: true
sshAccess:
- 0.0.0.0/0
topology:

View File

@ -1419,8 +1419,8 @@ func (n *nodeUpConfigBuilder) BuildConfig(ig *kops.InstanceGroup, apiserverAddit
config.ContainerdConfig = cluster.Spec.Containerd
}
if cluster.Spec.Nvidia != nil {
config.Nvidia = cluster.Spec.Nvidia
if cluster.Spec.Containerd.NvidiaGPU != nil {
config.NvidiaGPU = cluster.Spec.Containerd.NvidiaGPU
}
if ig.Spec.WarmPool != nil || cluster.Spec.WarmPool != nil {

View File

@ -591,7 +591,7 @@ func (b *BootstrapChannelBuilder) buildAddons(c *fi.ModelBuilderContext) (*chann
}
}
nvidia := b.Cluster.Spec.Nvidia
nvidia := b.Cluster.Spec.Containerd.NvidiaGPU
if nvidia != nil && fi.BoolValue(nvidia.Enabled) {

View File

@ -161,7 +161,7 @@ func PopulateInstanceGroupSpec(cluster *kops.Cluster, input *kops.InstanceGroup,
return nil, fmt.Errorf("unable to infer any Subnets for InstanceGroup %s ", ig.ObjectMeta.Name)
}
if fi.BoolValue(cluster.Spec.Nvidia.Enabled) {
if cluster.Spec.Containerd.NvidiaGPU != nil && fi.BoolValue(cluster.Spec.Containerd.NvidiaGPU.Enabled) {
switch kops.CloudProviderID(cluster.Spec.CloudProvider) {
case kops.CloudProviderAWS:
mt, err := awsup.GetMachineTypeInfo(cloud.(awsup.AWSCloud), ig.Spec.MachineType)

View File

@ -277,7 +277,7 @@ func (c *NodeUpCommand) Run(out io.Writer) error {
}
// If Nvidia is enabled in the cluster, check if this instance has support for it.
nvidia := c.cluster.Spec.Nvidia
nvidia := c.cluster.Spec.Containerd.NvidiaGPU
if nvidia != nil && fi.BoolValue(nvidia.Enabled) {
awsCloud := cloud.(awsup.AWSCloud)
// Get the instance type's detailed information.