diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index 79eb0bbf99..efdf521d6c 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -46,3 +46,29 @@ jobs:
       with:
         name: tests-e2e-scenarios-bare-metal
         path: /tmp/artifacts/
+
+  tests-e2e-scenarios-bare-metal-ipv6:
+    runs-on: ubuntu-24.04
+    timeout-minutes: 70
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+        with:
+          path: ${{ env.GOPATH }}/src/k8s.io/kops
+
+      - name: Set up go
+        uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed
+        with:
+          go-version-file: '${{ env.GOPATH }}/src/k8s.io/kops/go.mod'
+
+      - name: tests/e2e/scenarios/bare-metal/run-test
+        working-directory: ${{ env.GOPATH }}/src/k8s.io/kops
+        run: |
+          timeout 60m tests/e2e/scenarios/bare-metal/scenario-ipv6
+        env:
+          ARTIFACTS: /tmp/artifacts
+      - name: Archive production artifacts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: tests-e2e-scenarios-bare-metal-ipv6
+          path: /tmp/artifacts/
diff --git a/cmd/kops-controller/controllers/gceipam.go b/cmd/kops-controller/controllers/gceipam.go
index 0a750f4c0b..37a8e8c9ad 100644
--- a/cmd/kops-controller/controllers/gceipam.go
+++ b/cmd/kops-controller/controllers/gceipam.go
@@ -56,7 +56,7 @@ func NewGCEIPAMReconciler(mgr manager.Manager) (*GCEIPAMReconciler, error) {
 	return r, nil
 }
 
-// GCEIPAMReconciler observes Node objects, assigning their`PodCIDRs` from the instance's `ExternalIpv6`.
+// GCEIPAMReconciler observes Node objects, assigning their `PodCIDRs` from the instance's `ExternalIpv6`.
 type GCEIPAMReconciler struct {
 	// client is the controller-runtime client
 	client client.Client
diff --git a/cmd/kops-controller/controllers/metalipam.go b/cmd/kops-controller/controllers/metalipam.go
new file mode 100644
index 0000000000..796d234d41
--- /dev/null
+++ b/cmd/kops-controller/controllers/metalipam.go
@@ -0,0 +1,115 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package controllers
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/go-logr/logr"
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/types"
+	corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
+	"k8s.io/klog/v2"
+	kopsapi "k8s.io/kops/pkg/apis/kops/v1alpha2"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/manager"
+)
+
+// NewMetalIPAMReconciler is the constructor for a MetalIPAMReconciler
+func NewMetalIPAMReconciler(ctx context.Context, mgr manager.Manager) (*MetalIPAMReconciler, error) {
+	klog.Info("starting metal ipam controller")
+	r := &MetalIPAMReconciler{
+		client: mgr.GetClient(),
+		log:    ctrl.Log.WithName("controllers").WithName("metal_ipam"),
+	}
+
+	coreClient, err := corev1client.NewForConfig(mgr.GetConfig())
+	if err != nil {
+		return nil, fmt.Errorf("building corev1 client: %w", err)
+	}
+	r.coreV1Client = coreClient
+
+	return r, nil
+}
+
+// MetalIPAMReconciler observes Node objects, assigning their `PodCIDRs` from the corresponding Host object.
+type MetalIPAMReconciler struct {
+	// client is the controller-runtime client
+	client client.Client
+
+	// log is a logr
+	log logr.Logger
+
+	// coreV1Client is a client-go client for patching nodes
+	coreV1Client *corev1client.CoreV1Client
+}
+
+// +kubebuilder:rbac:groups=,resources=nodes,verbs=get;list;watch;patch
+// Reconcile is the main reconciler function that observes node changes.
+func (r *MetalIPAMReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	log := klog.FromContext(ctx)
+
+	node := &corev1.Node{}
+	if err := r.client.Get(ctx, req.NamespacedName, node); err != nil {
+		if apierrors.IsNotFound(err) {
+			// we'll ignore not-found errors, since they can't be fixed by an immediate
+			// requeue (we'll need to wait for a new notification), and we can get them
+			// on deleted requests.
+			return ctrl.Result{}, nil
+		}
+		log.Error(err, "unable to fetch node", "node.name", node.Name)
+		return ctrl.Result{}, err
+	}
+
+	host := &kopsapi.Host{}
+	id := types.NamespacedName{
+		Namespace: "kops-system",
+		Name:      node.Name,
+	}
+	if err := r.client.Get(ctx, id, host); err != nil {
+		log.Error(err, "unable to fetch host", "id", id)
+		return ctrl.Result{}, err
+	}
+
+	if len(node.Spec.PodCIDRs) != 0 && len(host.Spec.PodCIDRs) > 0 {
+		log.V(2).Info("node has pod cidrs, skipping", "node.name", node.Name)
+		return ctrl.Result{}, nil
+	}
+
+	if len(host.Spec.PodCIDRs) == 0 {
+		log.Info("host record has no pod cidrs, cannot assign pod cidrs to node", "node.name", node.Name)
+		return ctrl.Result{}, nil
+	}
+
+	log.Info("assigning pod cidrs to node", "node.name", node.Name, "pod.cidrs", host.Spec.PodCIDRs)
+	if err := patchNodePodCIDRs(r.coreV1Client, ctx, node, host.Spec.PodCIDRs); err != nil {
+		log.Error(err, "unable to patch node", "node.name", node.Name)
+		return ctrl.Result{}, err
+	}
+
+	return ctrl.Result{}, nil
+}
+
+func (r *MetalIPAMReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		Named("metal_ipam").
+		For(&corev1.Node{}).
+		Complete(r)
+}
diff --git a/cmd/kops-controller/main.go b/cmd/kops-controller/main.go
index 1ac9d498da..f722e6aaba 100644
--- a/cmd/kops-controller/main.go
+++ b/cmd/kops-controller/main.go
@@ -392,6 +392,12 @@ func setupCloudIPAM(ctx context.Context, mgr manager.Manager, opt *config.Option
 			return fmt.Errorf("creating gce IPAM controller: %w", err)
 		}
 		controller = ipamController
+	case "metal":
+		ipamController, err := controllers.NewMetalIPAMReconciler(ctx, mgr)
+		if err != nil {
+			return fmt.Errorf("creating metal IPAM controller: %w", err)
+		}
+		controller = ipamController
 	default:
 		return fmt.Errorf("kOps IPAM controller is not supported on cloud %q", opt.Cloud)
 	}
diff --git a/cmd/kops/toolbox_enroll.go b/cmd/kops/toolbox_enroll.go
index 7fd321fed1..17e0e06196 100644
--- a/cmd/kops/toolbox_enroll.go
+++ b/cmd/kops/toolbox_enroll.go
@@ -45,11 +45,14 @@ func NewCmdToolboxEnroll(f commandutils.Factory, out io.Writer) *cobra.Command {
 	cmd.Flags().StringVar(&options.ClusterName, "cluster", options.ClusterName, "Name of cluster to join")
 	cmd.Flags().StringVar(&options.InstanceGroup, "instance-group", options.InstanceGroup, "Name of instance-group to join")
+	cmd.Flags().StringSliceVar(&options.PodCIDRs, "pod-cidr", options.PodCIDRs, "IP Address range to use for pods that run on this node")
 
 	cmd.Flags().StringVar(&options.Host, "host", options.Host, "IP/hostname for machine to add")
 	cmd.Flags().StringVar(&options.SSHUser, "ssh-user", options.SSHUser, "user for ssh")
 	cmd.Flags().IntVar(&options.SSHPort, "ssh-port", options.SSHPort, "port for ssh")
 
+	cmd.Flags().BoolVar(&options.BuildHost, "build-host", options.BuildHost, "only build the host resource, don't apply it or enroll the node")
+
 	options.CreateKubecfgOptions.AddCommonFlags(cmd.Flags())
 
 	return cmd
diff --git a/docs/cli/kops_toolbox_enroll.md b/docs/cli/kops_toolbox_enroll.md
index 18d80a68b5..43c09a0f2e 100644
--- a/docs/cli/kops_toolbox_enroll.md
+++ b/docs/cli/kops_toolbox_enroll.md
@@ -23,10 +23,12 @@ kops toolbox enroll [CLUSTER] [flags]
 ```
       --api-server string       Override the API server used when communicating with the cluster kube-apiserver
+      --build-host              only build the host resource, don't apply it or enroll the node
       --cluster string          Name of cluster to join
   -h, --help                    help for enroll
       --host string             IP/hostname for machine to add
       --instance-group string   Name of instance-group to join
+      --pod-cidr strings        IP Address range to use for pods that run on this node
       --ssh-port int            port for ssh (default 22)
       --ssh-user string         user for ssh (default "root")
       --use-kubeconfig          Use the server endpoint from the local kubeconfig instead of inferring from cluster name
diff --git a/k8s/crds/kops.k8s.io_hosts.yaml b/k8s/crds/kops.k8s.io_hosts.yaml
index 4f730e2f4d..8f5f228ae1 100644
--- a/k8s/crds/kops.k8s.io_hosts.yaml
+++ b/k8s/crds/kops.k8s.io_hosts.yaml
@@ -42,6 +42,12 @@ spec:
             properties:
               instanceGroup:
                 type: string
+              podCIDRs:
+                description: PodCIDRs configures the IP ranges to be used for pods
+                  on this node/host.
+                items:
+                  type: string
+                type: array
               publicKey:
                 type: string
             type: object
diff --git a/nodeup/pkg/model/kube_apiserver.go b/nodeup/pkg/model/kube_apiserver.go
index c0bbd94415..c3c99fb72b 100644
--- a/nodeup/pkg/model/kube_apiserver.go
+++ b/nodeup/pkg/model/kube_apiserver.go
@@ -19,10 +19,12 @@ package model
 import (
 	"context"
 	"fmt"
+	"net"
 	"path/filepath"
 	"sort"
 	"strings"
 
+	"k8s.io/klog/v2"
 	"k8s.io/kops/pkg/apis/kops"
 	"k8s.io/kops/pkg/flagbuilder"
 	"k8s.io/kops/pkg/k8scodecs"
@@ -77,6 +79,55 @@ func (b *KubeAPIServerBuilder) Build(c *fi.NodeupModelBuilderContext) error {
 		}
 	}
 
+	if b.CloudProvider() == kops.CloudProviderMetal {
+		// Workaround for https://github.com/kubernetes/kubernetes/issues/111671
+		if b.IsIPv6Only() {
+			interfaces, err := net.Interfaces()
+			if err != nil {
+				return fmt.Errorf("getting local network interfaces: %w", err)
+			}
+			var ipv6s []net.IP
+			for _, intf := range interfaces {
+				addresses, err := intf.Addrs()
+				if err != nil {
+					return fmt.Errorf("getting addresses for network interface %q: %w", intf.Name, err)
+				}
+				for _, addr := range addresses {
+					ip, _, err := net.ParseCIDR(addr.String())
+					if ip == nil {
+						return fmt.Errorf("parsing ip address %q (bound to network %q): %w", addr.String(), intf.Name, err)
+					}
+					if ip.To4() != nil {
+						// We're only looking for ipv6
+						continue
+					}
+					if ip.IsLinkLocalUnicast() {
+						klog.V(4).Infof("ignoring link-local unicast addr %v", addr)
+						continue
+					}
+					if ip.IsLinkLocalMulticast() {
+						klog.V(4).Infof("ignoring link-local multicast addr %v", addr)
+						continue
+					}
+					if ip.IsLoopback() {
+						klog.V(4).Infof("ignoring loopback addr %v", addr)
+						continue
+					}
+					ipv6s = append(ipv6s, ip)
+				}
+			}
+			if len(ipv6s) > 1 {
+				klog.Warningf("found multiple ipv6s, choosing first: %v", ipv6s)
+			}
+			if len(ipv6s) == 0 {
+				klog.Warningf("did not find ipv6 address for kube-apiserver --advertise-address")
+			}
+			if len(ipv6s) > 0 {
+				kubeAPIServer.AdvertiseAddress = ipv6s[0].String()
+			}
+		}
+	}
+
 	b.configureOIDC(&kubeAPIServer)
 	if err := b.writeAuthenticationConfig(c, &kubeAPIServer); err != nil {
 		return err
diff --git a/nodeup/pkg/model/prefix.go b/nodeup/pkg/model/prefix.go
index de796341cf..0381b44672 100644
--- a/nodeup/pkg/model/prefix.go
+++ b/nodeup/pkg/model/prefix.go
@@ -41,6 +41,8 @@ func (b *PrefixBuilder) Build(c *fi.NodeupModelBuilderContext) error {
 		})
 	case kops.CloudProviderGCE:
 		// Prefix is assigned by GCE
+	case kops.CloudProviderMetal:
+		// IPv6 must be configured externally (not by nodeup)
 	default:
 		return fmt.Errorf("kOps IPAM controller not supported on cloud %q", b.CloudProvider())
 	}
diff --git a/pkg/apis/kops/v1alpha2/host.go b/pkg/apis/kops/v1alpha2/host.go
index 7e1295ca16..9aa242d3b6 100644
--- a/pkg/apis/kops/v1alpha2/host.go
+++ b/pkg/apis/kops/v1alpha2/host.go
@@ -36,6 +36,9 @@ type Host struct {
 type HostSpec struct {
 	PublicKey     string `json:"publicKey,omitempty"`
 	InstanceGroup string `json:"instanceGroup,omitempty"`
+
+	// PodCIDRs configures the IP ranges to be used for pods on this node/host.
+	PodCIDRs []string `json:"podCIDRs,omitempty"`
 }
 
 // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
diff --git a/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go b/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go
index dab1ba4875..c1b6e4cea6 100644
--- a/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go
+++ b/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go
@@ -2370,7 +2370,7 @@ func (in *Host) DeepCopyInto(out *Host) {
 	*out = *in
 	out.TypeMeta = in.TypeMeta
 	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
-	out.Spec = in.Spec
+	in.Spec.DeepCopyInto(&out.Spec)
 	return
 }
 
@@ -2428,6 +2428,11 @@ func (in *HostList) DeepCopyObject() runtime.Object {
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *HostSpec) DeepCopyInto(out *HostSpec) {
 	*out = *in
+	if in.PodCIDRs != nil {
+		in, out := &in.PodCIDRs, &out.PodCIDRs
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
 	return
 }
 
diff --git a/pkg/commands/toolbox_enroll.go b/pkg/commands/toolbox_enroll.go
index aba1de01e4..2189822bdb 100644
--- a/pkg/commands/toolbox_enroll.go
+++ b/pkg/commands/toolbox_enroll.go
@@ -67,6 +67,12 @@ type ToolboxEnrollOptions struct {
 	SSHUser string
 	SSHPort int
 
+	// BuildHost is a flag to only build the host resource, don't apply it or enroll the node
+	BuildHost bool
+
+	// PodCIDRs is the list of IP Address ranges to use for pods that run on this node
+	PodCIDRs []string
+
 	kubeconfig.CreateKubecfgOptions
 }
 
@@ -85,6 +91,11 @@ func RunToolboxEnroll(ctx context.Context, f commandutils.Factory, out io.Writer
 	if options.InstanceGroup == "" {
 		return fmt.Errorf("instance-group is required")
 	}
+	if options.Host == "" {
+		// Technically we could build the host resource without the PKI, but this isn't the case we are targeting right now.
+		return fmt.Errorf("host is required")
+	}
+
 	clientset, err := f.KopsClient()
 	if err != nil {
 		return err
@@ -100,40 +111,11 @@ func RunToolboxEnroll(ctx context.Context, f commandutils.Factory, out io.Writer
 	if err != nil {
 		return err
 	}
-	fullInstanceGroup, err := configBuilder.GetFullInstanceGroup(ctx)
-	if err != nil {
-		return err
-	}
-	bootstrapData, err := configBuilder.GetBootstrapData(ctx)
-	if err != nil {
-		return err
-	}
 
 	// Enroll the node over SSH.
-	if options.Host != "" {
-		restConfig, err := f.RESTConfig(ctx, fullCluster, options.CreateKubecfgOptions)
-		if err != nil {
-			return err
-		}
-
-		if err := enrollHost(ctx, fullInstanceGroup, options, bootstrapData, restConfig); err != nil {
-			return err
-		}
-	}
-
-	return nil
-}
-
-func enrollHost(ctx context.Context, ig *kops.InstanceGroup, options *ToolboxEnrollOptions, bootstrapData *BootstrapData, restConfig *rest.Config) error {
-	scheme := runtime.NewScheme()
-	if err := v1alpha2.AddToScheme(scheme); err != nil {
-		return fmt.Errorf("building kubernetes scheme: %w", err)
-	}
-	kubeClient, err := client.New(restConfig, client.Options{
-		Scheme: scheme,
-	})
+	restConfig, err := f.RESTConfig(ctx, fullCluster, options.CreateKubecfgOptions)
 	if err != nil {
-		return fmt.Errorf("building kubernetes client: %w", err)
+		return err
 	}
 
 	sudo := true
@@ -147,6 +129,39 @@
 	}
 	defer sshTarget.Close()
 
+	hostData, err := buildHostData(ctx, sshTarget, options)
+	if err != nil {
+		return err
+	}
+
+	if options.BuildHost {
+		klog.Infof("building host data for %+v", hostData)
+		b, err := yaml.Marshal(hostData)
+		if err != nil {
+			return fmt.Errorf("error marshalling host data: %w", err)
+		}
+		fmt.Fprintf(out, "%s\n", string(b))
+		return nil
+	}
+
+	fullInstanceGroup, err := configBuilder.GetFullInstanceGroup(ctx)
+	if err != nil {
+		return err
+	}
+	bootstrapData, err := configBuilder.GetBootstrapData(ctx)
+	if err != nil {
+		return err
+	}
+
+	if err := enrollHost(ctx, fullInstanceGroup, bootstrapData, restConfig, hostData, sshTarget); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// buildHostData builds an instance of the Host CRD, based on information in the options and by SSHing to the target host.
+func buildHostData(ctx context.Context, sshTarget *SSHHost, options *ToolboxEnrollOptions) (*v1alpha2.Host, error) {
 	publicKeyPath := "/etc/kubernetes/kops/pki/machine/public.pem"
 
 	publicKeyBytes, err := sshTarget.readFile(ctx, publicKeyPath)
@@ -154,19 +169,20 @@
 	if err != nil {
 		if errors.Is(err, fs.ErrNotExist) {
 			publicKeyBytes = nil
 		} else {
-			return fmt.Errorf("error reading public key %q: %w", publicKeyPath, err)
+			return nil, fmt.Errorf("error reading public key %q: %w", publicKeyPath, err)
 		}
 	}
 
+	// Create the key if it doesn't exist
 	publicKeyBytes = bytes.TrimSpace(publicKeyBytes)
 	if len(publicKeyBytes) == 0 {
-		if _, err := sshTarget.runScript(ctx, scriptCreateKey, ExecOptions{Sudo: sudo, Echo: true}); err != nil {
-			return err
+		if _, err := sshTarget.runScript(ctx, scriptCreateKey, ExecOptions{Echo: true}); err != nil {
+			return nil, err
 		}
 		b, err := sshTarget.readFile(ctx, publicKeyPath)
 		if err != nil {
-			return fmt.Errorf("error reading public key %q (after creation): %w", publicKeyPath, err)
+			return nil, fmt.Errorf("error reading public key %q (after creation): %w", publicKeyPath, err)
 		}
 		publicKeyBytes = b
 	}
@@ -174,14 +190,37 @@
 	hostname, err := sshTarget.getHostname(ctx)
 	if err != nil {
-		return err
+		return nil, err
+	}
+
+	host := &v1alpha2.Host{}
+	host.SetGroupVersionKind(v1alpha2.SchemeGroupVersion.WithKind("Host"))
+	host.Namespace = "kops-system"
+	host.Name = hostname
+	host.Spec.InstanceGroup = options.InstanceGroup
+	host.Spec.PublicKey = string(publicKeyBytes)
+	host.Spec.PodCIDRs = options.PodCIDRs
+
+	return host, nil
+}
+
+func enrollHost(ctx context.Context, ig *kops.InstanceGroup, bootstrapData *BootstrapData, restConfig *rest.Config, hostData *v1alpha2.Host, sshTarget *SSHHost) error {
+	scheme := runtime.NewScheme()
+	if err := v1alpha2.AddToScheme(scheme); err != nil {
+		return fmt.Errorf("building kubernetes scheme: %w", err)
+	}
+	kubeClient, err := client.New(restConfig, client.Options{
+		Scheme: scheme,
+	})
+	if err != nil {
+		return fmt.Errorf("building kubernetes client: %w", err)
 	}
 
 	// We can't create the host resource in the API server for control-plane nodes,
 	// because the API server (likely) isn't running yet.
 	if !ig.IsControlPlane() {
-		if err := createHostResourceInAPIServer(ctx, options, hostname, publicKeyBytes, kubeClient); err != nil {
-			return err
+		if err := kubeClient.Create(ctx, hostData); err != nil {
+			return fmt.Errorf("failed to create host %s/%s: %w", hostData.Namespace, hostData.Name, err)
 		}
 	}
@@ -192,27 +231,13 @@
 
 	if len(bootstrapData.NodeupScript) != 0 {
-		if _, err := sshTarget.runScript(ctx, string(bootstrapData.NodeupScript), ExecOptions{Sudo: sudo, Echo: true}); err != nil {
+		if _, err := sshTarget.runScript(ctx, string(bootstrapData.NodeupScript), ExecOptions{Echo: true}); err != nil {
 			return err
 		}
 	}
 
 	return nil
 }
 
-func createHostResourceInAPIServer(ctx context.Context, options *ToolboxEnrollOptions, nodeName string, publicKey []byte, client client.Client) error {
-	host := &v1alpha2.Host{}
-	host.Namespace = "kops-system"
-	host.Name = nodeName
-	host.Spec.InstanceGroup = options.InstanceGroup
-	host.Spec.PublicKey = string(publicKey)
-
-	if err := client.Create(ctx, host); err != nil {
-		return fmt.Errorf("failed to create host %s/%s: %w", host.Namespace, host.Name, err)
-	}
-
-	return nil
-}
-
 const scriptCreateKey = `
 #!/bin/bash
 set -o errexit
@@ -323,7 +348,7 @@ func (s *SSHHost) runScript(ctx context.Context, script string, options ExecOpti
 	p := vfs.NewSSHPath(s.sshClient, s.hostname, scriptPath, s.sudo)
 
 	defer func() {
-		if _, err := s.runCommand(ctx, "rm -rf "+tempDir, ExecOptions{Sudo: s.sudo, Echo: false}); err != nil {
+		if _, err := s.runCommand(ctx, "rm -rf "+tempDir, ExecOptions{Echo: false}); err != nil {
 			klog.Warningf("error cleaning up temp directory %q: %v", tempDir, err)
 		}
 	}()
@@ -344,7 +369,6 @@ type CommandOutput struct {
 
 // ExecOptions holds options for running a command remotely.
 type ExecOptions struct {
-	Sudo bool
 	Echo bool
 }
 
@@ -361,10 +385,11 @@ func (s *SSHHost) runCommand(ctx context.Context, command string, options ExecOp
 	session.Stderr = &output.Stderr
 
 	if options.Echo {
-		session.Stdout = io.MultiWriter(os.Stdout, session.Stdout)
+		// We send both to stderr, so we don't "corrupt" stdout
+		session.Stdout = io.MultiWriter(os.Stderr, session.Stdout)
 		session.Stderr = io.MultiWriter(os.Stderr, session.Stderr)
 	}
-	if options.Sudo {
+	if s.sudo {
 		command = "sudo " + command
 	}
 	if err := session.Run(command); err != nil {
@@ -376,7 +401,7 @@
 // getHostname gets the hostname of the SSH target.
 // This is used as the node name when registering the node.
 func (s *SSHHost) getHostname(ctx context.Context) (string, error) {
-	output, err := s.runCommand(ctx, "hostname", ExecOptions{Sudo: false, Echo: true})
+	output, err := s.runCommand(ctx, "hostname", ExecOptions{Echo: true})
 	if err != nil {
 		return "", fmt.Errorf("failed to get hostname: %w", err)
 	}
diff --git a/pkg/kubeconfig/create_kubecfg.go b/pkg/kubeconfig/create_kubecfg.go
index 737fe96c12..36168011f6 100644
--- a/pkg/kubeconfig/create_kubecfg.go
+++ b/pkg/kubeconfig/create_kubecfg.go
@@ -20,6 +20,7 @@ import (
 	"context"
 	"crypto/x509/pkix"
 	"fmt"
+	"net"
 	"os/user"
 	"sort"
 	"time"
@@ -91,7 +92,7 @@ func BuildKubecfg(ctx context.Context, cluster *kops.Cluster, keyStore fi.Keysto
 		server = "https://" + cluster.APIInternalName()
 	} else {
 		if cluster.Spec.API.PublicName != "" {
-			server = "https://" + cluster.Spec.API.PublicName
+			server = "https://" + wrapIPv6Address(cluster.Spec.API.PublicName)
 		} else {
 			server = "https://api." + clusterName
 		}
@@ -132,7 +133,7 @@
 			if len(targets) != 1 {
 				klog.Warningf("Found multiple API endpoints (%v), choosing arbitrarily", targets)
 			}
-			server = "https://" + targets[0]
+			server = "https://" + wrapIPv6Address(targets[0])
 		}
 	}
 }
@@ -221,3 +222,14 @@
 
 	return b, nil
 }
+
+// wrapIPv6Address will wrap IPv6 addresses in square brackets,
+// for use in URLs; other endpoints are unchanged.
+func wrapIPv6Address(endpoint string) string {
+	ip := net.ParseIP(endpoint)
+	// IPv6 addresses are wrapped in square brackets in URLs
+	if ip != nil && ip.To4() == nil {
+		return "[" + endpoint + "]"
+	}
+	return endpoint
+}
diff --git a/pkg/model/components/kubelet.go b/pkg/model/components/kubelet.go
index 53fc0a86f7..856c0cdd83 100644
--- a/pkg/model/components/kubelet.go
+++ b/pkg/model/components/kubelet.go
@@ -124,30 +124,37 @@ func (b *KubeletOptionsBuilder) configureKubelet(cluster *kops.Cluster, kubelet
 	cloudProvider := cluster.GetCloudProvider()
 	klog.V(1).Infof("Cloud Provider: %s", cloudProvider)
 
-	if cloudProvider != kops.CloudProviderMetal {
-		if b.controlPlaneKubernetesVersion.IsLT("1.31") {
-			switch cloudProvider {
-			case kops.CloudProviderAWS:
-				kubelet.CloudProvider = "aws"
-			case kops.CloudProviderGCE:
-				kubelet.CloudProvider = "gce"
-			case kops.CloudProviderDO:
-				kubelet.CloudProvider = "external"
-			case kops.CloudProviderHetzner:
-				kubelet.CloudProvider = "external"
-			case kops.CloudProviderOpenstack:
-				kubelet.CloudProvider = "openstack"
-			case kops.CloudProviderAzure:
-				kubelet.CloudProvider = "azure"
-			case kops.CloudProviderScaleway:
-				kubelet.CloudProvider = "external"
-			default:
-				kubelet.CloudProvider = "external"
-			}
+	if b.controlPlaneKubernetesVersion.IsLT("1.31") {
+		switch cloudProvider {
+		case kops.CloudProviderAWS:
+			kubelet.CloudProvider = "aws"
+		case kops.CloudProviderGCE:
+			kubelet.CloudProvider = "gce"
+		case kops.CloudProviderDO:
+			kubelet.CloudProvider = "external"
+		case kops.CloudProviderHetzner:
+			kubelet.CloudProvider = "external"
+		case kops.CloudProviderOpenstack:
+			kubelet.CloudProvider = "openstack"
+		case kops.CloudProviderAzure:
+			kubelet.CloudProvider = "azure"
+		case kops.CloudProviderScaleway:
+			kubelet.CloudProvider = "external"
+		case kops.CloudProviderMetal:
+			kubelet.CloudProvider = ""
+		default:
+			kubelet.CloudProvider = "external"
+		}
 
-			if cluster.Spec.ExternalCloudControllerManager != nil {
-				kubelet.CloudProvider = "external"
-			}
+		if cluster.Spec.ExternalCloudControllerManager != nil {
+			kubelet.CloudProvider = "external"
+		}
+	} else {
+		if cloudProvider == kops.CloudProviderMetal {
+			// metal does not (yet) have a cloud-controller-manager, so we don't need to set the cloud-provider flag
+			// If we do set it to external, kubelet will taint the node with the node.cloudprovider.kubernetes.io/uninitialized taint
+			// and there is no cloud-controller-manager to remove it
+			kubelet.CloudProvider = ""
 		} else {
 			kubelet.CloudProvider = "external"
 		}
diff --git a/tests/e2e/scenarios/bare-metal/cleanup b/tests/e2e/scenarios/bare-metal/cleanup
index 60c52b8566..2061a961f1 100755
--- a/tests/e2e/scenarios/bare-metal/cleanup
+++ b/tests/e2e/scenarios/bare-metal/cleanup
@@ -38,6 +38,8 @@ sudo ip link del dev tap-vm0 || true
 sudo ip link del dev tap-vm1 || true
 sudo ip link del dev tap-vm2 || true
 
+sudo ip link del dev br0 || true
+
 rm -rf .build/vm0
 rm -rf .build/vm1
 rm -rf .build/vm2
diff --git a/tests/e2e/scenarios/bare-metal/e2e_test.go b/tests/e2e/scenarios/bare-metal/e2e_test.go
index 2c596569c6..216fff05f6 100644
--- a/tests/e2e/scenarios/bare-metal/e2e_test.go
+++ b/tests/e2e/scenarios/bare-metal/e2e_test.go
@@ -54,6 +54,35 @@ func TestNodeAddresses(t *testing.T) {
 		}
 	}
 }
+func TestNodesNotTainted(t *testing.T) {
+	h := NewHarness(context.Background(), t)
+
+	nodes := h.Nodes()
+
+	// Quick check that we have some nodes
+	if len(nodes) == 0 {
+		t.Errorf("expected some nodes, got 0 nodes")
+	}
+
+	// Verify that the nodes aren't tainted
+	// In particular, we are checking for the node.cloudprovider.kubernetes.io/uninitialized taint
+	for _, node := range nodes {
+		t.Logf("node %s has taints: %v", node.Name, node.Spec.Taints)
+		for _, taint := range node.Spec.Taints {
+			switch taint.Key {
+			case "node.cloudprovider.kubernetes.io/uninitialized":
+				t.Errorf("unexpected taint for node %s: %s", node.Name, taint.Key)
+				t.Errorf("if we pass the --cloud-provider=external flag to kubelet, the node will be tainted with the node.cloudprovider.kubernetes.io/uninitialized taint")
+				t.Errorf("the taint is expected to be removed by the cloud-controller-manager")
+				t.Errorf("(likely we should be running a cloud-controller-manager in the cluster, or we should not pass the --cloud-provider=external flag to kubelet)")
+			case "node-role.kubernetes.io/control-plane":
+				// expected for control-plane nodes
+			default:
+				t.Errorf("unexpected taint for node %s: %s", node.Name, taint.Key)
+			}
+		}
+	}
+}
 
 // Harness is a test harness for our bare-metal e2e tests
 type Harness struct {
diff --git a/tests/e2e/scenarios/bare-metal/run-test b/tests/e2e/scenarios/bare-metal/run-test
index fa3f26ecc2..895915875c 100755
--- a/tests/e2e/scenarios/bare-metal/run-test
+++ b/tests/e2e/scenarios/bare-metal/run-test
@@ -40,30 +40,39 @@ function cleanup() {
   fi
 }
 
-if [[ -z "${SKIP_CLEANUP:-}" ]]; then
-  trap cleanup EXIT
-fi
+trap cleanup EXIT
 
 # Create the directory that will back our mock s3 storage
 rm -rf ${WORKDIR}/s3
 mkdir -p ${WORKDIR}/s3/
 
+IPV4_PREFIX=10.123.45.
+
+VM0_IP=${IPV4_PREFIX}10
+VM1_IP=${IPV4_PREFIX}11
+VM2_IP=${IPV4_PREFIX}12
+
 # Start our VMs
 ${REPO_ROOT}/tests/e2e/scenarios/bare-metal/start-vms
 
+# Start an SSH agent; enroll assumes SSH connectivity to the VMs with the key in the agent
+eval $(ssh-agent)
+ssh-add ${REPO_ROOT}/.build/.ssh/id_ed25519
+
 . hack/dev-build-metal.sh
 
 echo "Waiting 10 seconds for VMs to start"
 sleep 10
 
 # Remove from known-hosts in case of reuse
-ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.10 || true
-ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.11 || true
-ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.12 || true
+ssh-keygen -f ~/.ssh/known_hosts -R ${VM0_IP} || true
+ssh-keygen -f ~/.ssh/known_hosts -R ${VM1_IP} || true
+ssh-keygen -f ~/.ssh/known_hosts -R ${VM2_IP} || true
 
-ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.10 uptime
-ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.11 uptime
-ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.12 uptime
+# Check SSH is working and accept the host keys
+ssh -o StrictHostKeyChecking=accept-new root@${VM0_IP} uptime
+ssh -o StrictHostKeyChecking=accept-new root@${VM1_IP} uptime
+ssh -o StrictHostKeyChecking=accept-new root@${VM2_IP} uptime
 
 cd ${REPO_ROOT}
 
@@ -93,7 +102,7 @@ ${KOPS} create cluster --cloud=metal metal.k8s.local --zones main --networking c
 # Set the IP ingress, required for metal cloud
 # TODO: is this the best option?
-${KOPS} edit cluster metal.k8s.local --set spec.api.publicName=10.123.45.10
+${KOPS} edit cluster metal.k8s.local --set spec.api.publicName=${VM0_IP}
 
 # Use latest etcd-manager image (while we're adding features)
 #${KOPS} edit cluster metal.k8s.local --set 'spec.etcdClusters[*].manager.image=us-central1-docker.pkg.dev/k8s-staging-images/etcd-manager/etcd-manager-slim:v3.0.20250628-7-ga7be11fb'
 
@@ -114,28 +123,24 @@ ${KOPS} get ig --name metal.k8s.local -oyaml
 ${KOPS} update cluster metal.k8s.local
 ${KOPS} update cluster metal.k8s.local --yes --admin
 
-# Start an SSH agent; enroll assumes SSH connectivity to the VMs with the key in the agent
-eval $(ssh-agent)
-ssh-add ${REPO_ROOT}/.build/.ssh/id_ed25519
-
 # Enroll the control-plane VM
-${KOPS} toolbox enroll --cluster metal.k8s.local --instance-group control-plane-main --host 10.123.45.10 --v=2
+${KOPS} toolbox enroll --cluster metal.k8s.local --instance-group control-plane-main --host ${VM0_IP} --v=2
 
 # Manual creation of "volumes" for etcd, and setting up peer nodes
-cat <