From 6b88da43768395384f91cc8ab4119b2296f6e2d2 Mon Sep 17 00:00:00 2001 From: justinsb Date: Mon, 11 Nov 2024 11:12:27 -0500 Subject: [PATCH 1/5] tests: add test for bare-metal with ipv6 IPv6 brings some new complexities, particularly around IPAM. --- .github/workflows/e2e.yml | 26 ++ tests/e2e/scenarios/bare-metal/cleanup | 2 + tests/e2e/scenarios/bare-metal/run-test | 61 ++-- tests/e2e/scenarios/bare-metal/scenario-ipv6 | 316 +++++++++++++++++++ 4 files changed, 377 insertions(+), 28 deletions(-) create mode 100755 tests/e2e/scenarios/bare-metal/scenario-ipv6 diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 79eb0bbf99..efdf521d6c 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -46,3 +46,29 @@ jobs: with: name: tests-e2e-scenarios-bare-metal path: /tmp/artifacts/ + + tests-e2e-scenarios-bare-metal-ipv6: + runs-on: ubuntu-24.04 + timeout-minutes: 70 + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + with: + path: ${{ env.GOPATH }}/src/k8s.io/kops + + - name: Set up go + uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed + with: + go-version-file: '${{ env.GOPATH }}/src/k8s.io/kops/go.mod' + + - name: tests/e2e/scenarios/bare-metal/run-test + working-directory: ${{ env.GOPATH }}/src/k8s.io/kops + run: | + timeout 60m tests/e2e/scenarios/bare-metal/scenario-ipv6 + env: + ARTIFACTS: /tmp/artifacts + - name: Archive production artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: tests-e2e-scenarios-bare-metal-ipv6 + path: /tmp/artifacts/ diff --git a/tests/e2e/scenarios/bare-metal/cleanup b/tests/e2e/scenarios/bare-metal/cleanup index 60c52b8566..2061a961f1 100755 --- a/tests/e2e/scenarios/bare-metal/cleanup +++ b/tests/e2e/scenarios/bare-metal/cleanup @@ -38,6 +38,8 @@ sudo ip link del dev tap-vm0 || true sudo ip link del dev tap-vm1 || true sudo ip link del dev tap-vm2 || true +sudo ip link del dev br0 || true + rm -rf .build/vm0 rm -rf .build/vm1 rm -rf .build/vm2 diff --git a/tests/e2e/scenarios/bare-metal/run-test b/tests/e2e/scenarios/bare-metal/run-test index fa3f26ecc2..895915875c 100755 --- a/tests/e2e/scenarios/bare-metal/run-test +++ b/tests/e2e/scenarios/bare-metal/run-test @@ -40,30 +40,39 @@ function cleanup() { fi } -if [[ -z "${SKIP_CLEANUP:-}" ]]; then - trap cleanup EXIT -fi +trap cleanup EXIT # Create the directory that will back our mock s3 storage rm -rf ${WORKDIR}/s3 mkdir -p ${WORKDIR}/s3/ +IPV4_PREFIX=10.123.45. + +VM0_IP=${IPV4_PREFIX}10 +VM1_IP=${IPV4_PREFIX}11 +VM2_IP=${IPV4_PREFIX}12 + # Start our VMs ${REPO_ROOT}/tests/e2e/scenarios/bare-metal/start-vms +# Start an SSH agent; enroll assumes SSH connectivity to the VMs with the key in the agent +eval $(ssh-agent) +ssh-add ${REPO_ROOT}/.build/.ssh/id_ed25519 + . 
hack/dev-build-metal.sh echo "Waiting 10 seconds for VMs to start" sleep 10 # Remove from known-hosts in case of reuse -ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.10 || true -ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.11 || true -ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.12 || true +ssh-keygen -f ~/.ssh/known_hosts -R ${VM0_IP} || true +ssh-keygen -f ~/.ssh/known_hosts -R ${VM1_IP} || true +ssh-keygen -f ~/.ssh/known_hosts -R ${VM2_IP} || true -ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.10 uptime -ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.11 uptime -ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.12 uptime +# Check SSH is working and accept the host keys +ssh -o StrictHostKeyChecking=accept-new root@${VM0_IP} uptime +ssh -o StrictHostKeyChecking=accept-new root@${VM1_IP} uptime +ssh -o StrictHostKeyChecking=accept-new root@${VM2_IP} uptime cd ${REPO_ROOT} @@ -93,7 +102,7 @@ ${KOPS} create cluster --cloud=metal metal.k8s.local --zones main --networking c # Set the IP ingress, required for metal cloud # TODO: is this the best option? -${KOPS} edit cluster metal.k8s.local --set spec.api.publicName=10.123.45.10 +${KOPS} edit cluster metal.k8s.local --set spec.api.publicName=${VM0_IP} # Use latest etcd-manager image (while we're adding features) #${KOPS} edit cluster metal.k8s.local --set 'spec.etcdClusters[*].manager.image=us-central1-docker.pkg.dev/k8s-staging-images/etcd-manager/etcd-manager-slim:v3.0.20250628-7-ga7be11fb' @@ -114,28 +123,24 @@ ${KOPS} get ig --name metal.k8s.local -oyaml ${KOPS} update cluster metal.k8s.local ${KOPS} update cluster metal.k8s.local --yes --admin -# Start an SSH agent; enroll assumes SSH connectivity to the VMs with the key in the agent -eval $(ssh-agent) -ssh-add ${REPO_ROOT}/.build/.ssh/id_ed25519 - # Enroll the control-plane VM -${KOPS} toolbox enroll --cluster metal.k8s.local --instance-group control-plane-main --host 10.123.45.10 --v=2 +${KOPS} toolbox enroll --cluster metal.k8s.local --instance-group control-plane-main --host ${VM0_IP} --v=2 # Manual creation of "volumes" for etcd, and setting up peer nodes -cat < Date: Tue, 12 Nov 2024 09:22:03 -0500 Subject: [PATCH 2/5] metal: simple IPAM for IPv6 --- cmd/kops-controller/controllers/gceipam.go | 2 +- cmd/kops-controller/controllers/metalipam.go | 115 ++++++++++++++++++ cmd/kops-controller/main.go | 6 + cmd/kops/toolbox_enroll.go | 1 + docs/cli/kops_toolbox_enroll.md | 1 + k8s/crds/kops.k8s.io_hosts.yaml | 6 + nodeup/pkg/model/kube_apiserver.go | 51 ++++++++ nodeup/pkg/model/prefix.go | 2 + pkg/apis/kops/v1alpha2/host.go | 3 + .../kops/v1alpha2/zz_generated.deepcopy.go | 7 +- pkg/commands/toolbox_enroll.go | 4 + pkg/kubeconfig/create_kubecfg.go | 16 ++- tests/e2e/scenarios/bare-metal/scenario-ipv6 | 17 +-- 13 files changed, 219 insertions(+), 12 deletions(-) create mode 100644 cmd/kops-controller/controllers/metalipam.go diff --git a/cmd/kops-controller/controllers/gceipam.go b/cmd/kops-controller/controllers/gceipam.go index 0a750f4c0b..37a8e8c9ad 100644 --- a/cmd/kops-controller/controllers/gceipam.go +++ b/cmd/kops-controller/controllers/gceipam.go @@ -56,7 +56,7 @@ func NewGCEIPAMReconciler(mgr manager.Manager) (*GCEIPAMReconciler, error) { return r, nil } -// GCEIPAMReconciler observes Node objects, assigning their`PodCIDRs` from the instance's `ExternalIpv6`. 
+// GCEIPAMReconciler observes Node objects, assigning their `PodCIDRs` from the instance's `ExternalIpv6`. type GCEIPAMReconciler struct { // client is the controller-runtime client client client.Client diff --git a/cmd/kops-controller/controllers/metalipam.go b/cmd/kops-controller/controllers/metalipam.go new file mode 100644 index 0000000000..796d234d41 --- /dev/null +++ b/cmd/kops-controller/controllers/metalipam.go @@ -0,0 +1,115 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + "fmt" + + "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + corev1client "k8s.io/client-go/kubernetes/typed/core/v1" + "k8s.io/klog/v2" + kopsapi "k8s.io/kops/pkg/apis/kops/v1alpha2" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/manager" +) + +// NewMetalIPAMReconciler is the constructor for a MetalIPAMReconciler +func NewMetalIPAMReconciler(ctx context.Context, mgr manager.Manager) (*MetalIPAMReconciler, error) { + klog.Info("starting metal ipam controller") + r := &MetalIPAMReconciler{ + client: mgr.GetClient(), + log: ctrl.Log.WithName("controllers").WithName("metal_ipam"), + } + + coreClient, err := corev1client.NewForConfig(mgr.GetConfig()) + if err != nil { + return nil, fmt.Errorf("building corev1 client: %w", err) + } + r.coreV1Client = coreClient + + return r, nil +} + +// MetalIPAMReconciler observes Node objects, assigning their `PodCIDRs` from the instance's `ExternalIpv6`. +type MetalIPAMReconciler struct { + // client is the controller-runtime client + client client.Client + + // log is a logr + log logr.Logger + + // coreV1Client is a client-go client for patching nodes + coreV1Client *corev1client.CoreV1Client +} + +// +kubebuilder:rbac:groups=,resources=nodes,verbs=get;list;watch;patch +// Reconcile is the main reconciler function that observes node changes. +func (r *MetalIPAMReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := klog.FromContext(ctx) + + node := &corev1.Node{} + if err := r.client.Get(ctx, req.NamespacedName, node); err != nil { + if apierrors.IsNotFound(err) { + // we'll ignore not-found errors, since they can't be fixed by an immediate + // requeue (we'll need to wait for a new notification), and we can get them + // on deleted requests. 
+ return ctrl.Result{}, nil + } + log.Error(err, "unable to fetch node", "node.name", node.Name) + return ctrl.Result{}, err + } + + host := &kopsapi.Host{} + id := types.NamespacedName{ + Namespace: "kops-system", + Name: node.Name, + } + if err := r.client.Get(ctx, id, host); err != nil { + log.Error(err, "unable to fetch host", "id", id) + return ctrl.Result{}, err + } + + if len(node.Spec.PodCIDRs) != 0 && len(host.Spec.PodCIDRs) > 0 { + log.V(2).Info("node has pod cidrs, skipping", "node.name", node.Name) + return ctrl.Result{}, nil + } + + if len(host.Spec.PodCIDRs) == 0 { + log.Info("host record has no pod cidrs, cannot assign pod cidrs to node", "node.name", node.Name) + return ctrl.Result{}, nil + } + + log.Info("assigning pod cidrs to node", "node.name", node.Name, "pod.cidrs", host.Spec.PodCIDRs) + if err := patchNodePodCIDRs(r.coreV1Client, ctx, node, host.Spec.PodCIDRs); err != nil { + log.Error(err, "unable to patch node", "node.name", node.Name) + return ctrl.Result{}, err + } + + return ctrl.Result{}, nil +} + +func (r *MetalIPAMReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + Named("metal_ipam"). + For(&corev1.Node{}). + Complete(r) +} diff --git a/cmd/kops-controller/main.go b/cmd/kops-controller/main.go index 1ac9d498da..f722e6aaba 100644 --- a/cmd/kops-controller/main.go +++ b/cmd/kops-controller/main.go @@ -392,6 +392,12 @@ func setupCloudIPAM(ctx context.Context, mgr manager.Manager, opt *config.Option return fmt.Errorf("creating gce IPAM controller: %w", err) } controller = ipamController + case "metal": + ipamController, err := controllers.NewMetalIPAMReconciler(ctx, mgr) + if err != nil { + return fmt.Errorf("creating metal IPAM controller: %w", err) + } + controller = ipamController default: return fmt.Errorf("kOps IPAM controller is not supported on cloud %q", opt.Cloud) } diff --git a/cmd/kops/toolbox_enroll.go b/cmd/kops/toolbox_enroll.go index 7fd321fed1..9e0a1a5a4c 100644 --- a/cmd/kops/toolbox_enroll.go +++ b/cmd/kops/toolbox_enroll.go @@ -45,6 +45,7 @@ func NewCmdToolboxEnroll(f commandutils.Factory, out io.Writer) *cobra.Command { cmd.Flags().StringVar(&options.ClusterName, "cluster", options.ClusterName, "Name of cluster to join") cmd.Flags().StringVar(&options.InstanceGroup, "instance-group", options.InstanceGroup, "Name of instance-group to join") + cmd.Flags().StringSliceVar(&options.PodCIDRs, "pod-cidr", options.PodCIDRs, "IP Address range to use for pods that run on this node") cmd.Flags().StringVar(&options.Host, "host", options.Host, "IP/hostname for machine to add") cmd.Flags().StringVar(&options.SSHUser, "ssh-user", options.SSHUser, "user for ssh") diff --git a/docs/cli/kops_toolbox_enroll.md b/docs/cli/kops_toolbox_enroll.md index 18d80a68b5..cab2261871 100644 --- a/docs/cli/kops_toolbox_enroll.md +++ b/docs/cli/kops_toolbox_enroll.md @@ -27,6 +27,7 @@ kops toolbox enroll [CLUSTER] [flags] -h, --help help for enroll --host string IP/hostname for machine to add --instance-group string Name of instance-group to join + --pod-cidr strings IP Address range to use for pods that run on this node --ssh-port int port for ssh (default 22) --ssh-user string user for ssh (default "root") --use-kubeconfig Use the server endpoint from the local kubeconfig instead of inferring from cluster name diff --git a/k8s/crds/kops.k8s.io_hosts.yaml b/k8s/crds/kops.k8s.io_hosts.yaml index 4f730e2f4d..8f5f228ae1 100644 --- a/k8s/crds/kops.k8s.io_hosts.yaml +++ b/k8s/crds/kops.k8s.io_hosts.yaml @@ -42,6 +42,12 @@ spec: 
properties: instanceGroup: type: string + podCIDRs: + description: PodCIDRs configures the IP ranges to be used for pods + on this node/host. + items: + type: string + type: array publicKey: type: string type: object diff --git a/nodeup/pkg/model/kube_apiserver.go b/nodeup/pkg/model/kube_apiserver.go index c0bbd94415..c3c99fb72b 100644 --- a/nodeup/pkg/model/kube_apiserver.go +++ b/nodeup/pkg/model/kube_apiserver.go @@ -19,10 +19,12 @@ package model import ( "context" "fmt" + "net" "path/filepath" "sort" "strings" + "k8s.io/klog/v2" "k8s.io/kops/pkg/apis/kops" "k8s.io/kops/pkg/flagbuilder" "k8s.io/kops/pkg/k8scodecs" @@ -77,6 +79,55 @@ func (b *KubeAPIServerBuilder) Build(c *fi.NodeupModelBuilderContext) error { } } + if b.CloudProvider() == kops.CloudProviderMetal { + // Workaround for https://github.com/kubernetes/kubernetes/issues/111671 + if b.IsIPv6Only() { + interfaces, err := net.Interfaces() + if err != nil { + return fmt.Errorf("getting local network interfaces: %w", err) + } + var ipv6s []net.IP + for _, intf := range interfaces { + addresses, err := intf.Addrs() + if err != nil { + return fmt.Errorf("getting addresses for network interface %q: %w", intf.Name, err) + } + for _, addr := range addresses { + ip, _, err := net.ParseCIDR(addr.String()) + if ip == nil { + return fmt.Errorf("parsing ip address %q (bound to network %q): %w", addr.String(), intf.Name, err) + } + if ip.To4() != nil { + // We're only looking for ipv6 + continue + } + if ip.IsLinkLocalUnicast() { + klog.V(4).Infof("ignoring link-local unicast addr %v", addr) + continue + } + if ip.IsLinkLocalMulticast() { + klog.V(4).Infof("ignoring link-local multicast addr %v", addr) + continue + } + if ip.IsLoopback() { + klog.V(4).Infof("ignoring loopback addr %v", addr) + continue + } + ipv6s = append(ipv6s, ip) + } + } + if len(ipv6s) > 1 { + klog.Warningf("found multiple ipv6s, choosing first: %v", ipv6s) + } + if len(ipv6s) == 0 { + klog.Warningf("did not find ipv6 address for kube-apiserver --advertise-address") + } + if len(ipv6s) > 0 { + kubeAPIServer.AdvertiseAddress = ipv6s[0].String() + } + } + } + b.configureOIDC(&kubeAPIServer) if err := b.writeAuthenticationConfig(c, &kubeAPIServer); err != nil { return err diff --git a/nodeup/pkg/model/prefix.go b/nodeup/pkg/model/prefix.go index de796341cf..0381b44672 100644 --- a/nodeup/pkg/model/prefix.go +++ b/nodeup/pkg/model/prefix.go @@ -41,6 +41,8 @@ func (b *PrefixBuilder) Build(c *fi.NodeupModelBuilderContext) error { }) case kops.CloudProviderGCE: // Prefix is assigned by GCE + case kops.CloudProviderMetal: + // IPv6 must be configured externally (not by nodeup) default: return fmt.Errorf("kOps IPAM controller not supported on cloud %q", b.CloudProvider()) } diff --git a/pkg/apis/kops/v1alpha2/host.go b/pkg/apis/kops/v1alpha2/host.go index 7e1295ca16..9aa242d3b6 100644 --- a/pkg/apis/kops/v1alpha2/host.go +++ b/pkg/apis/kops/v1alpha2/host.go @@ -36,6 +36,9 @@ type Host struct { type HostSpec struct { PublicKey string `json:"publicKey,omitempty"` InstanceGroup string `json:"instanceGroup,omitempty"` + + // PodCIDRs configures the IP ranges to be used for pods on this node/host. 
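+	// On the metal cloud, kops-controller's IPAM reconciler copies these values to the
+	// node's `spec.podCIDRs` (see metalipam.go).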
+ PodCIDRs []string `json:"podCIDRs,omitempty"` } // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object diff --git a/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go b/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go index dab1ba4875..c1b6e4cea6 100644 --- a/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go +++ b/pkg/apis/kops/v1alpha2/zz_generated.deepcopy.go @@ -2370,7 +2370,7 @@ func (in *Host) DeepCopyInto(out *Host) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - out.Spec = in.Spec + in.Spec.DeepCopyInto(&out.Spec) return } @@ -2428,6 +2428,11 @@ func (in *HostList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *HostSpec) DeepCopyInto(out *HostSpec) { *out = *in + if in.PodCIDRs != nil { + in, out := &in.PodCIDRs, &out.PodCIDRs + *out = make([]string, len(*in)) + copy(*out, *in) + } return } diff --git a/pkg/commands/toolbox_enroll.go b/pkg/commands/toolbox_enroll.go index aba1de01e4..3097e35a9f 100644 --- a/pkg/commands/toolbox_enroll.go +++ b/pkg/commands/toolbox_enroll.go @@ -67,6 +67,9 @@ type ToolboxEnrollOptions struct { SSHUser string SSHPort int + // PodCIDRs is the list of IP Address ranges to use for pods that run on this node + PodCIDRs []string + kubeconfig.CreateKubecfgOptions } @@ -205,6 +208,7 @@ func createHostResourceInAPIServer(ctx context.Context, options *ToolboxEnrollOp host.Name = nodeName host.Spec.InstanceGroup = options.InstanceGroup host.Spec.PublicKey = string(publicKey) + host.Spec.PodCIDRs = options.PodCIDRs if err := client.Create(ctx, host); err != nil { return fmt.Errorf("failed to create host %s/%s: %w", host.Namespace, host.Name, err) diff --git a/pkg/kubeconfig/create_kubecfg.go b/pkg/kubeconfig/create_kubecfg.go index 737fe96c12..36168011f6 100644 --- a/pkg/kubeconfig/create_kubecfg.go +++ b/pkg/kubeconfig/create_kubecfg.go @@ -20,6 +20,7 @@ import ( "context" "crypto/x509/pkix" "fmt" + "net" "os/user" "sort" "time" @@ -91,7 +92,7 @@ func BuildKubecfg(ctx context.Context, cluster *kops.Cluster, keyStore fi.Keysto server = "https://" + cluster.APIInternalName() } else { if cluster.Spec.API.PublicName != "" { - server = "https://" + cluster.Spec.API.PublicName + server = "https://" + wrapIPv6Address(cluster.Spec.API.PublicName) } else { server = "https://api." + clusterName } @@ -132,7 +133,7 @@ func BuildKubecfg(ctx context.Context, cluster *kops.Cluster, keyStore fi.Keysto if len(targets) != 1 { klog.Warningf("Found multiple API endpoints (%v), choosing arbitrarily", targets) } - server = "https://" + targets[0] + server = "https://" + wrapIPv6Address(targets[0]) } } } @@ -221,3 +222,14 @@ func BuildKubecfg(ctx context.Context, cluster *kops.Cluster, keyStore fi.Keysto return b, nil } + +// wrapIPv6Address will wrap IPv6 addresses in square brackets, +// for use in URLs; other endpoints are unchanged. 
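+// For example, "2001:db8::1" becomes "[2001:db8::1]", while "10.123.45.10" and
+// "api.metal.k8s.local" are returned unchanged.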
+func wrapIPv6Address(endpoint string) string { + ip := net.ParseIP(endpoint) + // IPv6 addresses are wrapped in square brackets in URLs + if ip != nil && ip.To4() == nil { + return "[" + endpoint + "]" + } + return endpoint +} diff --git a/tests/e2e/scenarios/bare-metal/scenario-ipv6 b/tests/e2e/scenarios/bare-metal/scenario-ipv6 index f9cf85b8f9..32314266bc 100755 --- a/tests/e2e/scenarios/bare-metal/scenario-ipv6 +++ b/tests/e2e/scenarios/bare-metal/scenario-ipv6 @@ -74,9 +74,9 @@ echo "Waiting 10 seconds for VMs to start" sleep 10 # Remove from known-hosts in case of reuse -ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.10 || true -ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.11 || true -ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.12 || true +ssh-keygen -f ~/.ssh/known_hosts -R ${VM0_IP} || true +ssh-keygen -f ~/.ssh/known_hosts -R ${VM1_IP} || true +ssh-keygen -f ~/.ssh/known_hosts -R ${VM2_IP} || true # Check SSH is working and accept the host keys ssh -o StrictHostKeyChecking=accept-new root@${VM0_IP} uptime @@ -206,8 +206,8 @@ ${KOPS} toolbox enroll --cluster ${CLUSTER_NAME} --instance-group control-plane- cat < Date: Mon, 17 Feb 2025 18:09:44 -0500 Subject: [PATCH 3/5] metal: don't set CCM external always for IPv6 While we do require CCM for IPv6, we should configure the appropriate CCM. --- pkg/model/components/kubelet.go | 53 ++++++++++++---------- tests/e2e/scenarios/bare-metal/e2e_test.go | 29 ++++++++++++ 2 files changed, 59 insertions(+), 23 deletions(-) diff --git a/pkg/model/components/kubelet.go b/pkg/model/components/kubelet.go index 53fc0a86f7..856c0cdd83 100644 --- a/pkg/model/components/kubelet.go +++ b/pkg/model/components/kubelet.go @@ -124,30 +124,37 @@ func (b *KubeletOptionsBuilder) configureKubelet(cluster *kops.Cluster, kubelet cloudProvider := cluster.GetCloudProvider() klog.V(1).Infof("Cloud Provider: %s", cloudProvider) - if cloudProvider != kops.CloudProviderMetal { - if b.controlPlaneKubernetesVersion.IsLT("1.31") { - switch cloudProvider { - case kops.CloudProviderAWS: - kubelet.CloudProvider = "aws" - case kops.CloudProviderGCE: - kubelet.CloudProvider = "gce" - case kops.CloudProviderDO: - kubelet.CloudProvider = "external" - case kops.CloudProviderHetzner: - kubelet.CloudProvider = "external" - case kops.CloudProviderOpenstack: - kubelet.CloudProvider = "openstack" - case kops.CloudProviderAzure: - kubelet.CloudProvider = "azure" - case kops.CloudProviderScaleway: - kubelet.CloudProvider = "external" - default: - kubelet.CloudProvider = "external" - } + if b.controlPlaneKubernetesVersion.IsLT("1.31") { + switch cloudProvider { + case kops.CloudProviderAWS: + kubelet.CloudProvider = "aws" + case kops.CloudProviderGCE: + kubelet.CloudProvider = "gce" + case kops.CloudProviderDO: + kubelet.CloudProvider = "external" + case kops.CloudProviderHetzner: + kubelet.CloudProvider = "external" + case kops.CloudProviderOpenstack: + kubelet.CloudProvider = "openstack" + case kops.CloudProviderAzure: + kubelet.CloudProvider = "azure" + case kops.CloudProviderScaleway: + kubelet.CloudProvider = "external" + case kops.CloudProviderMetal: + kubelet.CloudProvider = "" + default: + kubelet.CloudProvider = "external" + } - if cluster.Spec.ExternalCloudControllerManager != nil { - kubelet.CloudProvider = "external" - } + if cluster.Spec.ExternalCloudControllerManager != nil { + kubelet.CloudProvider = "external" + } + } else { + if cloudProvider == kops.CloudProviderMetal { + // metal does not (yet) have a cloud-controller-manager, so we don't need to set the 
cloud-provider flag + // If we do set it to external, kubelet will taint the node with the node.kops.k8s.io/uninitialized taint + // and there is no cloud-controller-manager to remove it + kubelet.CloudProvider = "" } else { kubelet.CloudProvider = "external" } diff --git a/tests/e2e/scenarios/bare-metal/e2e_test.go b/tests/e2e/scenarios/bare-metal/e2e_test.go index 2c596569c6..216fff05f6 100644 --- a/tests/e2e/scenarios/bare-metal/e2e_test.go +++ b/tests/e2e/scenarios/bare-metal/e2e_test.go @@ -54,6 +54,35 @@ func TestNodeAddresses(t *testing.T) { } } } +func TestNodesNotTainted(t *testing.T) { + h := NewHarness(context.Background(), t) + + nodes := h.Nodes() + + // Quick check that we have some nodes + if len(nodes) == 0 { + t.Errorf("expected some nodes, got 0 nodes") + } + + // Verify that the nodes aren't tainted + // In particular, we are checking for the node.cloudprovider.kubernetes.io/uninitialized taint + for _, node := range nodes { + t.Logf("node %s has taints: %v", node.Name, node.Spec.Taints) + for _, taint := range node.Spec.Taints { + switch taint.Key { + case "node.kops.k8s.io/uninitialized": + t.Errorf("unexpected taint for node %s: %s", node.Name, taint.Key) + t.Errorf("if we pass the --cloud-provider=external flag to kubelet, the node will be tainted with the node.kops.k8s.io/uninitialize taint") + t.Errorf("the taint is expected to be removed by the cloud-contoller-manager") + t.Errorf("(likely should be running a cloud-controller-manager in the cluster, or we should not pass the --cloud-provider=external flag to kubelet)") + case "node-role.kubernetes.io/control-plane": + // expected for control-plane nodes + default: + t.Errorf("unexpected taint for node %s: %s", node.Name, taint.Key) + } + } + } +} // Harness is a test harness for our bare-metal e2e tests type Harness struct { From 7086bd3e2915af918cea49df4fe4cb729dc612af Mon Sep 17 00:00:00 2001 From: justinsb Date: Mon, 17 Feb 2025 18:10:39 -0500 Subject: [PATCH 4/5] Fix IPv6 routes and use kube 1.32 --- tests/e2e/scenarios/bare-metal/scenario-ipv6 | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/e2e/scenarios/bare-metal/scenario-ipv6 b/tests/e2e/scenarios/bare-metal/scenario-ipv6 index 32314266bc..715032a92a 100755 --- a/tests/e2e/scenarios/bare-metal/scenario-ipv6 +++ b/tests/e2e/scenarios/bare-metal/scenario-ipv6 @@ -94,7 +94,12 @@ function configure_ipv6() { ssh root@${node_ip} ip link ssh root@${node_ip} ip -6 addr add ${ipv6_range} dev enp0s3 - ssh root@${node_ip} ip -6 route add default dev enp0s3 + + # Set our node as the default route + # (otherwise the kubelet will not discover the IPv6 addresses in ResolveBindAddress) + # node-to-node routes will be discovered by radvd + ssh root@${node_ip} ip -6 route add ${IPV6_PREFIX}0::/96 dev enp0s3 + ssh root@${node_ip} ip -6 route add default via ${IPV6_PREFIX}0:: cat < Date: Wed, 19 Feb 2025 06:51:04 -0500 Subject: [PATCH 5/5] metal: split host creation from enrollment This is needeed for bootstrapping the control plane, because it's a CRD so can't be registered until the control plane is running. It's also quite nice because we might want to review the contents of the host CRD, e.g. to verify the key out-of-band. 
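Sketched from the scenario change below, the intended flow for the control-plane
host is roughly:

  ${KOPS} toolbox enroll --cluster ${CLUSTER_NAME} \
      --instance-group control-plane-main \
      --host ${VM0_IP} --pod-cidr ${VM0_POD_CIDR} \
      --build-host > host.yaml
  # review host.yaml (spec.publicKey, spec.podCIDRs), then:
  kubectl apply -f host.yaml

(the scenario itself pipes the output straight to `kubectl apply -f -`).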
--- cmd/kops/toolbox_enroll.go | 2 + docs/cli/kops_toolbox_enroll.md | 1 + pkg/commands/toolbox_enroll.go | 139 +++++++++++-------- tests/e2e/scenarios/bare-metal/scenario-ipv6 | 10 +- upup/pkg/fi/loader/options_loader.go | 2 +- 5 files changed, 91 insertions(+), 63 deletions(-) diff --git a/cmd/kops/toolbox_enroll.go b/cmd/kops/toolbox_enroll.go index 9e0a1a5a4c..17e0e06196 100644 --- a/cmd/kops/toolbox_enroll.go +++ b/cmd/kops/toolbox_enroll.go @@ -51,6 +51,8 @@ func NewCmdToolboxEnroll(f commandutils.Factory, out io.Writer) *cobra.Command { cmd.Flags().StringVar(&options.SSHUser, "ssh-user", options.SSHUser, "user for ssh") cmd.Flags().IntVar(&options.SSHPort, "ssh-port", options.SSHPort, "port for ssh") + cmd.Flags().BoolVar(&options.BuildHost, "build-host", options.BuildHost, "only build the host resource, don't apply it or enroll the node") + options.CreateKubecfgOptions.AddCommonFlags(cmd.Flags()) return cmd diff --git a/docs/cli/kops_toolbox_enroll.md b/docs/cli/kops_toolbox_enroll.md index cab2261871..43c09a0f2e 100644 --- a/docs/cli/kops_toolbox_enroll.md +++ b/docs/cli/kops_toolbox_enroll.md @@ -23,6 +23,7 @@ kops toolbox enroll [CLUSTER] [flags] ``` --api-server string Override the API server used when communicating with the cluster kube-apiserver + --build-host only build the host resource, don't apply it or enroll the node --cluster string Name of cluster to join -h, --help help for enroll --host string IP/hostname for machine to add diff --git a/pkg/commands/toolbox_enroll.go b/pkg/commands/toolbox_enroll.go index 3097e35a9f..2189822bdb 100644 --- a/pkg/commands/toolbox_enroll.go +++ b/pkg/commands/toolbox_enroll.go @@ -67,6 +67,9 @@ type ToolboxEnrollOptions struct { SSHUser string SSHPort int + // BuildHost is a flag to only build the host resource, don't apply it or enroll the node + BuildHost bool + // PodCIDRs is the list of IP Address ranges to use for pods that run on this node PodCIDRs []string @@ -88,6 +91,11 @@ func RunToolboxEnroll(ctx context.Context, f commandutils.Factory, out io.Writer if options.InstanceGroup == "" { return fmt.Errorf("instance-group is required") } + if options.Host == "" { + // Technically we could build the host resource without the PKI, but this isn't the case we are targeting right now. + return fmt.Errorf("host is required") + } + clientset, err := f.KopsClient() if err != nil { return err @@ -103,40 +111,11 @@ func RunToolboxEnroll(ctx context.Context, f commandutils.Factory, out io.Writer if err != nil { return err } - fullInstanceGroup, err := configBuilder.GetFullInstanceGroup(ctx) - if err != nil { - return err - } - bootstrapData, err := configBuilder.GetBootstrapData(ctx) - if err != nil { - return err - } // Enroll the node over SSH. 
- if options.Host != "" { - restConfig, err := f.RESTConfig(ctx, fullCluster, options.CreateKubecfgOptions) - if err != nil { - return err - } - - if err := enrollHost(ctx, fullInstanceGroup, options, bootstrapData, restConfig); err != nil { - return err - } - } - - return nil -} - -func enrollHost(ctx context.Context, ig *kops.InstanceGroup, options *ToolboxEnrollOptions, bootstrapData *BootstrapData, restConfig *rest.Config) error { - scheme := runtime.NewScheme() - if err := v1alpha2.AddToScheme(scheme); err != nil { - return fmt.Errorf("building kubernetes scheme: %w", err) - } - kubeClient, err := client.New(restConfig, client.Options{ - Scheme: scheme, - }) + restConfig, err := f.RESTConfig(ctx, fullCluster, options.CreateKubecfgOptions) if err != nil { - return fmt.Errorf("building kubernetes client: %w", err) + return err } sudo := true @@ -150,6 +129,39 @@ func enrollHost(ctx context.Context, ig *kops.InstanceGroup, options *ToolboxEnr } defer sshTarget.Close() + hostData, err := buildHostData(ctx, sshTarget, options) + if err != nil { + return err + } + + if options.BuildHost { + klog.Infof("building host data for %+v", hostData) + b, err := yaml.Marshal(hostData) + if err != nil { + return fmt.Errorf("error marshalling host data: %w", err) + } + fmt.Fprintf(out, "%s\n", string(b)) + return nil + } + + fullInstanceGroup, err := configBuilder.GetFullInstanceGroup(ctx) + if err != nil { + return err + } + bootstrapData, err := configBuilder.GetBootstrapData(ctx) + if err != nil { + return err + } + + if err := enrollHost(ctx, fullInstanceGroup, bootstrapData, restConfig, hostData, sshTarget); err != nil { + return err + } + + return nil +} + +// buildHostData builds an instance of the Host CRD, based on information in the options and by SSHing to the target host. 
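+// It only talks to the target machine over SSH (creating the machine keypair if missing) and
+// does not contact the kube-apiserver, which is why --build-host can run before the control
+// plane exists.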
+func buildHostData(ctx context.Context, sshTarget *SSHHost, options *ToolboxEnrollOptions) (*v1alpha2.Host, error) { publicKeyPath := "/etc/kubernetes/kops/pki/machine/public.pem" publicKeyBytes, err := sshTarget.readFile(ctx, publicKeyPath) @@ -157,19 +169,20 @@ func enrollHost(ctx context.Context, ig *kops.InstanceGroup, options *ToolboxEnr if errors.Is(err, fs.ErrNotExist) { publicKeyBytes = nil } else { - return fmt.Errorf("error reading public key %q: %w", publicKeyPath, err) + return nil, fmt.Errorf("error reading public key %q: %w", publicKeyPath, err) } } + // Create the key if it doesn't exist publicKeyBytes = bytes.TrimSpace(publicKeyBytes) if len(publicKeyBytes) == 0 { - if _, err := sshTarget.runScript(ctx, scriptCreateKey, ExecOptions{Sudo: sudo, Echo: true}); err != nil { - return err + if _, err := sshTarget.runScript(ctx, scriptCreateKey, ExecOptions{Echo: true}); err != nil { + return nil, err } b, err := sshTarget.readFile(ctx, publicKeyPath) if err != nil { - return fmt.Errorf("error reading public key %q (after creation): %w", publicKeyPath, err) + return nil, fmt.Errorf("error reading public key %q (after creation): %w", publicKeyPath, err) } publicKeyBytes = b } @@ -177,14 +190,37 @@ func enrollHost(ctx context.Context, ig *kops.InstanceGroup, options *ToolboxEnr hostname, err := sshTarget.getHostname(ctx) if err != nil { - return err + return nil, err + } + + host := &v1alpha2.Host{} + host.SetGroupVersionKind(v1alpha2.SchemeGroupVersion.WithKind("Host")) + host.Namespace = "kops-system" + host.Name = hostname + host.Spec.InstanceGroup = options.InstanceGroup + host.Spec.PublicKey = string(publicKeyBytes) + host.Spec.PodCIDRs = options.PodCIDRs + + return host, nil +} + +func enrollHost(ctx context.Context, ig *kops.InstanceGroup, bootstrapData *BootstrapData, restConfig *rest.Config, hostData *v1alpha2.Host, sshTarget *SSHHost) error { + scheme := runtime.NewScheme() + if err := v1alpha2.AddToScheme(scheme); err != nil { + return fmt.Errorf("building kubernetes scheme: %w", err) + } + kubeClient, err := client.New(restConfig, client.Options{ + Scheme: scheme, + }) + if err != nil { + return fmt.Errorf("building kubernetes client: %w", err) } // We can't create the host resource in the API server for control-plane nodes, // because the API server (likely) isn't running yet. 
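+	// For control-plane nodes the Host resource is applied out-of-band instead, for example
+	// by piping `kops toolbox enroll --build-host` output into `kubectl apply -f -`, as the
+	// bare-metal IPv6 scenario does.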
if !ig.IsControlPlane() { - if err := createHostResourceInAPIServer(ctx, options, hostname, publicKeyBytes, kubeClient); err != nil { - return err + if err := kubeClient.Create(ctx, hostData); err != nil { + return fmt.Errorf("failed to create host %s/%s: %w", hostData.Namespace, hostData.Name, err) } } @@ -195,28 +231,13 @@ func enrollHost(ctx context.Context, ig *kops.InstanceGroup, options *ToolboxEnr } if len(bootstrapData.NodeupScript) != 0 { - if _, err := sshTarget.runScript(ctx, string(bootstrapData.NodeupScript), ExecOptions{Sudo: sudo, Echo: true}); err != nil { + if _, err := sshTarget.runScript(ctx, string(bootstrapData.NodeupScript), ExecOptions{Echo: true}); err != nil { return err } } return nil } -func createHostResourceInAPIServer(ctx context.Context, options *ToolboxEnrollOptions, nodeName string, publicKey []byte, client client.Client) error { - host := &v1alpha2.Host{} - host.Namespace = "kops-system" - host.Name = nodeName - host.Spec.InstanceGroup = options.InstanceGroup - host.Spec.PublicKey = string(publicKey) - host.Spec.PodCIDRs = options.PodCIDRs - - if err := client.Create(ctx, host); err != nil { - return fmt.Errorf("failed to create host %s/%s: %w", host.Namespace, host.Name, err) - } - - return nil -} - const scriptCreateKey = ` #!/bin/bash set -o errexit @@ -327,7 +348,7 @@ func (s *SSHHost) runScript(ctx context.Context, script string, options ExecOpti p := vfs.NewSSHPath(s.sshClient, s.hostname, scriptPath, s.sudo) defer func() { - if _, err := s.runCommand(ctx, "rm -rf "+tempDir, ExecOptions{Sudo: s.sudo, Echo: false}); err != nil { + if _, err := s.runCommand(ctx, "rm -rf "+tempDir, ExecOptions{Echo: false}); err != nil { klog.Warningf("error cleaning up temp directory %q: %v", tempDir, err) } }() @@ -348,7 +369,6 @@ type CommandOutput struct { // ExecOptions holds options for running a command remotely. type ExecOptions struct { - Sudo bool Echo bool } @@ -365,10 +385,11 @@ func (s *SSHHost) runCommand(ctx context.Context, command string, options ExecOp session.Stderr = &output.Stderr if options.Echo { - session.Stdout = io.MultiWriter(os.Stdout, session.Stdout) + // We send both to stderr, so we don't "corrupt" stdout + session.Stdout = io.MultiWriter(os.Stderr, session.Stdout) session.Stderr = io.MultiWriter(os.Stderr, session.Stderr) } - if options.Sudo { + if s.sudo { command = "sudo " + command } if err := session.Run(command); err != nil { @@ -380,7 +401,7 @@ func (s *SSHHost) runCommand(ctx context.Context, command string, options ExecOp // getHostname gets the hostname of the SSH target. // This is used as the node name when registering the node. 
func (s *SSHHost) getHostname(ctx context.Context) (string, error) { - output, err := s.runCommand(ctx, "hostname", ExecOptions{Sudo: false, Echo: true}) + output, err := s.runCommand(ctx, "hostname", ExecOptions{Echo: true}) if err != nil { return "", fmt.Errorf("failed to get hostname: %w", err) } diff --git a/tests/e2e/scenarios/bare-metal/scenario-ipv6 b/tests/e2e/scenarios/bare-metal/scenario-ipv6 index 715032a92a..8d6e3a6ae7 100755 --- a/tests/e2e/scenarios/bare-metal/scenario-ipv6 +++ b/tests/e2e/scenarios/bare-metal/scenario-ipv6 @@ -228,6 +228,13 @@ for i in {1..60}; do sleep 10 done +# Create CRD and namespace for host records +kubectl create ns kops-system +kubectl apply -f ${REPO_ROOT}/k8s/crds/kops.k8s.io_hosts.yaml + +# Create the host record (we can't auto create for control plane nodes) +${KOPS} toolbox enroll --cluster ${CLUSTER_NAME} --instance-group control-plane-main --host ${VM0_IP} --pod-cidr ${VM0_POD_CIDR} --v=2 --build-host | kubectl apply -f - + kubectl get nodes kubectl get pods -A @@ -238,9 +245,6 @@ sleep 10 kubectl get nodes kubectl get pods -A -# For host records -kubectl create ns kops-system -kubectl apply -f ${REPO_ROOT}/k8s/crds/kops.k8s.io_hosts.yaml # kops-controller extra permissions kubectl apply --server-side -f - <
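A rough set of sanity checks once the cluster is up, assuming the Host CRD and the
scenario cluster above (these commands are not prescribed by the patches themselves):

  # Host records created by `kops toolbox enroll`, including spec.podCIDRs per node
  kubectl -n kops-system get hosts.kops.k8s.io -o yaml
  # Pod CIDRs that the metal IPAM reconciler copied onto the nodes
  kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.podCIDRs}{"\n"}{end}'
  # Nodes should report IPv6 internal addresses and carry no uninitialized taint
  kubectl get nodes -o wide
  kubectl describe nodes | grep -i taints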