tests: add test for bare-metal with ipv6

IPv6 brings some new complexities, particularly around IPAM.
justinsb 2024-11-11 11:12:27 -05:00
parent 5f564fe1ab
commit 6b88da4376
4 changed files with 377 additions and 28 deletions

View File

@@ -46,3 +46,29 @@ jobs:
       with:
         name: tests-e2e-scenarios-bare-metal
         path: /tmp/artifacts/
+
+  tests-e2e-scenarios-bare-metal-ipv6:
+    runs-on: ubuntu-24.04
+    timeout-minutes: 70
+
+    steps:
+    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+      with:
+        path: ${{ env.GOPATH }}/src/k8s.io/kops
+    - name: Set up go
+      uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed
+      with:
+        go-version-file: '${{ env.GOPATH }}/src/k8s.io/kops/go.mod'
+    - name: tests/e2e/scenarios/bare-metal/scenario-ipv6
+      working-directory: ${{ env.GOPATH }}/src/k8s.io/kops
+      run: |
+        timeout 60m tests/e2e/scenarios/bare-metal/scenario-ipv6
+      env:
+        ARTIFACTS: /tmp/artifacts
+
+    - name: Archive production artifacts
+      if: always()
+      uses: actions/upload-artifact@v4
+      with:
+        name: tests-e2e-scenarios-bare-metal-ipv6
+        path: /tmp/artifacts/

View File

@@ -38,6 +38,8 @@ sudo ip link del dev tap-vm0 || true
 sudo ip link del dev tap-vm1 || true
 sudo ip link del dev tap-vm2 || true
 
+sudo ip link del dev br0 || true
+
 rm -rf .build/vm0
 rm -rf .build/vm1
 rm -rf .build/vm2

View File

@@ -40,30 +40,39 @@ function cleanup() {
   fi
 }
 
-if [[ -z "${SKIP_CLEANUP:-}" ]]; then
-  trap cleanup EXIT
-fi
+trap cleanup EXIT
 
 # Create the directory that will back our mock s3 storage
 rm -rf ${WORKDIR}/s3
 mkdir -p ${WORKDIR}/s3/
 
+IPV4_PREFIX=10.123.45.
+
+VM0_IP=${IPV4_PREFIX}10
+VM1_IP=${IPV4_PREFIX}11
+VM2_IP=${IPV4_PREFIX}12
+
 # Start our VMs
 ${REPO_ROOT}/tests/e2e/scenarios/bare-metal/start-vms
 
+# Start an SSH agent; enroll assumes SSH connectivity to the VMs with the key in the agent
+eval $(ssh-agent)
+ssh-add ${REPO_ROOT}/.build/.ssh/id_ed25519
+
 . hack/dev-build-metal.sh
 
 echo "Waiting 10 seconds for VMs to start"
 sleep 10
 
 # Remove from known-hosts in case of reuse
-ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.10 || true
-ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.11 || true
-ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.12 || true
+ssh-keygen -f ~/.ssh/known_hosts -R ${VM0_IP} || true
+ssh-keygen -f ~/.ssh/known_hosts -R ${VM1_IP} || true
+ssh-keygen -f ~/.ssh/known_hosts -R ${VM2_IP} || true
 
-ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.10 uptime
-ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.11 uptime
-ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.12 uptime
+# Check SSH is working and accept the host keys
+ssh -o StrictHostKeyChecking=accept-new root@${VM0_IP} uptime
+ssh -o StrictHostKeyChecking=accept-new root@${VM1_IP} uptime
+ssh -o StrictHostKeyChecking=accept-new root@${VM2_IP} uptime
 
 cd ${REPO_ROOT}
@@ -93,7 +102,7 @@ ${KOPS} create cluster --cloud=metal metal.k8s.local --zones main --networking c
 
 # Set the IP ingress, required for metal cloud
 # TODO: is this the best option?
-${KOPS} edit cluster metal.k8s.local --set spec.api.publicName=10.123.45.10
+${KOPS} edit cluster metal.k8s.local --set spec.api.publicName=${VM0_IP}
 
 # Use latest etcd-manager image (while we're adding features)
 #${KOPS} edit cluster metal.k8s.local --set 'spec.etcdClusters[*].manager.image=us-central1-docker.pkg.dev/k8s-staging-images/etcd-manager/etcd-manager-slim:v3.0.20250628-7-ga7be11fb'
@@ -114,28 +123,24 @@ ${KOPS} get ig --name metal.k8s.local -oyaml
 ${KOPS} update cluster metal.k8s.local
 ${KOPS} update cluster metal.k8s.local --yes --admin
 
-# Start an SSH agent; enroll assumes SSH connectivity to the VMs with the key in the agent
-eval $(ssh-agent)
-ssh-add ${REPO_ROOT}/.build/.ssh/id_ed25519
-
 # Enroll the control-plane VM
-${KOPS} toolbox enroll --cluster metal.k8s.local --instance-group control-plane-main --host 10.123.45.10 --v=2
+${KOPS} toolbox enroll --cluster metal.k8s.local --instance-group control-plane-main --host ${VM0_IP} --v=2
 
 # Manual creation of "volumes" for etcd, and setting up peer nodes
-cat <<EOF | ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.10 tee -a /etc/hosts
+cat <<EOF | ssh root@${VM0_IP} tee -a /etc/hosts
 # Hosts added for etcd discovery
-10.123.45.10 node0.main.metal.k8s.local
-10.123.45.10 node0.events.metal.k8s.local
+${VM0_IP} node0.main.metal.k8s.local
+${VM0_IP} node0.events.metal.k8s.local
 EOF
 
-ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.10 cat /etc/hosts
+ssh root@${VM0_IP} cat /etc/hosts
 
-ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.10 mkdir -p /mnt/disks/metal.k8s.local--main--0/mnt
-ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.10 touch /mnt/disks/metal.k8s.local--main--0/mnt/please-create-new-cluster
-ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.10 mkdir -p /mnt/disks/metal.k8s.local--events--0/mnt
-ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.10 touch /mnt/disks/metal.k8s.local--events--0/mnt/please-create-new-cluster
+ssh root@${VM0_IP} mkdir -p /mnt/disks/metal.k8s.local--main--0/mnt
+ssh root@${VM0_IP} touch /mnt/disks/metal.k8s.local--main--0/mnt/please-create-new-cluster
+ssh root@${VM0_IP} mkdir -p /mnt/disks/metal.k8s.local--events--0/mnt
+ssh root@${VM0_IP} touch /mnt/disks/metal.k8s.local--events--0/mnt/please-create-new-cluster
 
 echo "Waiting for kube to start"
@@ -204,18 +209,18 @@ function enroll_node() {
 
   # Manual "discovery" for control-plane endpoints
   # TODO: Replace with well-known IP
-  cat <<EOF | ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@${node_ip} tee -a /etc/hosts
+  cat <<EOF | ssh root@${node_ip} tee -a /etc/hosts
 # Hosts added for leader discovery
-10.123.45.10 kops-controller.internal.metal.k8s.local
-10.123.45.10 api.internal.metal.k8s.local
+${VM0_IP} kops-controller.internal.metal.k8s.local
+${VM0_IP} api.internal.metal.k8s.local
 EOF
 
   timeout 10m ${KOPS} toolbox enroll --cluster metal.k8s.local --instance-group nodes-main --host ${node_ip} --v=2
 }
 
-enroll_node 10.123.45.11
-enroll_node 10.123.45.12
+enroll_node ${VM1_IP}
+enroll_node ${VM2_IP}
 
 echo "Waiting 30 seconds for nodes to be ready"
 sleep 30

View File

@@ -0,0 +1,316 @@
#!/usr/bin/env bash
# Copyright 2024 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -o errexit
set -o nounset
set -o pipefail
set -o xtrace
REPO_ROOT=$(git rev-parse --show-toplevel)
cd ${REPO_ROOT}
WORKDIR=${REPO_ROOT}/.build/
BINDIR=${WORKDIR}/bin
mkdir -p "${BINDIR}"
go build -o ${BINDIR}/kops ./cmd/kops
export KOPS=${BINDIR}/kops
function cleanup() {
  echo "running dump-artifacts"
  ${REPO_ROOT}/tests/e2e/scenarios/bare-metal/dump-artifacts || true

  if [[ -z "${SKIP_CLEANUP:-}" ]]; then
    echo "running cleanup"
    ${REPO_ROOT}/tests/e2e/scenarios/bare-metal/cleanup || true
  fi
}

trap cleanup EXIT
# Create the directory that will back our mock s3 storage
rm -rf ${WORKDIR}/s3
mkdir -p ${WORKDIR}/s3/
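
# IPv6 address plan: each VM gets a /96 out of the ULA range fd00:10:123:45::/64,
# used both for the node's own address and as the pod CIDR it advertises via radvd.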
IPV6_PREFIX=fd00:10:123:45:
IPV4_PREFIX=10.123.45.
VM0_IP=${IPV4_PREFIX}10
VM1_IP=${IPV4_PREFIX}11
VM2_IP=${IPV4_PREFIX}12
VM0_IPV6=${IPV6_PREFIX}a::
VM1_IPV6=${IPV6_PREFIX}b::
VM2_IPV6=${IPV6_PREFIX}c::
VM0_POD_CIDR=${IPV6_PREFIX}a::/96
VM1_POD_CIDR=${IPV6_PREFIX}b::/96
VM2_POD_CIDR=${IPV6_PREFIX}c::/96
# Start our VMs
${REPO_ROOT}/tests/e2e/scenarios/bare-metal/start-vms
# Start an SSH agent; enroll assumes SSH connectivity to the VMs with the key in the agent
eval $(ssh-agent)
ssh-add ${REPO_ROOT}/.build/.ssh/id_ed25519
. hack/dev-build-metal.sh
echo "Waiting 10 seconds for VMs to start"
sleep 10
# Remove from known-hosts in case of reuse
ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.10 || true
ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.11 || true
ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.12 || true
# Check SSH is working and accept the host keys
ssh -o StrictHostKeyChecking=accept-new root@${VM0_IP} uptime
ssh -o StrictHostKeyChecking=accept-new root@${VM1_IP} uptime
ssh -o StrictHostKeyChecking=accept-new root@${VM2_IP} uptime
cd ${REPO_ROOT}
# Configure IPv6 networking
function configure_ipv6() {
  local hostname=$1
  local node_ip=$2
  local ipv6_ip=$3
  local ipv6_range=$4
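
  # Assign the node's address (its /96 pod range) to enp0s3 and send all other IPv6
  # traffic on-link over that interface, which connects to br0 on the host.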
  ssh root@${node_ip} ip link
  ssh root@${node_ip} ip -6 addr add ${ipv6_range} dev enp0s3
  ssh root@${node_ip} ip -6 route add default dev enp0s3

  cat <<EOF | ssh root@${node_ip} tee /etc/resolv.conf
nameserver 8.8.8.8
nameserver 8.8.4.4
nameserver 2001:4860:4860::8888
nameserver 2001:4860:4860::8844
EOF

  # Ensure /etc/hosts has an entry for the host
  cat <<EOF | ssh root@${node_ip} tee -a /etc/hosts
::1 ${hostname} localhost
EOF
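
  # Advertise this node's pod range to the other machines on the bridge via router
  # advertisements; AdvDefaultLifetime 0 stops peers from treating the node as a
  # default router, so they only learn the /96 route.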
  cat << EOF | ssh root@${node_ip} tee /etc/radvd.conf
interface enp0s3
{
  AdvSendAdvert on;
  AdvDefaultLifetime 0; # Not a default router

  route ${ipv6_range}
  {
  };
};
EOF

  ssh root@${node_ip} apt-get update
  ssh root@${node_ip} apt-get install -y radvd
  ssh root@${node_ip} systemctl restart radvd
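
  # accept_ra=2 lets the kernel process router advertisements even with forwarding
  # enabled; accept_ra_rt_info_max_plen=96 allows installing the /96 routes that the
  # other nodes advertise.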
  ssh root@${node_ip} sysctl net.ipv6.conf.enp0s3.accept_ra=2
  ssh root@${node_ip} sysctl net.ipv6.conf.enp0s3.accept_ra_rt_info_max_plen=96

  ssh root@${node_ip} ip -6 addr
  ssh root@${node_ip} ip -6 route

  #sudo ip -6 route add ${ipv6_range} dev br0 via ${ipv6_ip}
}
# Configure our IPv6 addresses on the bridge
sudo ip address add ${IPV6_PREFIX}0::/96 dev br0 || true
sudo sysctl net.ipv6.conf.br0.accept_ra=2
sudo sysctl net.ipv6.conf.br0.accept_ra_rt_info_max_plen=96
# Configure the VMs on the bridge
configure_ipv6 vm0 ${VM0_IP} ${VM0_IPV6} ${VM0_POD_CIDR}
configure_ipv6 vm1 ${VM1_IP} ${VM1_IPV6} ${VM1_POD_CIDR}
configure_ipv6 vm2 ${VM2_IP} ${VM2_IPV6} ${VM2_POD_CIDR}
ip -6 route
# Check the VMs are OK
ping6 -c 1 ${VM0_IPV6}
ping6 -c 1 ${VM1_IPV6}
ping6 -c 1 ${VM2_IPV6}
# Enable feature flag for bare metal
export KOPS_FEATURE_FLAGS=Metal
# Set up the AWS credentials
export AWS_SECRET_ACCESS_KEY=secret
export AWS_ACCESS_KEY_ID=accesskey
export AWS_ENDPOINT_URL=http://10.123.45.1:8443
export AWS_REGION=us-east-1
export S3_ENDPOINT=${AWS_ENDPOINT_URL}
export S3_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
export S3_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
# Create the state-store bucket in our mock s3 server
export KOPS_STATE_STORE=s3://kops-state-store/
aws --version
aws s3 ls s3://kops-state-store || aws s3 mb s3://kops-state-store
export CLUSTER_NAME=metalipv6.k8s.local
# List clusters (there should not be any yet)
${KOPS} get cluster || true
# Create a cluster
${KOPS} create cluster --cloud=metal ${CLUSTER_NAME} --zones main --networking cni --ipv6
# Set the IP ingress, required for metal cloud
# TODO: is this the best option?
${KOPS} edit cluster ${CLUSTER_NAME} --set spec.api.publicName=${VM0_IPV6}
# Use latest etcd-manager image (while we're adding features)
${KOPS} edit cluster ${CLUSTER_NAME} --set 'spec.etcdClusters[*].manager.image=us-central1-docker.pkg.dev/k8s-staging-images/etcd-manager/etcd-manager-static:latest'
# Use 1.31 kubernetes so we get kube-apiserver fixes
export KOPS_RUN_TOO_NEW_VERSION=1
"${KOPS}" edit cluster ${CLUSTER_NAME} "--set=cluster.spec.kubernetesVersion=1.31.0"
# List clusters
${KOPS} get cluster
${KOPS} get cluster -oyaml
# List instance groups
${KOPS} get ig --name ${CLUSTER_NAME}
${KOPS} get ig --name ${CLUSTER_NAME} -oyaml
# Apply basic configuration
${KOPS} update cluster ${CLUSTER_NAME}
${KOPS} update cluster ${CLUSTER_NAME} --yes --admin
# Enroll the control-plane VM
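# There is no cloud IPAM on bare metal, so each node's pod CIDR is assigned
# explicitly via --pod-cidr.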
${KOPS} toolbox enroll --cluster ${CLUSTER_NAME} --instance-group control-plane-main --host ${VM0_IP} --pod-cidr ${VM0_POD_CIDR} --v=2
# Manual creation of "volumes" for etcd, and setting up peer nodes
cat <<EOF | ssh root@${VM0_IP} tee -a /etc/hosts
# Hosts added for etcd discovery
10.123.45.10 node0.main.${CLUSTER_NAME}
10.123.45.10 node0.events.${CLUSTER_NAME}
EOF
ssh root@${VM0_IP} cat /etc/hosts
ssh root@${VM0_IP} mkdir -p /mnt/disks/${CLUSTER_NAME}--main--0/mnt
ssh root@${VM0_IP} touch /mnt/disks/${CLUSTER_NAME}--main--0/mnt/please-create-new-cluster
ssh root@${VM0_IP} mkdir -p /mnt/disks/${CLUSTER_NAME}--events--0/mnt
ssh root@${VM0_IP} touch /mnt/disks/${CLUSTER_NAME}--events--0/mnt/please-create-new-cluster
echo "Waiting for kube to start"
# Wait for kube-apiserver to be ready, timeout after 10 minutes
for i in {1..60}; do
  if kubectl get nodes; then
    break
  fi
  sleep 10
done
kubectl get nodes
kubectl get pods -A
# Install kindnet
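# kindnet routes pod traffic between nodes based on each node's pod CIDR.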
kubectl create -f https://raw.githubusercontent.com/aojea/kindnet/main/install-kindnet.yaml
echo "Waiting 10 seconds for kindnet to start"
sleep 10
kubectl get nodes
kubectl get pods -A
# For host records
kubectl create ns kops-system
kubectl apply -f ${REPO_ROOT}/k8s/crds/kops.k8s.io_hosts.yaml
# kops-controller extra permissions
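# Allows kops-controller to read Host objects (kops.k8s.io) and to patch node status,
# which it needs in order to set node addresses.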
kubectl apply --server-side -f - <<EOF
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kops-controller:pki-verifier
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kops-controller:pki-verifier
subjects:
- apiGroup: rbac.authorization.k8s.io
  kind: User
  name: system:serviceaccount:kube-system:kops-controller
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kops-controller:pki-verifier
rules:
- apiGroups:
  - "kops.k8s.io"
  resources:
  - hosts
  verbs:
  - get
  - list
  - watch
# Must be able to set node addresses
# TODO: Move out?
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
EOF
function enroll_node() {
  local node_ip=$1
  local pod_cidr=$2

  # Manual "discovery" for control-plane endpoints
  # TODO: Replace with well-known IP
  cat <<EOF | ssh root@${node_ip} tee -a /etc/hosts
# Hosts added for leader discovery
10.123.45.10 kops-controller.internal.${CLUSTER_NAME}
10.123.45.10 api.internal.${CLUSTER_NAME}
EOF

  # Pass the pod CIDR through to enroll; there is no cloud IPAM on bare metal
  timeout 10m ${KOPS} toolbox enroll --cluster ${CLUSTER_NAME} --instance-group nodes-main --host ${node_ip} --pod-cidr ${pod_cidr} --v=2
}
enroll_node ${VM1_IP} ${VM1_POD_CIDR}
enroll_node ${VM2_IP} ${VM2_POD_CIDR}
echo "Waiting 30 seconds for nodes to be ready"
sleep 30
kubectl get nodes
kubectl get nodes -o yaml
kubectl get pods -A
# Ensure the cluster passes validation
${KOPS} validate cluster ${CLUSTER_NAME} --wait=10m
# Run a few bare-metal e2e tests
echo "running e2e tests"
cd ${REPO_ROOT}/tests/e2e/scenarios/bare-metal
go test -v .
echo "Test successful"