kops/tests/e2e/scenarios/bare-metal/run-test

229 lines
6.8 KiB
Bash
Executable File

#!/usr/bin/env bash
# Copyright 2024 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -o errexit
set -o nounset
set -o pipefail
set -o xtrace
REPO_ROOT=$(git rev-parse --show-toplevel)
cd ${REPO_ROOT}
WORKDIR=${REPO_ROOT}/.build/
BINDIR=${WORKDIR}/bin
mkdir -p "${BINDIR}"
go build -o ${BINDIR}/kops ./cmd/kops
export KOPS=${BINDIR}/kops
function cleanup() {
echo "running dump-artifacts"
${REPO_ROOT}/tests/e2e/scenarios/bare-metal/dump-artifacts || true
if [[ -z "${SKIP_CLEANUP:-}" ]]; then
echo "running cleanup"
${REPO_ROOT}/tests/e2e/scenarios/bare-metal/cleanup || true
fi
}
if [[ -z "${SKIP_CLEANUP:-}" ]]; then
trap cleanup EXIT
fi
# Create the directory that will back our mock s3 storage
rm -rf ${WORKDIR}/s3
mkdir -p ${WORKDIR}/s3/
# Start our VMs
${REPO_ROOT}/tests/e2e/scenarios/bare-metal/start-vms
. hack/dev-build-metal.sh
echo "Waiting 10 seconds for VMs to start"
sleep 10
# Remove from known-hosts in case of reuse
ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.10 || true
ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.11 || true
ssh-keygen -f ~/.ssh/known_hosts -R 10.123.45.12 || true
ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.10 uptime
ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.11 uptime
ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.12 uptime
cd ${REPO_ROOT}
# Enable feature flag for bare metal
export KOPS_FEATURE_FLAGS=Metal
# Set up the AWS credentials
export AWS_SECRET_ACCESS_KEY=secret
export AWS_ACCESS_KEY_ID=accesskey
export AWS_ENDPOINT_URL=http://10.123.45.1:8443
export AWS_REGION=us-east-1
export S3_ENDPOINT=${AWS_ENDPOINT_URL}
export S3_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
export S3_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
# Create the state-store bucket in our mock s3 server
export KOPS_STATE_STORE=s3://kops-state-store/
aws --version
aws s3 ls s3://kops-state-store || aws s3 mb s3://kops-state-store
# List clusters (there should not be any yet)
${KOPS} get cluster || true
# Create a cluster
${KOPS} create cluster --cloud=metal metal.k8s.local --zones main --networking cni
# Set the IP ingress, required for metal cloud
# TODO: is this the best option?
${KOPS} edit cluster metal.k8s.local --set spec.api.publicName=10.123.45.10
# Use latest etcd-manager image (while we're adding features)
${KOPS} edit cluster metal.k8s.local --set 'spec.etcdClusters[*].manager.image=us-central1-docker.pkg.dev/k8s-staging-images/etcd-manager/etcd-manager-static:latest'
# Use 1.31 kubernetes so we get kube-apiserver fixes
export KOPS_RUN_TOO_NEW_VERSION=1
"${KOPS}" edit cluster metal.k8s.local "--set=cluster.spec.kubernetesVersion=1.31.0"
# List clusters
${KOPS} get cluster
${KOPS} get cluster -oyaml
# List instance groups
${KOPS} get ig --name metal.k8s.local
${KOPS} get ig --name metal.k8s.local -oyaml
# Apply basic configuration
${KOPS} update cluster metal.k8s.local
${KOPS} update cluster metal.k8s.local --yes --admin
# Start an SSH agent; enroll assumes SSH connectivity to the VMs with the key in the agent
eval $(ssh-agent)
ssh-add ${REPO_ROOT}/.build/.ssh/id_ed25519
# Enroll the control-plane VM
${KOPS} toolbox enroll --cluster metal.k8s.local --instance-group control-plane-main --host 10.123.45.10 --v=2
# Manual creation of "volumes" for etcd, and setting up peer nodes
cat <<EOF | ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.10 tee -a /etc/hosts
# Hosts added for etcd discovery
10.123.45.10 node0.main.metal.k8s.local
10.123.45.10 node0.events.metal.k8s.local
EOF
ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.10 cat /etc/hosts
ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.10 mkdir -p /mnt/disks/metal.k8s.local--main--0/mnt
ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.10 touch /mnt/disks/metal.k8s.local--main--0/mnt/please-create-new-cluster
ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.10 mkdir -p /mnt/disks/metal.k8s.local--events--0/mnt
ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@10.123.45.10 touch /mnt/disks/metal.k8s.local--events--0/mnt/please-create-new-cluster
echo "Waiting for kube to start"
# Wait for kube-apiserver to be ready, timeout after 10 minutes
for i in {1..60}; do
if kubectl get nodes; then
break
fi
sleep 10
done
kubectl get nodes
kubectl get pods -A
# Install kindnet
kubectl create -f https://raw.githubusercontent.com/aojea/kindnet/main/install-kindnet.yaml
echo "Waiting 10 seconds for kindnet to start"
sleep 10
kubectl get nodes
kubectl get pods -A
# For host records
kubectl create ns kops-system
kubectl apply -f ${REPO_ROOT}/k8s/crds/kops.k8s.io_hosts.yaml
# kops-controller extra permissions
kubectl apply --server-side -f - <<EOF
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: kops-controller:pki-verifier
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: kops-controller:pki-verifier
subjects:
- apiGroup: rbac.authorization.k8s.io
kind: User
name: system:serviceaccount:kube-system:kops-controller
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: kops-controller:pki-verifier
rules:
- apiGroups:
- "kops.k8s.io"
resources:
- hosts
verbs:
- get
- list
- watch
# Must be able to set node addresses
# TODO: Move out?
- apiGroups:
- ""
resources:
- nodes/status
verbs:
- patch
EOF
function enroll_node() {
local node_ip=$1
# Manual "discovery" for control-plane endpoints
# TODO: Replace with well-known IP
cat <<EOF | ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 root@${node_ip} tee -a /etc/hosts
# Hosts added for leader discovery
10.123.45.10 kops-controller.internal.metal.k8s.local
10.123.45.10 api.internal.metal.k8s.local
EOF
timeout 10m ${KOPS} toolbox enroll --cluster metal.k8s.local --instance-group nodes-main --host ${node_ip} --v=2
}
enroll_node 10.123.45.11
enroll_node 10.123.45.12
echo "Waiting 30 seconds for nodes to be ready"
sleep 30
kubectl get nodes
kubectl get pods -A
# Ensure the cluster passes validation
${KOPS} validate cluster metal.k8s.local --wait=10m
echo "Test successful"