fix: add ready check, error log, and manual test (#87)

Signed-off-by: matttrach <matt.trachier@suse.com>
Author: Matt Trachier <matt.trachier@suse.com>, committed by GitHub
Date: 2025-04-11 09:27:58 -05:00
Commit: f00468fe00 (parent 8c1226f6b7)
GPG Key ID: B5690EEEBB952194
9 changed files with 190 additions and 14 deletions

.github/workflows/manual.yaml (new file)

@@ -0,0 +1,48 @@
name: manual

on: workflow_dispatch

env:
  AWS_REGION: us-west-2
  AWS_ROLE: arn:aws:iam::270074865685:role/terraform-module-ci-test
  GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
  ACME_SERVER_URL: https://acme-v02.api.letsencrypt.org/directory

permissions: write-all

jobs:
  test_TestOneBasic:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          token: ${{secrets.GITHUB_TOKEN}}
          fetch-depth: 0
      - id: aws-creds
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{env.AWS_ROLE}}
          role-session-name: ${{github.run_id}}
          aws-region: ${{env.AWS_REGION}}
          role-duration-seconds: 7200 # 2 hours
          output-credentials: true
      - name: install-nix
        run: |
          curl -L https://nixos.org/nix/install | sh
          source /home/runner/.nix-profile/etc/profile.d/nix.sh
          nix --version
          which nix
      - name: run_tests
        shell: '/home/runner/.nix-profile/bin/nix develop --ignore-environment --extra-experimental-features nix-command --extra-experimental-features flakes --keep HOME --keep SSH_AUTH_SOCK --keep IDENTIFIER --keep GITHUB_TOKEN --keep GITHUB_OWNER --keep ZONE --keep AWS_ROLE --keep AWS_REGION --keep AWS_DEFAULT_REGION --keep AWS_ACCESS_KEY_ID --keep AWS_SECRET_ACCESS_KEY --keep AWS_SESSION_TOKEN --keep UPDATECLI_GPGTOKEN --keep UPDATECLI_GITHUB_TOKEN --keep UPDATECLI_GITHUB_ACTOR --keep GPG_SIGNING_KEY --keep NIX_SSL_CERT_FILE --keep NIX_ENV_LOADED --keep TERM --command bash -e {0}'
        env:
          AWS_ACCESS_KEY_ID: ${{ steps.aws-creds.outputs.aws-access-key-id }}
          AWS_SECRET_ACCESS_KEY: ${{ steps.aws-creds.outputs.aws-secret-access-key }}
          AWS_SESSION_TOKEN: ${{ steps.aws-creds.outputs.aws-session-token }}
          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
          GITHUB_OWNER: rancher
          IDENTIFIER: ${{github.run_id}}
          ZONE: ${{secrets.ZONE}}
          ACME_SERVER_URL: https://acme-v02.api.letsencrypt.org/directory
          RANCHER_INSECURE: false
        run: |
          ./run_tests.sh -t TestOneBasic
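This workflow only has a workflow_dispatch trigger, so it never runs on push or pull request; someone has to start it. A minimal sketch of kicking it off with the GitHub CLI (assuming gh is authenticated with rights on the repository carrying this workflow; the run id is a placeholder):

  gh workflow run manual.yaml --ref main
  gh run list --workflow=manual.yaml --limit 1   # find the new run's id
  gh run watch <run-id>                          # follow its progress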


@@ -224,6 +224,49 @@ jobs:
        run: |
          ./run_tests.sh -t TestDownstreamProd

  test_Cleanup:
    needs:
      - release
      - test_TestOneBasic
      - test_TestProdBasic
      - test_TestDownstreamBasic
      - test_TestDownstreamProd
    if: needs.release.outputs.release_pr
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          token: ${{secrets.GITHUB_TOKEN}}
          fetch-depth: 0
      - id: aws-creds
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{env.AWS_ROLE}}
          role-session-name: ${{github.run_id}}
          aws-region: ${{env.AWS_REGION}}
          role-duration-seconds: 7200 # 2 hours
          output-credentials: true
      - name: install-nix
        run: |
          curl -L https://nixos.org/nix/install | sh
          source /home/runner/.nix-profile/etc/profile.d/nix.sh
          nix --version
          which nix
      - name: cleanup
        shell: '/home/runner/.nix-profile/bin/nix develop --ignore-environment --extra-experimental-features nix-command --extra-experimental-features flakes --keep HOME --keep SSH_AUTH_SOCK --keep IDENTIFIER --keep GITHUB_TOKEN --keep GITHUB_OWNER --keep ZONE --keep AWS_ROLE --keep AWS_REGION --keep AWS_DEFAULT_REGION --keep AWS_ACCESS_KEY_ID --keep AWS_SECRET_ACCESS_KEY --keep AWS_SESSION_TOKEN --keep UPDATECLI_GPGTOKEN --keep UPDATECLI_GITHUB_TOKEN --keep UPDATECLI_GITHUB_ACTOR --keep GPG_SIGNING_KEY --keep NIX_SSL_CERT_FILE --keep NIX_ENV_LOADED --keep TERM --command bash -e {0}'
        env:
          AWS_ACCESS_KEY_ID: ${{ steps.aws-creds.outputs.aws-access-key-id }}
          AWS_SECRET_ACCESS_KEY: ${{ steps.aws-creds.outputs.aws-secret-access-key }}
          AWS_SESSION_TOKEN: ${{ steps.aws-creds.outputs.aws-session-token }}
          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
          GITHUB_OWNER: rancher
          IDENTIFIER: ${{github.run_id}}
          ZONE: ${{secrets.ZONE}}
          ACME_SERVER_URL: https://acme-v02.api.letsencrypt.org/directory
          RANCHER_INSECURE: false
        run: |
          ./run_tests.sh -c $IDENTIFIER

  report:
    needs:
      - release
@@ -231,6 +274,7 @@ jobs:
      - test_TestProdBasic
      - test_TestDownstreamBasic
      - test_TestDownstreamProd
      - test_Cleanup
    if: success() && needs.release.outputs.release_pr # Ensure the test jobs succeeded, and that a release PR was created.
    runs-on: ubuntu-latest
    steps:


@@ -150,10 +150,10 @@ resource "helm_release" "rancher" {
   chart            = "${path.root}/rancher-${local.rancher_version}.tgz" # "${local.rancher_helm_repository}/${local.rancher_channel}/rancher-${local.rancher_version}.tgz"
   namespace        = "cattle-system"
   create_namespace = false
-  wait             = true
-  wait_for_jobs    = true
+  wait             = false
+  wait_for_jobs    = false
   force_update     = true
-  timeout          = 2400 # 40m
+  timeout          = 1800 # 30m

   set {
     name = "hostname"


@ -67,10 +67,10 @@ resource "helm_release" "rancher" {
chart = "${path.root}/rancher-${local.rancher_version}.tgz" #"${local.rancher_helm_repository}/${local.rancher_channel}/rancher-${local.rancher_version}.tgz"
namespace = "cattle-system"
create_namespace = false
wait = true
wait_for_jobs = true
wait = false
wait_for_jobs = false
force_update = true
timeout = 2400 # 40m
timeout = 1800 # 30m
set {
name = "hostname"
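With wait and wait_for_jobs switched off in both modules, terraform apply returns as soon as Helm has submitted the Rancher chart, and the reduced 30-minute timeout only bounds the install call itself; readiness is now verified afterwards from the test harness (CheckReady and the ready/log scripts further down). As a rough illustration of that kind of out-of-band check, not the script this commit ships, one could run against the generated kubeconfig (path illustrative, and the deployment name assumes the chart's default):

  KUBECONFIG=./kubeconfig kubectl wait --for=condition=Ready nodes --all --timeout=30m
  KUBECONFIG=./kubeconfig kubectl -n cattle-system rollout status deploy/rancher --timeout=30m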


@@ -3,21 +3,28 @@
 rerun_failed=false
 specific_test=""
 specific_package=""
+cleanup_id=""
-while getopts ":r:t:p:" opt; do
+while getopts ":r:t:p:c:" opt; do
   case $opt in
     r) rerun_failed=true ;;
     t) specific_test="$OPTARG" ;;
     p) specific_package="$OPTARG" ;;
+    c) cleanup_id="$OPTARG" ;;
     \?) cat <<EOT >&2 && exit 1 ;;
 Invalid option -$OPTARG, valid options are
 -r to re-run failed tests
 -t to specify a specific test (eg. TestBase)
 -p to specify a specific test package (eg. base)
+-c to run clean up only with the given id (eg. abc123)
 EOT
   esac
 done

+if [ -n "$cleanup_id" ]; then
+  export IDENTIFIER="$cleanup_id"
+fi
+
 run_tests() {
   local rerun=$1
   REPO_ROOT="$(git rev-parse --show-toplevel)"
@@ -99,13 +106,15 @@ if [ -z "$GITHUB_TOKEN" ]; then echo "GITHUB_TOKEN isn't set"; else echo "GITHUB_TOKEN is set"; fi
 if [ -z "$GITHUB_OWNER" ]; then echo "GITHUB_OWNER isn't set"; else echo "GITHUB_OWNER is set"; fi
 if [ -z "$ZONE" ]; then echo "ZONE isn't set"; else echo "ZONE is set"; fi

-# Run tests initially
-run_tests false
+if [ -z "$cleanup_id" ]; then
+  # Run tests initially
+  run_tests false

-# Check if we need to rerun failed tests
-if [ "$rerun_failed" = true ] && [ -f "/tmp/${IDENTIFIER}_failed_tests.txt" ]; then
-  echo "Rerunning failed tests..."
-  run_tests true
+  # Check if we need to rerun failed tests
+  if [ "$rerun_failed" = true ] && [ -f "/tmp/${IDENTIFIER}_failed_tests.txt" ]; then
+    echo "Rerunning failed tests..."
+    run_tests true
+  fi
 fi

 echo "Clearing leftovers with Id $IDENTIFIER in $AWS_REGION..."
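With the new -c flag the script can be run in cleanup-only mode: the test run is skipped, IDENTIFIER is overridden by the given id, and only the leftover-clearing step at the bottom executes. Illustrative invocations (the id abc123 comes from the script's own usage text):

  ./run_tests.sh -t TestOneBasic   # run a single named test, as the CI jobs do
  ./run_tests.sh -c abc123         # skip tests, only clear leftovers tagged with id abc123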

test/scripts/getLogs.sh (new executable file)

@@ -0,0 +1,10 @@
#!/bin/bash
kubectl get nodes || true
kubectl get all -A || true
kubectl get pods -A || true
sleep 10
kubectl get pods -A || true
sleep 10
kubectl get pods -A || true
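Every call in getLogs.sh is followed by || true so the dump keeps going on a half-broken cluster, and the repeated pod listings with sleeps give a crude view of pods changing state. If more detail were ever needed, a natural extension, sketched here and not part of this commit, would be to also dump events and recent Rancher pod logs (the app=rancher selector assumes the chart's default label):

  kubectl get events -A --sort-by=.lastTimestamp || true
  kubectl describe nodes || true
  kubectl logs -n cattle-system -l app=rancher --tail=50 || true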


@@ -1,4 +1,5 @@
 #!/bin/bash
+set -x
 JSONPATH="'{range .items[*]}
 {.metadata.name}{\"\\t\"} \
@@ -46,6 +47,13 @@ while notReady; do
   fi
 done
 echo "Nodes are ready..."
+echo "nodes..."
+kubectl get nodes || true
+echo "all..."
+kubectl get all -A || true
+echo "pods..."
+kubectl get pods -A || true
 exit 0
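set -x makes the polling loop trace every iteration into the job log, and the trailing kubectl listings run even when the loop exits normally, so the ready script now doubles as a diagnostic snapshot. For the node-readiness part alone, a compact alternative (assuming only the Ready condition matters) would be kubectl's built-in wait:

  kubectl wait --for=condition=Ready nodes --all --timeout=600s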


@@ -95,12 +95,19 @@ func TestOneBasic(t *testing.T) {
     _, err = terraform.InitAndApplyE(t, terraformOptions)
     if err != nil {
         t.Log("Test failed, tearing down...")
+        util.GetErrorLogs(t, testDir + "/kubeconfig")
         util.Teardown(t, testDir, terraformOptions, keyPair)
         os.Remove(exampleDir + ".terraform.lock.hcl")
         sshAgent.Stop()
         t.Fatalf("Error creating cluster: %s", err)
     }
-    t.Log("Test passed, tearing down...")
+    util.CheckReady(t, testDir + "/kubeconfig")
+    if t.Failed() {
+        t.Log("Test failed...")
+    } else {
+        t.Log("Test passed...")
+    }
     util.Teardown(t, testDir, terraformOptions, keyPair)
     os.Remove(exampleDir + ".terraform.lock.hcl")
     sshAgent.Stop()


@@ -16,6 +16,7 @@ import (
     aws "github.com/gruntwork-io/terratest/modules/aws"
     g "github.com/gruntwork-io/terratest/modules/git"
     "github.com/gruntwork-io/terratest/modules/random"
+    "github.com/gruntwork-io/terratest/modules/shell"
     "github.com/gruntwork-io/terratest/modules/terraform"
     "golang.org/x/oauth2"
 )
@@ -417,3 +418,52 @@ func Teardown(t *testing.T, directory string, options *terraform.Options, keyPai
     }
     aws.DeleteEC2KeyPair(t, keyPair)
 }
func GetErrorLogs(t *testing.T, kubeconfigPath string) {
    repoRoot, err := filepath.Abs(g.GetRepoRoot(t))
    if err != nil {
        t.Logf("Error getting git root directory: %v", err)
    }
    script, err := os.ReadFile(repoRoot + "/test/scripts/getLogs.sh")
    if err != nil {
        t.Logf("Error reading script: %v", err)
    }
    errorLogsScript := shell.Command{
        Command: "bash",
        Args:    []string{"-c", string(script)},
        Env: map[string]string{
            "KUBECONFIG": kubeconfigPath,
        },
    }
    out, err := shell.RunCommandAndGetOutputE(t, errorLogsScript)
    if err != nil {
        t.Logf("Error running script: %s", err)
    }
    t.Logf("Log script output: %s", out)
}

func CheckReady(t *testing.T, kubeconfigPath string) {
    repoRoot, err := filepath.Abs(g.GetRepoRoot(t))
    if err != nil {
        t.Logf("Error getting git root directory: %v", err)
        t.Fail()
    }
    script, err := os.ReadFile(repoRoot + "/test/scripts/readyNodes.sh")
    if err != nil {
        t.Logf("Error reading script: %v", err)
        t.Fail()
    }
    readyScript := shell.Command{
        Command: "bash",
        Args:    []string{"-c", string(script)},
        Env: map[string]string{
            "KUBECONFIG": kubeconfigPath,
        },
    }
    out, err := shell.RunCommandAndGetOutputE(t, readyScript)
    if err != nil {
        t.Logf("Error running script: %s", err)
        t.Fail()
    }
    t.Logf("Ready script output: %s", out)
}
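Both helpers simply run the repository scripts through bash with KUBECONFIG pointed at the test's kubeconfig and log the output, marking the test failed (CheckReady) or merely logging (GetErrorLogs) on error. The same checks can therefore be reproduced by hand against a cluster left behind by a failed run; <testDir> below is a placeholder for whatever directory the test created:

  KUBECONFIG=<testDir>/kubeconfig bash test/scripts/readyNodes.sh
  KUBECONFIG=<testDir>/kubeconfig bash test/scripts/getLogs.sh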