diff --git a/.github/workflows/manual.yaml b/.github/workflows/manual.yaml
new file mode 100644
index 0000000..3dc411b
--- /dev/null
+++ b/.github/workflows/manual.yaml
@@ -0,0 +1,50 @@
+name: manual
+
+on: workflow_dispatch
+
+env:
+  AWS_REGION: us-west-2
+  AWS_ROLE: arn:aws:iam::270074865685:role/terraform-module-ci-test
+  GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
+  ACME_SERVER_URL: https://acme-v02.api.letsencrypt.org/directory
+
+permissions: write-all
+
+jobs:
+  test_TestOneBasic:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          token: ${{secrets.GITHUB_TOKEN}}
+          fetch-depth: 0
+      - id: aws-creds
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{env.AWS_ROLE}}
+          role-session-name: ${{github.run_id}}
+          aws-region: ${{env.AWS_REGION}}
+          role-duration-seconds: 7200 # 2 hours
+          output-credentials: true
+      - name: install-nix
+        run: |
+          curl -L https://nixos.org/nix/install | sh
+          source /home/runner/.nix-profile/etc/profile.d/nix.sh
+          nix --version
+          which nix
+      - name: run_tests
+        # `nix develop --ignore-environment` clears every variable that is not named by a
+        # --keep flag, so each `env` entry below that the tests read must be kept here too.
+        shell: '/home/runner/.nix-profile/bin/nix develop --ignore-environment --extra-experimental-features nix-command --extra-experimental-features flakes --keep HOME --keep SSH_AUTH_SOCK --keep IDENTIFIER --keep GITHUB_TOKEN --keep GITHUB_OWNER --keep ZONE --keep AWS_ROLE --keep AWS_REGION --keep AWS_DEFAULT_REGION --keep AWS_ACCESS_KEY_ID --keep AWS_SECRET_ACCESS_KEY --keep AWS_SESSION_TOKEN --keep UPDATECLI_GPGTOKEN --keep UPDATECLI_GITHUB_TOKEN --keep UPDATECLI_GITHUB_ACTOR --keep GPG_SIGNING_KEY --keep NIX_SSL_CERT_FILE --keep NIX_ENV_LOADED --keep TERM --keep ACME_SERVER_URL --keep RANCHER_INSECURE --command bash -e {0}'
+        env:
+          AWS_ACCESS_KEY_ID: ${{ steps.aws-creds.outputs.aws-access-key-id }}
+          AWS_SECRET_ACCESS_KEY: ${{ steps.aws-creds.outputs.aws-secret-access-key }}
+          AWS_SESSION_TOKEN: ${{ steps.aws-creds.outputs.aws-session-token }}
+          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
+          GITHUB_OWNER: rancher
+          IDENTIFIER: ${{github.run_id}}
+          ZONE: ${{secrets.ZONE}}
+          ACME_SERVER_URL:
https://acme-v02.api.letsencrypt.org/directory
+          RANCHER_INSECURE: false
+        run: |
+          ./run_tests.sh -t TestOneBasic
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 0d43383..2ff8db2 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -224,6 +224,51 @@ jobs:
         run: |
           ./run_tests.sh -t TestDownstreamProd
 
+  test_Cleanup:
+    needs:
+      - release
+      - test_TestOneBasic
+      - test_TestProdBasic
+      - test_TestDownstreamBasic
+      - test_TestDownstreamProd
+    # always() is required: an `if` without a status function implies success(), which
+    # would skip cleanup whenever a needed test job failed and may have left resources behind.
+    if: always() && needs.release.outputs.release_pr
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          token: ${{secrets.GITHUB_TOKEN}}
+          fetch-depth: 0
+      - id: aws-creds
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{env.AWS_ROLE}}
+          role-session-name: ${{github.run_id}}
+          aws-region: ${{env.AWS_REGION}}
+          role-duration-seconds: 7200 # 2 hours
+          output-credentials: true
+      - name: install-nix
+        run: |
+          curl -L https://nixos.org/nix/install | sh
+          source /home/runner/.nix-profile/etc/profile.d/nix.sh
+          nix --version
+          which nix
+      - name: cleanup
+        shell: '/home/runner/.nix-profile/bin/nix develop --ignore-environment --extra-experimental-features nix-command --extra-experimental-features flakes --keep HOME --keep SSH_AUTH_SOCK --keep IDENTIFIER --keep GITHUB_TOKEN --keep GITHUB_OWNER --keep ZONE --keep AWS_ROLE --keep AWS_REGION --keep AWS_DEFAULT_REGION --keep AWS_ACCESS_KEY_ID --keep AWS_SECRET_ACCESS_KEY --keep AWS_SESSION_TOKEN --keep UPDATECLI_GPGTOKEN --keep UPDATECLI_GITHUB_TOKEN --keep UPDATECLI_GITHUB_ACTOR --keep GPG_SIGNING_KEY --keep NIX_SSL_CERT_FILE --keep NIX_ENV_LOADED --keep TERM --keep ACME_SERVER_URL --keep RANCHER_INSECURE --command bash -e {0}'
+        env:
+          AWS_ACCESS_KEY_ID: ${{ steps.aws-creds.outputs.aws-access-key-id }}
+          AWS_SECRET_ACCESS_KEY: ${{ steps.aws-creds.outputs.aws-secret-access-key }}
+          AWS_SESSION_TOKEN: ${{ steps.aws-creds.outputs.aws-session-token }}
+          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
+          GITHUB_OWNER: rancher
+          IDENTIFIER: ${{github.run_id}}
+          ZONE: ${{secrets.ZONE}}
+          ACME_SERVER_URL: https://acme-v02.api.letsencrypt.org/directory
+          RANCHER_INSECURE: false
+        run: |
+          ./run_tests.sh -c "$IDENTIFIER"
+
   report:
     needs:
       - release
@@ -231,6 +276,7 @@ jobs:
       - test_TestProdBasic
       - test_TestDownstreamBasic
       - test_TestDownstreamProd
+      - test_Cleanup
     if: success() && needs.release.outputs.release_pr #Ensure the test jobs succeeded, and that a release PR was created.
     runs-on: ubuntu-latest
     steps:
diff --git a/modules/rancher_bootstrap/rancher/main.tf b/modules/rancher_bootstrap/rancher/main.tf
index 5b5083e..5745b6e 100644
--- a/modules/rancher_bootstrap/rancher/main.tf
+++ b/modules/rancher_bootstrap/rancher/main.tf
@@ -150,10 +150,10 @@ resource "helm_release" "rancher" {
   chart            = "${path.root}/rancher-${local.rancher_version}.tgz" # "${local.rancher_helm_repository}/${local.rancher_channel}/rancher-${local.rancher_version}.tgz"
   namespace        = "cattle-system"
   create_namespace = false
-  wait             = true
-  wait_for_jobs    = true
+  wait             = false
+  wait_for_jobs    = false
   force_update     = true
-  timeout          = 2400 # 40m
+  timeout          = 1800 # 30m
 
   set {
     name  = "hostname"
diff --git a/modules/rancher_bootstrap/rancher_externalTLS/main.tf b/modules/rancher_bootstrap/rancher_externalTLS/main.tf
index e7c03b9..be7ce67 100644
--- a/modules/rancher_bootstrap/rancher_externalTLS/main.tf
+++ b/modules/rancher_bootstrap/rancher_externalTLS/main.tf
@@ -67,10 +67,10 @@ resource "helm_release" "rancher" {
   chart            = "${path.root}/rancher-${local.rancher_version}.tgz" #"${local.rancher_helm_repository}/${local.rancher_channel}/rancher-${local.rancher_version}.tgz"
   namespace        = "cattle-system"
   create_namespace = false
-  wait             = true
-  wait_for_jobs    = true
+  wait             = false
+  wait_for_jobs    = false
   force_update     = true
-  timeout          = 2400 # 40m
+  timeout          = 1800 # 30m
 
   set {
     name  = "hostname"
diff --git a/run_tests.sh b/run_tests.sh
index 68c0708..ff0c7ac 100755
--- a/run_tests.sh
+++ b/run_tests.sh
@@ -3,21 +3,28 @@
 rerun_failed=false
 specific_test=""
specific_package=""
+cleanup_id=""
 
-while getopts ":r:t:p:" opt; do
+while getopts ":rt:p:c:" opt; do # -r is a bare flag; -t, -p and -c take a value
   case $opt in
     r) rerun_failed=true ;;
     t) specific_test="$OPTARG" ;;
     p) specific_package="$OPTARG" ;;
+    c) cleanup_id="$OPTARG" ;;
     \?) cat <<-EOT >&2 && exit 1 ;;
 Invalid option -$OPTARG, valid options are
   -r to re-run failed tests
   -t to specify a specific test (eg. TestBase)
   -p to specify a specific test package (eg. base)
+  -c to run clean up only with the given id (eg. abc123)
 EOT
   esac
 done
+
+if [ -n "$cleanup_id" ]; then
+  export IDENTIFIER="$cleanup_id"
+fi
 
 run_tests() {
   local rerun=$1
   REPO_ROOT="$(git rev-parse --show-toplevel)"
@@ -99,13 +106,16 @@ if [ -z "$GITHUB_TOKEN" ]; then echo "GITHUB_TOKEN isn't set"; else echo "GITHUB
 if [ -z "$GITHUB_OWNER" ]; then echo "GITHUB_OWNER isn't set"; else echo "GITHUB_OWNER is set"; fi
 if [ -z "$ZONE" ]; then echo "ZONE isn't set"; else echo "ZONE is set"; fi
 
-# Run tests initially
-run_tests false
+# In cleanup-only mode (-c) no tests run; execution falls through to the steps after this block.
+if [ -z "$cleanup_id" ]; then
+  # Run tests initially
+  run_tests false
 
-# Check if we need to rerun failed tests
-if [ "$rerun_failed" = true ] && [ -f "/tmp/${IDENTIFIER}_failed_tests.txt" ]; then
-  echo "Rerunning failed tests..."
-  run_tests true
+  # Check if we need to rerun failed tests
+  if [ "$rerun_failed" = true ] && [ -f "/tmp/${IDENTIFIER}_failed_tests.txt" ]; then
+    echo "Rerunning failed tests..."
+    run_tests true
+  fi
 fi
 
 echo "Clearing leftovers with Id $IDENTIFIER in $AWS_REGION..."
diff --git a/test/scripts/getLogs.sh b/test/scripts/getLogs.sh
new file mode 100755
index 0000000..beb42c7
--- /dev/null
+++ b/test/scripts/getLogs.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# Prints cluster state via kubectl; every command is followed by `|| true` so a failure never aborts the script.
+kubectl get nodes || true
+kubectl get all -A || true
+# List pods in all namespaces three times, 10 seconds apart.
+kubectl get pods -A || true
+sleep 10
+kubectl get pods -A || true
+sleep 10
+kubectl get pods -A || true
diff --git a/test/scripts/readyNodes.sh b/test/scripts/readyNodes.sh
index 9c4b07a..6c6d3ad 100755
--- a/test/scripts/readyNodes.sh
+++ b/test/scripts/readyNodes.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+set -x
 
 JSONPATH="'{range .items[*]}
   {.metadata.name}{\"\\t\"} \
@@ -46,6 +47,13 @@ while notReady; do
   fi
 done
 
+echo "Nodes are ready..."
+
+echo "nodes..."
 kubectl get nodes || true
+echo "all..."
 kubectl get all -A || true
+echo "pods..."
+kubectl get pods -A || true
+
 exit 0
diff --git a/test/tests/one/one_test.go b/test/tests/one/one_test.go
index 132b434..3e899f8 100644
--- a/test/tests/one/one_test.go
+++ b/test/tests/one/one_test.go
@@ -95,12 +95,19 @@ func TestOneBasic(t *testing.T) {
 
 	_, err = terraform.InitAndApplyE(t, terraformOptions)
 	if err != nil {
+		t.Log("Test failed, tearing down...")
+		util.GetErrorLogs(t, testDir + "/kubeconfig") // collect diagnostics before tearing down
 		util.Teardown(t, testDir, terraformOptions, keyPair)
 		os.Remove(exampleDir + ".terraform.lock.hcl")
 		sshAgent.Stop()
 		t.Fatalf("Error creating cluster: %s", err)
 	}
-	t.Log("Test passed, tearing down...")
+	util.CheckReady(t, testDir + "/kubeconfig") // marks the test failed on error; checked via t.Failed() below
+	if t.Failed() {
+		t.Log("Test failed...")
+	} else {
+		t.Log("Test passed...")
+	}
 	util.Teardown(t, testDir, terraformOptions, keyPair)
 	os.Remove(exampleDir + ".terraform.lock.hcl")
 	sshAgent.Stop()
diff --git a/test/tests/util.go b/test/tests/util.go
index 4c36b49..86b95fb 100644
--- a/test/tests/util.go
+++ b/test/tests/util.go
@@ -16,6 +16,7 @@ import (
 	aws "github.com/gruntwork-io/terratest/modules/aws"
 	g "github.com/gruntwork-io/terratest/modules/git"
 	"github.com/gruntwork-io/terratest/modules/random"
+
"github.com/gruntwork-io/terratest/modules/shell"
 	"github.com/gruntwork-io/terratest/modules/terraform"
 	"golang.org/x/oauth2"
 )
@@ -417,3 +418,66 @@ func Teardown(t *testing.T, directory string, options *terraform.Options, keyPai
 	}
 	aws.DeleteEC2KeyPair(t, keyPair)
 }
+
+// GetErrorLogs runs test/scripts/getLogs.sh with KUBECONFIG set to
+// kubeconfigPath and writes the script's output to the test log.
+//
+// It is a diagnostic helper: errors it detects itself are reported with
+// t.Logf rather than marking the test as failed.
+func GetErrorLogs(t *testing.T, kubeconfigPath string) {
+	repoRoot, err := filepath.Abs(g.GetRepoRoot(t))
+	if err != nil {
+		t.Logf("Error getting git root directory: %v", err)
+		return
+	}
+	script, err := os.ReadFile(filepath.Join(repoRoot, "test", "scripts", "getLogs.sh"))
+	if err != nil {
+		// Nothing to run without the script; `bash -c ""` would only add an
+		// empty, misleading output line to the log.
+		t.Logf("Error reading script: %v", err)
+		return
+	}
+	errorLogsScript := shell.Command{
+		Command: "bash",
+		Args:    []string{"-c", string(script)},
+		Env: map[string]string{
+			"KUBECONFIG": kubeconfigPath,
+		},
+	}
+	out, err := shell.RunCommandAndGetOutputE(t, errorLogsScript)
+	if err != nil {
+		t.Logf("Error running script: %s", err)
+	}
+	// Log whatever output was returned, even when the run reported an error.
+	t.Logf("Log script output: %s", out)
+}
+
+// CheckReady runs test/scripts/readyNodes.sh with KUBECONFIG set to
+// kubeconfigPath and writes the script's output to the test log.
+//
+// Any error locating, reading, or running the script marks the test as
+// failed (t.Errorf) without aborting it, so callers can inspect t.Failed().
+func CheckReady(t *testing.T, kubeconfigPath string) {
+	repoRoot, err := filepath.Abs(g.GetRepoRoot(t))
+	if err != nil {
+		t.Errorf("Error getting git root directory: %v", err)
+		return
+	}
+	script, err := os.ReadFile(filepath.Join(repoRoot, "test", "scripts", "readyNodes.sh"))
+	if err != nil {
+		t.Errorf("Error reading script: %v", err)
+		return
+	}
+	readyScript := shell.Command{
+		Command: "bash",
+		Args:    []string{"-c", string(script)},
+		Env: map[string]string{
+			"KUBECONFIG": kubeconfigPath,
+		},
+	}
+	out, err := shell.RunCommandAndGetOutputE(t, readyScript)
+	if err != nil {
+		t.Errorf("Error running script: %s", err)
+	}
+	t.Logf("Ready script output: %s", out)
+}