Merge pull request #3308 from cevich/always_collect_logs

Cirrus: Simplify log collection commands
2019-06-17 21:38:25 +02:00 · 2019-06-17 21:38:25 +02:00 · 058c93f850
parent bce4a93575 4e9f5e5f2b
commit 058c93f850
2 changed files with 54 additions and 37 deletions
--- a/.cirrus.yml
+++ b/.cirrus.yml
@ -21,12 +21,10 @@ env:
    CIRRUS_SHELL: "/bin/bash"
    # Save a little typing (path relative to $CIRRUS_WORKING_DIR)
    SCRIPT_BASE: "./contrib/cirrus"
-    # Command to prefix every output line with a timestamp
+    CIRRUS_CLONE_DEPTH: 50
+    # Command to prefix output lines with timing information
    # (can't do inline awk script, Cirrus-CI or YAML mangles quoting)
    TIMESTAMP: "awk --file ${CIRRUS_WORKING_DIR}/${SCRIPT_BASE}/timestamp.awk"
-    # Command to log critical filesystems, types, and sizes.
-    DFCMD: "df -lhTx tmpfs"
-    CIRRUS_CLONE_DEPTH: 50

    ####
    #### Cache-image names to test with
@ -280,18 +278,15 @@ testing_task:
    setup_environment_script: '$SCRIPT_BASE/setup_environment.sh |& ${TIMESTAMP}'
    unit_test_script: '$SCRIPT_BASE/unit_test.sh |& ${TIMESTAMP}'
    integration_test_script: '$SCRIPT_BASE/integration_test.sh |& ${TIMESTAMP}'
-    ginkgo_node_logs_script: 'cat $CIRRUS_WORKING_DIR/test/e2e/ginkgo-node-*.log || echo "Ginkgo node logs not found"'
-    df_script: '${DFCMD}'
-    audit_log_script: 'cat /var/log/audit/audit.log || cat /var/log/kern.log'
-    journalctl_b_script: 'journalctl -b'

    on_failure:
        failed_master_script: '$CIRRUS_WORKING_DIR/$SCRIPT_BASE/notice_master_failure.sh'
-        # Job has already failed, don't fail again and miss collecting data
-        failed_ginkgo_node_logs_script: 'cat $CIRRUS_WORKING_DIR/test/e2e/ginkgo-node-*.log || echo "Ginkgo node logs not found"'
-        failed_df_script: '${DFCMD}'
-        failed_audit_log_script: 'cat /var/log/audit/audit.log || cat /var/log/kern.log || echo "Uh oh, cat audit.log failed"'
-        failed_journalctl_b_script: 'journalctl -b || echo "Uh oh, journalctl -b failed"'
+
+    always: &standardlogs
+        ginkgo_node_logs_script: '$SCRIPT_BASE/logcollector.sh ginkgo'
+        df_script: '$SCRIPT_BASE/logcollector.sh df'
+        audit_log_script: '$SCRIPT_BASE/logcollector.sh audit'
+        journal_script: '$SCRIPT_BASE/logcollector.sh journal'


 # This task executes tests under unique environments/conditions
@ -316,16 +311,13 @@ special_testing_rootless_task:

    setup_environment_script: '$SCRIPT_BASE/setup_environment.sh |& ${TIMESTAMP}'
    integration_test_script: '$SCRIPT_BASE/integration_test.sh |& ${TIMESTAMP}'
-    df_script: '${DFCMD}'
-    audit_log_script: 'cat /var/log/audit/audit.log || cat /var/log/kern.log'
-    journalctl_b_script: 'journalctl -b'

    on_failure:
        failed_master_script: '$CIRRUS_WORKING_DIR/$SCRIPT_BASE/notice_master_failure.sh'
-        # Job has already failed, don't fail again and miss collecting data
-        failed_df_script: '${DFCMD}'
-        failed_audit_log_script: 'cat /var/log/audit/audit.log || cat /var/log/kern.log || echo "Uh oh, cat audit.log failed"'
-        failed_journalctl_b_script: 'journalctl -b || echo "Uh oh, journalctl -b failed"'
+
+    always:
+        <<: *standardlogs
+

 special_testing_in_podman_task:

@ -344,16 +336,12 @@ special_testing_in_podman_task:

    setup_environment_script: '$SCRIPT_BASE/setup_environment.sh |& ${TIMESTAMP}'
    integration_test_script: '$SCRIPT_BASE/integration_test.sh |& ${TIMESTAMP}'
-    df_script: '${DFCMD}'
-    audit_log_script: 'cat /var/log/audit/audit.log || cat /var/log/kern.log'
-    journalctl_b_script: 'journalctl -b'

    on_failure:
        failed_master_script: '$CIRRUS_WORKING_DIR/$SCRIPT_BASE/notice_master_failure.sh'
-        # Job has already failed, don't fail again and miss collecting data
-        failed_df_script: '${DFCMD}'
-        failed_audit_log_script: 'cat /var/log/audit/audit.log || cat /var/log/kern.log || echo "Uh oh, cat audit.log failed"'
-        failed_journalctl_b_script: 'journalctl -b || echo "Uh oh, journalctl -b failed"'
+
+    always:
+        <<: *standardlogs


 # Test building of new cache-images for future PR testing, in this PR.
@ -416,16 +404,9 @@ verify_test_built_images_task:
    environment_script: '$SCRIPT_BASE/setup_environment.sh |& ${TIMESTAMP}'

    integration_test_script: '$SCRIPT_BASE/integration_test.sh |& ${TIMESTAMP}'
-    ginkgo_node_logs_script: 'cat $CIRRUS_WORKING_DIR/test/e2e/ginkgo-node-*.log || echo "Ginkgo node logs not found"'
-    df_script: '${DFCMD}'
-    audit_log_script: 'cat /var/log/audit/audit.log || cat /var/log/kern.log'
-    journalctl_b_script: 'journalctl -b'
-    on_failure:
-        # Job has already failed, don't fail again and miss collecting data
-        failed_ginkgo_node_logs_script: 'cat $CIRRUS_WORKING_DIR/test/e2e/ginkgo-node-*.log || echo "Ginkgo node logs not found"'
-        failed_df_script: '${DFCMD}'
-        failed_audit_log_script: 'cat /var/log/audit/audit.log || cat /var/log/kern.log || echo "Uh oh, cat audit.log failed"'
-        failed_journalctl_b_script: 'journalctl -b || echo "Uh oh, journalctl -b failed"'
+
+    always:
+        <<: *standardlogs


 # Build new cache-images for future PR testing, but only after a PR merge.
--- a/contrib/cirrus/logcollector.sh
+++ b/contrib/cirrus/logcollector.sh
@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Print one category of diagnostic output for a CI task.
+# Usage: logcollector.sh <audit|df|ginkgo|journal>
+
+# Problems in this script itself are fatal, so they get noticed right away.
+set -e
+
+# Both expansions are quoted so that a checkout path containing whitespace
+# still resolves to the lib.sh sitting next to this script.
+source "$(dirname "$0")/lib.sh"
+
+# NOTE(review): req_env_var is presumably defined in lib.sh and checks that
+# the named variables are set -- confirm against lib.sh.
+req_env_var CIRRUS_WORKING_DIR OS_RELEASE_ID
+
+# Echo a command, then execute it between two divider lines.  More log
+# collection commands are expected to follow, so a failing command is only
+# reported (with its exit code) and never aborts the script; errexit is
+# switched off while it runs and restored afterwards so that genuine
+# script-problems remain fatal.
+showrun() {
+    local divider='------------------------------------------------------------'
+    # Left unquoted on purpose: word-splitting drops the leading blank
+    # that the printf format puts in front of the first argument.
+    echo '+ '$(printf " %q" "$@")
+    set +e
+    echo "$divider"
+    "$@"
+    local rc=$?
+    if [[ $rc -ne 0 ]]; then
+        echo "[ rc = $rc -- proceeding anyway ]"
+    fi
+    echo "$divider"
+    set -e
+}
+
+# Dispatch on the requested log type (the first command-line argument).
+case "$1" in
+    audit)
+        # The file that gets dumped depends on the distribution.
+        case "$OS_RELEASE_ID" in
+            ubuntu) showrun cat /var/log/kern.log ;;
+            fedora) showrun cat /var/log/audit/audit.log ;;
+            *) bad_os_id_ver ;;
+        esac
+        ;;
+    df) showrun df -lhTx tmpfs ;;
+    # The variable is quoted against word-splitting; the glob stays outside
+    # the quotes so the shell still expands it to the individual log files.
+    ginkgo) showrun cat "$CIRRUS_WORKING_DIR"/test/e2e/ginkgo-node-*.log ;;
+    journal) showrun journalctl -b ;;
+    # Anything else is a caller mistake: report it and exit via die.
+    *) die 1 "Warning, $(basename "$0") doesn't know how to handle the parameter '$1'" ;;
+esac