Tests: Fix common flakes, and improve apiv2 test log

- apiv2 - the 'ten /info requests' test is flaking often, taking ~8 seconds (our limit is 7, up from 5 a few weeks ago). Brent suggested that the first /info call might be expensive, because it needs to access storage. So, let's prime it by running one /info outside the timing loop. And, because even that continues to fail, bump it up to 10 seconds and file #8076 to track the slowdown. - toolbox test - WaitForReady() has timed out, even on one occasion causing a run failure because it failed 3 times. Solution: bump up timeout from 2s to 5s. Not really great, but CI systems are underpowered, and it's not unreasonable that 2s might be too low. - sdnotify test - add a 'podman wait' between stop & rm. This may prevent a "cannot rm container as it is running" race condition. While working on this, Brent and I noticed a few ways that test-apiv2 logging can be improved: - test name: when request is POST, display the jsonified parameters, not the original input ones. This should make it much easier to reproduce failures. - use curl's "--write-out" option to capture http code, content type, and request time. We were getting the first two via grep from logged headers; this is cleaner. And there was no other way to get timing. We now include the timing as X-Response-Time in the log file. - abort on *any* curl error, not just 7 (cannot connect). Any error at all from curl is bad news. Signed-off-by: Ed Santiago <santiago@redhat.com>
2020-10-19 12:30:31 -06:00 · 2020-10-19 12:30:31 -06:00 · f5b3dc976c
parent 36682115b0
commit f5b3dc976c
4 changed files with 31 additions and 18 deletions
--- a/test/apiv2/01-basic.at
+++ b/test/apiv2/01-basic.at
@ -59,7 +59,10 @@ t GET info 200                \
  .DefaultRuntime~.*$runtime  \
  .MemTotal~[0-9]\\+

-# Timing: make sure server stays responsive
+# Timing: make sure server stays responsive.
+# Because /info may need to check storage, it may be slow the first time.
+# Let's invoke it once to prime caches, then run ten queries in a timed loop.
+t GET info 200
 t0=$SECONDS
 for i in $(seq 1 10); do
    # FIXME: someday: refactor t(), separate out the 'curl' logic so we
@ -70,7 +73,8 @@ t1=$SECONDS
 delta_t=$((t1 - t2))

 # Desired number of seconds in which we expect to run.
-want=7
+# FIXME: 10 seconds is a lot! PR #8076 opened to investigate why.
+want=10
 if [ $delta_t -le $want ]; then
    _show_ok 1 "Time for ten /info requests ($delta_t seconds) <= ${want}s"
 else
--- a/test/apiv2/test-apiv2
+++ b/test/apiv2/test-apiv2
@ -179,7 +179,7 @@ function t() {
    # POST requests require an extra params arg
    if [[ $method = "POST" ]]; then
        curl_args="-d $(jsonify $1)"
-        testname="$testname [$1]"
+        testname="$testname [$curl_args]"
        shift
    fi

@ -204,21 +204,30 @@ function t() {
    echo "-------------------------------------------------------------" >>$LOG
    echo "\$ $testname"                                                  >>$LOG
    rm -f $WORKDIR/curl.*
-    curl -s -X $method ${curl_args}                   \
-         -H 'Content-type: application/json'          \
-         --dump-header $WORKDIR/curl.headers.out       \
-         -o $WORKDIR/curl.result.out "$url"
+    # -s = silent, but --write-out 'format' gives us important response data
+    response=$(curl -s -X $method ${curl_args}                   \
+                    -H 'Content-type: application/json'          \
+                    --dump-header $WORKDIR/curl.headers.out       \
+                    --write-out '%{http_code}^%{content_type}^%{time_total}' \
+                    -o $WORKDIR/curl.result.out "$url")

-    if [[ $? -eq 7 ]]; then
-        echo "FATAL: curl failure on $url - cannot continue" >&2
+    # Any error from curl is instant bad news, from which we can't recover
+    rc=$?
+    if [[ $rc -ne 0 ]]; then
+        echo "FATAL: curl failure ($rc) on $url - cannot continue" >&2
        exit 1
    fi

-    cat $WORKDIR/curl.headers.out >>$LOG 2>/dev/null || true
+    # Show returned headers (without trailing ^M or empty lines) in log file.
+    # Sometimes -- I can't remember why! -- we don't get headers.
+    if [[ -e $WORKDIR/curl.headers.out ]]; then
+        tr -d '\015' < $WORKDIR/curl.headers.out | egrep '.' >>$LOG
+    fi
+
+    IFS='^' read actual_code content_type time_total <<<"$response"
+    printf "X-Response-Time: ${time_total}s\n\n" >>$LOG

    # Log results, if text. If JSON, filter through jq for readability.
-    content_type=$(sed -ne 's/^Content-Type:[ ]\+//pi' <$WORKDIR/curl.headers.out)
-
    if [[ $content_type =~ /octet ]]; then
        output="[$(file --brief $WORKDIR/curl.result.out)]"
        echo "$output" >>$LOG
@ -233,10 +242,8 @@ function t() {
    fi

    # Test return code
-    actual_code=$(head -n1 $WORKDIR/curl.headers.out | awk '/^HTTP/ { print $2}')
    is "$actual_code" "$expected_code" "$testname : status"

-
    # Special case: 204/304, by definition, MUST NOT return content (rfc2616)
    if [[ $expected_code = 204 || $expected_code = 304 ]]; then
        if [ -n "$*" ]; then
--- a/test/e2e/toolbox_test.go
+++ b/test/e2e/toolbox_test.go
@ -222,7 +222,7 @@ var _ = Describe("Toolbox-specific testing", func() {
 		session.WaitWithDefaultTimeout()
 		Expect(session.ExitCode()).To(Equal(0))

-		Expect(WaitContainerReady(podmanTest, "test", "READY", 2, 1)).To(BeTrue())
+		Expect(WaitContainerReady(podmanTest, "test", "READY", 5, 1)).To(BeTrue())

 		expectedOutput := fmt.Sprintf("%s:x:%s:%s::%s:%s",
 			username, uid, gid, homeDir, shell)
@ -257,7 +257,7 @@ var _ = Describe("Toolbox-specific testing", func() {
 		session.WaitWithDefaultTimeout()
 		Expect(session.ExitCode()).To(Equal(0))

-		Expect(WaitContainerReady(podmanTest, "test", "READY", 2, 1)).To(BeTrue())
+		Expect(WaitContainerReady(podmanTest, "test", "READY", 5, 1)).To(BeTrue())

 		session = podmanTest.Podman([]string{"exec", "test", "cat", "/etc/group"})
 		session.WaitWithDefaultTimeout()
@ -301,7 +301,7 @@ var _ = Describe("Toolbox-specific testing", func() {
 		session.WaitWithDefaultTimeout()
 		Expect(session.ExitCode()).To(Equal(0))

-		Expect(WaitContainerReady(podmanTest, "test", "READY", 2, 1)).To(BeTrue())
+		Expect(WaitContainerReady(podmanTest, "test", "READY", 5, 1)).To(BeTrue())

 		expectedUser := fmt.Sprintf("%s:x:%s:%s::%s:%s",
 			username, uid, gid, homeDir, shell)
@ -358,7 +358,7 @@ var _ = Describe("Toolbox-specific testing", func() {
 		session.WaitWithDefaultTimeout()
 		Expect(session.ExitCode()).To(Equal(0))

-		Expect(WaitContainerReady(podmanTest, "test", "READY", 2, 1)).To(BeTrue())
+		Expect(WaitContainerReady(podmanTest, "test", "READY", 5, 1)).To(BeTrue())

 		session = podmanTest.Podman([]string{"logs", "test"})
 		session.WaitWithDefaultTimeout()
--- a/test/system/260-sdnotify.bats
+++ b/test/system/260-sdnotify.bats
@ -107,6 +107,7 @@ function _assert_mainpid_is_conmon() {

    # Done. Stop container, clean up.
    run_podman exec $cid touch /stop
+    run_podman wait $cid
    run_podman rm $cid
    _stop_socat
 }
@ -142,6 +143,7 @@ function _assert_mainpid_is_conmon() {

    # Done. Stop container, clean up.
    run_podman exec $cid touch /stop
+    run_podman wait $cid
    run_podman rm $cid
    run_podman rmi $_FEDORA
    _stop_socat