Tests: Fix common flakes, and improve apiv2 test log

- apiv2 - the 'ten /info requests' test is flaking often,
  taking ~8 seconds (our limit is 7, up from 5 a few weeks
  ago). Brent suggested that the first /info call might be
  expensive, because it needs to access storage. So, let's
  prime it by running one /info outside the timing loop.
  And, because even that continues to fail, bump it up
  to 10 seconds and file #8076 to track the slowdown.

- toolbox test - WaitForReady() has timed out, even on one
  occasion causing a run failure because it failed 3 times.
  Solution: bump up timeout from 2s to 5s. Not really great,
  but CI systems are underpowered, and it's not unreasonable
  that 2s might be too low.

- sdnotify test - add a 'podman wait' between stop & rm.
  This may prevent a "cannot rm container as it is running"
  race condition.

While working on this, Brent and I noticed a few ways that
test-apiv2 logging can be improved:

- test name: when request is POST, display the jsonified
  parameters, not the original input ones. This should
  make it much easier to reproduce failures.

- use curl's "--write-out" option to capture http code,
  content type, and request time. We were getting the
  first two via grep from logged headers; this is cleaner.
  And there was no other way to get timing. We now include
  the timing as X-Response-Time in the log file.

- abort on *any* curl error, not just 7 (cannot connect).
  Any error at all from curl is bad news.

Signed-off-by: Ed Santiago <santiago@redhat.com>
This commit is contained in:
Ed Santiago 2020-10-19 12:30:31 -06:00
parent 36682115b0
commit f5b3dc976c
4 changed files with 31 additions and 18 deletions

View File

@ -59,7 +59,10 @@ t GET info 200 \
.DefaultRuntime~.*$runtime \
.MemTotal~[0-9]\\+
# Timing: make sure server stays responsive
# Timing: make sure server stays responsive.
# Because /info may need to check storage, it may be slow the first time.
# Let's invoke it once to prime caches, then run ten queries in a timed loop.
t GET info 200
t0=$SECONDS
for i in $(seq 1 10); do
# FIXME: someday: refactor t(), separate out the 'curl' logic so we
@ -70,7 +73,8 @@ t1=$SECONDS
delta_t=$((t1 - t2))
# Desired number of seconds in which we expect to run.
want=7
# FIXME: 10 seconds is a lot! PR #8076 opened to investigate why.
want=10
if [ $delta_t -le $want ]; then
_show_ok 1 "Time for ten /info requests ($delta_t seconds) <= ${want}s"
else

View File

@ -179,7 +179,7 @@ function t() {
# POST requests require an extra params arg
if [[ $method = "POST" ]]; then
curl_args="-d $(jsonify $1)"
testname="$testname [$1]"
testname="$testname [$curl_args]"
shift
fi
@ -204,21 +204,30 @@ function t() {
echo "-------------------------------------------------------------" >>$LOG
echo "\$ $testname" >>$LOG
rm -f $WORKDIR/curl.*
curl -s -X $method ${curl_args} \
-H 'Content-type: application/json' \
--dump-header $WORKDIR/curl.headers.out \
-o $WORKDIR/curl.result.out "$url"
# -s = silent, but --write-out 'format' gives us important response data
response=$(curl -s -X $method ${curl_args} \
-H 'Content-type: application/json' \
--dump-header $WORKDIR/curl.headers.out \
--write-out '%{http_code}^%{content_type}^%{time_total}' \
-o $WORKDIR/curl.result.out "$url")
if [[ $? -eq 7 ]]; then
echo "FATAL: curl failure on $url - cannot continue" >&2
# Any error from curl is instant bad news, from which we can't recover
rc=$?
if [[ $rc -ne 0 ]]; then
echo "FATAL: curl failure ($rc) on $url - cannot continue" >&2
exit 1
fi
cat $WORKDIR/curl.headers.out >>$LOG 2>/dev/null || true
# Show returned headers (without trailing ^M or empty lines) in log file.
# Sometimes -- I can't remember why! -- we don't get headers.
if [[ -e $WORKDIR/curl.headers.out ]]; then
tr -d '\015' < $WORKDIR/curl.headers.out | egrep '.' >>$LOG
fi
IFS='^' read actual_code content_type time_total <<<"$response"
printf "X-Response-Time: ${time_total}s\n\n" >>$LOG
# Log results, if text. If JSON, filter through jq for readability.
content_type=$(sed -ne 's/^Content-Type:[ ]\+//pi' <$WORKDIR/curl.headers.out)
if [[ $content_type =~ /octet ]]; then
output="[$(file --brief $WORKDIR/curl.result.out)]"
echo "$output" >>$LOG
@ -233,10 +242,8 @@ function t() {
fi
# Test return code
actual_code=$(head -n1 $WORKDIR/curl.headers.out | awk '/^HTTP/ { print $2}')
is "$actual_code" "$expected_code" "$testname : status"
# Special case: 204/304, by definition, MUST NOT return content (rfc2616)
if [[ $expected_code = 204 || $expected_code = 304 ]]; then
if [ -n "$*" ]; then

View File

@ -222,7 +222,7 @@ var _ = Describe("Toolbox-specific testing", func() {
session.WaitWithDefaultTimeout()
Expect(session.ExitCode()).To(Equal(0))
Expect(WaitContainerReady(podmanTest, "test", "READY", 2, 1)).To(BeTrue())
Expect(WaitContainerReady(podmanTest, "test", "READY", 5, 1)).To(BeTrue())
expectedOutput := fmt.Sprintf("%s:x:%s:%s::%s:%s",
username, uid, gid, homeDir, shell)
@ -257,7 +257,7 @@ var _ = Describe("Toolbox-specific testing", func() {
session.WaitWithDefaultTimeout()
Expect(session.ExitCode()).To(Equal(0))
Expect(WaitContainerReady(podmanTest, "test", "READY", 2, 1)).To(BeTrue())
Expect(WaitContainerReady(podmanTest, "test", "READY", 5, 1)).To(BeTrue())
session = podmanTest.Podman([]string{"exec", "test", "cat", "/etc/group"})
session.WaitWithDefaultTimeout()
@ -301,7 +301,7 @@ var _ = Describe("Toolbox-specific testing", func() {
session.WaitWithDefaultTimeout()
Expect(session.ExitCode()).To(Equal(0))
Expect(WaitContainerReady(podmanTest, "test", "READY", 2, 1)).To(BeTrue())
Expect(WaitContainerReady(podmanTest, "test", "READY", 5, 1)).To(BeTrue())
expectedUser := fmt.Sprintf("%s:x:%s:%s::%s:%s",
username, uid, gid, homeDir, shell)
@ -358,7 +358,7 @@ var _ = Describe("Toolbox-specific testing", func() {
session.WaitWithDefaultTimeout()
Expect(session.ExitCode()).To(Equal(0))
Expect(WaitContainerReady(podmanTest, "test", "READY", 2, 1)).To(BeTrue())
Expect(WaitContainerReady(podmanTest, "test", "READY", 5, 1)).To(BeTrue())
session = podmanTest.Podman([]string{"logs", "test"})
session.WaitWithDefaultTimeout()

View File

@ -107,6 +107,7 @@ function _assert_mainpid_is_conmon() {
# Done. Stop container, clean up.
run_podman exec $cid touch /stop
run_podman wait $cid
run_podman rm $cid
_stop_socat
}
@ -142,6 +143,7 @@ function _assert_mainpid_is_conmon() {
# Done. Stop container, clean up.
run_podman exec $cid touch /stop
run_podman wait $cid
run_podman rm $cid
run_podman rmi $_FEDORA
_stop_socat