CI: systests: instrument flaky tests

Three infrequent flakes. Add debug code to help track them
down if/when they happen again.

Also, in one of them, fix a logic bug; the fix will save us
8-10s on system test runs.

Signed-off-by: Ed Santiago <santiago@redhat.com>
This commit is contained in:
Ed Santiago 2024-03-25 06:30:28 -06:00
parent d2f639f61f
commit e697631af9
3 changed files with 23 additions and 3 deletions

View File

@ -455,8 +455,11 @@ $name stderr" "logs work with passthrough"
# Kill the pod and make sure the service is not running. # Kill the pod and make sure the service is not running.
run_podman pod kill test_pod run_podman pod kill test_pod
for i in {0..20}; do for i in {0..20}; do
# echos are for debugging test flakes
echo "$_LOG_PROMPT systemctl is-active $service_name"
run systemctl is-active $service_name run systemctl is-active $service_name
if [[ $output == "failed" ]]; then echo "$output"
if [[ "$output" == "inactive" ]]; then
break break
fi fi
sleep 0.5 sleep 0.5

View File

@ -734,8 +734,18 @@ spec:
run_podman kube play --configmap=$configmap_file $pod_file run_podman kube play --configmap=$configmap_file $pod_file
run_podman wait test_pod-server run_podman wait test_pod-server
run_podman logs test_pod-server
is "$output" "foo:bar" # systemd logs are unreliable; we may need to retry a few times
# https://github.com/systemd/systemd/issues/28650
local retries=10
while [[ $retries -gt 0 ]]; do
run_podman logs test_pod-server
test -n "$output" && break
sleep 0.1
retries=$((retries - 1))
done
assert "$retries" -gt 0 "Timed out waiting for podman logs"
assert "$output" = "foo:bar" "output from podman logs"
run_podman kube down $pod_file run_podman kube down $pod_file
} }

View File

@ -112,6 +112,13 @@ function stop_registry() {
# Make sure socket is closed # Make sure socket is closed
if tcp_port_probe $PODMAN_LOGIN_REGISTRY_PORT; then if tcp_port_probe $PODMAN_LOGIN_REGISTRY_PORT; then
# for debugging flakes
echo ""
echo "ps auxww --forest"
ps auxww --forest
echo ""
echo "lsof -i -P"
lsof -i -P
die "Socket still seems open" die "Socket still seems open"
fi fi
} }