diff --git a/libpod/healthcheck.go b/libpod/healthcheck.go index 96740d2c13..ada4e004d3 100644 --- a/libpod/healthcheck.go +++ b/libpod/healthcheck.go @@ -15,6 +15,7 @@ import ( "time" "github.com/containers/podman/v5/libpod/define" + "github.com/containers/podman/v5/libpod/shutdown" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" ) @@ -273,6 +274,13 @@ func (c *Container) incrementStartupHCSuccessCounter(ctx context.Context) { // Which happens to be us. // So this has to be last - after this, systemd serves us a // SIGTERM and we exit. + // Special case, via SIGTERM we exit(1) which means systemd logs a failure in the unit. + // We do not want this as the unit will be leaked on failure states unless "reset-failed" + // is called. Fundamentally this is expected so switch it to exit 0. + // NOTE: This is only safe while being called from "podman healthcheck run" which we know + // is the case here as we should not alter the exit code of another process that just + // happened to call this. + shutdown.SetExitCode(0) if err := c.removeTransientFiles(ctx, true, oldUnit); err != nil { logrus.Errorf("Error removing container %s healthcheck: %v", c.ID(), err) return diff --git a/test/system/220-healthcheck.bats b/test/system/220-healthcheck.bats index 662ecf8ff6..f232d6ea4f 100644 --- a/test/system/220-healthcheck.bats +++ b/test/system/220-healthcheck.bats @@ -98,7 +98,11 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\\\n\" # Check that we now we do have valid podman units with this # name so that the leak check below does not turn into a NOP without noticing. - assert "$(systemctl list-units --type timer | grep $cid)" =~ "podman" "Healthcheck systemd unit exists" + run -0 systemctl list-units + cidmatch=$(grep "$cid" <<<"$output") + echo "$cidmatch" + assert "$cidmatch" =~ " $cid-[0-9a-f]+\.timer *.*/podman healthcheck run $cid" \ + "Healthcheck systemd unit exists" current_time=$(date --iso-8601=ns) # After three successive failures, container should no longer be healthy @@ -117,7 +121,12 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\\\n\" # Important check for https://github.com/containers/podman/issues/22884 # We never should leak the unit files, healthcheck uses the cid in name so just grep that. - assert "$(systemctl list-units --type timer | grep $cid)" == "" "Healthcheck systemd unit cleanup" + # (Ignore .scope units, those are conmon and can linger for 5 minutes) + # (Ignore .mount, too. They are created/removed by systemd based on the actual real mounts + # on the host and that is async and might be slow enough in CI to cause failures.) + run -0 systemctl list-units --quiet "*$cid*" + except_scope_mount=$(grep -vF ".scope " <<<"$output" | { grep -vF ".mount" || true; } ) + assert "$except_scope_mount" == "" "Healthcheck systemd unit cleanup: no units leaked" } @test "podman healthcheck - restart cleans up old state" {