Merge pull request #20347 from Luap99/healthcheck-event

healthcheck: make sure to always show health_status events
2023-10-12 18:15:58 +00:00 · 2023-10-12 18:15:58 +00:00 · e4ef271376
parent fb87f0a7bb 99a14332ef
commit e4ef271376
2 changed files with 31 additions and 4 deletions
--- a/libpod/events/journal_linux.go
+++ b/libpod/events/journal_linux.go
@ -74,7 +74,19 @@ func (e EventJournalD) Write(ee Event) error {
 	case Volume:
 		m["PODMAN_NAME"] = ee.Name
 	}
-	return journal.Send(ee.ToHumanReadable(false), journal.PriInfo, m)
+
+	// starting with commmit 7e6e267329 we set LogLevel=notice for the systemd healthcheck unit
+	// This so it doesn't log the started/stopped unit messages al the time which spam the
+	// journal if a small interval is used. That however broke the healthcheck event as it no
+	// longer showed up in podman events when running as root as we only send the event on info
+	// level. To fix this we have to send the event on notice level.
+	// https://github.com/containers/podman/issues/20342
+	prio := journal.PriInfo
+	if len(ee.HealthStatus) > 0 {
+		prio = journal.PriNotice
+	}
+
+	return journal.Send(ee.ToHumanReadable(false), prio, m)
 }

 // Read reads events from the journal and sends qualified events to the event channel
--- a/test/system/220-healthcheck.bats
+++ b/test/system/220-healthcheck.bats
@ -11,6 +11,8 @@ load helpers
 function _check_health {
    local testname="$1"
    local tests="$2"
+    local since="$3"
+    local hc_status="$4"

    run_podman inspect --format "{{json .State.Healthcheck}}" healthcheck_c

@ -18,6 +20,16 @@ function _check_health {
        actual=$(jq ".$field" <<<"$output")
        is "$actual" "$expect" "$testname - .State.Healthcheck.$field"
    done
+
+    # Make sure we can read the healthcheck event in podman events (#20342)
+    run_podman events --filter container=healthcheck_c --filter event=health_status \
+        --since "$since" --stream=false --format "{{.HealthStatus}}"
+    # Because the assert below would fail with "lines: bad array subscript" when
+    # there are no events lets special case this to provide a more meaningful error.
+    if [[ -z "$output" ]]; then
+        die "no healthcheck events"
+    fi
+    assert "${lines[-1]}" == "$hc_status" "$testname - podman events health status"
 }

@test "podman healthcheck" {
@ -34,6 +46,7 @@ function _check_health {
    run_podman inspect healthcheck_c --format "{{.Config.HealthcheckOnFailureAction}}"
    is "$output" "kill" "on-failure action is set to kill"

+    current_time=$(date --iso-8601=seconds)
    # We can't check for 'starting' because a 1-second interval is too
    # short; it could run healthcheck before we get to our first check.
    #
@ -46,8 +59,9 @@ Status           | \"healthy\"
 FailingStreak    | 0
 Log[-1].ExitCode | 0
 Log[-1].Output   | \"Life is Good on stdout\\\nLife is Good on stderr\"
-"
+" "$current_time" "healthy"

+    current_time=$(date --iso-8601=seconds)
    # Force a failure
    run_podman exec healthcheck_c touch /uh-oh
    sleep 2
@ -57,8 +71,9 @@ Status           | \"healthy\"
 FailingStreak    | [123]
 Log[-1].ExitCode | 1
 Log[-1].Output   | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
-"
+" "$current_time" "healthy"

+    current_time=$(date --iso-8601=seconds)
    # After three successive failures, container should no longer be healthy
    sleep 5
    _check_health "Three or more failures" "
@ -66,7 +81,7 @@ Status           | \"unhealthy\"
 FailingStreak    | [3456]
 Log[-1].ExitCode | 1
 Log[-1].Output   | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
-"
+" "$current_time" "unhealthy"

    # now the on-failure should kick in and kill the container
    run_podman wait healthcheck_c