Merge pull request #25423 from Honny1/hc-kill-status

Add stopped status for HealthCheck
2025-03-04 13:17:58 +00:00 · 2025-03-04 13:17:58 +00:00 · 4ac061f383
parent ebec104391 511d912685
commit 4ac061f383
6 changed files with 45 additions and 44 deletions
--- a/cmd/podman/healthcheck/run.go
+++ b/cmd/podman/healthcheck/run.go
@ -35,7 +35,8 @@ func run(cmd *cobra.Command, args []string) error {
 	if err != nil {
 		return err
 	}
-	if response.Status == define.HealthCheckUnhealthy || response.Status == define.HealthCheckStarting {
+	switch response.Status {
 	case define.HealthCheckUnhealthy, define.HealthCheckStarting, define.HealthCheckStopped:
 		registry.SetExitCode(1)
 		fmt.Println(response.Status)
 	}
--- a/libpod/define/healthchecks.go
+++ b/libpod/define/healthchecks.go
@ -20,6 +20,9 @@ const (
 	HealthCheckStarting string = "starting"
 	// HealthCheckReset describes reset of HealthCheck logs
 	HealthCheckReset string = "reset"
 	// HealthCheckStopped describes the state in which the container was stopped while a
 	// HealthCheck was running, so the HealthCheck was terminated
 	HealthCheckStopped string = "stopped"
 )
 // HealthCheckStatus represents the current state of a container
@ -49,6 +52,19 @@ const (
 	HealthCheckStartup HealthCheckStatus = iota
 )
 // String returns the textual health status that corresponds to s.
 // HealthCheckSuccess maps to HealthCheckHealthy, HealthCheckStartup to
 // HealthCheckStarting and HealthCheckContainerStopped to HealthCheckStopped;
 // every other value is reported as HealthCheckUnhealthy.
 func (s HealthCheckStatus) String() string {
 	// Start from the fallback and override it only for the statuses that
 	// have a dedicated textual form.
 	label := HealthCheckUnhealthy
 	switch s {
 	case HealthCheckSuccess:
 		label = HealthCheckHealthy
 	case HealthCheckStartup:
 		label = HealthCheckStarting
 	case HealthCheckContainerStopped:
 		label = HealthCheckStopped
 	}
 	return label
 }
 // Healthcheck defaults.  These are used both in the cli as well in
 // libpod and were moved from cmd/podman/common
 const (
--- a/libpod/healthcheck.go
+++ b/libpod/healthcheck.go
@ -113,6 +113,14 @@ func (c *Container) runHealthCheck(ctx context.Context, isStartup bool) (define.
 		returnCode = 1
 	}
 	if !c.batched {
 		c.lock.Lock()
 		defer c.lock.Unlock()
 		if err := c.syncContainer(); err != nil {
 			return define.HealthCheckInternalError, "", err
 		}
 	}
 	// Handle startup HC
 	if isStartup {
 		inStartPeriod = true
@ -124,6 +132,10 @@ func (c *Container) runHealthCheck(ctx context.Context, isStartup bool) (define.
 		}
 	}
 	if exitCode != 0 && c.ensureState(define.ContainerStateStopped, define.ContainerStateStopping, define.ContainerStateExited) {
 		hcResult = define.HealthCheckContainerStopped
 	}
 	timeEnd := time.Now()
 	if c.HealthCheckConfig().StartPeriod > 0 {
 		// there is a start-period we need to honor; we add startPeriod to container start time
@ -148,7 +160,7 @@ func (c *Container) runHealthCheck(ctx context.Context, isStartup bool) (define.
 	hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)
-	healthCheckResult, err := c.updateHealthCheckLog(hcl, inStartPeriod, isStartup)
+	healthCheckResult, err := c.updateHealthCheckLog(hcl, hcResult, inStartPeriod, isStartup)
 	if err != nil {
 		return hcResult, "", fmt.Errorf("unable to update health check log %s for %s: %w", c.config.HealthLogDestination, c.ID(), err)
 	}
@ -216,17 +228,8 @@ func checkHealthCheckCanBeRun(c *Container) (define.HealthCheckStatus, error) {
 // Increment the current startup healthcheck success counter.
 // Can stop the startup HC and start the regular HC if the startup HC has enough
 // consecutive successes.
 // NOTE: The caller must lock and sync the container.
 func (c *Container) incrementStartupHCSuccessCounter(ctx context.Context) {
 	if !c.batched {
 		c.lock.Lock()
 		defer c.lock.Unlock()
 		if err := c.syncContainer(); err != nil {
 			logrus.Errorf("Error syncing container %s state: %v", c.ID(), err)
 			return
 		}
 	}
 	// We don't have a startup HC, can't do anything
 	if c.config.StartupHealthCheckConfig == nil {
 		return
@ -299,17 +302,8 @@ func (c *Container) recreateHealthCheckTimer(ctx context.Context, isStartup bool
 // Increment the current startup healthcheck failure counter.
 // Can restart the container if the HC fails enough times consecutively.
 // NOTE: The caller must lock and sync the container.
 func (c *Container) incrementStartupHCFailureCounter(ctx context.Context) {
 	if !c.batched {
 		c.lock.Lock()
 		defer c.lock.Unlock()
 		if err := c.syncContainer(); err != nil {
 			logrus.Errorf("Error syncing container %s state: %v", c.ID(), err)
 			return
 		}
 	}
 	// We don't have a startup HC, can't do anything
 	if c.config.StartupHealthCheckConfig == nil {
 		return
@ -371,10 +365,8 @@ func (c *Container) isUnhealthy() (bool, error) {
 }
 // updateHealthCheckLog parses the health check results and writes the log
-func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPeriod, isStartup bool) (define.HealthCheckResults, error) {
+// NOTE: The caller must lock the container.
-	c.lock.Lock()
+func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, hcResult define.HealthCheckStatus, inStartPeriod, isStartup bool) (define.HealthCheckResults, error) {
 	defer c.lock.Unlock()
 	// If we are playing a kube yaml then let's honor the start period time for
 	// both failing and succeeding cases to match kube behavior.
 	// So don't update the health check log till the start period is over
@ -394,7 +386,9 @@ func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPerio
 		if len(healthCheck.Status) < 1 {
 			healthCheck.Status = define.HealthCheckHealthy
 		}
-		if !inStartPeriod {
+		if hcResult == define.HealthCheckContainerStopped {
 			healthCheck.Status = define.HealthCheckStopped
 		} else if !inStartPeriod {
 			// increment failing streak
 			healthCheck.FailingStreak++
 			// if failing streak > retries, then status to unhealthy
--- a/pkg/api/handlers/libpod/healthcheck.go
+++ b/pkg/api/handlers/libpod/healthcheck.go
@ -31,14 +31,8 @@ func RunHealthCheck(w http.ResponseWriter, r *http.Request) {
 		utils.InternalServerError(w, err)
 		return
 	}
 	hcStatus := define.HealthCheckUnhealthy
 	if status == define.HealthCheckSuccess {
 		hcStatus = define.HealthCheckHealthy
 	} else if status == define.HealthCheckStartup {
 		hcStatus = define.HealthCheckStarting
 	}
 	report := define.HealthCheckResults{
-		Status: hcStatus,
+		Status: status.String(),
 	}
 	utils.WriteResponse(w, http.StatusOK, report)
 }
--- a/pkg/domain/infra/abi/healthcheck.go
+++ b/pkg/domain/infra/abi/healthcheck.go
@ -14,14 +14,8 @@ func (ic *ContainerEngine) HealthCheckRun(ctx context.Context, nameOrID string,
 	if err != nil {
 		return nil, err
 	}
 	hcStatus := define.HealthCheckUnhealthy
 	if status == define.HealthCheckSuccess {
 		hcStatus = define.HealthCheckHealthy
 	} else if status == define.HealthCheckStartup {
 		hcStatus = define.HealthCheckStarting
 	}
 	report := define.HealthCheckResults{
-		Status: hcStatus,
+		Status: status.String(),
 	}
 	return &report, nil
 }
--- a/test/system/220-healthcheck.bats
+++ b/test/system/220-healthcheck.bats
@ -469,13 +469,14 @@ function _check_health_log {
@test "podman healthcheck - stop container when healthcheck runs" {
    ctr="c-h-$(safename)"
    msg="hc-msg-$(random_string)"
    hcStatus=$PODMAN_TMPDIR/hcStatus
    run_podman run -d --name $ctr             \
           --health-cmd "sleep 20; echo $msg" \
           $IMAGE /home/podman/pause
    timeout --foreground -v --kill=10 60 \
-        $PODMAN healthcheck run $ctr &
+        $PODMAN healthcheck run $ctr &> $hcStatus &
    hc_pid=$!
    run_podman inspect $ctr --format "{{.State.Status}}"
@ -487,9 +488,10 @@ function _check_health_log {
    rc=0
    wait -n $hc_pid || rc=$?
    assert $rc -eq 1 "exit status check of healthcheck command"
    assert $(< $hcStatus) == "stopped" "Health status"
-    run_podman inspect $ctr --format "{{.State.Status}}"
+    run_podman inspect $ctr --format "{{.State.Status}}--{{.State.Health.Status}}--{{.State.Health.FailingStreak}}"
-    assert "$output" == "exited" "Container is stopped"
+    assert "$output" == "exited--stopped--0" "Container is stopped -- Health status -- failing streak"
    run_podman inspect $ctr --format "{{.State.Health.Log}}"
    assert "$output" !~ "$msg" "Health log message not found"