diff --git a/docs/source/markdown/podman-wait.1.md.in b/docs/source/markdown/podman-wait.1.md.in index db3ce0d54f..a946a9bcd5 100644 --- a/docs/source/markdown/podman-wait.1.md.in +++ b/docs/source/markdown/podman-wait.1.md.in @@ -14,6 +14,11 @@ name or ID. In the case of multiple containers, Podman waits on each consecutiv After all specified containers are stopped, the containers' return codes are printed separated by newline in the same order as they were given to the command. +NOTE: there is an inherent race condition when waiting for containers with a +restart policy of `always` or `on-failure`, such as those created by `podman +kube play`. Such containers may be repeatedly exiting and restarting, possibly +with different exit codes, but `podman wait` can only detect and display one. + ## OPTIONS #### **--condition**=*state* diff --git a/libpod/container_api.go b/libpod/container_api.go index 8627a3ea7a..d66e9dae27 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -592,13 +592,21 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration) conmonAlive, err := c.ociRuntime.CheckConmonRunning(c) switch { case errors.Is(err, define.ErrNoSuchCtr): + // Container has been removed, so we assume the + // exit code is present in the DB. containerRemoved = true case err != nil: return false, -1, err case !conmonAlive: + // Give the exit code at most 20 seconds to + // show up in the DB. That should largely be + // enough for the cleanup process. timerDuration := time.Second * 20 conmonTimer = *time.NewTimer(timerDuration) conmonTimerSet = true + case conmonAlive: + // Continue waiting if conmon's still running. 
+ return false, -1, nil } } @@ -609,7 +617,18 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration) case <-conmonTimer.C: logrus.Debugf("Exceeded conmon timeout waiting for container %s to exit", id) default: - if !c.ensureState(define.ContainerStateExited, define.ContainerStateConfigured) { + switch c.state.State { + case define.ContainerStateExited, define.ContainerStateConfigured: + // Container exited, so we can look up the exit code. + case define.ContainerStateStopped: + // Continue looping unless the restart policy is always. + // In this case, the container would never transition to + // the exited state, so we need to look up the exit code. + if c.config.RestartPolicy != define.RestartPolicyAlways { + return false, -1, nil + } + default: + // Continue looping return false, -1, nil } } @@ -617,9 +636,11 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration) exitCode, err := c.runtime.state.GetContainerExitCode(id) if err != nil { - if errors.Is(err, define.ErrNoSuchExitCode) && c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated) { - // The container never ran. - return true, 0, nil + if errors.Is(err, define.ErrNoSuchExitCode) { + // If the container is configured or created, we must assume it never ran. 
+ if c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated) { + return true, 0, nil + } } return true, -1, fmt.Errorf("%w (container in state %s)", err, c.state.State) } diff --git a/test/system/030-run.bats b/test/system/030-run.bats index 49875b9a1a..d46111e862 100644 --- a/test/system/030-run.bats +++ b/test/system/030-run.bats @@ -1104,5 +1104,13 @@ EOF rm -rf $romount } +@test "podman run --restart=always -- wait" { + # regression test for #18572 to make sure Podman waits less than 20 seconds + ctr=$(random_string) + run_podman run -d --restart=always --name=$ctr $IMAGE false + PODMAN_TIMEOUT=20 run_podman wait $ctr + is "$output" "1" "container should exit 1" + run_podman rm -f -t0 $ctr +} # vim: filetype=sh