Merge pull request #18581 from vrothberg/fix-18572

wait: look for exit code in stopped state
This commit is contained in:
OpenShift Merge Robot 2023-05-22 11:51:14 -04:00 committed by GitHub
commit af8d19dc2e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 38 additions and 4 deletions

View File

@ -14,6 +14,11 @@ name or ID. In the case of multiple containers, Podman waits on each consecutiv
After all specified containers are stopped, the containers' return codes are printed
separated by newline in the same order as they were given to the command.
NOTE: there is an inherent race condition when waiting for containers with a
restart policy of `always` or `on-failure`, such as those created by `podman
kube play`. Such containers may be repeatedly exiting and restarting, possibly
with different exit codes, but `podman wait` can only detect and display one of them.
## OPTIONS
#### **--condition**=*state*

View File

@ -592,13 +592,21 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
conmonAlive, err := c.ociRuntime.CheckConmonRunning(c)
switch {
case errors.Is(err, define.ErrNoSuchCtr):
// Container has been removed, so we assume the
// exit code is present in the DB.
containerRemoved = true
case err != nil:
return false, -1, err
case !conmonAlive:
// Give the exit code at most 20 seconds to
// show up in the DB. That should largely be
// enough for the cleanup process.
timerDuration := time.Second * 20
conmonTimer = *time.NewTimer(timerDuration)
conmonTimerSet = true
case conmonAlive:
// Continue waiting if conmon's still running.
return false, -1, nil
}
}
@ -609,7 +617,18 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
case <-conmonTimer.C:
logrus.Debugf("Exceeded conmon timeout waiting for container %s to exit", id)
default:
if !c.ensureState(define.ContainerStateExited, define.ContainerStateConfigured) {
switch c.state.State {
case define.ContainerStateExited, define.ContainerStateConfigured:
// Container exited, so we can look up the exit code.
case define.ContainerStateStopped:
// Continue looping unless the restart policy is always.
// In this case, the container would never transition to
// the exited state, so we need to look up the exit code.
if c.config.RestartPolicy != define.RestartPolicyAlways {
return false, -1, nil
}
default:
// Continue looping
return false, -1, nil
}
}
@ -617,9 +636,11 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
exitCode, err := c.runtime.state.GetContainerExitCode(id)
if err != nil {
if errors.Is(err, define.ErrNoSuchExitCode) && c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated) {
// The container never ran.
return true, 0, nil
if errors.Is(err, define.ErrNoSuchExitCode) {
// If the container is configured or created, we must assume it never ran.
if c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated) {
return true, 0, nil
}
}
return true, -1, fmt.Errorf("%w (container in state %s)", err, c.state.State)
}

View File

@ -1104,5 +1104,13 @@ EOF
rm -rf $romount
}
@test "podman run --restart=always -- wait" {
# regression test for #18572 to make sure Podman waits less than 20 seconds
# Use a random container name (from the random_string helper).
ctr=$(random_string)
# Start a detached container whose command is `false`, so it exits with a
# non-zero status right away; --restart=always means it keeps being restarted
# instead of staying exited.
run_podman run -d --restart=always --name=$ctr $IMAGE false
# `podman wait` must still report an exit code for such a container.
# NOTE(review): PODMAN_TIMEOUT=20 presumably makes run_podman fail the test if
# the command runs longer than 20 seconds -- confirm against the helper.
PODMAN_TIMEOUT=20 run_podman wait $ctr
# The output of `podman wait` is expected to be the exit code "1".
is "$output" "1" "container should exit 1"
# Force-remove the container (-f) with a zero-second stop timeout (-t0).
run_podman rm -f -t0 $ctr
}
# vim: filetype=sh