libpod: simplify WaitForExit()

The current code does several complicated state checks that simply do not
work properly on a fast-restarting container. It has a special case for
--restart=always but forgets to take care of --restart=on-failure, which
always hangs for 20s until it runs into the timeout.

The old logic also used to call CheckConmonRunning() but synced the
state beforehand, which means it may check a new conmon every time and
thus miss exits.

To fix this, the new code is much simpler: check the conmon pid, and
once it is no longer running, read the exit file to get the exit code.
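
For illustration, here is a minimal standalone Go sketch of that wait
strategy (pidfd_open + poll, falling back to probing the pid with
kill(pid, 0)). It mirrors the shape of the new waitForConmonExit()
helper but uses generic names, and the pid in main() is only a
placeholder:

package main

import (
	"fmt"
	"time"

	"golang.org/x/sys/unix"
)

// waitForPidExit blocks until the process with the given pid is gone.
// It prefers pidfd_open + poll (Linux >= 5.3) and falls back to polling
// kill(pid, 0) when pidfds are not available.
func waitForPidExit(pid int, pollInterval time.Duration) error {
	if fd, err := unix.PidfdOpen(pid, 0); err == nil {
		defer unix.Close(fd)
		for {
			fds := []unix.PollFd{{Fd: int32(fd), Events: unix.POLLIN}}
			// Poll returns once the process behind the pidfd has exited.
			if _, err := unix.Poll(fds, -1); err != nil {
				if err == unix.EINTR {
					continue
				}
				return err
			}
			return nil
		}
	}
	// No pidfd support: probe the pid until it no longer exists.
	for {
		if err := unix.Kill(pid, 0); err != nil {
			if err == unix.ESRCH {
				return nil
			}
			return err
		}
		time.Sleep(pollInterval)
	}
}

func main() {
	// 12345 is a placeholder pid; in libpod this would be conmon's pid.
	if err := waitForPidExit(12345, 100*time.Millisecond); err != nil {
		fmt.Println("wait failed:", err)
	}
}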

This is related to #23473, but I am not sure whether it fixes that
issue because we cannot reproduce it.

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
Paul Holzinger 2024-08-13 15:21:55 +02:00
parent f456b53a0e
commit 8a943311db
3 changed files with 88 additions and 110 deletions

@@ -541,119 +541,97 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
 		}
 		return -1, define.ErrCtrRemoved
 	}
-	var conmonTimer time.Timer
-	conmonTimerSet := false
-
-	conmonPidFd := c.getConmonPidFd()
-	if conmonPidFd != -1 {
-		defer unix.Close(conmonPidFd)
-	}
-	conmonPidFdTriggered := false
-
-	getExitCode := func() (bool, int32, error) {
-		containerRemoved := false
-		if !c.batched {
-			c.lock.Lock()
-			defer c.lock.Unlock()
-		}
-
-		if err := c.syncContainer(); err != nil {
-			if !errors.Is(err, define.ErrNoSuchCtr) {
-				return false, -1, err
-			}
-			containerRemoved = true
-		}
-
-		// If conmon is not alive anymore set a timer to make sure
-		// we're returning even if conmon has forcefully been killed.
-		if !conmonTimerSet && !containerRemoved {
-			conmonAlive, err := c.ociRuntime.CheckConmonRunning(c)
-			switch {
-			case errors.Is(err, define.ErrNoSuchCtr):
-				// Container has been removed, so we assume the
-				// exit code is present in the DB.
-				containerRemoved = true
-			case err != nil:
-				return false, -1, err
-			case !conmonAlive:
-				// Give the exit code at most 20 seconds to
-				// show up in the DB. That should largely be
-				// enough for the cleanup process.
-				timerDuration := time.Second * 20
-				conmonTimer = *time.NewTimer(timerDuration)
-				conmonTimerSet = true
-			case conmonAlive:
-				// Continue waiting if conmon's still running.
-				return false, -1, nil
-			}
-		}
-
-		timedout := ""
-		if !containerRemoved {
-			// If conmon is dead for more than $timerDuration or if the
-			// container has exited properly, try to look up the exit code.
-			select {
-			case <-conmonTimer.C:
-				logrus.Debugf("Exceeded conmon timeout waiting for container %s to exit", id)
-				timedout = " [exceeded conmon timeout waiting for container to exit]"
-			default:
-				switch c.state.State {
-				case define.ContainerStateExited, define.ContainerStateConfigured:
-					// Container exited, so we can look up the exit code.
-				case define.ContainerStateStopped:
-					// Continue looping unless the restart policy is always.
-					// In this case, the container would never transition to
-					// the exited state, so we need to look up the exit code.
-					if c.config.RestartPolicy != define.RestartPolicyAlways {
-						return false, -1, nil
-					}
-				default:
-					// Continue looping
-					return false, -1, nil
-				}
-			}
-		}
-
-		exitCode, err := c.runtime.state.GetContainerExitCode(id)
-		if err != nil {
-			if errors.Is(err, define.ErrNoSuchExitCode) {
-				// If the container is configured or created, we must assume it never ran.
-				if c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated) {
-					return true, 0, nil
-				}
-			}
-			return true, -1, fmt.Errorf("%w (container in state %s)%s", err, c.state.State, timedout)
-		}
-
-		return true, exitCode, nil
-	}
-
-	for {
-		hasExited, exitCode, err := getExitCode()
-		if hasExited {
-			return exitCode, err
-		}
-		if err != nil {
-			return -1, err
-		}
-
-		select {
-		case <-ctx.Done():
-			return -1, fmt.Errorf("waiting for exit code of container %s canceled", id)
-		default:
-			if conmonPidFd != -1 && !conmonPidFdTriggered {
-				// If possible (pidfd works), the first cycle we block until conmon dies
-				// If this happens, and we fall back to the old poll delay
-				// There is a deadlock in the cleanup code for "play kube" which causes
-				// conmon to not exit, so unfortunately we have to use the poll interval
-				// timeout here to avoid hanging.
-				fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
-				_, _ = unix.Poll(fds, int(pollInterval.Milliseconds()))
-				conmonPidFdTriggered = true
-			} else {
-				time.Sleep(pollInterval)
-			}
-		}
-	}
+	if !c.batched {
+		c.lock.Lock()
+		defer c.lock.Unlock()
+	}
+
+	// Note this is one of the rare cases where we do not like to use syncContainer() as it does read the exit file
+	// We like to avoid that here because that means we might read it before container cleanup run and possible
+	// removed the container
+	if err := c.runtime.state.UpdateContainer(c); err != nil {
+		if errors.Is(err, define.ErrNoSuchCtr) {
+			// if the container is not valid at this point as it was deleted,
+			// check if the exit code was recorded in the db.
+			exitCode, err := c.runtime.state.GetContainerExitCode(id)
+			if err == nil {
+				return exitCode, nil
+			}
+		}
+		return -1, err
+	}
+
+	conmonPID := c.state.ConmonPID
+	// conmonPID == 0 means container is not running
+	if conmonPID == 0 {
+		exitCode, err := c.runtime.state.GetContainerExitCode(id)
+		if err != nil {
+			if errors.Is(err, define.ErrNoSuchExitCode) {
+				// If the container is configured or created, we must assume it never ran.
+				if c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated) {
+					return 0, nil
+				}
+			}
+			return -1, err
+		}
+		return exitCode, nil
+	}
+
+	conmonPidFd := c.getConmonPidFd()
+	if conmonPidFd > -1 {
+		defer unix.Close(conmonPidFd)
+	}
+
+	// we cannot wait locked as we would hold the lock forever, so we unlock and then lock again
+	c.lock.Unlock()
+	err := waitForConmonExit(conmonPID, conmonPidFd, pollInterval)
+	c.lock.Lock()
+	if err != nil {
+		return -1, fmt.Errorf("failed to wait for conmon to exit: %w", err)
+	}
+
+	// we locked again so we must sync the state
+	if err := c.syncContainer(); err != nil {
+		if errors.Is(err, define.ErrNoSuchCtr) {
+			// if the container is not valid at this point as it was deleted,
+			// check if the exit code was recorded in the db.
+			exitCode, err := c.runtime.state.GetContainerExitCode(id)
+			if err == nil {
+				return exitCode, nil
+			}
+		}
+		return -1, err
+	}
+
+	// syncContainer already did the exit file read so nothing extra for us to do here
+	return c.runtime.state.GetContainerExitCode(id)
+}
+
+func waitForConmonExit(conmonPID, conmonPidFd int, pollInterval time.Duration) error {
+	if conmonPidFd > -1 {
+		for {
+			fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
+			if _, err := unix.Poll(fds, -1); err != nil {
+				if err == unix.EINTR {
+					continue
+				}
+				return err
+			}
+			return nil
+		}
+	}
+	// no pidfd support, we must poll the pid
+	for {
+		if err := unix.Kill(conmonPID, 0); err != nil {
+			if err == unix.ESRCH {
+				break
+			}
+			return err
+		}
+		time.Sleep(pollInterval)
+	}
+	return nil
 }
 
 type waitResult struct {
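
The hunk above also has to juggle the container lock around the blocking
wait. Below is a tiny self-contained sketch (generic names, not the libpod
types) of that unlock, wait, relock, resync pattern, showing why the lock is
dropped around the blocking call and why cached state must be re-read
afterwards:

package main

import (
	"fmt"
	"sync"
	"time"
)

// box stands in for the container object; mu for c.lock, state for c.state.
type box struct {
	mu    sync.Mutex
	state int
}

// waitAndReload mirrors the shape of the new WaitForExit: take the lock,
// drop it around the blocking wait so other operations are not stalled,
// then re-acquire it and re-read the state, which may have changed.
func (b *box) waitAndReload(block func() error) (int, error) {
	b.mu.Lock()
	defer b.mu.Unlock()

	b.mu.Unlock() // cannot hold the lock while blocking
	err := block()
	b.mu.Lock() // re-acquire before touching state again
	if err != nil {
		return -1, err
	}

	// Anything read before the wait is stale now; re-read it here
	// (the real code calls syncContainer() at this point).
	return b.state, nil
}

func main() {
	b := &box{state: 42}
	v, err := b.waitAndReload(func() error {
		time.Sleep(10 * time.Millisecond)
		return nil
	})
	fmt.Println(v, err)
}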

@@ -694,16 +694,14 @@ func (c *Container) makePlatformBindMounts() error {
 }
 
 func (c *Container) getConmonPidFd() int {
-	if c.state.ConmonPID != 0 {
-		// Track lifetime of conmon precisely using pidfd_open + poll.
-		// There are many cases for this to fail, for instance conmon is dead
-		// or pidfd_open is not supported (pre linux 5.3), so fall back to the
-		// traditional loop with poll + sleep
-		if fd, err := unix.PidfdOpen(c.state.ConmonPID, 0); err == nil {
-			return fd
-		} else if err != unix.ENOSYS && err != unix.ESRCH {
-			logrus.Debugf("PidfdOpen(%d) failed: %v", c.state.ConmonPID, err)
-		}
-	}
+	// Track lifetime of conmon precisely using pidfd_open + poll.
+	// There are many cases for this to fail, for instance conmon is dead
+	// or pidfd_open is not supported (pre linux 5.3), so fall back to the
+	// traditional loop with poll + sleep
+	if fd, err := unix.PidfdOpen(c.state.ConmonPID, 0); err == nil {
+		return fd
+	} else if err != unix.ENOSYS && err != unix.ESRCH {
+		logrus.Debugf("PidfdOpen(%d) failed: %v", c.state.ConmonPID, err)
+	}
 	return -1
 }

@@ -1289,13 +1289,15 @@ EOF
     rm -rf $romount
 }
 
-@test "podman run --restart=always -- wait" {
+@test "podman run --restart=always/on-failure -- wait" {
     # regression test for #18572 to make sure Podman waits less than 20 seconds
     ctr=c_$(safename)
-    run_podman run -d --restart=always --name=$ctr $IMAGE false
-    PODMAN_TIMEOUT=20 run_podman wait $ctr
-    is "$output" "1" "container should exit 1"
-    run_podman rm -f -t0 $ctr
+    for policy in always on-failure; do
+        run_podman run -d --restart=$policy --name=$ctr $IMAGE false
+        PODMAN_TIMEOUT=20 run_podman wait $ctr
+        is "$output" "1" "container should exit 1 (policy: $policy)"
+        run_podman rm -f -t0 $ctr
+    done
 }
 
 @test "podman run - custom static_dir" {