libpod: simplify WaitForExit()

The current code does several complicated state checks that simply do not
work properly on a fast-restarting container. It has a special case for
--restart=always but forgets to take care of --restart=on-failure, which
always hangs for 20s until it runs into the timeout.

The old logic also used to call CheckConmonRunning() but synced the
state beforehand, which means it may check a new conmon every time and
thus miss exits.

To fix this, the new code is much simpler: check the conmon pid, and
once it is no longer running, read the exit file to get the exit code.
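
For illustration, here is a minimal standalone Go sketch of that wait
strategy (pidfd_open + poll, falling back to probing the pid with
kill(pid, 0)). It mirrors the shape of the new waitForConmonExit()
helper but uses generic names, and the pid in main() is only a
placeholder:

package main

import (
	"fmt"
	"time"

	"golang.org/x/sys/unix"
)

// waitForPidExit blocks until the process with the given pid is gone.
// It prefers pidfd_open + poll (Linux >= 5.3) and falls back to polling
// kill(pid, 0) when pidfds are not available.
func waitForPidExit(pid int, pollInterval time.Duration) error {
	if fd, err := unix.PidfdOpen(pid, 0); err == nil {
		defer unix.Close(fd)
		for {
			fds := []unix.PollFd{{Fd: int32(fd), Events: unix.POLLIN}}
			// Poll returns once the process behind the pidfd has exited.
			if _, err := unix.Poll(fds, -1); err != nil {
				if err == unix.EINTR {
					continue
				}
				return err
			}
			return nil
		}
	}
	// No pidfd support: probe the pid until it no longer exists.
	for {
		if err := unix.Kill(pid, 0); err != nil {
			if err == unix.ESRCH {
				return nil
			}
			return err
		}
		time.Sleep(pollInterval)
	}
}

func main() {
	// 12345 is a placeholder pid; in libpod this would be conmon's pid.
	if err := waitForPidExit(12345, 100*time.Millisecond); err != nil {
		fmt.Println("wait failed:", err)
	}
}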

This is related to #23473, but I am not sure whether it fixes that
issue because we cannot reproduce it.

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
Paul Holzinger 2024-08-13 15:21:55 +02:00
parent f456b53a0e
commit 8a943311db
3 changed files with 88 additions and 110 deletions

@@ -541,119 +541,97 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
 		}
 		return -1, define.ErrCtrRemoved
 	}
-	var conmonTimer time.Timer
-	conmonTimerSet := false
-
-	conmonPidFd := c.getConmonPidFd()
-	if conmonPidFd != -1 {
-		defer unix.Close(conmonPidFd)
-	}
-	conmonPidFdTriggered := false
-
-	getExitCode := func() (bool, int32, error) {
-		containerRemoved := false
-		if !c.batched {
-			c.lock.Lock()
-			defer c.lock.Unlock()
-		}
-
-		if err := c.syncContainer(); err != nil {
-			if !errors.Is(err, define.ErrNoSuchCtr) {
-				return false, -1, err
-			}
-			containerRemoved = true
-		}
-
-		// If conmon is not alive anymore set a timer to make sure
-		// we're returning even if conmon has forcefully been killed.
-		if !conmonTimerSet && !containerRemoved {
-			conmonAlive, err := c.ociRuntime.CheckConmonRunning(c)
-			switch {
-			case errors.Is(err, define.ErrNoSuchCtr):
-				// Container has been removed, so we assume the
-				// exit code is present in the DB.
-				containerRemoved = true
-			case err != nil:
-				return false, -1, err
-			case !conmonAlive:
-				// Give the exit code at most 20 seconds to
-				// show up in the DB. That should largely be
-				// enough for the cleanup process.
-				timerDuration := time.Second * 20
-				conmonTimer = *time.NewTimer(timerDuration)
-				conmonTimerSet = true
-			case conmonAlive:
-				// Continue waiting if conmon's still running.
-				return false, -1, nil
-			}
-		}
-
-		timedout := ""
-		if !containerRemoved {
-			// If conmon is dead for more than $timerDuration or if the
-			// container has exited properly, try to look up the exit code.
-			select {
-			case <-conmonTimer.C:
-				logrus.Debugf("Exceeded conmon timeout waiting for container %s to exit", id)
-				timedout = " [exceeded conmon timeout waiting for container to exit]"
-			default:
-				switch c.state.State {
-				case define.ContainerStateExited, define.ContainerStateConfigured:
-					// Container exited, so we can look up the exit code.
-				case define.ContainerStateStopped:
-					// Continue looping unless the restart policy is always.
-					// In this case, the container would never transition to
-					// the exited state, so we need to look up the exit code.
-					if c.config.RestartPolicy != define.RestartPolicyAlways {
-						return false, -1, nil
-					}
-				default:
-					// Continue looping
-					return false, -1, nil
-				}
-			}
-		}
-
-		exitCode, err := c.runtime.state.GetContainerExitCode(id)
-		if err != nil {
-			if errors.Is(err, define.ErrNoSuchExitCode) {
-				// If the container is configured or created, we must assume it never ran.
-				if c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated) {
-					return true, 0, nil
-				}
-			}
-			return true, -1, fmt.Errorf("%w (container in state %s)%s", err, c.state.State, timedout)
-		}
-
-		return true, exitCode, nil
-	}
-
-	for {
-		hasExited, exitCode, err := getExitCode()
-		if hasExited {
-			return exitCode, err
-		}
-		if err != nil {
-			return -1, err
-		}
-
-		select {
-		case <-ctx.Done():
-			return -1, fmt.Errorf("waiting for exit code of container %s canceled", id)
-		default:
-			if conmonPidFd != -1 && !conmonPidFdTriggered {
-				// If possible (pidfd works), the first cycle we block until conmon dies
-				// If this happens, and we fall back to the old poll delay
-				// There is a deadlock in the cleanup code for "play kube" which causes
-				// conmon to not exit, so unfortunately we have to use the poll interval
-				// timeout here to avoid hanging.
-				fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
-				_, _ = unix.Poll(fds, int(pollInterval.Milliseconds()))
-				conmonPidFdTriggered = true
-			} else {
-				time.Sleep(pollInterval)
-			}
-		}
-	}
+	if !c.batched {
+		c.lock.Lock()
+		defer c.lock.Unlock()
+	}
+
+	// Note this is one of the rare cases where we do not like to use syncContainer() as it does read the exit file
+	// We like to avoid that here because that means we might read it before container cleanup run and possible
+	// removed the container
+	if err := c.runtime.state.UpdateContainer(c); err != nil {
+		if errors.Is(err, define.ErrNoSuchCtr) {
+			// if the container is not valid at this point as it was deleted,
+			// check if the exit code was recorded in the db.
+			exitCode, err := c.runtime.state.GetContainerExitCode(id)
+			if err == nil {
+				return exitCode, nil
+			}
+		}
+		return -1, err
+	}
+
+	conmonPID := c.state.ConmonPID
+	// conmonPID == 0 means container is not running
+	if conmonPID == 0 {
+		exitCode, err := c.runtime.state.GetContainerExitCode(id)
+		if err != nil {
+			if errors.Is(err, define.ErrNoSuchExitCode) {
+				// If the container is configured or created, we must assume it never ran.
+				if c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated) {
+					return 0, nil
+				}
+			}
+			return -1, err
+		}
+		return exitCode, nil
+	}
+
+	conmonPidFd := c.getConmonPidFd()
+	if conmonPidFd > -1 {
+		defer unix.Close(conmonPidFd)
+	}
+
+	// we cannot wait locked as we would hold the lock forever, so we unlock and then lock again
+	c.lock.Unlock()
+	err := waitForConmonExit(conmonPID, conmonPidFd, pollInterval)
+	c.lock.Lock()
+	if err != nil {
+		return -1, fmt.Errorf("failed to wait for conmon to exit: %w", err)
+	}
+
+	// we locked again so we must sync the state
+	if err := c.syncContainer(); err != nil {
+		if errors.Is(err, define.ErrNoSuchCtr) {
+			// if the container is not valid at this point as it was deleted,
+			// check if the exit code was recorded in the db.
+			exitCode, err := c.runtime.state.GetContainerExitCode(id)
+			if err == nil {
+				return exitCode, nil
+			}
+		}
+		return -1, err
+	}
+
+	// syncContainer already did the exit file read so nothing extra for us to do here
+	return c.runtime.state.GetContainerExitCode(id)
+}
+
+func waitForConmonExit(conmonPID, conmonPidFd int, pollInterval time.Duration) error {
+	if conmonPidFd > -1 {
+		for {
+			fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
+			if _, err := unix.Poll(fds, -1); err != nil {
+				if err == unix.EINTR {
+					continue
+				}
+				return err
+			}
+			return nil
+		}
+	}
+	// no pidfd support, we must poll the pid
+	for {
+		if err := unix.Kill(conmonPID, 0); err != nil {
+			if err == unix.ESRCH {
+				break
+			}
+			return err
+		}
+		time.Sleep(pollInterval)
+	}
+	return nil
 }
 
 type waitResult struct {
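
The hunk above also has to juggle the container lock around the blocking
wait. Below is a tiny self-contained sketch (generic names, not the libpod
types) of that unlock, wait, relock, resync pattern, showing why the lock is
dropped around the blocking call and why cached state must be re-read
afterwards:

package main

import (
	"fmt"
	"sync"
	"time"
)

// box stands in for the container object; mu for c.lock, state for c.state.
type box struct {
	mu    sync.Mutex
	state int
}

// waitAndReload mirrors the shape of the new WaitForExit: take the lock,
// drop it around the blocking wait so other operations are not stalled,
// then re-acquire it and re-read the state, which may have changed.
func (b *box) waitAndReload(block func() error) (int, error) {
	b.mu.Lock()
	defer b.mu.Unlock()

	b.mu.Unlock() // cannot hold the lock while blocking
	err := block()
	b.mu.Lock() // re-acquire before touching state again
	if err != nil {
		return -1, err
	}

	// Anything read before the wait is stale now; re-read it here
	// (the real code calls syncContainer() at this point).
	return b.state, nil
}

func main() {
	b := &box{state: 42}
	v, err := b.waitAndReload(func() error {
		time.Sleep(10 * time.Millisecond)
		return nil
	})
	fmt.Println(v, err)
}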

@@ -694,16 +694,14 @@ func (c *Container) makePlatformBindMounts() error {
 }
 
 func (c *Container) getConmonPidFd() int {
-	if c.state.ConmonPID != 0 {
-		// Track lifetime of conmon precisely using pidfd_open + poll.
-		// There are many cases for this to fail, for instance conmon is dead
-		// or pidfd_open is not supported (pre linux 5.3), so fall back to the
-		// traditional loop with poll + sleep
-		if fd, err := unix.PidfdOpen(c.state.ConmonPID, 0); err == nil {
-			return fd
-		} else if err != unix.ENOSYS && err != unix.ESRCH {
-			logrus.Debugf("PidfdOpen(%d) failed: %v", c.state.ConmonPID, err)
-		}
-	}
+	// Track lifetime of conmon precisely using pidfd_open + poll.
+	// There are many cases for this to fail, for instance conmon is dead
+	// or pidfd_open is not supported (pre linux 5.3), so fall back to the
+	// traditional loop with poll + sleep
+	if fd, err := unix.PidfdOpen(c.state.ConmonPID, 0); err == nil {
+		return fd
+	} else if err != unix.ENOSYS && err != unix.ESRCH {
+		logrus.Debugf("PidfdOpen(%d) failed: %v", c.state.ConmonPID, err)
+	}
 	return -1
 }

@@ -1289,13 +1289,15 @@ EOF
     rm -rf $romount
 }
 
-@test "podman run --restart=always -- wait" {
+@test "podman run --restart=always/on-failure -- wait" {
     # regression test for #18572 to make sure Podman waits less than 20 seconds
     ctr=c_$(safename)
-    run_podman run -d --restart=always --name=$ctr $IMAGE false
-    PODMAN_TIMEOUT=20 run_podman wait $ctr
-    is "$output" "1" "container should exit 1"
-    run_podman rm -f -t0 $ctr
+    for policy in always on-failure; do
+        run_podman run -d --restart=$policy --name=$ctr $IMAGE false
+        PODMAN_TIMEOUT=20 run_podman wait $ctr
+        is "$output" "1" "container should exit 1 (policy: $policy)"
+        run_podman rm -f -t0 $ctr
+    done
 }
 
 @test "podman run - custom static_dir" {