Merge pull request #23985 from Luap99/wait-hang

wait: fix handling of multiple conditions with exited
This commit is contained in:
openshift-merge-bot[bot] 2024-09-18 12:26:28 +00:00 committed by GitHub
commit f580ae0d19
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 32 additions and 17 deletions

View File

@ -9,7 +9,6 @@ import (
"io"
"net/http"
"os"
"sync"
"time"
"github.com/containers/common/pkg/resize"
@ -596,7 +595,7 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
// we cannot wait locked as we would hold the lock forever, so we unlock and then lock again
c.lock.Unlock()
err := waitForConmonExit(conmonPID, conmonPidFd, pollInterval)
err := waitForConmonExit(ctx, conmonPID, conmonPidFd, pollInterval)
c.lock.Lock()
if err != nil {
return -1, fmt.Errorf("failed to wait for conmon to exit: %w", err)
@ -619,15 +618,24 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
return c.runtime.state.GetContainerExitCode(id)
}
func waitForConmonExit(conmonPID, conmonPidFd int, pollInterval time.Duration) error {
func waitForConmonExit(ctx context.Context, conmonPID, conmonPidFd int, pollInterval time.Duration) error {
if conmonPidFd > -1 {
for {
fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
if _, err := unix.Poll(fds, -1); err != nil {
if n, err := unix.Poll(fds, int(pollInterval.Milliseconds())); err != nil {
if err == unix.EINTR {
continue
}
return err
} else if n == 0 {
// n == 0 means timeout
select {
case <-ctx.Done():
return define.ErrCanceled
default:
// context not done, wait again
continue
}
}
return nil
}
@ -640,7 +648,11 @@ func waitForConmonExit(conmonPID, conmonPidFd int, pollInterval time.Duration) e
}
return err
}
time.Sleep(pollInterval)
select {
case <-ctx.Done():
return define.ErrCanceled
case <-time.After(pollInterval):
}
}
return nil
}
@ -695,22 +707,15 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
}
}
var wg sync.WaitGroup
if waitForExit {
wg.Add(1)
go func() {
defer wg.Done()
code, err := c.WaitForExit(ctx, waitTimeout)
trySend(code, err)
}()
}
if len(wantedStates) > 0 || len(wantedHealthStates) > 0 {
wg.Add(1)
go func() {
defer wg.Done()
stoppedCount := 0
for {
if len(wantedStates) > 0 {
@ -780,7 +785,6 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
case <-ctx.Done():
result = waitResult{-1, define.ErrCanceled}
}
wg.Wait()
return result.code, result.err
}

View File

@ -108,4 +108,19 @@ var _ = Describe("Podman wait", func() {
Expect(session).Should(ExitCleanly())
Expect(session.OutputToStringArray()).To(Equal([]string{"0", "0", "0"}))
})
It("podman wait on multiple conditions", func() {
session := podmanTest.Podman([]string{"run", "-d", ALPINE, "sleep", "100"})
session.Wait(20)
Expect(session).Should(ExitCleanly())
cid := session.OutputToString()
// condition should return once nay of the condition is met not all of them,
// as the container is running this should return immediately
// https://github.com/containers/podman-py/issues/425
session = podmanTest.Podman([]string{"wait", "--condition", "running,exited", cid})
session.Wait(20)
Expect(session).Should(ExitCleanly())
Expect(session.OutputToString()).To(Equal("-1"))
})
})

View File

@ -136,10 +136,6 @@ function _log_test_restarted() {
fi
cname="c-ltr-$(safename)"
run_podman run --log-driver=$driver ${events_backend} --name $cname $IMAGE sh -c 'start=0; if test -s log; then start=`tail -n 1 log`; fi; seq `expr $start + 1` `expr $start + 10` | tee -a log'
# FIXME: #9597
# run/start is flaking for remote so let's wait for the container condition
# to stop wasting energy until the root cause gets fixed.
run_podman container wait --condition=exited --condition=stopped $cname
run_podman ${events_backend} start -a $cname
logfile=$(mktemp -p ${PODMAN_TMPDIR} logfileXXXXXXXX)
$PODMAN $_PODMAN_TEST_OPTS ${events_backend} logs -f $cname > $logfile