wait: fix handling of multiple conditions with exited

As it turns on things are not so simple after all...
In podman-py it was reported[1] that waiting might hang, per our docs wait
on multiple conditions should exit once the first one is hit and not all
of them. However because the new wait logic never checked if the context
was cancelled the goroutine kept running until conmon exited and because
we used a waitgroup to wait for all of them to finish it blocked until
that happened.

First we can remove the waitgroup as we only need to wait for one of
them anyway via the channel. While this alone fixes the hang it would
still leak the other goroutine. As there is no way to cancel a goroutine
all the code must check for a cancelled context in the wait loop to no
leak.

Fixes 8a943311db ("libpod: simplify WaitForExit()")
[1] https://github.com/containers/podman-py/issues/425

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
This commit is contained in:
Paul Holzinger 2024-09-17 16:48:37 +02:00
parent f4a08f46b7
commit fbed3a01d2
No known key found for this signature in database
GPG Key ID: EB145DD938A3CAF2
2 changed files with 32 additions and 13 deletions

View File

@ -9,7 +9,6 @@ import (
"io"
"net/http"
"os"
"sync"
"time"
"github.com/containers/common/pkg/resize"
@ -596,7 +595,7 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
// we cannot wait locked as we would hold the lock forever, so we unlock and then lock again
c.lock.Unlock()
err := waitForConmonExit(conmonPID, conmonPidFd, pollInterval)
err := waitForConmonExit(ctx, conmonPID, conmonPidFd, pollInterval)
c.lock.Lock()
if err != nil {
return -1, fmt.Errorf("failed to wait for conmon to exit: %w", err)
@ -619,15 +618,24 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
return c.runtime.state.GetContainerExitCode(id)
}
func waitForConmonExit(conmonPID, conmonPidFd int, pollInterval time.Duration) error {
func waitForConmonExit(ctx context.Context, conmonPID, conmonPidFd int, pollInterval time.Duration) error {
if conmonPidFd > -1 {
for {
fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
if _, err := unix.Poll(fds, -1); err != nil {
if n, err := unix.Poll(fds, int(pollInterval.Milliseconds())); err != nil {
if err == unix.EINTR {
continue
}
return err
} else if n == 0 {
// n == 0 means timeout
select {
case <-ctx.Done():
return define.ErrCanceled
default:
// context not done, wait again
continue
}
}
return nil
}
@ -640,7 +648,11 @@ func waitForConmonExit(conmonPID, conmonPidFd int, pollInterval time.Duration) e
}
return err
}
time.Sleep(pollInterval)
select {
case <-ctx.Done():
return define.ErrCanceled
case <-time.After(pollInterval):
}
}
return nil
}
@ -695,22 +707,15 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
}
}
var wg sync.WaitGroup
if waitForExit {
wg.Add(1)
go func() {
defer wg.Done()
code, err := c.WaitForExit(ctx, waitTimeout)
trySend(code, err)
}()
}
if len(wantedStates) > 0 || len(wantedHealthStates) > 0 {
wg.Add(1)
go func() {
defer wg.Done()
stoppedCount := 0
for {
if len(wantedStates) > 0 {
@ -780,7 +785,6 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
case <-ctx.Done():
result = waitResult{-1, define.ErrCanceled}
}
wg.Wait()
return result.code, result.err
}

View File

@ -108,4 +108,19 @@ var _ = Describe("Podman wait", func() {
Expect(session).Should(ExitCleanly())
Expect(session.OutputToStringArray()).To(Equal([]string{"0", "0", "0"}))
})
It("podman wait on multiple conditions", func() {
session := podmanTest.Podman([]string{"run", "-d", ALPINE, "sleep", "100"})
session.Wait(20)
Expect(session).Should(ExitCleanly())
cid := session.OutputToString()
// condition should return once nay of the condition is met not all of them,
// as the container is running this should return immediately
// https://github.com/containers/podman-py/issues/425
session = podmanTest.Podman([]string{"wait", "--condition", "running,exited", cid})
session.Wait(20)
Expect(session).Should(ExitCleanly())
Expect(session.OutputToString()).To(Equal("-1"))
})
})