mirror of https://github.com/containers/podman.git
wait: fix handling of multiple conditions with exited
As it turns out, things are not so simple after all...
In podman-py it was reported[1] that waiting might hang. Per our docs, a
wait on multiple conditions should exit once the first one is hit, not all
of them. However, because the new wait logic never checked if the context
was cancelled, the goroutine kept running until conmon exited, and because
we used a waitgroup to wait for all of them to finish, it blocked until
that happened.
First we can remove the waitgroup as we only need to wait for one of
them anyway via the channel. While this alone fixes the hang it would
still leak the other goroutine. As there is no way to cancel a goroutine
all the code must check for a cancelled context in the wait loop to not
leak.
Fixes 8a943311db
("libpod: simplify WaitForExit()")
[1] https://github.com/containers/podman-py/issues/425
Signed-off-by: Paul Holzinger <pholzing@redhat.com>
This commit is contained in:
parent
f4a08f46b7
commit
fbed3a01d2
|
@ -9,7 +9,6 @@ import (
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"sync"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/containers/common/pkg/resize"
|
"github.com/containers/common/pkg/resize"
|
||||||
|
@ -596,7 +595,7 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
|
||||||
|
|
||||||
// we cannot wait locked as we would hold the lock forever, so we unlock and then lock again
|
// we cannot wait locked as we would hold the lock forever, so we unlock and then lock again
|
||||||
c.lock.Unlock()
|
c.lock.Unlock()
|
||||||
err := waitForConmonExit(conmonPID, conmonPidFd, pollInterval)
|
err := waitForConmonExit(ctx, conmonPID, conmonPidFd, pollInterval)
|
||||||
c.lock.Lock()
|
c.lock.Lock()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return -1, fmt.Errorf("failed to wait for conmon to exit: %w", err)
|
return -1, fmt.Errorf("failed to wait for conmon to exit: %w", err)
|
||||||
|
@ -619,15 +618,24 @@ func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration)
|
||||||
return c.runtime.state.GetContainerExitCode(id)
|
return c.runtime.state.GetContainerExitCode(id)
|
||||||
}
|
}
|
||||||
|
|
||||||
func waitForConmonExit(conmonPID, conmonPidFd int, pollInterval time.Duration) error {
|
func waitForConmonExit(ctx context.Context, conmonPID, conmonPidFd int, pollInterval time.Duration) error {
|
||||||
if conmonPidFd > -1 {
|
if conmonPidFd > -1 {
|
||||||
for {
|
for {
|
||||||
fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
|
fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
|
||||||
if _, err := unix.Poll(fds, -1); err != nil {
|
if n, err := unix.Poll(fds, int(pollInterval.Milliseconds())); err != nil {
|
||||||
if err == unix.EINTR {
|
if err == unix.EINTR {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
|
} else if n == 0 {
|
||||||
|
// n == 0 means timeout
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return define.ErrCanceled
|
||||||
|
default:
|
||||||
|
// context not done, wait again
|
||||||
|
continue
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -640,7 +648,11 @@ func waitForConmonExit(conmonPID, conmonPidFd int, pollInterval time.Duration) e
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
time.Sleep(pollInterval)
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return define.ErrCanceled
|
||||||
|
case <-time.After(pollInterval):
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -695,22 +707,15 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
|
||||||
|
|
||||||
if waitForExit {
|
if waitForExit {
|
||||||
wg.Add(1)
|
|
||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
|
||||||
|
|
||||||
code, err := c.WaitForExit(ctx, waitTimeout)
|
code, err := c.WaitForExit(ctx, waitTimeout)
|
||||||
trySend(code, err)
|
trySend(code, err)
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(wantedStates) > 0 || len(wantedHealthStates) > 0 {
|
if len(wantedStates) > 0 || len(wantedHealthStates) > 0 {
|
||||||
wg.Add(1)
|
|
||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
|
||||||
stoppedCount := 0
|
stoppedCount := 0
|
||||||
for {
|
for {
|
||||||
if len(wantedStates) > 0 {
|
if len(wantedStates) > 0 {
|
||||||
|
@ -780,7 +785,6 @@ func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeou
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
result = waitResult{-1, define.ErrCanceled}
|
result = waitResult{-1, define.ErrCanceled}
|
||||||
}
|
}
|
||||||
wg.Wait()
|
|
||||||
return result.code, result.err
|
return result.code, result.err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -108,4 +108,19 @@ var _ = Describe("Podman wait", func() {
|
||||||
Expect(session).Should(ExitCleanly())
|
Expect(session).Should(ExitCleanly())
|
||||||
Expect(session.OutputToStringArray()).To(Equal([]string{"0", "0", "0"}))
|
Expect(session.OutputToStringArray()).To(Equal([]string{"0", "0", "0"}))
|
||||||
})
|
})
|
||||||
|
|
||||||
|
It("podman wait on multiple conditions", func() {
|
||||||
|
session := podmanTest.Podman([]string{"run", "-d", ALPINE, "sleep", "100"})
|
||||||
|
session.Wait(20)
|
||||||
|
Expect(session).Should(ExitCleanly())
|
||||||
|
cid := session.OutputToString()
|
||||||
|
|
||||||
|
// condition should return once any of the conditions is met, not all of them,
|
||||||
|
// as the container is running this should return immediately
|
||||||
|
// https://github.com/containers/podman-py/issues/425
|
||||||
|
session = podmanTest.Podman([]string{"wait", "--condition", "running,exited", cid})
|
||||||
|
session.Wait(20)
|
||||||
|
Expect(session).Should(ExitCleanly())
|
||||||
|
Expect(session.OutputToString()).To(Equal("-1"))
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
Loading…
Reference in New Issue