mirror of https://github.com/containers/podman.git
Lock pod while starting and stopping containers
The intention behind this is to stop races between `pod stop|start` and `container stop|start` being run at the same time. This could result in containers with no working network (they join the still-running infra container's netns, which is then torn down as the infra container is stopped, leaving the container in an otherwise unused, nonfunctional, orphan netns). Locking the pod (if present) in the public container start and stop APIs should be sufficient to stop this. Signed-off-by: Matt Heon <mheon@redhat.com>
This commit is contained in:
parent
be5d807b62
commit
06fa617f61
|
@ -85,34 +85,23 @@ func (c *Container) initUnlocked(ctx context.Context, recursive bool) error {
|
||||||
// Start requires that all dependency containers (e.g. pod infra containers) are
|
// Start requires that all dependency containers (e.g. pod infra containers) are
|
||||||
// running before starting the container. The recursive parameter, if set, will start all
|
// running before starting the container. The recursive parameter, if set, will start all
|
||||||
// dependencies before starting this container.
|
// dependencies before starting this container.
|
||||||
func (c *Container) Start(ctx context.Context, recursive bool) (finalErr error) {
|
func (c *Container) Start(ctx context.Context, recursive bool) error {
|
||||||
if !c.batched {
|
// Have to lock the pod the container is a part of.
|
||||||
c.lock.Lock()
|
// This prevents running `podman start` at the same time a
|
||||||
defer c.lock.Unlock()
|
// `podman pod stop` is running, which could lead to weird races.
|
||||||
|
// Pod locks come before container locks, so do this first.
|
||||||
// defer's are executed LIFO so we are locked here
|
if c.config.Pod != "" {
|
||||||
// as long as we call this after the defer unlock()
|
// If we get an error, the pod was probably removed.
|
||||||
defer func() {
|
// So we get an expected ErrCtrRemoved instead of ErrPodRemoved,
|
||||||
if finalErr != nil {
|
// just ignore this and move on to syncing the container.
|
||||||
if err := saveContainerError(c, finalErr); err != nil {
|
pod, _ := c.runtime.state.Pod(c.config.Pod)
|
||||||
logrus.Debug(err)
|
if pod != nil {
|
||||||
}
|
pod.lock.Lock()
|
||||||
}
|
defer pod.lock.Unlock()
|
||||||
}()
|
|
||||||
|
|
||||||
if err := c.syncContainer(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err := c.prepareToStart(ctx, recursive); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start the container
|
return c.startNoPodLock(ctx, recursive)
|
||||||
if err := c.start(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return c.waitForHealthy(ctx)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update updates the given container.
|
// Update updates the given container.
|
||||||
|
@ -294,6 +283,21 @@ func (c *Container) Stop() error {
|
||||||
// manually. If timeout is 0, SIGKILL will be used immediately to kill the
|
// manually. If timeout is 0, SIGKILL will be used immediately to kill the
|
||||||
// container.
|
// container.
|
||||||
func (c *Container) StopWithTimeout(timeout uint) (finalErr error) {
|
func (c *Container) StopWithTimeout(timeout uint) (finalErr error) {
|
||||||
|
// Have to lock the pod the container is a part of.
|
||||||
|
// This prevents running `podman stop` at the same time a
|
||||||
|
// `podman pod start` is running, which could lead to weird races.
|
||||||
|
// Pod locks come before container locks, so do this first.
|
||||||
|
if c.config.Pod != "" {
|
||||||
|
// If we get an error, the pod was probably removed.
|
||||||
|
// So we get an expected ErrCtrRemoved instead of ErrPodRemoved,
|
||||||
|
// just ignore this and move on to syncing the container.
|
||||||
|
pod, _ := c.runtime.state.Pod(c.config.Pod)
|
||||||
|
if pod != nil {
|
||||||
|
pod.lock.Lock()
|
||||||
|
defer pod.lock.Unlock()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if !c.batched {
|
if !c.batched {
|
||||||
c.lock.Lock()
|
c.lock.Lock()
|
||||||
defer c.lock.Unlock()
|
defer c.lock.Unlock()
|
||||||
|
|
|
@ -1260,6 +1260,40 @@ func (c *Container) initAndStart(ctx context.Context) (retErr error) {
|
||||||
return c.waitForHealthy(ctx)
|
return c.waitForHealthy(ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Internal function to start a container without taking the pod lock.
|
||||||
|
// Please note that this DOES take the container lock.
|
||||||
|
// Intended to be used in pod-related functions.
|
||||||
|
func (c *Container) startNoPodLock(ctx context.Context, recursive bool) (finalErr error) {
|
||||||
|
if !c.batched {
|
||||||
|
c.lock.Lock()
|
||||||
|
defer c.lock.Unlock()
|
||||||
|
|
||||||
|
// defer's are executed LIFO so we are locked here
|
||||||
|
// as long as we call this after the defer unlock()
|
||||||
|
defer func() {
|
||||||
|
if finalErr != nil {
|
||||||
|
if err := saveContainerError(c, finalErr); err != nil {
|
||||||
|
logrus.Debug(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
if err := c.syncContainer(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := c.prepareToStart(ctx, recursive); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start the container
|
||||||
|
if err := c.start(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return c.waitForHealthy(ctx)
|
||||||
|
}
|
||||||
|
|
||||||
// Internal, non-locking function to start a container
|
// Internal, non-locking function to start a container
|
||||||
func (c *Container) start() error {
|
func (c *Container) start() error {
|
||||||
if c.config.Spec.Process != nil {
|
if c.config.Spec.Process != nil {
|
||||||
|
|
|
@ -24,7 +24,7 @@ func (p *Pod) startInitContainers(ctx context.Context) error {
|
||||||
}
|
}
|
||||||
// Now iterate init containers
|
// Now iterate init containers
|
||||||
for _, initCon := range initCtrs {
|
for _, initCon := range initCtrs {
|
||||||
if err := initCon.Start(ctx, true); err != nil {
|
if err := initCon.startNoPodLock(ctx, true); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// Check that the init container waited correctly and the exit
|
// Check that the init container waited correctly and the exit
|
||||||
|
|
Loading…
Reference in New Issue