fix race conditions in start/attach logic
The current code did something like this: lock() getState() unlock() if state != running lock() getState() == running -> error unlock() This of course is wrong because between the first unlock() and second lock() call another process could have modified the state. This meant that sometimes you would get a weird error on start because the internal setup errored as the container was already running. In general any state check without holding the lock is incorrect and will result in race conditions. As such refactor the code to combine both StartAndAttach and Attach() into one function that can handle both. With that we can move the running check into the locked code. Also use a typed error for this specific error case so that the callers can check and ignore the specific error when needed. This also allows us to fix races in the compat API that did a similar racy state check. This commit changes slightly how we output the result; previously a start on an already running container would never print the id/name of the container, which is confusing and sort of breaks idempotence. Now it will include the output except when --all is used. Then it only reports the ids that were actually started. Fixes #23246 Signed-off-by: Paul Holzinger <pholzing@redhat.com>
This commit is contained in:
parent
360c4f372d
commit
3280da0500
|
@ -15,7 +15,6 @@ import (
|
|||
"github.com/containers/common/pkg/resize"
|
||||
"github.com/containers/podman/v5/libpod/define"
|
||||
"github.com/containers/podman/v5/libpod/events"
|
||||
"github.com/containers/podman/v5/pkg/signal"
|
||||
"github.com/containers/storage/pkg/archive"
|
||||
spec "github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
@ -142,13 +141,12 @@ func (c *Container) Update(resources *spec.LinuxResources, restartPolicy *string
|
|||
return c.update(resources, restartPolicy, restartRetries)
|
||||
}
|
||||
|
||||
// StartAndAttach starts a container and attaches to it.
|
||||
// This acts as a combination of the Start and Attach APIs, ensuring proper
|
||||
// Attach to a container.
|
||||
// The last parameter "start" can be used to also start the container.
|
||||
// This will then combine the Start and Attach APIs, ensuring proper
|
||||
// ordering of the two such that no output from the container is lost (e.g. the
|
||||
// Attach call occurs before Start).
|
||||
// In overall functionality, it is identical to the Start call, with the added
|
||||
// side effect that an attach session will also be started.
|
||||
func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachStreams, keys string, resize <-chan resize.TerminalSize, recursive bool) (retChan <-chan error, finalErr error) {
|
||||
func (c *Container) Attach(ctx context.Context, streams *define.AttachStreams, keys string, resize <-chan resize.TerminalSize, start bool) (retChan <-chan error, finalErr error) {
|
||||
defer func() {
|
||||
if finalErr != nil {
|
||||
// Have to re-lock.
|
||||
|
@ -175,9 +173,27 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
|
|||
}
|
||||
}
|
||||
|
||||
if err := c.prepareToStart(ctx, recursive); err != nil {
|
||||
return nil, err
|
||||
if c.state.State != define.ContainerStateRunning {
|
||||
if !start {
|
||||
return nil, errors.New("you can only attach to running containers")
|
||||
}
|
||||
|
||||
if err := c.prepareToStart(ctx, true); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if !start {
|
||||
// A bit awkward, technically passthrough never supports attach. We only pretend
|
||||
// it does as we leak the stdio fds down into the container but that of course only
|
||||
// works if we are the process that started conmon with the right fds.
|
||||
if c.LogDriver() == define.PassthroughLogging {
|
||||
return nil, fmt.Errorf("this container is using the 'passthrough' log driver, cannot attach: %w", define.ErrNoLogs)
|
||||
} else if c.LogDriver() == define.PassthroughTTYLogging {
|
||||
return nil, fmt.Errorf("this container is using the 'passthrough-tty' log driver, cannot attach: %w", define.ErrNoLogs)
|
||||
}
|
||||
}
|
||||
|
||||
attachChan := make(chan error)
|
||||
|
||||
// We need to ensure that we don't return until start() fired in attach.
|
||||
|
@ -194,7 +210,7 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
|
|||
opts := new(AttachOptions)
|
||||
opts.Streams = streams
|
||||
opts.DetachKeys = &keys
|
||||
opts.Start = true
|
||||
opts.Start = start
|
||||
opts.Started = startedChan
|
||||
|
||||
// attach and start the container on a different thread. waitForHealthy must
|
||||
|
@ -213,7 +229,13 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
|
|||
c.newContainerEvent(events.Attach)
|
||||
}
|
||||
|
||||
return attachChan, c.waitForHealthy(ctx)
|
||||
if start {
|
||||
if err := c.waitForHealthy(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return attachChan, nil
|
||||
}
|
||||
|
||||
// RestartWithTimeout restarts a running container and takes a given timeout in uint
|
||||
|
@ -315,61 +337,6 @@ func (c *Container) Kill(signal uint) error {
|
|||
return c.save()
|
||||
}
|
||||
|
||||
// Attach attaches to a container.
|
||||
// This function returns when the attach finishes. It does not hold the lock for
|
||||
// the duration of its runtime, only using it at the beginning to verify state.
|
||||
func (c *Container) Attach(streams *define.AttachStreams, keys string, resize <-chan resize.TerminalSize) error {
|
||||
if c.LogDriver() == define.PassthroughLogging {
|
||||
return fmt.Errorf("this container is using the 'passthrough' log driver, cannot attach: %w", define.ErrNoLogs)
|
||||
}
|
||||
if c.LogDriver() == define.PassthroughTTYLogging {
|
||||
return fmt.Errorf("this container is using the 'passthrough-tty' log driver, cannot attach: %w", define.ErrNoLogs)
|
||||
}
|
||||
if !c.batched {
|
||||
c.lock.Lock()
|
||||
if err := c.syncContainer(); err != nil {
|
||||
c.lock.Unlock()
|
||||
return err
|
||||
}
|
||||
// We are NOT holding the lock for the duration of the function.
|
||||
c.lock.Unlock()
|
||||
}
|
||||
|
||||
if !c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning) {
|
||||
return fmt.Errorf("can only attach to created or running containers: %w", define.ErrCtrStateInvalid)
|
||||
}
|
||||
|
||||
// HACK: This is really gross, but there isn't a better way without
|
||||
// splitting attach into separate versions for StartAndAttach and normal
|
||||
// attaching, and I really do not want to do that right now.
|
||||
// Send a SIGWINCH after attach succeeds so that most programs will
|
||||
// redraw the screen for the new attach session.
|
||||
attachRdy := make(chan bool, 1)
|
||||
if c.Terminal() {
|
||||
go func() {
|
||||
<-attachRdy
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
if err := c.ociRuntime.KillContainer(c, uint(signal.SIGWINCH), false); err != nil {
|
||||
logrus.Warnf("Unable to send SIGWINCH to container %s after attach: %v", c.ID(), err)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// Start resizing
|
||||
if c.LogDriver() != define.PassthroughLogging && c.LogDriver() != define.PassthroughTTYLogging {
|
||||
registerResizeFunc(resize, c.bundlePath())
|
||||
}
|
||||
|
||||
opts := new(AttachOptions)
|
||||
opts.Streams = streams
|
||||
opts.DetachKeys = &keys
|
||||
opts.AttachReady = attachRdy
|
||||
|
||||
c.newContainerEvent(events.Attach)
|
||||
return c.ociRuntime.Attach(c, opts)
|
||||
}
|
||||
|
||||
// HTTPAttach forwards an attach session over a hijacked HTTP session.
|
||||
// HTTPAttach will consume and close the included httpCon, which is expected to
|
||||
// be sourced from a hijacked HTTP connection.
|
||||
|
|
|
@ -821,6 +821,11 @@ func (c *Container) save() error {
|
|||
func (c *Container) prepareToStart(ctx context.Context, recursive bool) (retErr error) {
|
||||
// Container must be created or stopped to be started
|
||||
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateExited) {
|
||||
// Special case: Let the caller know that it is already running,
|
||||
// the caller can then decide to ignore/handle the error the way it needs.
|
||||
if c.state.State == define.ContainerStateRunning {
|
||||
return fmt.Errorf("container %s: %w", c.ID(), define.ErrCtrStateRunning)
|
||||
}
|
||||
return fmt.Errorf("container %s must be in Created or Stopped state to be started: %w", c.ID(), define.ErrCtrStateInvalid)
|
||||
}
|
||||
|
||||
|
|
|
@ -60,6 +60,8 @@ var (
|
|||
// ErrCtrStateInvalid indicates a container is in an improper state for
|
||||
// the requested operation
|
||||
ErrCtrStateInvalid = errors.New("container state improper")
|
||||
// ErrCtrStateRunning indicates a container is running.
|
||||
ErrCtrStateRunning = errors.New("container is running")
|
||||
// ErrExecSessionStateInvalid indicates that an exec session is in an
|
||||
// improper state for the requested operation
|
||||
ErrExecSessionStateInvalid = errors.New("exec session state improper")
|
||||
|
|
|
@ -91,6 +91,7 @@ func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
|
|||
}
|
||||
params.Started <- true
|
||||
}
|
||||
close(params.Started)
|
||||
|
||||
if passthrough {
|
||||
return nil
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package compat
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"net/http"
|
||||
|
||||
api "github.com/containers/podman/v5/pkg/api/types"
|
||||
|
@ -33,16 +34,11 @@ func StartContainer(w http.ResponseWriter, r *http.Request) {
|
|||
utils.ContainerNotFound(w, name, err)
|
||||
return
|
||||
}
|
||||
state, err := con.State()
|
||||
if err != nil {
|
||||
utils.InternalServerError(w, err)
|
||||
return
|
||||
}
|
||||
if state == define.ContainerStateRunning {
|
||||
utils.WriteResponse(w, http.StatusNotModified, nil)
|
||||
return
|
||||
}
|
||||
if err := con.Start(r.Context(), true); err != nil {
|
||||
if errors.Is(err, define.ErrCtrStateRunning) {
|
||||
utils.WriteResponse(w, http.StatusNotModified, nil)
|
||||
return
|
||||
}
|
||||
utils.InternalServerError(w, err)
|
||||
return
|
||||
}
|
||||
|
|
|
@ -795,13 +795,6 @@ func (ic *ContainerEngine) ContainerAttach(ctx context.Context, nameOrID string,
|
|||
}
|
||||
|
||||
ctr := containers[0]
|
||||
conState, err := ctr.State()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to determine state of %s: %w", ctr.ID(), err)
|
||||
}
|
||||
if conState != define.ContainerStateRunning {
|
||||
return fmt.Errorf("you can only attach to running containers")
|
||||
}
|
||||
|
||||
// If the container is in a pod, also set to recursively start dependencies
|
||||
err = terminal.StartAttachCtr(ctx, ctr.Container, options.Stdout, options.Stderr, options.Stdin, options.DetachKeys, options.SigProxy, false)
|
||||
|
@ -939,14 +932,9 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
|
|||
// There can only be one container if attach was used
|
||||
for i := range containers {
|
||||
ctr := containers[i]
|
||||
ctrState, err := ctr.State()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ctrRunning := ctrState == define.ContainerStateRunning
|
||||
|
||||
if options.Attach {
|
||||
err = terminal.StartAttachCtr(ctx, ctr.Container, options.Stdout, options.Stderr, options.Stdin, options.DetachKeys, options.SigProxy, !ctrRunning)
|
||||
err = terminal.StartAttachCtr(ctx, ctr.Container, options.Stdout, options.Stderr, options.Stdin, options.DetachKeys, options.SigProxy, true)
|
||||
if errors.Is(err, define.ErrDetach) {
|
||||
// User manually detached
|
||||
// Exit cleanly immediately
|
||||
|
@ -970,16 +958,6 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
|
|||
return reports, fmt.Errorf("attempting to start container %s would cause a deadlock; please run 'podman system renumber' to resolve", ctr.ID())
|
||||
}
|
||||
|
||||
if ctrRunning {
|
||||
reports = append(reports, &entities.ContainerStartReport{
|
||||
Id: ctr.ID(),
|
||||
RawInput: ctr.rawInput,
|
||||
Err: nil,
|
||||
ExitCode: 0,
|
||||
})
|
||||
return reports, err
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
reports = append(reports, &entities.ContainerStartReport{
|
||||
Id: ctr.ID(),
|
||||
|
@ -1008,34 +986,43 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
|
|||
return reports, nil
|
||||
} // end attach
|
||||
|
||||
// Start the container if it's not running already.
|
||||
if !ctrRunning {
|
||||
// Handle non-attach start
|
||||
// If the container is in a pod, also set to recursively start dependencies
|
||||
report := &entities.ContainerStartReport{
|
||||
Id: ctr.ID(),
|
||||
RawInput: ctr.rawInput,
|
||||
ExitCode: 125,
|
||||
}
|
||||
if err := ctr.Start(ctx, true); err != nil {
|
||||
report.Err = err
|
||||
if errors.Is(err, define.ErrWillDeadlock) {
|
||||
report.Err = fmt.Errorf("please run 'podman system renumber' to resolve deadlocks: %w", err)
|
||||
// Handle non-attach start
|
||||
// If the container is in a pod, also set to recursively start dependencies
|
||||
report := &entities.ContainerStartReport{
|
||||
Id: ctr.ID(),
|
||||
RawInput: ctr.rawInput,
|
||||
ExitCode: 125,
|
||||
}
|
||||
if err := ctr.Start(ctx, true); err != nil {
|
||||
// Already running is no error for the start command as it is idempotent.
|
||||
if errors.Is(err, define.ErrCtrStateRunning) {
|
||||
// If all is set we only want to output the actual started containers
|
||||
// so do not include the entry in the result.
|
||||
if !options.All {
|
||||
report.ExitCode = 0
|
||||
reports = append(reports, report)
|
||||
continue
|
||||
}
|
||||
report.Err = fmt.Errorf("unable to start container %q: %w", ctr.ID(), err)
|
||||
reports = append(reports, report)
|
||||
if ctr.AutoRemove() {
|
||||
if _, _, err := ic.removeContainer(ctx, ctr.Container, entities.RmOptions{}); err != nil {
|
||||
logrus.Errorf("Removing container %s: %v", ctr.ID(), err)
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
report.ExitCode = 0
|
||||
|
||||
report.Err = err
|
||||
if errors.Is(err, define.ErrWillDeadlock) {
|
||||
report.Err = fmt.Errorf("please run 'podman system renumber' to resolve deadlocks: %w", err)
|
||||
reports = append(reports, report)
|
||||
continue
|
||||
}
|
||||
report.Err = fmt.Errorf("unable to start container %q: %w", ctr.ID(), err)
|
||||
if ctr.AutoRemove() {
|
||||
if _, _, err := ic.removeContainer(ctx, ctr.Container, entities.RmOptions{}); err != nil {
|
||||
logrus.Errorf("Removing container %s: %v", ctr.ID(), err)
|
||||
}
|
||||
}
|
||||
reports = append(reports, report)
|
||||
continue
|
||||
}
|
||||
// no error set exit code to 0
|
||||
report.ExitCode = 0
|
||||
reports = append(reports, report)
|
||||
}
|
||||
return reports, nil
|
||||
}
|
||||
|
|
|
@ -89,11 +89,7 @@ func StartAttachCtr(ctx context.Context, ctr *libpod.Container, stdout, stderr,
|
|||
ProxySignals(ctr)
|
||||
}
|
||||
|
||||
if !startContainer {
|
||||
return ctr.Attach(streams, detachKeys, resize)
|
||||
}
|
||||
|
||||
attachChan, err := ctr.StartAndAttach(ctx, streams, detachKeys, resize, true)
|
||||
attachChan, err := ctr.Attach(ctx, streams, detachKeys, resize, startContainer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue