Merge pull request #23258 from Luap99/start-error

fix race conditions in start/attach logic
This commit is contained in:
openshift-merge-bot[bot] 2024-07-15 12:11:56 +00:00 committed by GitHub
commit 2f673aa8f7
7 changed files with 78 additions and 124 deletions
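At a high level, the race being fixed is a check-then-act problem: callers first queried the container state and only then decided whether to start it, leaving a window in which the state could change. The diff below replaces that with an attempt-and-handle pattern built around the new define.ErrCtrStateRunning sentinel. The following is a condensed, editor-added sketch of the new caller shape (not part of this commit's diff); it reuses Start() and ErrCtrStateRunning from the changes below and assumes a *libpod.Container is already in hand.

package example

import (
	"context"
	"errors"

	"github.com/containers/podman/v5/libpod"
	"github.com/containers/podman/v5/libpod/define"
)

// startIdempotent attempts Start() unconditionally and treats the
// "already running" sentinel as success, instead of racing a prior
// State() check against the container's actual state.
func startIdempotent(ctx context.Context, ctr *libpod.Container) error {
	if err := ctr.Start(ctx, true); err != nil {
		if errors.Is(err, define.ErrCtrStateRunning) {
			// Already running: start is idempotent, nothing to do.
			return nil
		}
		return err
	}
	return nil
}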

View File

@ -15,7 +15,6 @@ import (
"github.com/containers/common/pkg/resize"
"github.com/containers/podman/v5/libpod/define"
"github.com/containers/podman/v5/libpod/events"
"github.com/containers/podman/v5/pkg/signal"
"github.com/containers/storage/pkg/archive"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
@ -142,13 +141,12 @@ func (c *Container) Update(resources *spec.LinuxResources, restartPolicy *string
return c.update(resources, restartPolicy, restartRetries)
}
// StartAndAttach starts a container and attaches to it.
// This acts as a combination of the Start and Attach APIs, ensuring proper
// Attach to a container.
// The last parameter "start" can be used to also start the container.
// This will then act as a combination of the Start and Attach APIs, ensuring proper
// ordering of the two such that no output from the container is lost (e.g. the
// Attach call occurs before Start).
// In overall functionality, it is identical to the Start call, with the added
// side effect that an attach session will also be started.
func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachStreams, keys string, resize <-chan resize.TerminalSize, recursive bool) (retChan <-chan error, finalErr error) {
func (c *Container) Attach(ctx context.Context, streams *define.AttachStreams, keys string, resize <-chan resize.TerminalSize, start bool) (retChan <-chan error, finalErr error) {
defer func() {
if finalErr != nil {
// Have to re-lock.
@ -175,9 +173,27 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
}
}
if err := c.prepareToStart(ctx, recursive); err != nil {
return nil, err
if c.state.State != define.ContainerStateRunning {
if !start {
return nil, errors.New("you can only attach to running containers")
}
if err := c.prepareToStart(ctx, true); err != nil {
return nil, err
}
}
if !start {
// A bit awkward: technically, passthrough never supports attach. We only pretend
// it does by leaking the stdio fds down into the container, but that of course only
// works if we are the process that started conmon with the right fds.
if c.LogDriver() == define.PassthroughLogging {
return nil, fmt.Errorf("this container is using the 'passthrough' log driver, cannot attach: %w", define.ErrNoLogs)
} else if c.LogDriver() == define.PassthroughTTYLogging {
return nil, fmt.Errorf("this container is using the 'passthrough-tty' log driver, cannot attach: %w", define.ErrNoLogs)
}
}
attachChan := make(chan error)
// We need to ensure that we don't return until start() has fired in attach.
@ -194,7 +210,7 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
opts := new(AttachOptions)
opts.Streams = streams
opts.DetachKeys = &keys
opts.Start = true
opts.Start = start
opts.Started = startedChan
// attach and start the container on a different thread. waitForHealthy must
@ -213,7 +229,13 @@ func (c *Container) StartAndAttach(ctx context.Context, streams *define.AttachSt
c.newContainerEvent(events.Attach)
}
return attachChan, c.waitForHealthy(ctx)
if start {
if err := c.waitForHealthy(ctx); err != nil {
return nil, err
}
}
return attachChan, nil
}
// RestartWithTimeout restarts a running container and takes a given timeout in uint
@ -315,61 +337,6 @@ func (c *Container) Kill(signal uint) error {
return c.save()
}
// Attach attaches to a container.
// This function returns when the attach finishes. It does not hold the lock for
// the duration of its runtime, only using it at the beginning to verify state.
func (c *Container) Attach(streams *define.AttachStreams, keys string, resize <-chan resize.TerminalSize) error {
if c.LogDriver() == define.PassthroughLogging {
return fmt.Errorf("this container is using the 'passthrough' log driver, cannot attach: %w", define.ErrNoLogs)
}
if c.LogDriver() == define.PassthroughTTYLogging {
return fmt.Errorf("this container is using the 'passthrough-tty' log driver, cannot attach: %w", define.ErrNoLogs)
}
if !c.batched {
c.lock.Lock()
if err := c.syncContainer(); err != nil {
c.lock.Unlock()
return err
}
// We are NOT holding the lock for the duration of the function.
c.lock.Unlock()
}
if !c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning) {
return fmt.Errorf("can only attach to created or running containers: %w", define.ErrCtrStateInvalid)
}
// HACK: This is really gross, but there isn't a better way without
// splitting attach into separate versions for StartAndAttach and normal
// attaching, and I really do not want to do that right now.
// Send a SIGWINCH after attach succeeds so that most programs will
// redraw the screen for the new attach session.
attachRdy := make(chan bool, 1)
if c.Terminal() {
go func() {
<-attachRdy
c.lock.Lock()
defer c.lock.Unlock()
if err := c.ociRuntime.KillContainer(c, uint(signal.SIGWINCH), false); err != nil {
logrus.Warnf("Unable to send SIGWINCH to container %s after attach: %v", c.ID(), err)
}
}()
}
// Start resizing
if c.LogDriver() != define.PassthroughLogging && c.LogDriver() != define.PassthroughTTYLogging {
registerResizeFunc(resize, c.bundlePath())
}
opts := new(AttachOptions)
opts.Streams = streams
opts.DetachKeys = &keys
opts.AttachReady = attachRdy
c.newContainerEvent(events.Attach)
return c.ociRuntime.Attach(c, opts)
}
// HTTPAttach forwards an attach session over a hijacked HTTP session.
// HTTPAttach will consume and close the included httpCon, which is expected to
// be sourced from a hijacked HTTP connection.

View File

@ -821,6 +821,11 @@ func (c *Container) save() error {
func (c *Container) prepareToStart(ctx context.Context, recursive bool) (retErr error) {
// Container must be created or stopped to be started
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateExited) {
// Special case: let the caller know that it is already running;
// the caller can then decide to ignore/handle the error the way it needs.
if c.state.State == define.ContainerStateRunning {
return fmt.Errorf("container %s: %w", c.ID(), define.ErrCtrStateRunning)
}
return fmt.Errorf("container %s must be in Created or Stopped state to be started: %w", c.ID(), define.ErrCtrStateInvalid)
}

View File

@ -60,6 +60,8 @@ var (
// ErrCtrStateInvalid indicates a container is in an improper state for
// the requested operation
ErrCtrStateInvalid = errors.New("container state improper")
// ErrCtrStateRunning indicates a container is running.
ErrCtrStateRunning = errors.New("container is running")
// ErrExecSessionStateInvalid indicates that an exec session is in an
// improper state for the requested operation
ErrExecSessionStateInvalid = errors.New("exec session state improper")
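One detail worth noting (this example is editor-added, not part of the commit): the new sentinel is returned wrapped, e.g. fmt.Errorf("container %s: %w", c.ID(), define.ErrCtrStateRunning) in prepareToStart above, so callers must match it with errors.Is rather than a direct comparison. A minimal standalone illustration:

package main

import (
	"errors"
	"fmt"
)

// Stand-in for define.ErrCtrStateRunning from the hunk above.
var ErrCtrStateRunning = errors.New("container is running")

func main() {
	wrapped := fmt.Errorf("container %s: %w", "abc123", ErrCtrStateRunning)

	fmt.Println(wrapped == ErrCtrStateRunning)          // false: wrapping hides identity
	fmt.Println(errors.Is(wrapped, ErrCtrStateRunning)) // true: %w preserves the error chain
}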

View File

@ -91,6 +91,7 @@ func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
}
params.Started <- true
}
close(params.Started)
if passthrough {
return nil
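For background on the added close(params.Started): closing the channel guarantees that a receiver still waiting for a start notification unblocks with the zero value instead of hanging when no value is ever sent, e.g. on the attach-only path. A small illustrative sketch of that pattern (names are made up, this is not Podman code):

package main

import "fmt"

func main() {
	started := make(chan bool)

	go func() {
		// The sender may or may not send a value; closing the channel
		// guarantees the receiver below can never block forever.
		close(started)
	}()

	if ok := <-started; !ok {
		fmt.Println("channel closed without a start notification")
	}
}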

View File

@ -1,6 +1,7 @@
package compat
import (
"errors"
"net/http"
api "github.com/containers/podman/v5/pkg/api/types"
@ -33,16 +34,11 @@ func StartContainer(w http.ResponseWriter, r *http.Request) {
utils.ContainerNotFound(w, name, err)
return
}
state, err := con.State()
if err != nil {
utils.InternalServerError(w, err)
return
}
if state == define.ContainerStateRunning {
utils.WriteResponse(w, http.StatusNotModified, nil)
return
}
if err := con.Start(r.Context(), true); err != nil {
if errors.Is(err, define.ErrCtrStateRunning) {
utils.WriteResponse(w, http.StatusNotModified, nil)
return
}
utils.InternalServerError(w, err)
return
}
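From a client's point of view, the compat endpoint now reports the already-running case as 304 Not Modified based solely on the error returned by Start(). Below is a rough, editor-added sketch of exercising this over the Podman socket; the socket path, container name, and the unversioned route are assumptions, not taken from this commit.

package main

import (
	"context"
	"fmt"
	"net"
	"net/http"
)

func main() {
	// HTTP client that dials the Podman service's unix socket directly.
	client := &http.Client{
		Transport: &http.Transport{
			DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
				return net.Dial("unix", "/run/podman/podman.sock")
			},
		},
	}

	// Docker-compatible start endpoint; the host part of the URL is
	// ignored by the custom dialer above.
	resp, err := client.Post("http://d/containers/mycontainer/start", "", nil)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Expect 204 No Content on a fresh start and 304 Not Modified when
	// the container is already running.
	fmt.Println(resp.StatusCode)
}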

View File

@ -795,13 +795,6 @@ func (ic *ContainerEngine) ContainerAttach(ctx context.Context, nameOrID string,
}
ctr := containers[0]
conState, err := ctr.State()
if err != nil {
return fmt.Errorf("unable to determine state of %s: %w", ctr.ID(), err)
}
if conState != define.ContainerStateRunning {
return fmt.Errorf("you can only attach to running containers")
}
// If the container is in a pod, also set to recursively start dependencies
err = terminal.StartAttachCtr(ctx, ctr.Container, options.Stdout, options.Stderr, options.Stdin, options.DetachKeys, options.SigProxy, false)
@ -939,14 +932,9 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
// There can only be one container if attach was used
for i := range containers {
ctr := containers[i]
ctrState, err := ctr.State()
if err != nil {
return nil, err
}
ctrRunning := ctrState == define.ContainerStateRunning
if options.Attach {
err = terminal.StartAttachCtr(ctx, ctr.Container, options.Stdout, options.Stderr, options.Stdin, options.DetachKeys, options.SigProxy, !ctrRunning)
err = terminal.StartAttachCtr(ctx, ctr.Container, options.Stdout, options.Stderr, options.Stdin, options.DetachKeys, options.SigProxy, true)
if errors.Is(err, define.ErrDetach) {
// User manually detached
// Exit cleanly immediately
@ -970,16 +958,6 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
return reports, fmt.Errorf("attempting to start container %s would cause a deadlock; please run 'podman system renumber' to resolve", ctr.ID())
}
if ctrRunning {
reports = append(reports, &entities.ContainerStartReport{
Id: ctr.ID(),
RawInput: ctr.rawInput,
Err: nil,
ExitCode: 0,
})
return reports, err
}
if err != nil {
reports = append(reports, &entities.ContainerStartReport{
Id: ctr.ID(),
@ -1008,34 +986,43 @@ func (ic *ContainerEngine) ContainerStart(ctx context.Context, namesOrIds []stri
return reports, nil
} // end attach
// Start the container if it's not running already.
if !ctrRunning {
// Handle non-attach start
// If the container is in a pod, also set to recursively start dependencies
report := &entities.ContainerStartReport{
Id: ctr.ID(),
RawInput: ctr.rawInput,
ExitCode: 125,
}
if err := ctr.Start(ctx, true); err != nil {
report.Err = err
if errors.Is(err, define.ErrWillDeadlock) {
report.Err = fmt.Errorf("please run 'podman system renumber' to resolve deadlocks: %w", err)
// Handle non-attach start
// If the container is in a pod, also set to recursively start dependencies
report := &entities.ContainerStartReport{
Id: ctr.ID(),
RawInput: ctr.rawInput,
ExitCode: 125,
}
if err := ctr.Start(ctx, true); err != nil {
// Already running is not an error for the start command, as start is idempotent.
if errors.Is(err, define.ErrCtrStateRunning) {
// If --all is set we only want to output the actually started containers,
// so do not include the entry in the result.
if !options.All {
report.ExitCode = 0
reports = append(reports, report)
continue
}
report.Err = fmt.Errorf("unable to start container %q: %w", ctr.ID(), err)
reports = append(reports, report)
if ctr.AutoRemove() {
if _, _, err := ic.removeContainer(ctx, ctr.Container, entities.RmOptions{}); err != nil {
logrus.Errorf("Removing container %s: %v", ctr.ID(), err)
}
}
continue
}
report.ExitCode = 0
report.Err = err
if errors.Is(err, define.ErrWillDeadlock) {
report.Err = fmt.Errorf("please run 'podman system renumber' to resolve deadlocks: %w", err)
reports = append(reports, report)
continue
}
report.Err = fmt.Errorf("unable to start container %q: %w", ctr.ID(), err)
if ctr.AutoRemove() {
if _, _, err := ic.removeContainer(ctx, ctr.Container, entities.RmOptions{}); err != nil {
logrus.Errorf("Removing container %s: %v", ctr.ID(), err)
}
}
reports = append(reports, report)
continue
}
// no error, set exit code to 0
report.ExitCode = 0
reports = append(reports, report)
}
return reports, nil
}

View File

@ -89,11 +89,7 @@ func StartAttachCtr(ctx context.Context, ctr *libpod.Container, stdout, stderr,
ProxySignals(ctr)
}
if !startContainer {
return ctr.Attach(streams, detachKeys, resize)
}
attachChan, err := ctr.StartAndAttach(ctx, streams, detachKeys, resize, true)
attachChan, err := ctr.Attach(ctx, streams, detachKeys, resize, startContainer)
if err != nil {
return err
}
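To round this out, here is an editor-added sketch (not part of the commit) of how a caller might drive the unified Attach entry point end to end, assuming it already has a *libpod.Container and prepared attach streams; the returned channel delivers the attach session's terminal error once it ends.

package example

import (
	"context"

	"github.com/containers/common/pkg/resize"
	"github.com/containers/podman/v5/libpod"
	"github.com/containers/podman/v5/libpod/define"
)

// attachAndWait attaches to ctr (optionally starting it first when
// start is true) and blocks until the attach session finishes.
func attachAndWait(ctx context.Context, ctr *libpod.Container, streams *define.AttachStreams, detachKeys string, start bool) error {
	// resizeChan is left unused here; a real caller would feed terminal
	// size updates into it.
	resizeChan := make(chan resize.TerminalSize)

	attachChan, err := ctr.Attach(ctx, streams, detachKeys, resizeChan, start)
	if err != nil {
		return err
	}
	// A value arrives when the attach session ends; a manual detach
	// typically surfaces as define.ErrDetach.
	return <-attachChan
}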