From fc2f5758cf22fe5d3d46be7e4642abc0735e2c8d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 11 Nov 2013 21:58:40 -0600 Subject: [PATCH 1/2] fix container restart race condition Finish container cleanup before setting the state to stopped. Otherwise, for an application that exits quickly, a call to Restart can allow Start to be called again before cleanup is done, resulting in overritten data in the Container struct. --- container.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/container.go b/container.go index 9dac345447..16f45cb490 100644 --- a/container.go +++ b/container.go @@ -1287,9 +1287,6 @@ func (container *Container) monitor() { exitCode = container.cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus() } - // Report status back - container.State.setStopped(exitCode) - if container.runtime != nil && container.runtime.srv != nil { container.runtime.srv.LogEvent("die", container.ID, container.runtime.repositories.ImageName(container.Image)) } @@ -1302,6 +1299,9 @@ func (container *Container) monitor() { container.stdin, container.stdinPipe = io.Pipe() } + // Report status back + container.State.setStopped(exitCode) + // Release the lock close(container.waitLock) From 3cbec951772b9849c0ba679a5988b54384d05ce1 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 8 Nov 2013 08:40:46 -0600 Subject: [PATCH 2/2] simplify the runtime Register reconnect logic Refactor the Register code a little bit to make it easier to comprehend. --- runtime.go | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/runtime.go b/runtime.go index 4559164e28..7e386a20ff 100644 --- a/runtime.go +++ b/runtime.go @@ -118,9 +118,6 @@ func (runtime *Runtime) Register(container *Container) error { return err } - // init the wait lock - container.waitLock = make(chan struct{}) - container.runtime = runtime // Attach to stdout and stderr @@ -136,10 +133,6 @@ func (runtime *Runtime) Register(container *Container) error { runtime.containers.PushBack(container) runtime.idIndex.Add(container.ID) - // When we actually restart, Start() do the monitoring. - // However, when we simply 'reattach', we have to restart a monitor - nomonitor := false - // FIXME: if the container is supposed to be running but is not, auto restart it? // if so, then we need to restart monitor and init a new lock // If the container is supposed to be running, make sure of it @@ -157,7 +150,6 @@ func (runtime *Runtime) Register(container *Container) error { if err := container.Start(); err != nil { return err } - nomonitor = true } else { utils.Debugf("Marking as stopped") container.State.setStopped(-127) @@ -165,16 +157,17 @@ func (runtime *Runtime) Register(container *Container) error { return err } } - } - } + } else { + utils.Debugf("Reconnecting to container %v", container.ID) - // If the container is not running or just has been flagged not running - // then close the wait lock chan (will be reset upon start) - if !container.State.Running { - close(container.waitLock) - } else if !nomonitor { - container.allocateNetwork() - go container.monitor() + if err := container.allocateNetwork(); err != nil { + return err + } + + container.waitLock = make(chan struct{}) + + go container.monitor() + } } return nil }