From f067e263677fc86f9610ca61fbe42f63efad91f2 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 25 Mar 2014 23:21:07 +0000 Subject: [PATCH 1/7] Ensure that all containers are stopped cleanly at shutdown Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- runtime/runtime.go | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/runtime/runtime.go b/runtime/runtime.go index b035f5df9f..85880ff9ab 100644 --- a/runtime/runtime.go +++ b/runtime/runtime.go @@ -778,8 +778,31 @@ func NewRuntimeFromDirectory(config *daemonconfig.Config, eng *engine.Engine) (* return runtime, nil } +func (runtime *Runtime) shutdown() error { + group := sync.WaitGroup{} + utils.Debugf("starting clean shutdown of all containers...") + for _, container := range runtime.List() { + if container.State.IsRunning() { + utils.Debugf("stopping %s", container.ID) + group.Add(1) + + go func() { + defer group.Done() + container.Stop(10) + }() + } + } + group.Wait() + + return nil +} + func (runtime *Runtime) Close() error { errorsStrings := []string{} + if err := runtime.shutdown(); err != nil { + utils.Errorf("runtime.shutdown(): %s", err) + errorsStrings = append(errorsStrings, err.Error()) + } if err := portallocator.ReleaseAll(); err != nil { utils.Errorf("portallocator.ReleaseAll(): %s", err) errorsStrings = append(errorsStrings, err.Error()) From 5b9069bd990dca0a35d8e490c6f6b56d27163bb8 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 26 Mar 2014 00:04:55 +0000 Subject: [PATCH 2/7] Add kill for other drivers on restart Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- runtime/runtime.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/runtime/runtime.go b/runtime/runtime.go index 85880ff9ab..4ece7d1533 100644 --- a/runtime/runtime.go +++ b/runtime/runtime.go @@ -174,6 +174,7 @@ func (runtime *Runtime) Register(container *Container) error { if container.State.IsGhost() { 
utils.Debugf("killing ghost %s", container.ID) + existingPid := container.State.Pid container.State.SetGhost(false) container.State.SetStopped(0) @@ -181,9 +182,20 @@ func (runtime *Runtime) Register(container *Container) error { // no ghost processes are left when docker dies if container.ExecDriver == "" || strings.Contains(container.ExecDriver, "lxc") { lxc.KillLxc(container.ID, 9) - if err := container.Unmount(); err != nil { - utils.Debugf("ghost unmount error %s", err) + } else { + // use the current driver and ensure that the container is dead x.x + cmd := &execdriver.Command{ + ID: container.ID, } + var err error + cmd.Process, err = os.FindProcess(existingPid) + if err != nil { + utils.Debugf("cannot find existing process for %d", existingPid) + } + runtime.execDriver.Kill(cmd, 9) + } + if err := container.Unmount(); err != nil { + utils.Debugf("ghost unmount error %s", err) } } From 5bb82f6313d7f789783ffac854be85a44a56617e Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 26 Mar 2014 06:48:16 +0000 Subject: [PATCH 3/7] Ensure a reliable way to kill ghost containers on reboot Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/exec.go | 7 +++++- pkg/libcontainer/nsinit/state.go | 16 +++++++++---- pkg/system/proc.go | 26 +++++++++++++++++++++ runtime/execdriver/driver.go | 1 + runtime/execdriver/lxc/driver.go | 4 ++++ runtime/execdriver/native/driver.go | 36 ++++++++++++++++++++++++++--- runtime/runtime.go | 2 +- 7 files changed, 83 insertions(+), 9 deletions(-) create mode 100644 pkg/system/proc.go diff --git a/pkg/libcontainer/nsinit/exec.go b/pkg/libcontainer/nsinit/exec.go index 73842f729f..c07c45de3c 100644 --- a/pkg/libcontainer/nsinit/exec.go +++ b/pkg/libcontainer/nsinit/exec.go @@ -50,8 +50,13 @@ func (ns *linuxNs) Exec(container *libcontainer.Container, term Terminal, args [ if err := command.Start(); err != nil { return -1, err } + + started, err := 
system.GetProcessStartTime(command.Process.Pid) + if err != nil { + return -1, err + } ns.logger.Printf("writting pid %d to file\n", command.Process.Pid) - if err := ns.stateWriter.WritePid(command.Process.Pid); err != nil { + if err := ns.stateWriter.WritePid(command.Process.Pid, started); err != nil { command.Process.Kill() return -1, err } diff --git a/pkg/libcontainer/nsinit/state.go b/pkg/libcontainer/nsinit/state.go index af38008c03..26d7fa4230 100644 --- a/pkg/libcontainer/nsinit/state.go +++ b/pkg/libcontainer/nsinit/state.go @@ -10,7 +10,7 @@ import ( // StateWriter handles writing and deleting the pid file // on disk type StateWriter interface { - WritePid(pid int) error + WritePid(pid int, startTime string) error DeletePid() error } @@ -19,10 +19,18 @@ type DefaultStateWriter struct { } // writePidFile writes the namespaced processes pid to pid in the rootfs for the container -func (d *DefaultStateWriter) WritePid(pid int) error { - return ioutil.WriteFile(filepath.Join(d.Root, "pid"), []byte(fmt.Sprint(pid)), 0655) +func (d *DefaultStateWriter) WritePid(pid int, startTime string) error { + err := ioutil.WriteFile(filepath.Join(d.Root, "pid"), []byte(fmt.Sprint(pid)), 0655) + if err != nil { + return err + } + return ioutil.WriteFile(filepath.Join(d.Root, "start"), []byte(startTime), 0655) } func (d *DefaultStateWriter) DeletePid() error { - return os.Remove(filepath.Join(d.Root, "pid")) + err := os.Remove(filepath.Join(d.Root, "pid")) + if serr := os.Remove(filepath.Join(d.Root, "start")); err == nil { + err = serr + } + return err } diff --git a/pkg/system/proc.go b/pkg/system/proc.go new file mode 100644 index 0000000000..a492346c7f --- /dev/null +++ b/pkg/system/proc.go @@ -0,0 +1,26 @@ +package system + +import ( + "io/ioutil" + "path/filepath" + "strconv" + "strings" +) + +// GetProcessStartTime looks in /proc to find the process start time so that we +// can verify that this pid has started after ourselves +func GetProcessStartTime(pid int) (string, error) { + data, err 
:= ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat")) + if err != nil { + return "", err + } + parts := strings.Split(string(data), " ") + // the starttime is located at pos 22 + // from the man page + // + // starttime %llu (was %lu before Linux 2.6) + // (22) The time the process started after system boot. In kernels before Linux 2.6, this + // value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks + // (divide by sysconf(_SC_CLK_TCK)). + return parts[22-1], nil // starts at 1 +} diff --git a/runtime/execdriver/driver.go b/runtime/execdriver/driver.go index d067973419..27a575cb3a 100644 --- a/runtime/execdriver/driver.go +++ b/runtime/execdriver/driver.go @@ -84,6 +84,7 @@ type Driver interface { Name() string // Driver name Info(id string) Info // "temporary" hack (until we move state from core to plugins) GetPidsForContainer(id string) ([]int, error) // Returns a list of pids for the given container. + Terminate(c *Command) error // kill it with fire } // Network settings of the container diff --git a/runtime/execdriver/lxc/driver.go b/runtime/execdriver/lxc/driver.go index 896f215366..ef16dcc380 100644 --- a/runtime/execdriver/lxc/driver.go +++ b/runtime/execdriver/lxc/driver.go @@ -204,6 +204,10 @@ func (d *driver) Kill(c *execdriver.Command, sig int) error { return KillLxc(c.ID, sig) } +func (d *driver) Terminate(c *execdriver.Command) error { + return KillLxc(c.ID, 9) +} + func (d *driver) version() string { var ( version string diff --git a/runtime/execdriver/native/driver.go b/runtime/execdriver/native/driver.go index 4acc4b388c..c5a3837615 100644 --- a/runtime/execdriver/native/driver.go +++ b/runtime/execdriver/native/driver.go @@ -117,9 +117,39 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba } func (d *driver) Kill(p *execdriver.Command, sig int) error { - err := syscall.Kill(p.Process.Pid, syscall.Signal(sig)) + return syscall.Kill(p.Process.Pid, syscall.Signal(sig)) 
+} + +func (d *driver) Terminate(p *execdriver.Command) error { + // let's check the start time for the process + started, err := d.readStartTime(p) + if err != nil { + // if we don't have the data on disk then we can assume the process is gone + // because this is only removed after we know the process has stopped + if os.IsNotExist(err) { + return nil + } + return err + } + + currentStartTime, err := system.GetProcessStartTime(p.Process.Pid) + if err != nil { + return err + } + if started == currentStartTime { + err = syscall.Kill(p.Process.Pid, 9) + } d.removeContainerRoot(p.ID) return err + +} + +func (d *driver) readStartTime(p *execdriver.Command) (string, error) { + data, err := ioutil.ReadFile(filepath.Join(d.root, p.ID, "start")) + if err != nil { + return "", err + } + return string(data), nil } func (d *driver) Info(id string) execdriver.Info { @@ -235,9 +265,9 @@ type dockerStateWriter struct { callback execdriver.StartCallback } -func (d *dockerStateWriter) WritePid(pid int) error { +func (d *dockerStateWriter) WritePid(pid int, started string) error { d.c.ContainerPid = pid - err := d.dsw.WritePid(pid) + err := d.dsw.WritePid(pid, started) if d.callback != nil { d.callback(d.c) } diff --git a/runtime/runtime.go b/runtime/runtime.go index 4ece7d1533..1c99a02811 100644 --- a/runtime/runtime.go +++ b/runtime/runtime.go @@ -192,7 +192,7 @@ func (runtime *Runtime) Register(container *Container) error { if err != nil { utils.Debugf("cannot find existing process for %d", existingPid) } - runtime.execDriver.Kill(cmd, 9) + runtime.execDriver.Terminate(cmd) } if err := container.Unmount(); err != nil { utils.Debugf("ghost unmount error %s", err) From 283daced0c919be760947d44d7e46c80e1054d64 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 26 Mar 2014 06:55:46 +0000 Subject: [PATCH 4/7] Don't send prctl to be consistent with other drivers Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/nsinit/init.go | 5 ----- 1 
file changed, 5 deletions(-) diff --git a/pkg/libcontainer/nsinit/init.go b/pkg/libcontainer/nsinit/init.go index 85182326ee..c7c2addb18 100644 --- a/pkg/libcontainer/nsinit/init.go +++ b/pkg/libcontainer/nsinit/init.go @@ -54,11 +54,6 @@ func (ns *linuxNs) Init(container *libcontainer.Container, uncleanRootfs, consol return fmt.Errorf("setctty %s", err) } } - // this is our best effort to let the process know that the parent has died and that it - // should it should act on it how it sees fit - if err := system.ParentDeathSignal(uintptr(syscall.SIGTERM)); err != nil { - return fmt.Errorf("parent death signal %s", err) - } if err := setupNetwork(container, context); err != nil { return fmt.Errorf("setup networking %s", err) } From e36d89b0f9c8ba5b071374310ca632f6b2fdb7a1 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 26 Mar 2014 06:59:41 +0000 Subject: [PATCH 5/7] Ensure state is saved to disk after we kill the ghost Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- runtime/runtime.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/runtime/runtime.go b/runtime/runtime.go index 1c99a02811..d5c1a96ada 100644 --- a/runtime/runtime.go +++ b/runtime/runtime.go @@ -197,6 +197,9 @@ func (runtime *Runtime) Register(container *Container) error { if err := container.Unmount(); err != nil { utils.Debugf("ghost unmount error %s", err) } + if err := container.ToDisk(); err != nil { + utils.Debugf("saving ghost state to disk %s", err) + } } info := runtime.execDriver.Info(container.ID) From 93779cc7fee4ee0690d9dd28eed478a418e79577 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 1 Apr 2014 00:11:17 +0000 Subject: [PATCH 6/7] Send sigterm and wait forever Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- runtime/container.go | 1 - runtime/runtime.go | 11 ++++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/runtime/container.go b/runtime/container.go index ed68fd0844..bd4a6f2bea 100644 --- 
a/runtime/container.go +++ b/runtime/container.go @@ -915,7 +915,6 @@ func (container *Container) Stop(seconds int) error { // 1. Send a SIGTERM if err := container.KillSig(15); err != nil { - utils.Debugf("Error sending kill SIGTERM: %s", err) log.Print("Failed to send SIGTERM to the process, force killing") if err := container.KillSig(9); err != nil { return err diff --git a/runtime/runtime.go b/runtime/runtime.go index d5c1a96ada..9e8323279e 100644 --- a/runtime/runtime.go +++ b/runtime/runtime.go @@ -797,13 +797,18 @@ func (runtime *Runtime) shutdown() error { group := sync.WaitGroup{} utils.Debugf("starting clean shutdown of all containers...") for _, container := range runtime.List() { - if container.State.IsRunning() { - utils.Debugf("stopping %s", container.ID) + c := container + if c.State.IsRunning() { + utils.Debugf("stopping %s", c.ID) group.Add(1) go func() { defer group.Done() - container.Stop(10) + if err := c.KillSig(15); err != nil { + utils.Debugf("kill 15 error for %s - %s", c.ID, err) + } + c.Wait() + utils.Debugf("container stopped %s", c.ID) }() } } From ac9b06ae95f1da8407934036ab1e4019a96a6b21 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 1 Apr 2014 08:18:52 +0000 Subject: [PATCH 7/7] Update sig message Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- server/server.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/server.go b/server/server.go index 278cab2b2a..65dbcca47b 100644 --- a/server/server.go +++ b/server/server.go @@ -54,7 +54,7 @@ func InitServer(job *engine.Job) engine.Status { gosignal.Notify(c, os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT) go func() { sig := <-c - log.Printf("Received signal '%v', exiting\n", sig) + log.Printf("Received signal '%v', starting shutdown of docker...\n", sig) utils.RemovePidFile(srv.runtime.Config().Pidfile) srv.Close() os.Exit(0)