From 3dcaf20d6bf912cbc3eb689302d41047f9cd0f82 Mon Sep 17 00:00:00 2001 From: "Guillaume J. Charmes" Date: Sun, 31 Mar 2013 17:40:39 -0700 Subject: [PATCH 1/8] Check if the containers are really running when starting docker --- runtime.go | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/runtime.go b/runtime.go index 9122f0c664..4f3c0a6237 100644 --- a/runtime.go +++ b/runtime.go @@ -7,8 +7,10 @@ import ( "io" "io/ioutil" "os" + "os/exec" "path" "sort" + "strings" "sync" "time" ) @@ -132,6 +134,23 @@ func (runtime *Runtime) Register(container *Container) error { if err := validateId(container.Id); err != nil { return err } + + // FIXME: if the container is supposed to be running but is not, auto restart it? + // If the container is supposed to be running, make sure of if + if container.State.Running { + if output, err := exec.Command("lxc-info", "-n", container.Id).CombinedOutput(); err != nil { + return err + } else { + if !strings.Contains(string(output), "RUNNING") { + Debugf("Container %s was supposed to be running be is not.", container.Id) + container.State.Running = false + if err := container.ToDisk(); err != nil { + return err + } + } + } + } + container.runtime = runtime // Setup state lock (formerly in newState() lock := new(sync.Mutex) @@ -259,7 +278,6 @@ func NewRuntimeFromDirectory(root string) (*Runtime, error) { // If the auth file does not exist, keep going return nil, err } - runtime := &Runtime{ root: root, repository: runtimeRepo, From 8c36e6920a9e17171dc67a79ab20d39886404e73 Mon Sep 17 00:00:00 2001 From: "Guillaume J. Charmes" Date: Sun, 31 Mar 2013 20:14:54 -0700 Subject: [PATCH 2/8] Working in progress: add unit tests for the running state check --- runtime_test.go | 41 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/runtime_test.go b/runtime_test.go index b8a6c429c9..604f4aac38 100644 --- a/runtime_test.go +++ b/runtime_test.go @@ -279,8 +279,32 @@ func TestRestore(t *testing.T) { t.Fatal(err) } defer runtime1.Destroy(container1) - if len(runtime1.List()) != 1 { - t.Errorf("Expected 1 container, %v found", len(runtime1.List())) + + // Create a second container meant to be killed + container1_1, err := runtime1.Create(&Config{ + Image: GetTestImage(runtime1).Id, + Cmd: []string{"/bin/cat"}, + OpenStdin: true, + }, + ) + if err != nil { + t.Fatal(err) + } + defer runtime1.Destroy(container1_1) + + // Start the container non blocking + if err := container1_1.Start(); err != nil { + t.Fatal(err) + } + + // Simulate a crash/manual quit of dockerd: process dies, states stays 'Running' + if err := exec.Command("lxc-kill", "-n", container1_1.Id, "9").Run(); err == nil { + t.Fatalf("container supposed to be killed (return error 255). Success received.") + } + container1_1.State.Running = true + + if len(runtime1.List()) != 2 { + t.Errorf("Expected 2 container, %v found", len(runtime1.List())) } if err := container1.Run(); err != nil { t.Fatal(err) @@ -293,8 +317,17 @@ func TestRestore(t *testing.T) { t.Fatal(err) } defer nuke(runtime2) - if len(runtime2.List()) != 1 { - t.Errorf("Expected 1 container, %v found", len(runtime2.List())) + if len(runtime2.List()) != 2 { + t.Errorf("Expected 2 container, %v found", len(runtime2.List())) + } + runningCount := 0 + for _, c := range runtime2.List() { + if c.State.Running { + runningCount++ + } + } + if runningCount != 0 { + t.Fatalf("Expected 0 container alive, %d found", runningCount) } container2 := runtime2.Get(container1.Id) if container2 == nil { From c780ff5ae726bc8acd30b9ef476cd111c477ec35 Mon Sep 17 00:00:00 2001 From: shin- Date: Tue, 2 Apr 2013 07:01:43 -0700 Subject: [PATCH 3/8] More thorough test case, use container.Stop() instead of lxc-kill, use setStopped() during the restore step --- runtime.go | 4 ++-- runtime_test.go | 16 +++++++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/runtime.go b/runtime.go index 4f3c0a6237..9f571cf45a 100644 --- a/runtime.go +++ b/runtime.go @@ -136,14 +136,14 @@ func (runtime *Runtime) Register(container *Container) error { } // FIXME: if the container is supposed to be running but is not, auto restart it? - // If the container is supposed to be running, make sure of if + // If the container is supposed to be running, make sure of it if container.State.Running { if output, err := exec.Command("lxc-info", "-n", container.Id).CombinedOutput(); err != nil { return err } else { if !strings.Contains(string(output), "RUNNING") { Debugf("Container %s was supposed to be running be is not.", container.Id) - container.State.Running = false + container.State.setStopped(-127) if err := container.ToDisk(); err != nil { return err } diff --git a/runtime_test.go b/runtime_test.go index 604f4aac38..4f6ddd1d41 100644 --- a/runtime_test.go +++ b/runtime_test.go @@ -297,10 +297,15 @@ func TestRestore(t *testing.T) { t.Fatal(err) } - // Simulate a crash/manual quit of dockerd: process dies, states stays 'Running' - if err := exec.Command("lxc-kill", "-n", container1_1.Id, "9").Run(); err == nil { - t.Fatalf("container supposed to be killed (return error 255). Success received.") + if !container1_1.State.Running { + t.Fatalf("Container %v should appear as running but isn't", container1_1.Id) } + + // Simulate a crash/manual quit of dockerd: process dies, states stays 'Running' + if err := container1_1.Stop(); err != nil { + t.Fatalf("Could not stop container: %v", err) + } + container1_1.State.Running = true if len(runtime1.List()) != 2 { @@ -310,6 +315,10 @@ func TestRestore(t *testing.T) { t.Fatal(err) } + if !container1_1.State.Running { + t.Fatalf("Container %v should appear as running but isn't", container1_1.Id) + } + // Here are are simulating a docker restart - that is, reloading all containers // from scratch runtime2, err := NewRuntimeFromDirectory(root) @@ -323,6 +332,7 @@ func TestRestore(t *testing.T) { runningCount := 0 for _, c := range runtime2.List() { if c.State.Running { + t.Errorf("Running container found: %v (%v)", c.Id, c.Path) runningCount++ } } From 02c211a0dca91bb3744b6c1301e372799189881f Mon Sep 17 00:00:00 2001 From: shin- Date: Tue, 2 Apr 2013 07:13:42 -0700 Subject: [PATCH 4/8] variable names --- runtime_test.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/runtime_test.go b/runtime_test.go index 4f6ddd1d41..28dcf6c279 100644 --- a/runtime_test.go +++ b/runtime_test.go @@ -281,7 +281,7 @@ func TestRestore(t *testing.T) { defer runtime1.Destroy(container1) // Create a second container meant to be killed - container1_1, err := runtime1.Create(&Config{ + container2, err := runtime1.Create(&Config{ Image: GetTestImage(runtime1).Id, Cmd: []string{"/bin/cat"}, OpenStdin: true, @@ -290,23 +290,23 @@ func TestRestore(t *testing.T) { if err != nil { t.Fatal(err) } - defer runtime1.Destroy(container1_1) + defer runtime1.Destroy(container2) // Start the container non blocking - if err := container1_1.Start(); err != nil { + if err := container2.Start(); err != nil { t.Fatal(err) } - if !container1_1.State.Running { - t.Fatalf("Container %v should appear as running but isn't", container1_1.Id) + if !container2.State.Running { + t.Fatalf("Container %v should appear as running but isn't", container2.Id) } // Simulate a crash/manual quit of dockerd: process dies, states stays 'Running' - if err := container1_1.Stop(); err != nil { + if err := container2.Stop(); err != nil { t.Fatalf("Could not stop container: %v", err) } - container1_1.State.Running = true + container2.State.Running = true if len(runtime1.List()) != 2 { t.Errorf("Expected 2 container, %v found", len(runtime1.List())) @@ -315,8 +315,8 @@ func TestRestore(t *testing.T) { t.Fatal(err) } - if !container1_1.State.Running { - t.Fatalf("Container %v should appear as running but isn't", container1_1.Id) + if !container2.State.Running { + t.Fatalf("Container %v should appear as running but isn't", container2.Id) } // Here are are simulating a docker restart - that is, reloading all containers @@ -339,11 +339,11 @@ func TestRestore(t *testing.T) { if runningCount != 0 { t.Fatalf("Expected 0 container alive, %d found", runningCount) } - container2 := runtime2.Get(container1.Id) - if container2 == nil { + container3 := runtime2.Get(container1.Id) + if container3 == nil { t.Fatal("Unable to Get container") } - if err := container2.Run(); err != nil { + if err := container3.Run(); err != nil { t.Fatal(err) } } From 791ca6fde47d26a96134b1715580303ea6cd17ed Mon Sep 17 00:00:00 2001 From: shin- Date: Tue, 2 Apr 2013 10:06:49 -0700 Subject: [PATCH 5/8] Better crash simulation in TestRestore ; force state lock creation when loading a container from disk --- container.go | 1 + runtime_test.go | 10 ++++++---- state.go | 5 +++++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/container.go b/container.go index 8f993a35e1..b89885a17e 100644 --- a/container.go +++ b/container.go @@ -127,6 +127,7 @@ func (container *Container) FromDisk() error { if err := json.Unmarshal(data, container); err != nil { return err } + container.State.resetLock() return nil } diff --git a/runtime_test.go b/runtime_test.go index 28dcf6c279..5eeb0a155a 100644 --- a/runtime_test.go +++ b/runtime_test.go @@ -7,6 +7,7 @@ import ( "os/exec" "os/user" "testing" + "time" ) // FIXME: this is no longer needed @@ -302,11 +303,12 @@ func TestRestore(t *testing.T) { } // Simulate a crash/manual quit of dockerd: process dies, states stays 'Running' - if err := container2.Stop(); err != nil { - t.Fatalf("Could not stop container: %v", err) - } - + cStdin, _ := container2.StdinPipe() + cStdin.Close() + container2.State.setStopped(-1) + time.Sleep(time.Second) container2.State.Running = true + container2.ToDisk() if len(runtime1.List()) != 2 { t.Errorf("Expected 2 container, %v found", len(runtime1.List())) diff --git a/state.go b/state.go index f438ff8727..c7e89f2e65 100644 --- a/state.go +++ b/state.go @@ -39,6 +39,11 @@ func (s *State) setStopped(exitCode int) { s.broadcast() } +func (s *State) resetLock() { + s.stateChangeLock = &sync.Mutex{} + s.stateChangeCond = sync.NewCond(s.stateChangeLock) +} + func (s *State) broadcast() { s.stateChangeLock.Lock() s.stateChangeCond.Broadcast() From 7b74b9cab50ecd12ca2af51116279fce97edadd8 Mon Sep 17 00:00:00 2001 From: shin- Date: Wed, 3 Apr 2013 05:37:45 -0700 Subject: [PATCH 6/8] Integrated @creack's feedback on TestRestore --- runtime_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/runtime_test.go b/runtime_test.go index 5eeb0a155a..d9d4e91cb5 100644 --- a/runtime_test.go +++ b/runtime_test.go @@ -305,8 +305,7 @@ func TestRestore(t *testing.T) { // Simulate a crash/manual quit of dockerd: process dies, states stays 'Running' cStdin, _ := container2.StdinPipe() cStdin.Close() - container2.State.setStopped(-1) - time.Sleep(time.Second) + container2.WaitTimeout(time.Second) container2.State.Running = true container2.ToDisk() From d1767bbf672355b9063099d641106dd4ea5303b3 Mon Sep 17 00:00:00 2001 From: shin- Date: Wed, 3 Apr 2013 05:39:39 -0700 Subject: [PATCH 7/8] Moved resetLock() to the Load() method ; changed resetLock() to initLock() and changed behavior to not modify the lock if it was already set (not nil) --- container.go | 1 - runtime.go | 1 + state.go | 8 +++++--- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/container.go b/container.go index b89885a17e..8f993a35e1 100644 --- a/container.go +++ b/container.go @@ -127,7 +127,6 @@ func (container *Container) FromDisk() error { if err := json.Unmarshal(data, container); err != nil { return err } - container.State.resetLock() return nil } diff --git a/runtime.go b/runtime.go index 9f571cf45a..e096202c61 100644 --- a/runtime.go +++ b/runtime.go @@ -117,6 +117,7 @@ func (runtime *Runtime) Load(id string) (*Container, error) { if err := container.FromDisk(); err != nil { return nil, err } + container.State.initLock() if container.Id != id { return container, fmt.Errorf("Container %s is stored at %s", container.Id, id) } diff --git a/state.go b/state.go index c7e89f2e65..bf325a3eac 100644 --- a/state.go +++ b/state.go @@ -39,9 +39,11 @@ func (s *State) setStopped(exitCode int) { s.broadcast() } -func (s *State) resetLock() { - s.stateChangeLock = &sync.Mutex{} - s.stateChangeCond = sync.NewCond(s.stateChangeLock) +func (s *State) initLock() { + if s.stateChangeLock == nil { + s.stateChangeLock = &sync.Mutex{} + s.stateChangeCond = sync.NewCond(s.stateChangeLock) + } } func (s *State) broadcast() { From ad0183e41978b3b156234f801750034912915223 Mon Sep 17 00:00:00 2001 From: shin- Date: Wed, 3 Apr 2013 10:48:02 -0700 Subject: [PATCH 8/8] Check WaitTimeout return in test, replaced lock initialization in runtime.Register() with call to initLock() --- runtime.go | 5 +---- runtime_test.go | 4 +++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/runtime.go b/runtime.go index e096202c61..950ea404a1 100644 --- a/runtime.go +++ b/runtime.go @@ -11,7 +11,6 @@ import ( "path" "sort" "strings" - "sync" "time" ) @@ -154,9 +153,7 @@ func (runtime *Runtime) Register(container *Container) error { container.runtime = runtime // Setup state lock (formerly in newState() - lock := new(sync.Mutex) - container.State.stateChangeLock = lock - container.State.stateChangeCond = sync.NewCond(lock) + container.State.initLock() // Attach to stdout and stderr container.stderr = newWriteBroadcaster() container.stdout = newWriteBroadcaster() diff --git a/runtime_test.go b/runtime_test.go index d9d4e91cb5..9ae5a5835f 100644 --- a/runtime_test.go +++ b/runtime_test.go @@ -305,7 +305,9 @@ func TestRestore(t *testing.T) { // Simulate a crash/manual quit of dockerd: process dies, states stays 'Running' cStdin, _ := container2.StdinPipe() cStdin.Close() - container2.WaitTimeout(time.Second) + if err := container2.WaitTimeout(time.Second); err != nil { + t.Fatal(err) + } container2.State.Running = true container2.ToDisk()