From cafbc6df28ca941ea131e10b2c3e88203e7f038c Mon Sep 17 00:00:00 2001 From: Andrea Luzzardi Date: Mon, 26 Jan 2015 13:39:52 -0800 Subject: [PATCH 1/2] Improve container state refresh. Currently, container inspection is performed only on creation (or during exec by the API). The problem is that certain informations such as NetworkSettings are not available during creation, therefore we must inspect the containers during other events. This change refactors a bit the API so that RefreshContainer() and RefreshContainers() now accept a `full` flag to force a deep refresh. The node event handler in turn uses that flag whenever a container starts or dies. Signed-off-by: Andrea Luzzardi --- api/api.go | 2 +- cluster/node.go | 105 +++++++++++++++++++++++++----------------------- 2 files changed, 55 insertions(+), 52 deletions(-) diff --git a/api/api.go b/api/api.go index 338935ef08..fd9a0f7dea 100644 --- a/api/api.go +++ b/api/api.go @@ -279,7 +279,7 @@ func proxyContainerAndForceRefresh(c *context, w http.ResponseWriter, r *http.Re } log.Debugf("[REFRESH CONTAINER] --> %s", container.Id) - container.Node.ForceRefreshContainer(container.Container) + container.Node.RefreshContainer(container.Id, true) } // Proxy a request to the right node diff --git a/cluster/node.go b/cluster/node.go index e0970e8af3..243c51ea86 100644 --- a/cluster/node.go +++ b/cluster/node.go @@ -85,10 +85,11 @@ func (n *Node) connectClient(client dockerclient.Client) error { } // Force a state update before returning. - if err := n.refreshContainers(); err != nil { + if err := n.RefreshContainers(true); err != nil { n.client = nil return err } + if err := n.refreshImages(); err != nil { n.client = nil return err @@ -153,32 +154,33 @@ func (n *Node) refreshImages() error { return nil } -// Refresh the list and status of containers running on the node. -func (n *Node) refreshContainers() error { +// Refresh the list and status of containers running on the node. If `full` is +// true, each container will be inspected. +func (n *Node) RefreshContainers(full bool) error { containers, err := n.client.ListContainers(true, false, "") if err != nil { return err } - n.Lock() - defer n.Unlock() - merged := make(map[string]*Container) for _, c := range containers { - merged, err = n.updateContainer(c, merged) + merged, err = n.updateContainer(c, merged, full) if err != nil { log.Errorf("[%s/%s] Unable to update state of %s", n.ID, n.Name, c.Id) } } + n.Lock() + defer n.Unlock() n.containers = merged log.Debugf("[%s/%s] Updated state", n.ID, n.Name) return nil } -// Refresh the status of a container running on the node. -func (n *Node) refreshContainer(ID string) error { +// Refresh the status of a container running on the node. If `full` is true, +// the container will be inspected. +func (n *Node) RefreshContainer(ID string, full bool) error { containers, err := n.client.ListContainers(true, false, fmt.Sprintf("{%q:[%q]}", "id", ID)) if err != nil { return err @@ -186,66 +188,59 @@ func (n *Node) refreshContainer(ID string) error { if len(containers) > 1 { // We expect one container, if we get more than one, trigger a full refresh. - return n.refreshContainers() + return n.RefreshContainers(full) } - n.Lock() - defer n.Unlock() - if len(containers) == 0 { // The container doesn't exist on the node, remove it. + n.Lock() delete(n.containers, ID) + n.Unlock() + return nil } - _, err = n.updateContainer(containers[0], n.containers) + _, err = n.updateContainer(containers[0], n.containers, full) return err } -func (n *Node) ForceRefreshContainer(c dockerclient.Container) error { - return n.inspectContainer(c, n.containers, true) -} +func (n *Node) updateContainer(c dockerclient.Container, containers map[string]*Container, full bool) (map[string]*Container, error) { + var container *Container -func (n *Node) inspectContainer(c dockerclient.Container, containers map[string]*Container, lock bool) error { - - container := &Container{ - Container: c, - Node: n, + if current, exists := n.containers[c.Id]; exists { + // The container is already known. + container = current + } else { + // This is a brand new container. We need to do a full refresh. + container = &Container{ + Node: n, + } + full = true } - info, err := n.client.InspectContainer(c.Id) - if err != nil { - return err + // Update its internal state. + container.Container = c + + // Update ContainerInfo. + if full { + info, err := n.client.InspectContainer(c.Id) + if err != nil { + return nil, err + } + container.Info = *info } - container.Info = *info // real CpuShares -> nb of CPUs container.Info.Config.CpuShares = container.Info.Config.CpuShares / 100.0 * n.Cpus - if lock { - n.Lock() - defer n.Unlock() - } + n.Lock() + defer n.Unlock() containers[container.Id] = container - return nil -} - -func (n *Node) updateContainer(c dockerclient.Container, containers map[string]*Container) (map[string]*Container, error) { - if current, exists := n.containers[c.Id]; exists { - // The container exists. Update its state. - current.Container = c - containers[current.Id] = current - } else { - // This is a brand new container. - if err := n.inspectContainer(c, containers, false); err != nil { - return nil, err - } - } return containers, nil } -func (n *Node) refreshContainersAsync() { +func (n *Node) RefreshContainersAsync() { n.ch <- true } @@ -254,9 +249,9 @@ func (n *Node) refreshLoop() { var err error select { case <-n.ch: - err = n.refreshContainers() + err = n.RefreshContainers(false) case <-time.After(stateRefreshPeriod): - err = n.refreshContainers() + err = n.RefreshContainers(false) } if err == nil { @@ -359,7 +354,7 @@ func (n *Node) Create(config *dockerclient.ContainerConfig, name string, pullIma // Register the container immediately while waiting for a state refresh. // Force a state refresh to pick up the newly created container. - n.refreshContainer(id) + n.RefreshContainer(id, true) n.RLock() defer n.RUnlock() @@ -440,10 +435,18 @@ func (n *Node) String() string { func (n *Node) handler(ev *dockerclient.Event, _ chan error, args ...interface{}) { // Something changed - refresh our internal state. - if ev.Status == "pull" || ev.Status == "untag" || ev.Status == "delete" { + switch ev.Status { + case "pull", "untag", "delete": + // These events refer to images so there's no need to update + // containers. n.refreshImages() - } else { - n.refreshContainer(ev.Id) + case "start", "die": + // If the container is started or stopped, we have to do an inspect in + // order to get the new NetworkSettings. + n.RefreshContainer(ev.Id, true) + default: + // Otherwise, do a "soft" refresh of the container. + n.RefreshContainer(ev.Id, false) } // If there is no event handler registered, abort right now. From dc43a156bd53db24b5020c9b7e705d8aba4310a6 Mon Sep 17 00:00:00 2001 From: Andrea Luzzardi Date: Mon, 26 Jan 2015 14:04:41 -0800 Subject: [PATCH 2/2] Fix concurrency issue in node.updateContainer. Signed-off-by: Andrea Luzzardi --- cluster/node.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/cluster/node.go b/cluster/node.go index 243c51ea86..89504fc899 100644 --- a/cluster/node.go +++ b/cluster/node.go @@ -207,6 +207,8 @@ func (n *Node) RefreshContainer(ID string, full bool) error { func (n *Node) updateContainer(c dockerclient.Container, containers map[string]*Container, full bool) (map[string]*Container, error) { var container *Container + n.Lock() + if current, exists := n.containers[c.Id]; exists { // The container is already known. container = current @@ -220,6 +222,10 @@ func (n *Node) updateContainer(c dockerclient.Container, containers map[string]* // Update its internal state. container.Container = c + containers[container.Id] = container + + // Release the lock here as the next step is slow. + n.Unlock() // Update ContainerInfo. if full { @@ -228,15 +234,10 @@ func (n *Node) updateContainer(c dockerclient.Container, containers map[string]* return nil, err } container.Info = *info + // real CpuShares -> nb of CPUs + container.Info.Config.CpuShares = container.Info.Config.CpuShares / 100.0 * n.Cpus } - // real CpuShares -> nb of CPUs - container.Info.Config.CpuShares = container.Info.Config.CpuShares / 100.0 * n.Cpus - - n.Lock() - defer n.Unlock() - containers[container.Id] = container - return containers, nil }