Make container creation and destruction synchronous.

This fixes race conditions: container creation can currently happen in
parallel, which means the scheduler places containers based on stale
data.

Fixes #427

Signed-off-by: Andrea Luzzardi <aluzzardi@gmail.com>
commit ca05173e41 (parent e1e7259b8a)
Author: Andrea Luzzardi
Date:   2015-03-31 14:11:51 -07:00
2 changed files with 10 additions and 25 deletions
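To make the race concrete, here is a minimal, self-contained sketch of the problem and of the fix the diff below applies. The node struct, selectNode helper, and capacity model are hypothetical illustrations, not swarm's actual API: with one unit of capacity and two concurrent placements, both callers read the same stale snapshot unless the whole select-then-reserve section is serialized behind a single mutex.

package main

import (
	"fmt"
	"sync"
)

// node is a hypothetical stand-in for a cluster node's resource bookkeeping.
type node struct {
	name string
	free int // free capacity units
}

// selectNode returns the first node with enough free capacity. If two callers
// run it concurrently against the same snapshot, both can pick the same node.
func selectNode(nodes []*node, need int) *node {
	for _, n := range nodes {
		if n.free >= need {
			return n
		}
	}
	return nil
}

func main() {
	nodes := []*node{{name: "node-1", free: 1}}

	var mu sync.Mutex // serializes the whole select-then-reserve section
	var wg sync.WaitGroup
	for i := 0; i < 2; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			mu.Lock() // without this lock, both goroutines see free == 1
			defer mu.Unlock()
			if n := selectNode(nodes, 1); n != nil {
				n.free-- // reserve the capacity before releasing the lock
				fmt.Printf("container %d placed on %s\n", id, n.name)
			} else {
				fmt.Printf("container %d rejected: no capacity\n", id)
			}
		}(i)
	}
	wg.Wait()
}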


@@ -66,35 +66,16 @@ func (c *Cluster) Handle(e *cluster.Event) error {
 
 // CreateContainer aka schedule a brand new container into the cluster.
 func (c *Cluster) CreateContainer(config *dockerclient.ContainerConfig, name string) (*cluster.Container, error) {
-	c.RLock()
-	defer c.RUnlock()
+	c.scheduler.Lock()
+	defer c.scheduler.Unlock()
 
-retry:
-	// FIXME: to prevent a race, we check again after the pull if the node can still handle
-	// the container. We should store the state in the store before pulling and use this to check
-	// all the other container create, but, as we don't have a proper store yet, this temporary solution
-	// was chosen.
 	n, err := c.scheduler.SelectNodeForContainer(c.listNodes(), config)
 	if err != nil {
 		return nil, err
 	}
 
 	if nn, ok := n.(*node); ok {
-		container, err := nn.create(config, name, false)
-		if err == dockerclient.ErrNotFound {
-			// image not on the node, try to pull
-			if err = nn.pull(config.Image); err != nil {
-				return nil, err
-			}
-
-			// check if the container can still fit on this node
-			if _, err = c.scheduler.SelectNodeForContainer([]cluster.Node{n}, config); err != nil {
-				// if not, try to find another node
-				log.Debugf("Node %s not available anymore, selecting another one", n.Name())
-				goto retry
-			}
-			container, err = nn.create(config, name, false)
-		}
+		container, err := nn.create(config, name, true)
 		if err != nil {
 			return nil, err
 		}
@@ -113,8 +94,8 @@ retry:
 // RemoveContainer aka Remove a container from the cluster. Containers should
 // always be destroyed through the scheduler to guarantee atomicity.
 func (c *Cluster) RemoveContainer(container *cluster.Container, force bool) error {
-	c.Lock()
-	defer c.Unlock()
+	c.scheduler.Lock()
+	defer c.scheduler.Unlock()
 
 	if n, ok := container.Node.(*node); ok {
 		if err := n.destroy(container, force); err != nil {
@@ -272,7 +253,7 @@ func (c *Cluster) Container(IDOrName string) *cluster.Container {
 	return nil
 }
 
-// nodes returns all the nodess in the cluster.
+// nodes returns all the nodes in the cluster.
 func (c *Cluster) listNodes() []cluster.Node {
 	c.RLock()
 	defer c.RUnlock()


@@ -1,6 +1,8 @@
 package scheduler
 
 import (
+	"sync"
+
 	"github.com/docker/swarm/cluster"
 	"github.com/docker/swarm/scheduler/filter"
 	"github.com/docker/swarm/scheduler/strategy"
@@ -9,6 +11,8 @@ import (
 
 // Scheduler is exported
 type Scheduler struct {
+	sync.Mutex
+
 	strategy strategy.PlacementStrategy
 	filters  []filter.Filter
 }
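As a design note, exporting the embedded sync.Mutex lets the caller, not the scheduler itself, decide the scope of the critical section: the cluster holds the lock across node selection and the create or destroy call. A rough sketch of that pattern follows, using a hypothetical Scheduler and placement helper rather than the real swarm types.

package main

import (
	"errors"
	"fmt"
	"sync"
)

// Scheduler embeds sync.Mutex so callers in other packages can serialize
// entire scheduling transactions with s.Lock()/s.Unlock().
type Scheduler struct {
	sync.Mutex
	free map[string]int // hypothetical per-node free capacity
}

// SelectNode is a hypothetical placement helper; the caller is expected to
// hold the lock so the returned choice stays valid until it commits.
func (s *Scheduler) SelectNode(need int) (string, error) {
	for name, free := range s.free {
		if free >= need {
			return name, nil
		}
	}
	return "", errors.New("no node with enough capacity")
}

func main() {
	s := &Scheduler{free: map[string]int{"node-1": 1}}

	// The caller holds the lock for the whole select-then-commit section,
	// so no other create or destroy can interleave and see stale data.
	s.Lock()
	defer s.Unlock()
	if name, err := s.SelectNode(1); err == nil {
		s.free[name]-- // commit the placement while still holding the lock
		fmt.Println("placed on", name)
	}
}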