diff --git a/api/handlers.go b/api/handlers.go
index 218480d294..19cc4bc562 100644
--- a/api/handlers.go
+++ b/api/handlers.go
@@ -438,8 +438,14 @@ func postContainersCreate(c *context, w http.ResponseWriter, r *http.Request) {
 		authConfig = &dockerclient.AuthConfig{}
 		json.Unmarshal(buf, authConfig)
 	}
+	containerConfig := cluster.BuildContainerConfig(config)
 
-	container, err := c.cluster.CreateContainer(cluster.BuildContainerConfig(config), name, authConfig)
+	if err := containerConfig.Validate(); err != nil {
+		httpError(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	container, err := c.cluster.CreateContainer(containerConfig, name, authConfig)
 	if err != nil {
 		if strings.HasPrefix(err.Error(), "Conflict") {
 			httpError(w, err.Error(), http.StatusConflict)
diff --git a/cluster/config.go b/cluster/config.go
index 951e647f8e..d74c408153 100644
--- a/cluster/config.go
+++ b/cluster/config.go
@@ -2,6 +2,8 @@ package cluster
 
 import (
 	"encoding/json"
+	"errors"
+	"fmt"
 	"strings"
 
 	"github.com/samalba/dockerclient"
@@ -63,9 +65,10 @@ func consolidateResourceFields(c *dockerclient.ContainerConfig) {
 // BuildContainerConfig creates a cluster.ContainerConfig from a dockerclient.ContainerConfig
 func BuildContainerConfig(c dockerclient.ContainerConfig) *ContainerConfig {
 	var (
-		affinities  []string
-		constraints []string
-		env         []string
+		affinities         []string
+		constraints        []string
+		reschedulePolicies []string
+		env                []string
 	)
 
 	// only for tests
@@ -83,12 +86,19 @@ func BuildContainerConfig(c dockerclient.ContainerConfig) *ContainerConfig {
 		json.Unmarshal([]byte(labels), &constraints)
 	}
 
-	// parse affinities/constraints from env (ex. docker run -e affinity:container==redis -e affinity:image==nginx -e constraint:region==us-east -e constraint:storage==ssd)
+	// parse reschedule policy from labels (ex. docker run --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]')
+	if labels, ok := c.Labels[SwarmLabelNamespace+".reschedule-policies"]; ok {
+		json.Unmarshal([]byte(labels), &reschedulePolicies)
+	}
+
+	// parse affinities/constraints/reschedule policies from env (ex. docker run -e affinity:container==redis -e affinity:image==nginx -e constraint:region==us-east -e constraint:storage==ssd -e reschedule:off)
 	for _, e := range c.Env {
 		if ok, key, value := parseEnv(e); ok && key == "affinity" {
 			affinities = append(affinities, value)
 		} else if ok && key == "constraint" {
 			constraints = append(constraints, value)
+		} else if ok && key == "reschedule" {
+			reschedulePolicies = append(reschedulePolicies, value)
 		} else {
 			env = append(env, e)
 		}
@@ -111,6 +121,13 @@ func BuildContainerConfig(c dockerclient.ContainerConfig) *ContainerConfig {
 		}
 	}
 
+	// store reschedule policies in labels
+	if len(reschedulePolicies) > 0 {
+		if labels, err := json.Marshal(reschedulePolicies); err == nil {
+			c.Labels[SwarmLabelNamespace+".reschedule-policies"] = string(labels)
+		}
+	}
+
 	consolidateResourceFields(&c)
 
 	return &ContainerConfig{c}
@@ -187,11 +204,32 @@ func (c *ContainerConfig) HaveNodeConstraint() bool {
 	return false
 }
 
-// Affinities returns all the affinities from the ContainerConfig
-func (c *ContainerConfig) ReschedulePolicy() string {
-	policy, ok := c.Labels[SwarmLabelNamespace+".reschedule-policy"]
-	if !ok {
-		return "no"
+// HasReschedulePolicy returns true if the specified policy is part of the config
+func (c *ContainerConfig) HasReschedulePolicy(p string) bool {
+	for _, reschedulePolicy := range c.extractExprs("reschedule-policies") {
+		if reschedulePolicy == p {
+			return true
+		}
 	}
-	return policy
+	return false
+}
+
+// Validate returns an error if the config isn't valid
+func (c *ContainerConfig) Validate() error {
+	//TODO: add validation for affinities and constraints
+	reschedulePolicies := c.extractExprs("reschedule-policies")
+	if len(reschedulePolicies) > 1 {
+		return errors.New("too many reschedule policies")
+	} else if len(reschedulePolicies) == 1 {
+		valid := false
+		for _, validReschedulePolicy := range []string{"off", "on-node-failure"} {
+			if reschedulePolicies[0] == validReschedulePolicy {
+				valid = true
+			}
+		}
+		if !valid {
+			return fmt.Errorf("invalid reschedule policy: %s", reschedulePolicies[0])
+		}
+	}
+	return nil
 }
diff --git a/cluster/mesos/cluster.go b/cluster/mesos/cluster.go
index 3bac670bf5..91f10c3bc0 100644
--- a/cluster/mesos/cluster.go
+++ b/cluster/mesos/cluster.go
@@ -159,7 +159,7 @@ func (c *Cluster) Handle(e *cluster.Event) error {
 	c.RLock()
 	defer c.RUnlock()
 
-	for h, _ := range c.eventHandlers {
+	for h := range c.eventHandlers {
 		if err := h.Handle(e); err != nil {
 			log.Error(err)
 		}
diff --git a/cluster/swarm/cluster.go b/cluster/swarm/cluster.go
index ccc9469763..0e77c3cd6c 100644
--- a/cluster/swarm/cluster.go
+++ b/cluster/swarm/cluster.go
@@ -94,7 +94,7 @@ func (c *Cluster) Handle(e *cluster.Event) error {
 	c.RLock()
 	defer c.RUnlock()
 
-	for h, _ := range c.eventHandlers {
+	for h := range c.eventHandlers {
 		if err := h.Handle(e); err != nil {
 			log.Error(err)
 		}
@@ -159,9 +159,12 @@ func (c *Cluster) createContainer(config *cluster.ContainerConfig, name string,
 		return nil, fmt.Errorf("Conflict: The name %s is already assigned. You have to delete (or rename) that container to be able to assign %s to a container again.", name, name)
 	}
 
-	// Associate a Swarm ID to the container we are creating.
-	swarmID := c.generateUniqueID()
-	config.SetSwarmID(swarmID)
+	swarmID := config.SwarmID()
+	if swarmID == "" {
+		// Associate a Swarm ID to the container we are creating.
+		swarmID = c.generateUniqueID()
+		config.SetSwarmID(swarmID)
+	}
 
 	if withImageAffinity {
 		config.AddAffinity("image==" + config.Image)
diff --git a/cluster/watchdog.go b/cluster/watchdog.go
index a623114171..7c2e067d28 100644
--- a/cluster/watchdog.go
+++ b/cluster/watchdog.go
@@ -6,8 +6,9 @@ import (
 	log "github.com/Sirupsen/logrus"
 )
 
+// Watchdog listens to cluster events and handles container rescheduling
 type Watchdog struct {
-	l       sync.Mutex
+	sync.Mutex
 	cluster Cluster
 }
@@ -19,22 +20,46 @@ func (w *Watchdog) Handle(e *Event) error {
 	}
 
 	switch e.Status {
+	case "engine_reconnect":
+		go w.removeDuplicateContainers(e.Engine)
 	case "engine_disconnect":
 		go w.rescheduleContainers(e.Engine)
 	}
-
 	return nil
 }
 
-func (w *Watchdog) rescheduleContainers(e *Engine) {
-	w.l.Lock()
-	defer w.l.Unlock()
+// Remove duplicate containers when a node comes back
+func (w *Watchdog) removeDuplicateContainers(e *Engine) {
+	log.Debugf("removing duplicate containers from Node %s", e.ID)
 
-	log.Infof("Node %s failed - rescheduling containers", e.ID)
+	e.RefreshContainers(false)
+
+	w.Lock()
+	defer w.Unlock()
+
+	for _, container := range e.Containers() {
+
+		for _, containerInCluster := range w.cluster.Containers() {
+			if containerInCluster.Config.SwarmID() == container.Config.SwarmID() && containerInCluster.Engine.ID != container.Engine.ID {
+				log.Debugf("container %s was rescheduled on node %s, removing it\n", container.Id, containerInCluster.Engine.ID)
+				// container already exists in the cluster, destroy it
+				e.RemoveContainer(container, true, true)
+			}
+		}
+	}
+}
+
+// Reschedule containers as soon as a node fails
+func (w *Watchdog) rescheduleContainers(e *Engine) {
+	w.Lock()
+	defer w.Unlock()
+
+	log.Debugf("Node %s failed - rescheduling containers", e.ID)
 	for _, c := range e.Containers() {
-		// Skip containers which don't have an "always" reschedule policy.
-		if c.Config.ReschedulePolicy() != "always" {
-			log.Debugf("Skipping rescheduling of %s based on rescheduling policy", c.Id)
+
+		// Skip containers which don't have an "on-node-failure" reschedule policy.
+		if !c.Config.HasReschedulePolicy("on-node-failure") {
+			log.Debugf("Skipping rescheduling of %s based on rescheduling policies", c.Id)
 			continue
 		}
@@ -44,23 +69,25 @@ func (w *Watchdog) rescheduleContainers(e *Engine) {
 		// will abort because the name is already taken.
 		c.Engine.removeContainer(c)
 
-		newContainer, err := w.cluster.CreateContainer(c.Config, c.Info.Name)
+		newContainer, err := w.cluster.CreateContainer(c.Config, c.Info.Name, nil)
 		if err != nil {
 			log.Errorf("Failed to reschedule container %s (Swarm ID: %s): %v", c.Id, c.Config.SwarmID(), err)
-			continue
-		}
-
-		log.Infof("Rescheduled container %s from %s to %s as %s (Swarm ID: %s)", c.Id, c.Engine.ID, newContainer.Engine.ID, newContainer.Id, c.Config.SwarmID())
-
-		if c.Info.State.Running {
-			if err := newContainer.Start(); err != nil {
-				log.Errorf("Failed to start rescheduled container %s", newContainer.Id)
+			// add the container back, so we can retry later
+			c.Engine.AddContainer(c)
+		} else {
+			log.Infof("Rescheduled container %s from %s to %s as %s (Swarm ID: %s)", c.Id, c.Engine.ID, newContainer.Engine.ID, newContainer.Id, c.Config.SwarmID())
+			if c.Info.State.Running {
+				if err := newContainer.Start(); err != nil {
+					log.Errorf("Failed to start rescheduled container %s", newContainer.Id)
+				}
 			}
 		}
 	}
+
 }
 
+// NewWatchdog creates a new watchdog
 func NewWatchdog(cluster Cluster) *Watchdog {
 	log.Debugf("Watchdog enabled")
 	w := &Watchdog{
diff --git a/docs/scheduler/index.md b/docs/scheduler/index.md
index 3a43ce7c28..30875f0cc5 100644
--- a/docs/scheduler/index.md
+++ b/docs/scheduler/index.md
@@ -10,7 +10,7 @@ parent="smn_workw_swarm"
 
 ## Advanced Scheduling
 
 To learn more about advanced scheduling, see the
-[strategies](strategy.md) and [filters](filter.md)
+[rescheduling](rescheduling.md), [strategies](strategy.md) and [filters](filter.md)
 documents.
diff --git a/docs/scheduler/rescheduling.md b/docs/scheduler/rescheduling.md
new file mode 100644
index 0000000000..e4fc34957b
--- /dev/null
+++ b/docs/scheduler/rescheduling.md
@@ -0,0 +1,46 @@
+
+
+# Rescheduling
+
+The Docker Swarm scheduler is able to detect node failures and restart the
+containers of a failed node on another node.
+
+## Rescheduling policies
+
+The rescheduling policies are:
+
+* `on-node-failure`
+* `off` (default if not specified)
+
+When you start a container, use the `reschedule` environment variable or the
+`com.docker.swarm.reschedule-policies` label to specify the policy to apply
+to the container.
+
+```
+# do not reschedule (default)
+$ docker run -d -e reschedule:off redis
+# or
+$ docker run -d -l 'com.docker.swarm.reschedule-policies=["off"]' redis
+```
+
+```
+# reschedule on node failure
+$ docker run -d -e reschedule:on-node-failure redis
+# or
+$ docker run -d -l 'com.docker.swarm.reschedule-policies=["on-node-failure"]' redis
+```
+
+- [Docker Swarm overview](../index.md)
+- [Discovery options](../discovery.md)
+- [Scheduler filters](filter.md)
+- [Swarm API](../api/swarm-api.md)
diff --git a/test/integration/rescheduling.bats b/test/integration/rescheduling.bats
index e4c2c1e8f4..f96e70dac2 100644
--- a/test/integration/rescheduling.bats
+++ b/test/integration/rescheduling.bats
@@ -9,19 +9,78 @@ function teardown() {
 @test "rescheduling" {
 	start_docker_with_busybox 2
-	swarm_manage
-
-	# Expect 2 nodes
-	docker_swarm info | grep -q "Nodes: 2"
+	swarm_manage --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-failure-retry=1 ${HOSTS[0]},${HOSTS[1]}
 
 	# c1 on node-0 with reschedule=on-node-failure
-	run docker_swarm run -dit --name c1 -e constraint:node==~node-0 --label com.docker.swarm.reschedule-policy=on-node-failure busybox sh
+	run docker_swarm run -dit --name c1 -e constraint:node==~node-0 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
 	[ "$status" -eq 0 ]
-	# c2 on node-0 with reschedule=never
-	run docker_swarm run -dit --name c2 -e constraint:node==~node-0 --label com.docker.swarm.reschedule-policy=off busybox sh
+	# c2 on node-0 with reschedule=off
+	run docker_swarm run -dit --name c2 -e constraint:node==~node-0 --label 'com.docker.swarm.reschedule-policies=["off"]' busybox sh
 	[ "$status" -eq 0 ]
 	# c3 on node-1
-	run docker_swarm run -dit --name c3 -e constraint:node==~node-1 --label com.docker.swarm.reschedule-policy=on-node-failure busybox sh
+	run docker_swarm run -dit --name c3 -e constraint:node==~node-1 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
+	[ "$status" -eq 0 ]
+
+	run docker_swarm ps -q
+	[ "${#lines[@]}" -eq 3 ]
+
+	# Make sure containers are running where they should.
+	run docker_swarm inspect c1
+	[ "$status" -eq 0 ]
+	[[ "${output}" == *'"Name": "node-0"'* ]]
+	run docker_swarm inspect c2
+	[ "$status" -eq 0 ]
+	[[ "${output}" == *'"Name": "node-0"'* ]]
+	run docker_swarm inspect c3
+	[ "$status" -eq 0 ]
+	[[ "${output}" == *'"Name": "node-1"'* ]]
+
+	# Get c1 swarm id
+	swarm_id=$(docker_swarm inspect -f '{{ index .Config.Labels "com.docker.swarm.id" }}' c1)
+
+	# Stop node-0
+	docker_host stop ${DOCKER_CONTAINERS[0]}
+
+	# Wait for Swarm to detect the node failure.
+	retry 5 1 eval "docker_swarm info | grep -q 'Unhealthy'"
+
+	# Wait for the container to be rescheduled
+	retry 5 1 eval docker_swarm inspect c1
+
+	# c1 should have been rescheduled from node-0 to node-1
+	run docker_swarm inspect c1
+	[ "$status" -eq 0 ]
+	[[ "${output}" == *'"Name": "node-1"'* ]]
+
+	# Check swarm id didn't change for c1
+	[[ "$swarm_id" == $(docker_swarm inspect -f '{{ index .Config.Labels "com.docker.swarm.id" }}' c1) ]]
+
+	run docker_swarm inspect "$swarm_id"
+	[ "$status" -eq 0 ]
+	[[ "${output}" == *'"Name": "node-1"'* ]]
+
+	# c2 should still be on node-0 since the rescheduling policy was off.
+	run docker_swarm inspect c2
+	[ "$status" -eq 1 ]
+
+	# c3 should still be on node-1 since it wasn't affected
+	run docker_swarm inspect c3
+	[ "$status" -eq 0 ]
+	[[ "${output}" == *'"Name": "node-1"'* ]]
+}
+
+@test "rescheduling with constraints" {
+	start_docker_with_busybox 2
+	swarm_manage --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-failure-retry=1 ${HOSTS[0]},${HOSTS[1]}
+
+	# c1 on node-0 with reschedule=on-node-failure
+	run docker_swarm run -dit --name c1 -e constraint:node==~node-0 -e reschedule:on-node-failure busybox sh
+	[ "$status" -eq 0 ]
+	# c2 on node-0 with a hard node constraint and reschedule=on-node-failure
+	run docker_swarm run -dit --name c2 -e constraint:node==node-0 -e reschedule:on-node-failure busybox sh
+	[ "$status" -eq 0 ]
+	# c3 on node-1
+	run docker_swarm run -dit --name c3 -e constraint:node==node-1 -e reschedule:on-node-failure busybox sh
 	[ "$status" -eq 0 ]
 
 	run docker_swarm ps -q
@@ -42,17 +101,17 @@ function teardown() {
 	docker_host stop ${DOCKER_CONTAINERS[0]}
 
 	# Wait for Swarm to detect the node failure.
-	#retry 10 1 eval "docker_swarm info | grep -q 'Nodes: 1'"
+	retry 5 1 eval "docker_swarm info | grep -q 'Unhealthy'"
 
-	sleep 5
-	docker_swarm ps
+	# Wait for the container to be rescheduled
+	retry 5 1 eval docker_swarm inspect c1
 
 	# c1 should have been rescheduled from node-0 to node-1
 	run docker_swarm inspect c1
 	[ "$status" -eq 0 ]
 	[[ "${output}" == *'"Name": "node-1"'* ]]
 
-	# c2 should still be on node-0 since the rescheduling policy was off.
+	# c2 should still be on node-0 since a node constraint was applied.
 	run docker_swarm inspect c2
 	[ "$status" -eq 1 ]
@@ -61,3 +120,65 @@ function teardown() {
 	[ "$status" -eq 0 ]
 	[[ "${output}" == *'"Name": "node-1"'* ]]
 }
+
+@test "reschedule conflict" {
+	start_docker_with_busybox 2
+	swarm_manage
+
+	run docker_swarm run --name c1 -dit --label 'com.docker.swarm.reschedule-policies=["false"]' busybox sh
+	[ "$status" -ne 0 ]
+	[[ "${output}" == *'invalid reschedule policy: false'* ]]
+
+	run docker_swarm run --name c2 -dit -e reschedule:off --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
+	[ "$status" -ne 0 ]
+	[[ "${output}" == *'too many reschedule policies'* ]]
+}
+
+@test "rescheduling node comes back" {
+	start_docker_with_busybox 2
+	swarm_manage --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-failure-retry=1 ${HOSTS[0]},${HOSTS[1]}
+
+	# c1 on node-0 with reschedule=on-node-failure
+	run docker_swarm run -dit --name c1 -e constraint:node==~node-0 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
+	[ "$status" -eq 0 ]
+	# c2 on node-1
+	run docker_swarm run -dit --name c2 -e constraint:node==~node-1 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
+	[ "$status" -eq 0 ]
+
+	run docker_swarm ps -q
+	[ "${#lines[@]}" -eq 2 ]
+
+	# Make sure containers are running where they should.
+	run docker_swarm inspect c1
+	[ "$status" -eq 0 ]
+	[[ "${output}" == *'"Name": "node-0"'* ]]
+	run docker_swarm inspect c2
+	[ "$status" -eq 0 ]
+	[[ "${output}" == *'"Name": "node-1"'* ]]
+
+	# Stop node-0
+	docker_host stop ${DOCKER_CONTAINERS[0]}
+
+	# Wait for Swarm to detect the node failure.
+	retry 5 1 eval "docker_swarm info | grep -q 'Unhealthy'"
+
+	# Wait for the container to be rescheduled
+	retry 5 1 eval docker_swarm inspect c1
+
+	# c1 should have been rescheduled from node-0 to node-1
+	run docker_swarm inspect c1
+	[ "$status" -eq 0 ]
+	[[ "${output}" == *'"Name": "node-1"'* ]]
+
+	# c2 should still be on node-1 since it wasn't affected
+	run docker_swarm inspect c2
+	[ "$status" -eq 0 ]
+	[[ "${output}" == *'"Name": "node-1"'* ]]
+
+	# Restart node-0
+	docker_host start ${DOCKER_CONTAINERS[0]}
+
+	sleep 5
+	run docker_swarm ps
+	[ "${#lines[@]}" -eq 3 ]
+}
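
For reviewers, a minimal usage sketch of the API introduced above: `BuildContainerConfig` lifts a `reschedule:...` env entry (or the `com.docker.swarm.reschedule-policies` label) into the config, `Validate` rejects unknown or conflicting policies, and `HasReschedulePolicy` is the check the watchdog performs before rescheduling. The `main` wrapper, the `redis` image and the printed strings are illustrative only; the import paths assume this repository and `github.com/samalba/dockerclient`.

```go
package main

import (
	"fmt"

	"github.com/docker/swarm/cluster"
	"github.com/samalba/dockerclient"
)

func main() {
	// Roughly what `docker run -e reschedule:on-node-failure redis` produces:
	// BuildContainerConfig consumes the reschedule env entry and stores it as
	// a JSON list under the com.docker.swarm.reschedule-policies label.
	config := cluster.BuildContainerConfig(dockerclient.ContainerConfig{
		Image:  "redis",
		Env:    []string{"reschedule:on-node-failure"},
		Labels: map[string]string{},
	})

	// Validate returns "invalid reschedule policy: ..." for unknown values
	// and "too many reschedule policies" when more than one is given.
	if err := config.Validate(); err != nil {
		fmt.Println("rejected:", err)
		return
	}

	// This is the check the watchdog runs before rescheduling a container.
	fmt.Println("reschedule on node failure:", config.HasReschedulePolicy("on-node-failure"))
	fmt.Println("stored label:", config.Labels[cluster.SwarmLabelNamespace+".reschedule-policies"])
}
```

Against this branch the sketch should print `true` and `["on-node-failure"]`; swapping the env entry for an unknown value such as `reschedule:sometimes` should exercise the `invalid reschedule policy` path instead.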