fix tests and keep swarm id
remove duplicate on node reconnect
explicit failure

Signed-off-by: Victor Vieux <vieux@docker.com>
Victor Vieux 2015-12-10 21:07:44 -08:00
parent 13f60212f5
commit 78008f4d4a
8 changed files with 289 additions and 48 deletions

View File

@@ -438,8 +438,14 @@ func postContainersCreate(c *context, w http.ResponseWriter, r *http.Request) {
authConfig = &dockerclient.AuthConfig{}
json.Unmarshal(buf, authConfig)
}
containerConfig := cluster.BuildContainerConfig(config)
container, err := c.cluster.CreateContainer(cluster.BuildContainerConfig(config), name, authConfig)
if err := containerConfig.Validate(); err != nil {
httpError(w, err.Error(), http.StatusInternalServerError)
return
}
container, err := c.cluster.CreateContainer(containerConfig, name, authConfig)
if err != nil {
if strings.HasPrefix(err.Error(), "Conflict") {
httpError(w, err.Error(), http.StatusConflict)
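A hedged sketch of what this check changes for API clients: a create request carrying an invalid reschedule policy now fails at the manager before it reaches the scheduler. The manager address and the plain net/http client are assumptions; the error text and the 500 status come from the handler above and from `Validate` in the next file.

```go
package main

import (
	"bytes"
	"fmt"
	"io/ioutil"
	"net/http"
)

func main() {
	// Docker remote API create request, sent to the Swarm manager
	// (the address is an assumption).
	body := []byte(`{"Image": "busybox",
		"Labels": {"com.docker.swarm.reschedule-policies": "[\"false\"]"}}`)

	resp, err := http.Post("http://localhost:2375/containers/create?name=c1",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	msg, _ := ioutil.ReadAll(resp.Body)
	// Expected with this commit: 500 and "invalid reschedule policy: false".
	fmt.Println(resp.StatusCode, string(msg))
}
```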

View File

@@ -2,6 +2,8 @@ package cluster
import (
"encoding/json"
"errors"
"fmt"
"strings"
"github.com/samalba/dockerclient"
@@ -63,9 +65,10 @@ func consolidateResourceFields(c *dockerclient.ContainerConfig) {
// BuildContainerConfig creates a cluster.ContainerConfig from a dockerclient.ContainerConfig
func BuildContainerConfig(c dockerclient.ContainerConfig) *ContainerConfig {
var (
affinities []string
constraints []string
env []string
affinities []string
constraints []string
reschedulePolicies []string
env []string
)
// only for tests
@@ -83,12 +86,19 @@ func BuildContainerConfig(c dockerclient.ContainerConfig) *ContainerConfig {
json.Unmarshal([]byte(labels), &constraints)
}
// parse affinities/constraints from env (ex. docker run -e affinity:container==redis -e affinity:image==nginx -e constraint:region==us-east -e constraint:storage==ssd)
// parse reschedule policy from labels (ex. docker run --label 'com.docker.swarm.reschedule-policies=on-node-failure')
if labels, ok := c.Labels[SwarmLabelNamespace+".reschedule-policies"]; ok {
json.Unmarshal([]byte(labels), &reschedulePolicies)
}
// parse affinities/constraints/reschedule policies from env (ex. docker run -e affinity:container==redis -e affinity:image==nginx -e constraint:region==us-east -e constraint:storage==ssd -e reschedule:off)
for _, e := range c.Env {
if ok, key, value := parseEnv(e); ok && key == "affinity" {
affinities = append(affinities, value)
} else if ok && key == "constraint" {
constraints = append(constraints, value)
} else if ok && key == "reschedule" {
reschedulePolicies = append(reschedulePolicies, value)
} else {
env = append(env, e)
}
@@ -111,6 +121,13 @@ func BuildContainerConfig(c dockerclient.ContainerConfig) *ContainerConfig {
}
}
// store reschedule policies in labels
if len(reschedulePolicies) > 0 {
if labels, err := json.Marshal(reschedulePolicies); err == nil {
c.Labels[SwarmLabelNamespace+".reschedule-policies"] = string(labels)
}
}
consolidateResourceFields(&c)
return &ContainerConfig{c}
@@ -187,11 +204,32 @@ func (c *ContainerConfig) HaveNodeConstraint() bool {
return false
}
// Affinities returns all the affinities from the ContainerConfig
func (c *ContainerConfig) ReschedulePolicy() string {
policy, ok := c.Labels[SwarmLabelNamespace+".reschedule-policy"]
if !ok {
return "no"
// HasReschedulePolicy returns true if the specified policy is part of the config
func (c *ContainerConfig) HasReschedulePolicy(p string) bool {
for _, reschedulePolicy := range c.extractExprs("reschedule-policies") {
if reschedulePolicy == p {
return true
}
}
return policy
return false
}
// Validate returns an error if the config isn't valid
func (c *ContainerConfig) Validate() error {
//TODO: add validation for affinities and constraints
reschedulePolicies := c.extractExprs("reschedule-policies")
if len(reschedulePolicies) > 1 {
return errors.New("too many reschedule policies")
} else if len(reschedulePolicies) == 1 {
valid := false
for _, validReschedulePolicy := range []string{"off", "on-node-failure"} {
if reschedulePolicies[0] == validReschedulePolicy {
valid = true
}
}
if !valid {
return fmt.Errorf("invalid reschedule policy: %s", reschedulePolicies[0])
}
}
return nil
}
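For reference, a minimal sketch of how the helpers above fit together, assuming the `github.com/docker/swarm/cluster` import path and the exported `BuildContainerConfig`, `Validate` and `HasReschedulePolicy` from this file (the `main` wrapper and printed checks are illustrative only):

```go
package main

import (
	"fmt"

	"github.com/docker/swarm/cluster"
	"github.com/samalba/dockerclient"
)

func main() {
	// "reschedule:on-node-failure" is pulled out of Env and stored as a JSON
	// list under the com.docker.swarm.reschedule-policies label.
	config := cluster.BuildContainerConfig(dockerclient.ContainerConfig{
		Image:  "redis",
		Env:    []string{"reschedule:on-node-failure"},
		Labels: map[string]string{},
	})

	// Validate rejects unknown policies ("invalid reschedule policy: ...")
	// and more than one policy ("too many reschedule policies").
	if err := config.Validate(); err != nil {
		fmt.Println("invalid config:", err)
		return
	}

	fmt.Println(config.HasReschedulePolicy("on-node-failure")) // true
	fmt.Println(config.Labels["com.docker.swarm.reschedule-policies"])
}
```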

View File

@@ -159,7 +159,7 @@ func (c *Cluster) Handle(e *cluster.Event) error {
c.RLock()
defer c.RUnlock()
for h, _ := range c.eventHandlers {
for h := range c.eventHandlers {
if err := h.Handle(e); err != nil {
log.Error(err)
}

View File

@@ -94,7 +94,7 @@ func (c *Cluster) Handle(e *cluster.Event) error {
c.RLock()
defer c.RUnlock()
for h, _ := range c.eventHandlers {
for h := range c.eventHandlers {
if err := h.Handle(e); err != nil {
log.Error(err)
}
@@ -159,9 +159,12 @@ func (c *Cluster) createContainer(config *cluster.ContainerConfig, name string,
return nil, fmt.Errorf("Conflict: The name %s is already assigned. You have to delete (or rename) that container to be able to assign %s to a container again.", name, name)
}
// Associate a Swarm ID to the container we are creating.
swarmID := c.generateUniqueID()
config.SetSwarmID(swarmID)
swarmID := config.SwarmID()
if swarmID == "" {
// Associate a Swarm ID to the container we are creating.
swarmID = c.generateUniqueID()
config.SetSwarmID(swarmID)
}
if withImageAffinity {
config.AddAffinity("image==" + config.Image)

View File

@@ -6,8 +6,9 @@ import (
log "github.com/Sirupsen/logrus"
)
// Watchdog listens to cluster events and handles container rescheduling
type Watchdog struct {
l sync.Mutex
sync.Mutex
cluster Cluster
}
@@ -19,22 +20,46 @@ func (w *Watchdog) Handle(e *Event) error {
}
switch e.Status {
case "engine_reconnect":
go w.removeDuplicateContainers(e.Engine)
case "engine_disconnect":
go w.rescheduleContainers(e.Engine)
}
return nil
}
func (w *Watchdog) rescheduleContainers(e *Engine) {
w.l.Lock()
defer w.l.Unlock()
// Remove duplicate containers when a node comes back
func (w *Watchdog) removeDuplicateContainers(e *Engine) {
log.Debugf("removing duplicate containers from Node %s", e.ID)
log.Infof("Node %s failed - rescheduling containers", e.ID)
e.RefreshContainers(false)
w.Lock()
defer w.Unlock()
for _, container := range e.Containers() {
for _, containerInCluster := range w.cluster.Containers() {
if containerInCluster.Config.SwarmID() == container.Config.SwarmID() && containerInCluster.Engine.ID != container.Engine.ID {
log.Debugf("container %s was rescheduled on node %s, removing it\n", container.Id, containerInCluster.Engine.ID)
// container already exists in the cluster, destroy it
e.RemoveContainer(container, true, true)
}
}
}
}
// Reschedule containers as soon as a node fails
func (w *Watchdog) rescheduleContainers(e *Engine) {
w.Lock()
defer w.Unlock()
log.Debugf("Node %s failed - rescheduling containers", e.ID)
for _, c := range e.Containers() {
// Skip containers which don't have an "always" reschedule policy.
if c.Config.ReschedulePolicy() != "always" {
log.Debugf("Skipping rescheduling of %s based on rescheduling policy", c.Id)
// Skip containers which don't have an "on-node-failure" reschedule policy.
if !c.Config.HasReschedulePolicy("on-node-failure") {
log.Debugf("Skipping rescheduling of %s based on rescheduling policies", c.Id)
continue
}
@@ -44,23 +69,25 @@ func (w *Watchdog) rescheduleContainers(e *Engine) {
// will abort because the name is already taken.
c.Engine.removeContainer(c)
newContainer, err := w.cluster.CreateContainer(c.Config, c.Info.Name)
newContainer, err := w.cluster.CreateContainer(c.Config, c.Info.Name, nil)
if err != nil {
log.Errorf("Failed to reschedule container %s (Swarm ID: %s): %v", c.Id, c.Config.SwarmID(), err)
continue
}
log.Infof("Rescheduled container %s from %s to %s as %s (Swarm ID: %s)", c.Id, c.Engine.ID, newContainer.Engine.ID, newContainer.Id, c.Config.SwarmID())
if c.Info.State.Running {
if err := newContainer.Start(); err != nil {
log.Errorf("Failed to start rescheduled container %s", newContainer.Id)
// add the container back, so we can retry later
c.Engine.AddContainer(c)
} else {
log.Infof("Rescheduled container %s from %s to %s as %s (Swarm ID: %s)", c.Id, c.Engine.ID, newContainer.Engine.ID, newContainer.Id, c.Config.SwarmID())
if c.Info.State.Running {
if err := newContainer.Start(); err != nil {
log.Errorf("Failed to start rescheduled container %s", newContainer.Id)
}
}
}
}
}
// NewWatchdog creates a new watchdog
func NewWatchdog(cluster Cluster) *Watchdog {
log.Debugf("Watchdog enabled")
w := &Watchdog{

View File

@@ -10,7 +10,7 @@ parent="smn_workw_swarm"
## Advanced Scheduling
To learn more about advanced scheduling, see the
[strategies](strategy.md) and [filters](filter.md)
To learn more about advanced scheduling, see the [rescheduling](rescheduling.md), [strategies](strategy.md) and [filters](filter.md)
documents.

View File

@@ -0,0 +1,46 @@
<!--[metadata]>
+++
title = "Docker Swarm recheduling"
description = "Swarm rescheduling"
keywords = ["docker, swarm, clustering, rescheduling"]
[menu.main]
parent="smn_workw_swarm"
weight=5
+++
<![end-metadata]-->
# Rescheduling
The Docker Swarm scheduler can detect node failure and restart that node's
containers on another node.
## Rescheduling policies
The rescheduling policies are:
* `on-node-failure`
* `off` (default if not specified)
When you start a container, use the `reschedule` environment variable or the
`com.docker.swarm.reschedule-policies` label to specify the policy to apply
to the container, as shown below.
```
# do not reschedule (default)
$ docker run -d -e reschedule:off redis
# or
$ docker run -d -l 'com.docker.swarm.reschedule-policy=["off"]' redis
```
```
# reschedule on node failure
$ docker run -d -e reschedule:on-node-failure redis
# or
$ docker run -d -l 'com.docker.swarm.reschedule-policy=["on-node-failure"]' redis
```
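Note that the label value is a JSON-encoded list of policy names, which is why the examples above quote it as `["on-node-failure"]`. A minimal, purely illustrative Go sketch of building that value with the standard library:

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Swarm stores reschedule policies as a JSON list under the
	// com.docker.swarm.reschedule-policies label.
	value, err := json.Marshal([]string{"on-node-failure"})
	if err != nil {
		panic(err)
	}
	fmt.Printf("com.docker.swarm.reschedule-policies=%s\n", value)
	// Prints: com.docker.swarm.reschedule-policies=["on-node-failure"]
}
```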
- [Docker Swarm overview](../index.md)
- [Discovery options](../discovery.md)
- [Scheduler filters](filter.md)
- [Swarm API](../api/swarm-api.md)

View File

@@ -9,19 +9,78 @@ function teardown() {
@test "rescheduling" {
start_docker_with_busybox 2
swarm_manage
# Expect 2 nodes
docker_swarm info | grep -q "Nodes: 2"
swarm_manage --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-failure-retry=1 ${HOSTS[0]},${HOSTS[1]}
# c1 on node-0 with reschedule=on-node-failure
run docker_swarm run -dit --name c1 -e constraint:node==~node-0 --label com.docker.swarm.reschedule-policy=on-node-failure busybox sh
run docker_swarm run -dit --name c1 -e constraint:node==~node-0 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
[ "$status" -eq 0 ]
# c2 on node-0 with reschedule=never
run docker_swarm run -dit --name c2 -e constraint:node==~node-0 --label com.docker.swarm.reschedule-policy=off busybox sh
# c2 on node-0 with reschedule=off
run docker_swarm run -dit --name c2 -e constraint:node==~node-0 --label 'com.docker.swarm.reschedule-policies=["off"]' busybox sh
[ "$status" -eq 0 ]
# c3 on node-1
run docker_swarm run -dit --name c3 -e constraint:node==~node-1 --label com.docker.swarm.reschedule-policy=on-node-failure busybox sh
run docker_swarm run -dit --name c3 -e constraint:node==~node-1 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
[ "$status" -eq 0 ]
run docker_swarm ps -q
[ "${#lines[@]}" -eq 3 ]
# Make sure containers are running where they should.
run docker_swarm inspect c1
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-0"'* ]]
run docker_swarm inspect c2
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-0"'* ]]
run docker_swarm inspect c3
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# Get c1 swarm id
swarm_id=$(docker_swarm inspect -f '{{ index .Config.Labels "com.docker.swarm.id" }}' c1)
# Stop node-0
docker_host stop ${DOCKER_CONTAINERS[0]}
# Wait for Swarm to detect the node failure.
retry 5 1 eval "docker_swarm info | grep -q 'Unhealthy'"
# Wait for the container to be rescheduled
retry 5 1 eval docker_swarm inspect c1
# c1 should have been rescheduled from node-0 to node-1
run docker_swarm inspect c1
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# Check swarm id didn't change for c1
[[ "$swarm_id" == $(docker_swarm inspect -f '{{ index .Config.Labels "com.docker.swarm.id" }}' c1) ]]
run docker_swarm inspect "$swarm_id"
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# c2 should still be on node-0 since the rescheduling policy was off.
run docker_swarm inspect c2
[ "$status" -eq 1 ]
# c3 should still be on node-1 since it wasn't affected
run docker_swarm inspect c3
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
}
@test "rescheduling with constraints" {
start_docker_with_busybox 2
swarm_manage --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-failure-retry=1 ${HOSTS[0]},${HOSTS[1]}
# c1 on node-0 with reschedule=on-node-failure
run docker_swarm run -dit --name c1 -e constraint:node==~node-0 -e reschedule:on-node-failure busybox sh
[ "$status" -eq 0 ]
# c2 on node-0 with reschedule=on-node-failure and a hard node constraint
run docker_swarm run -dit --name c2 -e constraint:node==node-0 -e reschedule:on-node-failure busybox sh
[ "$status" -eq 0 ]
# c3 on node-1
run docker_swarm run -dit --name c3 -e constraint:node==node-1 -e reschedule:on-node-failure busybox sh
[ "$status" -eq 0 ]
run docker_swarm ps -q
@@ -42,17 +101,17 @@ function teardown() {
docker_host stop ${DOCKER_CONTAINERS[0]}
# Wait for Swarm to detect the node failure.
#retry 10 1 eval "docker_swarm info | grep -q 'Nodes: 1'"
retry 5 1 eval "docker_swarm info | grep -q 'Unhealthy'"
sleep 5
docker_swarm ps
# Wait for the container to be rescheduled
retry 5 1 eval docker_swarm inspect c1
# c1 should have been rescheduled from node-0 to node-1
run docker_swarm inspect c1
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# c2 should still be on node-0 since the rescheduling policy was off.
# c2 should still be on node-0 since a node constraint was applied.
run docker_swarm inspect c2
[ "$status" -eq 1 ]
@@ -61,3 +120,65 @@ function teardown() {
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
}
@test "reschedule conflict" {
start_docker_with_busybox 2
swarm_manage
run docker_swarm run --name c1 -dit --label 'com.docker.swarm.reschedule-policies=["false"]' busybox sh
[ "$status" -ne 0 ]
[[ "${output}" == *'invalid reschedule policy: false'* ]]
run docker_swarm run --name c2 -dit -e reschedule:off --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
[ "$status" -ne 0 ]
[[ "${output}" == *'too many reschedule policies'* ]]
}
@test "rescheduling node comes back" {
start_docker_with_busybox 2
swarm_manage --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-failure-retry=1 ${HOSTS[0]},${HOSTS[1]}
# c1 on node-0 with reschedule=on-node-failure
run docker_swarm run -dit --name c1 -e constraint:node==~node-0 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
[ "$status" -eq 0 ]
# c2 on node-1
run docker_swarm run -dit --name c2 -e constraint:node==~node-1 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
[ "$status" -eq 0 ]
run docker_swarm ps -q
[ "${#lines[@]}" -eq 2 ]
# Make sure containers are running where they should.
run docker_swarm inspect c1
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-0"'* ]]
run docker_swarm inspect c2
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# Stop node-0
docker_host stop ${DOCKER_CONTAINERS[0]}
# Wait for Swarm to detect the node failure.
retry 5 1 eval "docker_swarm info | grep -q 'Unhealthy'"
# Wait for the container to be rescheduled
retry 5 1 eval docker_swarm inspect c1
# c1 should have been rescheduled from node-0 to node-1
run docker_swarm inspect c1
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# c2 should still be on node-1 since it wasn't affected
run docker_swarm inspect c2
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# Restart node-0
docker_host start ${DOCKER_CONTAINERS[0]}
sleep 5
run docker_swarm ps
[ "${#lines[@]}" -eq 3 ]
}