fix tests and keep swarm id
remove duplicate on node reconnect
explicit failure

Signed-off-by: Victor Vieux <vieux@docker.com>
Victor Vieux 2015-12-10 21:07:44 -08:00
parent 13f60212f5
commit 78008f4d4a
8 changed files with 289 additions and 48 deletions

View File

@@ -438,8 +438,14 @@ func postContainersCreate(c *context, w http.ResponseWriter, r *http.Request) {
authConfig = &dockerclient.AuthConfig{}
json.Unmarshal(buf, authConfig)
}
containerConfig := cluster.BuildContainerConfig(config)
container, err := c.cluster.CreateContainer(cluster.BuildContainerConfig(config), name, authConfig)
if err := containerConfig.Validate(); err != nil {
httpError(w, err.Error(), http.StatusInternalServerError)
return
}
container, err := c.cluster.CreateContainer(containerConfig, name, authConfig)
if err != nil {
if strings.HasPrefix(err.Error(), "Conflict") {
httpError(w, err.Error(), http.StatusConflict)
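A hedged sketch of what this check changes for API clients: a create request carrying an invalid reschedule policy now fails at the manager before it reaches the scheduler. The manager address and the plain net/http client are assumptions; the error text and the 500 status come from the handler above and from `Validate` in the next file.

```go
package main

import (
	"bytes"
	"fmt"
	"io/ioutil"
	"net/http"
)

func main() {
	// Docker remote API create request, sent to the Swarm manager
	// (the address is an assumption).
	body := []byte(`{"Image": "busybox",
		"Labels": {"com.docker.swarm.reschedule-policies": "[\"false\"]"}}`)

	resp, err := http.Post("http://localhost:2375/containers/create?name=c1",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	msg, _ := ioutil.ReadAll(resp.Body)
	// Expected with this commit: 500 and "invalid reschedule policy: false".
	fmt.Println(resp.StatusCode, string(msg))
}
```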

View File

@@ -2,6 +2,8 @@ package cluster
import (
"encoding/json"
"errors"
"fmt"
"strings"
"github.com/samalba/dockerclient"
@@ -63,9 +65,10 @@ func consolidateResourceFields(c *dockerclient.ContainerConfig) {
// BuildContainerConfig creates a cluster.ContainerConfig from a dockerclient.ContainerConfig
func BuildContainerConfig(c dockerclient.ContainerConfig) *ContainerConfig {
var (
affinities []string
constraints []string
env []string
affinities []string
constraints []string
reschedulePolicies []string
env []string
)
// only for tests
@@ -83,12 +86,19 @@ func BuildContainerConfig(c dockerclient.ContainerConfig) *ContainerConfig {
json.Unmarshal([]byte(labels), &constraints)
}
// parse affinities/constraints from env (ex. docker run -e affinity:container==redis -e affinity:image==nginx -e constraint:region==us-east -e constraint:storage==ssd)
// parse reschedule policy from labels (ex. docker run --label 'com.docker.swarm.reschedule-policies=on-node-failure')
if labels, ok := c.Labels[SwarmLabelNamespace+".reschedule-policies"]; ok {
json.Unmarshal([]byte(labels), &reschedulePolicies)
}
// parse affinities/constraints/reschedule policies from env (ex. docker run -e affinity:container==redis -e affinity:image==nginx -e constraint:region==us-east -e constraint:storage==ssd -e reschedule:off)
for _, e := range c.Env {
if ok, key, value := parseEnv(e); ok && key == "affinity" {
affinities = append(affinities, value)
} else if ok && key == "constraint" {
constraints = append(constraints, value)
} else if ok && key == "reschedule" {
reschedulePolicies = append(reschedulePolicies, value)
} else {
env = append(env, e)
}
@@ -111,6 +121,13 @@ func BuildContainerConfig(c dockerclient.ContainerConfig) *ContainerConfig {
}
}
// store reschedule policies in labels
if len(reschedulePolicies) > 0 {
if labels, err := json.Marshal(reschedulePolicies); err == nil {
c.Labels[SwarmLabelNamespace+".reschedule-policies"] = string(labels)
}
}
consolidateResourceFields(&c)
return &ContainerConfig{c}
@@ -187,11 +204,32 @@ func (c *ContainerConfig) HaveNodeConstraint() bool {
return false
}
// Affinities returns all the affinities from the ContainerConfig
func (c *ContainerConfig) ReschedulePolicy() string {
policy, ok := c.Labels[SwarmLabelNamespace+".reschedule-policy"]
if !ok {
return "no"
// HasReschedulePolicy returns true if the specified policy is part of the config
func (c *ContainerConfig) HasReschedulePolicy(p string) bool {
for _, reschedulePolicy := range c.extractExprs("reschedule-policies") {
if reschedulePolicy == p {
return true
}
}
return policy
return false
}
// Validate returns an error if the config isn't valid
func (c *ContainerConfig) Validate() error {
//TODO: add validation for affinities and constraints
reschedulePolicies := c.extractExprs("reschedule-policies")
if len(reschedulePolicies) > 1 {
return errors.New("too many reschedule policies")
} else if len(reschedulePolicies) == 1 {
valid := false
for _, validReschedulePolicy := range []string{"off", "on-node-failure"} {
if reschedulePolicies[0] == validReschedulePolicy {
valid = true
}
}
if !valid {
return fmt.Errorf("invalid reschedule policy: %s", reschedulePolicies[0])
}
}
return nil
}
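For reference, a minimal sketch of how the helpers above fit together, assuming the `github.com/docker/swarm/cluster` import path and the exported `BuildContainerConfig`, `Validate` and `HasReschedulePolicy` from this file (the `main` wrapper and printed checks are illustrative only):

```go
package main

import (
	"fmt"

	"github.com/docker/swarm/cluster"
	"github.com/samalba/dockerclient"
)

func main() {
	// "reschedule:on-node-failure" is pulled out of Env and stored as a JSON
	// list under the com.docker.swarm.reschedule-policies label.
	config := cluster.BuildContainerConfig(dockerclient.ContainerConfig{
		Image:  "redis",
		Env:    []string{"reschedule:on-node-failure"},
		Labels: map[string]string{},
	})

	// Validate rejects unknown policies ("invalid reschedule policy: ...")
	// and more than one policy ("too many reschedule policies").
	if err := config.Validate(); err != nil {
		fmt.Println("invalid config:", err)
		return
	}

	fmt.Println(config.HasReschedulePolicy("on-node-failure")) // true
	fmt.Println(config.Labels["com.docker.swarm.reschedule-policies"])
}
```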

View File

@@ -159,7 +159,7 @@ func (c *Cluster) Handle(e *cluster.Event) error {
c.RLock()
defer c.RUnlock()
for h, _ := range c.eventHandlers {
for h := range c.eventHandlers {
if err := h.Handle(e); err != nil {
log.Error(err)
}

View File

@@ -94,7 +94,7 @@ func (c *Cluster) Handle(e *cluster.Event) error {
c.RLock()
defer c.RUnlock()
for h, _ := range c.eventHandlers {
for h := range c.eventHandlers {
if err := h.Handle(e); err != nil {
log.Error(err)
}
@@ -159,9 +159,12 @@ func (c *Cluster) createContainer(config *cluster.ContainerConfig, name string,
return nil, fmt.Errorf("Conflict: The name %s is already assigned. You have to delete (or rename) that container to be able to assign %s to a container again.", name, name)
}
// Associate a Swarm ID to the container we are creating.
swarmID := c.generateUniqueID()
config.SetSwarmID(swarmID)
swarmID := config.SwarmID()
if swarmID == "" {
// Associate a Swarm ID to the container we are creating.
swarmID = c.generateUniqueID()
config.SetSwarmID(swarmID)
}
if withImageAffinity {
config.AddAffinity("image==" + config.Image)

View File

@@ -6,8 +6,9 @@ import (
log "github.com/Sirupsen/logrus"
)
// Watchdog listens to cluster events and handles container rescheduling
type Watchdog struct {
l sync.Mutex
sync.Mutex
cluster Cluster
}
@@ -19,22 +20,46 @@ func (w *Watchdog) Handle(e *Event) error {
}
switch e.Status {
case "engine_reconnect":
go w.removeDuplicateContainers(e.Engine)
case "engine_disconnect":
go w.rescheduleContainers(e.Engine)
}
return nil
}
func (w *Watchdog) rescheduleContainers(e *Engine) {
w.l.Lock()
defer w.l.Unlock()
// Remove duplicate containers when a node comes back
func (w *Watchdog) removeDuplicateContainers(e *Engine) {
log.Debugf("removing duplicate containers from Node %s", e.ID)
log.Infof("Node %s failed - rescheduling containers", e.ID)
e.RefreshContainers(false)
w.Lock()
defer w.Unlock()
for _, container := range e.Containers() {
for _, containerInCluster := range w.cluster.Containers() {
if containerInCluster.Config.SwarmID() == container.Config.SwarmID() && containerInCluster.Engine.ID != container.Engine.ID {
log.Debugf("container %s was rescheduled on node %s, removing it\n", container.Id, containerInCluster.Engine.ID)
// container already exists in the cluster, destroy it
e.RemoveContainer(container, true, true)
}
}
}
}
// Reschedule containers as soon as a node fails
func (w *Watchdog) rescheduleContainers(e *Engine) {
w.Lock()
defer w.Unlock()
log.Debugf("Node %s failed - rescheduling containers", e.ID)
for _, c := range e.Containers() {
// Skip containers which don't have an "always" reschedule policy.
if c.Config.ReschedulePolicy() != "always" {
log.Debugf("Skipping rescheduling of %s based on rescheduling policy", c.Id)
// Skip containers which don't have an "on-node-failure" reschedule policy.
if !c.Config.HasReschedulePolicy("on-node-failure") {
log.Debugf("Skipping rescheduling of %s based on rescheduling policies", c.Id)
continue
}
@@ -44,23 +69,25 @@ func (w *Watchdog) rescheduleContainers(e *Engine) {
// will abort because the name is already taken.
c.Engine.removeContainer(c)
newContainer, err := w.cluster.CreateContainer(c.Config, c.Info.Name)
newContainer, err := w.cluster.CreateContainer(c.Config, c.Info.Name, nil)
if err != nil {
log.Errorf("Failed to reschedule container %s (Swarm ID: %s): %v", c.Id, c.Config.SwarmID(), err)
continue
}
log.Infof("Rescheduled container %s from %s to %s as %s (Swarm ID: %s)", c.Id, c.Engine.ID, newContainer.Engine.ID, newContainer.Id, c.Config.SwarmID())
if c.Info.State.Running {
if err := newContainer.Start(); err != nil {
log.Errorf("Failed to start rescheduled container %s", newContainer.Id)
// add the container back, so we can retry later
c.Engine.AddContainer(c)
} else {
log.Infof("Rescheduled container %s from %s to %s as %s (Swarm ID: %s)", c.Id, c.Engine.ID, newContainer.Engine.ID, newContainer.Id, c.Config.SwarmID())
if c.Info.State.Running {
if err := newContainer.Start(); err != nil {
log.Errorf("Failed to start rescheduled container %s", newContainer.Id)
}
}
}
}
}
// NewWatchdog creates a new watchdog
func NewWatchdog(cluster Cluster) *Watchdog {
log.Debugf("Watchdog enabled")
w := &Watchdog{

View File

@@ -10,7 +10,7 @@ parent="smn_workw_swarm"
## Advanced Scheduling
To learn more about advanced scheduling, see the
[strategies](strategy.md) and [filters](filter.md)
To learn more about advanced scheduling, see the [rescheduling](rescheduling.md), [strategies](strategy.md) and [filters](filter.md)
documents.

View File

@@ -0,0 +1,46 @@
<!--[metadata]>
+++
title = "Docker Swarm recheduling"
description = "Swarm rescheduling"
keywords = ["docker, swarm, clustering, rescheduling"]
[menu.main]
parent="smn_workw_swarm"
weight=5
+++
<![end-metadata]-->
# Rescheduling
The Docker Swarm scheduler can detect node failure and restart that node's
containers on another node.
## Rescheduling policies
The rescheduling policies are:
* `on-node-failure`
* `off` (default if not specified)
When you start a container, use the `reschedule` environment variable or the
`com.docker.swarm.reschedule-policies` label to specify the policy to apply
to the container, as shown below.
```
# do not reschedule (default)
$ docker run -d -e reschedule:off redis
# or
$ docker run -d -l 'com.docker.swarm.reschedule-policy=["off"]' redis
```
```
# reschedule on node failure
$ docker run -d -e reschedule:on-node-failure redis
# or
$ docker run -d -l 'com.docker.swarm.reschedule-policy=["on-node-failure"]' redis
```
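Note that the label value is a JSON-encoded list of policy names, which is why the examples above quote it as `["on-node-failure"]`. A minimal, purely illustrative Go sketch of building that value with the standard library:

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Swarm stores reschedule policies as a JSON list under the
	// com.docker.swarm.reschedule-policies label.
	value, err := json.Marshal([]string{"on-node-failure"})
	if err != nil {
		panic(err)
	}
	fmt.Printf("com.docker.swarm.reschedule-policies=%s\n", value)
	// Prints: com.docker.swarm.reschedule-policies=["on-node-failure"]
}
```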
- [Docker Swarm overview](../index.md)
- [Discovery options](../discovery.md)
- [Scheduler filters](filter.md)
- [Swarm API](../api/swarm-api.md)

View File

@@ -9,19 +9,78 @@ function teardown() {
@test "rescheduling" {
start_docker_with_busybox 2
swarm_manage
# Expect 2 nodes
docker_swarm info | grep -q "Nodes: 2"
swarm_manage --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-failure-retry=1 ${HOSTS[0]},${HOSTS[1]}
# c1 on node-0 with reschedule=on-node-failure
run docker_swarm run -dit --name c1 -e constraint:node==~node-0 --label com.docker.swarm.reschedule-policy=on-node-failure busybox sh
run docker_swarm run -dit --name c1 -e constraint:node==~node-0 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
[ "$status" -eq 0 ]
# c2 on node-0 with reschedule=never
run docker_swarm run -dit --name c2 -e constraint:node==~node-0 --label com.docker.swarm.reschedule-policy=off busybox sh
# c2 on node-0 with reschedule=off
run docker_swarm run -dit --name c2 -e constraint:node==~node-0 --label 'com.docker.swarm.reschedule-policies=["off"]' busybox sh
[ "$status" -eq 0 ]
# c3 on node-1
run docker_swarm run -dit --name c3 -e constraint:node==~node-1 --label com.docker.swarm.reschedule-policy=on-node-failure busybox sh
run docker_swarm run -dit --name c3 -e constraint:node==~node-1 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
[ "$status" -eq 0 ]
run docker_swarm ps -q
[ "${#lines[@]}" -eq 3 ]
# Make sure containers are running where they should.
run docker_swarm inspect c1
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-0"'* ]]
run docker_swarm inspect c2
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-0"'* ]]
run docker_swarm inspect c3
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# Get c1 swarm id
swarm_id=$(docker_swarm inspect -f '{{ index .Config.Labels "com.docker.swarm.id" }}' c1)
# Stop node-0
docker_host stop ${DOCKER_CONTAINERS[0]}
# Wait for Swarm to detect the node failure.
retry 5 1 eval "docker_swarm info | grep -q 'Unhealthy'"
# Wait for the container to be rescheduled
retry 5 1 eval docker_swarm inspect c1
# c1 should have been rescheduled from node-0 to node-1
run docker_swarm inspect c1
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# Check swarm id didn't change for c1
[[ "$swarm_id" == $(docker_swarm inspect -f '{{ index .Config.Labels "com.docker.swarm.id" }}' c1) ]]
run docker_swarm inspect "$swarm_id"
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# c2 should still be on node-0 since the rescheduling policy was off.
run docker_swarm inspect c2
[ "$status" -eq 1 ]
# c3 should still be on node-1 since it wasn't affected
run docker_swarm inspect c3
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
}
@test "rescheduling with constraints" {
start_docker_with_busybox 2
swarm_manage --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-failure-retry=1 ${HOSTS[0]},${HOSTS[1]}
# c1 on node-0 with reschedule=on-node-failure
run docker_swarm run -dit --name c1 -e constraint:node==~node-0 -e reschedule:on-node-failure busybox sh
[ "$status" -eq 0 ]
# c2 on node-0 with reschedule=on-node-failure and a hard node constraint
run docker_swarm run -dit --name c2 -e constraint:node==node-0 -e reschedule:on-node-failure busybox sh
[ "$status" -eq 0 ]
# c3 on node-1
run docker_swarm run -dit --name c3 -e constraint:node==node-1 -e reschedule:on-node-failure busybox sh
[ "$status" -eq 0 ]
run docker_swarm ps -q
@@ -42,17 +101,17 @@ function teardown() {
docker_host stop ${DOCKER_CONTAINERS[0]}
# Wait for Swarm to detect the node failure.
#retry 10 1 eval "docker_swarm info | grep -q 'Nodes: 1'"
retry 5 1 eval "docker_swarm info | grep -q 'Unhealthy'"
sleep 5
docker_swarm ps
# Wait for the container to be rescheduled
retry 5 1 eval docker_swarm inspect c1
# c1 should have been rescheduled from node-0 to node-1
run docker_swarm inspect c1
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# c2 should still be on node-0 since the rescheduling policy was off.
# c2 should still be on node-0 since a node constraint was applied.
run docker_swarm inspect c2
[ "$status" -eq 1 ]
@@ -61,3 +120,65 @@ function teardown() {
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
}
@test "reschedule conflict" {
start_docker_with_busybox 2
swarm_manage
run docker_swarm run --name c1 -dit --label 'com.docker.swarm.reschedule-policies=["false"]' busybox sh
[ "$status" -ne 0 ]
[[ "${output}" == *'invalid reschedule policy: false'* ]]
run docker_swarm run --name c2 -dit -e reschedule:off --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
[ "$status" -ne 0 ]
[[ "${output}" == *'too many reschedule policies'* ]]
}
@test "rescheduling node comes back" {
start_docker_with_busybox 2
swarm_manage --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-failure-retry=1 ${HOSTS[0]},${HOSTS[1]}
# c1 on node-0 with reschedule=on-node-failure
run docker_swarm run -dit --name c1 -e constraint:node==~node-0 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
[ "$status" -eq 0 ]
# c2 on node-1
run docker_swarm run -dit --name c2 -e constraint:node==~node-1 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
[ "$status" -eq 0 ]
run docker_swarm ps -q
[ "${#lines[@]}" -eq 2 ]
# Make sure containers are running where they should.
run docker_swarm inspect c1
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-0"'* ]]
run docker_swarm inspect c2
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# Stop node-0
docker_host stop ${DOCKER_CONTAINERS[0]}
# Wait for Swarm to detect the node failure.
retry 5 1 eval "docker_swarm info | grep -q 'Unhealthy'"
# Wait for the container to be rescheduled
retry 5 1 eval docker_swarm inspect c1
# c1 should have been rescheduled from node-0 to node-1
run docker_swarm inspect c1
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# c2 should still be on node-1 since it wasn't affected
run docker_swarm inspect c2
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# Restart node-0
docker_host start ${DOCKER_CONTAINERS[0]}
sleep 5
run docker_swarm ps
[ "${#lines[@]}" -eq 3 ]
}