Merge pull request #1578 from aluzzardi/rescheduling

[experimental] Simple container rescheduling on node failure
Alexandre Beslic 2016-01-12 15:00:27 -08:00
commit e1213384bc
13 changed files with 490 additions and 35 deletions

View File

@ -438,8 +438,14 @@ func postContainersCreate(c *context, w http.ResponseWriter, r *http.Request) {
authConfig = &dockerclient.AuthConfig{}
json.Unmarshal(buf, authConfig)
}
containerConfig := cluster.BuildContainerConfig(config)
container, err := c.cluster.CreateContainer(cluster.BuildContainerConfig(config), name, authConfig)
if err := containerConfig.Validate(); err != nil {
httpError(w, err.Error(), http.StatusInternalServerError)
return
}
container, err := c.cluster.CreateContainer(containerConfig, name, authConfig)
if err != nil {
if strings.HasPrefix(err.Error(), "Conflict") {
httpError(w, err.Error(), http.StatusConflict)

View File

@ -321,5 +321,6 @@ func manage(c *cli.Context) {
server.SetHandler(api.NewPrimary(cl, tlsConfig, &statusHandler{cl, nil, nil}, c.GlobalBool("debug"), c.Bool("cors")))
}
cluster.NewWatchdog(cl)
log.Fatal(server.ListenAndServe())
}

View File

@ -83,6 +83,9 @@ type Cluster interface {
// Register an event handler for cluster-wide events.
RegisterEventHandler(h EventHandler) error
// Unregister an event handler.
UnregisterEventHandler(h EventHandler)
// FIXME: remove this method
// Return a random engine
RANDOMENGINE() (*Engine, error)

View File

@ -2,6 +2,8 @@ package cluster
import (
"encoding/json"
"errors"
"fmt"
"strings"
"github.com/samalba/dockerclient"
@ -63,9 +65,10 @@ func consolidateResourceFields(c *dockerclient.ContainerConfig) {
// BuildContainerConfig creates a cluster.ContainerConfig from a dockerclient.ContainerConfig
func BuildContainerConfig(c dockerclient.ContainerConfig) *ContainerConfig {
var (
affinities []string
constraints []string
env []string
affinities []string
constraints []string
reschedulePolicies []string
env []string
)
// only for tests
@ -83,12 +86,19 @@ func BuildContainerConfig(c dockerclient.ContainerConfig) *ContainerConfig {
json.Unmarshal([]byte(labels), &constraints)
}
// parse affinities/constraints from env (ex. docker run -e affinity:container==redis -e affinity:image==nginx -e constraint:region==us-east -e constraint:storage==ssd)
// parse reschedule policy from labels (ex. docker run --label 'com.docker.swarm.reschedule-policies=on-node-failure')
if labels, ok := c.Labels[SwarmLabelNamespace+".reschedule-policies"]; ok {
json.Unmarshal([]byte(labels), &reschedulePolicies)
}
// parse affinities/constraints/reschedule policies from env (ex. docker run -e affinity:container==redis -e affinity:image==nginx -e constraint:region==us-east -e constraint:storage==ssd -e reschedule:off)
for _, e := range c.Env {
if ok, key, value := parseEnv(e); ok && key == "affinity" {
affinities = append(affinities, value)
} else if ok && key == "constraint" {
constraints = append(constraints, value)
} else if ok && key == "reschedule" {
reschedulePolicies = append(reschedulePolicies, value)
} else {
env = append(env, e)
}
@ -111,6 +121,13 @@ func BuildContainerConfig(c dockerclient.ContainerConfig) *ContainerConfig {
}
}
// store reschedule policies in labels
if len(reschedulePolicies) > 0 {
if labels, err := json.Marshal(reschedulePolicies); err == nil {
c.Labels[SwarmLabelNamespace+".reschedule-policies"] = string(labels)
}
}
consolidateResourceFields(&c)
return &ContainerConfig{c}
@ -186,3 +203,33 @@ func (c *ContainerConfig) HaveNodeConstraint() bool {
}
return false
}
// HasReschedulePolicy returns true if the specified policy is part of the config
func (c *ContainerConfig) HasReschedulePolicy(p string) bool {
for _, reschedulePolicy := range c.extractExprs("reschedule-policies") {
if reschedulePolicy == p {
return true
}
}
return false
}
// Validate returns an error if the config isn't valid
func (c *ContainerConfig) Validate() error {
//TODO: add validation for affinities and constraints
reschedulePolicies := c.extractExprs("reschedule-policies")
if len(reschedulePolicies) > 1 {
return errors.New("too many reschedule policies")
} else if len(reschedulePolicies) == 1 {
valid := false
for _, validReschedulePolicy := range []string{"off", "on-node-failure"} {
if reschedulePolicies[0] == validReschedulePolicy {
valid = true
}
}
if !valid {
return fmt.Errorf("invalid reschedule policy: %s", reschedulePolicies[0])
}
}
return nil
}

View File

@ -21,6 +21,11 @@ func (c *Container) Refresh() (*Container, error) {
return c.Engine.refreshContainer(c.Id, true)
}
// Start a container
func (c *Container) Start() error {
return c.Engine.client.StartContainer(c.Id, nil)
}
// Containers represents a list of containers
type Containers []*Container

View File

@ -1,6 +1,12 @@
package cluster
import "github.com/samalba/dockerclient"
import (
"errors"
"sync"
log "github.com/Sirupsen/logrus"
"github.com/samalba/dockerclient"
)
// Event is exported
type Event struct {
@ -12,3 +18,49 @@ type Event struct {
type EventHandler interface {
Handle(*Event) error
}
// EventHandlers is a map of EventHandler
type EventHandlers struct {
sync.RWMutex
eventHandlers map[EventHandler]struct{}
}
// NewEventHandlers returns an EventHandlers
func NewEventHandlers() *EventHandlers {
return &EventHandlers{
eventHandlers: make(map[EventHandler]struct{}),
}
}
// Handle callbacks for the events
func (eh *EventHandlers) Handle(e *Event) {
eh.RLock()
defer eh.RUnlock()
for h := range eh.eventHandlers {
if err := h.Handle(e); err != nil {
log.Error(err)
}
}
}
// RegisterEventHandler registers an event handler.
func (eh *EventHandlers) RegisterEventHandler(h EventHandler) error {
eh.Lock()
defer eh.Unlock()
if _, ok := eh.eventHandlers[h]; ok {
return errors.New("event handler already set")
}
eh.eventHandlers[h] = struct{}{}
return nil
}
// UnregisterEventHandler unregisters a previously registered event handler.
func (eh *EventHandlers) UnregisterEventHandler(h EventHandler) {
eh.Lock()
defer eh.Unlock()
delete(eh.eventHandlers, h)
}

View File

@ -29,7 +29,7 @@ type Cluster struct {
driver *mesosscheduler.MesosSchedulerDriver
dockerEnginePort string
eventHandler cluster.EventHandler
eventHandlers *cluster.EventHandlers
master string
agents map[string]*agent
scheduler *scheduler.Scheduler
@ -67,6 +67,7 @@ func NewCluster(scheduler *scheduler.Scheduler, TLSConfig *tls.Config, master st
}
cluster := &Cluster{
dockerEnginePort: defaultDockerEnginePort,
eventHandlers: cluster.NewEventHandlers(),
master: master,
agents: make(map[string]*agent),
scheduler: scheduler,
@ -156,22 +157,18 @@ func NewCluster(scheduler *scheduler.Scheduler, TLSConfig *tls.Config, master st
// Handle callbacks for the events
func (c *Cluster) Handle(e *cluster.Event) error {
if c.eventHandler == nil {
return nil
}
if err := c.eventHandler.Handle(e); err != nil {
log.Error(err)
}
c.eventHandlers.Handle(e)
return nil
}
// RegisterEventHandler registers an event handler.
func (c *Cluster) RegisterEventHandler(h cluster.EventHandler) error {
if c.eventHandler != nil {
return errors.New("event handler already set")
}
c.eventHandler = h
return nil
return c.eventHandlers.RegisterEventHandler(h)
}
// UnregisterEventHandler unregisters a previously registered event handler.
func (c *Cluster) UnregisterEventHandler(h cluster.EventHandler) {
c.eventHandlers.UnregisterEventHandler(h)
}
// CreateContainer for container creation in Mesos task

View File

@ -50,7 +50,7 @@ func (p *pendingContainer) ToContainer() *cluster.Container {
type Cluster struct {
sync.RWMutex
eventHandler cluster.EventHandler
eventHandlers *cluster.EventHandlers
engines map[string]*cluster.Engine
pendingEngines map[string]*cluster.Engine
scheduler *scheduler.Scheduler
@ -67,6 +67,7 @@ func NewCluster(scheduler *scheduler.Scheduler, TLSConfig *tls.Config, discovery
log.WithFields(log.Fields{"name": "swarm"}).Debug("Initializing cluster")
cluster := &Cluster{
eventHandlers: cluster.NewEventHandlers(),
engines: make(map[string]*cluster.Engine),
pendingEngines: make(map[string]*cluster.Engine),
scheduler: scheduler,
@ -90,22 +91,18 @@ func NewCluster(scheduler *scheduler.Scheduler, TLSConfig *tls.Config, discovery
// Handle callbacks for the events
func (c *Cluster) Handle(e *cluster.Event) error {
if c.eventHandler == nil {
return nil
}
if err := c.eventHandler.Handle(e); err != nil {
log.Error(err)
}
c.eventHandlers.Handle(e)
return nil
}
// RegisterEventHandler registers an event handler.
func (c *Cluster) RegisterEventHandler(h cluster.EventHandler) error {
if c.eventHandler != nil {
return errors.New("event handler already set")
}
c.eventHandler = h
return nil
return c.eventHandlers.RegisterEventHandler(h)
}
// UnregisterEventHandler unregisters a previously registered event handler.
func (c *Cluster) UnregisterEventHandler(h cluster.EventHandler) {
c.eventHandlers.UnregisterEventHandler(h)
}
// Generate a globally (across the cluster) unique ID.
@ -145,9 +142,12 @@ func (c *Cluster) createContainer(config *cluster.ContainerConfig, name string,
return nil, fmt.Errorf("Conflict: The name %s is already assigned. You have to delete (or rename) that container to be able to assign %s to a container again.", name, name)
}
// Associate a Swarm ID to the container we are creating.
swarmID := c.generateUniqueID()
config.SetSwarmID(swarmID)
swarmID := config.SwarmID()
if swarmID == "" {
// Associate a Swarm ID to the container we are creating.
swarmID = c.generateUniqueID()
config.SetSwarmID(swarmID)
}
if withImageAffinity {
config.AddAffinity("image==" + config.Image)

cluster/watchdog.go (new file, 98 lines)
View File

@ -0,0 +1,98 @@
package cluster
import (
"sync"
log "github.com/Sirupsen/logrus"
)
// Watchdog listens to cluster events and handles container rescheduling
type Watchdog struct {
sync.Mutex
cluster Cluster
}
// Handle cluster callbacks
func (w *Watchdog) Handle(e *Event) error {
// Skip non-swarm events.
if e.From != "swarm" {
return nil
}
switch e.Status {
case "engine_reconnect":
go w.removeDuplicateContainers(e.Engine)
case "engine_disconnect":
go w.rescheduleContainers(e.Engine)
}
return nil
}
// Remove duplicate containers when a node comes back
func (w *Watchdog) removeDuplicateContainers(e *Engine) {
log.Debugf("removing duplicate containers from Node %s", e.ID)
e.RefreshContainers(false)
w.Lock()
defer w.Unlock()
for _, container := range e.Containers() {
for _, containerInCluster := range w.cluster.Containers() {
if containerInCluster.Config.SwarmID() == container.Config.SwarmID() && containerInCluster.Engine.ID != container.Engine.ID {
log.Debugf("container %s was rescheduled on node %s, removing it\n", container.Id, containerInCluster.Engine.ID)
// container already exists in the cluster, destroy it
e.RemoveContainer(container, true, true)
}
}
}
}
// Reschedule containers as soon as a node fails
func (w *Watchdog) rescheduleContainers(e *Engine) {
w.Lock()
defer w.Unlock()
log.Debugf("Node %s failed - rescheduling containers", e.ID)
for _, c := range e.Containers() {
// Skip containers which don't have an "on-node-failure" reschedule policy.
if !c.Config.HasReschedulePolicy("on-node-failure") {
log.Debugf("Skipping rescheduling of %s based on rescheduling policies", c.Id)
continue
}
// Remove the container from the dead engine. If we don't, then both
// the old and new one will show up in docker ps.
// We have to do this before calling `CreateContainer`, otherwise it
// will abort because the name is already taken.
c.Engine.removeContainer(c)
newContainer, err := w.cluster.CreateContainer(c.Config, c.Info.Name, nil)
if err != nil {
log.Errorf("Failed to reschedule container %s (Swarm ID: %s): %v", c.Id, c.Config.SwarmID(), err)
// add the container back, so we can retry later
c.Engine.AddContainer(c)
} else {
log.Infof("Rescheduled container %s from %s to %s as %s (Swarm ID: %s)", c.Id, c.Engine.ID, newContainer.Engine.ID, newContainer.Id, c.Config.SwarmID())
if c.Info.State.Running {
if err := newContainer.Start(); err != nil {
log.Errorf("Failed to start rescheduled container %s", newContainer.Id)
}
}
}
}
}
// NewWatchdog creates a new watchdog
func NewWatchdog(cluster Cluster) *Watchdog {
log.Debugf("Watchdog enabled")
w := &Watchdog{
cluster: cluster,
}
cluster.RegisterEventHandler(w)
return w
}

View File

@ -10,7 +10,7 @@ parent="smn_workw_swarm"
## Advanced Scheduling
To learn more about advanced scheduling, see the
[strategies](strategy.md) and [filters](filter.md)
To learn more about advanced scheduling, see the [rescheduling](rescheduling.md), [strategies](strategy.md) and [filters](filter.md)
documents.

experimental/README.md (new file, 21 lines)
View File

@ -0,0 +1,21 @@
# Docker Swarm Experimental Features
This page lists the Docker Swarm features that are experimental. Experimental
features are **not** ready for production. They are provided for testing and
evaluation in your sandbox environments.
The information below describes each feature and the GitHub pull requests and
issues associated with it. Where necessary, links point to additional
documentation on an issue. As an active Docker user and community member,
please feel free to provide feedback on these features.
## Current experimental features
* [Container Rescheduling on node failure](rescheduling.md)
## How to comment on an experimental feature
Each feature's documentation includes a list of the proposal pull requests (PRs) associated with the feature. If you want to comment on or suggest a change to a feature, please add it to the existing feature PR.
Issues or problems with a feature? Ask for help on the `#swarm` IRC channel or on the [Docker Google group](https://groups.google.com/forum/#!forum/docker-user).

View File

@ -0,0 +1,41 @@
<!--[metadata]>
+++
title = "Docker Swarm recheduling"
description = "Swarm rescheduling"
keywords = ["docker, swarm, clustering, rescheduling"]
[menu.main]
parent="smn_workw_swarm"
weight=5
+++
<![end-metadata]-->
# Rescheduling
The Docker Swarm scheduler can detect node failures and restart a failed
node's containers on another node.
## Rescheduling policies
The rescheduling policies are:
* `on-node-failure`
* `off` (default if not specified)
When you start a container, use the `reschedule` environment variable or the
`com.docker.swarm.reschedule-policies` label to specify the policy to
apply to the container.
```
# do not reschedule (default)
$ docker run -d -e reschedule:off redis
# or
$ docker run -d -l 'com.docker.swarm.reschedule-policy=["off"]' redis
```
```
# reschedule on node failure
$ docker run -d -e reschedule:on-node-failure redis
# or
$ docker run -d -l 'com.docker.swarm.reschedule-policy=["on-node-failure"]' redis
```
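Whichever form you use, Swarm records the result as a JSON array in the
`com.docker.swarm.reschedule-policies` label on the container, and rejects
unknown policies when the container is created. The following is a minimal
sketch of checking that: the container name `web` and the bogus policy
`sometimes` are arbitrary examples.
```
# start a container with the policy and read the stored label back
$ docker run -d --name web -e reschedule:on-node-failure redis
$ docker inspect -f '{{ index .Config.Labels "com.docker.swarm.reschedule-policies" }}' web
["on-node-failure"]

# an unrecognized policy fails validation at create time with
# "invalid reschedule policy: sometimes"
$ docker run -d -e reschedule:sometimes redis
```
Because the policy lives in the container's labels, it travels with the
container configuration, which is what lets Swarm decide whether to recreate
the container when its engine goes down.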

View File

@ -0,0 +1,184 @@
#!/usr/bin/env bats
load helpers
function teardown() {
swarm_manage_cleanup
stop_docker
}
@test "rescheduling" {
start_docker_with_busybox 2
swarm_manage --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-failure-retry=1 ${HOSTS[0]},${HOSTS[1]}
# c1 on node-0 with reschedule=on-node-failure
run docker_swarm run -dit --name c1 -e constraint:node==~node-0 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
[ "$status" -eq 0 ]
# c2 on node-0 with reschedule=off
run docker_swarm run -dit --name c2 -e constraint:node==~node-0 --label 'com.docker.swarm.reschedule-policies=["off"]' busybox sh
[ "$status" -eq 0 ]
# c3 on node-1
run docker_swarm run -dit --name c3 -e constraint:node==~node-1 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
[ "$status" -eq 0 ]
run docker_swarm ps -q
[ "${#lines[@]}" -eq 3 ]
# Make sure containers are running where they should.
run docker_swarm inspect c1
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-0"'* ]]
run docker_swarm inspect c2
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-0"'* ]]
run docker_swarm inspect c3
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# Get c1 swarm id
swarm_id=$(docker_swarm inspect -f '{{ index .Config.Labels "com.docker.swarm.id" }}' c1)
# Stop node-0
docker_host stop ${DOCKER_CONTAINERS[0]}
# Wait for Swarm to detect the node failure.
retry 5 1 eval "docker_swarm info | grep -q 'Unhealthy'"
# Wait for the container to be rescheduled
retry 5 1 eval docker_swarm inspect c1
# c1 should have been rescheduled from node-0 to node-1
run docker_swarm inspect c1
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# Check swarm id didn't change for c1
[[ "$swarm_id" == $(docker_swarm inspect -f '{{ index .Config.Labels "com.docker.swarm.id" }}' c1) ]]
run docker_swarm inspect "$swarm_id"
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# c2 was not rescheduled since its policy was off; with node-0 down it is no
# longer visible, so inspect fails.
run docker_swarm inspect c2
[ "$status" -eq 1 ]
# c3 should still be on node-1 since it wasn't affected
run docker_swarm inspect c3
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
}
@test "rescheduling with constraints" {
start_docker_with_busybox 2
swarm_manage --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-failure-retry=1 ${HOSTS[0]},${HOSTS[1]}
# c1 on node-0 with reschedule=on-node-failure
run docker_swarm run -dit --name c1 -e constraint:node==~node-0 -e reschedule:on-node-failure busybox sh
[ "$status" -eq 0 ]
# c2 on node-0 with a hard node constraint and reschedule=on-node-failure
run docker_swarm run -dit --name c2 -e constraint:node==node-0 -e reschedule:on-node-failure busybox sh
[ "$status" -eq 0 ]
# c3 on node-1
run docker_swarm run -dit --name c3 -e constraint:node==node-1 -e reschedule:on-node-failure busybox sh
[ "$status" -eq 0 ]
run docker_swarm ps -q
[ "${#lines[@]}" -eq 3 ]
# Make sure containers are running where they should.
run docker_swarm inspect c1
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-0"'* ]]
run docker_swarm inspect c2
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-0"'* ]]
run docker_swarm inspect c3
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# Stop node-0
docker_host stop ${DOCKER_CONTAINERS[0]}
# Wait for Swarm to detect the node failure.
retry 5 1 eval "docker_swarm info | grep -q 'Unhealthy'"
# Wait for the container to be rescheduled
retry 5 1 eval docker_swarm inspect c1
# c1 should have been rescheduled from node-0 to node-1
run docker_swarm inspect c1
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# c2 could not be rescheduled because of its hard node constraint; with
# node-0 down it is no longer visible, so inspect fails.
run docker_swarm inspect c2
[ "$status" -eq 1 ]
# c3 should still be on node-1 since it wasn't affected
run docker_swarm inspect c3
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
}
@test "reschedule conflict" {
start_docker_with_busybox 2
swarm_manage
run docker_swarm run --name c1 -dit --label 'com.docker.swarm.reschedule-policies=["false"]' busybox sh
[ "$status" -ne 0 ]
[[ "${output}" == *'invalid reschedule policy: false'* ]]
run docker_swarm run --name c2 -dit -e reschedule:off --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' -e reschedule:off busybox sh
[ "$status" -ne 0 ]
[[ "${output}" == *'too many reschedule policies'* ]]
}
@test "rescheduling node comes back" {
start_docker_with_busybox 2
swarm_manage --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-failure-retry=1 ${HOSTS[0]},${HOSTS[1]}
# c1 on node-0 with reschedule=on-node-failure
run docker_swarm run -dit --name c1 -e constraint:node==~node-0 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
[ "$status" -eq 0 ]
# c2 on node-1
run docker_swarm run -dit --name c2 -e constraint:node==~node-1 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
[ "$status" -eq 0 ]
run docker_swarm ps -q
[ "${#lines[@]}" -eq 2 ]
# Make sure containers are running where they should.
run docker_swarm inspect c1
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-0"'* ]]
run docker_swarm inspect c2
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# Stop node-0
docker_host stop ${DOCKER_CONTAINERS[0]}
# Wait for Swarm to detect the node failure.
retry 5 1 eval "docker_swarm info | grep -q 'Unhealthy'"
# Wait for the container to be rescheduled
retry 5 1 eval docker_swarm inspect c1
# c1 should have been rescheduled from node-0 to node-1
run docker_swarm inspect c1
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# c2 should still be on node-1 since it wasn't affected
run docker_swarm inspect c2
[ "$status" -eq 0 ]
[[ "${output}" == *'"Name": "node-1"'* ]]
# Restart node-0
docker_host start ${DOCKER_CONTAINERS[0]}
sleep 5
run docker_swarm ps
[ "${#lines[@]}" -eq 3 ]
}