From ffba4054dcca9f85f8e39012df4674af47066f85 Mon Sep 17 00:00:00 2001
From: Victor Vieux
Date: Thu, 24 Mar 2016 01:59:42 -0700
Subject: [PATCH] enable rescheduling watchdog only when primary

Signed-off-by: Victor Vieux
---
 cli/manage.go                     |  9 ++--
 test/integration/replication.bats | 77 +++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+), 3 deletions(-)

diff --git a/cli/manage.go b/cli/manage.go
index 6fa782ae99..5868a09158 100644
--- a/cli/manage.go
+++ b/cli/manage.go
@@ -148,7 +148,7 @@ func setupReplication(c *cli.Context, cluster cluster.Cluster, server *api.Serve
 
 	go func() {
 		for {
-			run(candidate, server, primary, replica)
+			run(cluster, candidate, server, primary, replica)
 			time.Sleep(defaultRecoverTime)
 		}
 	}()
@@ -163,19 +163,22 @@ func setupReplication(c *cli.Context, cluster cluster.Cluster, server *api.Serve
 	server.SetHandler(primary)
 }
 
-func run(candidate *leadership.Candidate, server *api.Server, primary *mux.Router, replica *api.Replica) {
+func run(cl cluster.Cluster, candidate *leadership.Candidate, server *api.Server, primary *mux.Router, replica *api.Replica) {
 	electedCh, errCh, err := candidate.RunForElection()
 	if err != nil {
 		return
 	}
+	var watchdog *cluster.Watchdog
 	for {
 		select {
 		case isElected := <-electedCh:
 			if isElected {
 				log.Info("Leader Election: Cluster leadership acquired")
+				watchdog = cluster.NewWatchdog(cl)
 				server.SetHandler(primary)
 			} else {
 				log.Info("Leader Election: Cluster leadership lost")
+				cl.UnregisterEventHandler(watchdog)
 				server.SetHandler(replica)
 			}
 
@@ -325,8 +328,8 @@ func manage(c *cli.Context) {
 		setupReplication(c, cl, server, discovery, addr, leaderTTL, tlsConfig)
 	} else {
 		server.SetHandler(api.NewPrimary(cl, tlsConfig, &statusHandler{cl, nil, nil}, c.GlobalBool("debug"), c.Bool("cors")))
+		cluster.NewWatchdog(cl)
 	}
-	cluster.NewWatchdog(cl)
 
 	log.Fatal(server.ListenAndServe())
 }
diff --git a/test/integration/replication.bats b/test/integration/replication.bats
index 76ca6393c6..9280858f9d 100644
--- a/test/integration/replication.bats
+++ b/test/integration/replication.bats
@@ -77,6 +77,83 @@ function teardown() {
 	[[ "${output}" == *"Primary: ${SWARM_HOSTS[1]}"* ]]
 }
 
+function containerRunning() {
+	local container="$1"
+	local node="$2"
+	run docker_swarm inspect "$container"
+	[ "$status" -eq 0 ]
+	[[ "${output}" == *"\"Name\": \"$node\""* ]]
+	[[ "${output}" == *"\"Status\": \"running\""* ]]
+}
+
+@test "leader election - rescheduling" {
+	local i=${#SWARM_MANAGE_PID[@]}
+	local port=$(($SWARM_BASE_PORT + $i))
+	local host=127.0.0.1:$port
+
+	start_docker_with_busybox 2
+	swarm_join "$DISCOVERY"
+
+	# Bring up one manager, make sure it becomes primary.
+	swarm_manage --replication --replication-ttl "4s" --advertise 127.0.0.1:$SWARM_BASE_PORT --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-failure-retry=1 "$DISCOVERY"
+	run docker -H ${SWARM_HOSTS[0]} info
+	[[ "${output}" == *"Role: primary"* ]]
+
+	# Fire up a second manager. Ensure it's a replica forwarding to the right primary.
+	swarm_manage --replication --replication-ttl "4s" --advertise 127.0.0.1:$(($SWARM_BASE_PORT + 1)) --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-failure-retry=1 "$DISCOVERY"
+	run docker -H ${SWARM_HOSTS[1]} info
+	[[ "${output}" == *"Role: replica"* ]]
+	[[ "${output}" == *"Primary: ${SWARM_HOSTS[0]}"* ]]
+
+	# c1 on node-0 with reschedule=on-node-failure
+	run docker_swarm run -dit --name c1 -e constraint:node==~node-0 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
+	[ "$status" -eq 0 ]
+	# c2 on node-0 with reschedule=off
+	run docker_swarm run -dit --name c2 -e constraint:node==~node-0 --label 'com.docker.swarm.reschedule-policies=["off"]' busybox sh
+	[ "$status" -eq 0 ]
+	# c3 on node-1 with reschedule=on-node-failure
+	run docker_swarm run -dit --name c3 -e constraint:node==~node-1 --label 'com.docker.swarm.reschedule-policies=["on-node-failure"]' busybox sh
+	[ "$status" -eq 0 ]
+
+	run docker_swarm ps -q
+	[ "${#lines[@]}" -eq 3 ]
+
+	# Make sure the containers are running where they should be.
+	containerRunning "c1" "node-0"
+	containerRunning "c2" "node-0"
+	containerRunning "c3" "node-1"
+
+	# Record c1's Swarm ID.
+	swarm_id=$(docker_swarm inspect -f '{{ index .Config.Labels "com.docker.swarm.id" }}' c1)
+
+	# Stop node-0.
+	docker_host stop ${DOCKER_CONTAINERS[0]}
+
+	# Wait for Swarm to detect the node failure.
+	retry 5 1 eval "docker_swarm info | grep -q 'Unhealthy'"
+
+	# Wait for the container to be rescheduled:
+	# c1 should move from node-0 to node-1.
+	retry 5 1 containerRunning "c1" "node-1"
+
+	# Make sure c1's Swarm ID did not change across the reschedule.
+	[[ "$swarm_id" == $(docker_swarm inspect -f '{{ index .Config.Labels "com.docker.swarm.id" }}' c1) ]]
+
+	run docker_swarm inspect "$swarm_id"
+	[ "$status" -eq 0 ]
+	[[ "${output}" == *'"Name": "node-1"'* ]]
+
+	# c2 was not rescheduled because its policy was off; with node-0 down, inspect fails.
+	run docker_swarm inspect c2
+	[ "$status" -eq 1 ]
+
+	# c3 should still be on node-1 since it was not affected by the failure.
+	containerRunning "c3" "node-1"
+
+	run docker_swarm ps -q
+	[ "${#lines[@]}" -eq 2 ]
+}
+
 @test "leader election - store failure" {
 	# Bring up one manager, make sure it becomes primary.
 	swarm_manage --replication --replication-ttl "4s" --advertise 127.0.0.1:$SWARM_BASE_PORT "$DISCOVERY"
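
Note for reviewers: the gist of the change is that the watchdog (which owns container rescheduling) is now wired to cluster events only while this manager is the elected primary. Before the patch, manage() called cluster.NewWatchdog(cl) unconditionally, so a replica also registered the rescheduling watchdog and could race the primary by rescheduling containers on engine failure; after the patch, the primary arms the watchdog on election and disarms it on loss of leadership.

Below is a minimal, self-contained Go sketch of that lifecycle. The types here are simplified stand-ins for Swarm's cluster.Cluster, cluster.Watchdog, and the election channel, not the real types or signatures:

package main

import "fmt"

// EventHandler is a simplified stand-in for Swarm's cluster.EventHandler.
type EventHandler interface {
	Handle(event string)
}

// Cluster fans events out to registered handlers, as cluster.Cluster does.
type Cluster struct {
	handlers map[EventHandler]struct{}
}

func NewCluster() *Cluster {
	return &Cluster{handlers: make(map[EventHandler]struct{})}
}

func (c *Cluster) RegisterEventHandler(h EventHandler)   { c.handlers[h] = struct{}{} }
func (c *Cluster) UnregisterEventHandler(h EventHandler) { delete(c.handlers, h) }

func (c *Cluster) emit(event string) {
	for h := range c.handlers {
		h.Handle(event)
	}
}

// Watchdog stands in for cluster.Watchdog: registering it "arms" rescheduling.
type Watchdog struct{}

func NewWatchdog(c *Cluster) *Watchdog {
	w := &Watchdog{}
	c.RegisterEventHandler(w)
	return w
}

func (w *Watchdog) Handle(event string) {
	fmt.Println("watchdog: rescheduling after", event)
}

func main() {
	cl := NewCluster()
	var watchdog *Watchdog

	// Mirrors the isElected branch in run() after this patch.
	onElection := func(isElected bool) {
		if isElected {
			watchdog = NewWatchdog(cl) // primary: arm rescheduling
		} else if watchdog != nil {
			cl.UnregisterEventHandler(watchdog) // replica: stay passive
			watchdog = nil
		}
	}

	onElection(true)
	cl.emit("engine failure") // handled: we are primary
	onElection(false)
	cl.emit("engine failure") // ignored: we are a replica
}

One deliberate difference from the patch: the sketch nil-checks the watchdog before unregistering, whereas run() passes watchdog (possibly nil, if this manager loses an election it never won) straight to UnregisterEventHandler and relies on that call tolerating a nil handler.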