Merge pull request #1236 from abronan/test_leader_failure

Add integration test for leader election in the event of a backend store failure
This commit is contained in:
Xian Chaobo 2015-09-30 09:16:39 +08:00
commit 67bfdd67a6
1 changed file with 59 additions and 3 deletions

View File

@ -2,8 +2,8 @@
load helpers
# Address on which the store will listen (random port between 8000 and 9000).
STORE_HOST=127.0.0.1:$(( ( RANDOM % 1000 ) + 8000 ))
# Address on which the store will listen
STORE_HOST=127.0.0.1:8500
# Discovery parameter for Swarm
DISCOVERY="consul://${STORE_HOST}/test"
@ -13,7 +13,7 @@ CONTAINER_NAME=swarm_leader
# Launch a single-node Consul server in a container to act as the Swarm
# discovery/KV backend. Publishes the container's port 8500 on $STORE_HOST
# and mounts the local consul config directory into the container.
# NOTE(review): relies on the docker_host helper and the CONTAINER_NAME /
# STORE_HOST globals defined earlier in this file.
function start_store() {
docker_host run -v $(pwd)/discovery/consul/config:/config --name=$CONTAINER_NAME -h $CONTAINER_NAME -p $STORE_HOST:8500 -d progrium/consul -server -bootstrap-expect 1 -config-file=/config/consul.json
# FIXME: We have to wait a few seconds for the store to come up.
# Wait a few seconds for the store to come up.
sleep 3
}
@ -58,3 +58,59 @@ function teardown() {
[[ "${output}" == *"Role: replica"* ]]
[[ "${output}" == *"Primary: ${SWARM_HOSTS[1]}"* ]]
}
# Verify that leader re-election converges after the backend store (Consul),
# which holds the leader metadata, is stopped and restarted: exactly one
# manager must end up primary and the other two must be replicas pointing
# at it.
@test "leader election - store failure" {
	# Bring up one manager, make sure it becomes primary.
	swarm_manage --replication --leaderTTL "4s" --advertise 127.0.0.1:$SWARM_BASE_PORT "$DISCOVERY"
	run docker -H "${SWARM_HOSTS[0]}" info
	[[ "${output}" == *"Role: primary"* ]]

	# Fire up a second manager. Ensure it's a replica forwarding to the right primary.
	swarm_manage --replication --leaderTTL "4s" --advertise 127.0.0.1:$((SWARM_BASE_PORT + 1)) "$DISCOVERY"
	run docker -H "${SWARM_HOSTS[1]}" info
	[[ "${output}" == *"Role: replica"* ]]
	[[ "${output}" == *"Primary: ${SWARM_HOSTS[0]}"* ]]

	# Fire up a third manager. Ensure it's a replica forwarding to the right primary.
	swarm_manage --replication --leaderTTL "4s" --advertise 127.0.0.1:$((SWARM_BASE_PORT + 2)) "$DISCOVERY"
	run docker -H "${SWARM_HOSTS[2]}" info
	[[ "${output}" == *"Role: replica"* ]]
	[[ "${output}" == *"Primary: ${SWARM_HOSTS[0]}"* ]]

	# Stop and start the store holding the leader metadata.
	stop_store
	sleep 3
	start_store

	# Wait a little bit for the re-election to occur.
	# This is specific to Consul (liveness over safety).
	sleep 6

	# Make sure the managers are either in the 'primary' or the 'replica' state.
	for host in "${SWARM_HOSTS[@]}"; do
		retry 120 1 eval "docker -H ${host} info | grep -Eq 'Role: primary|Role: replica'"
	done

	# Find out which node is the Primary and which ones are Replicas
	# after the store failure. 'primary' stays empty until a manager
	# actually reports the primary role, so a failed election is caught
	# explicitly below rather than being masked by a default value.
	primary=""
	declare -a replicas=()
	for host in "${SWARM_HOSTS[@]}"; do
		run docker -H "$host" info
		if [[ "${output}" == *"Role: primary"* ]]; then
			primary=$host
		else
			replicas+=("$host")
		fi
	done

	# Exactly one manager must have won the election...
	[[ -n "$primary" ]]
	# ...and the other two must be replicas.
	[[ "${#replicas[@]}" -eq 2 ]]

	# Check that the replicas are pointing to the right Primary.
	for host in "${replicas[@]}"; do
		run docker -H "$host" info
		[[ "${output}" == *"Primary: ${primary}"* ]]
	done
}