integration: cover more ground in discovery testing.

Signed-off-by: Andrea Luzzardi <aluzzardi@gmail.com>
Andrea Luzzardi 2015-05-16 16:34:18 -07:00
parent 9179ed3d34
commit ac18ef381d
7 changed files with 287 additions and 116 deletions

test/integration/discovery/consul.bats

@@ -1,43 +1,81 @@
 #!/usr/bin/env bats
 
 load ../helpers
+load discovery_helpers
 
-# Address on which Consul will listen (random port between 8000 and 9000).
-CONSUL_HOST=127.0.0.1:$(( ( RANDOM % 1000 ) + 8000 ))
+# Address on which the store will listen (random port between 8000 and 9000).
+STORE_HOST=127.0.0.1:$(( ( RANDOM % 1000 ) + 8000 ))
+
+# Discovery parameter for Swarm
+DISCOVERY="consul://${STORE_HOST}/test"
 
 # Container name for integration test
 CONTAINER_NAME=swarm_consul
 
-function start_consul() {
-	docker_host run --name=$CONTAINER_NAME -h $CONTAINER_NAME -p $CONSUL_HOST:8500 -d progrium/consul -server -bootstrap-expect 1 -data-dir /test
+function start_store() {
+	docker_host run --name=$CONTAINER_NAME -h $CONTAINER_NAME -p $STORE_HOST:8500 -d progrium/consul -server -bootstrap-expect 1 -data-dir /test
 }
 
-function stop_consul() {
+function stop_store() {
 	docker_host rm -f -v $CONTAINER_NAME
 }
 
-function setup() {
-	start_consul
-}
-
 function teardown() {
 	swarm_manage_cleanup
 	swarm_join_cleanup
 	stop_docker
-	stop_consul
+	stop_store
 }
 
-@test "consul discovery" {
+@test "consul discovery: recover engines" {
+	# The goal of this test is to ensure swarm can see engines that joined
+	# while the manager was stopped.
+
+	# Start the store
+	start_store
+
 	# Start 2 engines and make them join the cluster.
 	start_docker 2
-	swarm_join "consul://${CONSUL_HOST}/test"
-
-	# Start a manager and ensure it sees all the engines.
-	swarm_manage "consul://${CONSUL_HOST}/test"
-	check_swarm_nodes
-
-	# Add another engine to the cluster and make sure it's picked up by swarm.
-	start_docker 1
-	swarm_join "consul://${CONSUL_HOST}/test"
-	retry 30 1 check_swarm_nodes
+	swarm_join "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_list "$DISCOVERY"
+
+	# Then, start a manager and ensure it sees all the engines.
+	swarm_manage "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_info
+}
+
+@test "consul discovery: watch for changes" {
+	# The goal of this test is to ensure swarm can see new nodes as they join
+	# the cluster.
+	start_store
+
+	# Start a manager with no engines.
+	swarm_manage "$DISCOVERY"
+	retry 10 1 discovery_check_swarm_info
+
+	# Add engines to the cluster and make sure they're picked up by swarm.
+	start_docker 2
+	swarm_join "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_list "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_info
 }
+
+@test "consul discovery: failure" {
+	# The goal of this test is to simulate a store failure and ensure discovery
+	# is resilient to it.
+
+	# At this point, the store is not yet started.
+
+	# Start 2 engines and join the cluster. They should keep retrying.
+	start_docker 2
+	swarm_join "$DISCOVERY"
+
+	# Start a manager. It should keep retrying.
+	swarm_manage "$DISCOVERY"
+
+	# Now start the store.
+	start_store
+
+	# After a while, `join` and `manage` should reach the store.
+	retry 5 1 discovery_check_swarm_list "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_info
+}
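
A review note on debugging these suites: when a `discovery_check_swarm_list` retry times out, it helps to look at what the engines actually wrote into the store. The sketch below is a hypothetical debugging aid, not part of the suite; it assumes the `progrium/consul` container started by `start_store` above and uses Consul's standard KV HTTP API, while the exact key layout under the `/test` prefix is an internal detail of Swarm's consul backend and may differ.

    # Dump everything registered under the discovery prefix
    # (assumes $STORE_HOST is set as in the test file above).
    curl -s "http://${STORE_HOST}/v1/kv/test?recurse"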

test/integration/discovery/discovery_helpers.bash

@@ -0,0 +1,17 @@
+#!/bin/bash
+
+load ../helpers
+
+# Returns true if all nodes have joined the swarm.
+function discovery_check_swarm_info() {
+	docker_swarm info | grep -q "Nodes: ${#HOSTS[@]}"
+}
+
+# Returns true if all nodes have joined the discovery.
+function discovery_check_swarm_list() {
+	local joined=`swarm list "$1" | wc -l`
+	local total=${#HOSTS[@]}
+
+	echo "${joined} out of ${total} hosts joined discovery"
+	[ "$joined" -eq "$total" ]
+}
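
Both helpers are meant to be polled with `retry` rather than asserted once, since engines register asynchronously via heartbeats. A typical call site, mirroring the tests in this commit (the engine addresses are illustrative; `HOSTS` is populated by `start_docker` in the real harness):

    # Illustrative only: in the harness, start_docker fills HOSTS.
    HOSTS=(127.0.0.1:2375 127.0.0.1:2376)
    # retry <attempts> <sleep-seconds> <command...>, from ../helpers.
    retry 5 1 discovery_check_swarm_list "consul://127.0.0.1:8500/test"
    retry 5 1 discovery_check_swarm_info

Note that `discovery_check_swarm_list` counts lines of `swarm list` output with `wc -l`, so it relies on the CLI printing exactly one address per line.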

test/integration/discovery/etcd.bats

@@ -1,43 +1,82 @@
 #!/usr/bin/env bats
 
 load ../helpers
+load discovery_helpers
 
-# Address on which Etcd will listen (random port between 9000 and 10,000).
-ETCD_HOST=127.0.0.1:$(( ( RANDOM % 1000 ) + 9000 ))
+# Address on which the store will listen (random port between 9000 and 10000).
+STORE_HOST=127.0.0.1:$(( ( RANDOM % 1000 ) + 9000 ))
+
+# Discovery parameter for Swarm
+DISCOVERY="etcd://${STORE_HOST}/test"
 
 # Container name for integration test
 CONTAINER_NAME=swarm_etcd
 
-function start_etcd() {
-	docker_host run -p $ETCD_HOST:4001 --name=$CONTAINER_NAME -d coreos/etcd
+function start_store() {
+	docker_host run -p $STORE_HOST:4001 --name=$CONTAINER_NAME -d coreos/etcd
 }
 
-function stop_etcd() {
+function stop_store() {
 	docker_host rm -f -v $CONTAINER_NAME
 }
 
-function setup() {
-	start_etcd
-}
-
 function teardown() {
 	swarm_manage_cleanup
 	swarm_join_cleanup
 	stop_docker
-	stop_etcd
+	stop_store
 }
 
-@test "etcd discovery" {
+@test "etcd discovery: recover engines" {
+	# The goal of this test is to ensure swarm can see engines that joined
+	# while the manager was stopped.
+
+	# Start the store
+	start_store
+	docker_host ps -a
+
 	# Start 2 engines and make them join the cluster.
 	start_docker 2
-	swarm_join "etcd://${ETCD_HOST}/test"
-
-	# Start a manager and ensure it sees all the engines.
-	swarm_manage "etcd://${ETCD_HOST}/test"
-	check_swarm_nodes
-
-	# Add another engine to the cluster and make sure it's picked up by swarm.
-	start_docker 1
-	swarm_join "etcd://${ETCD_HOST}/test"
-	retry 30 1 check_swarm_nodes
+	swarm_join "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_list "$DISCOVERY"
+
+	# Then, start a manager and ensure it sees all the engines.
+	swarm_manage "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_info
 }
+
+@test "etcd discovery: watch for changes" {
+	# The goal of this test is to ensure swarm can see new nodes as they join
+	# the cluster.
+	start_store
+
+	# Start a manager with no engines.
+	swarm_manage "$DISCOVERY"
+	retry 10 1 discovery_check_swarm_info
+
+	# Add engines to the cluster and make sure they're picked up by swarm.
+	start_docker 2
+	swarm_join "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_list "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_info
+}
+
+@test "etcd discovery: failure" {
+	# The goal of this test is to simulate a store failure and ensure discovery
+	# is resilient to it.
+
+	# At this point, the store is not yet started.
+
+	# Start 2 engines and join the cluster. They should keep retrying.
+	start_docker 2
+	swarm_join "$DISCOVERY"
+
+	# Start a manager. It should keep retrying.
+	swarm_manage "$DISCOVERY"
+
+	# Now start the store.
+	start_store
+
+	# After a while, `join` and `manage` should reach the store.
+	retry 5 1 discovery_check_swarm_list "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_info
+}
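
The stray `docker_host ps -a` in the recover-engines test looks like a leftover debugging aid. For the same kind of debugging, the store itself can be queried directly: `start_store` publishes `$STORE_HOST` onto the container's port 4001, etcd's legacy client port, which at the time served the v2 keys API. A hedged sketch (the key layout under `/test` is Swarm's internal detail and may differ):

    # Confirm the store is reachable and inspect the discovery prefix.
    curl -s "http://${STORE_HOST}/v2/keys/test?recursive=true"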

test/integration/discovery/file.bats

@@ -1,9 +1,15 @@
 #!/usr/bin/env bats
 
 load ../helpers
+load discovery_helpers
 
-# create a blank temp file for discovery
-DISCOVERY_FILE=$(mktemp)
+DISCOVERY_FILE=""
+DISCOVERY=""
+
+function setup() {
+	# create a blank temp file for discovery
+	DISCOVERY_FILE=$(mktemp)
+	DISCOVERY="file://$DISCOVERY_FILE"
+}
 
 function teardown() {
 	swarm_manage_cleanup
@@ -11,24 +17,59 @@ function teardown() {
 	rm -f "$DISCOVERY_FILE"
 }
 
-function setup_file_discovery() {
+function setup_discovery_file() {
 	rm -f "$DISCOVERY_FILE"
 	for host in ${HOSTS[@]}; do
 		echo "$host" >> $DISCOVERY_FILE
 	done
 }
 
-@test "file discovery" {
-	# Start 2 engines, register them in a file, then start swarm and make sure
-	# it sees them.
-	start_docker 2
-	setup_file_discovery
-	swarm_manage "file://$DISCOVERY_FILE"
-	check_swarm_nodes
+@test "file discovery: recover engines" {
+	# The goal of this test is to ensure swarm can see engines that joined
+	# while the manager was stopped.
 
-	# Add another engine to the cluster, update the discovery file and make
-	# sure it's picked up by swarm.
-	start_docker 1
-	setup_file_discovery
-	retry 10 1 check_swarm_nodes
+	# Start 2 engines and register them in the file.
+	start_docker 2
+	setup_discovery_file
+	retry 5 1 discovery_check_swarm_list "$DISCOVERY"
+
+	# Then, start a manager and ensure it sees all the engines.
+	swarm_manage "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_info
 }
+
+@test "file discovery: watch for changes" {
+	# The goal of this test is to ensure swarm can see new nodes as they join
+	# the cluster.
+
+	# Start a manager with no engines.
+	swarm_manage "$DISCOVERY"
+	retry 10 1 discovery_check_swarm_info
+
+	# Add engines to the cluster and make sure they're picked up by swarm.
+	start_docker 2
+	setup_discovery_file
+	retry 5 1 discovery_check_swarm_list "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_info
+}
+
+@test "file discovery: failure" {
+	# The goal of this test is to simulate a failure (file not available) and
+	# ensure discovery is resilient to it.
+
+	# Wipe out the discovery file.
+	rm -f "$DISCOVERY_FILE"
+
+	# Start 2 engines.
+	start_docker 2
+
+	# Start a manager. It should keep retrying.
+	swarm_manage "$DISCOVERY"
+
+	# Now create the discovery file.
+	setup_discovery_file
+
+	# After a while, `manage` should see the file.
+	retry 5 1 discovery_check_swarm_list "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_info
+}
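
For reference, the file backend has no registration protocol at all: `setup_discovery_file` simply writes one `<host>:<port>` entry per line, and that is the entire discovery format. After `start_docker 2`, the file would look something like this (the addresses are illustrative; they depend on how the harness allocates engine ports):

    127.0.0.1:2375
    127.0.0.1:2376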

test/integration/discovery/token.bats

@@ -1,8 +1,9 @@
 #!/usr/bin/env bats
 
 load ../helpers
+load discovery_helpers
 
 TOKEN=""
+DISCOVERY=""
 
 function token_cleanup() {
 	[ -z "$TOKEN" ] && return
@@ -10,6 +11,12 @@ function token_cleanup() {
 	curl -X DELETE "https://discovery-stage.hub.docker.com/v1/clusters/$TOKEN"
 }
 
+function setup() {
+	TOKEN=$(swarm create)
+	[[ "$TOKEN" =~ ^[0-9a-f]{32}$ ]]
+	DISCOVERY="token://$TOKEN"
+}
+
 function teardown() {
 	swarm_manage_cleanup
 	swarm_join_cleanup
@@ -17,23 +24,31 @@
 	token_cleanup
 }
 
-@test "token discovery" {
-	# Create a cluster and validate the token.
-	run swarm create
-	[ "$status" -eq 0 ]
-	[[ "$output" =~ ^[0-9a-f]{32}$ ]]
-	TOKEN="$output"
-
+@test "token discovery: recover engines" {
+	# The goal of this test is to ensure swarm can see engines that joined
+	# while the manager was stopped.
+
 	# Start 2 engines and make them join the cluster.
 	start_docker 2
-	swarm_join "token://$TOKEN"
+	swarm_join "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_list "$DISCOVERY"
 
-	# Start a manager and ensure it sees all the engines.
-	swarm_manage "token://$TOKEN"
-	check_swarm_nodes
-
-	# Add another engine to the cluster and make sure it's picked up by swarm.
-	start_docker 1
-	swarm_join "token://$TOKEN"
-	retry 10 1 check_swarm_nodes
+	# Then, start a manager and ensure it sees all the engines.
+	swarm_manage "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_info
 }
+
+@test "token discovery: watch for changes" {
+	# The goal of this test is to ensure swarm can see new nodes as they join
+	# the cluster.
+
+	# Start a manager with no engines.
+	swarm_manage "$DISCOVERY"
+	retry 10 1 discovery_check_swarm_info
+
+	# Add engines to the cluster and make sure they're picked up by swarm.
+	start_docker 2
+	swarm_join "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_list "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_info
+}
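
Unlike the store-backed suites, token discovery talks to the hosted service at discovery-stage.hub.docker.com, so there is no local store to kill and hence no `failure` test here. For manual inspection, the DELETE endpoint used in `token_cleanup` suggests a matching read endpoint; this is a hedged sketch based on that inference, and the response format is the service's own detail:

    # Hypothetical: list the addresses currently registered under a token.
    curl -s "https://discovery-stage.hub.docker.com/v1/clusters/$TOKEN"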

test/integration/discovery/zookeeper.bats

@@ -1,43 +1,81 @@
 #!/usr/bin/env bats
 
 load ../helpers
+load discovery_helpers
 
-# Address on which Zookeeper will listen (random port between 7000 and 8000).
-ZK_HOST=127.0.0.1:$(( ( RANDOM % 1000 ) + 7000 ))
+# Address on which the store will listen (random port between 7000 and 8000).
+STORE_HOST=127.0.0.1:$(( ( RANDOM % 1000 ) + 7000 ))
+
+# Discovery parameter for Swarm
+DISCOVERY="zk://${STORE_HOST}/test"
 
 # Container name for integration test
-ZK_CONTAINER_NAME=swarm_integration_zk
+CONTAINER_NAME=swarm_integration_zk
 
-function start_zk() {
-	docker_host run --name $ZK_CONTAINER_NAME -p $ZK_HOST:2181 -d jplock/zookeeper
+function start_store() {
+	docker_host run --name $CONTAINER_NAME -p $STORE_HOST:2181 -d jplock/zookeeper
 }
 
-function stop_zk() {
-	docker_host rm -f -v $ZK_CONTAINER_NAME
-}
-
-function setup() {
-	start_zk
+function stop_store() {
+	docker_host rm -f -v $CONTAINER_NAME
 }
 
 function teardown() {
 	swarm_manage_cleanup
 	swarm_join_cleanup
 	stop_docker
-	stop_zk
+	stop_store
 }
 
-@test "zookeeper discovery" {
+@test "zk discovery: recover engines" {
+	# The goal of this test is to ensure swarm can see engines that joined
+	# while the manager was stopped.
+
+	# Start the store
+	start_store
+
 	# Start 2 engines and make them join the cluster.
 	start_docker 2
-	swarm_join "zk://${ZK_HOST}/test"
-
-	# Start a manager and ensure it sees all the engines.
-	swarm_manage "zk://${ZK_HOST}/test"
-	check_swarm_nodes
-
-	# Add another engine to the cluster and make sure it's picked up by swarm.
-	start_docker 1
-	swarm_join "zk://${ZK_HOST}/test"
-	retry 30 1 check_swarm_nodes
+	swarm_join "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_list "$DISCOVERY"
+
+	# Then, start a manager and ensure it sees all the engines.
+	swarm_manage "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_info
+}
+
+@test "zk discovery: watch for changes" {
+	# The goal of this test is to ensure swarm can see new nodes as they join
+	# the cluster.
+	start_store
+
+	# Start a manager with no engines.
+	swarm_manage "$DISCOVERY"
+	retry 10 1 discovery_check_swarm_info
+
+	# Add engines to the cluster and make sure they're picked up by swarm.
+	start_docker 2
+	swarm_join "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_list "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_info
 }
+
+@test "zk discovery: failure" {
+	# The goal of this test is to simulate a store failure and ensure discovery
+	# is resilient to it.
+
+	# At this point, the store is not yet started.
+
+	# Start 2 engines and join the cluster. They should keep retrying.
+	start_docker 2
+	swarm_join "$DISCOVERY"
+
+	# Start a manager. It should keep retrying.
+	swarm_manage "$DISCOVERY"
+
+	# Now start the store.
+	start_store
+
+	# After a while, `join` and `manage` should reach the store.
+	retry 5 1 discovery_check_swarm_list "$DISCOVERY"
+	retry 5 1 discovery_check_swarm_info
+}
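
One detail shared by the consul, etcd, and zk suites is the port-selection arithmetic. `RANDOM % 1000` yields a value in 0..999, so each suite draws from its own 1000-port window (7000s for zk, 8000s for consul, 9000s for etcd), which keeps the three stores from colliding when the suites run on the same host, though it does not check whether the chosen port is already in use:

    # zk suite: prints a port in 7000..7999.
    echo $(( ( RANDOM % 1000 ) + 7000 ))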

test/integration/helpers.bash

@@ -85,11 +85,6 @@ function wait_until_reachable() {
 	retry 10 1 docker -H $1 info
 }
 
-# Returns true if all nodes have joined the swarm.
-function check_swarm_nodes() {
-	docker_swarm info | grep -q "Nodes: ${#HOSTS[@]}"
-}
-
 # Start the swarm manager in background.
 function swarm_manage() {
 	local discovery
@@ -99,10 +94,9 @@ function swarm_manage() {
 		discovery="$@"
 	fi
 
-	"$SWARM_BINARY" manage -H "$SWARM_HOST" --cluster-opt "swarm.discovery.heartbeat=1s" "$discovery" &
+	"$SWARM_BINARY" -l debug manage -H "$SWARM_HOST" --cluster-opt "swarm.discovery.heartbeat=1s" "$discovery" &
 	SWARM_PID=$!
 	wait_until_reachable "$SWARM_HOST"
-	retry 10 1 check_swarm_nodes
 }
 
 # swarm join every engine created with `start_docker`.
@@ -120,23 +114,12 @@ function swarm_join() {
 	# Start the engines.
 	local i
-	echo "current: $current | nodes: $nodes" > log
 	for ((i=current; i < nodes; i++)); do
 		local h="${HOSTS[$i]}"
 		echo "Swarm join #${i}: $h $addr"
-		"$SWARM_BINARY" join --heartbeat=1s --addr="$h" "$addr" &
+		"$SWARM_BINARY" -l debug join --heartbeat=1s --addr="$h" "$addr" &
 		SWARM_JOIN_PID[$i]=$!
 	done
-
-	retry 10 0.5 check_discovery_nodes "$addr"
-}
+}
-
-# Returns true if all nodes have joined the discovery.
-function check_discovery_nodes() {
-	local joined=`swarm list "$1" | wc -l`
-	local total=${#HOSTS[@]}
-	echo "${joined} out of ${total} hosts joined discovery"
-	[ "$joined" -eq "$total" ]
-}
 
 # Stops the manager.
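
The `retry` helper that the new assertions lean on is defined elsewhere in helpers.bash and is not part of this diff. A minimal sketch consistent with the `retry <attempts> <sleep> <command...>` call sites above; the real implementation may differ:

    # Minimal sketch, not the actual helper from helpers.bash.
    function retry() {
    	local attempts=$1; shift
    	local delay=$1; shift
    	local i
    	for ((i=0; i < attempts; i++)); do
    		"$@" && return 0
    		sleep "$delay"
    	done
    	return 1
    }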