Merge pull request #1236 from abronan/test_leader_failure

Add integration test for leader election in the event of a backend store failure
This commit is contained in:
Xian Chaobo 2015-09-30 09:16:39 +08:00
commit 67bfdd67a6
1 changed file with 59 additions and 3 deletions

View File

@ -2,8 +2,8 @@
load helpers
# Address on which the store will listen (random port between 8000 and 9000).
STORE_HOST=127.0.0.1:$(( ( RANDOM % 1000 ) + 8000 ))
# Address on which the store will listen
STORE_HOST=127.0.0.1:8500
# Discovery parameter for Swarm
DISCOVERY="consul://${STORE_HOST}/test"
@ -13,7 +13,7 @@ CONTAINER_NAME=swarm_leader
# Launch a single-node Consul server in a container to act as the Swarm
# discovery/KV backend. Publishes the container's port 8500 on $STORE_HOST
# and mounts the local consul config directory into the container.
# NOTE(review): relies on the docker_host helper and the CONTAINER_NAME /
# STORE_HOST globals defined earlier in this file.
function start_store() {
docker_host run -v $(pwd)/discovery/consul/config:/config --name=$CONTAINER_NAME -h $CONTAINER_NAME -p $STORE_HOST:8500 -d progrium/consul -server -bootstrap-expect 1 -config-file=/config/consul.json
# FIXME: We have to wait a few seconds for the store to come up.
# Wait a few seconds for the store to come up.
sleep 3
}
@ -58,3 +58,59 @@ function teardown() {
[[ "${output}" == *"Role: replica"* ]]
[[ "${output}" == *"Primary: ${SWARM_HOSTS[1]}"* ]]
}
# Verify that leader re-election converges after the backend store (Consul),
# which holds the leader metadata, is stopped and restarted: exactly one
# manager must end up primary and the other two must be replicas pointing
# at it.
@test "leader election - store failure" {
	# Bring up one manager, make sure it becomes primary.
	swarm_manage --replication --leaderTTL "4s" --advertise 127.0.0.1:$SWARM_BASE_PORT "$DISCOVERY"
	run docker -H "${SWARM_HOSTS[0]}" info
	[[ "${output}" == *"Role: primary"* ]]

	# Fire up a second manager. Ensure it's a replica forwarding to the right primary.
	swarm_manage --replication --leaderTTL "4s" --advertise 127.0.0.1:$((SWARM_BASE_PORT + 1)) "$DISCOVERY"
	run docker -H "${SWARM_HOSTS[1]}" info
	[[ "${output}" == *"Role: replica"* ]]
	[[ "${output}" == *"Primary: ${SWARM_HOSTS[0]}"* ]]

	# Fire up a third manager. Ensure it's a replica forwarding to the right primary.
	swarm_manage --replication --leaderTTL "4s" --advertise 127.0.0.1:$((SWARM_BASE_PORT + 2)) "$DISCOVERY"
	run docker -H "${SWARM_HOSTS[2]}" info
	[[ "${output}" == *"Role: replica"* ]]
	[[ "${output}" == *"Primary: ${SWARM_HOSTS[0]}"* ]]

	# Stop and start the store holding the leader metadata.
	stop_store
	sleep 3
	start_store

	# Wait a little bit for the re-election to occur.
	# This is specific to Consul (liveness over safety).
	sleep 6

	# Make sure the managers are either in the 'primary' or the 'replica' state.
	for host in "${SWARM_HOSTS[@]}"; do
		retry 120 1 eval "docker -H ${host} info | grep -Eq 'Role: primary|Role: replica'"
	done

	# Find out which node is the Primary and which ones are Replicas
	# after the store failure. 'primary' stays empty until a manager
	# actually reports the primary role, so a failed election is caught
	# explicitly below rather than being masked by a default value.
	primary=""
	declare -a replicas=()
	for host in "${SWARM_HOSTS[@]}"; do
		run docker -H "$host" info
		if [[ "${output}" == *"Role: primary"* ]]; then
			primary=$host
		else
			replicas+=("$host")
		fi
	done

	# Exactly one manager must have won the election...
	[[ -n "$primary" ]]
	# ...and the other two must be replicas.
	[[ "${#replicas[@]}" -eq 2 ]]

	# Check that the replicas are pointing to the right Primary.
	for host in "${replicas[@]}"; do
		run docker -H "$host" info
		[[ "${output}" == *"Primary: ${primary}"* ]]
	done
}