Merge pull request #1636 from jimenez/checkpoint_failover

Enabling checkpoint failover in FrameworkInfo
This commit is contained in:
Victor Vieux 2016-01-14 10:18:09 -08:00
commit 08839f62fa
4 changed files with 21 additions and 7 deletions

View File

@ -46,11 +46,12 @@ Options:
{{end}}{{if (eq .Name "manage")}}{{printf "\t * swarm.overcommit=0.05\tovercommit to apply on resources"}}
{{printf "\t * swarm.createretry=0\tcontainer create retry count after initial failure"}}
{{printf "\t * mesos.address=\taddress to bind on [$SWARM_MESOS_ADDRESS]"}}
{{printf "\t * mesos.checkpointfailover=false\tcheckpointing allows a restarted slave to reconnect with old executors and recover status updates, at the cost of disk I/O [$SWARM_MESOS_CHECKPOINT_FAILOVER]"}}
{{printf "\t * mesos.port=\tport to bind on [$SWARM_MESOS_PORT]"}}
{{printf "\t * mesos.offertimeout=30s\ttimeout for offers [$SWARM_MESOS_OFFER_TIMEOUT]"}}
{{printf "\t * mesos.offerrefusetimeout=5s\tseconds to consider unused resources refused [$SWARM_MESOS_OFFER_REFUSE_TIMEOUT]"}}
{{printf "\t * mesos.tasktimeout=5s\ttimeout for task creation [$SWARM_MESOS_TASK_TIMEOUT]"}}
{{printf "\t * mesos.user=\tframework user [$SWARM_MESOS_USER]"}}
{{printf "\t * mesos.offerrefusetimeout=5s\tseconds to consider unused resources refused [$SWARM_MESOS_OFFER_REFUSE_TIMEOUT]"}}{{end}}{{ end }}
{{printf "\t * mesos.user=\tframework user [$SWARM_MESOS_USER]"}}{{end}}{{ end }}
`
}

View File

@ -119,6 +119,10 @@ func NewCluster(scheduler *scheduler.Scheduler, TLSConfig *tls.Config, master st
driverConfig.BindingAddress = bindingAddress
}
if checkpointFailover, ok := options.Bool("mesos.checkpointfailover", "SWARM_MESOS_CHECKPOINT_FAILOVER"); ok {
driverConfig.Framework.Checkpoint = &checkpointFailover
}
if offerTimeout, ok := options.String("mesos.offertimeout", "SWARM_MESOS_OFFER_TIMEOUT"); ok {
d, err := time.ParseDuration(offerTimeout)
if err != nil {

View File

@ -58,3 +58,12 @@ func (do DriverOpts) IP(key, env string) (net.IP, bool) {
}
return nil, false
}
// Bool returns a boolean from the driver options.
//
// The raw value is obtained from do.String using the option key and the
// environment variable name env, and is then parsed with strconv.ParseBool.
//
// The second result reports whether a usable value was found: it is true only
// when the option is present and parses as a boolean. A missing option or a
// value that is not a valid boolean yields (false, false), so callers never
// act on an unparseable setting as if it were an explicit "false".
func (do DriverOpts) Bool(key, env string) (bool, bool) {
	value, ok := do.String(key, env)
	if !ok {
		return false, false
	}
	b, err := strconv.ParseBool(value)
	if err != nil {
		// Treat an invalid value as "not set" instead of silently
		// coercing it to false.
		return false, false
	}
	return b, true
}

View File

@ -8,7 +8,7 @@ export SWARM_MESOS_USER=root
MESOS_IMAGE=dockerswarm/mesos:0.25.0
MESOS_MASTER_PORT=$(( ( RANDOM % 1000 ) + 10000 ))
# Start mesos master and slave.
# Start mesos master and agent.
function start_mesos() {
local current=${#DOCKER_CONTAINERS[@]}
MESOS_MASTER=$(
@ -19,7 +19,7 @@ function start_mesos() {
retry 10 1 eval "docker_host ps | grep 'mesos-master'"
for ((i=0; i < current; i++)); do
local docker_port=$(echo ${HOSTS[$i]} | cut -d: -f2)
MESOS_SLAVES[$i]=$(
MESOS_AGENTS[$i]=$(
docker_host run --privileged -d --name mesos-slave-$i --volumes-from node-$i -v /sys/fs/cgroup:/sys/fs/cgroup --net=host -u root \
$MESOS_IMAGE mesos-slave --master=127.0.0.1:$MESOS_MASTER_PORT --containerizers=docker --attributes="docker_port:$docker_port" --hostname=127.0.0.1 --port=$(($MESOS_MASTER_PORT + (1 + $i))) --docker=/usr/local/bin/docker
)
@ -37,7 +37,7 @@ function start_mesos_zk() {
retry 10 1 eval "docker_host ps | grep 'mesos-master'"
for ((i=0; i < current; i++)); do
local docker_port=$(echo ${HOSTS[$i]} | cut -d: -f2)
MESOS_SLAVES[$i]=$(
MESOS_AGENTS[$i]=$(
docker_host run --privileged -d --name mesos-slave-$i --volumes-from node-$i -v /sys/fs/cgroup:/sys/fs/cgroup --net=host -u root \
$MESOS_IMAGE mesos-slave --master=127.0.0.1:$MESOS_MASTER_PORT --containerizers=docker --attributes="docker_port:$docker_port" --hostname=127.0.0.1 --port=$(($MESOS_MASTER_PORT + (1 + $i))) --docker=/usr/local/bin/docker
)
@ -45,11 +45,11 @@ function start_mesos_zk() {
done
}
# Stop mesos master and slave.
# Stop mesos master and agent.
function stop_mesos() {
echo "Stopping $MESOS_MASTER"
docker_host rm -f -v $MESOS_MASTER > /dev/null;
for id in ${MESOS_SLAVES[@]}; do
for id in ${MESOS_AGENTS[@]}; do
echo "Stopping $id"
docker_host rm -f -v $id > /dev/null;
done