Merge pull request #1410 from dongluochen/joinSpike

Add a random delay to avoid synchronized registration at swarm join. Simple fix for #1353
This commit is contained in:
Alexandre Beslic 2016-01-04 12:28:17 -08:00
commit 40f26856a5
4 changed files with 21 additions and 2 deletions

View File

@ -36,7 +36,7 @@ var (
Name: "join", Name: "join",
ShortName: "j", ShortName: "j",
Usage: "join a docker cluster", Usage: "join a docker cluster",
Flags: []cli.Flag{flJoinAdvertise, flHeartBeat, flTTL, flDiscoveryOpt}, Flags: []cli.Flag{flJoinAdvertise, flHeartBeat, flTTL, flJoinRandomDelay, flDiscoveryOpt},
Action: join, Action: join,
}, },
} }

View File

@ -32,6 +32,11 @@ var (
Usage: "Address of the Docker Engine joining the cluster. Swarm manager(s) MUST be able to reach the Docker Engine at this address.", Usage: "Address of the Docker Engine joining the cluster. Swarm manager(s) MUST be able to reach the Docker Engine at this address.",
EnvVar: "SWARM_ADVERTISE", EnvVar: "SWARM_ADVERTISE",
} }
// flJoinRandomDelay is the --delay flag of the join command. Its value is
// parsed with time.ParseDuration; when it is greater than zero, join sleeps
// for a random duration before it starts registering on the discovery
// service. The random duration is drawn with Int63n, so the upper bound is
// exclusive: the sleep is always strictly shorter than the given delay.
// The default of "0s" disables the delay.
flJoinRandomDelay = cli.StringFlag{
	Name:  "delay",
	Value: "0s",
	Usage: "add a random delay in [0s,delay) to avoid synchronized registration",
}
flManageAdvertise = cli.StringFlag{ flManageAdvertise = cli.StringFlag{
Name: "advertise, addr", Name: "advertise, addr",
Usage: "Address of the swarm manager joining the cluster. Other swarm manager(s) MUST be able to reach the swarm manager at this address.", Usage: "Address of the swarm manager joining the cluster. Other swarm manager(s) MUST be able to reach the swarm manager at this address.",

View File

@ -1,6 +1,7 @@
package cli package cli
import ( import (
"math/rand"
"regexp" "regexp"
"time" "time"
@ -28,6 +29,11 @@ func join(c *cli.Context) {
log.Fatal("--advertise should be of the form ip:port or hostname:port") log.Fatal("--advertise should be of the form ip:port or hostname:port")
} }
joinDelay, err := time.ParseDuration(c.String("delay"))
if err != nil {
log.Fatalf("invalid --delay: %v", err)
}
hb, err := time.ParseDuration(c.String("heartbeat")) hb, err := time.ParseDuration(c.String("heartbeat"))
if err != nil { if err != nil {
log.Fatalf("invalid --heartbeat: %v", err) log.Fatalf("invalid --heartbeat: %v", err)
@ -48,6 +54,14 @@ func join(c *cli.Context) {
log.Fatal(err) log.Fatal(err)
} }
// add a random delay between 0s and joinDelay at start to avoid synchronized registration
if joinDelay > 0 {
r := rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
delay := time.Duration(r.Int63n(int64(joinDelay)))
log.Infof("Add a random delay %s to avoid synchronized registration", delay)
time.Sleep(delay)
}
for { for {
log.WithFields(log.Fields{"addr": addr, "discovery": dflag}).Infof("Registering on the discovery service every %s...", hb) log.WithFields(log.Fields{"addr": addr, "discovery": dflag}).Infof("Registering on the discovery service every %s...", hb)
if err := d.Register(addr); err != nil { if err := d.Register(addr); err != nil {

View File

@ -29,7 +29,7 @@ For details about libkv and a detailed technical overview of the supported backe
1. On each node, start the Swarm agent. 1. On each node, start the Swarm agent.
The node IP address doesn't have to be public as long as the swarm manager can access it. The node IP address doesn't have to be public as long as the swarm manager can access it. In a large cluster, nodes joining the swarm may trigger request spikes on the discovery service, for example when a large number of nodes are added by a script or recover from a network partition. This may result in discovery failures. You can use the `--delay` option to specify a delay limit. `swarm join` then waits for a random delay shorter than this limit before registering, which reduces pressure on the discovery service.
**Etcd**: **Etcd**: