diff --git a/cli/commands.go b/cli/commands.go index a1e8263e3f..cd848b3770 100644 --- a/cli/commands.go +++ b/cli/commands.go @@ -36,7 +36,7 @@ var ( Name: "join", ShortName: "j", Usage: "join a docker cluster", - Flags: []cli.Flag{flJoinAdvertise, flHeartBeat, flTTL, flDiscoveryOpt}, + Flags: []cli.Flag{flJoinAdvertise, flHeartBeat, flTTL, flJoinRandomDelay, flDiscoveryOpt}, Action: join, }, } diff --git a/cli/flags.go b/cli/flags.go index 9553a93635..bfae7b9c51 100644 --- a/cli/flags.go +++ b/cli/flags.go @@ -32,6 +32,11 @@ var ( Usage: "Address of the Docker Engine joining the cluster. Swarm manager(s) MUST be able to reach the Docker Engine at this address.", EnvVar: "SWARM_ADVERTISE", } + flJoinRandomDelay = cli.StringFlag{ + Name: "delay", + Value: "0s", + Usage: "add a random delay in [0s,delay] to avoid synchronized registration", + } flManageAdvertise = cli.StringFlag{ Name: "advertise, addr", Usage: "Address of the swarm manager joining the cluster. Other swarm manager(s) MUST be able to reach the swarm manager at this address.", diff --git a/cli/join.go b/cli/join.go index d8aa732b77..e302e15f73 100644 --- a/cli/join.go +++ b/cli/join.go @@ -1,6 +1,7 @@ package cli import ( + "math/rand" "regexp" "time" @@ -28,6 +29,11 @@ func join(c *cli.Context) { log.Fatal("--advertise should be of the form ip:port or hostname:port") } + joinDelay, err := time.ParseDuration(c.String("delay")) + if err != nil { + log.Fatalf("invalid --delay: %v", err) + } + hb, err := time.ParseDuration(c.String("heartbeat")) if err != nil { log.Fatalf("invalid --heartbeat: %v", err) @@ -48,6 +54,14 @@ func join(c *cli.Context) { log.Fatal(err) } + // add a random delay between 0s and joinDelay at start to avoid synchronized registration + if joinDelay > 0 { + r := rand.New(rand.NewSource(time.Now().UTC().UnixNano())) + delay := time.Duration(r.Int63n(int64(joinDelay))) + log.Infof("Add a random delay %s to avoid synchronized registration", delay) + time.Sleep(delay) + } + for { 
log.WithFields(log.Fields{"addr": addr, "discovery": dflag}).Infof("Registering on the discovery service every %s...", hb) if err := d.Register(addr); err != nil { diff --git a/docs/discovery.md b/docs/discovery.md index 4213c0a108..f111281c75 100644 --- a/docs/discovery.md +++ b/docs/discovery.md @@ -29,7 +29,7 @@ For details about libkv and a detailed technical overview of the supported backe 1. On each node, start the Swarm agent. - The node IP address doesn't have to be public as long as the swarm manager can access it. + The node IP address doesn't have to be public as long as the swarm manager can access it. In a large cluster, many nodes joining the swarm at the same time may trigger request spikes on the discovery service, for example when a large number of nodes are added by a script or recover from a network partition. Such spikes may cause discovery to fail. Use the `--delay` option to specify a delay limit: `swarm join` then waits for a random delay shorter than this limit before registering, which reduces the pressure on the discovery service. **Etcd**: