From 36ca8ff63ff9575fab52e868d93baaf871b5dbc1 Mon Sep 17 00:00:00 2001 From: Dong Chen Date: Thu, 12 Nov 2015 16:17:29 -0800 Subject: [PATCH 1/4] Add a random delay to avoid synchronized registration at swarm join. Signed-off-by: Dong Chen --- cli/join.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cli/join.go b/cli/join.go index d8aa732b77..f59c005f91 100644 --- a/cli/join.go +++ b/cli/join.go @@ -1,6 +1,7 @@ package cli import ( + "math/rand" "regexp" "time" @@ -48,6 +49,12 @@ func join(c *cli.Context) { log.Fatal(err) } + // add a random delay [0,hb) at start to avoid synchronized registration + r := rand.New(rand.NewSource(time.Now().UTC().UnixNano())) + delay := time.Duration(r.Int63n(int64(hb))) + log.Infof("Add a random delay %s to avoid synchronized registration", delay) + time.Sleep(delay) + for { log.WithFields(log.Fields{"addr": addr, "discovery": dflag}).Infof("Registering on the discovery service every %s...", hb) if err := d.Register(addr); err != nil { From db5c8aba7cfdbc971c07e523b2c2c500faf66af1 Mon Sep 17 00:00:00 2001 From: Dong Chen Date: Thu, 3 Dec 2015 15:01:02 -0800 Subject: [PATCH 2/4] Add a command line option for swam join delay. Signed-off-by: Dong Chen --- cli/commands.go | 2 +- cli/flags.go | 5 +++++ cli/join.go | 17 ++++++++++++----- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/cli/commands.go b/cli/commands.go index 73790935c9..d3d76d46f4 100644 --- a/cli/commands.go +++ b/cli/commands.go @@ -36,7 +36,7 @@ var ( Name: "join", ShortName: "j", Usage: "join a docker cluster", - Flags: []cli.Flag{flJoinAdvertise, flHeartBeat, flTTL, flDiscoveryOpt}, + Flags: []cli.Flag{flJoinAdvertise, flHeartBeat, flTTL, flJoinRandomDelay, flDiscoveryOpt}, Action: join, }, } diff --git a/cli/flags.go b/cli/flags.go index 6d783189e9..b7d4e617b4 100644 --- a/cli/flags.go +++ b/cli/flags.go @@ -32,6 +32,11 @@ var ( Usage: "Address of the Docker Engine joining the cluster. 
Swarm manager(s) MUST be able to reach the Docker Engine at this address.", EnvVar: "SWARM_ADVERTISE", } + flJoinRandomDelay = cli.StringFlag{ + Name: "joindelay", + Value: "0s", + Usage: "add a random delay in [0s,joindelay] to avoid synchronized registration", + } flManageAdvertise = cli.StringFlag{ Name: "advertise, addr", Usage: "Address of the swarm manager joining the cluster. Other swarm manager(s) MUST be able to reach the swarm manager at this address.", diff --git a/cli/join.go b/cli/join.go index f59c005f91..5de81276a8 100644 --- a/cli/join.go +++ b/cli/join.go @@ -29,6 +29,11 @@ func join(c *cli.Context) { log.Fatal("--advertise should be of the form ip:port or hostname:port") } + joinDelay, err := time.ParseDuration(c.String("joindelay")) + if err != nil { + log.Fatalf("invalid --joindelay: %v", err) + } + hb, err := time.ParseDuration(c.String("heartbeat")) if err != nil { log.Fatalf("invalid --heartbeat: %v", err) @@ -49,11 +54,13 @@ func join(c *cli.Context) { log.Fatal(err) } - // add a random delay [0,hb) at start to avoid synchronized registration - r := rand.New(rand.NewSource(time.Now().UTC().UnixNano())) - delay := time.Duration(r.Int63n(int64(hb))) - log.Infof("Add a random delay %s to avoid synchronized registration", delay) - time.Sleep(delay) + // add a random delay between 0s and joinDelay at start to avoid synchronized registration + if joinDelay > 0 { + r := rand.New(rand.NewSource(time.Now().UTC().UnixNano())) + delay := time.Duration(r.Int63n(int64(joinDelay))) + log.Infof("Add a random delay %s to avoid synchronized registration", delay) + time.Sleep(delay) + } for { log.WithFields(log.Fields{"addr": addr, "discovery": dflag}).Infof("Registering on the discovery service every %s...", hb) From 2c029f9795fc60a53ae2fa6445983aba9970a5f5 Mon Sep 17 00:00:00 2001 From: Dong Chen Date: Tue, 8 Dec 2015 15:42:19 -0800 Subject: [PATCH 3/4] Change '--joindelay' to '--delay' since it's a join option. 
Signed-off-by: Dong Chen --- cli/flags.go | 4 ++-- cli/join.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cli/flags.go b/cli/flags.go index b7d4e617b4..2d5024d63d 100644 --- a/cli/flags.go +++ b/cli/flags.go @@ -33,9 +33,9 @@ var ( EnvVar: "SWARM_ADVERTISE", } flJoinRandomDelay = cli.StringFlag{ - Name: "joindelay", + Name: "delay", Value: "0s", - Usage: "add a random delay in [0s,joindelay] to avoid synchronized registration", + Usage: "add a random delay in [0s,delay] to avoid synchronized registration", } flManageAdvertise = cli.StringFlag{ Name: "advertise, addr", diff --git a/cli/join.go b/cli/join.go index 5de81276a8..e302e15f73 100644 --- a/cli/join.go +++ b/cli/join.go @@ -29,9 +29,9 @@ func join(c *cli.Context) { log.Fatal("--advertise should be of the form ip:port or hostname:port") } - joinDelay, err := time.ParseDuration(c.String("joindelay")) + joinDelay, err := time.ParseDuration(c.String("delay")) if err != nil { - log.Fatalf("invalid --joindelay: %v", err) + log.Fatalf("invalid --delay: %v", err) } hb, err := time.ParseDuration(c.String("heartbeat")) From 13c2b60ca8c3e322a005f4e7d43325410432affc Mon Sep 17 00:00:00 2001 From: Dong Chen Date: Mon, 14 Dec 2015 17:21:15 -0800 Subject: [PATCH 4/4] Update documentation for swarm join `--delay` option. Signed-off-by: Dong Chen --- docs/discovery.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/discovery.md b/docs/discovery.md index 4213c0a108..f111281c75 100644 --- a/docs/discovery.md +++ b/docs/discovery.md @@ -29,7 +29,7 @@ For details about libkv and a detailed technical overview of the supported backe 1. On each node, start the Swarm agent. - The node IP address doesn't have to be public as long as the swarm manager can access it. + The node IP address doesn't have to be public as long as the swarm manager can access it. In a large cluster, the nodes joining swarm may trigger request spikes to discovery. 
For example, a large number of nodes may be added by a script, or may recover from a network partition, at the same time. This may cause discovery to fail. You can use the `--delay` option to specify a delay limit. Swarm join will then wait for a random delay shorter than this limit before registering, which reduces the pressure on discovery. **Etcd**: