mirror of https://github.com/docker/docs.git
Implement engine refresh backoff strategy for failing nodes. Use a backoff factor so that the backoff speed is relative to the user's refresh-interval setting.
Signed-off-by: Dong Chen <dongluo.chen@docker.com>
This commit is contained in:
parent d9914ffa59
commit b03bf75557
@@ -65,11 +65,12 @@ func newDelayer(rangeMin, rangeMax time.Duration) *delayer {
 	}
 }
 
-func (d *delayer) Wait() <-chan time.Time {
+// Wait returns timeout event after fixed + randomized time duration
+func (d *delayer) Wait(backoffFactor int) <-chan time.Time {
 	d.l.Lock()
 	defer d.l.Unlock()
 
-	waitPeriod := int64(d.rangeMin)
+	waitPeriod := int64(d.rangeMin) * int64(1+backoffFactor)
 	if delta := int64(d.rangeMax) - int64(d.rangeMin); delta > 0 {
 		// Int63n panics if the parameter is 0
 		waitPeriod += d.r.Int63n(delta)
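For reference, the following is a minimal, standalone sketch of the wait-period math this hunk introduces. The names rangeMin, rangeMax and backoffFactor mirror the diff, but the function below is only an illustration of how the factor scales the delay under those assumptions, not the swarm delayer itself.

package main

import (
	"fmt"
	"math/rand"
	"time"
)

// waitPeriod mirrors the arithmetic in the patched Wait(): the base delay
// grows linearly with the backoff factor, and the original jitter in
// [0, rangeMax-rangeMin) is kept on top.
func waitPeriod(rangeMin, rangeMax time.Duration, backoffFactor int, r *rand.Rand) time.Duration {
	period := int64(rangeMin) * int64(1+backoffFactor)
	if delta := int64(rangeMax) - int64(rangeMin); delta > 0 {
		period += r.Int63n(delta)
	}
	return time.Duration(period)
}

func main() {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	// Illustrative values only: with rangeMin=5s, factor 0 waits ~5-10s,
	// factor 5 waits ~30-35s.
	for _, factor := range []int{0, 1, 5} {
		fmt.Println(factor, waitPeriod(5*time.Second, 10*time.Second, factor, r))
	}
}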
@@ -605,13 +606,21 @@ func (e *Engine) updateContainer(c dockerclient.Container, containers map[string
 
 // refreshLoop periodically triggers engine refresh.
 func (e *Engine) refreshLoop() {
+	const maxBackoffFactor int = 1000
 	for {
 		var err error
 
+		// Engines keep failing should backoff
+		// e.failureCount and e.opts.FailureRetry are type of int
+		backoffFactor := e.failureCount - e.opts.FailureRetry
+		if backoffFactor < 0 {
+			backoffFactor = 0
+		} else if backoffFactor > maxBackoffFactor {
+			backoffFactor = maxBackoffFactor
+		}
 		// Wait for the delayer or quit if we get stopped.
 		select {
-		case <-e.refreshDelayer.Wait():
+		case <-e.refreshDelayer.Wait(backoffFactor):
 		case <-e.stopCh:
 			return
 		}
 
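A small sketch of the clamping applied in refreshLoop above. Here failureCount and failureRetry stand in for e.failureCount and e.opts.FailureRetry; the values in main are illustrative, not taken from the swarm codebase.

package main

import "fmt"

const maxBackoffFactor = 1000

// backoffFactor grows once the failure count exceeds the allowed retries,
// and is capped so the refresh interval cannot grow without bound.
func backoffFactor(failureCount, failureRetry int) int {
	f := failureCount - failureRetry
	if f < 0 {
		f = 0 // healthy, or still within the allowed retries: no extra delay
	} else if f > maxBackoffFactor {
		f = maxBackoffFactor
	}
	return f
}

func main() {
	// With failureRetry=3: 0 failures -> 0, 5 failures -> 2, 2000 failures -> 1000.
	fmt.Println(backoffFactor(0, 3), backoffFactor(5, 3), backoffFactor(2000, 3))
}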
@@ -41,7 +41,7 @@ function setup_discovery_file() {
 	# Restart node
 	docker_host start ${DOCKER_CONTAINERS[0]}
 	# Wait for swarm to detect node recovery
-	retry 5 1 eval "docker_swarm info | grep -q -i 'Status: Healthy'"
+	retry 15 1 eval "docker_swarm info | grep -q -i 'Status: Healthy'"
 }
 
 @test "node pending and recovery" {
@@ -155,7 +155,8 @@ function containerRunning() {
 	# Restart node-0
 	docker_host start ${DOCKER_CONTAINERS[0]}
 	# Wait for node-0 to be healthy
-	retry 5 1 eval "test \"$(docker_swarm info | grep \"Status: Unhealthy\" | wc -l)\" = '0'"
+	# Failing node refresh interval increases over time. Provide enough retry here.
+	retry 30 1 eval "test \"$(docker_swarm info | grep \"Status: Unhealthy\" | wc -l)\" = '0'"
 
 	# Stop node-1
 	docker_host stop ${DOCKER_CONTAINERS[1]}
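The larger retry budget follows from the delay growth above. As a rough, assumed example (a 30-second minimum refresh interval and FailureRetry of 3, neither value taken from this diff), the sketch below adds up the minimum time swarm waits across consecutive failed refreshes, which is why 5 one-second retries are no longer enough.

package main

import (
	"fmt"
	"time"
)

func main() {
	const (
		rangeMin     = 30 * time.Second // assumed minimum refresh interval
		failureRetry = 3                // assumed FailureRetry setting
	)
	var elapsed time.Duration
	for failureCount := 1; failureCount <= 6; failureCount++ {
		// Same clamp as refreshLoop: no extra delay until retries are exhausted.
		factor := failureCount - failureRetry
		if factor < 0 {
			factor = 0
		}
		wait := rangeMin * time.Duration(1+factor)
		elapsed += wait
		fmt.Printf("after %d failures: next wait >= %v, elapsed >= %v\n",
			failureCount, wait, elapsed)
	}
}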