Use failureCount as a secondary health indicator.

Signed-off-by: Dong Chen <dongluo.chen@docker.com>
This commit is contained in:
Dong Chen 2015-12-08 17:58:36 -08:00
parent 91f9a4e89b
commit 4d24256c19
2 changed files with 36 additions and 2 deletions

View File

@ -24,6 +24,9 @@ const (
// Minimum docker engine version supported by swarm. // Minimum docker engine version supported by swarm.
minSupportedVersion = version.Version("1.6.0") minSupportedVersion = version.Version("1.6.0")
// Engine failureCount threshold: a healthy engine is flagged unhealthy once its failureCount reaches this value
engineFailureCountThreshold = 3
) )
// delayer offers a simple API to random delay within a given time range. // delayer offers a simple API to random delay within a given time range.
@ -83,6 +86,7 @@ type Engine struct {
client dockerclient.Client client dockerclient.Client
eventHandler EventHandler eventHandler EventHandler
healthy bool healthy bool
failureCount int64
overcommitRatio int64 overcommitRatio int64
opts *EngineOpts opts *EngineOpts
} }
@ -192,6 +196,27 @@ func (e *Engine) Status() string {
return "Unhealthy" return "Unhealthy"
} }
// IncFailureCount adds one to the engine's failure count. If the engine is
// currently healthy and the count has reached engineFailureCountThreshold,
// the engine is flagged as unhealthy.
func (e *Engine) IncFailureCount() {
	e.Lock()
	defer e.Unlock()

	e.failureCount++
	// Nothing more to do for an already-unhealthy engine or a count that is
	// still below the threshold.
	if !e.healthy || e.failureCount < engineFailureCountThreshold {
		return
	}
	e.healthy = false
}
// SetEngineHealth stores state as the engine's healthy flag. When state is
// true the failure count is reset to zero; when it is false the failure count
// is left untouched.
func (e *Engine) SetEngineHealth(state bool) {
	e.Lock()
	defer e.Unlock()

	// A healthy engine starts over with a clean failure count.
	if state {
		e.failureCount = 0
	}
	e.healthy = state
}
// Gather engine specs (CPU, memory, constraints, ...). // Gather engine specs (CPU, memory, constraints, ...).
func (e *Engine) updateSpecs() error { func (e *Engine) updateSpecs() error {
info, err := e.client.Info() info, err := e.client.Info()
@ -434,7 +459,7 @@ func (e *Engine) refreshLoop() {
failedAttempts++ failedAttempts++
if failedAttempts >= e.opts.RefreshRetry && e.healthy { if failedAttempts >= e.opts.RefreshRetry && e.healthy {
e.emitEvent("engine_disconnect") e.emitEvent("engine_disconnect")
e.healthy = false e.SetEngineHealth(false)
log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Errorf("Flagging engine as dead. Updated state failed %d times: %v", failedAttempts, err) log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Errorf("Flagging engine as dead. Updated state failed %d times: %v", failedAttempts, err)
} }
} else { } else {
@ -448,7 +473,7 @@ func (e *Engine) refreshLoop() {
e.client.StartMonitorEvents(e.handler, nil) e.client.StartMonitorEvents(e.handler, nil)
e.emitEvent("engine_reconnect") e.emitEvent("engine_reconnect")
} }
e.healthy = true e.SetEngineHealth(true)
failedAttempts = 0 failedAttempts = 0
} }
} }

View File

@ -38,6 +38,15 @@ var (
} }
) )
// TestEngineFailureCount verifies that a new engine reports healthy before
// each of the first engineFailureCountThreshold calls to IncFailureCount, and
// reports unhealthy once all of those calls have been made.
func TestEngineFailureCount(t *testing.T) {
	engine := NewEngine("test", 0, engOpts)

	for remaining := engineFailureCountThreshold; remaining > 0; remaining-- {
		assert.True(t, engine.IsHealthy())
		engine.IncFailureCount()
	}

	assert.False(t, engine.IsHealthy())
}
func TestEngineConnectionFailure(t *testing.T) { func TestEngineConnectionFailure(t *testing.T) {
engine := NewEngine("test", 0, engOpts) engine := NewEngine("test", 0, engOpts)
assert.False(t, engine.isConnected()) assert.False(t, engine.isConnected())