Merge pull request #1517 from dongluochen/EngineFastFailure

Use failureCount as a secondary health indicator.
This commit is contained in:
Victor Vieux 2015-12-17 16:35:29 -08:00
commit 53cf899e31
10 changed files with 151 additions and 39 deletions

View File

@ -386,6 +386,7 @@ func getContainerJSON(c *context, w http.ResponseWriter, r *http.Request) {
client, scheme := newClientAndScheme(c.tlsConfig) client, scheme := newClientAndScheme(c.tlsConfig)
resp, err := client.Get(scheme + "://" + container.Engine.Addr + "/containers/" + container.Id + "/json") resp, err := client.Get(scheme + "://" + container.Engine.Addr + "/containers/" + container.Id + "/json")
container.Engine.CheckConnectionErr(err)
if err != nil { if err != nil {
httpError(w, err.Error(), http.StatusInternalServerError) httpError(w, err.Error(), http.StatusInternalServerError)
return return
@ -670,6 +671,7 @@ func postContainersExec(c *context, w http.ResponseWriter, r *http.Request) {
client, scheme := newClientAndScheme(c.tlsConfig) client, scheme := newClientAndScheme(c.tlsConfig)
resp, err := client.Post(scheme+"://"+container.Engine.Addr+"/containers/"+container.Id+"/exec", "application/json", r.Body) resp, err := client.Post(scheme+"://"+container.Engine.Addr+"/containers/"+container.Id+"/exec", "application/json", r.Body)
container.Engine.CheckConnectionErr(err)
if err != nil { if err != nil {
httpError(w, err.Error(), http.StatusInternalServerError) httpError(w, err.Error(), http.StatusInternalServerError)
return return
@ -796,7 +798,8 @@ func proxyNetwork(c *context, w http.ResponseWriter, r *http.Request) {
func proxyVolume(c *context, w http.ResponseWriter, r *http.Request) { func proxyVolume(c *context, w http.ResponseWriter, r *http.Request) {
var name = mux.Vars(r)["volumename"] var name = mux.Vars(r)["volumename"]
if volume := c.cluster.Volume(name); volume != nil { if volume := c.cluster.Volume(name); volume != nil {
proxy(c.tlsConfig, volume.Engine.Addr, w, r) err := proxy(c.tlsConfig, volume.Engine.Addr, w, r)
volume.Engine.CheckConnectionErr(err)
return return
} }
httpError(w, fmt.Sprintf("No such volume: %s", name), http.StatusNotFound) httpError(w, fmt.Sprintf("No such volume: %s", name), http.StatusNotFound)
@ -838,7 +841,9 @@ func proxyNetworkContainerOperation(c *context, w http.ResponseWriter, r *http.R
} }
// request is forwarded to the container's address // request is forwarded to the container's address
if err := proxyAsync(c.tlsConfig, container.Engine.Addr, w, r, cb); err != nil { err := proxyAsync(c.tlsConfig, container.Engine.Addr, w, r, cb)
container.Engine.CheckConnectionErr(err)
if err != nil {
httpError(w, err.Error(), http.StatusNotFound) httpError(w, err.Error(), http.StatusNotFound)
} }
} }
@ -856,7 +861,9 @@ func proxyContainer(c *context, w http.ResponseWriter, r *http.Request) {
r.URL.Path = strings.Replace(r.URL.Path, name, container.Id, 1) r.URL.Path = strings.Replace(r.URL.Path, name, container.Id, 1)
} }
if err := proxy(c.tlsConfig, container.Engine.Addr, w, r); err != nil { err = proxy(c.tlsConfig, container.Engine.Addr, w, r)
container.Engine.CheckConnectionErr(err)
if err != nil {
httpError(w, err.Error(), http.StatusInternalServerError) httpError(w, err.Error(), http.StatusInternalServerError)
} }
} }
@ -879,7 +886,9 @@ func proxyContainerAndForceRefresh(c *context, w http.ResponseWriter, r *http.Re
container.Refresh() container.Refresh()
} }
if err := proxyAsync(c.tlsConfig, container.Engine.Addr, w, r, cb); err != nil { err = proxyAsync(c.tlsConfig, container.Engine.Addr, w, r, cb)
container.Engine.CheckConnectionErr(err)
if err != nil {
httpError(w, err.Error(), http.StatusInternalServerError) httpError(w, err.Error(), http.StatusInternalServerError)
} }
} }
@ -889,7 +898,8 @@ func proxyImage(c *context, w http.ResponseWriter, r *http.Request) {
name := mux.Vars(r)["name"] name := mux.Vars(r)["name"]
if image := c.cluster.Image(name); image != nil { if image := c.cluster.Image(name); image != nil {
proxy(c.tlsConfig, image.Engine.Addr, w, r) err := proxy(c.tlsConfig, image.Engine.Addr, w, r)
image.Engine.CheckConnectionErr(err)
return return
} }
httpError(w, fmt.Sprintf("No such image: %s", name), http.StatusNotFound) httpError(w, fmt.Sprintf("No such image: %s", name), http.StatusNotFound)
@ -902,7 +912,8 @@ func proxyImageGet(c *context, w http.ResponseWriter, r *http.Request) {
for _, image := range c.cluster.Images() { for _, image := range c.cluster.Images() {
if len(strings.SplitN(name, ":", 2)) == 2 && image.Match(name, true) || if len(strings.SplitN(name, ":", 2)) == 2 && image.Match(name, true) ||
len(strings.SplitN(name, ":", 2)) == 1 && image.Match(name, false) { len(strings.SplitN(name, ":", 2)) == 1 && image.Match(name, false) {
proxy(c.tlsConfig, image.Engine.Addr, w, r) err := proxy(c.tlsConfig, image.Engine.Addr, w, r)
image.Engine.CheckConnectionErr(err)
return return
} }
} }
@ -925,7 +936,8 @@ func proxyImagePush(c *context, w http.ResponseWriter, r *http.Request) {
for _, image := range c.cluster.Images() { for _, image := range c.cluster.Images() {
if tag != "" && image.Match(name, true) || if tag != "" && image.Match(name, true) ||
tag == "" && image.Match(name, false) { tag == "" && image.Match(name, false) {
proxy(c.tlsConfig, image.Engine.Addr, w, r) err := proxy(c.tlsConfig, image.Engine.Addr, w, r)
image.Engine.CheckConnectionErr(err)
return return
} }
} }
@ -969,7 +981,9 @@ func proxyRandom(c *context, w http.ResponseWriter, r *http.Request) {
return return
} }
if err := proxy(c.tlsConfig, engine.Addr, w, r); err != nil { err = proxy(c.tlsConfig, engine.Addr, w, r)
engine.CheckConnectionErr(err)
if err != nil {
httpError(w, err.Error(), http.StatusInternalServerError) httpError(w, err.Error(), http.StatusInternalServerError)
} }
@ -1003,7 +1017,9 @@ func postCommit(c *context, w http.ResponseWriter, r *http.Request) {
} }
// proxy commit request to the right node // proxy commit request to the right node
if err := proxyAsync(c.tlsConfig, container.Engine.Addr, w, r, cb); err != nil { err = proxyAsync(c.tlsConfig, container.Engine.Addr, w, r, cb)
container.Engine.CheckConnectionErr(err)
if err != nil {
httpError(w, err.Error(), http.StatusInternalServerError) httpError(w, err.Error(), http.StatusInternalServerError)
} }
} }
@ -1093,7 +1109,9 @@ func proxyHijack(c *context, w http.ResponseWriter, r *http.Request) {
r.URL.Path = strings.Replace(r.URL.Path, name, container.Id, 1) r.URL.Path = strings.Replace(r.URL.Path, name, container.Id, 1)
} }
if err := hijack(c.tlsConfig, container.Engine.Addr, w, r); err != nil { err = hijack(c.tlsConfig, container.Engine.Addr, w, r)
container.Engine.CheckConnectionErr(err)
if err != nil {
httpError(w, err.Error(), http.StatusInternalServerError) httpError(w, err.Error(), http.StatusInternalServerError)
} }
} }

View File

@ -26,7 +26,7 @@ var (
flHosts, flHosts,
flLeaderElection, flLeaderTTL, flManageAdvertise, flLeaderElection, flLeaderTTL, flManageAdvertise,
flTLS, flTLSCaCert, flTLSCert, flTLSKey, flTLSVerify, flTLS, flTLSCaCert, flTLSCert, flTLSKey, flTLSVerify,
flRefreshIntervalMin, flRefreshIntervalMax, flRefreshRetry, flRefreshIntervalMin, flRefreshIntervalMax, flFailureRetry, flRefreshRetry,
flHeartBeat, flHeartBeat,
flEnableCors, flEnableCors,
flCluster, flDiscoveryOpt, flClusterOpt}, flCluster, flDiscoveryOpt, flClusterOpt},

View File

@ -74,7 +74,12 @@ var (
flRefreshRetry = cli.IntFlag{ flRefreshRetry = cli.IntFlag{
Name: "engine-refresh-retry", Name: "engine-refresh-retry",
Value: 3, Value: 3,
Usage: "set engine refresh retry count on failure", Usage: "deprecated; replaced by --engine-failure-retry",
}
flFailureRetry = cli.IntFlag{
Name: "engine-failure-retry",
Value: 3,
Usage: "set engine failure retry count",
} }
flEnableCors = cli.BoolFlag{ flEnableCors = cli.BoolFlag{
Name: "api-enable-cors, cors", Name: "api-enable-cors, cors",

View File

@ -246,14 +246,19 @@ func manage(c *cli.Context) {
if refreshMaxInterval < refreshMinInterval { if refreshMaxInterval < refreshMinInterval {
log.Fatal("max refresh interval cannot be less than min refresh interval") log.Fatal("max refresh interval cannot be less than min refresh interval")
} }
// engine-refresh-retry is deprecated
refreshRetry := c.Int("engine-refresh-retry") refreshRetry := c.Int("engine-refresh-retry")
if refreshRetry <= 0 { if refreshRetry != 3 {
log.Fatal("invalid refresh retry count") log.Fatal("--engine-refresh-retry is deprecated. Use --engine-failure-retry")
}
failureRetry := c.Int("engine-failure-retry")
if failureRetry <= 0 {
log.Fatal("invalid failure retry count")
} }
engineOpts := &cluster.EngineOpts{ engineOpts := &cluster.EngineOpts{
RefreshMinInterval: refreshMinInterval, RefreshMinInterval: refreshMinInterval,
RefreshMaxInterval: refreshMaxInterval, RefreshMaxInterval: refreshMaxInterval,
RefreshRetry: refreshRetry, FailureRetry: failureRetry,
} }
uri := getDiscovery(c) uri := getDiscovery(c)

View File

@ -59,7 +59,7 @@ func (d *delayer) Wait() <-chan time.Time {
type EngineOpts struct { type EngineOpts struct {
RefreshMinInterval time.Duration RefreshMinInterval time.Duration
RefreshMaxInterval time.Duration RefreshMaxInterval time.Duration
RefreshRetry int FailureRetry int
} }
// Engine represents a docker engine // Engine represents a docker engine
@ -83,6 +83,7 @@ type Engine struct {
client dockerclient.Client client dockerclient.Client
eventHandler EventHandler eventHandler EventHandler
healthy bool healthy bool
failureCount int
overcommitRatio int64 overcommitRatio int64
opts *EngineOpts opts *EngineOpts
} }
@ -184,6 +185,17 @@ func (e *Engine) IsHealthy() bool {
return e.healthy return e.healthy
} }
// setHealthy sets engine healthy state
func (e *Engine) setHealthy(state bool) {
e.Lock()
e.healthy = state
// if engine is healthy, clear failureCount
if state {
e.failureCount = 0
}
e.Unlock()
}
// Status returns the health status of the Engine: Healthy or Unhealthy // Status returns the health status of the Engine: Healthy or Unhealthy
func (e *Engine) Status() string { func (e *Engine) Status() string {
if e.healthy { if e.healthy {
@ -192,9 +204,44 @@ func (e *Engine) Status() string {
return "Unhealthy" return "Unhealthy"
} }
// incFailureCount increases engine's failure count, and set engine as unhealthy if threshold is crossed
func (e *Engine) incFailureCount() {
e.Lock()
e.failureCount++
if e.healthy && e.failureCount >= e.opts.FailureRetry {
e.healthy = false
}
e.Unlock()
}
// CheckConnectionErr checks error from client response and adjust engine healthy indicators
func (e *Engine) CheckConnectionErr(err error) {
if err == nil {
e.setHealthy(true)
return
}
// dockerclient defines ErrConnectionRefused error. but if http client is from swarm, it's not using
// dockerclient. We need string matching for these cases. Remove the first character to deal with
// case sensitive issue
if err == dockerclient.ErrConnectionRefused ||
strings.Contains(err.Error(), "onnection refused") ||
strings.Contains(err.Error(), "annot connect to the docker engine endpoint") {
// each connection refused instance may increase failure count so
// engine can fail fast. Short engine freeze or network failure may result
// in engine marked as unhealthy. If this causes unnecessary failure, engine
// can track last error time. Only increase failure count if last error is
// not too recent, e.g., last error is at least 1 seconds ago.
e.incFailureCount()
return
}
// other errors may be ambiguous. let refresh loop decide healthy or not.
}
// Gather engine specs (CPU, memory, constraints, ...). // Gather engine specs (CPU, memory, constraints, ...).
func (e *Engine) updateSpecs() error { func (e *Engine) updateSpecs() error {
info, err := e.client.Info() info, err := e.client.Info()
e.CheckConnectionErr(err)
if err != nil { if err != nil {
return err return err
} }
@ -204,6 +251,7 @@ func (e *Engine) updateSpecs() error {
} }
v, err := e.client.Version() v, err := e.client.Version()
e.CheckConnectionErr(err)
if err != nil { if err != nil {
return err return err
} }
@ -236,6 +284,7 @@ func (e *Engine) updateSpecs() error {
// RemoveImage deletes an image from the engine. // RemoveImage deletes an image from the engine.
func (e *Engine) RemoveImage(image *Image, name string, force bool) ([]*dockerclient.ImageDelete, error) { func (e *Engine) RemoveImage(image *Image, name string, force bool) ([]*dockerclient.ImageDelete, error) {
array, err := e.client.RemoveImage(name, force) array, err := e.client.RemoveImage(name, force)
e.CheckConnectionErr(err)
e.RefreshImages() e.RefreshImages()
return array, err return array, err
@ -244,13 +293,16 @@ func (e *Engine) RemoveImage(image *Image, name string, force bool) ([]*dockercl
// RemoveNetwork deletes a network from the engine. // RemoveNetwork deletes a network from the engine.
func (e *Engine) RemoveNetwork(network *Network) error { func (e *Engine) RemoveNetwork(network *Network) error {
err := e.client.RemoveNetwork(network.ID) err := e.client.RemoveNetwork(network.ID)
e.CheckConnectionErr(err)
e.RefreshNetworks() e.RefreshNetworks()
return err return err
} }
// RemoveVolume deletes a volume from the engine. // RemoveVolume deletes a volume from the engine.
func (e *Engine) RemoveVolume(name string) error { func (e *Engine) RemoveVolume(name string) error {
if err := e.client.RemoveVolume(name); err != nil { err := e.client.RemoveVolume(name)
e.CheckConnectionErr(err)
if err != nil {
return err return err
} }
@ -266,6 +318,7 @@ func (e *Engine) RemoveVolume(name string) error {
// RefreshImages refreshes the list of images on the engine. // RefreshImages refreshes the list of images on the engine.
func (e *Engine) RefreshImages() error { func (e *Engine) RefreshImages() error {
images, err := e.client.ListImages(true) images, err := e.client.ListImages(true)
e.CheckConnectionErr(err)
if err != nil { if err != nil {
return err return err
} }
@ -281,6 +334,7 @@ func (e *Engine) RefreshImages() error {
// RefreshNetworks refreshes the list of networks on the engine. // RefreshNetworks refreshes the list of networks on the engine.
func (e *Engine) RefreshNetworks() error { func (e *Engine) RefreshNetworks() error {
networks, err := e.client.ListNetworks("") networks, err := e.client.ListNetworks("")
e.CheckConnectionErr(err)
if err != nil { if err != nil {
return err return err
} }
@ -296,6 +350,7 @@ func (e *Engine) RefreshNetworks() error {
// RefreshVolumes refreshes the list of volumes on the engine. // RefreshVolumes refreshes the list of volumes on the engine.
func (e *Engine) RefreshVolumes() error { func (e *Engine) RefreshVolumes() error {
volumes, err := e.client.ListVolumes() volumes, err := e.client.ListVolumes()
e.CheckConnectionErr(err)
if err != nil { if err != nil {
return err return err
} }
@ -313,6 +368,9 @@ func (e *Engine) RefreshVolumes() error {
// FIXME: unexport this method after mesos scheduler stops using it directly // FIXME: unexport this method after mesos scheduler stops using it directly
func (e *Engine) RefreshContainers(full bool) error { func (e *Engine) RefreshContainers(full bool) error {
containers, err := e.client.ListContainers(true, false, "") containers, err := e.client.ListContainers(true, false, "")
// e.CheckConnectionErr(err) is not appropriate here because refresh loop uses
// RefreshContainers function to fail/recover an engine. Adding CheckConnectionErr
// here would result in double count
if err != nil { if err != nil {
return err return err
} }
@ -339,6 +397,7 @@ func (e *Engine) RefreshContainers(full bool) error {
// the container will be inspected. // the container will be inspected.
func (e *Engine) refreshContainer(ID string, full bool) (*Container, error) { func (e *Engine) refreshContainer(ID string, full bool) (*Container, error) {
containers, err := e.client.ListContainers(true, false, fmt.Sprintf("{%q:[%q]}", "id", ID)) containers, err := e.client.ListContainers(true, false, fmt.Sprintf("{%q:[%q]}", "id", ID))
e.CheckConnectionErr(err)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -385,6 +444,7 @@ func (e *Engine) updateContainer(c dockerclient.Container, containers map[string
// Update ContainerInfo. // Update ContainerInfo.
if full { if full {
info, err := e.client.InspectContainer(c.Id) info, err := e.client.InspectContainer(c.Id)
e.CheckConnectionErr(err)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -410,7 +470,6 @@ func (e *Engine) updateContainer(c dockerclient.Container, containers map[string
} }
func (e *Engine) refreshLoop() { func (e *Engine) refreshLoop() {
failedAttempts := 0
for { for {
var err error var err error
@ -431,15 +490,15 @@ func (e *Engine) refreshLoop() {
} }
if err != nil { if err != nil {
failedAttempts++ e.failureCount++
if failedAttempts >= e.opts.RefreshRetry && e.healthy { if e.failureCount >= e.opts.FailureRetry && e.healthy {
e.emitEvent("engine_disconnect") e.emitEvent("engine_disconnect")
e.healthy = false e.setHealthy(false)
log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Errorf("Flagging engine as dead. Updated state failed %d times: %v", failedAttempts, err) log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Errorf("Flagging engine as dead. Updated state failed %d times: %v", e.failureCount, err)
} }
} else { } else {
if !e.healthy { if !e.healthy {
log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Infof("Engine came back to life after %d retries. Hooray!", failedAttempts) log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Infof("Engine came back to life after %d retries. Hooray!", e.failureCount)
if err := e.updateSpecs(); err != nil { if err := e.updateSpecs(); err != nil {
log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Errorf("Update engine specs failed: %v", err) log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Errorf("Update engine specs failed: %v", err)
continue continue
@ -448,8 +507,7 @@ func (e *Engine) refreshLoop() {
e.client.StartMonitorEvents(e.handler, nil) e.client.StartMonitorEvents(e.handler, nil)
e.emitEvent("engine_reconnect") e.emitEvent("engine_reconnect")
} }
e.healthy = true e.setHealthy(true)
failedAttempts = 0
} }
} }
} }
@ -521,7 +579,9 @@ func (e *Engine) Create(config *ContainerConfig, name string, pullImage bool, au
dockerConfig.CpuShares = int64(math.Ceil(float64(config.CpuShares*1024) / float64(e.Cpus))) dockerConfig.CpuShares = int64(math.Ceil(float64(config.CpuShares*1024) / float64(e.Cpus)))
dockerConfig.HostConfig.CpuShares = dockerConfig.CpuShares dockerConfig.HostConfig.CpuShares = dockerConfig.CpuShares
if id, err = client.CreateContainer(&dockerConfig, name, nil); err != nil { id, err = client.CreateContainer(&dockerConfig, name, nil)
e.CheckConnectionErr(err)
if err != nil {
// If the error is other than not found, abort immediately. // If the error is other than not found, abort immediately.
if err != dockerclient.ErrImageNotFound || !pullImage { if err != dockerclient.ErrImageNotFound || !pullImage {
return nil, err return nil, err
@ -531,7 +591,9 @@ func (e *Engine) Create(config *ContainerConfig, name string, pullImage bool, au
return nil, err return nil, err
} }
// ...And try again. // ...And try again.
if id, err = client.CreateContainer(&dockerConfig, name, nil); err != nil { id, err = client.CreateContainer(&dockerConfig, name, nil)
e.CheckConnectionErr(err)
if err != nil {
return nil, err return nil, err
} }
} }
@ -554,7 +616,9 @@ func (e *Engine) Create(config *ContainerConfig, name string, pullImage bool, au
// RemoveContainer a container from the engine. // RemoveContainer a container from the engine.
func (e *Engine) RemoveContainer(container *Container, force, volumes bool) error { func (e *Engine) RemoveContainer(container *Container, force, volumes bool) error {
if err := e.client.RemoveContainer(container.Id, force, volumes); err != nil { err := e.client.RemoveContainer(container.Id, force, volumes)
e.CheckConnectionErr(err)
if err != nil {
return err return err
} }
@ -570,6 +634,7 @@ func (e *Engine) RemoveContainer(container *Container, force, volumes bool) erro
// CreateNetwork creates a network in the engine // CreateNetwork creates a network in the engine
func (e *Engine) CreateNetwork(request *dockerclient.NetworkCreate) (*dockerclient.NetworkCreateResponse, error) { func (e *Engine) CreateNetwork(request *dockerclient.NetworkCreate) (*dockerclient.NetworkCreateResponse, error) {
response, err := e.client.CreateNetwork(request) response, err := e.client.CreateNetwork(request)
e.CheckConnectionErr(err)
e.RefreshNetworks() e.RefreshNetworks()
@ -581,6 +646,7 @@ func (e *Engine) CreateVolume(request *dockerclient.VolumeCreateRequest) (*Volum
volume, err := e.client.CreateVolume(request) volume, err := e.client.CreateVolume(request)
e.RefreshVolumes() e.RefreshVolumes()
e.CheckConnectionErr(err)
if err != nil { if err != nil {
return nil, err return nil, err
@ -594,7 +660,9 @@ func (e *Engine) Pull(image string, authConfig *dockerclient.AuthConfig) error {
if !strings.Contains(image, ":") { if !strings.Contains(image, ":") {
image = image + ":latest" image = image + ":latest"
} }
if err := e.client.PullImage(image, authConfig); err != nil { err := e.client.PullImage(image, authConfig)
e.CheckConnectionErr(err)
if err != nil {
return err return err
} }
@ -606,7 +674,9 @@ func (e *Engine) Pull(image string, authConfig *dockerclient.AuthConfig) error {
// Load an image on the engine // Load an image on the engine
func (e *Engine) Load(reader io.Reader) error { func (e *Engine) Load(reader io.Reader) error {
if err := e.client.LoadImage(reader); err != nil { err := e.client.LoadImage(reader)
e.CheckConnectionErr(err)
if err != nil {
return err return err
} }
@ -618,7 +688,9 @@ func (e *Engine) Load(reader io.Reader) error {
// Import image // Import image
func (e *Engine) Import(source string, repository string, tag string, imageReader io.Reader) error { func (e *Engine) Import(source string, repository string, tag string, imageReader io.Reader) error {
if _, err := e.client.ImportImage(source, repository, tag, imageReader); err != nil { _, err := e.client.ImportImage(source, repository, tag, imageReader)
e.CheckConnectionErr(err)
if err != nil {
return err return err
} }
@ -774,6 +846,7 @@ func (e *Engine) cleanupContainers() {
func (e *Engine) RenameContainer(container *Container, newName string) error { func (e *Engine) RenameContainer(container *Container, newName string) error {
// send rename request // send rename request
err := e.client.RenameContainer(container.Id, newName) err := e.client.RenameContainer(container.Id, newName)
e.CheckConnectionErr(err)
if err != nil { if err != nil {
return err return err
} }
@ -785,14 +858,16 @@ func (e *Engine) RenameContainer(container *Container, newName string) error {
// BuildImage build an image // BuildImage build an image
func (e *Engine) BuildImage(buildImage *dockerclient.BuildImage) (io.ReadCloser, error) { func (e *Engine) BuildImage(buildImage *dockerclient.BuildImage) (io.ReadCloser, error) {
reader, err := e.client.BuildImage(buildImage)
return e.client.BuildImage(buildImage) e.CheckConnectionErr(err)
return reader, err
} }
// TagImage tag an image // TagImage tag an image
func (e *Engine) TagImage(IDOrName string, repo string, tag string, force bool) error { func (e *Engine) TagImage(IDOrName string, repo string, tag string, force bool) error {
// send tag request to docker engine // send tag request to docker engine
err := e.client.TagImage(IDOrName, repo, tag, force) err := e.client.TagImage(IDOrName, repo, tag, force)
e.CheckConnectionErr(err)
if err != nil { if err != nil {
return err return err
} }

View File

@ -34,10 +34,19 @@ var (
engOpts = &EngineOpts{ engOpts = &EngineOpts{
RefreshMinInterval: time.Duration(30) * time.Second, RefreshMinInterval: time.Duration(30) * time.Second,
RefreshMaxInterval: time.Duration(60) * time.Second, RefreshMaxInterval: time.Duration(60) * time.Second,
RefreshRetry: 3, FailureRetry: 3,
} }
) )
func TestEngineFailureCount(t *testing.T) {
engine := NewEngine("test", 0, engOpts)
for i := 0; i < engine.opts.FailureRetry; i++ {
assert.True(t, engine.IsHealthy())
engine.incFailureCount()
}
assert.False(t, engine.IsHealthy())
}
func TestEngineConnectionFailure(t *testing.T) { func TestEngineConnectionFailure(t *testing.T) {
engine := NewEngine("test", 0, engOpts) engine := NewEngine("test", 0, engOpts)
assert.False(t, engine.isConnected()) assert.False(t, engine.isConnected())

View File

@ -13,7 +13,7 @@ func createAgent(t *testing.T, ID string, containers ...*cluster.Container) *age
engOpts := &cluster.EngineOpts{ engOpts := &cluster.EngineOpts{
RefreshMinInterval: time.Duration(30) * time.Second, RefreshMinInterval: time.Duration(30) * time.Second,
RefreshMaxInterval: time.Duration(60) * time.Second, RefreshMaxInterval: time.Duration(60) * time.Second,
RefreshRetry: 3, FailureRetry: 3,
} }
engine := cluster.NewEngine(ID, 0, engOpts) engine := cluster.NewEngine(ID, 0, engOpts)
engine.Name = ID engine.Name = ID

View File

@ -43,7 +43,7 @@ var (
engOpts = &cluster.EngineOpts{ engOpts = &cluster.EngineOpts{
RefreshMinInterval: time.Duration(30) * time.Second, RefreshMinInterval: time.Duration(30) * time.Second,
RefreshMaxInterval: time.Duration(60) * time.Second, RefreshMaxInterval: time.Duration(60) * time.Second,
RefreshRetry: 3, FailureRetry: 3,
} }
) )

View File

@ -30,7 +30,7 @@ function teardown() {
@test "docker logs unhealthy node" { @test "docker logs unhealthy node" {
start_docker_with_busybox 1 start_docker_with_busybox 1
swarm_manage --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-refresh-retry=1 ${HOSTS[0]} swarm_manage --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-failure-retry=1 ${HOSTS[0]}
# run a container with echo command # run a container with echo command
docker_swarm run -d --name test_container busybox /bin/sh -c "echo hello world; echo hello docker; echo hello swarm" docker_swarm run -d --name test_container busybox /bin/sh -c "echo hello world; echo hello docker; echo hello swarm"

View File

@ -14,7 +14,7 @@ load helpers
[[ "${output}" == *"max refresh interval cannot be less than min refresh interval"* ]] [[ "${output}" == *"max refresh interval cannot be less than min refresh interval"* ]]
# engine refresh retry count # engine refresh retry count
run swarm manage --engine-refresh-retry 0 --advertise 127.0.0.1:$SWARM_BASE_PORT 192.168.56.202:4444 run swarm manage --engine-failure-retry 0 --advertise 127.0.0.1:$SWARM_BASE_PORT 192.168.56.202:4444
[ "$status" -ne 0 ] [ "$status" -ne 0 ]
[[ "${output}" == *"invalid refresh retry count"* ]] [[ "${output}" == *"invalid failure retry count"* ]]
} }