mirror of https://github.com/docker/docs.git
Merge pull request #1517 from dongluochen/EngineFastFailure
Use failureCount as a secondary health indicator.
This commit is contained in:
commit
53cf899e31
|
@ -386,6 +386,7 @@ func getContainerJSON(c *context, w http.ResponseWriter, r *http.Request) {
|
||||||
client, scheme := newClientAndScheme(c.tlsConfig)
|
client, scheme := newClientAndScheme(c.tlsConfig)
|
||||||
|
|
||||||
resp, err := client.Get(scheme + "://" + container.Engine.Addr + "/containers/" + container.Id + "/json")
|
resp, err := client.Get(scheme + "://" + container.Engine.Addr + "/containers/" + container.Id + "/json")
|
||||||
|
container.Engine.CheckConnectionErr(err)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
httpError(w, err.Error(), http.StatusInternalServerError)
|
httpError(w, err.Error(), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
|
@ -670,6 +671,7 @@ func postContainersExec(c *context, w http.ResponseWriter, r *http.Request) {
|
||||||
client, scheme := newClientAndScheme(c.tlsConfig)
|
client, scheme := newClientAndScheme(c.tlsConfig)
|
||||||
|
|
||||||
resp, err := client.Post(scheme+"://"+container.Engine.Addr+"/containers/"+container.Id+"/exec", "application/json", r.Body)
|
resp, err := client.Post(scheme+"://"+container.Engine.Addr+"/containers/"+container.Id+"/exec", "application/json", r.Body)
|
||||||
|
container.Engine.CheckConnectionErr(err)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
httpError(w, err.Error(), http.StatusInternalServerError)
|
httpError(w, err.Error(), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
|
@ -796,7 +798,8 @@ func proxyNetwork(c *context, w http.ResponseWriter, r *http.Request) {
|
||||||
func proxyVolume(c *context, w http.ResponseWriter, r *http.Request) {
|
func proxyVolume(c *context, w http.ResponseWriter, r *http.Request) {
|
||||||
var name = mux.Vars(r)["volumename"]
|
var name = mux.Vars(r)["volumename"]
|
||||||
if volume := c.cluster.Volume(name); volume != nil {
|
if volume := c.cluster.Volume(name); volume != nil {
|
||||||
proxy(c.tlsConfig, volume.Engine.Addr, w, r)
|
err := proxy(c.tlsConfig, volume.Engine.Addr, w, r)
|
||||||
|
volume.Engine.CheckConnectionErr(err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
httpError(w, fmt.Sprintf("No such volume: %s", name), http.StatusNotFound)
|
httpError(w, fmt.Sprintf("No such volume: %s", name), http.StatusNotFound)
|
||||||
|
@ -838,7 +841,9 @@ func proxyNetworkContainerOperation(c *context, w http.ResponseWriter, r *http.R
|
||||||
}
|
}
|
||||||
|
|
||||||
// request is forwarded to the container's address
|
// request is forwarded to the container's address
|
||||||
if err := proxyAsync(c.tlsConfig, container.Engine.Addr, w, r, cb); err != nil {
|
err := proxyAsync(c.tlsConfig, container.Engine.Addr, w, r, cb)
|
||||||
|
container.Engine.CheckConnectionErr(err)
|
||||||
|
if err != nil {
|
||||||
httpError(w, err.Error(), http.StatusNotFound)
|
httpError(w, err.Error(), http.StatusNotFound)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -856,7 +861,9 @@ func proxyContainer(c *context, w http.ResponseWriter, r *http.Request) {
|
||||||
r.URL.Path = strings.Replace(r.URL.Path, name, container.Id, 1)
|
r.URL.Path = strings.Replace(r.URL.Path, name, container.Id, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := proxy(c.tlsConfig, container.Engine.Addr, w, r); err != nil {
|
err = proxy(c.tlsConfig, container.Engine.Addr, w, r)
|
||||||
|
container.Engine.CheckConnectionErr(err)
|
||||||
|
if err != nil {
|
||||||
httpError(w, err.Error(), http.StatusInternalServerError)
|
httpError(w, err.Error(), http.StatusInternalServerError)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -879,7 +886,9 @@ func proxyContainerAndForceRefresh(c *context, w http.ResponseWriter, r *http.Re
|
||||||
container.Refresh()
|
container.Refresh()
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := proxyAsync(c.tlsConfig, container.Engine.Addr, w, r, cb); err != nil {
|
err = proxyAsync(c.tlsConfig, container.Engine.Addr, w, r, cb)
|
||||||
|
container.Engine.CheckConnectionErr(err)
|
||||||
|
if err != nil {
|
||||||
httpError(w, err.Error(), http.StatusInternalServerError)
|
httpError(w, err.Error(), http.StatusInternalServerError)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -889,7 +898,8 @@ func proxyImage(c *context, w http.ResponseWriter, r *http.Request) {
|
||||||
name := mux.Vars(r)["name"]
|
name := mux.Vars(r)["name"]
|
||||||
|
|
||||||
if image := c.cluster.Image(name); image != nil {
|
if image := c.cluster.Image(name); image != nil {
|
||||||
proxy(c.tlsConfig, image.Engine.Addr, w, r)
|
err := proxy(c.tlsConfig, image.Engine.Addr, w, r)
|
||||||
|
image.Engine.CheckConnectionErr(err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
httpError(w, fmt.Sprintf("No such image: %s", name), http.StatusNotFound)
|
httpError(w, fmt.Sprintf("No such image: %s", name), http.StatusNotFound)
|
||||||
|
@ -902,7 +912,8 @@ func proxyImageGet(c *context, w http.ResponseWriter, r *http.Request) {
|
||||||
for _, image := range c.cluster.Images() {
|
for _, image := range c.cluster.Images() {
|
||||||
if len(strings.SplitN(name, ":", 2)) == 2 && image.Match(name, true) ||
|
if len(strings.SplitN(name, ":", 2)) == 2 && image.Match(name, true) ||
|
||||||
len(strings.SplitN(name, ":", 2)) == 1 && image.Match(name, false) {
|
len(strings.SplitN(name, ":", 2)) == 1 && image.Match(name, false) {
|
||||||
proxy(c.tlsConfig, image.Engine.Addr, w, r)
|
err := proxy(c.tlsConfig, image.Engine.Addr, w, r)
|
||||||
|
image.Engine.CheckConnectionErr(err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -925,7 +936,8 @@ func proxyImagePush(c *context, w http.ResponseWriter, r *http.Request) {
|
||||||
for _, image := range c.cluster.Images() {
|
for _, image := range c.cluster.Images() {
|
||||||
if tag != "" && image.Match(name, true) ||
|
if tag != "" && image.Match(name, true) ||
|
||||||
tag == "" && image.Match(name, false) {
|
tag == "" && image.Match(name, false) {
|
||||||
proxy(c.tlsConfig, image.Engine.Addr, w, r)
|
err := proxy(c.tlsConfig, image.Engine.Addr, w, r)
|
||||||
|
image.Engine.CheckConnectionErr(err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -969,7 +981,9 @@ func proxyRandom(c *context, w http.ResponseWriter, r *http.Request) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := proxy(c.tlsConfig, engine.Addr, w, r); err != nil {
|
err = proxy(c.tlsConfig, engine.Addr, w, r)
|
||||||
|
engine.CheckConnectionErr(err)
|
||||||
|
if err != nil {
|
||||||
httpError(w, err.Error(), http.StatusInternalServerError)
|
httpError(w, err.Error(), http.StatusInternalServerError)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1003,7 +1017,9 @@ func postCommit(c *context, w http.ResponseWriter, r *http.Request) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// proxy commit request to the right node
|
// proxy commit request to the right node
|
||||||
if err := proxyAsync(c.tlsConfig, container.Engine.Addr, w, r, cb); err != nil {
|
err = proxyAsync(c.tlsConfig, container.Engine.Addr, w, r, cb)
|
||||||
|
container.Engine.CheckConnectionErr(err)
|
||||||
|
if err != nil {
|
||||||
httpError(w, err.Error(), http.StatusInternalServerError)
|
httpError(w, err.Error(), http.StatusInternalServerError)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1093,7 +1109,9 @@ func proxyHijack(c *context, w http.ResponseWriter, r *http.Request) {
|
||||||
r.URL.Path = strings.Replace(r.URL.Path, name, container.Id, 1)
|
r.URL.Path = strings.Replace(r.URL.Path, name, container.Id, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := hijack(c.tlsConfig, container.Engine.Addr, w, r); err != nil {
|
err = hijack(c.tlsConfig, container.Engine.Addr, w, r)
|
||||||
|
container.Engine.CheckConnectionErr(err)
|
||||||
|
if err != nil {
|
||||||
httpError(w, err.Error(), http.StatusInternalServerError)
|
httpError(w, err.Error(), http.StatusInternalServerError)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,7 +26,7 @@ var (
|
||||||
flHosts,
|
flHosts,
|
||||||
flLeaderElection, flLeaderTTL, flManageAdvertise,
|
flLeaderElection, flLeaderTTL, flManageAdvertise,
|
||||||
flTLS, flTLSCaCert, flTLSCert, flTLSKey, flTLSVerify,
|
flTLS, flTLSCaCert, flTLSCert, flTLSKey, flTLSVerify,
|
||||||
flRefreshIntervalMin, flRefreshIntervalMax, flRefreshRetry,
|
flRefreshIntervalMin, flRefreshIntervalMax, flFailureRetry, flRefreshRetry,
|
||||||
flHeartBeat,
|
flHeartBeat,
|
||||||
flEnableCors,
|
flEnableCors,
|
||||||
flCluster, flDiscoveryOpt, flClusterOpt},
|
flCluster, flDiscoveryOpt, flClusterOpt},
|
||||||
|
|
|
@ -74,7 +74,12 @@ var (
|
||||||
flRefreshRetry = cli.IntFlag{
|
flRefreshRetry = cli.IntFlag{
|
||||||
Name: "engine-refresh-retry",
|
Name: "engine-refresh-retry",
|
||||||
Value: 3,
|
Value: 3,
|
||||||
Usage: "set engine refresh retry count on failure",
|
Usage: "deprecated; replaced by --engine-failure-retry",
|
||||||
|
}
|
||||||
|
flFailureRetry = cli.IntFlag{
|
||||||
|
Name: "engine-failure-retry",
|
||||||
|
Value: 3,
|
||||||
|
Usage: "set engine failure retry count",
|
||||||
}
|
}
|
||||||
flEnableCors = cli.BoolFlag{
|
flEnableCors = cli.BoolFlag{
|
||||||
Name: "api-enable-cors, cors",
|
Name: "api-enable-cors, cors",
|
||||||
|
|
|
@ -246,14 +246,19 @@ func manage(c *cli.Context) {
|
||||||
if refreshMaxInterval < refreshMinInterval {
|
if refreshMaxInterval < refreshMinInterval {
|
||||||
log.Fatal("max refresh interval cannot be less than min refresh interval")
|
log.Fatal("max refresh interval cannot be less than min refresh interval")
|
||||||
}
|
}
|
||||||
|
// engine-refresh-retry is deprecated
|
||||||
refreshRetry := c.Int("engine-refresh-retry")
|
refreshRetry := c.Int("engine-refresh-retry")
|
||||||
if refreshRetry <= 0 {
|
if refreshRetry != 3 {
|
||||||
log.Fatal("invalid refresh retry count")
|
log.Fatal("--engine-refresh-retry is deprecated. Use --engine-failure-retry")
|
||||||
|
}
|
||||||
|
failureRetry := c.Int("engine-failure-retry")
|
||||||
|
if failureRetry <= 0 {
|
||||||
|
log.Fatal("invalid failure retry count")
|
||||||
}
|
}
|
||||||
engineOpts := &cluster.EngineOpts{
|
engineOpts := &cluster.EngineOpts{
|
||||||
RefreshMinInterval: refreshMinInterval,
|
RefreshMinInterval: refreshMinInterval,
|
||||||
RefreshMaxInterval: refreshMaxInterval,
|
RefreshMaxInterval: refreshMaxInterval,
|
||||||
RefreshRetry: refreshRetry,
|
FailureRetry: failureRetry,
|
||||||
}
|
}
|
||||||
|
|
||||||
uri := getDiscovery(c)
|
uri := getDiscovery(c)
|
||||||
|
|
|
@ -59,7 +59,7 @@ func (d *delayer) Wait() <-chan time.Time {
|
||||||
type EngineOpts struct {
|
type EngineOpts struct {
|
||||||
RefreshMinInterval time.Duration
|
RefreshMinInterval time.Duration
|
||||||
RefreshMaxInterval time.Duration
|
RefreshMaxInterval time.Duration
|
||||||
RefreshRetry int
|
FailureRetry int
|
||||||
}
|
}
|
||||||
|
|
||||||
// Engine represents a docker engine
|
// Engine represents a docker engine
|
||||||
|
@ -83,6 +83,7 @@ type Engine struct {
|
||||||
client dockerclient.Client
|
client dockerclient.Client
|
||||||
eventHandler EventHandler
|
eventHandler EventHandler
|
||||||
healthy bool
|
healthy bool
|
||||||
|
failureCount int
|
||||||
overcommitRatio int64
|
overcommitRatio int64
|
||||||
opts *EngineOpts
|
opts *EngineOpts
|
||||||
}
|
}
|
||||||
|
@ -184,6 +185,17 @@ func (e *Engine) IsHealthy() bool {
|
||||||
return e.healthy
|
return e.healthy
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// setHealthy sets engine healthy state
|
||||||
|
func (e *Engine) setHealthy(state bool) {
|
||||||
|
e.Lock()
|
||||||
|
e.healthy = state
|
||||||
|
// if engine is healthy, clear failureCount
|
||||||
|
if state {
|
||||||
|
e.failureCount = 0
|
||||||
|
}
|
||||||
|
e.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
// Status returns the health status of the Engine: Healthy or Unhealthy
|
// Status returns the health status of the Engine: Healthy or Unhealthy
|
||||||
func (e *Engine) Status() string {
|
func (e *Engine) Status() string {
|
||||||
if e.healthy {
|
if e.healthy {
|
||||||
|
@ -192,9 +204,44 @@ func (e *Engine) Status() string {
|
||||||
return "Unhealthy"
|
return "Unhealthy"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// incFailureCount increases engine's failure count, and set engine as unhealthy if threshold is crossed
|
||||||
|
func (e *Engine) incFailureCount() {
|
||||||
|
e.Lock()
|
||||||
|
e.failureCount++
|
||||||
|
if e.healthy && e.failureCount >= e.opts.FailureRetry {
|
||||||
|
e.healthy = false
|
||||||
|
}
|
||||||
|
e.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// CheckConnectionErr checks error from client response and adjust engine healthy indicators
|
||||||
|
func (e *Engine) CheckConnectionErr(err error) {
|
||||||
|
if err == nil {
|
||||||
|
e.setHealthy(true)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// dockerclient defines ErrConnectionRefused error. but if http client is from swarm, it's not using
|
||||||
|
// dockerclient. We need string matching for these cases. Remove the first character to deal with
|
||||||
|
// case sensitive issue
|
||||||
|
if err == dockerclient.ErrConnectionRefused ||
|
||||||
|
strings.Contains(err.Error(), "onnection refused") ||
|
||||||
|
strings.Contains(err.Error(), "annot connect to the docker engine endpoint") {
|
||||||
|
// each connection refused instance may increase failure count so
|
||||||
|
// engine can fail fast. Short engine freeze or network failure may result
|
||||||
|
// in engine marked as unhealthy. If this causes unnecessary failure, engine
|
||||||
|
// can track last error time. Only increase failure count if last error is
|
||||||
|
// not too recent, e.g., last error is at least 1 seconds ago.
|
||||||
|
e.incFailureCount()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// other errors may be ambiguous. let refresh loop decide healthy or not.
|
||||||
|
}
|
||||||
|
|
||||||
// Gather engine specs (CPU, memory, constraints, ...).
|
// Gather engine specs (CPU, memory, constraints, ...).
|
||||||
func (e *Engine) updateSpecs() error {
|
func (e *Engine) updateSpecs() error {
|
||||||
info, err := e.client.Info()
|
info, err := e.client.Info()
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -204,6 +251,7 @@ func (e *Engine) updateSpecs() error {
|
||||||
}
|
}
|
||||||
|
|
||||||
v, err := e.client.Version()
|
v, err := e.client.Version()
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -236,6 +284,7 @@ func (e *Engine) updateSpecs() error {
|
||||||
// RemoveImage deletes an image from the engine.
|
// RemoveImage deletes an image from the engine.
|
||||||
func (e *Engine) RemoveImage(image *Image, name string, force bool) ([]*dockerclient.ImageDelete, error) {
|
func (e *Engine) RemoveImage(image *Image, name string, force bool) ([]*dockerclient.ImageDelete, error) {
|
||||||
array, err := e.client.RemoveImage(name, force)
|
array, err := e.client.RemoveImage(name, force)
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
e.RefreshImages()
|
e.RefreshImages()
|
||||||
return array, err
|
return array, err
|
||||||
|
|
||||||
|
@ -244,13 +293,16 @@ func (e *Engine) RemoveImage(image *Image, name string, force bool) ([]*dockercl
|
||||||
// RemoveNetwork deletes a network from the engine.
|
// RemoveNetwork deletes a network from the engine.
|
||||||
func (e *Engine) RemoveNetwork(network *Network) error {
|
func (e *Engine) RemoveNetwork(network *Network) error {
|
||||||
err := e.client.RemoveNetwork(network.ID)
|
err := e.client.RemoveNetwork(network.ID)
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
e.RefreshNetworks()
|
e.RefreshNetworks()
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// RemoveVolume deletes a volume from the engine.
|
// RemoveVolume deletes a volume from the engine.
|
||||||
func (e *Engine) RemoveVolume(name string) error {
|
func (e *Engine) RemoveVolume(name string) error {
|
||||||
if err := e.client.RemoveVolume(name); err != nil {
|
err := e.client.RemoveVolume(name)
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -266,6 +318,7 @@ func (e *Engine) RemoveVolume(name string) error {
|
||||||
// RefreshImages refreshes the list of images on the engine.
|
// RefreshImages refreshes the list of images on the engine.
|
||||||
func (e *Engine) RefreshImages() error {
|
func (e *Engine) RefreshImages() error {
|
||||||
images, err := e.client.ListImages(true)
|
images, err := e.client.ListImages(true)
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -281,6 +334,7 @@ func (e *Engine) RefreshImages() error {
|
||||||
// RefreshNetworks refreshes the list of networks on the engine.
|
// RefreshNetworks refreshes the list of networks on the engine.
|
||||||
func (e *Engine) RefreshNetworks() error {
|
func (e *Engine) RefreshNetworks() error {
|
||||||
networks, err := e.client.ListNetworks("")
|
networks, err := e.client.ListNetworks("")
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -296,6 +350,7 @@ func (e *Engine) RefreshNetworks() error {
|
||||||
// RefreshVolumes refreshes the list of volumes on the engine.
|
// RefreshVolumes refreshes the list of volumes on the engine.
|
||||||
func (e *Engine) RefreshVolumes() error {
|
func (e *Engine) RefreshVolumes() error {
|
||||||
volumes, err := e.client.ListVolumes()
|
volumes, err := e.client.ListVolumes()
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -313,6 +368,9 @@ func (e *Engine) RefreshVolumes() error {
|
||||||
// FIXME: unexport this method after mesos scheduler stops using it directly
|
// FIXME: unexport this method after mesos scheduler stops using it directly
|
||||||
func (e *Engine) RefreshContainers(full bool) error {
|
func (e *Engine) RefreshContainers(full bool) error {
|
||||||
containers, err := e.client.ListContainers(true, false, "")
|
containers, err := e.client.ListContainers(true, false, "")
|
||||||
|
// e.CheckConnectionErr(err) is not appropriate here because refresh loop uses
|
||||||
|
// RefreshContainers function to fail/recover an engine. Adding CheckConnectionErr
|
||||||
|
// here would result in double count
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -339,6 +397,7 @@ func (e *Engine) RefreshContainers(full bool) error {
|
||||||
// the container will be inspected.
|
// the container will be inspected.
|
||||||
func (e *Engine) refreshContainer(ID string, full bool) (*Container, error) {
|
func (e *Engine) refreshContainer(ID string, full bool) (*Container, error) {
|
||||||
containers, err := e.client.ListContainers(true, false, fmt.Sprintf("{%q:[%q]}", "id", ID))
|
containers, err := e.client.ListContainers(true, false, fmt.Sprintf("{%q:[%q]}", "id", ID))
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -385,6 +444,7 @@ func (e *Engine) updateContainer(c dockerclient.Container, containers map[string
|
||||||
// Update ContainerInfo.
|
// Update ContainerInfo.
|
||||||
if full {
|
if full {
|
||||||
info, err := e.client.InspectContainer(c.Id)
|
info, err := e.client.InspectContainer(c.Id)
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -410,7 +470,6 @@ func (e *Engine) updateContainer(c dockerclient.Container, containers map[string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *Engine) refreshLoop() {
|
func (e *Engine) refreshLoop() {
|
||||||
failedAttempts := 0
|
|
||||||
|
|
||||||
for {
|
for {
|
||||||
var err error
|
var err error
|
||||||
|
@ -431,15 +490,15 @@ func (e *Engine) refreshLoop() {
|
||||||
}
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
failedAttempts++
|
e.failureCount++
|
||||||
if failedAttempts >= e.opts.RefreshRetry && e.healthy {
|
if e.failureCount >= e.opts.FailureRetry && e.healthy {
|
||||||
e.emitEvent("engine_disconnect")
|
e.emitEvent("engine_disconnect")
|
||||||
e.healthy = false
|
e.setHealthy(false)
|
||||||
log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Errorf("Flagging engine as dead. Updated state failed %d times: %v", failedAttempts, err)
|
log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Errorf("Flagging engine as dead. Updated state failed %d times: %v", e.failureCount, err)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if !e.healthy {
|
if !e.healthy {
|
||||||
log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Infof("Engine came back to life after %d retries. Hooray!", failedAttempts)
|
log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Infof("Engine came back to life after %d retries. Hooray!", e.failureCount)
|
||||||
if err := e.updateSpecs(); err != nil {
|
if err := e.updateSpecs(); err != nil {
|
||||||
log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Errorf("Update engine specs failed: %v", err)
|
log.WithFields(log.Fields{"name": e.Name, "id": e.ID}).Errorf("Update engine specs failed: %v", err)
|
||||||
continue
|
continue
|
||||||
|
@ -448,8 +507,7 @@ func (e *Engine) refreshLoop() {
|
||||||
e.client.StartMonitorEvents(e.handler, nil)
|
e.client.StartMonitorEvents(e.handler, nil)
|
||||||
e.emitEvent("engine_reconnect")
|
e.emitEvent("engine_reconnect")
|
||||||
}
|
}
|
||||||
e.healthy = true
|
e.setHealthy(true)
|
||||||
failedAttempts = 0
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -521,7 +579,9 @@ func (e *Engine) Create(config *ContainerConfig, name string, pullImage bool, au
|
||||||
dockerConfig.CpuShares = int64(math.Ceil(float64(config.CpuShares*1024) / float64(e.Cpus)))
|
dockerConfig.CpuShares = int64(math.Ceil(float64(config.CpuShares*1024) / float64(e.Cpus)))
|
||||||
dockerConfig.HostConfig.CpuShares = dockerConfig.CpuShares
|
dockerConfig.HostConfig.CpuShares = dockerConfig.CpuShares
|
||||||
|
|
||||||
if id, err = client.CreateContainer(&dockerConfig, name, nil); err != nil {
|
id, err = client.CreateContainer(&dockerConfig, name, nil)
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
|
if err != nil {
|
||||||
// If the error is other than not found, abort immediately.
|
// If the error is other than not found, abort immediately.
|
||||||
if err != dockerclient.ErrImageNotFound || !pullImage {
|
if err != dockerclient.ErrImageNotFound || !pullImage {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -531,7 +591,9 @@ func (e *Engine) Create(config *ContainerConfig, name string, pullImage bool, au
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
// ...And try again.
|
// ...And try again.
|
||||||
if id, err = client.CreateContainer(&dockerConfig, name, nil); err != nil {
|
id, err = client.CreateContainer(&dockerConfig, name, nil)
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -554,7 +616,9 @@ func (e *Engine) Create(config *ContainerConfig, name string, pullImage bool, au
|
||||||
|
|
||||||
// RemoveContainer a container from the engine.
|
// RemoveContainer a container from the engine.
|
||||||
func (e *Engine) RemoveContainer(container *Container, force, volumes bool) error {
|
func (e *Engine) RemoveContainer(container *Container, force, volumes bool) error {
|
||||||
if err := e.client.RemoveContainer(container.Id, force, volumes); err != nil {
|
err := e.client.RemoveContainer(container.Id, force, volumes)
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -570,6 +634,7 @@ func (e *Engine) RemoveContainer(container *Container, force, volumes bool) erro
|
||||||
// CreateNetwork creates a network in the engine
|
// CreateNetwork creates a network in the engine
|
||||||
func (e *Engine) CreateNetwork(request *dockerclient.NetworkCreate) (*dockerclient.NetworkCreateResponse, error) {
|
func (e *Engine) CreateNetwork(request *dockerclient.NetworkCreate) (*dockerclient.NetworkCreateResponse, error) {
|
||||||
response, err := e.client.CreateNetwork(request)
|
response, err := e.client.CreateNetwork(request)
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
|
|
||||||
e.RefreshNetworks()
|
e.RefreshNetworks()
|
||||||
|
|
||||||
|
@ -581,6 +646,7 @@ func (e *Engine) CreateVolume(request *dockerclient.VolumeCreateRequest) (*Volum
|
||||||
volume, err := e.client.CreateVolume(request)
|
volume, err := e.client.CreateVolume(request)
|
||||||
|
|
||||||
e.RefreshVolumes()
|
e.RefreshVolumes()
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -594,7 +660,9 @@ func (e *Engine) Pull(image string, authConfig *dockerclient.AuthConfig) error {
|
||||||
if !strings.Contains(image, ":") {
|
if !strings.Contains(image, ":") {
|
||||||
image = image + ":latest"
|
image = image + ":latest"
|
||||||
}
|
}
|
||||||
if err := e.client.PullImage(image, authConfig); err != nil {
|
err := e.client.PullImage(image, authConfig)
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -606,7 +674,9 @@ func (e *Engine) Pull(image string, authConfig *dockerclient.AuthConfig) error {
|
||||||
|
|
||||||
// Load an image on the engine
|
// Load an image on the engine
|
||||||
func (e *Engine) Load(reader io.Reader) error {
|
func (e *Engine) Load(reader io.Reader) error {
|
||||||
if err := e.client.LoadImage(reader); err != nil {
|
err := e.client.LoadImage(reader)
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -618,7 +688,9 @@ func (e *Engine) Load(reader io.Reader) error {
|
||||||
|
|
||||||
// Import image
|
// Import image
|
||||||
func (e *Engine) Import(source string, repository string, tag string, imageReader io.Reader) error {
|
func (e *Engine) Import(source string, repository string, tag string, imageReader io.Reader) error {
|
||||||
if _, err := e.client.ImportImage(source, repository, tag, imageReader); err != nil {
|
_, err := e.client.ImportImage(source, repository, tag, imageReader)
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -774,6 +846,7 @@ func (e *Engine) cleanupContainers() {
|
||||||
func (e *Engine) RenameContainer(container *Container, newName string) error {
|
func (e *Engine) RenameContainer(container *Container, newName string) error {
|
||||||
// send rename request
|
// send rename request
|
||||||
err := e.client.RenameContainer(container.Id, newName)
|
err := e.client.RenameContainer(container.Id, newName)
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -785,14 +858,16 @@ func (e *Engine) RenameContainer(container *Container, newName string) error {
|
||||||
|
|
||||||
// BuildImage build an image
|
// BuildImage build an image
|
||||||
func (e *Engine) BuildImage(buildImage *dockerclient.BuildImage) (io.ReadCloser, error) {
|
func (e *Engine) BuildImage(buildImage *dockerclient.BuildImage) (io.ReadCloser, error) {
|
||||||
|
reader, err := e.client.BuildImage(buildImage)
|
||||||
return e.client.BuildImage(buildImage)
|
e.CheckConnectionErr(err)
|
||||||
|
return reader, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// TagImage tag an image
|
// TagImage tag an image
|
||||||
func (e *Engine) TagImage(IDOrName string, repo string, tag string, force bool) error {
|
func (e *Engine) TagImage(IDOrName string, repo string, tag string, force bool) error {
|
||||||
// send tag request to docker engine
|
// send tag request to docker engine
|
||||||
err := e.client.TagImage(IDOrName, repo, tag, force)
|
err := e.client.TagImage(IDOrName, repo, tag, force)
|
||||||
|
e.CheckConnectionErr(err)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,10 +34,19 @@ var (
|
||||||
engOpts = &EngineOpts{
|
engOpts = &EngineOpts{
|
||||||
RefreshMinInterval: time.Duration(30) * time.Second,
|
RefreshMinInterval: time.Duration(30) * time.Second,
|
||||||
RefreshMaxInterval: time.Duration(60) * time.Second,
|
RefreshMaxInterval: time.Duration(60) * time.Second,
|
||||||
RefreshRetry: 3,
|
FailureRetry: 3,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestEngineFailureCount(t *testing.T) {
|
||||||
|
engine := NewEngine("test", 0, engOpts)
|
||||||
|
for i := 0; i < engine.opts.FailureRetry; i++ {
|
||||||
|
assert.True(t, engine.IsHealthy())
|
||||||
|
engine.incFailureCount()
|
||||||
|
}
|
||||||
|
assert.False(t, engine.IsHealthy())
|
||||||
|
}
|
||||||
|
|
||||||
func TestEngineConnectionFailure(t *testing.T) {
|
func TestEngineConnectionFailure(t *testing.T) {
|
||||||
engine := NewEngine("test", 0, engOpts)
|
engine := NewEngine("test", 0, engOpts)
|
||||||
assert.False(t, engine.isConnected())
|
assert.False(t, engine.isConnected())
|
||||||
|
|
|
@ -13,7 +13,7 @@ func createAgent(t *testing.T, ID string, containers ...*cluster.Container) *age
|
||||||
engOpts := &cluster.EngineOpts{
|
engOpts := &cluster.EngineOpts{
|
||||||
RefreshMinInterval: time.Duration(30) * time.Second,
|
RefreshMinInterval: time.Duration(30) * time.Second,
|
||||||
RefreshMaxInterval: time.Duration(60) * time.Second,
|
RefreshMaxInterval: time.Duration(60) * time.Second,
|
||||||
RefreshRetry: 3,
|
FailureRetry: 3,
|
||||||
}
|
}
|
||||||
engine := cluster.NewEngine(ID, 0, engOpts)
|
engine := cluster.NewEngine(ID, 0, engOpts)
|
||||||
engine.Name = ID
|
engine.Name = ID
|
||||||
|
|
|
@ -43,7 +43,7 @@ var (
|
||||||
engOpts = &cluster.EngineOpts{
|
engOpts = &cluster.EngineOpts{
|
||||||
RefreshMinInterval: time.Duration(30) * time.Second,
|
RefreshMinInterval: time.Duration(30) * time.Second,
|
||||||
RefreshMaxInterval: time.Duration(60) * time.Second,
|
RefreshMaxInterval: time.Duration(60) * time.Second,
|
||||||
RefreshRetry: 3,
|
FailureRetry: 3,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -30,7 +30,7 @@ function teardown() {
|
||||||
|
|
||||||
@test "docker logs unhealthy node" {
|
@test "docker logs unhealthy node" {
|
||||||
start_docker_with_busybox 1
|
start_docker_with_busybox 1
|
||||||
swarm_manage --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-refresh-retry=1 ${HOSTS[0]}
|
swarm_manage --engine-refresh-min-interval=1s --engine-refresh-max-interval=1s --engine-failure-retry=1 ${HOSTS[0]}
|
||||||
|
|
||||||
# run a container with echo command
|
# run a container with echo command
|
||||||
docker_swarm run -d --name test_container busybox /bin/sh -c "echo hello world; echo hello docker; echo hello swarm"
|
docker_swarm run -d --name test_container busybox /bin/sh -c "echo hello world; echo hello docker; echo hello swarm"
|
||||||
|
|
|
@ -14,7 +14,7 @@ load helpers
|
||||||
[[ "${output}" == *"max refresh interval cannot be less than min refresh interval"* ]]
|
[[ "${output}" == *"max refresh interval cannot be less than min refresh interval"* ]]
|
||||||
|
|
||||||
# engine refresh retry count
|
# engine refresh retry count
|
||||||
run swarm manage --engine-refresh-retry 0 --advertise 127.0.0.1:$SWARM_BASE_PORT 192.168.56.202:4444
|
run swarm manage --engine-failure-retry 0 --advertise 127.0.0.1:$SWARM_BASE_PORT 192.168.56.202:4444
|
||||||
[ "$status" -ne 0 ]
|
[ "$status" -ne 0 ]
|
||||||
[[ "${output}" == *"invalid refresh retry count"* ]]
|
[[ "${output}" == *"invalid failure retry count"* ]]
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue