mirror of https://github.com/docker/docs.git
Add swarm container create retry option.
Signed-off-by: Dong Chen <dongluo.chen@docker.com>
This commit is contained in:
parent
d21748699d
commit
8cc9b6c284
|
|
@ -44,6 +44,7 @@ Arguments:
|
||||||
Options:
|
Options:
|
||||||
{{range .Flags}}{{.}}
|
{{range .Flags}}{{.}}
|
||||||
{{end}}{{if (eq .Name "manage")}}{{printf "\t * swarm.overcommit=0.05\tovercommit to apply on resources"}}
|
{{end}}{{if (eq .Name "manage")}}{{printf "\t * swarm.overcommit=0.05\tovercommit to apply on resources"}}
|
||||||
|
{{printf "\t * swarm.createretry=0\tcontainer create retry count after initial failure"}}
|
||||||
{{printf "\t * mesos.address=\taddress to bind on [$SWARM_MESOS_ADDRESS]"}}
|
{{printf "\t * mesos.address=\taddress to bind on [$SWARM_MESOS_ADDRESS]"}}
|
||||||
{{printf "\t * mesos.port=\tport to bind on [$SWARM_MESOS_PORT]"}}
|
{{printf "\t * mesos.port=\tport to bind on [$SWARM_MESOS_PORT]"}}
|
||||||
{{printf "\t * mesos.offertimeout=30s\ttimeout for offers [$SWARM_MESOS_OFFER_TIMEOUT]"}}
|
{{printf "\t * mesos.offertimeout=30s\ttimeout for offers [$SWARM_MESOS_OFFER_TIMEOUT]"}}
|
||||||
|
|
|
||||||
|
|
@ -59,6 +59,7 @@ type Cluster struct {
|
||||||
|
|
||||||
overcommitRatio float64
|
overcommitRatio float64
|
||||||
engineOpts *cluster.EngineOpts
|
engineOpts *cluster.EngineOpts
|
||||||
|
createRetry int64
|
||||||
TLSConfig *tls.Config
|
TLSConfig *tls.Config
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -76,12 +77,20 @@ func NewCluster(scheduler *scheduler.Scheduler, TLSConfig *tls.Config, discovery
|
||||||
pendingContainers: make(map[string]*pendingContainer),
|
pendingContainers: make(map[string]*pendingContainer),
|
||||||
overcommitRatio: 0.05,
|
overcommitRatio: 0.05,
|
||||||
engineOpts: engineOptions,
|
engineOpts: engineOptions,
|
||||||
|
createRetry: 0,
|
||||||
}
|
}
|
||||||
|
|
||||||
if val, ok := options.Float("swarm.overcommit", ""); ok {
|
if val, ok := options.Float("swarm.overcommit", ""); ok {
|
||||||
cluster.overcommitRatio = val
|
cluster.overcommitRatio = val
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if val, ok := options.Int("swarm.createretry", ""); ok {
|
||||||
|
if val < 0 {
|
||||||
|
log.Fatalf("swarm.createretry=%d is invalid", val)
|
||||||
|
}
|
||||||
|
cluster.createRetry = val
|
||||||
|
}
|
||||||
|
|
||||||
discoveryCh, errCh := cluster.discovery.Watch(nil)
|
discoveryCh, errCh := cluster.discovery.Watch(nil)
|
||||||
go cluster.monitorDiscovery(discoveryCh, errCh)
|
go cluster.monitorDiscovery(discoveryCh, errCh)
|
||||||
go cluster.monitorPendingEngines()
|
go cluster.monitorPendingEngines()
|
||||||
|
|
@ -119,16 +128,23 @@ func (c *Cluster) generateUniqueID() string {
|
||||||
func (c *Cluster) CreateContainer(config *cluster.ContainerConfig, name string, authConfig *dockerclient.AuthConfig) (*cluster.Container, error) {
|
func (c *Cluster) CreateContainer(config *cluster.ContainerConfig, name string, authConfig *dockerclient.AuthConfig) (*cluster.Container, error) {
|
||||||
container, err := c.createContainer(config, name, false, authConfig)
|
container, err := c.createContainer(config, name, false, authConfig)
|
||||||
|
|
||||||
// If creation fails with an image-not-found error, try to reschedule with image affinity
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
var retries int64
|
||||||
|
// If creation fails with an image-not-found error, try to reschedule with image affinity
|
||||||
bImageNotFoundError, _ := regexp.MatchString(`image \S* not found`, err.Error())
|
bImageNotFoundError, _ := regexp.MatchString(`image \S* not found`, err.Error())
|
||||||
if bImageNotFoundError && !config.HaveNodeConstraint() {
|
if bImageNotFoundError && !config.HaveNodeConstraint() {
|
||||||
// Check if the image exists in the cluster
|
// Check if the image exists in the cluster
|
||||||
// If it exists, retry with an image affinity
|
// If it exists, retry with an image affinity
|
||||||
if c.Image(config.Image) != nil {
|
if c.Image(config.Image) != nil {
|
||||||
container, err = c.createContainer(config, name, true, authConfig)
|
container, err = c.createContainer(config, name, true, authConfig)
|
||||||
|
retries++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for ; retries < c.createRetry && err != nil; retries++ {
|
||||||
|
log.WithFields(log.Fields{"Name": "Swarm"}).Warnf("Failed to create container: %s, retrying", err)
|
||||||
|
container, err = c.createContainer(config, name, false, authConfig)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return container, err
|
return container, err
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -55,3 +55,22 @@ function teardown() {
|
||||||
[ "$status" -eq 0 ]
|
[ "$status" -eq 0 ]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@test "scheduler retry" {
|
||||||
|
# Start 2 engines to serve as node-0 and node-1.
|
||||||
|
start_docker 2
|
||||||
|
# Start swarm and check it can reach the node
|
||||||
|
# Refresh interval is 20s, with 20 failure retries before an engine is marked as unhealthy
|
||||||
|
swarm_manage --engine-refresh-min-interval "20s" --engine-refresh-max-interval "20s" --engine-failure-retry 20 -cluster-opt swarm.createretry=1 "${HOSTS[0]},${HOSTS[1]}"
|
||||||
|
|
||||||
|
eval "docker_swarm info | grep -q -i 'Nodes: 2'"
|
||||||
|
|
||||||
|
# Use memory on node-0
|
||||||
|
docker_swarm run -e constraint:node==node-0 -m 50m busybox sh
|
||||||
|
|
||||||
|
# Stop node-1
|
||||||
|
docker_host stop ${DOCKER_CONTAINERS[1]}
|
||||||
|
|
||||||
|
# Try to run a container. The scheduler should try node-1 first and, upon failure, automatically retry on node-0
|
||||||
|
run docker_swarm run -m 10m busybox sh
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue