mirror of https://github.com/docker/docs.git
Add swarm container create retry option.
Signed-off-by: Dong Chen <dongluo.chen@docker.com>
This commit is contained in:
parent
d21748699d
commit
8cc9b6c284
|
@ -44,6 +44,7 @@ Arguments:
|
|||
Options:
|
||||
{{range .Flags}}{{.}}
|
||||
{{end}}{{if (eq .Name "manage")}}{{printf "\t * swarm.overcommit=0.05\tovercommit to apply on resources"}}
|
||||
{{printf "\t * swarm.createretry=0\tcontainer create retry count after initial failure"}}
|
||||
{{printf "\t * mesos.address=\taddress to bind on [$SWARM_MESOS_ADDRESS]"}}
|
||||
{{printf "\t * mesos.port=\tport to bind on [$SWARM_MESOS_PORT]"}}
|
||||
{{printf "\t * mesos.offertimeout=30s\ttimeout for offers [$SWARM_MESOS_OFFER_TIMEOUT]"}}
|
||||
|
|
|
@ -59,6 +59,7 @@ type Cluster struct {
|
|||
|
||||
overcommitRatio float64
|
||||
engineOpts *cluster.EngineOpts
|
||||
createRetry int64
|
||||
TLSConfig *tls.Config
|
||||
}
|
||||
|
||||
|
@ -76,12 +77,20 @@ func NewCluster(scheduler *scheduler.Scheduler, TLSConfig *tls.Config, discovery
|
|||
pendingContainers: make(map[string]*pendingContainer),
|
||||
overcommitRatio: 0.05,
|
||||
engineOpts: engineOptions,
|
||||
createRetry: 0,
|
||||
}
|
||||
|
||||
if val, ok := options.Float("swarm.overcommit", ""); ok {
|
||||
cluster.overcommitRatio = val
|
||||
}
|
||||
|
||||
if val, ok := options.Int("swarm.createretry", ""); ok {
|
||||
if val < 0 {
|
||||
log.Fatalf("swarm.createretry=%d is invalid", val)
|
||||
}
|
||||
cluster.createRetry = val
|
||||
}
|
||||
|
||||
discoveryCh, errCh := cluster.discovery.Watch(nil)
|
||||
go cluster.monitorDiscovery(discoveryCh, errCh)
|
||||
go cluster.monitorPendingEngines()
|
||||
|
@ -119,16 +128,23 @@ func (c *Cluster) generateUniqueID() string {
|
|||
func (c *Cluster) CreateContainer(config *cluster.ContainerConfig, name string, authConfig *dockerclient.AuthConfig) (*cluster.Container, error) {
|
||||
container, err := c.createContainer(config, name, false, authConfig)
|
||||
|
||||
// fails with image not found, then try to reschedule with image affinity
|
||||
if err != nil {
|
||||
var retries int64
|
||||
// fails with image not found, then try to reschedule with image affinity
|
||||
bImageNotFoundError, _ := regexp.MatchString(`image \S* not found`, err.Error())
|
||||
if bImageNotFoundError && !config.HaveNodeConstraint() {
|
||||
// Check if the image exists in the cluster
|
||||
// If exists, retry with a image affinity
|
||||
if c.Image(config.Image) != nil {
|
||||
container, err = c.createContainer(config, name, true, authConfig)
|
||||
retries++
|
||||
}
|
||||
}
|
||||
|
||||
for ; retries < c.createRetry && err != nil; retries++ {
|
||||
log.WithFields(log.Fields{"Name": "Swarm"}).Warnf("Failed to create container: %s, retrying", err)
|
||||
container, err = c.createContainer(config, name, false, authConfig)
|
||||
}
|
||||
}
|
||||
return container, err
|
||||
}
|
||||
|
|
|
@ -55,3 +55,22 @@ function teardown() {
|
|||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
# Checks that, with swarm.createretry=1, a `run` issued after one of the two
# engines has been stopped still exits with status 0.
@test "scheduler retry" {
|
||||
# Start 2 engines; their addresses are passed to swarm_manage below.
|
||||
start_docker 2
|
||||
# Start swarm and check it can reach both nodes.
|
||||
# Refresh interval is 20s, with 20 retries before marking an engine as unhealthy
# (presumably so the stopped node is not marked unhealthy before the run below).
|
||||
swarm_manage --engine-refresh-min-interval "20s" --engine-refresh-max-interval "20s" --engine-failure-retry 20 -cluster-opt swarm.createretry=1 "${HOSTS[0]},${HOSTS[1]}"
|
||||
|
||||
eval "docker_swarm info | grep -q -i 'Nodes: 2'"
|
||||
|
||||
# Use memory on node-0 (presumably so the scheduler prefers node-1 next).
|
||||
docker_swarm run -e constraint:node==node-0 -m 50m busybox sh
|
||||
|
||||
# Stop node-1.
|
||||
docker_host stop ${DOCKER_CONTAINERS[1]}
|
||||
|
||||
# Try to run a container. It should try node-1 first and, upon failure,
# automatically retry on node-0.
|
||||
run docker_swarm run -m 10m busybox sh
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue