mirror of https://github.com/docker/docs.git
Retry creating dynamic networks if not found
When a task fails to start, swarmkit may try to restart the task on the same node, where it may keep failing. This creates a race: while a failed task is being removed, it may remove the associated network while another task (for the same service, or for a different service connected to the same network) is proceeding to start its container on the assumption that the network is still present. Fix this by reacting to an `ErrNoSuchNetwork` error during container start by trying to recreate the managed networks. If they have been removed, they will be recreated. If they are already present, nothing bad will happen. Signed-off-by: Jana Radhakrishnan <mrjana@docker.com> (cherry picked from commit 117cef5e9766d6ba228770c225e816c6afd16ff8) Signed-off-by: Tibor Vass <tibor@docker.com>
This commit is contained in:
parent
2f6ca79080
commit
769c25c416
|
@ -6,6 +6,7 @@ import (
|
||||||
executorpkg "github.com/docker/docker/daemon/cluster/executor"
|
executorpkg "github.com/docker/docker/daemon/cluster/executor"
|
||||||
"github.com/docker/engine-api/types"
|
"github.com/docker/engine-api/types"
|
||||||
"github.com/docker/engine-api/types/events"
|
"github.com/docker/engine-api/types/events"
|
||||||
|
"github.com/docker/libnetwork"
|
||||||
"github.com/docker/swarmkit/agent/exec"
|
"github.com/docker/swarmkit/agent/exec"
|
||||||
"github.com/docker/swarmkit/api"
|
"github.com/docker/swarmkit/api"
|
||||||
"github.com/docker/swarmkit/log"
|
"github.com/docker/swarmkit/log"
|
||||||
|
@ -160,8 +161,23 @@ func (r *controller) Start(ctx context.Context) error {
|
||||||
return exec.ErrTaskStarted
|
return exec.ErrTaskStarted
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := r.adapter.start(ctx); err != nil {
|
for {
|
||||||
return errors.Wrap(err, "starting container failed")
|
if err := r.adapter.start(ctx); err != nil {
|
||||||
|
if _, ok := err.(libnetwork.ErrNoSuchNetwork); ok {
|
||||||
|
// Retry network creation again if we
|
||||||
|
// failed because some of the networks
|
||||||
|
// were not found.
|
||||||
|
if err := r.adapter.createNetworks(ctx); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
return errors.Wrap(err, "starting container failed")
|
||||||
|
}
|
||||||
|
|
||||||
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
// no health check
|
// no health check
|
||||||
|
|
Loading…
Reference in New Issue