From 769c25c416ac8d22fe2d17c15f7d693074fb64cd Mon Sep 17 00:00:00 2001 From: Jana Radhakrishnan Date: Tue, 9 Aug 2016 13:37:11 -0700 Subject: [PATCH] Retry creating dynamic networks if not found When a task fails to start, swarmkit may try to restart it on the same node, where it may keep failing. This creates a race: while a failed task is being removed, it may remove the associated network while another task (for the same service, or for a different service connected to the same network) is proceeding to start its container on the assumption that the network is still present. Fix this by reacting to an `ErrNoSuchNetwork` error during container start by trying to recreate the managed networks. If they have been removed, they will be recreated. If they are already present, nothing bad will happen. Signed-off-by: Jana Radhakrishnan (cherry picked from commit 117cef5e9766d6ba228770c225e816c6afd16ff8) Signed-off-by: Tibor Vass --- .../cluster/executor/container/controller.go | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/daemon/cluster/executor/container/controller.go b/daemon/cluster/executor/container/controller.go index 61d6a34fd9..cb9198acf4 100644 --- a/daemon/cluster/executor/container/controller.go +++ b/daemon/cluster/executor/container/controller.go @@ -6,6 +6,7 @@ import ( executorpkg "github.com/docker/docker/daemon/cluster/executor" "github.com/docker/engine-api/types" "github.com/docker/engine-api/types/events" + "github.com/docker/libnetwork" "github.com/docker/swarmkit/agent/exec" "github.com/docker/swarmkit/api" "github.com/docker/swarmkit/log" @@ -160,8 +161,23 @@ func (r *controller) Start(ctx context.Context) error { return exec.ErrTaskStarted } - if err := r.adapter.start(ctx); err != nil { - return errors.Wrap(err, "starting container failed") + for { + if err := r.adapter.start(ctx); err != nil { + if _, ok := err.(libnetwork.ErrNoSuchNetwork); ok { + // Retry network creation again 
if we + // failed because some of the networks + // were not found. + if err := r.adapter.createNetworks(ctx); err != nil { + return err + } + + continue + } + + return errors.Wrap(err, "starting container failed") + } + + break } // no health check