Adding suicide logic for tasks so as to prevent false timeout for tasks having a long image pull

Signed-off-by: Isabel Jimenez <contact@isabeljimenez.com>
This commit is contained in:
Isabel Jimenez 2016-01-14 04:55:45 -05:00
parent d12ea7ff6d
commit a99ceeb9c1
6 changed files with 29 additions and 17 deletions

View File

@ -2,6 +2,7 @@ package mesos
import (
"testing"
"time"
"github.com/docker/swarm/cluster"
"github.com/docker/swarm/cluster/mesos/task"
@ -42,11 +43,11 @@ func TestAddTask(t *testing.T) {
assert.Empty(t, s.tasks)
assert.True(t, s.empty())
t1, err := task.NewTask(cluster.BuildContainerConfig(dockerclient.ContainerConfig{}), "task1")
t1, err := task.NewTask(cluster.BuildContainerConfig(dockerclient.ContainerConfig{}), "task1", 5*time.Second)
assert.NoError(t, err)
s.addTask(t1)
t2, err := task.NewTask(cluster.BuildContainerConfig(dockerclient.ContainerConfig{}), "task1")
t2, err := task.NewTask(cluster.BuildContainerConfig(dockerclient.ContainerConfig{}), "task1", 5*time.Second)
assert.NoError(t, err)
s.addTask(t2)
assert.Equal(t, len(s.tasks), 2)
@ -80,11 +81,11 @@ func TestRemoveTask(t *testing.T) {
assert.Empty(t, s.tasks)
t1, err := task.NewTask(cluster.BuildContainerConfig(dockerclient.ContainerConfig{}), "task1")
t1, err := task.NewTask(cluster.BuildContainerConfig(dockerclient.ContainerConfig{}), "task1", 5*time.Second)
assert.NoError(t, err)
s.addTask(t1)
t2, err := task.NewTask(cluster.BuildContainerConfig(dockerclient.ContainerConfig{}), "task1")
t2, err := task.NewTask(cluster.BuildContainerConfig(dockerclient.ContainerConfig{}), "task1", 5*time.Second)
assert.NoError(t, err)
s.addTask(t2)
assert.Equal(t, len(s.tasks), 2)

View File

@ -183,7 +183,7 @@ func (c *Cluster) CreateContainer(config *cluster.ContainerConfig, name string,
return nil, errResourcesNeeded
}
task, err := task.NewTask(config, name)
task, err := task.NewTask(config, name, c.taskCreationTimeout)
if err != nil {
return nil, err
}
@ -194,10 +194,8 @@ func (c *Cluster) CreateContainer(config *cluster.ContainerConfig, name string,
case container := <-task.GetContainer():
return formatContainer(container), nil
case err := <-task.Error:
return nil, err
case <-time.After(c.taskCreationTimeout):
c.pendingTasks.Remove(task)
return nil, fmt.Errorf("container failed to start after %s", c.taskCreationTimeout)
return nil, err
}
}

View File

@ -5,6 +5,7 @@ import (
"fmt"
"strconv"
"strings"
"time"
log "github.com/Sirupsen/logrus"
"github.com/docker/docker/pkg/stringid"
@ -162,7 +163,7 @@ func (t *Task) Build(slaveID string, offers map[string]*mesosproto.Offer) {
}
// NewTask fucntion creates a task
func NewTask(config *cluster.ContainerConfig, name string) (*Task, error) {
func NewTask(config *cluster.ContainerConfig, name string, timeout time.Duration) (*Task, error) {
id := stringid.TruncateID(stringid.GenerateRandomID())
if name != "" {
@ -183,9 +184,19 @@ func NewTask(config *cluster.ContainerConfig, name string) (*Task, error) {
task.Name = &name
task.TaskId = &mesosproto.TaskID{Value: &id}
task.Labels = &mesosproto.Labels{Labels: []*mesosproto.Label{{Key: proto.String("SWARM_CONTAINER_NAME"), Value: &name}}}
go task.suicide(timeout)
return task, nil
}
func (t *Task) suicide(timeout time.Duration) {
<-time.After(timeout)
if !t.Stopped() && t.SlaveId == nil {
t.Error <- fmt.Errorf("container failed to start after %s", timeout)
}
}
// SendStatus method writes the task status in the updates channel
func (t *Task) SendStatus(status *mesosproto.TaskStatus) {
t.updates <- status

View File

@ -4,6 +4,7 @@ import (
"sort"
"strings"
"testing"
"time"
"github.com/docker/swarm/cluster"
"github.com/mesos/mesos-go/mesosproto"
@ -20,7 +21,7 @@ func TestBuild(t *testing.T) {
CpuShares: 42,
Memory: 2097152,
Cmd: []string{"ls", "foo", "bar"},
}), name)
}), name, 5*time.Second)
assert.NoError(t, err)
task.Build("slave-id", nil)
@ -47,7 +48,7 @@ func TestBuild(t *testing.T) {
}
func TestNewTask(t *testing.T) {
task, err := NewTask(cluster.BuildContainerConfig(dockerclient.ContainerConfig{}), name)
task, err := NewTask(cluster.BuildContainerConfig(dockerclient.ContainerConfig{}), name, 5*time.Second)
assert.NoError(t, err)
assert.Equal(t, *task.Name, name)
@ -56,7 +57,7 @@ func TestNewTask(t *testing.T) {
}
func TestSendGetStatus(t *testing.T) {
task, err := NewTask(cluster.BuildContainerConfig(dockerclient.ContainerConfig{}), "")
task, err := NewTask(cluster.BuildContainerConfig(dockerclient.ContainerConfig{}), "", 5*time.Second)
assert.NoError(t, err)
status := mesosutil.NewTaskStatus(nil, mesosproto.TaskState_TASK_RUNNING)

View File

@ -2,6 +2,7 @@ package task
import (
"testing"
"time"
"github.com/docker/swarm/cluster"
"github.com/samalba/dockerclient"
@ -25,14 +26,14 @@ func TestAdd(t *testing.T) {
CpuShares: 42,
Memory: 2097152,
Cmd: []string{"ls", "foo", "bar"},
}), "name1")
}), "name1", 5*time.Second)
task2, _ := NewTask(cluster.BuildContainerConfig(dockerclient.ContainerConfig{
Image: "test-image",
CpuShares: 42,
Memory: 2097152,
Cmd: []string{"ls", "foo", "bar"},
}), "name2")
}), "name2", 5*time.Second)
q.Add(task1)
assert.Equal(t, len(q.Tasks), 0)
@ -48,7 +49,7 @@ func TestRemove(t *testing.T) {
CpuShares: 42,
Memory: 2097152,
Cmd: []string{"ls", "foo", "bar"},
}), "name1")
}), "name1", 5*time.Second)
q.Add(task1)
assert.Equal(t, len(q.Tasks), 1)
@ -64,7 +65,7 @@ func TestProcess(t *testing.T) {
CpuShares: 42,
Memory: 2097152,
Cmd: []string{"ls", "foo", "bar"},
}), "name1")
}), "name1", 5*time.Second)
q.Add(task1)
assert.Equal(t, len(q.Tasks), 1)

View File

@ -79,4 +79,4 @@ function teardown() {
# verify, container is running
[ -n $(docker_swarm ps -q --filter=name=test_container --filter=status=running) ]
}
}