From 34c05c58c8d41ee2bb02cd8059e9928ee2f061ea Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 20 May 2014 00:13:00 +0000 Subject: [PATCH 1/3] Mount /dev in tmpfs for privileged containers Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- daemon/execdriver/native/create.go | 10 ++++--- .../native/template/default_template.go | 5 ++++ pkg/libcontainer/container.go | 27 ++++++++++--------- pkg/libcontainer/container.json | 12 ++++++++- pkg/libcontainer/container_test.go | 21 ++++++++++----- pkg/libcontainer/mount/init.go | 10 +++---- pkg/libcontainer/mount/nodes/nodes.go | 21 +++++++++++---- .../mount/nodes/nodes_unsupported.go | 11 ++++++++ 8 files changed, 83 insertions(+), 34 deletions(-) create mode 100644 pkg/libcontainer/mount/nodes/nodes_unsupported.go diff --git a/daemon/execdriver/native/create.go b/daemon/execdriver/native/create.go index 76816e0b9c..9ed0491940 100644 --- a/daemon/execdriver/native/create.go +++ b/daemon/execdriver/native/create.go @@ -10,6 +10,7 @@ import ( "github.com/dotcloud/docker/daemon/execdriver/native/template" "github.com/dotcloud/docker/pkg/apparmor" "github.com/dotcloud/docker/pkg/libcontainer" + "github.com/dotcloud/docker/pkg/libcontainer/mount/nodes" ) // createContainer populates and configures the container type with the @@ -34,8 +35,6 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container if err := d.setPrivileged(container); err != nil { return nil, err } - } else { - container.Mounts = append(container.Mounts, libcontainer.Mount{Type: "devtmpfs"}) } if err := d.setupCgroups(container, c); err != nil { return nil, err @@ -97,11 +96,16 @@ func (d *driver) createNetwork(container *libcontainer.Container, c *execdriver. 
return nil } -func (d *driver) setPrivileged(container *libcontainer.Container) error { +func (d *driver) setPrivileged(container *libcontainer.Container) (err error) { container.Capabilities = libcontainer.GetAllCapabilities() container.Cgroups.DeviceAccess = true delete(container.Context, "restrictions") + delete(container.DeviceNodes, "additional") + + if container.DeviceNodes["required"], err = nodes.GetHostDeviceNodes(); err != nil { + return err + } if apparmor.IsEnabled() { container.Context["apparmor_profile"] = "unconfined" diff --git a/daemon/execdriver/native/template/default_template.go b/daemon/execdriver/native/template/default_template.go index ba52499c24..dbe3985f9b 100644 --- a/daemon/execdriver/native/template/default_template.go +++ b/daemon/execdriver/native/template/default_template.go @@ -4,6 +4,7 @@ import ( "github.com/dotcloud/docker/pkg/apparmor" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/cgroups" + "github.com/dotcloud/docker/pkg/libcontainer/mount/nodes" ) // New returns the docker default configuration for libcontainer @@ -33,6 +34,10 @@ func New() *libcontainer.Container { DeviceAccess: false, }, Context: libcontainer.Context{}, + DeviceNodes: map[string][]string{ + "required": nodes.DefaultNodes, + "additional": {"fuse"}, + }, } if apparmor.IsEnabled() { container.Context["apparmor_profile"] = "docker-default" diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go index 0ea8d37c20..092cd5d93a 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -11,19 +11,20 @@ type Context map[string]string // Container defines configuration options for how a // container is setup inside a directory and how a process should be executed type Container struct { - Hostname string `json:"hostname,omitempty"` // hostname - ReadonlyFs bool `json:"readonly_fs,omitempty"` // set the containers rootfs as readonly - NoPivotRoot bool `json:"no_pivot_root,omitempty"` 
// this can be enabled if you are running in ramdisk - User string `json:"user,omitempty"` // user to execute the process as - WorkingDir string `json:"working_dir,omitempty"` // current working directory - Env []string `json:"environment,omitempty"` // environment to set - Tty bool `json:"tty,omitempty"` // setup a proper tty or not - Namespaces map[string]bool `json:"namespaces,omitempty"` // namespaces to apply - Capabilities []string `json:"capabilities,omitempty"` // capabilities given to the container - Networks []*Network `json:"networks,omitempty"` // nil for host's network stack - Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` // cgroups - Context Context `json:"context,omitempty"` // generic context for specific options (apparmor, selinux) - Mounts Mounts `json:"mounts,omitempty"` + Hostname string `json:"hostname,omitempty"` // hostname + ReadonlyFs bool `json:"readonly_fs,omitempty"` // set the containers rootfs as readonly + NoPivotRoot bool `json:"no_pivot_root,omitempty"` // this can be enabled if you are running in ramdisk + User string `json:"user,omitempty"` // user to execute the process as + WorkingDir string `json:"working_dir,omitempty"` // current working directory + Env []string `json:"environment,omitempty"` // environment to set + Tty bool `json:"tty,omitempty"` // setup a proper tty or not + Namespaces map[string]bool `json:"namespaces,omitempty"` // namespaces to apply + Capabilities []string `json:"capabilities,omitempty"` // capabilities given to the container + Networks []*Network `json:"networks,omitempty"` // nil for host's network stack + Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` // cgroups + Context Context `json:"context,omitempty"` // generic context for specific options (apparmor, selinux) + Mounts Mounts `json:"mounts,omitempty"` + DeviceNodes map[string][]string `json:"device_nodes,omitempty"` // device nodes to add to the container's /dev } // Network defines configuration for a container's networking stack diff 
--git a/pkg/libcontainer/container.json b/pkg/libcontainer/container.json index 07950fe58a..c3b0196b4a 100644 --- a/pkg/libcontainer/container.json +++ b/pkg/libcontainer/container.json @@ -43,5 +43,15 @@ { "type": "devtmpfs" } - ] + ], + "device_nodes": { + "required": [ + "null", + "zero", + "full", + "random", + "urandom", + "tty" + ] + } } diff --git a/pkg/libcontainer/container_test.go b/pkg/libcontainer/container_test.go index b3f240740c..d77ce313ae 100644 --- a/pkg/libcontainer/container_test.go +++ b/pkg/libcontainer/container_test.go @@ -4,12 +4,14 @@ import ( "encoding/json" "os" "testing" + + "github.com/dotcloud/docker/pkg/libcontainer/mount/nodes" ) // Checks whether the expected capability is specified in the capabilities. -func hasCapability(expected string, capabilities []string) bool { - for _, capability := range capabilities { - if capability == expected { +func contains(expected string, values []string) bool { + for _, v := range values { + if v == expected { return true } } @@ -47,18 +49,25 @@ func TestContainerJsonFormat(t *testing.T) { t.Fail() } - if hasCapability("SYS_ADMIN", container.Capabilities) { + if contains("SYS_ADMIN", container.Capabilities) { t.Log("SYS_ADMIN should not be enabled in capabilities mask") t.Fail() } - if !hasCapability("MKNOD", container.Capabilities) { + if !contains("MKNOD", container.Capabilities) { t.Log("MKNOD should be enabled in capabilities mask") t.Fail() } - if hasCapability("SYS_CHROOT", container.Capabilities) { + if contains("SYS_CHROOT", container.Capabilities) { t.Log("capabilities mask should not contain SYS_CHROOT") t.Fail() } + + for _, n := range nodes.DefaultNodes { + if !contains(n, container.DeviceNodes["required"]) { + t.Logf("devices should contain %s", n) + t.Fail() + } + } } diff --git a/pkg/libcontainer/mount/init.go b/pkg/libcontainer/mount/init.go index c4148131ad..184df1e8ec 100644 --- a/pkg/libcontainer/mount/init.go +++ b/pkg/libcontainer/mount/init.go @@ -48,10 +48,10 @@ func 
InitializeMountNamespace(rootfs, console string, container *libcontainer.Co if err := setupBindmounts(rootfs, container.Mounts); err != nil { return fmt.Errorf("bind mounts %s", err) } - if err := nodes.CopyN(rootfs, nodes.DefaultNodes, true); err != nil { - return fmt.Errorf("copy dev nodes %s", err) + if err := nodes.CopyN(rootfs, container.DeviceNodes["required"], true); err != nil { + return fmt.Errorf("copy required dev nodes %s", err) } - if err := nodes.CopyN(rootfs, nodes.AdditionalNodes, false); err != nil { + if err := nodes.CopyN(rootfs, container.DeviceNodes["additional"], false); err != nil { return fmt.Errorf("copy additional dev nodes %s", err) } if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil { @@ -195,13 +195,11 @@ func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mo systemMounts := []mount{ {source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags}, {source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags}, + {source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)}, {source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)}, {source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)}, {source: "tmpfs", path: filepath.Join(rootfs, "run"), device: "tmpfs", flags: defaultMountFlags}, } - if len(mounts.OfType("devtmpfs")) == 1 { - systemMounts = append([]mount{{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)}}, 
systemMounts...) - } return systemMounts } diff --git a/pkg/libcontainer/mount/nodes/nodes.go b/pkg/libcontainer/mount/nodes/nodes.go index 1384682729..14b6f5ae57 100644 --- a/pkg/libcontainer/mount/nodes/nodes.go +++ b/pkg/libcontainer/mount/nodes/nodes.go @@ -4,6 +4,7 @@ package nodes import ( "fmt" + "io/ioutil" "os" "path/filepath" "syscall" @@ -21,11 +22,6 @@ var DefaultNodes = []string{ "tty", } -// AdditionalNodes includes nodes that are not required -var AdditionalNodes = []string{ - "fuse", -} - // CopyN copies the device node from the host into the rootfs func CopyN(rootfs string, nodesToCopy []string, shouldExist bool) error { oldMask := system.Umask(0000) @@ -61,3 +57,18 @@ func Copy(rootfs, node string, shouldExist bool) error { } return nil } + +func GetHostDeviceNodes() ([]string, error) { + files, err := ioutil.ReadDir("/dev") + if err != nil { + return nil, err + } + + out := []string{} + for _, f := range files { + if f.Mode()&os.ModeDevice == os.ModeDevice { + out = append(out, f.Name()) + } + } + return out, nil +} diff --git a/pkg/libcontainer/mount/nodes/nodes_unsupported.go b/pkg/libcontainer/mount/nodes/nodes_unsupported.go new file mode 100644 index 0000000000..24409f411f --- /dev/null +++ b/pkg/libcontainer/mount/nodes/nodes_unsupported.go @@ -0,0 +1,11 @@ +// +build !linux + +package nodes + +import "github.com/dotcloud/docker/pkg/libcontainer" + +var DefaultNodes = []string{} + +func GetHostDeviceNodes() ([]string, error) { + return nil, libcontainer.ErrUnsupported +} From ed5892ed4efa995950e2fdeb5fd718b3bb1aa1c2 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 20 May 2014 23:34:46 +0000 Subject: [PATCH 2/3] Update documentation for container struct in libcontainer Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- pkg/libcontainer/container.go | 100 ++++++++++++++++++++++++++-------- 1 file changed, 77 insertions(+), 23 deletions(-) diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go 
index 092cd5d93a..f7aa245855 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -4,27 +4,70 @@ import ( "github.com/dotcloud/docker/pkg/libcontainer/cgroups" ) -// Context is a generic key value pair that allows -// arbatrary data to be sent +// Context is a generic key value pair that allows arbitrary data to be sent type Context map[string]string -// Container defines configuration options for how a -// container is setup inside a directory and how a process should be executed +// Container defines configuration options for executing a process inside a contained environment type Container struct { - Hostname string `json:"hostname,omitempty"` // hostname - ReadonlyFs bool `json:"readonly_fs,omitempty"` // set the containers rootfs as readonly - NoPivotRoot bool `json:"no_pivot_root,omitempty"` // this can be enabled if you are running in ramdisk - User string `json:"user,omitempty"` // user to execute the process as - WorkingDir string `json:"working_dir,omitempty"` // current working directory - Env []string `json:"environment,omitempty"` // environment to set - Tty bool `json:"tty,omitempty"` // setup a proper tty or not - Namespaces map[string]bool `json:"namespaces,omitempty"` // namespaces to apply - Capabilities []string `json:"capabilities,omitempty"` // capabilities given to the container - Networks []*Network `json:"networks,omitempty"` // nil for host's network stack - Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` // cgroups - Context Context `json:"context,omitempty"` // generic context for specific options (apparmor, selinux) - Mounts Mounts `json:"mounts,omitempty"` - DeviceNodes map[string][]string `json:"device_nodes,omitempty"` // device nodes to add to the container's /dev + // Hostname optionally sets the container's hostname if provided + Hostname string `json:"hostname,omitempty"` + + // ReadonlyFs will remount the container's rootfs as readonly where only externally mounted + // bind mounts are writable + 
ReadonlyFs bool `json:"readonly_fs,omitempty"` + + // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs + // This is a common option when the container is running in ramdisk + NoPivotRoot bool `json:"no_pivot_root,omitempty"` + + // User will set the uid and gid of the executing process running inside the container + User string `json:"user,omitempty"` + + // WorkingDir will change the process's current working directory inside the container's rootfs + WorkingDir string `json:"working_dir,omitempty"` + + // Env will populate the process's environment with the provided values + // Any values from the parent process will be cleared before the values + // provided in Env are provided to the process + Env []string `json:"environment,omitempty"` + + // Tty when true will allocate a pty slave on the host for access by the container's process + // and ensure that it is mounted inside the container's rootfs + Tty bool `json:"tty,omitempty"` + + // Namespaces specifies the container's namespaces that it should setup when cloning the init process + // If a namespace is not provided that namespace is shared from the container's parent process + Namespaces map[string]bool `json:"namespaces,omitempty"` + + // Capabilities specify the capabilities to keep when executing the process inside the container + // All capbilities not specified will be dropped from the processes capability mask + Capabilities []string `json:"capabilities,omitempty"` + + // Networks specifies the container's network stop to be created + Networks []*Network `json:"networks,omitempty"` + + // Cgroups specifies specific cgroup settings for the various subsystems that the container is + // placed into to limit the resources the container has available + Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` + + // Context is a generic key value format that allows for additional settings to be passed + // on the container's creation + // This is commonly used to specify 
apparmor profiles, selinux labels, and different restrictions + // placed on the container's processes + Context Context `json:"context,omitempty"` + + // Mounts specify additional source and destination paths that will be mounted inside the container's + // rootfs and mount namespace if specified + Mounts Mounts `json:"mounts,omitempty"` + + // DeviceNodes are a list of 'required' and 'additional' nodes that will be mknod into the container's + // rootfs at /dev + // + // Required device nodes will return an error if the host system does not have this device available + // + // Additional device nodes are created but no error is returned if the host system does not have the + // device avaliable for use by the container + DeviceNodes map[string][]string `json:"device_nodes,omitempty"` } // Network defines configuration for a container's networking stack @@ -32,9 +75,20 @@ type Container struct { // The network configuration can be omited from a container causing the // container to be setup with the host's networking stack type Network struct { - Type string `json:"type,omitempty"` // type of networking to setup i.e. 
veth, macvlan, etc - Context Context `json:"context,omitempty"` // generic context for type specific networking options - Address string `json:"address,omitempty"` - Gateway string `json:"gateway,omitempty"` - Mtu int `json:"mtu,omitempty"` + // Type sets the network's type, commonly veth and loopback + Type string `json:"type,omitempty"` + + // Context is a generic key value format for setting additional options that are specific to + // the network type + Context Context `json:"context,omitempty"` + + // Address contains the IP and mask to set on the network interface + Address string `json:"address,omitempty"` + + // Gateway sets the gateway address that is used as the default for the interface + Gateway string `json:"gateway,omitempty"` + + // Mtu sets the mtu value for the interface and will be mirrored on both the host and + // container's interfaces if a pair is created, specifically in the case of type veth + Mtu int `json:"mtu,omitempty"` } From f042c3c15759fce5cc139f2b3362b791ac7d4829 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 21 May 2014 00:36:50 +0000 Subject: [PATCH 3/3] Update code post codereview Add specific types for Required and Optional DeviceNodes Docker-DCO-1.1-Signed-off-by: Michael Crosby (github: crosbymichael) --- daemon/execdriver/native/create.go | 4 ++-- .../native/template/default_template.go | 8 +++----- pkg/libcontainer/container.go | 17 ++++++++--------- pkg/libcontainer/container.json | 18 ++++++++---------- pkg/libcontainer/container_test.go | 2 +- pkg/libcontainer/mount/init.go | 6 +++--- 6 files changed, 25 insertions(+), 30 deletions(-) diff --git a/daemon/execdriver/native/create.go b/daemon/execdriver/native/create.go index 9ed0491940..d35043bd08 100644 --- a/daemon/execdriver/native/create.go +++ b/daemon/execdriver/native/create.go @@ -101,9 +101,9 @@ func (d *driver) setPrivileged(container *libcontainer.Container) (err error) { container.Cgroups.DeviceAccess = true delete(container.Context, "restrictions") - 
delete(container.DeviceNodes, "additional") - if container.DeviceNodes["required"], err = nodes.GetHostDeviceNodes(); err != nil { + container.OptionalDeviceNodes = nil + if container.RequiredDeviceNodes, err = nodes.GetHostDeviceNodes(); err != nil { return err } diff --git a/daemon/execdriver/native/template/default_template.go b/daemon/execdriver/native/template/default_template.go index dbe3985f9b..cbef06fbf1 100644 --- a/daemon/execdriver/native/template/default_template.go +++ b/daemon/execdriver/native/template/default_template.go @@ -33,11 +33,9 @@ func New() *libcontainer.Container { Parent: "docker", DeviceAccess: false, }, - Context: libcontainer.Context{}, - DeviceNodes: map[string][]string{ - "required": nodes.DefaultNodes, - "additional": {"fuse"}, - }, + Context: libcontainer.Context{}, + RequiredDeviceNodes: nodes.DefaultNodes, + OptionalDeviceNodes: []string{"fuse"}, } if apparmor.IsEnabled() { container.Context["apparmor_profile"] = "docker-default" diff --git a/pkg/libcontainer/container.go b/pkg/libcontainer/container.go index f7aa245855..6734bfd590 100644 --- a/pkg/libcontainer/container.go +++ b/pkg/libcontainer/container.go @@ -43,7 +43,7 @@ type Container struct { // All capbilities not specified will be dropped from the processes capability mask Capabilities []string `json:"capabilities,omitempty"` - // Networks specifies the container's network stop to be created + // Networks specifies the container's network setup to be created Networks []*Network `json:"networks,omitempty"` // Cgroups specifies specific cgroup settings for the various subsystems that the container is @@ -60,14 +60,13 @@ type Container struct { // rootfs and mount namespace if specified Mounts Mounts `json:"mounts,omitempty"` - // DeviceNodes are a list of 'required' and 'additional' nodes that will be mknod into the container's - // rootfs at /dev - // - // Required device nodes will return an error if the host system does not have this device available - // - // 
Additional device nodes are created but no error is returned if the host system does not have the - // device avaliable for use by the container - DeviceNodes map[string][]string `json:"device_nodes,omitempty"` + // RequiredDeviceNodes are a list of device nodes that will be mknod into the container's rootfs at /dev + // If the host system does not support the device that the container requests an error is returned + RequiredDeviceNodes []string `json:"required_device_nodes,omitempty"` + + // OptionalDeviceNodes are a list of device nodes that will be mknod into the container's rootfs at /dev + // If the host system does not support the device that the container requests the error is ignored + OptionalDeviceNodes []string `json:"optional_device_nodes,omitempty"` } // Network defines configuration for a container's networking stack diff --git a/pkg/libcontainer/container.json b/pkg/libcontainer/container.json index c3b0196b4a..7156260bc2 100644 --- a/pkg/libcontainer/container.json +++ b/pkg/libcontainer/container.json @@ -44,14 +44,12 @@ "type": "devtmpfs" } ], - "device_nodes": { - "required": [ - "null", - "zero", - "full", - "random", - "urandom", - "tty" - ] - } + "required_device_nodes": [ + "null", + "zero", + "full", + "random", + "urandom", + "tty" + ] } diff --git a/pkg/libcontainer/container_test.go b/pkg/libcontainer/container_test.go index d77ce313ae..f6e991edf5 100644 --- a/pkg/libcontainer/container_test.go +++ b/pkg/libcontainer/container_test.go @@ -65,7 +65,7 @@ func TestContainerJsonFormat(t *testing.T) { } for _, n := range nodes.DefaultNodes { - if !contains(n, container.DeviceNodes["required"]) { + if !contains(n, container.RequiredDeviceNodes) { t.Logf("devices should contain %s", n) t.Fail() } diff --git a/pkg/libcontainer/mount/init.go b/pkg/libcontainer/mount/init.go index 184df1e8ec..3fb9667b16 100644 --- a/pkg/libcontainer/mount/init.go +++ b/pkg/libcontainer/mount/init.go @@ -48,11 +48,11 @@ func InitializeMountNamespace(rootfs, console 
string, container *libcontainer.Co if err := setupBindmounts(rootfs, container.Mounts); err != nil { return fmt.Errorf("bind mounts %s", err) } - if err := nodes.CopyN(rootfs, container.DeviceNodes["required"], true); err != nil { + if err := nodes.CopyN(rootfs, container.RequiredDeviceNodes, true); err != nil { return fmt.Errorf("copy required dev nodes %s", err) } - if err := nodes.CopyN(rootfs, container.DeviceNodes["additional"], false); err != nil { - return fmt.Errorf("copy additional dev nodes %s", err) + if err := nodes.CopyN(rootfs, container.OptionalDeviceNodes, false); err != nil { + return fmt.Errorf("copy optional dev nodes %s", err) } if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil { return err