Execdriver implementation on new libcontainer API

Signed-off-by: Alexander Morozov <lk4d4@docker.com>
This commit is contained in:
Alexander Morozov 2015-03-05 09:55:14 -08:00
parent 7910cb5243
commit 68ba5f0b69
16 changed files with 773 additions and 429 deletions

View File

@ -14,6 +14,8 @@ import (
"syscall" "syscall"
"time" "time"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/devices" "github.com/docker/libcontainer/devices"
"github.com/docker/libcontainer/label" "github.com/docker/libcontainer/label"
@ -259,18 +261,18 @@ func populateCommand(c *Container, env []string) error {
pid.HostPid = c.hostConfig.PidMode.IsHost() pid.HostPid = c.hostConfig.PidMode.IsHost()
// Build lists of devices allowed and created within the container. // Build lists of devices allowed and created within the container.
userSpecifiedDevices := make([]*devices.Device, len(c.hostConfig.Devices)) userSpecifiedDevices := make([]*configs.Device, len(c.hostConfig.Devices))
for i, deviceMapping := range c.hostConfig.Devices { for i, deviceMapping := range c.hostConfig.Devices {
device, err := devices.GetDevice(deviceMapping.PathOnHost, deviceMapping.CgroupPermissions) device, err := devices.DeviceFromPath(deviceMapping.PathOnHost, deviceMapping.CgroupPermissions)
if err != nil { if err != nil {
return fmt.Errorf("error gathering device information while adding custom device %q: %s", deviceMapping.PathOnHost, err) return fmt.Errorf("error gathering device information while adding custom device %q: %s", deviceMapping.PathOnHost, err)
} }
device.Path = deviceMapping.PathInContainer device.Path = deviceMapping.PathInContainer
userSpecifiedDevices[i] = device userSpecifiedDevices[i] = device
} }
allowedDevices := append(devices.DefaultAllowedDevices, userSpecifiedDevices...) allowedDevices := append(configs.DefaultAllowedDevices, userSpecifiedDevices...)
autoCreatedDevices := append(devices.DefaultAutoCreatedDevices, userSpecifiedDevices...) autoCreatedDevices := append(configs.DefaultAutoCreatedDevices, userSpecifiedDevices...)
// TODO: this can be removed after lxc-conf is fully deprecated // TODO: this can be removed after lxc-conf is fully deprecated
lxcConfig, err := mergeLxcConfIntoOptions(c.hostConfig) lxcConfig, err := mergeLxcConfIntoOptions(c.hostConfig)
@ -972,7 +974,7 @@ func (container *Container) Exposes(p nat.Port) bool {
return exists return exists
} }
func (container *Container) GetPtyMaster() (*os.File, error) { func (container *Container) GetPtyMaster() (libcontainer.Console, error) {
ttyConsole, ok := container.command.ProcessConfig.Terminal.(execdriver.TtyTerminal) ttyConsole, ok := container.command.ProcessConfig.Terminal.(execdriver.TtyTerminal)
if !ok { if !ok {
return nil, ErrNoTTY return nil, ErrNoTTY

View File

@ -1,17 +1,22 @@
package execdriver package execdriver
import ( import (
"encoding/json"
"errors" "errors"
"io" "io"
"io/ioutil"
"os" "os"
"os/exec" "os/exec"
"path/filepath"
"strconv"
"strings" "strings"
"time" "time"
"github.com/docker/docker/daemon/execdriver/native/template" "github.com/docker/docker/daemon/execdriver/native/template"
"github.com/docker/docker/pkg/ulimit" "github.com/docker/docker/pkg/ulimit"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/devices" "github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/configs"
) )
// Context is a generic key value pair that allows // Context is a generic key value pair that allows
@ -42,7 +47,7 @@ type Terminal interface {
} }
type TtyTerminal interface { type TtyTerminal interface {
Master() *os.File Master() libcontainer.Console
} }
// ExitStatus provides exit reasons for a container. // ExitStatus provides exit reasons for a container.
@ -109,7 +114,7 @@ type Resources struct {
} }
type ResourceStats struct { type ResourceStats struct {
*libcontainer.ContainerStats *libcontainer.Stats
Read time.Time `json:"read"` Read time.Time `json:"read"`
MemoryLimit int64 `json:"memory_limit"` MemoryLimit int64 `json:"memory_limit"`
SystemUsage uint64 `json:"system_usage"` SystemUsage uint64 `json:"system_usage"`
@ -149,8 +154,8 @@ type Command struct {
Pid *Pid `json:"pid"` Pid *Pid `json:"pid"`
Resources *Resources `json:"resources"` Resources *Resources `json:"resources"`
Mounts []Mount `json:"mounts"` Mounts []Mount `json:"mounts"`
AllowedDevices []*devices.Device `json:"allowed_devices"` AllowedDevices []*configs.Device `json:"allowed_devices"`
AutoCreatedDevices []*devices.Device `json:"autocreated_devices"` AutoCreatedDevices []*configs.Device `json:"autocreated_devices"`
CapAdd []string `json:"cap_add"` CapAdd []string `json:"cap_add"`
CapDrop []string `json:"cap_drop"` CapDrop []string `json:"cap_drop"`
ContainerPid int `json:"container_pid"` // the pid for the process inside a container ContainerPid int `json:"container_pid"` // the pid for the process inside a container
@ -161,23 +166,19 @@ type Command struct {
AppArmorProfile string `json:"apparmor_profile"` AppArmorProfile string `json:"apparmor_profile"`
} }
func InitContainer(c *Command) *libcontainer.Config { func InitContainer(c *Command) *configs.Config {
container := template.New() container := template.New()
container.Hostname = getEnv("HOSTNAME", c.ProcessConfig.Env) container.Hostname = getEnv("HOSTNAME", c.ProcessConfig.Env)
container.Tty = c.ProcessConfig.Tty
container.User = c.ProcessConfig.User
container.WorkingDir = c.WorkingDir
container.Env = c.ProcessConfig.Env
container.Cgroups.Name = c.ID container.Cgroups.Name = c.ID
container.Cgroups.AllowedDevices = c.AllowedDevices container.Cgroups.AllowedDevices = c.AllowedDevices
container.MountConfig.DeviceNodes = c.AutoCreatedDevices container.Readonlyfs = c.ReadonlyRootfs
container.RootFs = c.Rootfs container.Devices = c.AutoCreatedDevices
container.MountConfig.ReadonlyFs = c.ReadonlyRootfs container.Rootfs = c.Rootfs
container.Readonlyfs = c.ReadonlyRootfs
// check to see if we are running in ramdisk to disable pivot root // check to see if we are running in ramdisk to disable pivot root
container.MountConfig.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != "" container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != ""
container.RestrictSys = true
return container return container
} }
@ -191,7 +192,7 @@ func getEnv(key string, env []string) string {
return "" return ""
} }
func SetupCgroups(container *libcontainer.Config, c *Command) error { func SetupCgroups(container *configs.Config, c *Command) error {
if c.Resources != nil { if c.Resources != nil {
container.Cgroups.CpuShares = c.Resources.CpuShares container.Cgroups.CpuShares = c.Resources.CpuShares
container.Cgroups.Memory = c.Resources.Memory container.Cgroups.Memory = c.Resources.Memory
@ -203,28 +204,98 @@ func SetupCgroups(container *libcontainer.Config, c *Command) error {
return nil return nil
} }
func Stats(stateFile string, containerMemoryLimit int64, machineMemory int64) (*ResourceStats, error) { // Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo.
state, err := libcontainer.GetState(stateFile) func getNetworkInterfaceStats(interfaceName string) (*libcontainer.NetworkInterface, error) {
if err != nil { out := &libcontainer.NetworkInterface{Name: interfaceName}
if os.IsNotExist(err) { // This can happen if the network runtime information is missing - possible if the
return nil, ErrNotRunning // container was created by an old version of libcontainer.
if interfaceName == "" {
return out, nil
}
type netStatsPair struct {
// Where to write the output.
Out *uint64
// The network stats file to read.
File string
}
// Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container.
netStats := []netStatsPair{
{Out: &out.RxBytes, File: "tx_bytes"},
{Out: &out.RxPackets, File: "tx_packets"},
{Out: &out.RxErrors, File: "tx_errors"},
{Out: &out.RxDropped, File: "tx_dropped"},
{Out: &out.TxBytes, File: "rx_bytes"},
{Out: &out.TxPackets, File: "rx_packets"},
{Out: &out.TxErrors, File: "rx_errors"},
{Out: &out.TxDropped, File: "rx_dropped"},
}
for _, netStat := range netStats {
data, err := readSysfsNetworkStats(interfaceName, netStat.File)
if err != nil {
return nil, err
} }
*(netStat.Out) = data
}
return out, nil
}
// readSysfsNetworkStats reads one counter file from
// /sys/class/net/<ethInterface>/statistics/<statsFile> and parses its
// contents as an unsigned decimal integer.
func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
	statsPath := filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile)
	raw, err := ioutil.ReadFile(statsPath)
	if err != nil {
		return 0, err
	}
	value := strings.TrimSpace(string(raw))
	return strconv.ParseUint(value, 10, 64)
}
func Stats(containerDir string, containerMemoryLimit int64, machineMemory int64) (*ResourceStats, error) {
f, err := os.Open(filepath.Join(containerDir, "state.json"))
if err != nil {
return nil, err
}
defer f.Close()
type network struct {
Type string
HostInterfaceName string
}
state := struct {
CgroupPaths map[string]string `json:"cgroup_paths"`
Networks []network
}{}
if err := json.NewDecoder(f).Decode(&state); err != nil {
return nil, err return nil, err
} }
now := time.Now() now := time.Now()
stats, err := libcontainer.GetStats(nil, state)
mgr := fs.Manager{Paths: state.CgroupPaths}
cstats, err := mgr.GetStats()
if err != nil { if err != nil {
return nil, err return nil, err
} }
stats := &libcontainer.Stats{CgroupStats: cstats}
// if the container does not have any memory limit specified set the // if the container does not have any memory limit specified set the
// limit to the machines memory // limit to the machines memory
memoryLimit := containerMemoryLimit memoryLimit := containerMemoryLimit
if memoryLimit == 0 { if memoryLimit == 0 {
memoryLimit = machineMemory memoryLimit = machineMemory
} }
for _, iface := range state.Networks {
switch iface.Type {
case "veth":
istats, err := getNetworkInterfaceStats(iface.HostInterfaceName)
if err != nil {
return nil, err
}
stats.Interfaces = append(stats.Interfaces, istats)
}
}
return &ResourceStats{ return &ResourceStats{
Read: now, Stats: stats,
ContainerStats: stats, Read: now,
MemoryLimit: memoryLimit, MemoryLimit: memoryLimit,
}, nil }, nil
} }

View File

@ -23,7 +23,9 @@ import (
"github.com/docker/docker/utils" "github.com/docker/docker/utils"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/mount/nodes" "github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/system"
"github.com/docker/libcontainer/user"
"github.com/kr/pty" "github.com/kr/pty"
) )
@ -42,7 +44,7 @@ type driver struct {
} }
type activeContainer struct { type activeContainer struct {
container *libcontainer.Config container *configs.Config
cmd *exec.Cmd cmd *exec.Cmd
} }
@ -190,7 +192,7 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
c.ProcessConfig.Path = aname c.ProcessConfig.Path = aname
c.ProcessConfig.Args = append([]string{name}, arg...) c.ProcessConfig.Args = append([]string{name}, arg...)
if err := nodes.CreateDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil { if err := createDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err return execdriver.ExitStatus{ExitCode: -1}, err
} }
@ -231,11 +233,17 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
} }
state := &libcontainer.State{ state := &libcontainer.State{
InitPid: pid, InitProcessPid: pid,
CgroupPaths: cgroupPaths, CgroupPaths: cgroupPaths,
} }
if err := libcontainer.SaveState(dataPath, state); err != nil { f, err := os.Create(filepath.Join(dataPath, "state.json"))
if err != nil {
return terminate(err)
}
defer f.Close()
if err := json.NewEncoder(f).Encode(state); err != nil {
return terminate(err) return terminate(err)
} }
@ -245,8 +253,9 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
log.Debugf("Invoking startCallback") log.Debugf("Invoking startCallback")
startCallback(&c.ProcessConfig, pid) startCallback(&c.ProcessConfig, pid)
} }
oomKill := false oomKill := false
oomKillNotification, err := libcontainer.NotifyOnOOM(state) oomKillNotification, err := notifyOnOOM(cgroupPaths)
if err == nil { if err == nil {
_, oomKill = <-oomKillNotification _, oomKill = <-oomKillNotification
log.Debugf("oomKill error %s waitErr %s", oomKill, waitErr) log.Debugf("oomKill error %s waitErr %s", oomKill, waitErr)
@ -265,9 +274,57 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
return execdriver.ExitStatus{ExitCode: exitCode, OOMKilled: oomKill}, waitErr return execdriver.ExitStatus{ExitCode: exitCode, OOMKilled: oomKill}, waitErr
} }
// copy from libcontainer
// notifyOnOOM returns a channel that receives a value every time the
// container's memory cgroup reports an OOM event, using the cgroup v1
// eventfd registration mechanism (cgroup.event_control). The channel is
// closed when the cgroup disappears or the eventfd read fails.
func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
// The memory cgroup path is required to register for OOM notifications.
dir := paths["memory"]
if dir == "" {
return nil, fmt.Errorf("There is no path for %q in state", "memory")
}
oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control"))
if err != nil {
return nil, err
}
// Create a close-on-exec eventfd the kernel will signal on each OOM kill.
fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0)
if syserr != 0 {
oomControl.Close()
return nil, syserr
}
eventfd := os.NewFile(fd, "eventfd")
eventControlPath := filepath.Join(dir, "cgroup.event_control")
// Register "<eventfd> <oom_control fd>" with the cgroup event API; this
// must succeed before we start the reader goroutine below.
data := fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd())
if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil {
eventfd.Close()
oomControl.Close()
return nil, err
}
ch := make(chan struct{})
go func() {
// Close the channel and both fds on exit, in that order, so readers
// observe the close before the descriptors go away.
defer func() {
close(ch)
eventfd.Close()
oomControl.Close()
}()
// Each event arrives as an 8-byte counter value on the eventfd.
buf := make([]byte, 8)
for {
if _, err := eventfd.Read(buf); err != nil {
return
}
// When a cgroup is destroyed, an event is sent to eventfd.
// So if the control path is gone, return instead of notifying.
if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) {
return
}
ch <- struct{}{}
}
}()
return ch, nil
}
// createContainer populates and configures the container type with the // createContainer populates and configures the container type with the
// data provided by the execdriver.Command // data provided by the execdriver.Command
func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, error) { func (d *driver) createContainer(c *execdriver.Command) (*configs.Config, error) {
container := execdriver.InitContainer(c) container := execdriver.InitContainer(c)
if err := execdriver.SetupCgroups(container, c); err != nil { if err := execdriver.SetupCgroups(container, c); err != nil {
return nil, err return nil, err
@ -297,6 +354,87 @@ func cgroupPaths(containerId string) (map[string]string, error) {
return paths, nil return paths, nil
} }
// createDeviceNodes creates each requested device node inside the
// container's rootfs. (Ported from the old libcontainer nodes.go.)
func createDeviceNodes(rootfs string, nodesToCreate []*configs.Device) error {
	// Clear the umask so node permission bits land exactly as configured.
	prevUmask := syscall.Umask(0000)
	defer syscall.Umask(prevUmask)

	for _, dev := range nodesToCreate {
		if err := createDeviceNode(rootfs, dev); err != nil {
			return err
		}
	}
	return nil
}
// createDeviceNode creates a single device node (character or block) at
// node.Path inside the container rootfs and chowns it to the configured
// uid/gid. A node that already exists is not treated as an error.
func createDeviceNode(rootfs string, node *configs.Device) error {
	var (
		dest   = filepath.Join(rootfs, node.Path)
		parent = filepath.Dir(dest)
	)

	if err := os.MkdirAll(parent, 0755); err != nil {
		return err
	}

	fileMode := node.FileMode
	switch node.Type {
	case 'c':
		fileMode |= syscall.S_IFCHR
	case 'b':
		fileMode |= syscall.S_IFBLK
	default:
		return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
	}

	// Tolerate a pre-existing node (e.g. container restart).
	if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil && !os.IsExist(err) {
		return fmt.Errorf("mknod %s %s", node.Path, err)
	}
	// Fix: include the underlying error — the original message dropped it,
	// making chown failures impossible to diagnose.
	if err := syscall.Chown(dest, int(node.Uid), int(node.Gid)); err != nil {
		return fmt.Errorf("chown %s to %d:%d: %s", node.Path, node.Uid, node.Gid, err)
	}
	return nil
}
// setupUser changes the groups, gid, and uid for the user inside the container
// Copied from libcontainer, because the equivalent there is now private.
func setupUser(userSpec string) error {
// Set up defaults.
// Fall back to the current process uid/gid when userSpec does not
// specify them, with "/" as the default home directory.
defaultExecUser := user.ExecUser{
Uid: syscall.Getuid(),
Gid: syscall.Getgid(),
Home: "/",
}
passwdPath, err := user.GetPasswdPath()
if err != nil {
return err
}
groupPath, err := user.GetGroupPath()
if err != nil {
return err
}
// Resolve userSpec (e.g. "user", "uid", "user:group") against the
// passwd/group files found above.
execUser, err := user.GetExecUserPath(userSpec, &defaultExecUser, passwdPath, groupPath)
if err != nil {
return err
}
// Drop the gid BEFORE the uid: once we setuid away from root we may no
// longer have permission to change groups.
if err := system.Setgid(execUser.Gid); err != nil {
return err
}
if err := system.Setuid(execUser.Uid); err != nil {
return err
}
// if we didn't get HOME already, set it based on the user's HOME
if envHome := os.Getenv("HOME"); envHome == "" {
if err := os.Setenv("HOME", execUser.Home); err != nil {
return err
}
}
return nil
}
/// Return the exit code of the process /// Return the exit code of the process
// if the process has not exited -1 will be returned // if the process has not exited -1 will be returned
func getExitCode(c *execdriver.Command) int { func getExitCode(c *execdriver.Command) int {

View File

@ -3,8 +3,6 @@ package lxc
import ( import (
"fmt" "fmt"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/namespaces"
"github.com/docker/libcontainer/utils" "github.com/docker/libcontainer/utils"
) )
@ -12,9 +10,7 @@ func finalizeNamespace(args *InitArgs) error {
if err := utils.CloseExecFrom(3); err != nil { if err := utils.CloseExecFrom(3); err != nil {
return err return err
} }
if err := namespaces.SetupUser(&libcontainer.Config{ if err := setupUser(args.User); err != nil {
User: args.User,
}); err != nil {
return fmt.Errorf("setup user %s", err) return fmt.Errorf("setup user %s", err)
} }
if err := setupWorkingDirectory(args); err != nil { if err := setupWorkingDirectory(args); err != nil {

View File

@ -11,7 +11,6 @@ import (
nativeTemplate "github.com/docker/docker/daemon/execdriver/native/template" nativeTemplate "github.com/docker/docker/daemon/execdriver/native/template"
"github.com/docker/docker/utils" "github.com/docker/docker/utils"
"github.com/docker/libcontainer/label" "github.com/docker/libcontainer/label"
"github.com/docker/libcontainer/security/capabilities"
) )
const LxcTemplate = ` const LxcTemplate = `
@ -169,7 +168,7 @@ func keepCapabilities(adds []string, drops []string) ([]string, error) {
var newCaps []string var newCaps []string
for _, cap := range caps { for _, cap := range caps {
log.Debugf("cap %s\n", cap) log.Debugf("cap %s\n", cap)
realCap := capabilities.GetCapability(cap) realCap := execdriver.GetCapability(cap)
numCap := fmt.Sprintf("%d", realCap.Value) numCap := fmt.Sprintf("%d", realCap.Value)
newCaps = append(newCaps, numCap) newCaps = append(newCaps, numCap)
} }
@ -180,13 +179,10 @@ func keepCapabilities(adds []string, drops []string) ([]string, error) {
func dropList(drops []string) ([]string, error) { func dropList(drops []string) ([]string, error) {
if utils.StringsContainsNoCase(drops, "all") { if utils.StringsContainsNoCase(drops, "all") {
var newCaps []string var newCaps []string
for _, cap := range capabilities.GetAllCapabilities() { for _, capName := range execdriver.GetAllCapabilities() {
log.Debugf("drop cap %s\n", cap) cap := execdriver.GetCapability(capName)
realCap := capabilities.GetCapability(cap) log.Debugf("drop cap %s\n", cap.Key)
if realCap == nil { numCap := fmt.Sprintf("%d", cap.Value)
return nil, fmt.Errorf("Invalid capability '%s'", cap)
}
numCap := fmt.Sprintf("%d", realCap.Value)
newCaps = append(newCaps, numCap) newCaps = append(newCaps, numCap)
} }
return newCaps, nil return newCaps, nil

View File

@ -5,11 +5,6 @@ package lxc
import ( import (
"bufio" "bufio"
"fmt" "fmt"
"github.com/docker/docker/daemon/execdriver"
nativeTemplate "github.com/docker/docker/daemon/execdriver/native/template"
"github.com/docker/libcontainer/devices"
"github.com/docker/libcontainer/security/capabilities"
"github.com/syndtr/gocapability/capability"
"io/ioutil" "io/ioutil"
"math/rand" "math/rand"
"os" "os"
@ -17,6 +12,11 @@ import (
"strings" "strings"
"testing" "testing"
"time" "time"
"github.com/docker/docker/daemon/execdriver"
nativeTemplate "github.com/docker/docker/daemon/execdriver/native/template"
"github.com/docker/libcontainer/configs"
"github.com/syndtr/gocapability/capability"
) )
func TestLXCConfig(t *testing.T) { func TestLXCConfig(t *testing.T) {
@ -53,7 +53,7 @@ func TestLXCConfig(t *testing.T) {
Mtu: 1500, Mtu: 1500,
Interface: nil, Interface: nil,
}, },
AllowedDevices: make([]*devices.Device, 0), AllowedDevices: make([]*configs.Device, 0),
ProcessConfig: execdriver.ProcessConfig{}, ProcessConfig: execdriver.ProcessConfig{},
} }
p, err := driver.generateLXCConfig(command) p, err := driver.generateLXCConfig(command)
@ -295,7 +295,7 @@ func TestCustomLxcConfigMisc(t *testing.T) {
grepFile(t, p, "lxc.cgroup.cpuset.cpus = 0,1") grepFile(t, p, "lxc.cgroup.cpuset.cpus = 0,1")
container := nativeTemplate.New() container := nativeTemplate.New()
for _, cap := range container.Capabilities { for _, cap := range container.Capabilities {
realCap := capabilities.GetCapability(cap) realCap := execdriver.GetCapability(cap)
numCap := fmt.Sprintf("%d", realCap.Value) numCap := fmt.Sprintf("%d", realCap.Value)
if cap != "MKNOD" && cap != "KILL" { if cap != "MKNOD" && cap != "KILL" {
grepFile(t, p, fmt.Sprintf("lxc.cap.keep = %s", numCap)) grepFile(t, p, fmt.Sprintf("lxc.cap.keep = %s", numCap))
@ -359,7 +359,7 @@ func TestCustomLxcConfigMiscOverride(t *testing.T) {
grepFile(t, p, "lxc.cgroup.cpuset.cpus = 0,1") grepFile(t, p, "lxc.cgroup.cpuset.cpus = 0,1")
container := nativeTemplate.New() container := nativeTemplate.New()
for _, cap := range container.Capabilities { for _, cap := range container.Capabilities {
realCap := capabilities.GetCapability(cap) realCap := execdriver.GetCapability(cap)
numCap := fmt.Sprintf("%d", realCap.Value) numCap := fmt.Sprintf("%d", realCap.Value)
if cap != "MKNOD" && cap != "KILL" { if cap != "MKNOD" && cap != "KILL" {
grepFile(t, p, fmt.Sprintf("lxc.cap.keep = %s", numCap)) grepFile(t, p, fmt.Sprintf("lxc.cap.keep = %s", numCap))

View File

@ -3,21 +3,24 @@
package native package native
import ( import (
"errors"
"fmt" "fmt"
"os/exec" "net"
"path/filepath" "path/filepath"
"strings"
"syscall"
"github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/daemon/execdriver"
"github.com/docker/libcontainer" "github.com/docker/docker/pkg/symlink"
"github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/apparmor"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/devices" "github.com/docker/libcontainer/devices"
"github.com/docker/libcontainer/mount" "github.com/docker/libcontainer/utils"
"github.com/docker/libcontainer/security/capabilities"
) )
// createContainer populates and configures the container type with the // createContainer populates and configures the container type with the
// data provided by the execdriver.Command // data provided by the execdriver.Command
func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, error) { func (d *driver) createContainer(c *execdriver.Command) (*configs.Config, error) {
container := execdriver.InitContainer(c) container := execdriver.InitContainer(c)
if err := d.createIpc(container, c); err != nil { if err := d.createIpc(container, c); err != nil {
@ -33,6 +36,13 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
} }
if c.ProcessConfig.Privileged { if c.ProcessConfig.Privileged {
// clear readonly for /sys
for i := range container.Mounts {
if container.Mounts[i].Destination == "/sys" {
container.Mounts[i].Flags &= ^syscall.MS_RDONLY
}
}
container.ReadonlyPaths = nil
if err := d.setPrivileged(container); err != nil { if err := d.setPrivileged(container); err != nil {
return nil, err return nil, err
} }
@ -57,43 +67,52 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Config, e
if err := d.setupLabels(container, c); err != nil { if err := d.setupLabels(container, c); err != nil {
return nil, err return nil, err
} }
d.setupRlimits(container, c) d.setupRlimits(container, c)
cmds := make(map[string]*exec.Cmd)
d.Lock()
for k, v := range d.activeContainers {
cmds[k] = v.cmd
}
d.Unlock()
return container, nil return container, nil
} }
func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Command) error { func generateIfaceName() (string, error) {
for i := 0; i < 10; i++ {
name, err := utils.GenerateRandomName("veth", 7)
if err != nil {
continue
}
if _, err := net.InterfaceByName(name); err != nil {
if strings.Contains(err.Error(), "no such") {
return name, nil
}
return "", err
}
}
return "", errors.New("Failed to find name for new interface")
}
func (d *driver) createNetwork(container *configs.Config, c *execdriver.Command) error {
if c.Network.HostNetworking { if c.Network.HostNetworking {
container.Namespaces.Remove(libcontainer.NEWNET) container.Namespaces.Remove(configs.NEWNET)
return nil return nil
} }
container.Networks = []*libcontainer.Network{ container.Networks = []*configs.Network{
{ {
Mtu: c.Network.Mtu, Type: "loopback",
Address: fmt.Sprintf("%s/%d", "127.0.0.1", 0),
Gateway: "localhost",
Type: "loopback",
}, },
} }
iName, err := generateIfaceName()
if err != nil {
return err
}
if c.Network.Interface != nil { if c.Network.Interface != nil {
vethNetwork := libcontainer.Network{ vethNetwork := configs.Network{
Mtu: c.Network.Mtu, Name: "eth0",
Address: fmt.Sprintf("%s/%d", c.Network.Interface.IPAddress, c.Network.Interface.IPPrefixLen), HostInterfaceName: iName,
MacAddress: c.Network.Interface.MacAddress, Mtu: c.Network.Mtu,
Gateway: c.Network.Interface.Gateway, Address: fmt.Sprintf("%s/%d", c.Network.Interface.IPAddress, c.Network.Interface.IPPrefixLen),
Type: "veth", MacAddress: c.Network.Interface.MacAddress,
Bridge: c.Network.Interface.Bridge, Gateway: c.Network.Interface.Gateway,
VethPrefix: "veth", Type: "veth",
Bridge: c.Network.Interface.Bridge,
} }
if c.Network.Interface.GlobalIPv6Address != "" { if c.Network.Interface.GlobalIPv6Address != "" {
vethNetwork.IPv6Address = fmt.Sprintf("%s/%d", c.Network.Interface.GlobalIPv6Address, c.Network.Interface.GlobalIPv6PrefixLen) vethNetwork.IPv6Address = fmt.Sprintf("%s/%d", c.Network.Interface.GlobalIPv6Address, c.Network.Interface.GlobalIPv6PrefixLen)
@ -107,21 +126,24 @@ func (d *driver) createNetwork(container *libcontainer.Config, c *execdriver.Com
active := d.activeContainers[c.Network.ContainerID] active := d.activeContainers[c.Network.ContainerID]
d.Unlock() d.Unlock()
if active == nil || active.cmd.Process == nil { if active == nil {
return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID) return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID)
} }
cmd := active.cmd
nspath := filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "net") state, err := active.State()
container.Namespaces.Add(libcontainer.NEWNET, nspath) if err != nil {
return err
}
container.Namespaces.Add(configs.NEWNET, state.NamespacePaths[configs.NEWNET])
} }
return nil return nil
} }
func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command) error { func (d *driver) createIpc(container *configs.Config, c *execdriver.Command) error {
if c.Ipc.HostIpc { if c.Ipc.HostIpc {
container.Namespaces.Remove(libcontainer.NEWIPC) container.Namespaces.Remove(configs.NEWIPC)
return nil return nil
} }
@ -130,37 +152,38 @@ func (d *driver) createIpc(container *libcontainer.Config, c *execdriver.Command
active := d.activeContainers[c.Ipc.ContainerID] active := d.activeContainers[c.Ipc.ContainerID]
d.Unlock() d.Unlock()
if active == nil || active.cmd.Process == nil { if active == nil {
return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID) return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
} }
cmd := active.cmd
container.Namespaces.Add(libcontainer.NEWIPC, filepath.Join("/proc", fmt.Sprint(cmd.Process.Pid), "ns", "ipc")) state, err := active.State()
if err != nil {
return err
}
container.Namespaces.Add(configs.NEWIPC, state.NamespacePaths[configs.NEWIPC])
} }
return nil return nil
} }
func (d *driver) createPid(container *libcontainer.Config, c *execdriver.Command) error { func (d *driver) createPid(container *configs.Config, c *execdriver.Command) error {
if c.Pid.HostPid { if c.Pid.HostPid {
container.Namespaces.Remove(libcontainer.NEWPID) container.Namespaces.Remove(configs.NEWPID)
return nil return nil
} }
return nil return nil
} }
func (d *driver) setPrivileged(container *libcontainer.Config) (err error) { func (d *driver) setPrivileged(container *configs.Config) (err error) {
container.Capabilities = capabilities.GetAllCapabilities() container.Capabilities = execdriver.GetAllCapabilities()
container.Cgroups.AllowAllDevices = true container.Cgroups.AllowAllDevices = true
hostDeviceNodes, err := devices.GetHostDeviceNodes() hostDevices, err := devices.HostDevices()
if err != nil { if err != nil {
return err return err
} }
container.MountConfig.DeviceNodes = hostDeviceNodes container.Devices = hostDevices
container.RestrictSys = false
if apparmor.IsEnabled() { if apparmor.IsEnabled() {
container.AppArmorProfile = "unconfined" container.AppArmorProfile = "unconfined"
@ -169,39 +192,52 @@ func (d *driver) setPrivileged(container *libcontainer.Config) (err error) {
return nil return nil
} }
func (d *driver) setCapabilities(container *libcontainer.Config, c *execdriver.Command) (err error) { func (d *driver) setCapabilities(container *configs.Config, c *execdriver.Command) (err error) {
container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop) container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop)
return err return err
} }
func (d *driver) setupRlimits(container *libcontainer.Config, c *execdriver.Command) { func (d *driver) setupRlimits(container *configs.Config, c *execdriver.Command) {
if c.Resources == nil { if c.Resources == nil {
return return
} }
for _, rlimit := range c.Resources.Rlimits { for _, rlimit := range c.Resources.Rlimits {
container.Rlimits = append(container.Rlimits, libcontainer.Rlimit((*rlimit))) container.Rlimits = append(container.Rlimits, configs.Rlimit{
} Type: rlimit.Type,
} Hard: rlimit.Hard,
Soft: rlimit.Soft,
func (d *driver) setupMounts(container *libcontainer.Config, c *execdriver.Command) error {
for _, m := range c.Mounts {
container.MountConfig.Mounts = append(container.MountConfig.Mounts, &mount.Mount{
Type: "bind",
Source: m.Source,
Destination: m.Destination,
Writable: m.Writable,
Private: m.Private,
Slave: m.Slave,
}) })
} }
}
func (d *driver) setupMounts(container *configs.Config, c *execdriver.Command) error {
for _, m := range c.Mounts {
dest, err := symlink.FollowSymlinkInScope(filepath.Join(c.Rootfs, m.Destination), c.Rootfs)
if err != nil {
return err
}
flags := syscall.MS_BIND | syscall.MS_REC
if !m.Writable {
flags |= syscall.MS_RDONLY
}
if m.Slave {
flags |= syscall.MS_SLAVE
}
container.Mounts = append(container.Mounts, &configs.Mount{
Source: m.Source,
Destination: dest,
Device: "bind",
Flags: flags,
})
}
return nil return nil
} }
func (d *driver) setupLabels(container *libcontainer.Config, c *execdriver.Command) error { func (d *driver) setupLabels(container *configs.Config, c *execdriver.Command) error {
container.ProcessLabel = c.ProcessLabel container.ProcessLabel = c.ProcessLabel
container.MountConfig.MountLabel = c.MountLabel container.MountLabel = c.MountLabel
return nil return nil
} }

View File

@ -4,28 +4,28 @@ package native
import ( import (
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"io" "io"
"io/ioutil" "io/ioutil"
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"strings"
"sync" "sync"
"syscall" "syscall"
"time"
log "github.com/Sirupsen/logrus" log "github.com/Sirupsen/logrus"
"github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/daemon/execdriver"
"github.com/docker/docker/pkg/reexec"
sysinfo "github.com/docker/docker/pkg/system" sysinfo "github.com/docker/docker/pkg/system"
"github.com/docker/docker/pkg/term" "github.com/docker/docker/pkg/term"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/apparmor"
"github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/cgroups/systemd" "github.com/docker/libcontainer/cgroups/systemd"
consolepkg "github.com/docker/libcontainer/console" "github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/namespaces"
_ "github.com/docker/libcontainer/namespaces/nsenter"
"github.com/docker/libcontainer/system" "github.com/docker/libcontainer/system"
"github.com/docker/libcontainer/utils"
) )
const ( const (
@ -33,16 +33,12 @@ const (
Version = "0.2" Version = "0.2"
) )
type activeContainer struct {
container *libcontainer.Config
cmd *exec.Cmd
}
type driver struct { type driver struct {
root string root string
initPath string initPath string
activeContainers map[string]*activeContainer activeContainers map[string]libcontainer.Container
machineMemory int64 machineMemory int64
factory libcontainer.Factory
sync.Mutex sync.Mutex
} }
@ -59,11 +55,22 @@ func NewDriver(root, initPath string) (*driver, error) {
if err := apparmor.InstallDefaultProfile(); err != nil { if err := apparmor.InstallDefaultProfile(); err != nil {
return nil, err return nil, err
} }
cgm := libcontainer.Cgroupfs
if systemd.UseSystemd() {
cgm = libcontainer.SystemdCgroups
}
f, err := libcontainer.New(root, cgm, libcontainer.InitPath(reexec.Self(), DriverName))
if err != nil {
return nil, err
}
return &driver{ return &driver{
root: root, root: root,
initPath: initPath, initPath: initPath,
activeContainers: make(map[string]*activeContainer), activeContainers: make(map[string]libcontainer.Container),
machineMemory: meminfo.MemTotal, machineMemory: meminfo.MemTotal,
factory: f,
}, nil }, nil
} }
@ -81,101 +88,141 @@ func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba
var term execdriver.Terminal var term execdriver.Terminal
p := &libcontainer.Process{
Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
Env: c.ProcessConfig.Env,
Cwd: c.WorkingDir,
User: c.ProcessConfig.User,
}
if c.ProcessConfig.Tty { if c.ProcessConfig.Tty {
term, err = NewTtyConsole(&c.ProcessConfig, pipes) rootuid, err := container.HostUID()
if err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
cons, err := p.NewConsole(rootuid)
if err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
term, err = NewTtyConsole(cons, pipes, rootuid)
} else { } else {
term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes) p.Stdout = pipes.Stdout
p.Stderr = pipes.Stderr
r, w, err := os.Pipe()
if err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
if pipes.Stdin != nil {
go func() {
io.Copy(w, pipes.Stdin)
w.Close()
}()
p.Stdin = r
}
term = &execdriver.StdConsole{}
} }
if err != nil { if err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err return execdriver.ExitStatus{ExitCode: -1}, err
} }
c.ProcessConfig.Terminal = term c.ProcessConfig.Terminal = term
cont, err := d.factory.Create(c.ID, container)
if err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
d.Lock() d.Lock()
d.activeContainers[c.ID] = &activeContainer{ d.activeContainers[c.ID] = cont
container: container,
cmd: &c.ProcessConfig.Cmd,
}
d.Unlock() d.Unlock()
defer func() {
var ( cont.Destroy()
dataPath = filepath.Join(d.root, c.ID) d.cleanContainer(c.ID)
args = append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...)
)
if err := d.createContainerRoot(c.ID); err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
defer d.cleanContainer(c.ID)
if err := d.writeContainerFile(container, c.ID); err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
execOutputChan := make(chan execOutput, 1)
waitForStart := make(chan struct{})
go func() {
exitCode, err := namespaces.Exec(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, dataPath, args, func(container *libcontainer.Config, console, dataPath, init string, child *os.File, args []string) *exec.Cmd {
c.ProcessConfig.Path = d.initPath
c.ProcessConfig.Args = append([]string{
DriverName,
"-console", console,
"-pipe", "3",
"-root", filepath.Join(d.root, c.ID),
"--",
}, args...)
// set this to nil so that when we set the clone flags anything else is reset
c.ProcessConfig.SysProcAttr = &syscall.SysProcAttr{
Cloneflags: uintptr(namespaces.GetNamespaceFlags(container.Namespaces)),
}
c.ProcessConfig.ExtraFiles = []*os.File{child}
c.ProcessConfig.Env = container.Env
c.ProcessConfig.Dir = container.RootFs
return &c.ProcessConfig.Cmd
}, func() {
close(waitForStart)
if startCallback != nil {
c.ContainerPid = c.ProcessConfig.Process.Pid
startCallback(&c.ProcessConfig, c.ContainerPid)
}
})
execOutputChan <- execOutput{exitCode, err}
}() }()
select { if err := cont.Start(p); err != nil {
case execOutput := <-execOutputChan: return execdriver.ExitStatus{ExitCode: -1}, err
return execdriver.ExitStatus{ExitCode: execOutput.exitCode}, execOutput.err
case <-waitForStart:
break
} }
oomKill := false if startCallback != nil {
state, err := libcontainer.GetState(filepath.Join(d.root, c.ID)) pid, err := p.Pid()
if err == nil { if err != nil {
oomKillNotification, err := libcontainer.NotifyOnOOM(state) p.Signal(os.Kill)
if err == nil { p.Wait()
_, oomKill = <-oomKillNotification return execdriver.ExitStatus{ExitCode: -1}, err
} else {
log.Warnf("WARNING: Your kernel does not support OOM notifications: %s", err)
} }
} else { startCallback(&c.ProcessConfig, pid)
log.Warnf("Failed to get container state, oom notify will not work: %s", err)
} }
// wait for the container to exit.
execOutput := <-execOutputChan
return execdriver.ExitStatus{ExitCode: execOutput.exitCode, OOMKilled: oomKill}, execOutput.err oomKillNotification, err := cont.NotifyOOM()
if err != nil {
oomKillNotification = nil
log.Warnf("WARNING: Your kernel does not support OOM notifications: %s", err)
}
waitF := p.Wait
if nss := cont.Config().Namespaces; nss.Contains(configs.NEWPID) {
// we need such hack for tracking processes with inerited fds,
// because cmd.Wait() waiting for all streams to be copied
waitF = waitInPIDHost(p, cont)
}
ps, err := waitF()
if err != nil {
if err, ok := err.(*exec.ExitError); !ok {
return execdriver.ExitStatus{ExitCode: -1}, err
} else {
ps = err.ProcessState
}
}
cont.Destroy()
_, oomKill := <-oomKillNotification
return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
} }
func (d *driver) Kill(p *execdriver.Command, sig int) error { func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) {
if p.ProcessConfig.Process == nil { return func() (*os.ProcessState, error) {
return errors.New("exec: not started") pid, err := p.Pid()
if err != nil {
return nil, err
}
process, err := os.FindProcess(pid)
s, err := process.Wait()
if err != nil {
if err, ok := err.(*exec.ExitError); !ok {
return s, err
} else {
s = err.ProcessState
}
}
processes, err := c.Processes()
if err != nil {
return s, err
}
for _, pid := range processes {
process, err := os.FindProcess(pid)
if err != nil {
log.Errorf("Failed to kill process: %d", pid)
continue
}
process.Kill()
}
p.Wait()
return s, err
} }
return syscall.Kill(p.ProcessConfig.Process.Pid, syscall.Signal(sig)) }
func (d *driver) Kill(c *execdriver.Command, sig int) error {
active := d.activeContainers[c.ID]
if active == nil {
return fmt.Errorf("active container for %s does not exist", c.ID)
}
state, err := active.State()
if err != nil {
return err
}
return syscall.Kill(state.InitProcessPid, syscall.Signal(sig))
} }
func (d *driver) Pause(c *execdriver.Command) error { func (d *driver) Pause(c *execdriver.Command) error {
@ -183,11 +230,7 @@ func (d *driver) Pause(c *execdriver.Command) error {
if active == nil { if active == nil {
return fmt.Errorf("active container for %s does not exist", c.ID) return fmt.Errorf("active container for %s does not exist", c.ID)
} }
active.container.Cgroups.Freezer = "FROZEN" return active.Pause()
if systemd.UseSystemd() {
return systemd.Freeze(active.container.Cgroups, active.container.Cgroups.Freezer)
}
return fs.Freeze(active.container.Cgroups, active.container.Cgroups.Freezer)
} }
func (d *driver) Unpause(c *execdriver.Command) error { func (d *driver) Unpause(c *execdriver.Command) error {
@ -195,44 +238,31 @@ func (d *driver) Unpause(c *execdriver.Command) error {
if active == nil { if active == nil {
return fmt.Errorf("active container for %s does not exist", c.ID) return fmt.Errorf("active container for %s does not exist", c.ID)
} }
active.container.Cgroups.Freezer = "THAWED" return active.Resume()
if systemd.UseSystemd() {
return systemd.Freeze(active.container.Cgroups, active.container.Cgroups.Freezer)
}
return fs.Freeze(active.container.Cgroups, active.container.Cgroups.Freezer)
} }
func (d *driver) Terminate(p *execdriver.Command) error { func (d *driver) Terminate(c *execdriver.Command) error {
// lets check the start time for the process // lets check the start time for the process
state, err := libcontainer.GetState(filepath.Join(d.root, p.ID)) active := d.activeContainers[c.ID]
if err != nil { if active == nil {
if !os.IsNotExist(err) { return fmt.Errorf("active container for %s does not exist", c.ID)
return err
}
// TODO: Remove this part for version 1.2.0
// This is added only to ensure smooth upgrades from pre 1.1.0 to 1.1.0
data, err := ioutil.ReadFile(filepath.Join(d.root, p.ID, "start"))
if err != nil {
// if we don't have the data on disk then we can assume the process is gone
// because this is only removed after we know the process has stopped
if os.IsNotExist(err) {
return nil
}
return err
}
state = &libcontainer.State{InitStartTime: string(data)}
} }
state, err := active.State()
if err != nil {
return err
}
pid := state.InitProcessPid
currentStartTime, err := system.GetProcessStartTime(p.ProcessConfig.Process.Pid) currentStartTime, err := system.GetProcessStartTime(pid)
if err != nil { if err != nil {
return err return err
} }
if state.InitStartTime == currentStartTime { if state.InitProcessStartTime == currentStartTime {
err = syscall.Kill(p.ProcessConfig.Process.Pid, 9) err = syscall.Kill(pid, 9)
syscall.Wait4(p.ProcessConfig.Process.Pid, nil, 0, nil) syscall.Wait4(pid, nil, 0, nil)
} }
d.cleanContainer(p.ID) d.cleanContainer(c.ID)
return err return err
@ -257,15 +287,10 @@ func (d *driver) GetPidsForContainer(id string) ([]int, error) {
if active == nil { if active == nil {
return nil, fmt.Errorf("active container for %s does not exist", id) return nil, fmt.Errorf("active container for %s does not exist", id)
} }
c := active.container.Cgroups return active.Processes()
if systemd.UseSystemd() {
return systemd.GetPids(c)
}
return fs.GetPids(c)
} }
func (d *driver) writeContainerFile(container *libcontainer.Config, id string) error { func (d *driver) writeContainerFile(container *configs.Config, id string) error {
data, err := json.Marshal(container) data, err := json.Marshal(container)
if err != nil { if err != nil {
return err return err
@ -289,42 +314,61 @@ func (d *driver) Clean(id string) error {
} }
func (d *driver) Stats(id string) (*execdriver.ResourceStats, error) { func (d *driver) Stats(id string) (*execdriver.ResourceStats, error) {
return execdriver.Stats(filepath.Join(d.root, id), d.activeContainers[id].container.Cgroups.Memory, d.machineMemory) c := d.activeContainers[id]
} now := time.Now()
stats, err := c.Stats()
type TtyConsole struct {
MasterPty *os.File
}
func NewTtyConsole(processConfig *execdriver.ProcessConfig, pipes *execdriver.Pipes) (*TtyConsole, error) {
ptyMaster, console, err := consolepkg.CreateMasterAndConsole()
if err != nil { if err != nil {
return nil, err return nil, err
} }
memoryLimit := c.Config().Cgroups.Memory
// if the container does not have any memory limit specified set the
// limit to the machines memory
if memoryLimit == 0 {
memoryLimit = d.machineMemory
}
return &execdriver.ResourceStats{
Stats: stats,
Read: now,
MemoryLimit: memoryLimit,
}, nil
}
func getEnv(key string, env []string) string {
for _, pair := range env {
parts := strings.Split(pair, "=")
if parts[0] == key {
return parts[1]
}
}
return ""
}
type TtyConsole struct {
console libcontainer.Console
}
func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes, rootuid int) (*TtyConsole, error) {
tty := &TtyConsole{ tty := &TtyConsole{
MasterPty: ptyMaster, console: console,
} }
if err := tty.AttachPipes(&processConfig.Cmd, pipes); err != nil { if err := tty.AttachPipes(pipes); err != nil {
tty.Close() tty.Close()
return nil, err return nil, err
} }
processConfig.Console = console
return tty, nil return tty, nil
} }
func (t *TtyConsole) Master() *os.File { func (t *TtyConsole) Master() libcontainer.Console {
return t.MasterPty return t.console
} }
func (t *TtyConsole) Resize(h, w int) error { func (t *TtyConsole) Resize(h, w int) error {
return term.SetWinsize(t.MasterPty.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
} }
func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) error { func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error {
go func() { go func() {
if wb, ok := pipes.Stdout.(interface { if wb, ok := pipes.Stdout.(interface {
CloseWriters() error CloseWriters() error
@ -332,12 +376,12 @@ func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) err
defer wb.CloseWriters() defer wb.CloseWriters()
} }
io.Copy(pipes.Stdout, t.MasterPty) io.Copy(pipes.Stdout, t.console)
}() }()
if pipes.Stdin != nil { if pipes.Stdin != nil {
go func() { go func() {
io.Copy(t.MasterPty, pipes.Stdin) io.Copy(t.console, pipes.Stdin)
pipes.Stdin.Close() pipes.Stdin.Close()
}() }()
@ -347,5 +391,5 @@ func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) err
} }
func (t *TtyConsole) Close() error { func (t *TtyConsole) Close() error {
return t.MasterPty.Close() return t.console.Close()
} }

View File

@ -4,67 +4,77 @@ package native
import ( import (
"fmt" "fmt"
"log"
"os" "os"
"os/exec" "os/exec"
"path/filepath" "syscall"
"runtime"
"github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/daemon/execdriver"
"github.com/docker/docker/pkg/reexec"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/namespaces" _ "github.com/docker/libcontainer/nsenter"
"github.com/docker/libcontainer/utils"
) )
const execCommandName = "nsenter-exec"
func init() {
reexec.Register(execCommandName, nsenterExec)
}
func nsenterExec() {
runtime.LockOSThread()
// User args are passed after '--' in the command line.
userArgs := findUserArgs()
config, err := loadConfigFromFd()
if err != nil {
log.Fatalf("docker-exec: unable to receive config from sync pipe: %s", err)
}
if err := namespaces.FinalizeSetns(config, userArgs); err != nil {
log.Fatalf("docker-exec: failed to exec: %s", err)
}
}
// TODO(vishh): Add support for running in priviledged mode and running as a different user. // TODO(vishh): Add support for running in priviledged mode and running as a different user.
func (d *driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessConfig, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) { func (d *driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessConfig, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) {
active := d.activeContainers[c.ID] active := d.activeContainers[c.ID]
if active == nil { if active == nil {
return -1, fmt.Errorf("No active container exists with ID %s", c.ID) return -1, fmt.Errorf("No active container exists with ID %s", c.ID)
} }
state, err := libcontainer.GetState(filepath.Join(d.root, c.ID))
if err != nil {
return -1, fmt.Errorf("State unavailable for container with ID %s. The container may have been cleaned up already. Error: %s", c.ID, err)
}
var term execdriver.Terminal var term execdriver.Terminal
var err error
p := &libcontainer.Process{
Args: append([]string{processConfig.Entrypoint}, processConfig.Arguments...),
Env: c.ProcessConfig.Env,
Cwd: c.WorkingDir,
User: c.ProcessConfig.User,
}
if processConfig.Tty { if processConfig.Tty {
term, err = NewTtyConsole(processConfig, pipes) config := active.Config()
rootuid, err := config.HostUID()
if err != nil {
return -1, err
}
cons, err := p.NewConsole(rootuid)
if err != nil {
return -1, err
}
term, err = NewTtyConsole(cons, pipes, rootuid)
} else { } else {
term, err = execdriver.NewStdConsole(processConfig, pipes) p.Stdout = pipes.Stdout
p.Stderr = pipes.Stderr
p.Stdin = pipes.Stdin
term = &execdriver.StdConsole{}
}
if err != nil {
return -1, err
} }
processConfig.Terminal = term processConfig.Terminal = term
args := append([]string{processConfig.Entrypoint}, processConfig.Arguments...) if err := active.Start(p); err != nil {
return -1, err
}
return namespaces.ExecIn(active.container, state, args, os.Args[0], "exec", processConfig.Stdin, processConfig.Stdout, processConfig.Stderr, processConfig.Console, if startCallback != nil {
func(cmd *exec.Cmd) { pid, err := p.Pid()
if startCallback != nil { if err != nil {
startCallback(&c.ProcessConfig, cmd.Process.Pid) p.Signal(os.Kill)
} p.Wait()
}) return -1, err
}
startCallback(&c.ProcessConfig, pid)
}
ps, err := p.Wait()
if err != nil {
exitErr, ok := err.(*exec.ExitError)
if !ok {
return -1, err
}
ps = exitErr.ProcessState
}
return utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), nil
} }

View File

@ -2,13 +2,6 @@
package native package native
import (
"os"
"path/filepath"
"github.com/docker/libcontainer"
)
type info struct { type info struct {
ID string ID string
driver *driver driver *driver
@ -18,13 +11,6 @@ type info struct {
// pid file for a container. If the file exists then the // pid file for a container. If the file exists then the
// container is currently running // container is currently running
func (i *info) IsRunning() bool { func (i *info) IsRunning() bool {
if _, err := libcontainer.GetState(filepath.Join(i.driver.root, i.ID)); err == nil { _, ok := i.driver.activeContainers[i.ID]
return true return ok
}
// TODO: Remove this part for version 1.2.0
// This is added only to ensure smooth upgrades from pre 1.1.0 to 1.1.0
if _, err := os.Stat(filepath.Join(i.driver.root, i.ID, "pid")); err == nil {
return true
}
return false
} }

View File

@ -3,55 +3,40 @@
package native package native
import ( import (
"encoding/json"
"flag"
"fmt" "fmt"
"os" "os"
"path/filepath"
"runtime" "runtime"
"github.com/docker/docker/pkg/reexec" "github.com/docker/docker/pkg/reexec"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/namespaces"
) )
func init() { func init() {
reexec.Register(DriverName, initializer) reexec.Register(DriverName, initializer)
} }
func fatal(err error) {
if lerr, ok := err.(libcontainer.Error); ok {
lerr.Detail(os.Stderr)
os.Exit(1)
}
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
func initializer() { func initializer() {
runtime.GOMAXPROCS(1)
runtime.LockOSThread() runtime.LockOSThread()
factory, err := libcontainer.New("")
var (
pipe = flag.Int("pipe", 0, "sync pipe fd")
console = flag.String("console", "", "console (pty slave) path")
root = flag.String("root", ".", "root path for configuration files")
)
flag.Parse()
var container *libcontainer.Config
f, err := os.Open(filepath.Join(*root, "container.json"))
if err != nil { if err != nil {
writeError(err) fatal(err)
}
if err := factory.StartInitialization(3); err != nil {
fatal(err)
} }
if err := json.NewDecoder(f).Decode(&container); err != nil { panic("unreachable")
f.Close()
writeError(err)
}
f.Close()
rootfs, err := os.Getwd()
if err != nil {
writeError(err)
}
if err := namespaces.Init(container, rootfs, *console, os.NewFile(uintptr(*pipe), "child"), flag.Args()); err != nil {
writeError(err)
}
panic("Unreachable")
} }
func writeError(err error) { func writeError(err error) {

View File

@ -1,14 +1,17 @@
package template package template
import ( import (
"github.com/docker/libcontainer" "syscall"
"github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/apparmor"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs"
) )
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
// New returns the docker default configuration for libcontainer // New returns the docker default configuration for libcontainer
func New() *libcontainer.Config { func New() *configs.Config {
container := &libcontainer.Config{ container := &configs.Config{
Capabilities: []string{ Capabilities: []string{
"CHOWN", "CHOWN",
"DAC_OVERRIDE", "DAC_OVERRIDE",
@ -25,18 +28,51 @@ func New() *libcontainer.Config {
"KILL", "KILL",
"AUDIT_WRITE", "AUDIT_WRITE",
}, },
Namespaces: libcontainer.Namespaces([]libcontainer.Namespace{ Namespaces: configs.Namespaces([]configs.Namespace{
{Type: "NEWNS"}, {Type: "NEWNS"},
{Type: "NEWUTS"}, {Type: "NEWUTS"},
{Type: "NEWIPC"}, {Type: "NEWIPC"},
{Type: "NEWPID"}, {Type: "NEWPID"},
{Type: "NEWNET"}, {Type: "NEWNET"},
}), }),
Cgroups: &cgroups.Cgroup{ Cgroups: &configs.Cgroup{
Parent: "docker", Parent: "docker",
AllowAllDevices: false, AllowAllDevices: false,
}, },
MountConfig: &libcontainer.MountConfig{}, Mounts: []*configs.Mount{
{
Device: "tmpfs",
Source: "shm",
Destination: "/dev/shm",
Data: "mode=1777,size=65536k",
Flags: defaultMountFlags,
},
{
Source: "mqueue",
Destination: "/dev/mqueue",
Device: "mqueue",
Flags: defaultMountFlags,
},
{
Source: "sysfs",
Destination: "/sys",
Device: "sysfs",
Flags: defaultMountFlags | syscall.MS_RDONLY,
},
},
MaskPaths: []string{
"/proc/kcore",
},
ReadonlyPaths: []string{
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
},
Rlimits: []configs.Rlimit{
{
Type: syscall.RLIMIT_NOFILE,
Hard: 1024,
Soft: 1024,
},
},
} }
if apparmor.IsEnabled() { if apparmor.IsEnabled() {

View File

@ -2,28 +2,21 @@
package native package native
import ( //func findUserArgs() []string {
"encoding/json" //for i, a := range os.Args {
"os" //if a == "--" {
//return os.Args[i+1:]
//}
//}
//return []string{}
//}
"github.com/docker/libcontainer" //// loadConfigFromFd loads a container's config from the sync pipe that is provided by
) //// fd 3 when running a process
//func loadConfigFromFd() (*configs.Config, error) {
func findUserArgs() []string { //var config *libcontainer.Config
for i, a := range os.Args { //if err := json.NewDecoder(os.NewFile(3, "child")).Decode(&config); err != nil {
if a == "--" { //return nil, err
return os.Args[i+1:] //}
} //return config, nil
} //}
return []string{}
}
// loadConfigFromFd loads a container's config from the sync pipe that is provided by
// fd 3 when running a process
func loadConfigFromFd() (*libcontainer.Config, error) {
var config *libcontainer.Config
if err := json.NewDecoder(os.NewFile(3, "child")).Decode(&config); err != nil {
return nil, err
}
return config, nil
}

View File

@ -5,13 +5,83 @@ import (
"strings" "strings"
"github.com/docker/docker/utils" "github.com/docker/docker/utils"
"github.com/docker/libcontainer/security/capabilities" "github.com/syndtr/gocapability/capability"
) )
var capabilityList = Capabilities{
{Key: "SETPCAP", Value: capability.CAP_SETPCAP},
{Key: "SYS_MODULE", Value: capability.CAP_SYS_MODULE},
{Key: "SYS_RAWIO", Value: capability.CAP_SYS_RAWIO},
{Key: "SYS_PACCT", Value: capability.CAP_SYS_PACCT},
{Key: "SYS_ADMIN", Value: capability.CAP_SYS_ADMIN},
{Key: "SYS_NICE", Value: capability.CAP_SYS_NICE},
{Key: "SYS_RESOURCE", Value: capability.CAP_SYS_RESOURCE},
{Key: "SYS_TIME", Value: capability.CAP_SYS_TIME},
{Key: "SYS_TTY_CONFIG", Value: capability.CAP_SYS_TTY_CONFIG},
{Key: "MKNOD", Value: capability.CAP_MKNOD},
{Key: "AUDIT_WRITE", Value: capability.CAP_AUDIT_WRITE},
{Key: "AUDIT_CONTROL", Value: capability.CAP_AUDIT_CONTROL},
{Key: "MAC_OVERRIDE", Value: capability.CAP_MAC_OVERRIDE},
{Key: "MAC_ADMIN", Value: capability.CAP_MAC_ADMIN},
{Key: "NET_ADMIN", Value: capability.CAP_NET_ADMIN},
{Key: "SYSLOG", Value: capability.CAP_SYSLOG},
{Key: "CHOWN", Value: capability.CAP_CHOWN},
{Key: "NET_RAW", Value: capability.CAP_NET_RAW},
{Key: "DAC_OVERRIDE", Value: capability.CAP_DAC_OVERRIDE},
{Key: "FOWNER", Value: capability.CAP_FOWNER},
{Key: "DAC_READ_SEARCH", Value: capability.CAP_DAC_READ_SEARCH},
{Key: "FSETID", Value: capability.CAP_FSETID},
{Key: "KILL", Value: capability.CAP_KILL},
{Key: "SETGID", Value: capability.CAP_SETGID},
{Key: "SETUID", Value: capability.CAP_SETUID},
{Key: "LINUX_IMMUTABLE", Value: capability.CAP_LINUX_IMMUTABLE},
{Key: "NET_BIND_SERVICE", Value: capability.CAP_NET_BIND_SERVICE},
{Key: "NET_BROADCAST", Value: capability.CAP_NET_BROADCAST},
{Key: "IPC_LOCK", Value: capability.CAP_IPC_LOCK},
{Key: "IPC_OWNER", Value: capability.CAP_IPC_OWNER},
{Key: "SYS_CHROOT", Value: capability.CAP_SYS_CHROOT},
{Key: "SYS_PTRACE", Value: capability.CAP_SYS_PTRACE},
{Key: "SYS_BOOT", Value: capability.CAP_SYS_BOOT},
{Key: "LEASE", Value: capability.CAP_LEASE},
{Key: "SETFCAP", Value: capability.CAP_SETFCAP},
{Key: "WAKE_ALARM", Value: capability.CAP_WAKE_ALARM},
{Key: "BLOCK_SUSPEND", Value: capability.CAP_BLOCK_SUSPEND},
}
type (
CapabilityMapping struct {
Key string `json:"key,omitempty"`
Value capability.Cap `json:"value,omitempty"`
}
Capabilities []*CapabilityMapping
)
func (c *CapabilityMapping) String() string {
return c.Key
}
func GetCapability(key string) *CapabilityMapping {
for _, capp := range capabilityList {
if capp.Key == key {
cpy := *capp
return &cpy
}
}
return nil
}
func GetAllCapabilities() []string {
output := make([]string, len(capabilityList))
for i, capability := range capabilityList {
output[i] = capability.String()
}
return output
}
func TweakCapabilities(basics, adds, drops []string) ([]string, error) { func TweakCapabilities(basics, adds, drops []string) ([]string, error) {
var ( var (
newCaps []string newCaps []string
allCaps = capabilities.GetAllCapabilities() allCaps = GetAllCapabilities()
) )
// look for invalid cap in the drop list // look for invalid cap in the drop list
@ -26,7 +96,7 @@ func TweakCapabilities(basics, adds, drops []string) ([]string, error) {
// handle --cap-add=all // handle --cap-add=all
if utils.StringsContainsNoCase(adds, "all") { if utils.StringsContainsNoCase(adds, "all") {
basics = capabilities.GetAllCapabilities() basics = allCaps
} }
if !utils.StringsContainsNoCase(drops, "all") { if !utils.StringsContainsNoCase(drops, "all") {

View File

@ -18,7 +18,7 @@ func (daemon *Daemon) ContainerStats(job *engine.Job) engine.Status {
enc := json.NewEncoder(job.Stdout) enc := json.NewEncoder(job.Stdout)
for v := range updates { for v := range updates {
update := v.(*execdriver.ResourceStats) update := v.(*execdriver.ResourceStats)
ss := convertToAPITypes(update.ContainerStats) ss := convertToAPITypes(update.Stats)
ss.MemoryStats.Limit = uint64(update.MemoryLimit) ss.MemoryStats.Limit = uint64(update.MemoryLimit)
ss.Read = update.Read ss.Read = update.Read
ss.CpuStats.SystemUsage = update.SystemUsage ss.CpuStats.SystemUsage = update.SystemUsage
@ -31,20 +31,21 @@ func (daemon *Daemon) ContainerStats(job *engine.Job) engine.Status {
return engine.StatusOK return engine.StatusOK
} }
// convertToAPITypes converts the libcontainer.ContainerStats to the api specific // convertToAPITypes converts the libcontainer.Stats to the api specific
// structs. This is done to preserve API compatibility and versioning. // structs. This is done to preserve API compatibility and versioning.
func convertToAPITypes(ls *libcontainer.ContainerStats) *types.Stats { func convertToAPITypes(ls *libcontainer.Stats) *types.Stats {
s := &types.Stats{} s := &types.Stats{}
if ls.NetworkStats != nil { if ls.Interfaces != nil {
s.Network = types.Network{ s.Network = types.Network{}
RxBytes: ls.NetworkStats.RxBytes, for _, iface := range ls.Interfaces {
RxPackets: ls.NetworkStats.RxPackets, s.Network.RxBytes += iface.RxBytes
RxErrors: ls.NetworkStats.RxErrors, s.Network.RxPackets += iface.RxPackets
RxDropped: ls.NetworkStats.RxDropped, s.Network.RxErrors += iface.RxErrors
TxBytes: ls.NetworkStats.TxBytes, s.Network.RxDropped += iface.RxDropped
TxPackets: ls.NetworkStats.TxPackets, s.Network.TxBytes += iface.TxBytes
TxErrors: ls.NetworkStats.TxErrors, s.Network.TxPackets += iface.TxPackets
TxDropped: ls.NetworkStats.TxDropped, s.Network.TxErrors += iface.TxErrors
s.Network.TxDropped += iface.TxDropped
} }
} }
cs := ls.CgroupStats cs := ls.CgroupStats

View File

@ -60,7 +60,7 @@ func TestExecInteractiveStdinClose(t *testing.T) {
out, err := cmd.CombinedOutput() out, err := cmd.CombinedOutput()
if err != nil { if err != nil {
t.Fatal(err, out) t.Fatal(err, string(out))
} }
if string(out) == "" { if string(out) == "" {
@ -538,7 +538,6 @@ func TestRunExecDir(t *testing.T) {
id := strings.TrimSpace(out) id := strings.TrimSpace(out)
execDir := filepath.Join(execDriverPath, id) execDir := filepath.Join(execDriverPath, id)
stateFile := filepath.Join(execDir, "state.json") stateFile := filepath.Join(execDir, "state.json")
contFile := filepath.Join(execDir, "container.json")
{ {
fi, err := os.Stat(execDir) fi, err := os.Stat(execDir)
@ -552,10 +551,6 @@ func TestRunExecDir(t *testing.T) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
fi, err = os.Stat(contFile)
if err != nil {
t.Fatal(err)
}
} }
stopCmd := exec.Command(dockerBinary, "stop", id) stopCmd := exec.Command(dockerBinary, "stop", id)
@ -564,23 +559,12 @@ func TestRunExecDir(t *testing.T) {
t.Fatal(err, out) t.Fatal(err, out)
} }
{ {
fi, err := os.Stat(execDir) _, err := os.Stat(execDir)
if err != nil { if err == nil {
t.Fatal(err) t.Fatal(err)
} }
if !fi.IsDir() {
t.Fatalf("%q must be a directory", execDir)
}
fi, err = os.Stat(stateFile)
if err == nil { if err == nil {
t.Fatalf("Statefile %q is exists for stopped container!", stateFile) t.Fatalf("Exec directory %q exists for removed container!", execDir)
}
if !os.IsNotExist(err) {
t.Fatalf("Error should be about non-existing, got %s", err)
}
fi, err = os.Stat(contFile)
if err == nil {
t.Fatalf("Container file %q is exists for stopped container!", contFile)
} }
if !os.IsNotExist(err) { if !os.IsNotExist(err) {
t.Fatalf("Error should be about non-existing, got %s", err) t.Fatalf("Error should be about non-existing, got %s", err)
@ -603,10 +587,6 @@ func TestRunExecDir(t *testing.T) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
fi, err = os.Stat(contFile)
if err != nil {
t.Fatal(err)
}
} }
rmCmd := exec.Command(dockerBinary, "rm", "-f", id) rmCmd := exec.Command(dockerBinary, "rm", "-f", id)
out, _, err = runCommandWithOutput(rmCmd) out, _, err = runCommandWithOutput(rmCmd)