diff --git a/api.go b/api.go index cfef7a50ce..83b749b217 100644 --- a/api.go +++ b/api.go @@ -657,16 +657,16 @@ func postContainersCreate(srv *Server, version float64, w http.ResponseWriter, r for scanner.Scan() { out.Warnings = append(out.Warnings, scanner.Text()) } - if job.GetenvInt("Memory") > 0 && !srv.runtime.capabilities.MemoryLimit { + if job.GetenvInt("Memory") > 0 && !srv.runtime.sysInfo.MemoryLimit { log.Println("WARNING: Your kernel does not support memory limit capabilities. Limitation discarded.") out.Warnings = append(out.Warnings, "Your kernel does not support memory limit capabilities. Limitation discarded.") } - if job.GetenvInt("Memory") > 0 && !srv.runtime.capabilities.SwapLimit { + if job.GetenvInt("Memory") > 0 && !srv.runtime.sysInfo.SwapLimit { log.Println("WARNING: Your kernel does not support swap limit capabilities. Limitation discarded.") out.Warnings = append(out.Warnings, "Your kernel does not support memory swap capabilities. Limitation discarded.") } - if !job.GetenvBool("NetworkDisabled") && srv.runtime.capabilities.IPv4ForwardingDisabled { + if !job.GetenvBool("NetworkDisabled") && srv.runtime.sysInfo.IPv4ForwardingDisabled { log.Println("Warning: IPv4 forwarding is disabled.") out.Warnings = append(out.Warnings, "IPv4 forwarding is disabled.") } diff --git a/cgroups/cgroups.go b/cgroups/cgroups.go index 30de8d4d1e..fdcab8c3da 100644 --- a/cgroups/cgroups.go +++ b/cgroups/cgroups.go @@ -12,8 +12,13 @@ import ( "strings" ) -// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt +type Values struct { + Memory int64 `json:"memory"` + MemorySwap int64 `json:"memory_swap"` + CpuShares int64 `json:"cpu_shares"` +} +// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt func FindCgroupMountpoint(subsystem string) (string, error) { mounts, err := mount.GetMounts() if err != nil { diff --git a/commands.go b/commands.go index 09b65ad163..b93f0b9686 100644 --- a/commands.go +++ b/commands.go @@ -12,6 +12,7 @@ import ( "github.com/dotcloud/docker/auth" "github.com/dotcloud/docker/engine" flag "github.com/dotcloud/docker/pkg/mflag" + "github.com/dotcloud/docker/pkg/sysinfo" "github.com/dotcloud/docker/pkg/term" "github.com/dotcloud/docker/registry" "github.com/dotcloud/docker/utils" @@ -470,7 +471,7 @@ func (cli *DockerCli) CmdInfo(args ...string) error { fmt.Fprintf(cli.out, "Debug mode (client): %v\n", os.Getenv("DEBUG") != "") fmt.Fprintf(cli.out, "Fds: %d\n", remoteInfo.GetInt("NFd")) fmt.Fprintf(cli.out, "Goroutines: %d\n", remoteInfo.GetInt("NGoroutines")) - fmt.Fprintf(cli.out, "LXC Version: %s\n", remoteInfo.Get("LXCVersion")) + fmt.Fprintf(cli.out, "Execution Driver: %s\n", remoteInfo.Get("ExecutionDriver")) fmt.Fprintf(cli.out, "EventsListeners: %d\n", remoteInfo.GetInt("NEventsListener")) fmt.Fprintf(cli.out, "Kernel Version: %s\n", remoteInfo.Get("KernelVersion")) @@ -1745,14 +1746,14 @@ func (cli *DockerCli) CmdTag(args ...string) error { } //FIXME Only used in tests -func ParseRun(args []string, capabilities *Capabilities) (*Config, *HostConfig, *flag.FlagSet, error) { +func ParseRun(args []string, sysInfo *sysinfo.SysInfo) (*Config, *HostConfig, *flag.FlagSet, error) { cmd := flag.NewFlagSet("run", flag.ContinueOnError) cmd.SetOutput(ioutil.Discard) cmd.Usage = nil - return parseRun(cmd, args, capabilities) + return parseRun(cmd, args, sysInfo) } -func parseRun(cmd *flag.FlagSet, args []string, capabilities *Capabilities) (*Config, *HostConfig, *flag.FlagSet, error) { +func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Config, *HostConfig, *flag.FlagSet, error) { var ( // FIXME: use utils.ListOpts for attach and volumes? flAttach = NewListOpts(ValidateAttach) @@ -1802,7 +1803,7 @@ func parseRun(cmd *flag.FlagSet, args []string, capabilities *Capabilities) (*Co } // Check if the kernel supports memory limit cgroup. - if capabilities != nil && *flMemoryString != "" && !capabilities.MemoryLimit { + if sysInfo != nil && *flMemoryString != "" && !sysInfo.MemoryLimit { *flMemoryString = "" } @@ -1934,7 +1935,7 @@ func parseRun(cmd *flag.FlagSet, args []string, capabilities *Capabilities) (*Co PublishAllPorts: *flPublishAll, } - if capabilities != nil && flMemory > 0 && !capabilities.SwapLimit { + if sysInfo != nil && flMemory > 0 && !sysInfo.SwapLimit { //fmt.Fprintf(stdout, "WARNING: Your kernel does not support swap limit capabilities. Limitation discarded.\n") config.MemorySwap = -1 } diff --git a/container.go b/container.go index 9e4495890a..79d9511d7b 100644 --- a/container.go +++ b/container.go @@ -1,11 +1,12 @@ package docker import ( - "bytes" "encoding/json" "errors" "fmt" "github.com/dotcloud/docker/archive" + "github.com/dotcloud/docker/cgroups" + "github.com/dotcloud/docker/execdriver" "github.com/dotcloud/docker/graphdriver" "github.com/dotcloud/docker/mount" "github.com/dotcloud/docker/pkg/term" @@ -16,9 +17,7 @@ import ( "log" "net" "os" - "os/exec" "path" - "strconv" "strings" "sync" "syscall" @@ -55,7 +54,7 @@ type Container struct { Name string Driver string - cmd *exec.Cmd + process *execdriver.Process stdout *utils.WriteBroadcaster stderr *utils.WriteBroadcaster stdin io.ReadCloser @@ -235,10 +234,6 @@ func (container *Container) Inject(file io.Reader, pth string) error { return nil } -func (container *Container) Cmd() *exec.Cmd { - return container.cmd -} - func (container *Container) When() time.Time { return container.Created } @@ -305,23 +300,14 @@ func (container *Container) generateEnvConfig(env []string) error { return nil } -func (container *Container) generateLXCConfig() error { - fo, err := os.Create(container.lxcConfigPath()) - if err != nil { - return err - } - defer fo.Close() - return LxcTemplateCompiled.Execute(fo, container) -} - -func (container *Container) startPty() error { +func (container *Container) setupPty() error { ptyMaster, ptySlave, err := pty.Open() if err != nil { return err } container.ptyMaster = ptyMaster - container.cmd.Stdout = ptySlave - container.cmd.Stderr = ptySlave + container.process.Stdout = ptySlave + container.process.Stderr = ptySlave // Copy the PTYs to our broadcasters go func() { @@ -333,8 +319,8 @@ func (container *Container) startPty() error { // stdin if container.Config.OpenStdin { - container.cmd.Stdin = ptySlave - container.cmd.SysProcAttr.Setctty = true + container.process.Stdin = ptySlave + container.process.SysProcAttr.Setctty = true go func() { defer container.stdin.Close() utils.Debugf("startPty: begin of stdin pipe") @@ -342,18 +328,14 @@ func (container *Container) startPty() error { utils.Debugf("startPty: end of stdin pipe") }() } - if err := container.cmd.Start(); err != nil { - return err - } - ptySlave.Close() return nil } -func (container *Container) start() error { - container.cmd.Stdout = container.stdout - container.cmd.Stderr = container.stderr +func (container *Container) setupStd() error { + container.process.Stdout = container.stdout + container.process.Stderr = container.stderr if container.Config.OpenStdin { - stdin, err := container.cmd.StdinPipe() + stdin, err := container.process.StdinPipe() if err != nil { return err } @@ -364,7 +346,7 @@ func (container *Container) start() error { utils.Debugf("start: end of stdin pipe") }() } - return container.cmd.Start() + return nil } func (container *Container) Attach(stdin io.ReadCloser, stdinCloser io.Closer, stdout io.Writer, stderr io.Writer) chan error { @@ -384,12 +366,14 @@ func (container *Container) Attach(stdin io.ReadCloser, stdinCloser io.Closer, s if container.Config.StdinOnce && !container.Config.Tty { defer cStdin.Close() } else { - if cStdout != nil { - defer cStdout.Close() - } - if cStderr != nil { - defer cStderr.Close() - } + defer func() { + if cStdout != nil { + cStdout.Close() + } + if cStderr != nil { + cStderr.Close() + } + }() } if container.Config.Tty { _, err = utils.CopyEscapable(cStdin, stdin) @@ -485,12 +469,15 @@ func (container *Container) Attach(stdin io.ReadCloser, stdinCloser io.Closer, s } return utils.Go(func() error { - if cStdout != nil { - defer cStdout.Close() - } - if cStderr != nil { - defer cStderr.Close() - } + defer func() { + if cStdout != nil { + cStdout.Close() + } + if cStderr != nil { + cStderr.Close() + } + }() + // FIXME: how to clean up the stdin goroutine without the unwanted side effect // of closing the passed stdin? Add an intermediary io.Pipe? for i := 0; i < nJobs; i += 1 { @@ -532,16 +519,16 @@ func (container *Container) Start() (err error) { } // Make sure the config is compatible with the current kernel - if container.Config.Memory > 0 && !container.runtime.capabilities.MemoryLimit { + if container.Config.Memory > 0 && !container.runtime.sysInfo.MemoryLimit { log.Printf("WARNING: Your kernel does not support memory limit capabilities. Limitation discarded.\n") container.Config.Memory = 0 } - if container.Config.Memory > 0 && !container.runtime.capabilities.SwapLimit { + if container.Config.Memory > 0 && !container.runtime.sysInfo.SwapLimit { log.Printf("WARNING: Your kernel does not support swap limit capabilities. Limitation discarded.\n") container.Config.MemorySwap = -1 } - if container.runtime.capabilities.IPv4ForwardingDisabled { + if container.runtime.sysInfo.IPv4ForwardingDisabled { log.Printf("WARNING: IPv4 forwarding is disabled. Networking will not work") } @@ -559,38 +546,6 @@ func (container *Container) Start() (err error) { return err } - if err := container.generateLXCConfig(); err != nil { - return err - } - - var lxcStart string = "lxc-start" - if container.hostConfig.Privileged && container.runtime.capabilities.AppArmor { - lxcStart = path.Join(container.runtime.config.Root, "lxc-start-unconfined") - } - - params := []string{ - lxcStart, - "-n", container.ID, - "-f", container.lxcConfigPath(), - "--", - "/.dockerinit", - } - - // Networking - if !container.Config.NetworkDisabled { - network := container.NetworkSettings - params = append(params, - "-g", network.Gateway, - "-i", fmt.Sprintf("%s/%d", network.IPAddress, network.IPPrefixLen), - "-mtu", strconv.Itoa(container.runtime.config.Mtu), - ) - } - - // User - if container.Config.User != "" { - params = append(params, "-u", container.Config.User) - } - // Setup environment env := []string{ "HOME=/", @@ -602,10 +557,6 @@ func (container *Container) Start() (err error) { env = append(env, "TERM=xterm") } - if container.hostConfig.Privileged { - params = append(params, "-privileged") - } - // Init any links between the parent and children runtime := container.runtime @@ -653,37 +604,12 @@ func (container *Container) Start() (err error) { return err } + var workingDir string if container.Config.WorkingDir != "" { - workingDir := path.Clean(container.Config.WorkingDir) - utils.Debugf("[working dir] working dir is %s", workingDir) - + workingDir = path.Clean(container.Config.WorkingDir) if err := os.MkdirAll(path.Join(container.RootfsPath(), workingDir), 0755); err != nil { return nil } - - params = append(params, - "-w", workingDir, - ) - } - - // Program - params = append(params, "--", container.Path) - params = append(params, container.Args...) - - if RootIsShared() { - // lxc-start really needs / to be non-shared, or all kinds of stuff break - // when lxc-start unmount things and those unmounts propagate to the main - // mount namespace. - // What we really want is to clone into a new namespace and then - // mount / MS_REC|MS_SLAVE, but since we can't really clone or fork - // without exec in go we have to do this horrible shell hack... - shellString := - "mount --make-rslave /; exec " + - utils.ShellQuoteArguments(params) - - params = []string{ - "unshare", "-m", "--", "/bin/sh", "-c", shellString, - } } root := container.RootfsPath() @@ -713,7 +639,6 @@ func (container *Container) Start() (err error) { } // Mount user specified volumes - for r, v := range container.Volumes { mountAs := "ro" if container.VolumesRW[r] { @@ -725,7 +650,48 @@ func (container *Container) Start() (err error) { } } - container.cmd = exec.Command(params[0], params[1:]...) + var ( + en *execdriver.Network + driverConfig []string + ) + + if !container.Config.NetworkDisabled { + network := container.NetworkSettings + en = &execdriver.Network{ + Gateway: network.Gateway, + Bridge: network.Bridge, + IPAddress: network.IPAddress, + IPPrefixLen: network.IPPrefixLen, + Mtu: container.runtime.config.Mtu, + } + } + + if lxcConf := container.hostConfig.LxcConf; lxcConf != nil { + for _, pair := range lxcConf { + driverConfig = append(driverConfig, fmt.Sprintf("%s = %s", pair.Key, pair.Value)) + } + } + cgroupValues := &cgroups.Values{ + Memory: container.Config.Memory, + MemorySwap: container.Config.MemorySwap, + CpuShares: container.Config.CpuShares, + } + + container.process = &execdriver.Process{ + ID: container.ID, + Privileged: container.hostConfig.Privileged, + Rootfs: root, + InitPath: "/.dockerinit", + Entrypoint: container.Path, + Arguments: container.Args, + WorkingDir: workingDir, + Network: en, + Tty: container.Config.Tty, + User: container.Config.User, + Config: driverConfig, + Cgroups: cgroupValues, + } + container.process.SysProcAttr = &syscall.SysProcAttr{Setsid: true} // Setup logging of stdout and stderr to disk if err := container.runtime.LogToDisk(container.stdout, container.logPath("json"), "stdout"); err != nil { @@ -734,59 +700,47 @@ func (container *Container) Start() (err error) { if err := container.runtime.LogToDisk(container.stderr, container.logPath("json"), "stderr"); err != nil { return err } - - container.cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true} - - if container.Config.Tty { - err = container.startPty() - } else { - err = container.start() - } - if err != nil { - return err - } - // FIXME: save state on disk *first*, then converge - // this way disk state is used as a journal, eg. we can restore after crash etc. - container.State.SetRunning(container.cmd.Process.Pid) - - // Init the lock container.waitLock = make(chan struct{}) - container.ToDisk() - go container.monitor() + // Setuping pipes and/or Pty + var setup func() error + if container.Config.Tty { + setup = container.setupPty + } else { + setup = container.setupStd + } + if err := setup(); err != nil { + return err + } - defer utils.Debugf("Container running: %v", container.State.IsRunning()) - // We wait for the container to be fully running. - // Timeout after 5 seconds. In case of broken pipe, just retry. - // Note: The container can run and finish correctly before - // the end of this loop - for now := time.Now(); time.Since(now) < 5*time.Second; { - // If the container dies while waiting for it, just return - if !container.State.IsRunning() { - return nil - } - output, err := exec.Command("lxc-info", "-s", "-n", container.ID).CombinedOutput() - if err != nil { - utils.Debugf("Error with lxc-info: %s (%s)", err, output) - - output, err = exec.Command("lxc-info", "-s", "-n", container.ID).CombinedOutput() - if err != nil { - utils.Debugf("Second Error with lxc-info: %s (%s)", err, output) - return err + callbackLock := make(chan struct{}) + callback := func(process *execdriver.Process) { + container.State.SetRunning(process.Pid()) + if process.Tty { + // The callback is called after the process Start() + // so we are in the parent process. In TTY mode, stdin/out/err is the PtySlace + // which we close here. + if c, ok := process.Stdout.(io.Closer); ok { + c.Close() } - } - if strings.Contains(string(output), "RUNNING") { - return nil + if err := container.ToDisk(); err != nil { + utils.Debugf("%s", err) } - utils.Debugf("Waiting for the container to start (running: %v): %s", container.State.IsRunning(), bytes.TrimSpace(output)) - time.Sleep(50 * time.Millisecond) + close(callbackLock) } - if container.State.IsRunning() { - return ErrContainerStartTimeout + // We use a callback here instead of a goroutine and an chan for + // syncronization purposes + cErr := utils.Go(func() error { return container.monitor(callback) }) + + // Start should not return until the process is actually running + select { + case <-callbackLock: + case err := <-cErr: + return err } - return ErrContainerStart + return nil } func (container *Container) getBindMap() (map[string]BindMap, error) { @@ -1159,48 +1113,24 @@ func (container *Container) releaseNetwork() { container.NetworkSettings = &NetworkSettings{} } -// FIXME: replace this with a control socket within dockerinit -func (container *Container) waitLxc() error { - for { - output, err := exec.Command("lxc-info", "-n", container.ID).CombinedOutput() - if err != nil { - return err - } - if !strings.Contains(string(output), "RUNNING") { - return nil - } - time.Sleep(500 * time.Millisecond) - } -} +func (container *Container) monitor(callback execdriver.StartCallback) error { + var ( + err error + exitCode int + ) -func (container *Container) monitor() { - // Wait for the program to exit - - // If the command does not exist, try to wait via lxc - // (This probably happens only for ghost containers, i.e. containers that were running when Docker started) - if container.cmd == nil { - utils.Debugf("monitor: waiting for container %s using waitLxc", container.ID) - if err := container.waitLxc(); err != nil { - utils.Errorf("monitor: while waiting for container %s, waitLxc had a problem: %s", container.ID, err) - } + if container.process == nil { + // This happends when you have a GHOST container with lxc + err = container.runtime.WaitGhost(container) } else { - utils.Debugf("monitor: waiting for container %s using cmd.Wait", container.ID) - if err := container.cmd.Wait(); err != nil { - // Since non-zero exit status and signal terminations will cause err to be non-nil, - // we have to actually discard it. Still, log it anyway, just in case. - utils.Debugf("monitor: cmd.Wait reported exit status %s for container %s", err, container.ID) + exitCode, err = container.runtime.Run(container, callback) + } + + if err != nil { + if container.runtime != nil && container.runtime.srv != nil { + container.runtime.srv.LogEvent("die", container.ID, container.runtime.repositories.ImageName(container.Image)) } } - utils.Debugf("monitor: container %s finished", container.ID) - - exitCode := -1 - if container.cmd != nil { - exitCode = container.cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus() - } - - if container.runtime != nil && container.runtime.srv != nil { - container.runtime.srv.LogEvent("die", container.ID, container.runtime.repositories.ImageName(container.Image)) - } // Cleanup container.cleanup() @@ -1210,21 +1140,20 @@ func (container *Container) monitor() { container.stdin, container.stdinPipe = io.Pipe() } - // Report status back container.State.SetStopped(exitCode) - // Release the lock close(container.waitLock) - if err := container.ToDisk(); err != nil { - // FIXME: there is a race condition here which causes this to fail during the unit tests. - // If another goroutine was waiting for Wait() to return before removing the container's root - // from the filesystem... At this point it may already have done so. - // This is because State.setStopped() has already been called, and has caused Wait() - // to return. - // FIXME: why are we serializing running state to disk in the first place? - //log.Printf("%s: Failed to dump configuration to the disk: %s", container.ID, err) - } + // FIXME: there is a race condition here which causes this to fail during the unit tests. + // If another goroutine was waiting for Wait() to return before removing the container's root + // from the filesystem... At this point it may already have done so. + // This is because State.setStopped() has already been called, and has caused Wait() + // to return. + // FIXME: why are we serializing running state to disk in the first place? + //log.Printf("%s: Failed to dump configuration to the disk: %s", container.ID, err) + container.ToDisk() + + return err } func (container *Container) cleanup() { @@ -1267,13 +1196,7 @@ func (container *Container) kill(sig int) error { if !container.State.IsRunning() { return nil } - - if output, err := exec.Command("lxc-kill", "-n", container.ID, strconv.Itoa(sig)).CombinedOutput(); err != nil { - log.Printf("error killing container %s (%s, %s)", utils.TruncateID(container.ID), output, err) - return err - } - - return nil + return container.runtime.Kill(container, sig) } func (container *Container) Kill() error { @@ -1288,11 +1211,11 @@ func (container *Container) Kill() error { // 2. Wait for the process to die, in last resort, try to kill the process directly if err := container.WaitTimeout(10 * time.Second); err != nil { - if container.cmd == nil { + if container.process == nil { return fmt.Errorf("lxc-kill failed, impossible to kill the container %s", utils.TruncateID(container.ID)) } log.Printf("Container %s failed to exit within 10 seconds of lxc-kill %s - trying direct SIGKILL", "SIGKILL", utils.TruncateID(container.ID)) - if err := container.cmd.Process.Kill(); err != nil { + if err := container.runtime.Kill(container, 9); err != nil { return err } } @@ -1463,10 +1386,6 @@ func (container *Container) EnvConfigPath() (string, error) { return p, nil } -func (container *Container) lxcConfigPath() string { - return path.Join(container.root, "config.lxc") -} - // This method must be exported to be used from the lxc template func (container *Container) RootfsPath() string { return container.rootfs diff --git a/execdriver/MAINTAINERS b/execdriver/MAINTAINERS new file mode 100644 index 0000000000..e53d933d47 --- /dev/null +++ b/execdriver/MAINTAINERS @@ -0,0 +1,2 @@ +Michael Crosby (@crosbymichael) +Guillaume Charmes (@creack) diff --git a/execdriver/chroot/driver.go b/execdriver/chroot/driver.go new file mode 100644 index 0000000000..b9f3f04016 --- /dev/null +++ b/execdriver/chroot/driver.go @@ -0,0 +1,87 @@ +package chroot + +import ( + "fmt" + "github.com/dotcloud/docker/execdriver" + "github.com/dotcloud/docker/mount" + "os" + "os/exec" +) + +const ( + DriverName = "chroot" + Version = "0.1" +) + +func init() { + execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error { + if err := mount.ForceMount("proc", "proc", "proc", ""); err != nil { + return err + } + defer mount.ForceUnmount("proc") + cmd := exec.Command(args.Args[0], args.Args[1:]...) + + cmd.Stderr = os.Stderr + cmd.Stdout = os.Stdout + cmd.Stdin = os.Stdin + + return cmd.Run() + }) +} + +type driver struct { +} + +func NewDriver() (*driver, error) { + return &driver{}, nil +} + +func (d *driver) Run(c *execdriver.Process, startCallback execdriver.StartCallback) (int, error) { + params := []string{ + "chroot", + c.Rootfs, + "/.dockerinit", + "-driver", + DriverName, + } + params = append(params, c.Entrypoint) + params = append(params, c.Arguments...) + + var ( + name = params[0] + arg = params[1:] + ) + aname, err := exec.LookPath(name) + if err != nil { + aname = name + } + c.Path = aname + c.Args = append([]string{name}, arg...) + + if err := c.Start(); err != nil { + return -1, err + } + + if startCallback != nil { + startCallback(c) + } + + err = c.Wait() + return c.GetExitCode(), err +} + +func (d *driver) Kill(p *execdriver.Process, sig int) error { + return p.Process.Kill() +} + +func (d *driver) Wait(id string) error { + panic("Not Implemented") +} + +func (d *driver) Info(id string) execdriver.Info { + panic("Not implemented") +} + +func (d *driver) Name() string { + return fmt.Sprintf("%s-%s", DriverName, Version) +} diff --git a/execdriver/driver.go b/execdriver/driver.go new file mode 100644 index 0000000000..5f3d25a566 --- /dev/null +++ b/execdriver/driver.go @@ -0,0 +1,115 @@ +package execdriver + +import ( + "errors" + "github.com/dotcloud/docker/cgroups" + "os/exec" + "syscall" +) + +var ( + ErrNotRunning = errors.New("Process could not be started") + ErrWaitTimeoutReached = errors.New("Wait timeout reached") + ErrDriverAlreadyRegistered = errors.New("A driver already registered this docker init function") + ErrDriverNotFound = errors.New("The requested docker init has not been found") +) + +var dockerInitFcts map[string]InitFunc + +type ( + StartCallback func(*Process) + InitFunc func(i *InitArgs) error +) + +func RegisterInitFunc(name string, fct InitFunc) error { + if dockerInitFcts == nil { + dockerInitFcts = make(map[string]InitFunc) + } + if _, ok := dockerInitFcts[name]; ok { + return ErrDriverAlreadyRegistered + } + dockerInitFcts[name] = fct + return nil +} + +func GetInitFunc(name string) (InitFunc, error) { + fct, ok := dockerInitFcts[name] + if !ok { + return nil, ErrDriverNotFound + } + return fct, nil +} + +// Args provided to the init function for a driver +type InitArgs struct { + User string + Gateway string + Ip string + WorkDir string + Privileged bool + Env []string + Args []string + Mtu int + Driver string +} + +// Driver specific information based on +// processes registered with the driver +type Info interface { + IsRunning() bool +} + +type Driver interface { + Run(c *Process, startCallback StartCallback) (int, error) // Run executes the process and blocks until the process exits and returns the exit code + Kill(c *Process, sig int) error + Wait(id string) error // Wait on an out of process...process - lxc ghosts TODO: Rename to reattach, reconnect + Name() string // Driver name + Info(id string) Info // "temporary" hack (until we move state from core to plugins) +} + +// Network settings of the container +type Network struct { + Gateway string `json:"gateway"` + IPAddress string `json:"ip"` + Bridge string `json:"bridge"` + IPPrefixLen int `json:"ip_prefix_len"` + Mtu int `json:"mtu"` +} + +// Process wrapps an os/exec.Cmd to add more metadata +// TODO: Rename to Command +type Process struct { + exec.Cmd + + ID string `json:"id"` + Privileged bool `json:"privileged"` + User string `json:"user"` + Rootfs string `json:"rootfs"` // root fs of the container + InitPath string `json:"initpath"` // dockerinit + Entrypoint string `json:"entrypoint"` + Arguments []string `json:"arguments"` + WorkingDir string `json:"working_dir"` + ConfigPath string `json:"config_path"` // this should be able to be removed when the lxc template is moved into the driver + Tty bool `json:"tty"` + Network *Network `json:"network"` // if network is nil then networking is disabled + Config []string `json:"config"` // generic values that specific drivers can consume + Cgroups *cgroups.Values `json:"cgroups"` +} + +// Return the pid of the process +// If the process is nil -1 will be returned +func (c *Process) Pid() int { + if c.Process == nil { + return -1 + } + return c.Process.Pid +} + +// Return the exit code of the process +// if the process has not exited -1 will be returned +func (c *Process) GetExitCode() int { + if c.ProcessState == nil { + return -1 + } + return c.ProcessState.Sys().(syscall.WaitStatus).ExitStatus() +} diff --git a/execdriver/lxc/driver.go b/execdriver/lxc/driver.go new file mode 100644 index 0000000000..2a6663117a --- /dev/null +++ b/execdriver/lxc/driver.go @@ -0,0 +1,323 @@ +package lxc + +import ( + "fmt" + "github.com/dotcloud/docker/execdriver" + "github.com/dotcloud/docker/utils" + "io/ioutil" + "log" + "os" + "os/exec" + "path" + "strconv" + "strings" + "syscall" + "time" +) + +const DriverName = "lxc" + +func init() { + execdriver.RegisterInitFunc(DriverName, func(args *execdriver.InitArgs) error { + if err := setupHostname(args); err != nil { + return err + } + + if err := setupNetworking(args); err != nil { + return err + } + + if err := setupCapabilities(args); err != nil { + return err + } + if err := setupWorkingDirectory(args); err != nil { + return err + } + + if err := changeUser(args); err != nil { + return err + } + path, err := exec.LookPath(args.Args[0]) + if err != nil { + log.Printf("Unable to locate %v", args.Args[0]) + os.Exit(127) + } + if err := syscall.Exec(path, args.Args, os.Environ()); err != nil { + return fmt.Errorf("dockerinit unable to execute %s - %s", path, err) + } + panic("Unreachable") + }) +} + +type driver struct { + root string // root path for the driver to use + apparmor bool + sharedRoot bool +} + +func NewDriver(root string, apparmor bool) (*driver, error) { + // setup unconfined symlink + if err := linkLxcStart(root); err != nil { + return nil, err + } + return &driver{ + apparmor: apparmor, + root: root, + sharedRoot: rootIsShared(), + }, nil +} + +func (d *driver) Name() string { + version := d.version() + return fmt.Sprintf("%s-%s", DriverName, version) +} + +func (d *driver) Run(c *execdriver.Process, startCallback execdriver.StartCallback) (int, error) { + configPath, err := d.generateLXCConfig(c) + if err != nil { + return -1, err + } + params := []string{ + "lxc-start", + "-n", c.ID, + "-f", configPath, + "--", + c.InitPath, + "-driver", + DriverName, + } + + if c.Network != nil { + params = append(params, + "-g", c.Network.Gateway, + "-i", fmt.Sprintf("%s/%d", c.Network.IPAddress, c.Network.IPPrefixLen), + "-mtu", strconv.Itoa(c.Network.Mtu), + ) + } + + if c.User != "" { + params = append(params, "-u", c.User) + } + + if c.Privileged { + if d.apparmor { + params[0] = path.Join(d.root, "lxc-start-unconfined") + + } + params = append(params, "-privileged") + } + + if c.WorkingDir != "" { + params = append(params, "-w", c.WorkingDir) + } + + if d.sharedRoot { + // lxc-start really needs / to be non-shared, or all kinds of stuff break + // when lxc-start unmount things and those unmounts propagate to the main + // mount namespace. + // What we really want is to clone into a new namespace and then + // mount / MS_REC|MS_SLAVE, but since we can't really clone or fork + // without exec in go we have to do this horrible shell hack... + shellString := + "mount --make-rslave /; exec " + + utils.ShellQuoteArguments(params) + + params = []string{ + "unshare", "-m", "--", "/bin/sh", "-c", shellString, + } + } + + params = append(params, "--", c.Entrypoint) + params = append(params, c.Arguments...) + + var ( + name = params[0] + arg = params[1:] + ) + aname, err := exec.LookPath(name) + if err != nil { + aname = name + } + c.Path = aname + c.Args = append([]string{name}, arg...) + + if err := c.Start(); err != nil { + return -1, err + } + + var ( + waitErr error + waitLock = make(chan struct{}) + ) + go func() { + if err := c.Wait(); err != nil { + waitErr = err + } + close(waitLock) + }() + + // Poll lxc for RUNNING status + if err := d.waitForStart(c, waitLock); err != nil { + return -1, err + } + + if startCallback != nil { + startCallback(c) + } + + <-waitLock + + return c.GetExitCode(), waitErr +} + +func (d *driver) Kill(c *execdriver.Process, sig int) error { + return d.kill(c, sig) +} + +func (d *driver) Wait(id string) error { + for { + output, err := exec.Command("lxc-info", "-n", id).CombinedOutput() + if err != nil { + return err + } + if !strings.Contains(string(output), "RUNNING") { + return nil + } + time.Sleep(500 * time.Millisecond) + } +} + +func (d *driver) version() string { + version := "" + if output, err := exec.Command("lxc-version").CombinedOutput(); err == nil { + outputStr := string(output) + if len(strings.SplitN(outputStr, ":", 2)) == 2 { + version = strings.TrimSpace(strings.SplitN(outputStr, ":", 2)[1]) + } + } + return version +} + +func (d *driver) kill(c *execdriver.Process, sig int) error { + output, err := exec.Command("lxc-kill", "-n", c.ID, strconv.Itoa(sig)).CombinedOutput() + if err != nil { + return fmt.Errorf("Err: %s Output: %s", err, output) + } + return nil +} + +func (d *driver) waitForStart(c *execdriver.Process, waitLock chan struct{}) error { + var ( + err error + output []byte + ) + // We wait for the container to be fully running. + // Timeout after 5 seconds. In case of broken pipe, just retry. + // Note: The container can run and finish correctly before + // the end of this loop + for now := time.Now(); time.Since(now) < 5*time.Second; { + select { + case <-waitLock: + // If the process dies while waiting for it, just return + return nil + if c.ProcessState != nil && c.ProcessState.Exited() { + return nil + } + default: + } + + output, err = d.getInfo(c.ID) + if err != nil { + output, err = d.getInfo(c.ID) + if err != nil { + return err + } + } + if strings.Contains(string(output), "RUNNING") { + return nil + } + time.Sleep(50 * time.Millisecond) + } + return execdriver.ErrNotRunning +} + +func (d *driver) getInfo(id string) ([]byte, error) { + return exec.Command("lxc-info", "-s", "-n", id).CombinedOutput() +} + +type info struct { + ID string + driver *driver +} + +func (i *info) IsRunning() bool { + var running bool + + output, err := i.driver.getInfo(i.ID) + if err != nil { + panic(err) + } + if strings.Contains(string(output), "RUNNING") { + running = true + } + return running +} + +func (d *driver) Info(id string) execdriver.Info { + return &info{ + ID: id, + driver: d, + } +} + +func linkLxcStart(root string) error { + sourcePath, err := exec.LookPath("lxc-start") + if err != nil { + return err + } + targetPath := path.Join(root, "lxc-start-unconfined") + + if _, err := os.Lstat(targetPath); err != nil && !os.IsNotExist(err) { + return err + } else if err == nil { + if err := os.Remove(targetPath); err != nil { + return err + } + } + return os.Symlink(sourcePath, targetPath) +} + +// TODO: This can be moved to the mountinfo reader in the mount pkg +func rootIsShared() bool { + if data, err := ioutil.ReadFile("/proc/self/mountinfo"); err == nil { + for _, line := range strings.Split(string(data), "\n") { + cols := strings.Split(line, " ") + if len(cols) >= 6 && cols[4] == "/" { + return strings.HasPrefix(cols[6], "shared") + } + } + } + + // No idea, probably safe to assume so + return true +} + +func (d *driver) generateLXCConfig(p *execdriver.Process) (string, error) { + root := path.Join(d.root, "containers", p.ID, "config.lxc") + fo, err := os.Create(root) + if err != nil { + return "", err + } + defer fo.Close() + + if err := LxcTemplateCompiled.Execute(fo, struct { + *execdriver.Process + AppArmor bool + }{ + Process: p, + AppArmor: d.apparmor, + }); err != nil { + return "", err + } + return root, nil +} diff --git a/execdriver/lxc/init.go b/execdriver/lxc/init.go new file mode 100644 index 0000000000..7c2b039c50 --- /dev/null +++ b/execdriver/lxc/init.go @@ -0,0 +1,153 @@ +package lxc + +import ( + "fmt" + "github.com/dotcloud/docker/execdriver" + "github.com/dotcloud/docker/pkg/netlink" + "github.com/dotcloud/docker/utils" + "github.com/syndtr/gocapability/capability" + "net" + "os" + "strconv" + "strings" + "syscall" +) + +func setupHostname(args *execdriver.InitArgs) error { + hostname := getEnv(args, "HOSTNAME") + if hostname == "" { + return nil + } + return setHostname(hostname) +} + +// Setup networking +func setupNetworking(args *execdriver.InitArgs) error { + if args.Ip != "" { + // eth0 + iface, err := net.InterfaceByName("eth0") + if err != nil { + return fmt.Errorf("Unable to set up networking: %v", err) + } + ip, ipNet, err := net.ParseCIDR(args.Ip) + if err != nil { + return fmt.Errorf("Unable to set up networking: %v", err) + } + if err := netlink.NetworkLinkAddIp(iface, ip, ipNet); err != nil { + return fmt.Errorf("Unable to set up networking: %v", err) + } + if err := netlink.NetworkSetMTU(iface, args.Mtu); err != nil { + return fmt.Errorf("Unable to set MTU: %v", err) + } + if err := netlink.NetworkLinkUp(iface); err != nil { + return fmt.Errorf("Unable to set up networking: %v", err) + } + + // loopback + iface, err = net.InterfaceByName("lo") + if err != nil { + return fmt.Errorf("Unable to set up networking: %v", err) + } + if err := netlink.NetworkLinkUp(iface); err != nil { + return fmt.Errorf("Unable to set up networking: %v", err) + } + } + if args.Gateway != "" { + gw := net.ParseIP(args.Gateway) + if gw == nil { + return fmt.Errorf("Unable to set up networking, %s is not a valid gateway IP", args.Gateway) + } + + if err := netlink.AddDefaultGw(gw); err != nil { + return fmt.Errorf("Unable to set up networking: %v", err) + } + } + + return nil +} + +// Setup working directory +func setupWorkingDirectory(args *execdriver.InitArgs) error { + if args.WorkDir == "" { + return nil + } + if err := syscall.Chdir(args.WorkDir); err != nil { + return fmt.Errorf("Unable to change dir to %v: %v", args.WorkDir, err) + } + return nil +} + +// Takes care of dropping privileges to the desired user +func changeUser(args *execdriver.InitArgs) error { + if args.User == "" { + return nil + } + userent, err := utils.UserLookup(args.User) + if err != nil { + return fmt.Errorf("Unable to find user %v: %v", args.User, err) + } + + uid, err := strconv.Atoi(userent.Uid) + if err != nil { + return fmt.Errorf("Invalid uid: %v", userent.Uid) + } + gid, err := strconv.Atoi(userent.Gid) + if err != nil { + return fmt.Errorf("Invalid gid: %v", userent.Gid) + } + + if err := syscall.Setgid(gid); err != nil { + return fmt.Errorf("setgid failed: %v", err) + } + if err := syscall.Setuid(uid); err != nil { + return fmt.Errorf("setuid failed: %v", err) + } + + return nil +} + +func setupCapabilities(args *execdriver.InitArgs) error { + + if args.Privileged { + return nil + } + + drop := []capability.Cap{ + capability.CAP_SETPCAP, + capability.CAP_SYS_MODULE, + capability.CAP_SYS_RAWIO, + capability.CAP_SYS_PACCT, + capability.CAP_SYS_ADMIN, + capability.CAP_SYS_NICE, + capability.CAP_SYS_RESOURCE, + capability.CAP_SYS_TIME, + capability.CAP_SYS_TTY_CONFIG, + capability.CAP_MKNOD, + capability.CAP_AUDIT_WRITE, + capability.CAP_AUDIT_CONTROL, + capability.CAP_MAC_OVERRIDE, + capability.CAP_MAC_ADMIN, + } + + c, err := capability.NewPid(os.Getpid()) + if err != nil { + return err + } + + c.Unset(capability.CAPS|capability.BOUNDS, drop...) + + if err := c.Apply(capability.CAPS | capability.BOUNDS); err != nil { + return err + } + return nil +} + +func getEnv(args *execdriver.InitArgs, key string) string { + for _, kv := range args.Env { + parts := strings.SplitN(kv, "=", 2) + if parts[0] == key && len(parts) == 2 { + return parts[1] + } + } + return "" +} diff --git a/sysinit/sysinit_darwin.go b/execdriver/lxc/lxc_init_darwin.go similarity index 83% rename from sysinit/sysinit_darwin.go rename to execdriver/lxc/lxc_init_darwin.go index 64566afb3c..c066fead93 100644 --- a/sysinit/sysinit_darwin.go +++ b/execdriver/lxc/lxc_init_darwin.go @@ -1,4 +1,4 @@ -package sysinit +package lxc func setHostname(hostname string) error { panic("Not supported on darwin") diff --git a/sysinit/sysinit_linux.go b/execdriver/lxc/lxc_init_linux.go similarity index 87% rename from sysinit/sysinit_linux.go rename to execdriver/lxc/lxc_init_linux.go index d18d2fab8b..b0055c3668 100644 --- a/sysinit/sysinit_linux.go +++ b/execdriver/lxc/lxc_init_linux.go @@ -1,4 +1,4 @@ -package sysinit +package lxc import ( "syscall" diff --git a/lxc_template.go b/execdriver/lxc/lxc_template.go similarity index 74% rename from lxc_template.go rename to execdriver/lxc/lxc_template.go index f96323b5ea..deaa7a66ae 100644 --- a/lxc_template.go +++ b/execdriver/lxc/lxc_template.go @@ -1,23 +1,24 @@ -package docker +package lxc import ( + "github.com/dotcloud/docker/cgroups" "strings" "text/template" ) const LxcTemplate = ` -{{if .Config.NetworkDisabled}} -# network is disabled (-n=false) -lxc.network.type = empty -{{else}} +{{if .Network}} # network configuration lxc.network.type = veth -lxc.network.link = {{.NetworkSettings.Bridge}} +lxc.network.link = {{.Network.Bridge}} lxc.network.name = eth0 +{{else}} +# network is disabled (-n=false) +lxc.network.type = empty {{end}} # root filesystem -{{$ROOTFS := .RootfsPath}} +{{$ROOTFS := .Rootfs}} lxc.rootfs = {{$ROOTFS}} # use a dedicated pts for the container (and limit the number of pseudo terminal @@ -30,8 +31,8 @@ lxc.console = none # no controlling tty at all lxc.tty = 1 -{{if (getHostConfig .).Privileged}} -lxc.cgroup.devices.allow = a +{{if .Privileged}} +lxc.cgroup.devices.allow = a {{else}} # no implicit access to devices lxc.cgroup.devices.deny = a @@ -81,8 +82,8 @@ lxc.mount.entry = sysfs {{escapeFstabSpaces $ROOTFS}}/sys sysfs nosuid,nodev,noe lxc.mount.entry = devpts {{escapeFstabSpaces $ROOTFS}}/dev/pts devpts newinstance,ptmxmode=0666,nosuid,noexec 0 0 lxc.mount.entry = shm {{escapeFstabSpaces $ROOTFS}}/dev/shm tmpfs size=65536k,nosuid,nodev,noexec 0 0 -{{if (getHostConfig .).Privileged}} -{{if (getCapabilities .).AppArmor}} +{{if .Privileged}} +{{if .AppArmor}} lxc.aa_profile = unconfined {{else}} #lxc.aa_profile = unconfined @@ -90,20 +91,22 @@ lxc.aa_profile = unconfined {{end}} # limits -{{if .Config.Memory}} -lxc.cgroup.memory.limit_in_bytes = {{.Config.Memory}} -lxc.cgroup.memory.soft_limit_in_bytes = {{.Config.Memory}} -{{with $memSwap := getMemorySwap .Config}} +{{if .Cgroups}} +{{if .Cgroups.Memory}} +lxc.cgroup.memory.limit_in_bytes = {{.Cgroups.Memory}} +lxc.cgroup.memory.soft_limit_in_bytes = {{.Cgroups.Memory}} +{{with $memSwap := getMemorySwap .Cgroups}} lxc.cgroup.memory.memsw.limit_in_bytes = {{$memSwap}} {{end}} {{end}} -{{if .Config.CpuShares}} -lxc.cgroup.cpu.shares = {{.Config.CpuShares}} +{{if .Cgroups.CpuShares}} +lxc.cgroup.cpu.shares = {{.Cgroups.CpuShares}} +{{end}} {{end}} -{{if (getHostConfig .).LxcConf}} -{{range $pair := (getHostConfig .).LxcConf}} -{{$pair.Key}} = {{$pair.Value}} +{{if .Config}} +{{range $value := .Config}} +{{$value}} {{end}} {{end}} ` @@ -116,29 +119,19 @@ func escapeFstabSpaces(field string) string { return strings.Replace(field, " ", "\\040", -1) } -func getMemorySwap(config *Config) int64 { +func getMemorySwap(v *cgroups.Values) int64 { // By default, MemorySwap is set to twice the size of RAM. // If you want to omit MemorySwap, set it to `-1'. - if config.MemorySwap < 0 { + if v.MemorySwap < 0 { return 0 } - return config.Memory * 2 -} - -func getHostConfig(container *Container) *HostConfig { - return container.hostConfig -} - -func getCapabilities(container *Container) *Capabilities { - return container.runtime.capabilities + return v.Memory * 2 } func init() { var err error funcMap := template.FuncMap{ "getMemorySwap": getMemorySwap, - "getHostConfig": getHostConfig, - "getCapabilities": getCapabilities, "escapeFstabSpaces": escapeFstabSpaces, } LxcTemplateCompiled, err = template.New("lxc").Funcs(funcMap).Parse(LxcTemplate) diff --git a/lxc_template_unit_test.go b/execdriver/lxc/lxc_template_unit_test.go similarity index 60% rename from lxc_template_unit_test.go rename to execdriver/lxc/lxc_template_unit_test.go index f71f1dd6f5..fe65fe5cbc 100644 --- a/lxc_template_unit_test.go +++ b/execdriver/lxc/lxc_template_unit_test.go @@ -1,11 +1,14 @@ -package docker +package lxc import ( "bufio" "fmt" + "github.com/dotcloud/docker/cgroups" + "github.com/dotcloud/docker/execdriver" "io/ioutil" "math/rand" "os" + "path" "strings" "testing" "time" @@ -17,32 +20,39 @@ func TestLXCConfig(t *testing.T) { t.Fatal(err) } defer os.RemoveAll(root) + + os.MkdirAll(path.Join(root, "containers", "1"), 0777) + // Memory is allocated randomly for testing rand.Seed(time.Now().UTC().UnixNano()) - memMin := 33554432 - memMax := 536870912 - mem := memMin + rand.Intn(memMax-memMin) - // CPU shares as well - cpuMin := 100 - cpuMax := 10000 - cpu := cpuMin + rand.Intn(cpuMax-cpuMin) - container := &Container{ - root: root, - Config: &Config{ - Memory: int64(mem), - CpuShares: int64(cpu), - NetworkDisabled: true, - }, - hostConfig: &HostConfig{ - Privileged: false, - }, - } - if err := container.generateLXCConfig(); err != nil { + var ( + memMin = 33554432 + memMax = 536870912 + mem = memMin + rand.Intn(memMax-memMin) + cpuMin = 100 + cpuMax = 10000 + cpu = cpuMin + rand.Intn(cpuMax-cpuMin) + ) + + driver, err := NewDriver(root, false) + if err != nil { t.Fatal(err) } - grepFile(t, container.lxcConfigPath(), + process := &execdriver.Process{ + ID: "1", + Cgroups: &cgroups.Values{ + Memory: int64(mem), + CpuShares: int64(cpu), + }, + } + p, err := driver.generateLXCConfig(process) + if err != nil { + t.Fatal(err) + } + grepFile(t, p, fmt.Sprintf("lxc.cgroup.memory.limit_in_bytes = %d", mem)) - grepFile(t, container.lxcConfigPath(), + + grepFile(t, p, fmt.Sprintf("lxc.cgroup.memory.memsw.limit_in_bytes = %d", mem*2)) } @@ -52,31 +62,29 @@ func TestCustomLxcConfig(t *testing.T) { t.Fatal(err) } defer os.RemoveAll(root) - container := &Container{ - root: root, - Config: &Config{ - Hostname: "foobar", - NetworkDisabled: true, - }, - hostConfig: &HostConfig{ - Privileged: false, - LxcConf: []KeyValuePair{ - { - Key: "lxc.utsname", - Value: "docker", - }, - { - Key: "lxc.cgroup.cpuset.cpus", - Value: "0,1", - }, - }, - }, - } - if err := container.generateLXCConfig(); err != nil { + + os.MkdirAll(path.Join(root, "containers", "1"), 0777) + + driver, err := NewDriver(root, false) + if err != nil { t.Fatal(err) } - grepFile(t, container.lxcConfigPath(), "lxc.utsname = docker") - grepFile(t, container.lxcConfigPath(), "lxc.cgroup.cpuset.cpus = 0,1") + process := &execdriver.Process{ + ID: "1", + Privileged: false, + Config: []string{ + "lxc.utsname = docker", + "lxc.cgroup.cpuset.cpus = 0,1", + }, + } + + p, err := driver.generateLXCConfig(process) + if err != nil { + t.Fatal(err) + } + + grepFile(t, p, "lxc.utsname = docker") + grepFile(t, p, "lxc.cgroup.cpuset.cpus = 0,1") } func grepFile(t *testing.T, path string, pattern string) { diff --git a/integration/utils_test.go b/integration/utils_test.go index 4ab1c96cca..63ac3a44b9 100644 --- a/integration/utils_test.go +++ b/integration/utils_test.go @@ -38,7 +38,6 @@ func mkRuntime(f utils.Fataler) *docker.Runtime { if err != nil { f.Fatal(err) } - r.UpdateCapabilities(true) return r } diff --git a/mount/mount.go b/mount/mount.go index b087293a9d..3860b975bd 100644 --- a/mount/mount.go +++ b/mount/mount.go @@ -25,27 +25,37 @@ func Mounted(mountpoint string) (bool, error) { return false, nil } -// Mount the specified options at the target path +// Mount the specified options at the target path only if +// the target is not mounted // Options must be specified as fstab style func Mount(device, target, mType, options string) error { if mounted, err := Mounted(target); err != nil || mounted { return err } + return ForceMount(device, target, mType, options) +} +// Mount the specified options at the target path +// reguardless if the target is mounted or not +// Options must be specified as fstab style +func ForceMount(device, target, mType, options string) error { flag, data := parseOptions(options) if err := mount(device, target, mType, uintptr(flag), data); err != nil { return err } return nil - } // Unmount the target only if it is mounted -func Unmount(target string) (err error) { +func Unmount(target string) error { if mounted, err := Mounted(target); err != nil || !mounted { return err } + return ForceUnmount(target) +} +// Unmount the target reguardless if it is mounted or not +func ForceUnmount(target string) (err error) { // Simple retry logic for unmount for i := 0; i < 10; i++ { if err = unmount(target, 0); err == nil { diff --git a/pkg/sysinfo/sysinfo.go b/pkg/sysinfo/sysinfo.go new file mode 100644 index 0000000000..884fcbde7c --- /dev/null +++ b/pkg/sysinfo/sysinfo.go @@ -0,0 +1,55 @@ +package sysinfo + +import ( + "github.com/dotcloud/docker/cgroups" + "github.com/dotcloud/docker/utils" + "io/ioutil" + "log" + "os" + "path" +) + +type SysInfo struct { + MemoryLimit bool + SwapLimit bool + IPv4ForwardingDisabled bool + AppArmor bool +} + +func New(quiet bool) *SysInfo { + sysInfo := &SysInfo{} + if cgroupMemoryMountpoint, err := cgroups.FindCgroupMountpoint("memory"); err != nil { + if !quiet { + log.Printf("WARNING: %s\n", err) + } + } else { + _, err1 := ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.limit_in_bytes")) + _, err2 := ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.soft_limit_in_bytes")) + sysInfo.MemoryLimit = err1 == nil && err2 == nil + if !sysInfo.MemoryLimit && !quiet { + log.Printf("WARNING: Your kernel does not support cgroup memory limit.") + } + + _, err = ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.memsw.limit_in_bytes")) + sysInfo.SwapLimit = err == nil + if !sysInfo.SwapLimit && !quiet { + log.Printf("WARNING: Your kernel does not support cgroup swap limit.") + } + } + + content, err3 := ioutil.ReadFile("/proc/sys/net/ipv4/ip_forward") + sysInfo.IPv4ForwardingDisabled = err3 != nil || len(content) == 0 || content[0] != '1' + if sysInfo.IPv4ForwardingDisabled && !quiet { + log.Printf("WARNING: IPv4 forwarding is disabled.") + } + + // Check if AppArmor seems to be enabled on this system. + if _, err := os.Stat("/sys/kernel/security/apparmor"); os.IsNotExist(err) { + utils.Debugf("/sys/kernel/security/apparmor not found; assuming AppArmor is not enabled.") + sysInfo.AppArmor = false + } else { + utils.Debugf("/sys/kernel/security/apparmor found; assuming AppArmor is enabled.") + sysInfo.AppArmor = true + } + return sysInfo +} diff --git a/runtime.go b/runtime.go index 731e3a8784..52f03f84be 100644 --- a/runtime.go +++ b/runtime.go @@ -4,18 +4,19 @@ import ( "container/list" "fmt" "github.com/dotcloud/docker/archive" - "github.com/dotcloud/docker/cgroups" + "github.com/dotcloud/docker/execdriver" + "github.com/dotcloud/docker/execdriver/chroot" + "github.com/dotcloud/docker/execdriver/lxc" "github.com/dotcloud/docker/graphdriver" "github.com/dotcloud/docker/graphdriver/aufs" _ "github.com/dotcloud/docker/graphdriver/devmapper" _ "github.com/dotcloud/docker/graphdriver/vfs" "github.com/dotcloud/docker/pkg/graphdb" + "github.com/dotcloud/docker/pkg/sysinfo" "github.com/dotcloud/docker/utils" "io" "io/ioutil" - "log" "os" - "os/exec" "path" "regexp" "sort" @@ -35,13 +36,6 @@ var ( validContainerNamePattern = regexp.MustCompile(`^/?` + validContainerNameChars + `+$`) ) -type Capabilities struct { - MemoryLimit bool - SwapLimit bool - IPv4ForwardingDisabled bool - AppArmor bool -} - type Runtime struct { repository string sysInitPath string @@ -50,12 +44,13 @@ type Runtime struct { graph *Graph repositories *TagStore idIndex *utils.TruncIndex - capabilities *Capabilities + sysInfo *sysinfo.SysInfo volumes *Graph srv *Server config *DaemonConfig containerGraph *graphdb.Database driver graphdriver.Driver + execDriver execdriver.Driver } // List returns an array of all containers registered in the runtime. @@ -160,11 +155,9 @@ func (runtime *Runtime) Register(container *Container) error { // if so, then we need to restart monitor and init a new lock // If the container is supposed to be running, make sure of it if container.State.IsRunning() { - output, err := exec.Command("lxc-info", "-n", container.ID).CombinedOutput() - if err != nil { - return err - } - if !strings.Contains(string(output), "RUNNING") { + info := runtime.execDriver.Info(container.ID) + + if !info.IsRunning() { utils.Debugf("Container %s was supposed to be running but is not.", container.ID) if runtime.config.AutoRestart { utils.Debugf("Restarting") @@ -188,7 +181,7 @@ func (runtime *Runtime) Register(container *Container) error { } container.waitLock = make(chan struct{}) - go container.monitor() + go container.monitor(nil) } } return nil @@ -331,43 +324,6 @@ func (runtime *Runtime) restore() error { return nil } -// FIXME: comment please! -func (runtime *Runtime) UpdateCapabilities(quiet bool) { - if cgroupMemoryMountpoint, err := cgroups.FindCgroupMountpoint("memory"); err != nil { - if !quiet { - log.Printf("WARNING: %s\n", err) - } - } else { - _, err1 := ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.limit_in_bytes")) - _, err2 := ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.soft_limit_in_bytes")) - runtime.capabilities.MemoryLimit = err1 == nil && err2 == nil - if !runtime.capabilities.MemoryLimit && !quiet { - log.Printf("WARNING: Your kernel does not support cgroup memory limit.") - } - - _, err = ioutil.ReadFile(path.Join(cgroupMemoryMountpoint, "memory.memsw.limit_in_bytes")) - runtime.capabilities.SwapLimit = err == nil - if !runtime.capabilities.SwapLimit && !quiet { - log.Printf("WARNING: Your kernel does not support cgroup swap limit.") - } - } - - content, err3 := ioutil.ReadFile("/proc/sys/net/ipv4/ip_forward") - runtime.capabilities.IPv4ForwardingDisabled = err3 != nil || len(content) == 0 || content[0] != '1' - if runtime.capabilities.IPv4ForwardingDisabled && !quiet { - log.Printf("WARNING: IPv4 forwarding is disabled.") - } - - // Check if AppArmor seems to be enabled on this system. - if _, err := os.Stat("/sys/kernel/security/apparmor"); os.IsNotExist(err) { - utils.Debugf("/sys/kernel/security/apparmor not found; assuming AppArmor is not enabled.") - runtime.capabilities.AppArmor = false - } else { - utils.Debugf("/sys/kernel/security/apparmor found; assuming AppArmor is enabled.") - runtime.capabilities.AppArmor = true - } -} - // Create creates a new container from the given configuration with a given name. func (runtime *Runtime) Create(config *Config, name string) (*Container, []string, error) { // Lookup image @@ -646,7 +602,6 @@ func NewRuntime(config *DaemonConfig) (*Runtime, error) { if err != nil { return nil, err } - runtime.UpdateCapabilities(false) return runtime, nil } @@ -675,10 +630,6 @@ func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) { } } - utils.Debugf("Escaping AppArmor confinement") - if err := linkLxcStart(config.Root); err != nil { - return nil, err - } utils.Debugf("Creating images graph") g, err := NewGraph(path.Join(config.Root, "graph"), driver) if err != nil { @@ -735,6 +686,26 @@ func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) { sysInitPath = localCopy } + sysInfo := sysinfo.New(false) + + /* + temporarilly disabled. + */ + if false { + var ed execdriver.Driver + if driver := os.Getenv("EXEC_DRIVER"); driver == "lxc" { + ed, err = lxc.NewDriver(config.Root, sysInfo.AppArmor) + } else { + ed, err = chroot.NewDriver() + } + if ed != nil { + } + } + ed, err := lxc.NewDriver(config.Root, sysInfo.AppArmor) + if err != nil { + return nil, err + } + runtime := &Runtime{ repository: runtimeRepo, containers: list.New(), @@ -742,12 +713,13 @@ func NewRuntimeFromDirectory(config *DaemonConfig) (*Runtime, error) { graph: g, repositories: repositories, idIndex: utils.NewTruncIndex(), - capabilities: &Capabilities{}, + sysInfo: sysInfo, volumes: volumes, config: config, containerGraph: graph, driver: driver, sysInitPath: sysInitPath, + execDriver: ed, } if err := runtime.restore(); err != nil { @@ -829,6 +801,18 @@ func (runtime *Runtime) Diff(container *Container) (archive.Archive, error) { return archive.ExportChanges(cDir, changes) } +func (runtime *Runtime) Run(c *Container, startCallback execdriver.StartCallback) (int, error) { + return runtime.execDriver.Run(c.process, startCallback) +} + +func (runtime *Runtime) Kill(c *Container, sig int) error { + return runtime.execDriver.Kill(c.process, sig) +} + +func (runtime *Runtime) WaitGhost(c *Container) error { + return runtime.execDriver.Wait(c.ID) +} + // Nuke kills all containers then removes all content // from the content root, including images, volumes and // container filesystems. @@ -848,23 +832,6 @@ func (runtime *Runtime) Nuke() error { return os.RemoveAll(runtime.config.Root) } -func linkLxcStart(root string) error { - sourcePath, err := exec.LookPath("lxc-start") - if err != nil { - return err - } - targetPath := path.Join(root, "lxc-start-unconfined") - - if _, err := os.Lstat(targetPath); err != nil && !os.IsNotExist(err) { - return err - } else if err == nil { - if err := os.Remove(targetPath); err != nil { - return err - } - } - return os.Symlink(sourcePath, targetPath) -} - // FIXME: this is a convenience function for integration tests // which need direct access to runtime.graph. // Once the tests switch to using engine and jobs, this method diff --git a/server.go b/server.go index 3892ba70ea..a25ba74a06 100644 --- a/server.go +++ b/server.go @@ -516,7 +516,7 @@ func (srv *Server) ImageInsert(job *engine.Job) engine.Status { } defer file.Body.Close() - config, _, _, err := ParseRun([]string{img.ID, "echo", "insert", url, path}, srv.runtime.capabilities) + config, _, _, err := ParseRun([]string{img.ID, "echo", "insert", url, path}, srv.runtime.sysInfo) if err != nil { job.Error(err) return engine.StatusErr @@ -661,13 +661,6 @@ func (srv *Server) DockerInfo(job *engine.Job) engine.Status { } else { imgcount = len(images) } - lxcVersion := "" - if output, err := exec.Command("lxc-version").CombinedOutput(); err == nil { - outputStr := string(output) - if len(strings.SplitN(outputStr, ":", 2)) == 2 { - lxcVersion = strings.TrimSpace(strings.SplitN(string(output), ":", 2)[1]) - } - } kernelVersion := "" if kv, err := utils.GetKernelVersion(); err == nil { kernelVersion = kv.String() @@ -685,13 +678,13 @@ func (srv *Server) DockerInfo(job *engine.Job) engine.Status { v.SetInt("Images", imgcount) v.Set("Driver", srv.runtime.driver.String()) v.SetJson("DriverStatus", srv.runtime.driver.Status()) - v.SetBool("MemoryLimit", srv.runtime.capabilities.MemoryLimit) - v.SetBool("SwapLimit", srv.runtime.capabilities.SwapLimit) - v.SetBool("IPv4Forwarding", !srv.runtime.capabilities.IPv4ForwardingDisabled) + v.SetBool("MemoryLimit", srv.runtime.sysInfo.MemoryLimit) + v.SetBool("SwapLimit", srv.runtime.sysInfo.SwapLimit) + v.SetBool("IPv4Forwarding", !srv.runtime.sysInfo.IPv4ForwardingDisabled) v.SetBool("Debug", os.Getenv("DEBUG") != "") v.SetInt("NFd", utils.GetTotalUsedFds()) v.SetInt("NGoroutines", runtime.NumGoroutine()) - v.Set("LXCVersion", lxcVersion) + v.Set("ExecutionDriver", srv.runtime.execDriver.Name()) v.SetInt("NEventsListener", len(srv.events)) v.Set("KernelVersion", kernelVersion) v.Set("IndexServerAddress", auth.IndexServerAddress()) @@ -1477,10 +1470,10 @@ func (srv *Server) ContainerCreate(job *engine.Job) engine.Status { job.Errorf("Minimum memory limit allowed is 512k") return engine.StatusErr } - if config.Memory > 0 && !srv.runtime.capabilities.MemoryLimit { + if config.Memory > 0 && !srv.runtime.sysInfo.MemoryLimit { config.Memory = 0 } - if config.Memory > 0 && !srv.runtime.capabilities.SwapLimit { + if config.Memory > 0 && !srv.runtime.sysInfo.SwapLimit { config.MemorySwap = -1 } container, buildWarnings, err := srv.runtime.Create(&config, name) diff --git a/sysinit/sysinit.go b/sysinit/sysinit.go index ce46e06f14..af69795cb6 100644 --- a/sysinit/sysinit.go +++ b/sysinit/sysinit.go @@ -4,163 +4,19 @@ import ( "encoding/json" "flag" "fmt" - "github.com/dotcloud/docker/pkg/netlink" - "github.com/dotcloud/docker/utils" - "github.com/syndtr/gocapability/capability" + "github.com/dotcloud/docker/execdriver" + _ "github.com/dotcloud/docker/execdriver/chroot" + _ "github.com/dotcloud/docker/execdriver/lxc" "io/ioutil" "log" - "net" "os" - "os/exec" - "strconv" "strings" - "syscall" ) -type DockerInitArgs struct { - user string - gateway string - ip string - workDir string - privileged bool - env []string - args []string - mtu int -} - -func setupHostname(args *DockerInitArgs) error { - hostname := getEnv(args, "HOSTNAME") - if hostname == "" { - return nil - } - return setHostname(hostname) -} - -// Setup networking -func setupNetworking(args *DockerInitArgs) error { - if args.ip != "" { - // eth0 - iface, err := net.InterfaceByName("eth0") - if err != nil { - return fmt.Errorf("Unable to set up networking: %v", err) - } - ip, ipNet, err := net.ParseCIDR(args.ip) - if err != nil { - return fmt.Errorf("Unable to set up networking: %v", err) - } - if err := netlink.NetworkLinkAddIp(iface, ip, ipNet); err != nil { - return fmt.Errorf("Unable to set up networking: %v", err) - } - if err := netlink.NetworkSetMTU(iface, args.mtu); err != nil { - return fmt.Errorf("Unable to set MTU: %v", err) - } - if err := netlink.NetworkLinkUp(iface); err != nil { - return fmt.Errorf("Unable to set up networking: %v", err) - } - - // loopback - iface, err = net.InterfaceByName("lo") - if err != nil { - return fmt.Errorf("Unable to set up networking: %v", err) - } - if err := netlink.NetworkLinkUp(iface); err != nil { - return fmt.Errorf("Unable to set up networking: %v", err) - } - } - if args.gateway != "" { - gw := net.ParseIP(args.gateway) - if gw == nil { - return fmt.Errorf("Unable to set up networking, %s is not a valid gateway IP", args.gateway) - } - - if err := netlink.AddDefaultGw(gw); err != nil { - return fmt.Errorf("Unable to set up networking: %v", err) - } - } - - return nil -} - -// Setup working directory -func setupWorkingDirectory(args *DockerInitArgs) error { - if args.workDir == "" { - return nil - } - if err := syscall.Chdir(args.workDir); err != nil { - return fmt.Errorf("Unable to change dir to %v: %v", args.workDir, err) - } - return nil -} - -// Takes care of dropping privileges to the desired user -func changeUser(args *DockerInitArgs) error { - if args.user == "" { - return nil - } - userent, err := utils.UserLookup(args.user) - if err != nil { - return fmt.Errorf("Unable to find user %v: %v", args.user, err) - } - - uid, err := strconv.Atoi(userent.Uid) - if err != nil { - return fmt.Errorf("Invalid uid: %v", userent.Uid) - } - gid, err := strconv.Atoi(userent.Gid) - if err != nil { - return fmt.Errorf("Invalid gid: %v", userent.Gid) - } - - if err := syscall.Setgid(gid); err != nil { - return fmt.Errorf("setgid failed: %v", err) - } - if err := syscall.Setuid(uid); err != nil { - return fmt.Errorf("setuid failed: %v", err) - } - - return nil -} - -func setupCapabilities(args *DockerInitArgs) error { - - if args.privileged { - return nil - } - - drop := []capability.Cap{ - capability.CAP_SETPCAP, - capability.CAP_SYS_MODULE, - capability.CAP_SYS_RAWIO, - capability.CAP_SYS_PACCT, - capability.CAP_SYS_ADMIN, - capability.CAP_SYS_NICE, - capability.CAP_SYS_RESOURCE, - capability.CAP_SYS_TIME, - capability.CAP_SYS_TTY_CONFIG, - capability.CAP_MKNOD, - capability.CAP_AUDIT_WRITE, - capability.CAP_AUDIT_CONTROL, - capability.CAP_MAC_OVERRIDE, - capability.CAP_MAC_ADMIN, - } - - c, err := capability.NewPid(os.Getpid()) - if err != nil { - return err - } - - c.Unset(capability.CAPS|capability.BOUNDS, drop...) - - if err := c.Apply(capability.CAPS | capability.BOUNDS); err != nil { - return err - } - return nil -} - // Clear environment pollution introduced by lxc-start -func setupEnv(args *DockerInitArgs) { +func setupEnv(args *execdriver.InitArgs) { os.Clearenv() - for _, kv := range args.env { + for _, kv := range args.Env { parts := strings.SplitN(kv, "=", 2) if len(parts) == 1 { parts = append(parts, "") @@ -169,50 +25,19 @@ func setupEnv(args *DockerInitArgs) { } } -func getEnv(args *DockerInitArgs, key string) string { - for _, kv := range args.env { - parts := strings.SplitN(kv, "=", 2) - if parts[0] == key && len(parts) == 2 { - return parts[1] - } - } - return "" -} - -func executeProgram(args *DockerInitArgs) error { +func executeProgram(args *execdriver.InitArgs) error { setupEnv(args) - - if err := setupHostname(args); err != nil { - return err - } - - if err := setupNetworking(args); err != nil { - return err - } - - if err := setupCapabilities(args); err != nil { - return err - } - - if err := setupWorkingDirectory(args); err != nil { - return err - } - - if err := changeUser(args); err != nil { - return err - } - - path, err := exec.LookPath(args.args[0]) + dockerInitFct, err := execdriver.GetInitFunc(args.Driver) if err != nil { - log.Printf("Unable to locate %v", args.args[0]) - os.Exit(127) + panic(err) + } + return dockerInitFct(args) + + if args.Driver == "lxc" { + // Will never reach + } else if args.Driver == "chroot" { } - if err := syscall.Exec(path, args.args, os.Environ()); err != nil { - return fmt.Errorf("dockerinit unable to execute %s - %s", path, err) - } - - // Will never reach here return nil } @@ -232,6 +57,7 @@ func SysInit() { workDir := flag.String("w", "", "workdir") privileged := flag.Bool("privileged", false, "privileged mode") mtu := flag.Int("mtu", 1500, "interface mtu") + driver := flag.String("driver", "", "exec driver") flag.Parse() // Get env @@ -247,15 +73,16 @@ func SysInit() { // Propagate the plugin-specific container env variable env = append(env, "container="+os.Getenv("container")) - args := &DockerInitArgs{ - user: *user, - gateway: *gateway, - ip: *ip, - workDir: *workDir, - privileged: *privileged, - env: env, - args: flag.Args(), - mtu: *mtu, + args := &execdriver.InitArgs{ + User: *user, + Gateway: *gateway, + Ip: *ip, + WorkDir: *workDir, + Privileged: *privileged, + Env: env, + Args: flag.Args(), + Mtu: *mtu, + Driver: *driver, } if err := executeProgram(args); err != nil { diff --git a/utils.go b/utils.go index 3eb1eac045..f0591158a4 100644 --- a/utils.go +++ b/utils.go @@ -5,7 +5,6 @@ import ( "github.com/dotcloud/docker/archive" "github.com/dotcloud/docker/pkg/namesgenerator" "github.com/dotcloud/docker/utils" - "io/ioutil" "strconv" "strings" ) @@ -328,20 +327,6 @@ func parseLink(rawLink string) (map[string]string, error) { return utils.PartParser("name:alias", rawLink) } -func RootIsShared() bool { - if data, err := ioutil.ReadFile("/proc/self/mountinfo"); err == nil { - for _, line := range strings.Split(string(data), "\n") { - cols := strings.Split(line, " ") - if len(cols) >= 6 && cols[4] == "/" { - return strings.HasPrefix(cols[6], "shared") - } - } - } - - // No idea, probably safe to assume so - return true -} - type checker struct { runtime *Runtime }