Add per-pod CGroups

Pods can now create their own (cgroupfs) cgroups, which containers in them can (optionally) use. This presently only works with CGroupFS; systemd cgroups are still WIP. Signed-off-by: Matthew Heon <matthew.heon@gmail.com> Closes: #784 Approved by: rhatdan
2018-05-16 12:45:09 -04:00 · 2018-05-16 12:45:09 -04:00 · 7e1ea9d26d
parent 018d2c6b1d
commit 7e1ea9d26d
5 changed files with 196 additions and 8 deletions
--- a/libpod/boltdb_state.go
+++ b/libpod/boltdb_state.go
@ -107,6 +107,11 @@ func (s *BoltState) Refresh() error {
 			return err
 		}
 		podsBucket, err := getPodBucket(tx)
 		if err != nil {
 			return err
 		}
 		// Iterate through all IDs. Check if they are containers.
 		// If they are, unmarshal their state, and then clear
 		// PID, mountpoint, and state for all of them
@ -115,6 +120,38 @@ func (s *BoltState) Refresh() error {
 		err = idBucket.ForEach(func(id, name []byte) error {
 			ctrBkt := ctrsBucket.Bucket(id)
 			if ctrBkt == nil {
 				// It's a pod
 				podBkt := podsBucket.Bucket(id)
 				if podBkt == nil {
 					// This is neither a pod nor a container
 					// Error out on the dangling ID
 					return errors.Wrapf(ErrInternal, "id %s is not a pod or a container", string(id))
 				}
 				// Get the state
 				stateBytes := podBkt.Get(stateKey)
 				if stateBytes == nil {
 					return errors.Wrapf(ErrInternal, "pod %s missing state key", string(id))
 				}
 				state := new(podState)
 				if err := json.Unmarshal(stateBytes, state); err != nil {
 					return errors.Wrapf(err, "error unmarshalling state for pod %s", string(id))
 				}
 				// Clear the CGroup path
 				state.CgroupPath = ""
 				newStateBytes, err := json.Marshal(state)
 				if err != nil {
 					return errors.Wrapf(err, "error marshalling modified state for pod %s", string(id))
 				}
 				if err := podBkt.Put(stateKey, newStateBytes); err != nil {
 					return errors.Wrapf(err, "error updating state for pod %s in DB", string(id))
 				}
 				// It's not a container, nothing to do
 				return nil
 			}
--- a/libpod/options.go
+++ b/libpod/options.go
@ -944,3 +944,32 @@ func WithPodLabels(labels map[string]string) PodCreateOption {
 		return nil
 	}
 }
 // WithPodCgroupParent returns a PodCreateOption that records path as the
 // pod's cgroup parent in its configuration.
 // The returned option fails with ErrPodFinalized if the pod is already marked
 // valid.
 func WithPodCgroupParent(path string) PodCreateOption {
 	setParent := func(target *Pod) error {
 		// Configuration may only be changed before the pod is finalized.
 		if target.valid {
 			return ErrPodFinalized
 		}
 		target.config.CgroupParent = path
 		return nil
 	}
 	return setParent
 }
 // WithPodCgroups returns a PodCreateOption that sets the pod's UsePodCgroup
 // flag, telling containers in the pod to use the cgroup created for the pod.
 // A container can still override this by explicitly specifying its own CGroup
 // parent.
 // The returned option fails with ErrPodFinalized if the pod is already marked
 // valid.
 func WithPodCgroups() PodCreateOption {
 	enablePodCgroup := func(target *Pod) error {
 		// Configuration may only be changed before the pod is finalized.
 		if target.valid {
 			return ErrPodFinalized
 		}
 		target.config.UsePodCgroup = true
 		return nil
 	}
 	return enablePodCgroup
 }
--- a/libpod/pod.go
+++ b/libpod/pod.go
@ -31,6 +31,11 @@ type PodConfig struct {
 	Labels map[string]string `json:"labels"`
 	// CgroupParent contains the pod's CGroup parent
 	CgroupParent string `json:"cgroupParent"`
 	// UsePodCgroup indicates whether the pod will create its own CGroup and
 	// join containers to it.
 	// If true, all containers joined to the pod will use the pod cgroup as
 	// their cgroup parent, and cannot set a different cgroup parent
 	UsePodCgroup bool
 }
 // podState represents a pod's state
@ -64,6 +69,23 @@ func (p *Pod) CgroupParent() string {
 	return p.config.CgroupParent
 }
 // UsePodCgroup reports the UsePodCgroup flag from the pod's configuration.
 // When it is true, containers in the pod default to this pod's cgroup rather
 // than the default libpod parent.
 func (p *Pod) UsePodCgroup() bool {
 	usePodCgroup := p.config.UsePodCgroup
 	return usePodCgroup
 }
 // CgroupPath returns the path to the pod's CGroup.
 // The pod's state is refreshed from the database before the path is read.
 // The pod lock is held for the whole refresh-and-read sequence, so the
 // returned value is consistent with the state that was just loaded.
 // Returns an empty string and the underlying error if the state update fails.
 func (p *Pod) CgroupPath() (string, error) {
 	p.lock.Lock()
 	// Release the lock only on return; unlocking immediately would leave the
 	// state update and the read below unprotected.
 	defer p.lock.Unlock()
 	if err := p.updatePod(); err != nil {
 		return "", err
 	}
 	return p.state.CgroupPath, nil
 }
 // Creates a new, empty pod
 func newPod(lockDir string, runtime *Runtime) (*Pod, error) {
 	pod := new(Pod)
@ -85,6 +107,52 @@ func newPod(lockDir string, runtime *Runtime) (*Pod, error) {
 	return pod, nil
 }
 // updatePod reloads this pod's state through the runtime's state backend and
 // returns whatever error that backend reports (nil on success).
 func (p *Pod) updatePod() error {
 	return p.runtime.state.UpdatePod(p)
 }
 // save writes this pod's state through the runtime's state backend.
 // On failure the underlying error is wrapped with the pod's ID so the
 // message identifies which pod could not be saved.
 func (p *Pod) save() error {
 	if err := p.runtime.state.SavePod(p); err != nil {
 		// The format string has a %s verb, so the pod ID must be supplied;
 		// without it the message would contain "%!s(MISSING)".
 		return errors.Wrapf(err, "error saving pod %s state", p.ID())
 	}
 	return nil
 }
 // refresh recomputes a pod's cgroup path after a restart and saves the pod.
 // It takes the pod lock for its whole duration and returns ErrPodRemoved if
 // the pod is no longer valid.
 // When the pod is configured to use its own cgroup, the path is rebuilt for
 // the cgroupfs manager; the systemd manager is currently a no-op, and any
 // other manager yields ErrInvalidArg. The pod is saved in every non-error
 // case.
 func (p *Pod) refresh() error {
 	p.lock.Lock()
 	defer p.lock.Unlock()
 	if !p.valid {
 		return ErrPodRemoved
 	}
 	// Pods without their own cgroup have nothing to recreate; just save.
 	if !p.config.UsePodCgroup {
 		return p.save()
 	}
 	manager := p.runtime.config.CgroupManager
 	switch manager {
 	case CgroupfsCgroupsManager:
 		// The pod cgroup path is the cgroup parent joined with the pod ID.
 		cgroupPath := filepath.Join(p.config.CgroupParent, p.ID())
 		p.state.CgroupPath = cgroupPath
 		logrus.Debugf("setting pod cgroup to %s", cgroupPath)
 	case SystemdCgroupsManager:
 		// NOOP for now, until proper systemd cgroup management
 		// is implemented
 	default:
 		return errors.Wrapf(ErrInvalidArg, "unknown cgroups manager %s specified", manager)
 	}
 	// Save changes
 	return p.save()
 }
 // Start starts all containers within a pod
 // It combines the effects of Init() and Start() on a container
 // If a container has already been initialized it will be started,
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@ -52,6 +52,15 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ..
 	ctr.state.State = ContainerStateConfigured
 	ctr.runtime = r
 	var pod *Pod
 	if ctr.config.Pod != "" {
 		// Get the pod from state
 		pod, err = r.state.Pod(ctr.config.Pod)
 		if err != nil {
 			return nil, errors.Wrapf(err, "cannot add container %s to pod %s", ctr.ID(), ctr.config.Pod)
 		}
 	}
 	if ctr.config.Name == "" {
 		name, err := r.generateName()
 		if err != nil {
@ -65,13 +74,29 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ..
 	switch r.config.CgroupManager {
 	case CgroupfsCgroupsManager:
 		if ctr.config.CgroupParent == "" {
-			ctr.config.CgroupParent = CgroupfsDefaultCgroupParent
+			if pod != nil && pod.config.UsePodCgroup {
 				podCgroup, err := pod.CgroupPath()
 				if err != nil {
 					return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID())
 				}
 				ctr.config.CgroupParent = podCgroup
 			} else {
 				ctr.config.CgroupParent = CgroupfsDefaultCgroupParent
 			}
 		} else if strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") {
 			return nil, errors.Wrapf(ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs")
 		}
 	case SystemdCgroupsManager:
 		if ctr.config.CgroupParent == "" {
-			ctr.config.CgroupParent = SystemdDefaultCgroupParent
+			if pod != nil && pod.config.UsePodCgroup {
 				podCgroup, err := pod.CgroupPath()
 				if err != nil {
 					return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID())
 				}
 				ctr.config.CgroupParent = podCgroup
 			} else {
 				ctr.config.CgroupParent = SystemdDefaultCgroupParent
 			}
 		} else if len(ctr.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") {
 			return nil, errors.Wrapf(ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups")
 		}
@ -110,12 +135,6 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ..
 	// Add the container to the state
 	// TODO: May be worth looking into recovering from name/ID collisions here
 	if ctr.config.Pod != "" {
 		// Get the pod from state
 		pod, err := r.state.Pod(ctr.config.Pod)
 		if err != nil {
 			return nil, errors.Wrapf(err, "cannot add container %s to pod %s", ctr.ID(), ctr.config.Pod)
 		}
 		// Lock the pod to ensure we can't add containers to pods
 		// being removed
 		pod.lock.Lock()
--- a/libpod/runtime_pod.go
+++ b/libpod/runtime_pod.go
@ -2,9 +2,12 @@ package libpod
 import (
 	"path"
 	"path/filepath"
 	"strings"
 	"github.com/containerd/cgroups"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 )
 // Contains the public Runtime API for pods
@ -56,12 +59,21 @@ func (r *Runtime) NewPod(options ...PodCreateOption) (*Pod, error) {
 		} else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
 			return nil, errors.Wrapf(ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs")
 		}
 		// Creating CGroup path is currently a NOOP until proper systemd
 		// cgroup management is merged
 	case SystemdCgroupsManager:
 		if pod.config.CgroupParent == "" {
 			pod.config.CgroupParent = SystemdDefaultCgroupParent
 		} else if len(pod.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
 			return nil, errors.Wrapf(ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups")
 		}
 		// If we are set to use pod cgroups, set the cgroup parent that
 		// all containers in the pod will share
 		// No need to create it with cgroupfs - the first container to
 		// launch should do it for us
 		if pod.config.UsePodCgroup {
 			pod.state.CgroupPath = filepath.Join(pod.config.CgroupParent, pod.ID())
 		}
 	default:
 		return nil, errors.Wrapf(ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.CgroupManager)
 	}
@ -211,6 +223,29 @@ func (r *Runtime) RemovePod(p *Pod, removeCtrs, force bool) error {
 		ctr.valid = false
 	}
 	// Remove pod cgroup, if present
 	if p.state.CgroupPath != "" {
 		switch p.runtime.config.CgroupManager {
 		case SystemdCgroupsManager:
 			// NOOP for now, until proper systemd cgroup management
 			// is implemented
 		case CgroupfsCgroupsManager:
 			// Delete the cgroupfs cgroup
 			logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)
 			cgroup, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(p.state.CgroupPath))
 			if err != nil && err != cgroups.ErrCgroupDeleted {
 				return err
 			} else if err == nil {
 				if err := cgroup.Delete(); err != nil {
 					return err
 				}
 			}
 		default:
 			return errors.Wrapf(ErrInvalidArg, "unknown cgroups manager %s specified", p.runtime.config.CgroupManager)
 		}
 	}
 	// Remove pod from state
 	if err := r.state.RemovePod(p); err != nil {
 		return err