Add per-pod CGroups

Pods can now create their own (cgroupfs) cgroups, which containers in them can (optionally) use. This presently only works with CGroupFS; systemd cgroups are still WIP. Signed-off-by: Matthew Heon <matthew.heon@gmail.com> Closes: #784 Approved by: rhatdan
2018-05-16 12:45:09 -04:00 · 2018-05-16 12:45:09 -04:00 · 7e1ea9d26d
parent 018d2c6b1d
commit 7e1ea9d26d
5 changed files with 196 additions and 8 deletions
--- a/libpod/boltdb_state.go
+++ b/libpod/boltdb_state.go
@ -107,6 +107,11 @@ func (s *BoltState) Refresh() error {
 			return err
 		}

+		podsBucket, err := getPodBucket(tx)
+		if err != nil {
+			return err
+		}
+
 		// Iterate through all IDs. Check if they are containers.
 		// If they are, unmarshal their state, and then clear
 		// PID, mountpoint, and state for all of them
@ -115,6 +120,38 @@ func (s *BoltState) Refresh() error {
 		err = idBucket.ForEach(func(id, name []byte) error {
 			ctrBkt := ctrsBucket.Bucket(id)
 			if ctrBkt == nil {
+				// It's a pod
+				podBkt := podsBucket.Bucket(id)
+				if podBkt == nil {
+					// This is neither a pod nor a container
+					// Error out on the dangling ID
+					return errors.Wrapf(ErrInternal, "id %s is not a pod or a container", string(id))
+				}
+
+				// Get the state
+				stateBytes := podBkt.Get(stateKey)
+				if stateBytes == nil {
+					return errors.Wrapf(ErrInternal, "pod %s missing state key", string(id))
+				}
+
+				state := new(podState)
+
+				if err := json.Unmarshal(stateBytes, state); err != nil {
+					return errors.Wrapf(err, "error unmarshalling state for pod %s", string(id))
+				}
+
+				// Clear the CGroup path
+				state.CgroupPath = ""
+
+				newStateBytes, err := json.Marshal(state)
+				if err != nil {
+					return errors.Wrapf(err, "error marshalling modified state for pod %s", string(id))
+				}
+
+				if err := podBkt.Put(stateKey, newStateBytes); err != nil {
+					return errors.Wrapf(err, "error updating state for pod %s in DB", string(id))
+				}
+
 				// It's not a container, nothing to do
 				return nil
 			}
--- a/libpod/options.go
+++ b/libpod/options.go
@ -944,3 +944,32 @@ func WithPodLabels(labels map[string]string) PodCreateOption {
 		return nil
 	}
 }
+
+// WithPodCgroupParent returns a pod creation option that records path as the
+// pod's CGroup parent.
+// The option fails with ErrPodFinalized when it is applied to a pod that is
+// already marked valid.
+func WithPodCgroupParent(path string) PodCreateOption {
+	setParent := func(p *Pod) error {
+		if !p.valid {
+			p.config.CgroupParent = path
+			return nil
+		}
+
+		return ErrPodFinalized
+	}
+
+	return setParent
+}
+
+// WithPodCgroups returns a pod creation option that turns on the pod's
+// UsePodCgroup setting, so containers in the pod use the cgroup created for
+// the pod.
+// A container can still override this by explicitly specifying its own CGroup
+// parent.
+// The option fails with ErrPodFinalized when it is applied to a pod that is
+// already marked valid.
+func WithPodCgroups() PodCreateOption {
+	enable := func(p *Pod) error {
+		if !p.valid {
+			p.config.UsePodCgroup = true
+			return nil
+		}
+
+		return ErrPodFinalized
+	}
+
+	return enable
+}
--- a/libpod/pod.go
+++ b/libpod/pod.go
@ -31,6 +31,11 @@ type PodConfig struct {
 	Labels map[string]string `json:"labels"`
 	// CgroupParent contains the pod's CGroup parent
 	CgroupParent string `json:"cgroupParent"`
+	// UsePodCgroup indicates whether the pod will create its own CGroup and
+	// join containers to it.
+	// If true, containers joined to the pod will use the pod cgroup as
+	// their cgroup parent unless they explicitly set a different one
+	UsePodCgroup bool
 }

 // podState represents a pod's state
@ -64,6 +69,23 @@ func (p *Pod) CgroupParent() string {
 	return p.config.CgroupParent
 }

+// UsePodCgroup reports the pod's UsePodCgroup configuration flag, i.e. whether
+// containers in the pod default to this pod's cgroup instead of the default
+// libpod parent.
+func (p *Pod) UsePodCgroup() bool {
+	enabled := p.config.UsePodCgroup
+
+	return enabled
+}
+
+// CgroupPath returns the path to the pod's CGroup.
+// The pod lock is held while the pod's state is refreshed via updatePod and
+// then read. If the refresh fails, its error is returned together with an
+// empty path.
+func (p *Pod) CgroupPath() (string, error) {
+	p.lock.Lock()
+	// Release the lock only on return; unlocking right away would leave the
+	// state refresh and read below unprotected.
+	defer p.lock.Unlock()
+	if err := p.updatePod(); err != nil {
+		return "", err
+	}
+
+	return p.state.CgroupPath, nil
+}
+
 // Creates a new, empty pod
 func newPod(lockDir string, runtime *Runtime) (*Pod, error) {
 	pod := new(Pod)
@ -85,6 +107,52 @@ func newPod(lockDir string, runtime *Runtime) (*Pod, error) {
 	return pod, nil
 }

+// updatePod refreshes the pod through the runtime state's UpdatePod call and
+// passes back whatever error that call reports.
+func (p *Pod) updatePod() error {
+	return p.runtime.state.UpdatePod(p)
+}
+
+// save writes the pod through the runtime state's SavePod call.
+// A failure is returned wrapped with a message naming the pod's ID.
+func (p *Pod) save() error {
+	if err := p.runtime.state.SavePod(p); err != nil {
+		// The format verb needs the pod ID; without an argument the
+		// message would render as "%!s(MISSING)".
+		return errors.Wrapf(err, "error saving pod %s state", p.ID())
+	}
+
+	return nil
+}
+
+// refresh recomputes a pod's cgroup path after a restart and saves the pod.
+// It holds the pod lock for its whole duration and returns ErrPodRemoved if
+// the pod is no longer valid. For pods that use their own cgroup, the path is
+// set only under the cgroupfs manager; the systemd manager is a no-op and any
+// other manager yields ErrInvalidArg.
+func (p *Pod) refresh() error {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+
+	if !p.valid {
+		return ErrPodRemoved
+	}
+
+	// Pods that do not use their own cgroup have no path to recreate
+	if !p.config.UsePodCgroup {
+		return p.save()
+	}
+
+	switch manager := p.runtime.config.CgroupManager; manager {
+	case CgroupfsCgroupsManager:
+		cgroupPath := filepath.Join(p.config.CgroupParent, p.ID())
+		p.state.CgroupPath = cgroupPath
+
+		logrus.Debugf("setting pod cgroup to %s", cgroupPath)
+	case SystemdCgroupsManager:
+		// NOOP for now, until proper systemd cgroup management
+		// is implemented
+	default:
+		return errors.Wrapf(ErrInvalidArg, "unknown cgroups manager %s specified", manager)
+	}
+
+	// Persist the updated state
+	return p.save()
+}
+
 // Start starts all containers within a pod
 // It combines the effects of Init() and Start() on a container
 // If a container has already been initialized it will be started,
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@ -52,6 +52,15 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ..
 	ctr.state.State = ContainerStateConfigured
 	ctr.runtime = r

+	var pod *Pod
+	if ctr.config.Pod != "" {
+		// Get the pod from state
+		pod, err = r.state.Pod(ctr.config.Pod)
+		if err != nil {
+			return nil, errors.Wrapf(err, "cannot add container %s to pod %s", ctr.ID(), ctr.config.Pod)
+		}
+	}
+
 	if ctr.config.Name == "" {
 		name, err := r.generateName()
 		if err != nil {
@ -65,13 +74,29 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ..
 	switch r.config.CgroupManager {
 	case CgroupfsCgroupsManager:
 		if ctr.config.CgroupParent == "" {
-			ctr.config.CgroupParent = CgroupfsDefaultCgroupParent
+			if pod != nil && pod.config.UsePodCgroup {
+				podCgroup, err := pod.CgroupPath()
+				if err != nil {
+					return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID())
+				}
+				ctr.config.CgroupParent = podCgroup
+			} else {
+				ctr.config.CgroupParent = CgroupfsDefaultCgroupParent
+			}
 		} else if strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") {
 			return nil, errors.Wrapf(ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs")
 		}
 	case SystemdCgroupsManager:
 		if ctr.config.CgroupParent == "" {
-			ctr.config.CgroupParent = SystemdDefaultCgroupParent
+			if pod != nil && pod.config.UsePodCgroup {
+				podCgroup, err := pod.CgroupPath()
+				if err != nil {
+					return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID())
+				}
+				ctr.config.CgroupParent = podCgroup
+			} else {
+				ctr.config.CgroupParent = SystemdDefaultCgroupParent
+			}
 		} else if len(ctr.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") {
 			return nil, errors.Wrapf(ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups")
 		}
@ -110,12 +135,6 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ..
 	// Add the container to the state
 	// TODO: May be worth looking into recovering from name/ID collisions here
 	if ctr.config.Pod != "" {
-		// Get the pod from state
-		pod, err := r.state.Pod(ctr.config.Pod)
-		if err != nil {
-			return nil, errors.Wrapf(err, "cannot add container %s to pod %s", ctr.ID(), ctr.config.Pod)
-		}
-
 		// Lock the pod to ensure we can't add containers to pods
 		// being removed
 		pod.lock.Lock()
--- a/libpod/runtime_pod.go
+++ b/libpod/runtime_pod.go
@ -2,9 +2,12 @@ package libpod

 import (
 	"path"
+	"path/filepath"
 	"strings"

+	"github.com/containerd/cgroups"
 	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
 )

 // Contains the public Runtime API for pods
@ -56,12 +59,21 @@ func (r *Runtime) NewPod(options ...PodCreateOption) (*Pod, error) {
 		} else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
 			return nil, errors.Wrapf(ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs")
 		}
+		// Creating CGroup path is currently a NOOP until proper systemd
+		// cgroup management is merged
 	case SystemdCgroupsManager:
 		if pod.config.CgroupParent == "" {
 			pod.config.CgroupParent = SystemdDefaultCgroupParent
 		} else if len(pod.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
 			return nil, errors.Wrapf(ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups")
 		}
+		// If we are set to use pod cgroups, set the cgroup parent that
+		// all containers in the pod will share
+		// No need to create it with cgroupfs - the first container to
+		// launch should do it for us
+		if pod.config.UsePodCgroup {
+			pod.state.CgroupPath = filepath.Join(pod.config.CgroupParent, pod.ID())
+		}
 	default:
 		return nil, errors.Wrapf(ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.CgroupManager)
 	}
@ -211,6 +223,29 @@ func (r *Runtime) RemovePod(p *Pod, removeCtrs, force bool) error {
 		ctr.valid = false
 	}

+	// Remove pod cgroup, if present
+	if p.state.CgroupPath != "" {
+		switch p.runtime.config.CgroupManager {
+		case SystemdCgroupsManager:
+			// NOOP for now, until proper systemd cgroup management
+			// is implemented
+		case CgroupfsCgroupsManager:
+			// Delete the cgroupfs cgroup
+			logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)
+
+			cgroup, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(p.state.CgroupPath))
+			if err != nil && err != cgroups.ErrCgroupDeleted {
+				return err
+			} else if err == nil {
+				if err := cgroup.Delete(); err != nil {
+					return err
+				}
+			}
+		default:
+			return errors.Wrapf(ErrInvalidArg, "unknown cgroups manager %s specified", p.runtime.config.CgroupManager)
+		}
+	}
+
 	// Remove pod from state
 	if err := r.state.RemovePod(p); err != nil {
 		return err