Add per-pod CGroups

Pods can now create their own (cgroupfs) cgroups which containers
in them can (optionally) use.

This presently only works with CGroupFS; systemd cgroups are
still WIP.

Signed-off-by: Matthew Heon <matthew.heon@gmail.com>

Closes: #784
Approved by: rhatdan
This commit is contained in:
Matthew Heon 2018-05-16 12:45:09 -04:00 committed by Atomic Bot
parent 018d2c6b1d
commit 7e1ea9d26d
5 changed files with 196 additions and 8 deletions

View File

@ -107,6 +107,11 @@ func (s *BoltState) Refresh() error {
return err
}
podsBucket, err := getPodBucket(tx)
if err != nil {
return err
}
// Iterate through all IDs. Check if they are containers.
// If they are, unmarshal their state, and then clear
// PID, mountpoint, and state for all of them
@ -115,6 +120,38 @@ func (s *BoltState) Refresh() error {
err = idBucket.ForEach(func(id, name []byte) error {
ctrBkt := ctrsBucket.Bucket(id)
if ctrBkt == nil {
// It's a pod
podBkt := podsBucket.Bucket(id)
if podBkt == nil {
// This is neither a pod nor a container
// Error out on the dangling ID
return errors.Wrapf(ErrInternal, "id %s is not a pod or a container", string(id))
}
// Get the state
stateBytes := podBkt.Get(stateKey)
if stateBytes == nil {
return errors.Wrapf(ErrInternal, "pod %s missing state key", string(id))
}
state := new(podState)
if err := json.Unmarshal(stateBytes, state); err != nil {
return errors.Wrapf(err, "error unmarshalling state for pod %s", string(id))
}
// Clear the CGroup path
state.CgroupPath = ""
newStateBytes, err := json.Marshal(state)
if err != nil {
return errors.Wrapf(err, "error marshalling modified state for pod %s", string(id))
}
if err := podBkt.Put(stateKey, newStateBytes); err != nil {
return errors.Wrapf(err, "error updating state for pod %s in DB", string(id))
}
// It's not a container, nothing to do
return nil
}

View File

@ -944,3 +944,32 @@ func WithPodLabels(labels map[string]string) PodCreateOption {
return nil
}
}
// WithPodCgroupParent returns a pod creation option that records path as the
// pod's cgroup parent.
// The returned option fails with ErrPodFinalized if it is applied to a pod
// that has already been marked valid.
func WithPodCgroupParent(path string) PodCreateOption {
	setParent := func(p *Pod) error {
		// Configuration may only be changed before the pod is finalized.
		if !p.valid {
			p.config.CgroupParent = path
			return nil
		}

		return ErrPodFinalized
	}

	return setParent
}
// WithPodCgroups returns a pod creation option that makes containers in the
// pod use the cgroup created for the pod.
// A container can still override this by explicitly specifying its own
// CGroup parent.
// The returned option fails with ErrPodFinalized if it is applied to a pod
// that has already been marked valid.
func WithPodCgroups() PodCreateOption {
	enable := func(p *Pod) error {
		// Configuration may only be changed before the pod is finalized.
		if !p.valid {
			p.config.UsePodCgroup = true
			return nil
		}

		return ErrPodFinalized
	}

	return enable
}

View File

@ -31,6 +31,11 @@ type PodConfig struct {
Labels map[string]string `json:"labels"`
// CgroupParent contains the pod's CGroup parent
CgroupParent string `json:"cgroupParent"`
// UsePodCgroup indicates whether the pod will create its own CGroup and
// join containers to it.
// If true, containers joined to the pod will default to the pod cgroup as
// their cgroup parent unless they explicitly set a different cgroup parent
UsePodCgroup bool
}
// podState represents a pod's state
@ -64,6 +69,23 @@ func (p *Pod) CgroupParent() string {
return p.config.CgroupParent
}
// UsePodCgroup reports the pod's UsePodCgroup configuration flag: whether
// containers in the pod will default to this pod's cgroup instead of the
// default libpod parent.
func (p *Pod) UsePodCgroup() bool {
	usePodCgroup := p.config.UsePodCgroup

	return usePodCgroup
}
// CgroupPath returns the path to the pod's CGroup.
// The pod lock is held for the whole call, so the pod's state is updated and
// then read without the lock being released in between.
// If updating the pod's state fails, that error is returned together with an
// empty path.
func (p *Pod) CgroupPath() (string, error) {
	p.lock.Lock()
	// Release the lock only on return; unlocking right after Lock() would
	// leave the state update and read below unprotected.
	defer p.lock.Unlock()

	if err := p.updatePod(); err != nil {
		return "", err
	}

	return p.state.CgroupPath, nil
}
// Creates a new, empty pod
func newPod(lockDir string, runtime *Runtime) (*Pod, error) {
pod := new(Pod)
@ -85,6 +107,52 @@ func newPod(lockDir string, runtime *Runtime) (*Pod, error) {
return pod, nil
}
// updatePod asks the runtime's state backend to update this pod's state and
// returns whatever error the backend reports, unmodified.
func (p *Pod) updatePod() error {
	return p.runtime.state.UpdatePod(p)
}
// save writes the pod's state to the database through the runtime's state
// backend.
// Any error reported by the backend is returned wrapped with a message that
// names the pod by ID.
func (p *Pod) save() error {
	if err := p.runtime.state.SavePod(p); err != nil {
		// The %s verb needs the pod ID; without an argument the message
		// would render as "%!s(MISSING)".
		return errors.Wrapf(err, "error saving pod %s state", p.ID())
	}

	return nil
}
// refresh rebuilds a pod's state after a restart and saves it.
// The pod lock is held for the whole call. It returns ErrPodRemoved if the
// pod is no longer valid, ErrInvalidArg (wrapped) if the pod uses its own
// cgroup and the configured cgroup manager is not recognised, and otherwise
// the result of saving the pod.
func (p *Pod) refresh() error {
	p.lock.Lock()
	defer p.lock.Unlock()

	if !p.valid {
		return ErrPodRemoved
	}

	// Pods without their own cgroup have nothing to recreate.
	if p.config.UsePodCgroup {
		manager := p.runtime.config.CgroupManager

		if manager == CgroupfsCgroupsManager {
			// The pod's cgroup path is its ID under the cgroup parent.
			cgroupPath := filepath.Join(p.config.CgroupParent, p.ID())
			p.state.CgroupPath = cgroupPath
			logrus.Debugf("setting pod cgroup to %s", p.state.CgroupPath)
		} else if manager != SystemdCgroupsManager {
			// systemd is a NOOP for now, until proper systemd cgroup
			// management is implemented; anything else is an error.
			return errors.Wrapf(ErrInvalidArg, "unknown cgroups manager %s specified", manager)
		}
	}

	// Persist the refreshed state.
	return p.save()
}
// Start starts all containers within a pod
// It combines the effects of Init() and Start() on a container
// If a container has already been initialized it will be started,

View File

@ -52,6 +52,15 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ..
ctr.state.State = ContainerStateConfigured
ctr.runtime = r
var pod *Pod
if ctr.config.Pod != "" {
// Get the pod from state
pod, err = r.state.Pod(ctr.config.Pod)
if err != nil {
return nil, errors.Wrapf(err, "cannot add container %s to pod %s", ctr.ID(), ctr.config.Pod)
}
}
if ctr.config.Name == "" {
name, err := r.generateName()
if err != nil {
@ -65,13 +74,29 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ..
switch r.config.CgroupManager {
case CgroupfsCgroupsManager:
if ctr.config.CgroupParent == "" {
ctr.config.CgroupParent = CgroupfsDefaultCgroupParent
if pod != nil && pod.config.UsePodCgroup {
podCgroup, err := pod.CgroupPath()
if err != nil {
return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID())
}
ctr.config.CgroupParent = podCgroup
} else {
ctr.config.CgroupParent = CgroupfsDefaultCgroupParent
}
} else if strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") {
return nil, errors.Wrapf(ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs")
}
case SystemdCgroupsManager:
if ctr.config.CgroupParent == "" {
ctr.config.CgroupParent = SystemdDefaultCgroupParent
if pod != nil && pod.config.UsePodCgroup {
podCgroup, err := pod.CgroupPath()
if err != nil {
return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID())
}
ctr.config.CgroupParent = podCgroup
} else {
ctr.config.CgroupParent = SystemdDefaultCgroupParent
}
} else if len(ctr.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") {
return nil, errors.Wrapf(ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups")
}
@ -110,12 +135,6 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ..
// Add the container to the state
// TODO: May be worth looking into recovering from name/ID collisions here
if ctr.config.Pod != "" {
// Get the pod from state
pod, err := r.state.Pod(ctr.config.Pod)
if err != nil {
return nil, errors.Wrapf(err, "cannot add container %s to pod %s", ctr.ID(), ctr.config.Pod)
}
// Lock the pod to ensure we can't add containers to pods
// being removed
pod.lock.Lock()

View File

@ -2,9 +2,12 @@ package libpod
import (
"path"
"path/filepath"
"strings"
"github.com/containerd/cgroups"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
// Contains the public Runtime API for pods
@ -56,12 +59,21 @@ func (r *Runtime) NewPod(options ...PodCreateOption) (*Pod, error) {
} else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
return nil, errors.Wrapf(ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs")
}
// Creating CGroup path is currently a NOOP until proper systemd
// cgroup management is merged
case SystemdCgroupsManager:
if pod.config.CgroupParent == "" {
pod.config.CgroupParent = SystemdDefaultCgroupParent
} else if len(pod.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
return nil, errors.Wrapf(ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups")
}
// If we are set to use pod cgroups, set the cgroup parent that
// all containers in the pod will share
// No need to create it with cgroupfs - the first container to
// launch should do it for us
if pod.config.UsePodCgroup {
pod.state.CgroupPath = filepath.Join(pod.config.CgroupParent, pod.ID())
}
default:
return nil, errors.Wrapf(ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.CgroupManager)
}
@ -211,6 +223,29 @@ func (r *Runtime) RemovePod(p *Pod, removeCtrs, force bool) error {
ctr.valid = false
}
// Remove pod cgroup, if present
if p.state.CgroupPath != "" {
switch p.runtime.config.CgroupManager {
case SystemdCgroupsManager:
// NOOP for now, until proper systemd cgroup management
// is implemented
case CgroupfsCgroupsManager:
// Delete the cgroupfs cgroup
logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)
cgroup, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(p.state.CgroupPath))
if err != nil && err != cgroups.ErrCgroupDeleted {
return err
} else if err == nil {
if err := cgroup.Delete(); err != nil {
return err
}
}
default:
return errors.Wrapf(ErrInvalidArg, "unknown cgroups manager %s specified", p.runtime.config.CgroupManager)
}
}
// Remove pod from state
if err := r.state.RemovePod(p); err != nil {
return err