Add per-pod CGroups

Pods can now create their own (cgroupfs) cgroups which containers
in them can (optionally) use.

This presently only works with CGroupFS; systemd cgroup support is
still a work in progress.

Signed-off-by: Matthew Heon <matthew.heon@gmail.com>

Closes: #784
Approved by: rhatdan
This commit is contained in:
Matthew Heon 2018-05-16 12:45:09 -04:00 committed by Atomic Bot
parent 018d2c6b1d
commit 7e1ea9d26d
5 changed files with 196 additions and 8 deletions

View File

@ -107,6 +107,11 @@ func (s *BoltState) Refresh() error {
return err return err
} }
podsBucket, err := getPodBucket(tx)
if err != nil {
return err
}
// Iterate through all IDs. Check if they are containers. // Iterate through all IDs. Check if they are containers.
// If they are, unmarshal their state, and then clear // If they are, unmarshal their state, and then clear
// PID, mountpoint, and state for all of them // PID, mountpoint, and state for all of them
@ -115,6 +120,38 @@ func (s *BoltState) Refresh() error {
err = idBucket.ForEach(func(id, name []byte) error { err = idBucket.ForEach(func(id, name []byte) error {
ctrBkt := ctrsBucket.Bucket(id) ctrBkt := ctrsBucket.Bucket(id)
if ctrBkt == nil { if ctrBkt == nil {
// It's a pod
podBkt := podsBucket.Bucket(id)
if podBkt == nil {
// This is neither a pod nor a container
// Error out on the dangling ID
return errors.Wrapf(ErrInternal, "id %s is not a pod or a container", string(id))
}
// Get the state
stateBytes := podBkt.Get(stateKey)
if stateBytes == nil {
return errors.Wrapf(ErrInternal, "pod %s missing state key", string(id))
}
state := new(podState)
if err := json.Unmarshal(stateBytes, state); err != nil {
return errors.Wrapf(err, "error unmarshalling state for pod %s", string(id))
}
// Clear the CGroup path
state.CgroupPath = ""
newStateBytes, err := json.Marshal(state)
if err != nil {
return errors.Wrapf(err, "error marshalling modified state for pod %s", string(id))
}
if err := podBkt.Put(stateKey, newStateBytes); err != nil {
return errors.Wrapf(err, "error updating state for pod %s in DB", string(id))
}
// It's not a container, nothing to do // It's not a container, nothing to do
return nil return nil
} }

View File

@ -944,3 +944,32 @@ func WithPodLabels(labels map[string]string) PodCreateOption {
return nil return nil
} }
} }
// WithPodCgroupParent sets the Cgroup Parent of the pod.
// The returned option stores path in the pod's config, and refuses to do so
// with ErrPodFinalized once the pod has been marked valid.
func WithPodCgroupParent(path string) PodCreateOption {
	setParent := func(p *Pod) error {
		// A valid pod has already been finalized; its config is frozen.
		if p.valid {
			return ErrPodFinalized
		}

		p.config.CgroupParent = path
		return nil
	}

	return setParent
}
// WithPodCgroups tells containers in this pod to use the cgroup created for
// this pod.
// A container can still override this by explicitly specifying its own
// CGroup parent.
// The returned option fails with ErrPodFinalized once the pod has been marked
// valid.
func WithPodCgroups() PodCreateOption {
	enablePodCgroup := func(p *Pod) error {
		// A valid pod has already been finalized; its config is frozen.
		if p.valid {
			return ErrPodFinalized
		}

		p.config.UsePodCgroup = true
		return nil
	}

	return enablePodCgroup
}

View File

@ -31,6 +31,11 @@ type PodConfig struct {
Labels map[string]string `json:"labels"` Labels map[string]string `json:"labels"`
// CgroupParent contains the pod's CGroup parent // CgroupParent contains the pod's CGroup parent
CgroupParent string `json:"cgroupParent"` CgroupParent string `json:"cgroupParent"`
// UsePodCgroup indicates whether the pod will create its own CGroup and
// join containers to it.
// If true, all containers joined to the pod will use the pod cgroup as
// their cgroup parent unless they explicitly set a different cgroup parent
UsePodCgroup bool
} }
// podState represents a pod's state // podState represents a pod's state
@ -64,6 +69,23 @@ func (p *Pod) CgroupParent() string {
return p.config.CgroupParent return p.config.CgroupParent
} }
// UsePodCgroup reports whether containers in the pod will default to this
// pod's cgroup instead of the default libpod parent.
// The value is read straight from the pod's config.
func (p *Pod) UsePodCgroup() bool {
	usePodCgroup := p.config.UsePodCgroup

	return usePodCgroup
}
// CgroupPath returns the path to the pod's CGroup.
// The pod lock is held for the whole call so that syncing the pod's state
// from the database and reading the cgroup path happen atomically with
// respect to other holders of the lock.
// Returns an empty string and the underlying error if the state cannot be
// updated.
func (p *Pod) CgroupPath() (string, error) {
	p.lock.Lock()
	// Release on return; unlocking immediately would leave the state update
	// and the read below unprotected.
	defer p.lock.Unlock()

	if err := p.updatePod(); err != nil {
		return "", err
	}

	return p.state.CgroupPath, nil
}
// Creates a new, empty pod // Creates a new, empty pod
func newPod(lockDir string, runtime *Runtime) (*Pod, error) { func newPod(lockDir string, runtime *Runtime) (*Pod, error) {
pod := new(Pod) pod := new(Pod)
@ -85,6 +107,52 @@ func newPod(lockDir string, runtime *Runtime) (*Pod, error) {
return pod, nil return pod, nil
} }
// updatePod syncs the pod's state from the database by delegating to the
// runtime's state backend. Any error from the backend is returned unchanged.
func (p *Pod) updatePod() error {
	return p.runtime.state.UpdatePod(p)
}
// save writes the pod's state to the database through the runtime's state
// backend. On failure the backend error is wrapped with the pod's ID.
func (p *Pod) save() error {
	if err := p.runtime.state.SavePod(p); err != nil {
		// The %s verb needs the pod ID; without it the message renders as
		// "%!s(MISSING)".
		return errors.Wrapf(err, "error saving pod %s state", p.ID())
	}

	return nil
}
// refresh restores a pod's state after a restart and saves it to the
// database.
// When the pod is configured to use its own cgroup, the cgroup path is
// recomputed for the cgroupfs manager as <CgroupParent>/<pod ID>; for the
// systemd manager nothing is done yet.
// Returns ErrPodRemoved if the pod is no longer valid, an ErrInvalidArg-wrapped
// error for an unrecognized cgroup manager, or any error from saving.
func (p *Pod) refresh() error {
	p.lock.Lock()
	defer p.lock.Unlock()

	if !p.valid {
		return ErrPodRemoved
	}

	// Pods without their own cgroup have nothing to recreate; just persist.
	if !p.config.UsePodCgroup {
		return p.save()
	}

	// We need to recreate the pod's cgroup
	manager := p.runtime.config.CgroupManager
	switch manager {
	case SystemdCgroupsManager:
		// NOOP for now, until proper systemd cgroup management
		// is implemented
	case CgroupfsCgroupsManager:
		cgroupPath := filepath.Join(p.config.CgroupParent, p.ID())
		p.state.CgroupPath = cgroupPath
		logrus.Debugf("setting pod cgroup to %s", p.state.CgroupPath)
	default:
		return errors.Wrapf(ErrInvalidArg, "unknown cgroups manager %s specified", manager)
	}

	// Save changes
	return p.save()
}
// Start starts all containers within a pod // Start starts all containers within a pod
// It combines the effects of Init() and Start() on a container // It combines the effects of Init() and Start() on a container
// If a container has already been initialized it will be started, // If a container has already been initialized it will be started,

View File

@ -52,6 +52,15 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ..
ctr.state.State = ContainerStateConfigured ctr.state.State = ContainerStateConfigured
ctr.runtime = r ctr.runtime = r
var pod *Pod
if ctr.config.Pod != "" {
// Get the pod from state
pod, err = r.state.Pod(ctr.config.Pod)
if err != nil {
return nil, errors.Wrapf(err, "cannot add container %s to pod %s", ctr.ID(), ctr.config.Pod)
}
}
if ctr.config.Name == "" { if ctr.config.Name == "" {
name, err := r.generateName() name, err := r.generateName()
if err != nil { if err != nil {
@ -65,13 +74,29 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ..
switch r.config.CgroupManager { switch r.config.CgroupManager {
case CgroupfsCgroupsManager: case CgroupfsCgroupsManager:
if ctr.config.CgroupParent == "" { if ctr.config.CgroupParent == "" {
ctr.config.CgroupParent = CgroupfsDefaultCgroupParent if pod != nil && pod.config.UsePodCgroup {
podCgroup, err := pod.CgroupPath()
if err != nil {
return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID())
}
ctr.config.CgroupParent = podCgroup
} else {
ctr.config.CgroupParent = CgroupfsDefaultCgroupParent
}
} else if strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") { } else if strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") {
return nil, errors.Wrapf(ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs") return nil, errors.Wrapf(ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs")
} }
case SystemdCgroupsManager: case SystemdCgroupsManager:
if ctr.config.CgroupParent == "" { if ctr.config.CgroupParent == "" {
ctr.config.CgroupParent = SystemdDefaultCgroupParent if pod != nil && pod.config.UsePodCgroup {
podCgroup, err := pod.CgroupPath()
if err != nil {
return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID())
}
ctr.config.CgroupParent = podCgroup
} else {
ctr.config.CgroupParent = SystemdDefaultCgroupParent
}
} else if len(ctr.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") { } else if len(ctr.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") {
return nil, errors.Wrapf(ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups") return nil, errors.Wrapf(ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups")
} }
@ -110,12 +135,6 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ..
// Add the container to the state // Add the container to the state
// TODO: May be worth looking into recovering from name/ID collisions here // TODO: May be worth looking into recovering from name/ID collisions here
if ctr.config.Pod != "" { if ctr.config.Pod != "" {
// Get the pod from state
pod, err := r.state.Pod(ctr.config.Pod)
if err != nil {
return nil, errors.Wrapf(err, "cannot add container %s to pod %s", ctr.ID(), ctr.config.Pod)
}
// Lock the pod to ensure we can't add containers to pods // Lock the pod to ensure we can't add containers to pods
// being removed // being removed
pod.lock.Lock() pod.lock.Lock()

View File

@ -2,9 +2,12 @@ package libpod
import ( import (
"path" "path"
"path/filepath"
"strings" "strings"
"github.com/containerd/cgroups"
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/sirupsen/logrus"
) )
// Contains the public Runtime API for pods // Contains the public Runtime API for pods
@ -56,12 +59,21 @@ func (r *Runtime) NewPod(options ...PodCreateOption) (*Pod, error) {
} else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") { } else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
return nil, errors.Wrapf(ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs") return nil, errors.Wrapf(ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs")
} }
// Creating CGroup path is currently a NOOP until proper systemd
// cgroup management is merged
case SystemdCgroupsManager: case SystemdCgroupsManager:
if pod.config.CgroupParent == "" { if pod.config.CgroupParent == "" {
pod.config.CgroupParent = SystemdDefaultCgroupParent pod.config.CgroupParent = SystemdDefaultCgroupParent
} else if len(pod.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") { } else if len(pod.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
return nil, errors.Wrapf(ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups") return nil, errors.Wrapf(ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups")
} }
// If we are set to use pod cgroups, set the cgroup parent that
// all containers in the pod will share
// No need to create it with cgroupfs - the first container to
// launch should do it for us
if pod.config.UsePodCgroup {
pod.state.CgroupPath = filepath.Join(pod.config.CgroupParent, pod.ID())
}
default: default:
return nil, errors.Wrapf(ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.CgroupManager) return nil, errors.Wrapf(ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.CgroupManager)
} }
@ -211,6 +223,29 @@ func (r *Runtime) RemovePod(p *Pod, removeCtrs, force bool) error {
ctr.valid = false ctr.valid = false
} }
// Remove pod cgroup, if present
if p.state.CgroupPath != "" {
switch p.runtime.config.CgroupManager {
case SystemdCgroupsManager:
// NOOP for now, until proper systemd cgroup management
// is implemented
case CgroupfsCgroupsManager:
// Delete the cgroupfs cgroup
logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)
cgroup, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(p.state.CgroupPath))
if err != nil && err != cgroups.ErrCgroupDeleted {
return err
} else if err == nil {
if err := cgroup.Delete(); err != nil {
return err
}
}
default:
return errors.Wrapf(ErrInvalidArg, "unknown cgroups manager %s specified", p.runtime.config.CgroupManager)
}
}
// Remove pod from state // Remove pod from state
if err := r.state.RemovePod(p); err != nil { if err := r.state.RemovePod(p); err != nil {
return err return err