mirror of https://github.com/containers/podman.git
Add per-pod CGroups
Pods can now create their own (cgroupfs) cgroups, which containers in them can (optionally) use. This presently only works with CGroupFS; systemd cgroups are still WIP. Signed-off-by: Matthew Heon <matthew.heon@gmail.com> Closes: #784 Approved by: rhatdan
This commit is contained in:
parent
018d2c6b1d
commit
7e1ea9d26d
|
|
@ -107,6 +107,11 @@ func (s *BoltState) Refresh() error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
podsBucket, err := getPodBucket(tx)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// Iterate through all IDs. Check if they are containers.
|
// Iterate through all IDs. Check if they are containers.
|
||||||
// If they are, unmarshal their state, and then clear
|
// If they are, unmarshal their state, and then clear
|
||||||
// PID, mountpoint, and state for all of them
|
// PID, mountpoint, and state for all of them
|
||||||
|
|
@ -115,6 +120,38 @@ func (s *BoltState) Refresh() error {
|
||||||
err = idBucket.ForEach(func(id, name []byte) error {
|
err = idBucket.ForEach(func(id, name []byte) error {
|
||||||
ctrBkt := ctrsBucket.Bucket(id)
|
ctrBkt := ctrsBucket.Bucket(id)
|
||||||
if ctrBkt == nil {
|
if ctrBkt == nil {
|
||||||
|
// It's a pod
|
||||||
|
podBkt := podsBucket.Bucket(id)
|
||||||
|
if podBkt == nil {
|
||||||
|
// This is neither a pod nor a container
|
||||||
|
// Error out on the dangling ID
|
||||||
|
return errors.Wrapf(ErrInternal, "id %s is not a pod or a container", string(id))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the state
|
||||||
|
stateBytes := podBkt.Get(stateKey)
|
||||||
|
if stateBytes == nil {
|
||||||
|
return errors.Wrapf(ErrInternal, "pod %s missing state key", string(id))
|
||||||
|
}
|
||||||
|
|
||||||
|
state := new(podState)
|
||||||
|
|
||||||
|
if err := json.Unmarshal(stateBytes, state); err != nil {
|
||||||
|
return errors.Wrapf(err, "error unmarshalling state for pod %s", string(id))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear the CGroup path
|
||||||
|
state.CgroupPath = ""
|
||||||
|
|
||||||
|
newStateBytes, err := json.Marshal(state)
|
||||||
|
if err != nil {
|
||||||
|
return errors.Wrapf(err, "error marshalling modified state for pod %s", string(id))
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := podBkt.Put(stateKey, newStateBytes); err != nil {
|
||||||
|
return errors.Wrapf(err, "error updating state for pod %s in DB", string(id))
|
||||||
|
}
|
||||||
|
|
||||||
// It's not a container, nothing to do
|
// It's not a container, nothing to do
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -944,3 +944,32 @@ func WithPodLabels(labels map[string]string) PodCreateOption {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WithPodCgroupParent sets the Cgroup Parent of the pod.
|
||||||
|
func WithPodCgroupParent(path string) PodCreateOption {
|
||||||
|
return func(pod *Pod) error {
|
||||||
|
if pod.valid {
|
||||||
|
return ErrPodFinalized
|
||||||
|
}
|
||||||
|
|
||||||
|
pod.config.CgroupParent = path
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithPodCgroups tells containers in this pod to use the cgroup created for
|
||||||
|
// this pod.
|
||||||
|
// This can still be overridden at the container level by explicitly specifying
|
||||||
|
// a CGroup parent.
|
||||||
|
func WithPodCgroups() PodCreateOption {
|
||||||
|
return func(pod *Pod) error {
|
||||||
|
if pod.valid {
|
||||||
|
return ErrPodFinalized
|
||||||
|
}
|
||||||
|
|
||||||
|
pod.config.UsePodCgroup = true
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,11 @@ type PodConfig struct {
|
||||||
Labels map[string]string `json:"labels"`
|
Labels map[string]string `json:"labels"`
|
||||||
// CgroupParent contains the pod's CGroup parent
|
// CgroupParent contains the pod's CGroup parent
|
||||||
CgroupParent string `json:"cgroupParent"`
|
CgroupParent string `json:"cgroupParent"`
|
||||||
|
// UsePodCgroup indicates whether the pod will create its own CGroup and
|
||||||
|
// join containers to it.
|
||||||
|
// If true, containers joined to the pod will default to the pod cgroup as
|
||||||
|
// their cgroup parent unless they explicitly set a different cgroup parent
|
||||||
|
UsePodCgroup bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// podState represents a pod's state
|
// podState represents a pod's state
|
||||||
|
|
@ -64,6 +69,23 @@ func (p *Pod) CgroupParent() string {
|
||||||
return p.config.CgroupParent
|
return p.config.CgroupParent
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UsePodCgroup returns whether containers in the pod will default to this pod's
|
||||||
|
// cgroup instead of the default libpod parent
|
||||||
|
func (p *Pod) UsePodCgroup() bool {
|
||||||
|
return p.config.UsePodCgroup
|
||||||
|
}
|
||||||
|
|
||||||
|
// CgroupPath returns the path to the pod's CGroup
|
||||||
|
func (p *Pod) CgroupPath() (string, error) {
|
||||||
|
p.lock.Lock()
|
||||||
|
p.lock.Unlock()
|
||||||
|
if err := p.updatePod(); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return p.state.CgroupPath, nil
|
||||||
|
}
|
||||||
|
|
||||||
// Creates a new, empty pod
|
// Creates a new, empty pod
|
||||||
func newPod(lockDir string, runtime *Runtime) (*Pod, error) {
|
func newPod(lockDir string, runtime *Runtime) (*Pod, error) {
|
||||||
pod := new(Pod)
|
pod := new(Pod)
|
||||||
|
|
@ -85,6 +107,52 @@ func newPod(lockDir string, runtime *Runtime) (*Pod, error) {
|
||||||
return pod, nil
|
return pod, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update pod state from database
|
||||||
|
func (p *Pod) updatePod() error {
|
||||||
|
if err := p.runtime.state.UpdatePod(p); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save pod state to database
|
||||||
|
func (p *Pod) save() error {
|
||||||
|
if err := p.runtime.state.SavePod(p); err != nil {
|
||||||
|
return errors.Wrapf(err, "error saving pod %s state")
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Refresh a pod's state after restart
|
||||||
|
func (p *Pod) refresh() error {
|
||||||
|
p.lock.Lock()
|
||||||
|
defer p.lock.Unlock()
|
||||||
|
|
||||||
|
if !p.valid {
|
||||||
|
return ErrPodRemoved
|
||||||
|
}
|
||||||
|
|
||||||
|
// We need to recreate the pod's cgroup
|
||||||
|
if p.config.UsePodCgroup {
|
||||||
|
switch p.runtime.config.CgroupManager {
|
||||||
|
case SystemdCgroupsManager:
|
||||||
|
// NOOP for now, until proper systemd cgroup management
|
||||||
|
// is implemented
|
||||||
|
case CgroupfsCgroupsManager:
|
||||||
|
p.state.CgroupPath = filepath.Join(p.config.CgroupParent, p.ID())
|
||||||
|
|
||||||
|
logrus.Debugf("setting pod cgroup to %s", p.state.CgroupPath)
|
||||||
|
default:
|
||||||
|
return errors.Wrapf(ErrInvalidArg, "unknown cgroups manager %s specified", p.runtime.config.CgroupManager)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save changes
|
||||||
|
return p.save()
|
||||||
|
}
|
||||||
|
|
||||||
// Start starts all containers within a pod
|
// Start starts all containers within a pod
|
||||||
// It combines the effects of Init() and Start() on a container
|
// It combines the effects of Init() and Start() on a container
|
||||||
// If a container has already been initialized it will be started,
|
// If a container has already been initialized it will be started,
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,15 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ..
|
||||||
ctr.state.State = ContainerStateConfigured
|
ctr.state.State = ContainerStateConfigured
|
||||||
ctr.runtime = r
|
ctr.runtime = r
|
||||||
|
|
||||||
|
var pod *Pod
|
||||||
|
if ctr.config.Pod != "" {
|
||||||
|
// Get the pod from state
|
||||||
|
pod, err = r.state.Pod(ctr.config.Pod)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.Wrapf(err, "cannot add container %s to pod %s", ctr.ID(), ctr.config.Pod)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ctr.config.Name == "" {
|
if ctr.config.Name == "" {
|
||||||
name, err := r.generateName()
|
name, err := r.generateName()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
@ -65,13 +74,29 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ..
|
||||||
switch r.config.CgroupManager {
|
switch r.config.CgroupManager {
|
||||||
case CgroupfsCgroupsManager:
|
case CgroupfsCgroupsManager:
|
||||||
if ctr.config.CgroupParent == "" {
|
if ctr.config.CgroupParent == "" {
|
||||||
ctr.config.CgroupParent = CgroupfsDefaultCgroupParent
|
if pod != nil && pod.config.UsePodCgroup {
|
||||||
|
podCgroup, err := pod.CgroupPath()
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID())
|
||||||
|
}
|
||||||
|
ctr.config.CgroupParent = podCgroup
|
||||||
|
} else {
|
||||||
|
ctr.config.CgroupParent = CgroupfsDefaultCgroupParent
|
||||||
|
}
|
||||||
} else if strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") {
|
} else if strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") {
|
||||||
return nil, errors.Wrapf(ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs")
|
return nil, errors.Wrapf(ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs")
|
||||||
}
|
}
|
||||||
case SystemdCgroupsManager:
|
case SystemdCgroupsManager:
|
||||||
if ctr.config.CgroupParent == "" {
|
if ctr.config.CgroupParent == "" {
|
||||||
ctr.config.CgroupParent = SystemdDefaultCgroupParent
|
if pod != nil && pod.config.UsePodCgroup {
|
||||||
|
podCgroup, err := pod.CgroupPath()
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.Wrapf(err, "error retrieving pod %s cgroup", pod.ID())
|
||||||
|
}
|
||||||
|
ctr.config.CgroupParent = podCgroup
|
||||||
|
} else {
|
||||||
|
ctr.config.CgroupParent = SystemdDefaultCgroupParent
|
||||||
|
}
|
||||||
} else if len(ctr.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") {
|
} else if len(ctr.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(ctr.config.CgroupParent), ".slice") {
|
||||||
return nil, errors.Wrapf(ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups")
|
return nil, errors.Wrapf(ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups")
|
||||||
}
|
}
|
||||||
|
|
@ -110,12 +135,6 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, options ..
|
||||||
// Add the container to the state
|
// Add the container to the state
|
||||||
// TODO: May be worth looking into recovering from name/ID collisions here
|
// TODO: May be worth looking into recovering from name/ID collisions here
|
||||||
if ctr.config.Pod != "" {
|
if ctr.config.Pod != "" {
|
||||||
// Get the pod from state
|
|
||||||
pod, err := r.state.Pod(ctr.config.Pod)
|
|
||||||
if err != nil {
|
|
||||||
return nil, errors.Wrapf(err, "cannot add container %s to pod %s", ctr.ID(), ctr.config.Pod)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Lock the pod to ensure we can't add containers to pods
|
// Lock the pod to ensure we can't add containers to pods
|
||||||
// being removed
|
// being removed
|
||||||
pod.lock.Lock()
|
pod.lock.Lock()
|
||||||
|
|
|
||||||
|
|
@ -2,9 +2,12 @@ package libpod
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"path"
|
"path"
|
||||||
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/containerd/cgroups"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Contains the public Runtime API for pods
|
// Contains the public Runtime API for pods
|
||||||
|
|
@ -56,12 +59,21 @@ func (r *Runtime) NewPod(options ...PodCreateOption) (*Pod, error) {
|
||||||
} else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
|
} else if strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
|
||||||
return nil, errors.Wrapf(ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs")
|
return nil, errors.Wrapf(ErrInvalidArg, "systemd slice received as cgroup parent when using cgroupfs")
|
||||||
}
|
}
|
||||||
|
// Creating CGroup path is currently a NOOP until proper systemd
|
||||||
|
// cgroup management is merged
|
||||||
case SystemdCgroupsManager:
|
case SystemdCgroupsManager:
|
||||||
if pod.config.CgroupParent == "" {
|
if pod.config.CgroupParent == "" {
|
||||||
pod.config.CgroupParent = SystemdDefaultCgroupParent
|
pod.config.CgroupParent = SystemdDefaultCgroupParent
|
||||||
} else if len(pod.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
|
} else if len(pod.config.CgroupParent) < 6 || !strings.HasSuffix(path.Base(pod.config.CgroupParent), ".slice") {
|
||||||
return nil, errors.Wrapf(ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups")
|
return nil, errors.Wrapf(ErrInvalidArg, "did not receive systemd slice as cgroup parent when using systemd to manage cgroups")
|
||||||
}
|
}
|
||||||
|
// If we are set to use pod cgroups, set the cgroup parent that
|
||||||
|
// all containers in the pod will share
|
||||||
|
// No need to create it with cgroupfs - the first container to
|
||||||
|
// launch should do it for us
|
||||||
|
if pod.config.UsePodCgroup {
|
||||||
|
pod.state.CgroupPath = filepath.Join(pod.config.CgroupParent, pod.ID())
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
return nil, errors.Wrapf(ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.CgroupManager)
|
return nil, errors.Wrapf(ErrInvalidArg, "unsupported CGroup manager: %s - cannot validate cgroup parent", r.config.CgroupManager)
|
||||||
}
|
}
|
||||||
|
|
@ -211,6 +223,29 @@ func (r *Runtime) RemovePod(p *Pod, removeCtrs, force bool) error {
|
||||||
ctr.valid = false
|
ctr.valid = false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Remove pod cgroup, if present
|
||||||
|
if p.state.CgroupPath != "" {
|
||||||
|
switch p.runtime.config.CgroupManager {
|
||||||
|
case SystemdCgroupsManager:
|
||||||
|
// NOOP for now, until proper systemd cgroup management
|
||||||
|
// is implemented
|
||||||
|
case CgroupfsCgroupsManager:
|
||||||
|
// Delete the cgroupfs cgroup
|
||||||
|
logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)
|
||||||
|
|
||||||
|
cgroup, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(p.state.CgroupPath))
|
||||||
|
if err != nil && err != cgroups.ErrCgroupDeleted {
|
||||||
|
return err
|
||||||
|
} else if err == nil {
|
||||||
|
if err := cgroup.Delete(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return errors.Wrapf(ErrInvalidArg, "unknown cgroups manager %s specified", p.runtime.config.CgroupManager)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Remove pod from state
|
// Remove pod from state
|
||||||
if err := r.state.RemovePod(p); err != nil {
|
if err := r.state.RemovePod(p); err != nil {
|
||||||
return err
|
return err
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue