pod: add exit policies

Add the notion of an "exit policy" to a pod.  This policy controls the
behaviour when the last container of a pod exits.  Initially, there are
two policies:

 - "continue" : the pod continues running. This is the default policy
                when creating a pod.

 - "stop" : stop the pod when the last container exits. This is the
            default behaviour for `play kube`.

In order to implement the deferred stop of a pod, add a worker queue to
the libpod runtime.  The queue will pick up work items and in this case
helps resolve deadlocks that would otherwise occur if we attempted to
stop a pod during container cleanup.

Note that the default restart policy of `play kube` is "Always".  Hence,
in order to really solve #13464, the YAML files must set a custom
restart policy; the tests use "OnFailure".

Fixes: #13464
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
This commit is contained in:
Valentin Rothberg 2022-04-13 16:21:21 +02:00
parent 77d872ea38
commit 4eff0c8cf2
15 changed files with 271 additions and 2 deletions

View File

@ -492,6 +492,11 @@ func AutocompleteImages(cmd *cobra.Command, args []string, toComplete string) ([
return getImages(cmd, toComplete)
}
// AutocompletePodExitPolicy - Autocomplete the value of a pod exit policy.
// The candidates are the entries of config.PodExitPolicies; file name
// completion is turned off for this flag.
func AutocompletePodExitPolicy(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
	policies := config.PodExitPolicies
	return policies, cobra.ShellCompDirectiveNoFileComp
}
// AutocompleteCreateRun - Autocomplete only the first argument as image and then do file completion.
func AutocompleteCreateRun(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
if !validCurrentCmdLine(cmd, args, toComplete) {

View File

@ -72,6 +72,10 @@ func init() {
flags.StringVarP(&createOptions.Name, nameFlagName, "n", "", "Assign a name to the pod")
_ = createCommand.RegisterFlagCompletionFunc(nameFlagName, completion.AutocompleteNone)
policyFlag := "exit-policy"
flags.StringVarP(&createOptions.ExitPolicy, policyFlag, "", string(containerConfig.Engine.PodExitPolicy), "Behaviour when the last container exits")
_ = createCommand.RegisterFlagCompletionFunc(policyFlag, common.AutocompletePodExitPolicy)
infraImageFlagName := "infra-image"
var defInfraImage string
if !registry.IsRemote() {

View File

@ -75,6 +75,15 @@ Set custom DNS options in the /etc/resolv.conf file that will be shared between
Set custom DNS search domains in the /etc/resolv.conf file that will be shared between all containers in the pod.
#### **--exit-policy**=**continue** | *stop*
Set the exit policy of the pod when the last container exits. Supported policies are:
| Exit Policy | Description |
| ------------------ | --------------------------------------------------------------------------- |
| *continue* | The pod continues running when the last container exits. Used by default. |
| *stop* | The pod is stopped when the last container exits. Used in `play kube`. |
#### **--gidmap**=*container_gid:host_gid:amount*
GID map for the user namespace. Using this flag will run the container with user namespace enabled. It conflicts with the `--userns` and `--subgidname` flags.
@ -554,7 +563,7 @@ $ podman pod create --network net1:ip=10.89.1.5 --network net2:ip=10.89.10.10
```
## SEE ALSO
**[podman(1)](podman.1.md)**, **[podman-pod(1)](podman-pod.1.md)**, **containers.conf(1)**
**[podman(1)](podman.1.md)**, **[podman-pod(1)](podman-pod.1.md)**, **[podman-play-kube(1)](podman-play-kube.1.md)**, **containers.conf(1)**
## HISTORY

View File

@ -1939,9 +1939,51 @@ func (c *Container) cleanup(ctx context.Context) error {
}
}
if err := c.stopPodIfNeeded(context.Background()); err != nil {
if lastError == nil {
lastError = err
} else {
logrus.Errorf("Stopping pod of container %s: %v", c.ID(), err)
}
}
return lastError
}
// stopPodIfNeeded schedules a stop of the container's pod when the pod's exit
// policy requests it.
//
// Containers that are not part of a pod are ignored, and so are pods whose
// exit policy is anything other than config.PodExitPolicyStop.  An error is
// returned only when the pod cannot be retrieved from the state.  The actual
// stop is handed to the runtime's work queue, so its failures are logged and
// never returned to the caller.
func (c *Container) stopPodIfNeeded(ctx context.Context) error {
	podID := c.config.Pod
	if podID == "" {
		return nil
	}

	pod, err := c.runtime.state.Pod(podID)
	if err != nil {
		return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), podID, err)
	}

	// Only the "stop" policy requires any action.
	if pod.config.ExitPolicy != config.PodExitPolicyStop {
		return nil
	}

	// Stop the pod via the runtime's work queue rather than inline.  This
	// avoids a number of deadlock scenarios; for instance, during
	// `pod stop` the pod has already been locked.  Going through the queue
	// also keeps us safe from future changes that may introduce further
	// deadlock scenarios.
	c.runtime.queueWork(func() {
		err := pod.stopIfOnlyInfraRemains(ctx, c.ID())
		// A pod that no longer exists is not worth reporting.
		if err == nil || errors.Is(err, define.ErrNoSuchPod) {
			return
		}
		logrus.Errorf("Checking if infra needs to be stopped: %v", err)
	})

	return nil
}
// delete deletes the container and runs any configured poststop
// hooks.
func (c *Container) delete(ctx context.Context) error {

View File

@ -19,6 +19,8 @@ type InspectPodData struct {
// CreateCommand is the full command plus arguments of the process the
// container has been created with.
CreateCommand []string `json:"CreateCommand,omitempty"`
// ExitPolicy of the pod.
ExitPolicy string `json:"ExitPolicy,omitempty"`
// State represents the current state of the pod.
State string `json:"State"`
// Hostname is the hostname that the pod will set.

View File

@ -1843,6 +1843,24 @@ func WithPodName(name string) PodCreateOption {
}
}
// WithPodExitPolicy sets the exit policy of the pod.
//
// The policy string is parsed with config.ParsePodExitPolicy when the option
// is applied; a parse error is returned as is and leaves the pod's config
// untouched.  Applying the option to a pod that is already marked valid
// yields define.ErrPodFinalized.
func WithPodExitPolicy(policy string) PodCreateOption {
	return func(pod *Pod) error {
		if pod.valid {
			return define.ErrPodFinalized
		}

		exitPolicy, err := config.ParsePodExitPolicy(policy)
		if err == nil {
			pod.config.ExitPolicy = exitPolicy
		}
		return err
	}
}
// WithPodHostname sets the hostname of the pod.
func WithPodHostname(hostname string) PodCreateOption {
return func(pod *Pod) error {

View File

@ -6,6 +6,7 @@ import (
"strings"
"time"
"github.com/containers/common/pkg/config"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/libpod/lock"
"github.com/opencontainers/runtime-spec/specs-go"
@ -70,6 +71,9 @@ type PodConfig struct {
// container has been created with.
CreateCommand []string `json:"CreateCommand,omitempty"`
// The pod's exit policy.
ExitPolicy config.PodExitPolicy `json:"ExitPolicy,omitempty"`
// ID of the pod's lock
LockID uint32 `json:"lockID"`
}

View File

@ -2,6 +2,7 @@ package libpod
import (
"context"
"fmt"
"github.com/containers/common/pkg/cgroups"
"github.com/containers/podman/v4/libpod/define"
@ -134,6 +135,10 @@ func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
p.lock.Lock()
defer p.lock.Unlock()
return p.stopWithTimeout(ctx, cleanup, timeout)
}
func (p *Pod) stopWithTimeout(ctx context.Context, cleanup bool, timeout int) (map[string]error, error) {
if !p.valid {
return nil, define.ErrPodRemoved
}
@ -195,6 +200,51 @@ func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
return nil, nil
}
// stopIfOnlyInfraRemains stops the pod if only the infra container remains
// running.
//
// The pod lock is held for the whole call.  The infra container (if the pod
// has one) and the container identified by ignoreID are left out of the
// check.  Every other container must be in the exited, removing, stopping or
// unknown state; as soon as one is found in any other state the function
// returns nil without touching the pod.  Errors from looking up the infra
// container, listing the pod's containers, querying a container state or
// stopping the pod are returned.
func (p *Pod) stopIfOnlyInfraRemains(ctx context.Context, ignoreID string) error {
	p.lock.Lock()
	defer p.lock.Unlock()

	var infraID string
	if p.HasInfraContainer() {
		infraCtr, err := p.infraContainer()
		if err != nil {
			return err
		}
		infraID = infraCtr.ID()
	}

	podCtrs, err := p.runtime.state.PodContainers(p)
	if err != nil {
		return err
	}

	for _, ctr := range podCtrs {
		id := ctr.ID()
		if id == infraID || id == ignoreID {
			continue
		}

		ctrState, err := ctr.State()
		if err != nil {
			return fmt.Errorf("getting state of container %s: %w", id, err)
		}

		// These states are treated as "no longer running".
		done := ctrState == define.ContainerStateExited ||
			ctrState == define.ContainerStateRemoving ||
			ctrState == define.ContainerStateStopping ||
			ctrState == define.ContainerStateUnknown
		if !done {
			// Something besides infra is still around: leave the pod alone.
			return nil
		}
	}

	_, err = p.stopWithTimeout(ctx, true, -1)
	return err
}
// Cleanup cleans up all containers within a pod that have stopped.
// All containers are cleaned up independently. An error with one container will
// not prevent other containers being cleaned up.
@ -661,6 +711,7 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) {
Namespace: p.Namespace(),
Created: p.CreatedTime(),
CreateCommand: p.config.CreateCommand,
ExitPolicy: string(p.config.ExitPolicy),
State: podState,
Hostname: p.config.Hostname,
Labels: p.Labels(),

View File

@ -86,6 +86,10 @@ type Runtime struct {
libimageEventsShutdown chan bool
lockManager lock.Manager
// Worker
workerShutdown chan bool
workerChannel chan func()
// syslog describes whenever logrus should log to the syslog as well.
// Note that the syslog hook will be enabled early in cmd/podman/syslog_linux.go
// This bool is just needed so that we can set it for netavark interface.
@ -597,6 +601,8 @@ func makeRuntime(runtime *Runtime) (retErr error) {
}
}
runtime.startWorker()
// Mark the runtime as valid - ready to be used, cannot be modified
// further
runtime.valid = true
@ -817,6 +823,14 @@ func (r *Runtime) Shutdown(force bool) error {
return define.ErrRuntimeStopped
}
if r.workerShutdown != nil {
// Signal the worker routine to shutdown. The routine will
// process all pending work items and then read from the
// channel; we're blocked until all work items have been
// processed.
r.workerShutdown <- true
}
r.valid = false
// Shutdown all containers if --force is given

41
libpod/runtime_worker.go Normal file
View File

@ -0,0 +1,41 @@
package libpod
import (
"time"
)
// startWorker creates the runtime's work and shutdown channels if they do not
// exist yet and launches the goroutine that executes queued work items one
// at a time.
//
// The goroutine polls: it first runs every item currently buffered in
// workerChannel, then returns if a value can be read from workerShutdown,
// and otherwise sleeps for 100ms before looking again.
func (r *Runtime) startWorker() {
	if r.workerChannel == nil {
		r.workerShutdown = make(chan bool)
		r.workerChannel = make(chan func(), 1)
	}

	go func() {
		for {
			// Run all buffered work items before checking whether we
			// are about to shut down.
			for len(r.workerChannel) > 0 {
				work := <-r.workerChannel
				work()
			}

			// The shutdown channel is read only once the items above
			// have been processed.  (*Runtime).Shutdown() blocks until
			// its value has been read here.
			select {
			case <-r.workerShutdown:
				return
			default:
			}

			time.Sleep(100 * time.Millisecond)
		}
	}()
}
// queueWork hands f over to the runtime's worker.  The send on workerChannel
// is done from a separate goroutine, so the caller is never blocked on the
// channel.
func (r *Runtime) queueWork(f func()) {
	go func(work func()) {
		r.workerChannel <- work
	}(f)
}

View File

@ -122,6 +122,7 @@ type PodCreateOptions struct {
CreateCommand []string `json:"create_command,omitempty"`
Devices []string `json:"devices,omitempty"`
DeviceReadBPs []string `json:"device_read_bps,omitempty"`
ExitPolicy string `json:"exit_policy,omitempty"`
Hostname string `json:"hostname,omitempty"`
Infra bool `json:"infra,omitempty"`
InfraImage string `json:"infra_image,omitempty"`
@ -319,6 +320,7 @@ func ToPodSpecGen(s specgen.PodSpecGenerator, p *PodCreateOptions) (*specgen.Pod
}
s.Pid = out
s.Hostname = p.Hostname
s.ExitPolicy = p.ExitPolicy
s.Labels = p.Labels
s.Devices = p.Devices
s.SecurityOpt = p.SecurityOpt

View File

@ -197,7 +197,11 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
return nil, errors.Errorf("pod does not have a name")
}
podOpt := entities.PodCreateOptions{Infra: true, Net: &entities.NetOptions{NoHosts: options.NoHosts}}
podOpt := entities.PodCreateOptions{
Infra: true,
Net: &entities.NetOptions{NoHosts: options.NoHosts},
ExitPolicy: string(config.PodExitPolicyStop),
}
podOpt, err = kube.ToPodOpt(ctx, podName, podOpt, podYAML)
if err != nil {
return nil, err

View File

@ -197,6 +197,8 @@ func createPodOptions(p *specgen.PodSpecGenerator) ([]libpod.PodCreateOption, er
options = append(options, libpod.WithPodHostname(p.Hostname))
}
options = append(options, libpod.WithPodExitPolicy(p.ExitPolicy))
return options, nil
}

View File

@ -19,6 +19,8 @@ type PodBasicConfig struct {
// all containers in the pod as long as the UTS namespace is shared.
// Optional.
Hostname string `json:"hostname,omitempty"`
// ExitPolicy determines the pod's exit and stop behaviour.
ExitPolicy string `json:"exit_policy,omitempty"`
// Labels are key-value pairs that are used to add metadata to pods.
// Optional.
Labels map[string]string `json:"labels,omitempty"`

View File

@ -406,7 +406,76 @@ EOF
run_podman pod inspect test --format {{.InfraConfig.HostNetwork}}
is "$output" "true" "Host network sharing with only ipc should be true"
run_podman pod rm test
}
# Wait for the pod (1st arg) to transition into the state (2nd arg).
# The pod is inspected up to six times with a 0.5s pause between attempts;
# if the state was never reached, the final `is` check fails the test.
function _ensure_pod_state() {
    local pod="$1"
    local want="$2"
    local attempt

    for attempt in {0..5}; do
        # Quote the pod argument so it is always passed as a single word.
        run_podman pod inspect "$pod" --format "{{.State}}"
        if [[ "$output" == "$want" ]]; then
            break
        fi
        sleep 0.5
    done

    is "$output" "$want" "unexpected pod state"
}
# Verifies that the exit policy can be set and inspected, that invalid
# policies are rejected, and that each policy yields the expected pod state
# once the pod's only container has exited.
@test "pod exit policies" {
    # Test setting exit policies
    run_podman pod create
    podID="$output"
    run_podman pod inspect $podID --format "{{.ExitPolicy}}"
    is "$output" "continue" "default exit policy"
    run_podman pod rm $podID

    run_podman pod create --exit-policy stop
    podID="$output"
    run_podman pod inspect $podID --format "{{.ExitPolicy}}"
    is "$output" "stop" "custom exit policy"
    run_podman pod rm $podID

    run_podman 125 pod create --exit-policy invalid
    is "$output" "Error: .*error running pod create option: invalid pod exit policy: \"invalid\"" "invalid exit policy"

    # Test exit-policy behaviour.  _ensure_pod_state inspects the pod
    # itself, so no separate inspect is needed before calling it.
    run_podman pod create --exit-policy continue
    podID="$output"
    run_podman run --pod $podID $IMAGE true
    _ensure_pod_state $podID Degraded
    run_podman pod rm $podID

    run_podman pod create --exit-policy stop
    podID="$output"
    run_podman run --pod $podID $IMAGE true
    _ensure_pod_state $podID Exited
    run_podman pod rm $podID
}
# Verifies that a pod created through `play kube` gets the "stop" exit policy
# and ends up in the Exited state once its only container has finished.
@test "pod exit policies - play kube" {
# play-kube sets the exit policy to "stop"
local name="$(random_string 10 | tr A-Z a-z)"
# restartPolicy is set explicitly because play kube defaults to "Always";
# the container below just runs `true` and exits.
kubeFile="apiVersion: v1
kind: Pod
metadata:
name: $name-pod
spec:
containers:
- command:
- \"true\"
image: $IMAGE
name: ctr
restartPolicy: OnFailure"
echo "$kubeFile" > $PODMAN_TMPDIR/test.yaml
run_podman play kube $PODMAN_TMPDIR/test.yaml
# The policy must be reported by inspect ...
run_podman pod inspect $name-pod --format "{{.ExitPolicy}}"
is "$output" "stop" "custom exit policy"
# ... and the pod must reach Exited after the container is done.
_ensure_pod_state $name-pod Exited
run_podman pod rm $name-pod
}
# vim: filetype=sh