mirror of https://github.com/containers/podman.git
pod: add exit policies
Add the notion of an "exit policy" to a pod. This policy controls the
behaviour when the last container of a pod exits. Initially, there are
two policies:

 - "continue" : the pod continues running. This is the default policy
                when creating a pod.

 - "stop" : stop the pod when the last container exits. This is the
            default behaviour for `play kube`.

In order to implement the deferred stop of a pod, add a worker queue to
the libpod runtime. The queue will pick up work items and, in this case,
helps resolve deadlocks that would otherwise occur if we attempted to
stop a pod during container cleanup.

Note that the default restart policy of `play kube` is "Always". Hence,
in order to really solve #13464, the YAML files must set a custom
restart policy; the tests use "OnFailure".

Fixes: #13464

Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
This commit is contained in:
parent 77d872ea38
commit 4eff0c8cf2
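The hunks below reference `config.PodExitPolicies`, `config.ParsePodExitPolicy`, and the `PodExitPolicy` constants, which live in containers/common rather than in this commit. As a rough orientation, a minimal sketch of what those definitions could look like (the actual code in `github.com/containers/common/pkg/config` may differ in detail):

```go
// Sketch only: the real definitions live in the containers/common
// repository (pkg/config) and may differ from this approximation.
package config

import "fmt"

// PodExitPolicy determines a pod's behaviour when its last container exits.
type PodExitPolicy string

const (
	// PodExitPolicyContinue keeps the pod running (the default).
	PodExitPolicyContinue PodExitPolicy = "continue"
	// PodExitPolicyStop stops the pod when the last container exits.
	PodExitPolicyStop PodExitPolicy = "stop"
)

// PodExitPolicies lists the supported policies, e.g., for shell completion.
var PodExitPolicies = []string{string(PodExitPolicyContinue), string(PodExitPolicyStop)}

// ParsePodExitPolicy validates the given policy; an empty value maps to the default.
func ParsePodExitPolicy(policy string) (PodExitPolicy, error) {
	switch policy {
	case "", string(PodExitPolicyContinue):
		return PodExitPolicyContinue, nil
	case string(PodExitPolicyStop):
		return PodExitPolicyStop, nil
	default:
		return "", fmt.Errorf("invalid pod exit policy: %q", policy)
	}
}
```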
@@ -492,6 +492,11 @@ func AutocompleteImages(cmd *cobra.Command, args []string, toComplete string) ([
 	return getImages(cmd, toComplete)
 }
 
+// AutocompletePodExitPolicy - Autocomplete pod exit policy.
+func AutocompletePodExitPolicy(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
+	return config.PodExitPolicies, cobra.ShellCompDirectiveNoFileComp
+}
+
 // AutocompleteCreateRun - Autocomplete only the fist argument as image and then do file completion.
 func AutocompleteCreateRun(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
 	if !validCurrentCmdLine(cmd, args, toComplete) {
@@ -72,6 +72,10 @@ func init() {
 	flags.StringVarP(&createOptions.Name, nameFlagName, "n", "", "Assign a name to the pod")
 	_ = createCommand.RegisterFlagCompletionFunc(nameFlagName, completion.AutocompleteNone)
 
+	policyFlag := "exit-policy"
+	flags.StringVarP(&createOptions.ExitPolicy, policyFlag, "", string(containerConfig.Engine.PodExitPolicy), "Behaviour when the last container exits")
+	_ = createCommand.RegisterFlagCompletionFunc(policyFlag, common.AutocompletePodExitPolicy)
+
 	infraImageFlagName := "infra-image"
 	var defInfraImage string
 	if !registry.IsRemote() {
@@ -75,6 +75,15 @@ Set custom DNS options in the /etc/resolv.conf file that will be shared between
 
 Set custom DNS search domains in the /etc/resolv.conf file that will be shared between all containers in the pod.
 
+#### **--exit-policy**=**continue** | *stop*
+
+Set the exit policy of the pod when the last container exits. Supported policies are:
+
+| Exit Policy | Description                                                                |
+| ----------- | -------------------------------------------------------------------------- |
+| *continue*  | The pod continues running when the last container exits. Used by default. |
+| *stop*      | The pod is stopped when the last container exits. Used in `play kube`.    |
+
 #### **--gidmap**=*container_gid:host_gid:amount*
 
 GID map for the user namespace. Using this flag will run the container with user namespace enabled. It conflicts with the `--userns` and `--subgidname` flags.
@@ -554,7 +563,7 @@ $ podman pod create --network net1:ip=10.89.1.5 --network net2:ip=10.89.10.10
 ```
 
 ## SEE ALSO
-**[podman(1)](podman.1.md)**, **[podman-pod(1)](podman-pod.1.md)**, **containers.conf(1)**
+**[podman(1)](podman.1.md)**, **[podman-pod(1)](podman-pod.1.md)**, **[podman-play-kube(1)](podman-play-kube.1.md)**, **containers.conf(1)**
 
 
 ## HISTORY
@@ -1939,9 +1939,51 @@ func (c *Container) cleanup(ctx context.Context) error {
 		}
 	}
 
+	if err := c.stopPodIfNeeded(context.Background()); err != nil {
+		if lastError == nil {
+			lastError = err
+		} else {
+			logrus.Errorf("Stopping pod of container %s: %v", c.ID(), err)
+		}
+	}
+
 	return lastError
 }
 
+// If the container is part of a pod where only the infra container remains
+// running, attempt to stop the pod.
+func (c *Container) stopPodIfNeeded(ctx context.Context) error {
+	if c.config.Pod == "" {
+		return nil
+	}
+
+	pod, err := c.runtime.state.Pod(c.config.Pod)
+	if err != nil {
+		return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), c.config.Pod, err)
+	}
+
+	switch pod.config.ExitPolicy {
+	case config.PodExitPolicyContinue:
+		return nil
+
+	case config.PodExitPolicyStop:
+		// Use the runtime's work queue to stop the pod. This resolves
+		// a number of scenarios where we'd otherwise run into
+		// deadlocks. For instance, during `pod stop`, the pod has
+		// already been locked.
+		// The work queue is a simple means of avoiding such deadlocks
+		// without having to worry about future changes that may
+		// introduce more deadlock scenarios.
+		c.runtime.queueWork(func() {
+			if err := pod.stopIfOnlyInfraRemains(ctx, c.ID()); err != nil {
+				if !errors.Is(err, define.ErrNoSuchPod) {
+					logrus.Errorf("Checking if infra needs to be stopped: %v", err)
+				}
+			}
+		})
+	}
+	return nil
+}
+
 // delete deletes the container and runs any configured poststop
 // hooks.
 func (c *Container) delete(ctx context.Context) error {
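The `queueWork` indirection above is the commit's answer to a lock-ordering problem: cleanup may run while the pod lock is already held (for instance during `pod stop`), so stopping the pod synchronously would self-deadlock. A minimal, self-contained sketch of that failure mode and the goroutine hand-off that avoids it; all names here are local to the sketch, not libpod's:

```go
// Sketch: if cleanup called stop() directly while the pod lock was
// already held, it would block on p.mu forever. Handing the stop to a
// separate goroutine lets the cleanup path finish and release its
// locks first, after which the deferred stop can proceed.
package main

import (
	"fmt"
	"sync"
	"time"
)

type pod struct{ mu sync.Mutex }

func (p *pod) stop() {
	p.mu.Lock() // would deadlock if invoked while p.mu is already held
	defer p.mu.Unlock()
	fmt.Println("pod stopped")
}

func main() {
	p := &pod{}
	done := make(chan struct{})

	p.mu.Lock() // simulate `pod stop` holding the pod lock...
	go func() { // ...while container cleanup defers the pod stop
		p.stop() // blocks until the lock is released below
		close(done)
	}()

	time.Sleep(50 * time.Millisecond)
	p.mu.Unlock() // cleanup finishes and releases the lock

	<-done // the deferred stop has now run
}
```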
@@ -19,6 +19,8 @@ type InspectPodData struct {
 	// CreateCommand is the full command plus arguments of the process the
 	// container has been created with.
 	CreateCommand []string `json:"CreateCommand,omitempty"`
+	// ExitPolicy of the pod.
+	ExitPolicy string `json:"ExitPolicy,omitempty"`
 	// State represents the current state of the pod.
 	State string `json:"State"`
 	// Hostname is the hostname that the pod will set.
@@ -1843,6 +1843,24 @@ func WithPodName(name string) PodCreateOption {
 	}
 }
 
+// WithPodExitPolicy sets the exit policy of the pod.
+func WithPodExitPolicy(policy string) PodCreateOption {
+	return func(pod *Pod) error {
+		if pod.valid {
+			return define.ErrPodFinalized
+		}
+
+		parsed, err := config.ParsePodExitPolicy(policy)
+		if err != nil {
+			return err
+		}
+
+		pod.config.ExitPolicy = parsed
+
+		return nil
+	}
+}
+
 // WithPodHostname sets the hostname of the pod.
 func WithPodHostname(hostname string) PodCreateOption {
 	return func(pod *Pod) error {
@@ -6,6 +6,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/containers/common/pkg/config"
 	"github.com/containers/podman/v4/libpod/define"
 	"github.com/containers/podman/v4/libpod/lock"
 	"github.com/opencontainers/runtime-spec/specs-go"
@@ -70,6 +71,9 @@ type PodConfig struct {
 	// container has been created with.
 	CreateCommand []string `json:"CreateCommand,omitempty"`
 
+	// The pod's exit policy.
+	ExitPolicy config.PodExitPolicy `json:"ExitPolicy,omitempty"`
+
 	// ID of the pod's lock
 	LockID uint32 `json:"lockID"`
 }
@@ -2,6 +2,7 @@ package libpod
 
 import (
 	"context"
+	"fmt"
 
 	"github.com/containers/common/pkg/cgroups"
 	"github.com/containers/podman/v4/libpod/define"
@@ -134,6 +135,10 @@ func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
 	p.lock.Lock()
 	defer p.lock.Unlock()
 
+	return p.stopWithTimeout(ctx, cleanup, timeout)
+}
+
+func (p *Pod) stopWithTimeout(ctx context.Context, cleanup bool, timeout int) (map[string]error, error) {
 	if !p.valid {
 		return nil, define.ErrPodRemoved
 	}
@@ -195,6 +200,51 @@ func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (m
 	return nil, nil
 }
 
+// Stops the pod if only the infra container remains running.
+func (p *Pod) stopIfOnlyInfraRemains(ctx context.Context, ignoreID string) error {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+
+	infraID := ""
+
+	if p.HasInfraContainer() {
+		infra, err := p.infraContainer()
+		if err != nil {
+			return err
+		}
+		infraID = infra.ID()
+	}
+
+	allCtrs, err := p.runtime.state.PodContainers(p)
+	if err != nil {
+		return err
+	}
+
+	for _, ctr := range allCtrs {
+		if ctr.ID() == infraID || ctr.ID() == ignoreID {
+			continue
+		}
+
+		state, err := ctr.State()
+		if err != nil {
+			return fmt.Errorf("getting state of container %s: %w", ctr.ID(), err)
+		}
+
+		switch state {
+		case define.ContainerStateExited,
+			define.ContainerStateRemoving,
+			define.ContainerStateStopping,
+			define.ContainerStateUnknown:
+			continue
+		default:
+			return nil
+		}
+	}
+
+	_, err = p.stopWithTimeout(ctx, true, -1)
+	return err
+}
+
 // Cleanup cleans up all containers within a pod that have stopped.
 // All containers are cleaned up independently. An error with one container will
 // not prevent other containers being cleaned up.
@@ -661,6 +711,7 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) {
 		Namespace:     p.Namespace(),
 		Created:       p.CreatedTime(),
 		CreateCommand: p.config.CreateCommand,
+		ExitPolicy:    string(p.config.ExitPolicy),
 		State:         podState,
 		Hostname:      p.config.Hostname,
 		Labels:        p.Labels(),
@@ -86,6 +86,10 @@ type Runtime struct {
 	libimageEventsShutdown chan bool
 	lockManager            lock.Manager
 
+	// Worker
+	workerShutdown chan bool
+	workerChannel  chan func()
+
 	// syslog describes whenever logrus should log to the syslog as well.
 	// Note that the syslog hook will be enabled early in cmd/podman/syslog_linux.go
 	// This bool is just needed so that we can set it for netavark interface.
@@ -597,6 +601,8 @@ func makeRuntime(runtime *Runtime) (retErr error) {
 		}
 	}
 
+	runtime.startWorker()
+
 	// Mark the runtime as valid - ready to be used, cannot be modified
 	// further
 	runtime.valid = true
@@ -817,6 +823,14 @@ func (r *Runtime) Shutdown(force bool) error {
 		return define.ErrRuntimeStopped
 	}
 
+	if r.workerShutdown != nil {
+		// Signal the worker routine to shutdown. The routine will
+		// process all pending work items and then read from the
+		// channel; we're blocked until all work items have been
+		// processed.
+		r.workerShutdown <- true
+	}
+
 	r.valid = false
 
 	// Shutdown all containers if --force is given
@@ -0,0 +1,41 @@
+package libpod
+
+import (
+	"time"
+)
+
+func (r *Runtime) startWorker() {
+	if r.workerChannel == nil {
+		r.workerChannel = make(chan func(), 1)
+		r.workerShutdown = make(chan bool)
+	}
+	go func() {
+		for {
+			// Make sure to read all workers before
+			// checking if we're about to shutdown.
+			for len(r.workerChannel) > 0 {
+				w := <-r.workerChannel
+				w()
+			}
+
+			select {
+			// We'll read from the shutdown channel only when all
+			// items above have been processed.
+			//
+			// (*Runtime).Shutdown() will block until the
+			// item is read.
+			case <-r.workerShutdown:
+				return
+
+			default:
+				time.Sleep(100 * time.Millisecond)
+			}
+		}
+	}()
+}
+
+func (r *Runtime) queueWork(f func()) {
+	go func() {
+		r.workerChannel <- f
+	}()
+}
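The worker above drains all queued items before honouring the shutdown signal, and `Shutdown` blocks on the unbuffered `workerShutdown` channel until the worker reads it. The following standalone sketch (illustrative only, not libpod code) exercises the same pattern:

```go
// Standalone sketch of the drain-then-shutdown pattern: queued items
// are processed before the shutdown signal is honoured, and the
// shutdown send blocks the caller until the worker receives it.
package main

import (
	"fmt"
	"time"
)

func main() {
	workerChannel := make(chan func(), 1)
	workerShutdown := make(chan bool)

	go func() {
		for {
			// Drain all pending items before checking for shutdown.
			for len(workerChannel) > 0 {
				w := <-workerChannel
				w()
			}
			select {
			case <-workerShutdown:
				return
			default:
				time.Sleep(10 * time.Millisecond)
			}
		}
	}()

	for i := 0; i < 3; i++ {
		i := i
		workerChannel <- func() { fmt.Println("work item", i) }
	}

	// Give the worker a drain pass; like the original, shutdown is
	// best-effort with respect to items that race with the signal.
	time.Sleep(50 * time.Millisecond)
	workerShutdown <- true // blocks until the worker receives it
	fmt.Println("worker has shut down")
}
```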
@@ -122,6 +122,7 @@ type PodCreateOptions struct {
 	CreateCommand []string `json:"create_command,omitempty"`
 	Devices       []string `json:"devices,omitempty"`
 	DeviceReadBPs []string `json:"device_read_bps,omitempty"`
+	ExitPolicy    string   `json:"exit_policy,omitempty"`
 	Hostname      string   `json:"hostname,omitempty"`
 	Infra         bool     `json:"infra,omitempty"`
 	InfraImage    string   `json:"infra_image,omitempty"`
@@ -319,6 +320,7 @@ func ToPodSpecGen(s specgen.PodSpecGenerator, p *PodCreateOptions) (*specgen.Pod
 	}
 	s.Pid = out
 	s.Hostname = p.Hostname
+	s.ExitPolicy = p.ExitPolicy
 	s.Labels = p.Labels
 	s.Devices = p.Devices
 	s.SecurityOpt = p.SecurityOpt
@@ -197,7 +197,11 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
 		return nil, errors.Errorf("pod does not have a name")
 	}
 
-	podOpt := entities.PodCreateOptions{Infra: true, Net: &entities.NetOptions{NoHosts: options.NoHosts}}
+	podOpt := entities.PodCreateOptions{
+		Infra:      true,
+		Net:        &entities.NetOptions{NoHosts: options.NoHosts},
+		ExitPolicy: string(config.PodExitPolicyStop),
+	}
 	podOpt, err = kube.ToPodOpt(ctx, podName, podOpt, podYAML)
 	if err != nil {
 		return nil, err
@@ -197,6 +197,8 @@ func createPodOptions(p *specgen.PodSpecGenerator) ([]libpod.PodCreateOption, er
 		options = append(options, libpod.WithPodHostname(p.Hostname))
 	}
 
+	options = append(options, libpod.WithPodExitPolicy(p.ExitPolicy))
+
 	return options, nil
 }
 
@@ -19,6 +19,8 @@ type PodBasicConfig struct {
 	// all containers in the pod as long as the UTS namespace is shared.
 	// Optional.
 	Hostname string `json:"hostname,omitempty"`
+	// ExitPolicy determines the pod's exit and stop behaviour.
+	ExitPolicy string `json:"exit_policy,omitempty"`
 	// Labels are key-value pairs that are used to add metadata to pods.
 	// Optional.
 	Labels map[string]string `json:"labels,omitempty"`
@@ -406,7 +406,76 @@ EOF
 	run_podman pod inspect test --format {{.InfraConfig.HostNetwork}}
 	is "$output" "true" "Host network sharing with only ipc should be true"
 	run_podman pod rm test
 }
 
+# Wait for the pod (1st arg) to transition into the state (2nd arg)
+function _ensure_pod_state() {
+    for i in {0..5}; do
+        run_podman pod inspect $1 --format "{{.State}}"
+        if [[ $output == "$2" ]]; then
+            break
+        fi
+        sleep 0.5
+    done
+
+    is "$output" "$2" "unexpected pod state"
+}
+
+@test "pod exit policies" {
+    # Test setting exit policies
+    run_podman pod create
+    podID="$output"
+    run_podman pod inspect $podID --format "{{.ExitPolicy}}"
+    is "$output" "continue" "default exit policy"
+    run_podman pod rm $podID
+
+    run_podman pod create --exit-policy stop
+    podID="$output"
+    run_podman pod inspect $podID --format "{{.ExitPolicy}}"
+    is "$output" "stop" "custom exit policy"
+    run_podman pod rm $podID
+
+    run_podman 125 pod create --exit-policy invalid
+    is "$output" "Error: .*error running pod create option: invalid pod exit policy: \"invalid\"" "invalid exit policy"
+
+    # Test exit-policy behaviour
+    run_podman pod create --exit-policy continue
+    podID="$output"
+    run_podman run --pod $podID $IMAGE true
+    run_podman pod inspect $podID --format "{{.State}}"
+    _ensure_pod_state $podID Degraded
+    run_podman pod rm $podID
+
+    run_podman pod create --exit-policy stop
+    podID="$output"
+    run_podman run --pod $podID $IMAGE true
+    run_podman pod inspect $podID --format "{{.State}}"
+    _ensure_pod_state $podID Exited
+    run_podman pod rm $podID
+}
+
+@test "pod exit policies - play kube" {
+    # play-kube sets the exit policy to "stop"
+    local name="$(random_string 10 | tr A-Z a-z)"
+
+    kubeFile="apiVersion: v1
+kind: Pod
+metadata:
+  name: $name-pod
+spec:
+  containers:
+  - command:
+    - \"true\"
+    image: $IMAGE
+    name: ctr
+  restartPolicy: OnFailure"
+
+    echo "$kubeFile" > $PODMAN_TMPDIR/test.yaml
+    run_podman play kube $PODMAN_TMPDIR/test.yaml
+    run_podman pod inspect $name-pod --format "{{.ExitPolicy}}"
+    is "$output" "stop" "custom exit policy"
+    _ensure_pod_state $name-pod Exited
+    run_podman pod rm $name-pod
+}
 
 # vim: filetype=sh