Merge pull request #2826 from mheon/restart_policy

Add restart policy for containers
This commit is contained in:
OpenShift Merge Robot 2019-05-03 23:14:12 +02:00 committed by GitHub
commit 4aa90145bf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 357 additions and 56 deletions

View File

@ -444,7 +444,7 @@ func getCreateFlags(c *cliconfig.PodmanCommand) {
)
createFlags.String(
"restart", "",
"Restart is not supported. Please use a systemd unit file for restart",
"Restart policy to apply when a container exits",
)
createFlags.Bool(
"rm", false,

View File

@ -41,6 +41,9 @@ func CreateContainer(ctx context.Context, c *GenericCLIResults, runtime *libpod.
span, _ := opentracing.StartSpanFromContext(ctx, "createContainer")
defer span.Finish()
}
if c.Bool("rm") && c.String("restart") != "" && c.String("restart") != "no" {
return nil, nil, errors.Errorf("the --rm option conflicts with --restart")
}
rtc, err := runtime.GetConfig()
if err != nil {
@ -279,9 +282,6 @@ func ParseCreateOpts(ctx context.Context, c *GenericCLIResults, runtime *libpod.
blkioWeight uint16
namespaces map[string]string
)
if c.IsSet("restart") {
return nil, errors.Errorf("--restart option is not supported.\nUse systemd unit files for restarting containers")
}
idmappings, err := util.ParseIDMapping(c.StringSlice("uidmap"), c.StringSlice("gidmap"), c.String("subuidname"), c.String("subgidname"))
if err != nil {
@ -676,21 +676,22 @@ func ParseCreateOpts(ctx context.Context, c *GenericCLIResults, runtime *libpod.
PidsLimit: c.Int64("pids-limit"),
Ulimit: c.StringSlice("ulimit"),
},
Rm: c.Bool("rm"),
StopSignal: stopSignal,
StopTimeout: c.Uint("stop-timeout"),
Sysctl: sysctl,
Systemd: systemd,
Tmpfs: c.StringSlice("tmpfs"),
Tty: tty,
User: user,
UsernsMode: usernsMode,
MountsFlag: c.StringArray("mount"),
Volumes: c.StringArray("volume"),
WorkDir: workDir,
Rootfs: rootfs,
VolumesFrom: c.StringSlice("volumes-from"),
Syslog: c.Bool("syslog"),
RestartPolicy: c.String("restart"),
Rm: c.Bool("rm"),
StopSignal: stopSignal,
StopTimeout: c.Uint("stop-timeout"),
Sysctl: sysctl,
Systemd: systemd,
Tmpfs: c.StringSlice("tmpfs"),
Tty: tty,
User: user,
UsernsMode: usernsMode,
MountsFlag: c.StringArray("mount"),
Volumes: c.StringArray("volume"),
WorkDir: workDir,
Rootfs: rootfs,
VolumesFrom: c.StringSlice("volumes-from"),
Syslog: c.Bool("syslog"),
}
if config.Privileged {

View File

@ -567,11 +567,17 @@ If container is running in --read-only mode, then mount a read-write tmpfs on /r
**--restart=""**
Not implemented.
Restart policy to follow when containers exit.
Restart policy will not take effect if a container is stopped via the `podman kill` or `podman stop` commands.
Valid values are:
Restart should be handled via a systemd unit files. Please add your podman
commands to a unit file and allow systemd or your init system to handle the
restarting of the container processes. See example below.
- `no` : Do not restart containers on exit
- `on-failure[:max_retries]` : Restart containers when they exit with a non-0 exit code, retrying indefinitely or until the optional max_retries count is hit
- `always` : Restart containers when they exit, regardless of status, retrying indefinitely
Please note that restart will not restart containers after a system reboot.
If this functionality is required in your environment, you can invoke Podman from a systemd unit file, or create an init script for whichever init system is in use.
To generate systemd unit files, please see *podman generate systemd*
**--rm**=*true*|*false*
@ -859,21 +865,6 @@ the uids and gids from the host.
$ podman create --uidmap 0:30000:7000 --gidmap 0:30000:7000 fedora echo hello
```
### Running a podman container to restart inside of a systemd unit file
```
[Unit]
Description=My App
[Service]
Restart=always
ExecStart=/usr/bin/podman start -a my_app
ExecStop=/usr/bin/podman stop -t 10 my_app
KillMode=process
[Install]
WantedBy=multi-user.target
```
### Rootless Containers
Podman runs as a non root user on most systems. This feature requires that a new enough version of shadow-utils

View File

@ -28,6 +28,7 @@ The *container* event type will report the follow statuses:
* pause
* prune
* remove
* restart
* restore
* start
* stop

View File

@ -589,11 +589,17 @@ If container is running in --read-only mode, then mount a read-write tmpfs on /r
**--restart=""**
Not implemented.
Restart policy to follow when containers exit.
Restart policy will not take effect if a container is stopped via the `podman kill` or `podman stop` commands.
Valid values are:
Restart should be handled via a systemd unit files. Please add your podman
commands to a unit file and allow systemd or your init system to handle the
restarting of the container processes. See *podman generate systemd*.
- `no` : Do not restart containers on exit
- `on-failure[:max_retries]` : Restart containers when they exit with a non-0 exit code, retrying indefinitely or until the optional max_retries count is hit
- `always` : Restart containers when they exit, regardless of status, retrying indefinitely
Please note that restart will not restart containers after a system reboot.
If this functionality is required in your environment, you can invoke Podman from a systemd unit file, or create an init script for whichever init system is in use.
To generate systemd unit files, please see *podman generate systemd*
**--rm**=*true*|*false*

View File

@ -102,6 +102,20 @@ func (ns LinuxNS) String() string {
}
}
// Valid restart policy types.
const (
// RestartPolicyNone indicates that no restart policy has been requested
// by a container.
RestartPolicyNone = ""
// RestartPolicyNo is identical in function to RestartPolicyNone.
RestartPolicyNo = "no"
// RestartPolicyAlways unconditionally restarts the container.
RestartPolicyAlways = "always"
// RestartPolicyOnFailure restarts the container on non-0 exit code,
// with an optional maximum number of retries.
RestartPolicyOnFailure = "on-failure"
)
// Container is a single OCI container.
// All operations on a Container that access state must begin with a call to
// syncContainer().
@ -179,6 +193,16 @@ type ContainerState struct {
// This maps the path the file will be mounted to in the container to
// the path of the file on disk outside the container
BindMounts map[string]string `json:"bindMounts,omitempty"`
// StoppedByUser indicates whether the container was stopped by an
// explicit call to the Stop() API.
StoppedByUser bool `json:"stoppedByUser,omitempty"`
// RestartPolicyMatch indicates whether the conditions for restart
// policy have been met.
RestartPolicyMatch bool `json:"restartPolicyMatch,omitempty"`
// RestartCount is how many times the container was restarted by its
// restart policy. This is NOT incremented by normal container restarts
// (only by restart policy).
RestartCount uint `json:"restartCount,omitempty"`
// ExtensionStageHooks holds hooks which will be executed by libpod
// and not delegated to the OCI runtime.
@ -346,6 +370,17 @@ type ContainerConfig struct {
LogPath string `json:"logPath"`
// File containing the conmon PID
ConmonPidFile string `json:"conmonPidFile,omitempty"`
// RestartPolicy indicates what action the container will take upon
// exiting naturally.
// Allowed options are "no" (take no action), "on-failure" (restart on
// non-zero exit code, up an a maximum of RestartRetries times),
// and "always" (always restart the container on any exit code).
// The empty string is treated as the default ("no")
RestartPolicy string `json:"restart_policy,omitempty"`
// RestartRetries indicates the number of attempts that will be made to
// restart the container. Used only if RestartPolicy is set to
// "on-failure".
RestartRetries uint `json:"restart_retries,omitempty"`
// TODO log options for log drivers
PostConfigureNetNS bool `json:"postConfigureNetNS"`
@ -729,6 +764,17 @@ func (c *Container) LogPath() string {
return c.config.LogPath
}
// RestartPolicy returns the container's restart policy.
func (c *Container) RestartPolicy() string {
return c.config.RestartPolicy
}
// RestartRetries returns the number of retries that will be attempted when
// using the "on-failure" restart policy
func (c *Container) RestartRetries() uint {
return c.config.RestartRetries
}
// RuntimeName returns the name of the runtime
func (c *Container) RuntimeName() string {
return c.runtime.ociRuntime.name
@ -1003,6 +1049,21 @@ func (c *Container) BindMounts() (map[string]string, error) {
return newMap, nil
}
// StoppedByUser returns whether the container was last stopped by an explicit
// call to the Stop() API, or whether it exited naturally.
func (c *Container) StoppedByUser() (bool, error) {
if !c.batched {
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
return false, err
}
}
return c.state.StoppedByUser, nil
}
// Misc Accessors
// Most will require locking

View File

@ -57,11 +57,11 @@ func (c *Container) Init(ctx context.Context) (err error) {
if c.state.State == ContainerStateStopped {
// Reinitialize the container
return c.reinit(ctx)
return c.reinit(ctx, false)
}
// Initialize the container for the first time
return c.init(ctx)
return c.init(ctx, false)
}
// Start starts a container.
@ -199,8 +199,15 @@ func (c *Container) Kill(signal uint) error {
if c.state.State != ContainerStateRunning {
return errors.Wrapf(ErrCtrStateInvalid, "can only kill running containers")
}
defer c.newContainerEvent(events.Kill)
return c.runtime.ociRuntime.killContainer(c, signal)
if err := c.runtime.ociRuntime.killContainer(c, signal); err != nil {
return err
}
c.state.StoppedByUser = true
return c.save()
}
// Exec starts a new process inside the container
@ -583,6 +590,7 @@ func (c *Container) Cleanup(ctx context.Context) error {
if !c.batched {
c.lock.Lock()
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
return err
}
@ -593,6 +601,19 @@ func (c *Container) Cleanup(ctx context.Context) error {
return errors.Wrapf(ErrCtrStateInvalid, "container %s is running or paused, refusing to clean up", c.ID())
}
// Handle restart policy.
// Returns a bool indicating whether we actually restarted.
// If we did, don't proceed to cleanup - just exit.
didRestart, err := c.handleRestartPolicy(ctx)
if err != nil {
return err
}
if didRestart {
return nil
}
// If we didn't restart, we perform a normal cleanup
// Check if we have active exec sessions
if len(c.state.ExecSessions) != 0 {
return errors.Wrapf(ErrCtrStateInvalid, "container %s has active exec sessions, refusing to clean up", c.ID())
@ -754,7 +775,7 @@ func (c *Container) Refresh(ctx context.Context) error {
if err := c.prepare(); err != nil {
return err
}
if err := c.init(ctx); err != nil {
if err := c.init(ctx, false); err != nil {
return err
}
}

View File

@ -95,6 +95,7 @@ func (c *Container) getContainerInspectData(size bool, driverData *inspect.Data)
LogPath: config.LogPath,
ConmonPidFile: config.ConmonPidFile,
Name: config.Name,
RestartCount: int32(runtimeInfo.RestartCount),
Driver: driverData.Name,
MountLabel: config.MountLabel,
ProcessLabel: config.ProcessLabel,

View File

@ -210,6 +210,90 @@ func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
return nil
}
// Handle container restart policy.
// This is called when a container has exited, and was not explicitly stopped by
// an API call to stop the container or pod it is in.
func (c *Container) handleRestartPolicy(ctx context.Context) (restarted bool, err error) {
// If we did not get a restart policy match, exit immediately.
// Do the same if we're not a policy that restarts.
if !c.state.RestartPolicyMatch ||
c.config.RestartPolicy == RestartPolicyNo ||
c.config.RestartPolicy == RestartPolicyNone {
return false, nil
}
// If we're RestartPolicyOnFailure, we need to check retries and exit
// code.
if c.config.RestartPolicy == RestartPolicyOnFailure {
if c.state.ExitCode == 0 {
return false, nil
}
// If we don't have a max retries set, continue
if c.config.RestartRetries > 0 {
if c.state.RestartCount < c.config.RestartRetries {
logrus.Debugf("Container %s restart policy trigger: on retry %d (of %d)",
c.ID(), c.state.RestartCount, c.config.RestartRetries)
} else {
logrus.Debugf("Container %s restart policy trigger: retries exhausted", c.ID())
return false, nil
}
}
}
logrus.Debugf("Restarting container %s due to restart policy %s", c.ID(), c.config.RestartPolicy)
// Need to check if dependencies are alive.
if err = c.checkDependenciesAndHandleError(ctx); err != nil {
return false, err
}
// Is the container running again?
// If so, we don't have to do anything
if c.state.State == ContainerStateRunning || c.state.State == ContainerStatePaused {
return false, nil
} else if c.state.State == ContainerStateUnknown {
return false, errors.Wrapf(ErrInternal, "invalid container state encountered in restart attempt!")
}
c.newContainerEvent(events.Restart)
// Increment restart count
c.state.RestartCount = c.state.RestartCount + 1
logrus.Debugf("Container %s now on retry %d", c.ID(), c.state.RestartCount)
if err := c.save(); err != nil {
return false, err
}
defer func() {
if err != nil {
if err2 := c.cleanup(ctx); err2 != nil {
logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2)
}
}
}()
if err := c.prepare(); err != nil {
return false, err
}
if c.state.State == ContainerStateStopped {
// Reinitialize the container if we need to
if err := c.reinit(ctx, true); err != nil {
return false, err
}
} else if c.state.State == ContainerStateConfigured ||
c.state.State == ContainerStateExited {
// Initialize the container
if err := c.init(ctx, true); err != nil {
return false, err
}
}
if err := c.start(); err != nil {
return false, err
}
return true, nil
}
// Sync this container with on-disk state and runtime status
// Should only be called with container lock held
// This function should suffice to ensure a container's state is accurate and
@ -230,6 +314,14 @@ func (c *Container) syncContainer() error {
}
// Only save back to DB if state changed
if c.state.State != oldState {
// Check for a restart policy match
if c.config.RestartPolicy != RestartPolicyNone && c.config.RestartPolicy != RestartPolicyNo &&
(oldState == ContainerStateRunning || oldState == ContainerStatePaused) &&
(c.state.State == ContainerStateStopped || c.state.State == ContainerStateExited) &&
!c.state.StoppedByUser {
c.state.RestartPolicyMatch = true
}
if err := c.save(); err != nil {
return err
}
@ -376,6 +468,9 @@ func resetState(state *ContainerState) error {
state.ExecSessions = make(map[string]*ExecSession)
state.NetworkStatus = nil
state.BindMounts = make(map[string]string)
state.StoppedByUser = false
state.RestartPolicyMatch = false
state.RestartCount = 0
return nil
}
@ -569,13 +664,13 @@ func (c *Container) prepareToStart(ctx context.Context, recursive bool) (err err
if c.state.State == ContainerStateStopped {
// Reinitialize the container if we need to
if err := c.reinit(ctx); err != nil {
if err := c.reinit(ctx, false); err != nil {
return err
}
} else if c.state.State == ContainerStateConfigured ||
c.state.State == ContainerStateExited {
// Or initialize it if necessary
if err := c.init(ctx); err != nil {
if err := c.init(ctx, false); err != nil {
return err
}
}
@ -763,7 +858,7 @@ func (c *Container) completeNetworkSetup() error {
}
// Initialize a container, creating it in the runtime
func (c *Container) init(ctx context.Context) error {
func (c *Container) init(ctx context.Context, retainRetries bool) error {
span, _ := opentracing.StartSpanFromContext(ctx, "init")
span.SetTag("struct", "container")
defer span.Finish()
@ -789,6 +884,12 @@ func (c *Container) init(ctx context.Context) error {
c.state.ExitCode = 0
c.state.Exited = false
c.state.State = ContainerStateCreated
c.state.StoppedByUser = false
c.state.RestartPolicyMatch = false
if !retainRetries {
c.state.RestartCount = 0
}
if err := c.save(); err != nil {
return err
@ -851,7 +952,7 @@ func (c *Container) cleanupRuntime(ctx context.Context) error {
// Should only be done on ContainerStateStopped containers.
// Not necessary for ContainerStateExited - the container has already been
// removed from the runtime, so init() can proceed freely.
func (c *Container) reinit(ctx context.Context) error {
func (c *Container) reinit(ctx context.Context, retainRetries bool) error {
span, _ := opentracing.StartSpanFromContext(ctx, "reinit")
span.SetTag("struct", "container")
defer span.Finish()
@ -863,7 +964,7 @@ func (c *Container) reinit(ctx context.Context) error {
}
// Initialize the container again
return c.init(ctx)
return c.init(ctx, retainRetries)
}
// Initialize (if necessary) and start a container
@ -901,12 +1002,12 @@ func (c *Container) initAndStart(ctx context.Context) (err error) {
if c.state.State == ContainerStateStopped {
logrus.Debugf("Recreating container %s in OCI runtime", c.ID())
if err := c.reinit(ctx); err != nil {
if err := c.reinit(ctx, false); err != nil {
return err
}
} else if c.state.State == ContainerStateConfigured ||
c.state.State == ContainerStateExited {
if err := c.init(ctx); err != nil {
if err := c.init(ctx, false); err != nil {
return err
}
}
@ -950,6 +1051,11 @@ func (c *Container) stop(timeout uint) error {
return err
}
c.state.StoppedByUser = true
if err := c.save(); err != nil {
return errors.Wrapf(err, "error saving container %s state after stopping", c.ID())
}
// Wait until we have an exit file, and sync once we do
return c.waitForExitFileAndSync()
}
@ -986,6 +1092,8 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (err e
return errors.Wrapf(ErrCtrStateInvalid, "unable to restart a container in a paused or unknown state")
}
c.newContainerEvent(events.Restart)
if c.state.State == ContainerStateRunning {
if err := c.stop(timeout); err != nil {
return err
@ -1004,13 +1112,13 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (err e
if c.state.State == ContainerStateStopped {
// Reinitialize the container if we need to
if err := c.reinit(ctx); err != nil {
if err := c.reinit(ctx, false); err != nil {
return err
}
} else if c.state.State == ContainerStateConfigured ||
c.state.State == ContainerStateExited {
// Initialize the container
if err := c.init(ctx); err != nil {
if err := c.init(ctx, false); err != nil {
return err
}
}

View File

@ -134,6 +134,8 @@ const (
// Renumber indicates that lock numbers were reallocated at user
// request.
Renumber Status = "renumber"
// Restart indicates the target was restarted via an API call.
Restart Status = "restart"
// Restore ...
Restore Status = "restore"
// Save ...

View File

@ -144,6 +144,8 @@ func StringToStatus(name string) (Status, error) {
return Remove, nil
case Renumber.String():
return Renumber, nil
case Restart.String():
return Restart, nil
case Restore.String():
return Restore, nil
case Save.String():

View File

@ -1239,6 +1239,41 @@ func WithUseImageHosts() CtrCreateOption {
}
}
// WithRestartPolicy sets the container's restart policy. Valid values are
// "no", "on-failure", and "always". The empty string is allowed, and will be
// equivalent to "no".
func WithRestartPolicy(policy string) CtrCreateOption {
return func(ctr *Container) error {
if ctr.valid {
return ErrCtrFinalized
}
switch policy {
case RestartPolicyNone, RestartPolicyNo, RestartPolicyOnFailure, RestartPolicyAlways:
ctr.config.RestartPolicy = policy
default:
return errors.Wrapf(ErrInvalidArg, "%q is not a valid restart policy", policy)
}
return nil
}
}
// WithRestartRetries sets the number of retries to use when restarting a
// container with the "on-failure" restart policy.
// 0 is an allowed value, and indicates infinite retries.
func WithRestartRetries(tries uint) CtrCreateOption {
return func(ctr *Container) error {
if ctr.valid {
return ErrCtrFinalized
}
ctr.config.RestartRetries = tries
return nil
}
}
// withIsInfra sets the container to be an infra container. This means the container will be sometimes hidden
// and expected to be the first container in the pod.
func withIsInfra() CtrCreateOption {

View File

@ -364,6 +364,13 @@ func (p *Pod) Kill(signal uint) (map[string]error, error) {
}
logrus.Debugf("Killed container %s with signal %d", ctr.ID(), signal)
ctr.state.StoppedByUser = true
if err := ctr.save(); err != nil {
ctrErrors[ctr.ID()] = err
}
ctr.lock.Unlock()
}
if len(ctrErrors) > 0 {

View File

@ -161,7 +161,7 @@ type ContainerInspectData struct {
LogPath string `json:"LogPath"`
ConmonPidFile string `json:"ConmonPidFile"`
Name string `json:"Name"`
RestartCount int32 `json:"RestartCount"` //TODO
RestartCount int32 `json:"RestartCount"`
Driver string `json:"Driver"`
MountLabel string `json:"MountLabel"`
ProcessLabel string `json:"ProcessLabel"`

View File

@ -108,6 +108,7 @@ type CreateConfig struct {
ReadOnlyRootfs bool //read-only
ReadOnlyTmpfs bool //read-only-tmpfs
Resources CreateResourceConfig
RestartPolicy string
Rm bool //rm
StopSignal syscall.Signal // stop-signal
StopTimeout uint // stop-timeout
@ -359,6 +360,25 @@ func (c *CreateConfig) getContainerCreateOptions(runtime *libpod.Runtime, pod *l
options = append(options, libpod.WithCgroupParent(c.CgroupParent))
}
if c.RestartPolicy != "" {
if c.RestartPolicy == "unless-stopped" {
return nil, errors.Wrapf(libpod.ErrInvalidArg, "the unless-stopped restart policy is not supported")
}
split := strings.Split(c.RestartPolicy, ":")
if len(split) > 1 {
numTries, err := strconv.Atoi(split[1])
if err != nil {
return nil, errors.Wrapf(err, "%s is not a valid number of retries for restart policy", split[1])
}
if numTries < 0 {
return nil, errors.Wrapf(libpod.ErrInvalidArg, "restart policy requires a positive number of retries")
}
options = append(options, libpod.WithRestartRetries(uint(numTries)))
}
options = append(options, libpod.WithRestartPolicy(split[0]))
}
// Always use a cleanup process to clean up Podman after termination
exitCmd, err := c.createExitCommand(runtime)
if err != nil {

View File

@ -9,6 +9,7 @@ import (
"os"
"path/filepath"
"strings"
"time"
. "github.com/containers/libpod/test/utils"
"github.com/mrunalp/fileutils"
@ -720,4 +721,48 @@ USER mail`
Expect(session.ExitCode()).To(Equal(1))
os.Unsetenv("http_proxy")
})
It("podman run with restart-policy always restarts containers", func() {
podmanTest.RestoreArtifact(fedoraMinimal)
testDir := filepath.Join(podmanTest.RunRoot, "restart-test")
err := os.Mkdir(testDir, 0755)
Expect(err).To(BeNil())
aliveFile := filepath.Join(testDir, "running")
file, err := os.Create(aliveFile)
Expect(err).To(BeNil())
file.Close()
session := podmanTest.Podman([]string{"run", "-dt", "--restart", "always", "-v", fmt.Sprintf("%s:/tmp/runroot:Z", testDir), fedoraMinimal, "bash", "-c", "date +%N > /tmp/runroot/ran && while test -r /tmp/runroot/running; do sleep 0.1s; done"})
found := false
testFile := filepath.Join(testDir, "ran")
for i := 0; i < 10; i++ {
time.Sleep(1 * time.Second)
if _, err := os.Stat(testFile); err == nil {
found = true
err = os.Remove(testFile)
Expect(err).To(BeNil())
break
}
}
Expect(found).To(BeTrue())
err = os.Remove(aliveFile)
Expect(err).To(BeNil())
session.WaitWithDefaultTimeout()
// 10 seconds to restart the container
found = false
for i := 0; i < 10; i++ {
time.Sleep(1 * time.Second)
if _, err := os.Stat(testFile); err == nil {
found = true
break
}
}
Expect(found).To(BeTrue())
})
})