From ff47a4c2d5485fc49f937f3ce0c4e2fd6bdb1956 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 20 Nov 2018 14:08:08 +0000 Subject: [PATCH 1/3] Use a struct to pass options to Checkpoint() For upcoming changes to the Checkpoint() functions this commit switches checkpoint options from a boolean to a struct, so that additional options can be passed easily to Checkpoint() without changing the function parameters all the time. Signed-off-by: Adrian Reber --- cmd/podman/checkpoint.go | 6 ++++-- libpod/container_api.go | 10 ++++++++-- libpod/container_internal_linux.go | 4 ++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/cmd/podman/checkpoint.go b/cmd/podman/checkpoint.go index bf280920d7..49e2aec63f 100644 --- a/cmd/podman/checkpoint.go +++ b/cmd/podman/checkpoint.go @@ -50,7 +50,9 @@ func checkpointCmd(c *cli.Context) error { } defer runtime.Shutdown(false) - keep := c.Bool("keep") + options := libpod.ContainerCheckpointOptions{ + Keep: c.Bool("keep"), + } if err := checkAllAndLatest(c); err != nil { return err @@ -59,7 +61,7 @@ func checkpointCmd(c *cli.Context) error { containers, lastError := getAllOrLatestContainers(c, runtime, libpod.ContainerStateRunning, "running") for _, ctr := range containers { - if err = ctr.Checkpoint(context.TODO(), keep); err != nil { + if err = ctr.Checkpoint(context.TODO(), options); err != nil { if lastError != nil { fmt.Fprintln(os.Stderr, lastError) } diff --git a/libpod/container_api.go b/libpod/container_api.go index 390987394a..9f5436b14c 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -830,8 +830,14 @@ func (c *Container) Refresh(ctx context.Context) error { return nil } +// ContainerCheckpointOptions is a struct used to pass the parameters +// for checkpointing to corresponding functions +type ContainerCheckpointOptions struct { + Keep bool +} + // Checkpoint checkpoints a container -func (c *Container) Checkpoint(ctx context.Context, keep bool) error { +func (c *Container) 
Checkpoint(ctx context.Context, options ContainerCheckpointOptions) error { logrus.Debugf("Trying to checkpoint container %s", c) if !c.batched { c.lock.Lock() @@ -842,7 +848,7 @@ func (c *Container) Checkpoint(ctx context.Context, keep bool) error { } } - return c.checkpoint(ctx, keep) + return c.checkpoint(ctx, options) } // Restore restores a container diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 66c7e8a044..003e8284a9 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -431,7 +431,7 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr return nil } -func (c *Container) checkpoint(ctx context.Context, keep bool) (err error) { +func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (err error) { if !criu.CheckForCriu() { return errors.Errorf("checkpointing a container requires at least CRIU %d", criu.MinCriuVersion) @@ -464,7 +464,7 @@ func (c *Container) checkpoint(ctx context.Context, keep bool) (err error) { return err } - if !keep { + if !options.Keep { // Remove log file os.Remove(filepath.Join(c.bundlePath(), "dump.log")) // Remove statistic file From b0572d622974837c2221ed1f01a2ab982f078370 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 20 Nov 2018 15:34:15 +0000 Subject: [PATCH 2/3] Added option to keep containers running after checkpointing CRIU supports leaving processes running after checkpointing: -R|--leave-running leave tasks in running state after checkpoint runc also supports leaving containers running after checkpointing: --leave-running leave the process running after checkpointing With this commit the support to leave a container running after checkpointing is brought to Podman: --leave-running, -R leave the container running after writing checkpoint to disk Now it is possible to checkpoint a container at some point in time without stopping the container. 
This can be used to roll back the container to an earlier state: $ podman run --tmpfs /tmp --name podman-criu-test -d docker://docker.io/yovfiatbeb/podman-criu-test $ curl 10.88.64.253:8080/examples/servlets/servlet/HelloWorldExample 3 $ podman container checkpoint -R -l $ curl 10.88.64.253:8080/examples/servlets/servlet/HelloWorldExample 4 $ curl 10.88.64.253:8080/examples/servlets/servlet/HelloWorldExample 5 $ podman stop -l $ podman container restore -l $ curl 10.88.64.253:8080/examples/servlets/servlet/HelloWorldExample 4 So after checkpointing the container kept running and was stopped after some time. Restoring this container will restore the state right at the checkpoint. Signed-off-by: Adrian Reber --- cmd/podman/checkpoint.go | 7 ++++++- libpod/container_api.go | 3 ++- libpod/container_internal_linux.go | 12 +++++++----- libpod/oci.go | 15 ++++++++++++--- 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/cmd/podman/checkpoint.go b/cmd/podman/checkpoint.go index 49e2aec63f..ddfd12bc3e 100644 --- a/cmd/podman/checkpoint.go +++ b/cmd/podman/checkpoint.go @@ -23,6 +23,10 @@ var ( Name: "keep, k", Usage: "keep all temporary checkpoint files", }, + cli.BoolFlag{ + Name: "leave-running, R", + Usage: "leave the container running after writing checkpoint to disk", + }, cli.BoolFlag{ Name: "all, a", Usage: "checkpoint all running containers", @@ -51,7 +55,8 @@ func checkpointCmd(c *cli.Context) error { defer runtime.Shutdown(false) options := libpod.ContainerCheckpointOptions{ - Keep: c.Bool("keep"), + Keep: c.Bool("keep"), + KeepRunning: c.Bool("leave-running"), } if err := checkAllAndLatest(c); err != nil { diff --git a/libpod/container_api.go b/libpod/container_api.go index 9f5436b14c..df6b6e9628 100644 --- a/libpod/container_api.go +++ b/libpod/container_api.go @@ -833,7 +833,8 @@ func (c *Container) Refresh(ctx context.Context) error { // ContainerCheckpointOptions is a struct used to pass the parameters // for checkpointing to corresponding 
functions type ContainerCheckpointOptions struct { - Keep bool + Keep bool + KeepRunning bool } // Checkpoint checkpoints a container diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 003e8284a9..e6071945d6 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -440,7 +440,7 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO if c.state.State != ContainerStateRunning { return errors.Wrapf(ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State) } - if err := c.runtime.ociRuntime.checkpointContainer(c); err != nil { + if err := c.runtime.ociRuntime.checkpointContainer(c, options); err != nil { return err } @@ -457,11 +457,13 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO logrus.Debugf("Checkpointed container %s", c.ID()) - c.state.State = ContainerStateStopped + if !options.KeepRunning { + c.state.State = ContainerStateStopped - // Cleanup Storage and Network - if err := c.cleanup(ctx); err != nil { - return err + // Cleanup Storage and Network + if err := c.cleanup(ctx); err != nil { + return err + } } if !options.Keep { diff --git a/libpod/oci.go b/libpod/oci.go index 71da830b59..8ee2c948f5 100644 --- a/libpod/oci.go +++ b/libpod/oci.go @@ -844,13 +844,22 @@ func (r *OCIRuntime) execStopContainer(ctr *Container, timeout uint) error { } // checkpointContainer checkpoints the given container -func (r *OCIRuntime) checkpointContainer(ctr *Container) error { +func (r *OCIRuntime) checkpointContainer(ctr *Container, options ContainerCheckpointOptions) error { // imagePath is used by CRIU to store the actual checkpoint files imagePath := ctr.CheckpointPath() // workPath will be used to store dump.log and stats-dump workPath := ctr.bundlePath() logrus.Debugf("Writing checkpoint to %s", imagePath) logrus.Debugf("Writing checkpoint logs to %s", workPath) - return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, 
os.Stderr, nil, r.path, "checkpoint", - "--image-path", imagePath, "--work-path", workPath, ctr.ID()) + args := []string{} + args = append(args, "checkpoint") + args = append(args, "--image-path") + args = append(args, imagePath) + args = append(args, "--work-path") + args = append(args, workPath) + if options.KeepRunning { + args = append(args, "--leave-running") + } + args = append(args, ctr.ID()) + return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...) } From 24c0739453b3f103a1b548c1bb611013b488afbe Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 20 Nov 2018 16:22:48 +0000 Subject: [PATCH 3/3] Update checkpoint/restore man pages This adds the '--leave-running, -R' to the container-checkpoint man page. As the information for '--all, -a' and '--latest, -l' was also still missing it is included in this commit. Signed-off-by: Adrian Reber --- docs/podman-container-checkpoint.1.md | 12 ++++++++++++ docs/podman-container-restore.1.md | 8 ++++++++ 2 files changed, 20 insertions(+) diff --git a/docs/podman-container-checkpoint.1.md b/docs/podman-container-checkpoint.1.md index 4906e0e12e..6f454dfd10 100644 --- a/docs/podman-container-checkpoint.1.md +++ b/docs/podman-container-checkpoint.1.md @@ -17,6 +17,18 @@ are not deleted if checkpointing fails for further debugging. If checkpointing s files are theoretically not needed, but if these files are needed Podman can keep the files for further analysis. +**--all, -a** + +Checkpoint all running containers. + +**--latest, -l** + +Instead of providing the container name or ID, checkpoint the last created container. + +**--leave-running, -R** + +Leave the container running after checkpointing instead of stopping it. 
+ ## EXAMPLE podman container checkpoint mywebserver diff --git a/docs/podman-container-restore.1.md b/docs/podman-container-restore.1.md index 6360bccb0b..4dd5ea7c78 100644 --- a/docs/podman-container-restore.1.md +++ b/docs/podman-container-restore.1.md @@ -24,6 +24,14 @@ processes in the checkpointed container. Without the **-k**, **--keep** option the checkpoint will be consumed and cannot be used again. +**--all, -a** + +Restore all checkpointed containers. + +**--latest, -l** + +Instead of providing the container name or ID, restore the last created container. + ## EXAMPLE podman container restore mywebserver