mirror of https://github.com/containers/podman.git
				
				
				
			Merge pull request #469 from adrianreber/master
Add support to checkpoint/restore containers
This commit is contained in:
		
						commit
						06a959f74a
					
				
							
								
								
									
										12
									
								
								Dockerfile
								
								
								
								
							
							
						
						
									
										12
									
								
								Dockerfile
								
								
								
								
							|  | @ -18,6 +18,8 @@ RUN apt-get update && apt-get install -y \ | |||
|     libaio-dev \ | ||||
|     libcap-dev \ | ||||
|     libfuse-dev \ | ||||
|     libnet-dev \ | ||||
|     libnl-3-dev \ | ||||
|     libostree-dev \ | ||||
|     libprotobuf-dev \ | ||||
|     libprotobuf-c0-dev \ | ||||
|  | @ -110,6 +112,16 @@ RUN set -x \ | |||
|       && go get -u github.com/mailru/easyjson/... \ | ||||
|       && install -D -m 755 "$GOPATH"/bin/easyjson /usr/bin/ | ||||
| 
 | ||||
# Install criu
# Build CRIU from source at the commit pinned in CRIU_COMMIT so that image
# builds are reproducible instead of tracking the tip of the repository.
ENV CRIU_COMMIT 584cbe4643c3fc7dc901ff08bf923ca0fe7326f9
RUN set -x \
      && cd /tmp \
      && git clone https://github.com/checkpoint-restore/criu.git \
      && cd criu \
      && git checkout -q "$CRIU_COMMIT" \
      && make \
      && install -D -m 755  criu/criu /usr/sbin/ \
      && rm -rf /tmp/criu
| 
 | ||||
| # Install cni config | ||||
| #RUN make install.cni | ||||
| RUN mkdir -p /etc/cni/net.d/ | ||||
|  |  | |||
|  | @ -0,0 +1,73 @@ | |||
| package main | ||||
| 
 | ||||
| import ( | ||||
| 	"context" | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 
 | ||||
| 	"github.com/containers/libpod/cmd/podman/libpodruntime" | ||||
| 	"github.com/containers/libpod/pkg/rootless" | ||||
| 	"github.com/pkg/errors" | ||||
| 	"github.com/urfave/cli" | ||||
| ) | ||||
| 
 | ||||
| var ( | ||||
| 	checkpointDescription = ` | ||||
|    podman container checkpoint | ||||
| 
 | ||||
|    Checkpoints one or more running containers. The container name or ID can be used. | ||||
| ` | ||||
| 	checkpointFlags = []cli.Flag{ | ||||
| 		cli.BoolFlag{ | ||||
| 			Name:  "keep, k", | ||||
| 			Usage: "keep all temporary checkpoint files", | ||||
| 		}, | ||||
| 	} | ||||
| 	checkpointCommand = cli.Command{ | ||||
| 		Name:        "checkpoint", | ||||
| 		Usage:       "Checkpoints one or more containers", | ||||
| 		Description: checkpointDescription, | ||||
| 		Flags:       checkpointFlags, | ||||
| 		Action:      checkpointCmd, | ||||
| 		ArgsUsage:   "CONTAINER-NAME [CONTAINER-NAME ...]", | ||||
| 	} | ||||
| ) | ||||
| 
 | ||||
| func checkpointCmd(c *cli.Context) error { | ||||
| 	if rootless.IsRootless() { | ||||
| 		return errors.New("checkpointing a container requires root") | ||||
| 	} | ||||
| 
 | ||||
| 	runtime, err := libpodruntime.GetRuntime(c) | ||||
| 	if err != nil { | ||||
| 		return errors.Wrapf(err, "could not get runtime") | ||||
| 	} | ||||
| 	defer runtime.Shutdown(false) | ||||
| 
 | ||||
| 	keep := c.Bool("keep") | ||||
| 	args := c.Args() | ||||
| 	if len(args) < 1 { | ||||
| 		return errors.Errorf("you must provide at least one container name or id") | ||||
| 	} | ||||
| 
 | ||||
| 	var lastError error | ||||
| 	for _, arg := range args { | ||||
| 		ctr, err := runtime.LookupContainer(arg) | ||||
| 		if err != nil { | ||||
| 			if lastError != nil { | ||||
| 				fmt.Fprintln(os.Stderr, lastError) | ||||
| 			} | ||||
| 			lastError = errors.Wrapf(err, "error looking up container %q", arg) | ||||
| 			continue | ||||
| 		} | ||||
| 		if err = ctr.Checkpoint(context.TODO(), keep); err != nil { | ||||
| 			if lastError != nil { | ||||
| 				fmt.Fprintln(os.Stderr, lastError) | ||||
| 			} | ||||
| 			lastError = errors.Wrapf(err, "failed to checkpoint container %v", ctr.ID()) | ||||
| 		} else { | ||||
| 			fmt.Println(ctr.ID()) | ||||
| 		} | ||||
| 	} | ||||
| 	return lastError | ||||
| } | ||||
|  | @ -7,6 +7,7 @@ import ( | |||
| var ( | ||||
| 	subCommands = []cli.Command{ | ||||
| 		attachCommand, | ||||
| 		checkpointCommand, | ||||
| 		cleanupCommand, | ||||
| 		commitCommand, | ||||
| 		createCommand, | ||||
|  | @ -23,6 +24,7 @@ var ( | |||
| 		//		pruneCommand,
 | ||||
| 		refreshCommand, | ||||
| 		restartCommand, | ||||
| 		restoreCommand, | ||||
| 		rmCommand, | ||||
| 		runCommand, | ||||
| 		runlabelCommand, | ||||
|  |  | |||
|  | @ -0,0 +1,73 @@ | |||
| package main | ||||
| 
 | ||||
| import ( | ||||
| 	"context" | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 
 | ||||
| 	"github.com/containers/libpod/cmd/podman/libpodruntime" | ||||
| 	"github.com/containers/libpod/pkg/rootless" | ||||
| 	"github.com/pkg/errors" | ||||
| 	"github.com/urfave/cli" | ||||
| ) | ||||
| 
 | ||||
| var ( | ||||
| 	restoreDescription = ` | ||||
|    podman container restore | ||||
| 
 | ||||
|    Restores a container from a checkpoint. The container name or ID can be used. | ||||
| ` | ||||
| 	restoreFlags = []cli.Flag{ | ||||
| 		cli.BoolFlag{ | ||||
| 			Name:  "keep, k", | ||||
| 			Usage: "keep all temporary checkpoint files", | ||||
| 		}, | ||||
| 	} | ||||
| 	restoreCommand = cli.Command{ | ||||
| 		Name:        "restore", | ||||
| 		Usage:       "Restores one or more containers from a checkpoint", | ||||
| 		Description: restoreDescription, | ||||
| 		Flags:       restoreFlags, | ||||
| 		Action:      restoreCmd, | ||||
| 		ArgsUsage:   "CONTAINER-NAME [CONTAINER-NAME ...]", | ||||
| 	} | ||||
| ) | ||||
| 
 | ||||
| func restoreCmd(c *cli.Context) error { | ||||
| 	if rootless.IsRootless() { | ||||
| 		return errors.New("restoring a container requires root") | ||||
| 	} | ||||
| 
 | ||||
| 	runtime, err := libpodruntime.GetRuntime(c) | ||||
| 	if err != nil { | ||||
| 		return errors.Wrapf(err, "could not get runtime") | ||||
| 	} | ||||
| 	defer runtime.Shutdown(false) | ||||
| 
 | ||||
| 	keep := c.Bool("keep") | ||||
| 	args := c.Args() | ||||
| 	if len(args) < 1 { | ||||
| 		return errors.Errorf("you must provide at least one container name or id") | ||||
| 	} | ||||
| 
 | ||||
| 	var lastError error | ||||
| 	for _, arg := range args { | ||||
| 		ctr, err := runtime.LookupContainer(arg) | ||||
| 		if err != nil { | ||||
| 			if lastError != nil { | ||||
| 				fmt.Fprintln(os.Stderr, lastError) | ||||
| 			} | ||||
| 			lastError = errors.Wrapf(err, "error looking up container %q", arg) | ||||
| 			continue | ||||
| 		} | ||||
| 		if err = ctr.Restore(context.TODO(), keep); err != nil { | ||||
| 			if lastError != nil { | ||||
| 				fmt.Fprintln(os.Stderr, lastError) | ||||
| 			} | ||||
| 			lastError = errors.Wrapf(err, "failed to restore container %v", ctr.ID()) | ||||
| 		} else { | ||||
| 			fmt.Println(ctr.ID()) | ||||
| 		} | ||||
| 	} | ||||
| 	return lastError | ||||
| } | ||||
|  | @ -87,6 +87,10 @@ __podman_complete_containers_all() { | |||
| 	__podman_complete_containers "$@" --all | ||||
| } | ||||
| 
 | ||||
| # Forwards its arguments to __podman_complete_containers, adding --all and a status=created filter. | ||||
| __podman_complete_containers_created() { | ||||
| 	__podman_complete_containers "$@" --all --filter status=created | ||||
| } | ||||
| 
 | ||||
| __podman_complete_containers_running() { | ||||
| 	__podman_complete_containers "$@" --filter status=running | ||||
| } | ||||
|  | @ -710,6 +714,24 @@ _podman_container_attach() { | |||
|      _podman_attach | ||||
| } | ||||
| 
 | ||||
# Completion for "podman container checkpoint": offers the known options when
# the current word starts with a dash, otherwise completes running containers.
_podman_container_checkpoint() {
     local options_with_args="
     --help -h
     "
     local boolean_options="
     --keep
     -k
     "
     if [[ "$cur" == -* ]]; then
         COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
     else
         __podman_complete_containers_running
     fi
}
| 
 | ||||
| _podman_container_commit() { | ||||
|      _podman_commit | ||||
| } | ||||
|  | @ -770,6 +792,24 @@ _podman_container_restart() { | |||
|      _podman_restart | ||||
| } | ||||
| 
 | ||||
# Completion for "podman container restore": offers the known options when
# the current word starts with a dash, otherwise completes container names.
_podman_container_restore() {
     local options_with_args="
     --help -h
     "
     local boolean_options="
     --keep
     -k
     "
     case "$cur" in
        -*)
            COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
            ;;
        *)
            # A checkpointed container is reported as exited (not created),
            # so offer exited containers as restore candidates.
            # NOTE(review): not every exited container has a checkpoint;
            # the filter cannot express that.
            __podman_complete_containers --all --filter status=exited
            ;;
     esac
}
| 
 | ||||
| _podman_container_rm() { | ||||
|      _podman_rm | ||||
| } | ||||
|  | @ -817,6 +857,7 @@ _podman_container() { | |||
| 	" | ||||
|      subcommands=" | ||||
| 	 attach | ||||
| 	 checkpoint | ||||
| 	 commit | ||||
| 	 create | ||||
| 	 diff | ||||
|  | @ -831,6 +872,7 @@ _podman_container() { | |||
| 	 port | ||||
| 	 refresh | ||||
| 	 restart | ||||
| 	 restore | ||||
| 	 rm | ||||
| 	 run | ||||
| 	 start | ||||
|  |  | |||
|  | @ -0,0 +1,30 @@ | |||
| % podman-container-checkpoint(1) | ||||
| 
 | ||||
| ## NAME | ||||
| podman\-container\-checkpoint - Checkpoints one or more running containers | ||||
| 
 | ||||
| ## SYNOPSIS | ||||
| **podman container checkpoint** [*options*] *container* ... | ||||
| 
 | ||||
| ## DESCRIPTION | ||||
| Checkpoints all the processes in one or more containers. You may use container IDs or names as input. | ||||
| 
 | ||||
| ## OPTIONS | ||||
| **-k**, **--keep** | ||||
| 
 | ||||
| Keep all temporary log and statistics files created by CRIU during checkpointing. If checkpointing | ||||
| fails, these files are never deleted so that they remain available for debugging. If checkpointing | ||||
| succeeds, they are normally not needed and are removed unless this option is given, in which case | ||||
| Podman keeps them for further analysis. | ||||
| 
 | ||||
| ## EXAMPLE | ||||
| 
 | ||||
| podman container checkpoint mywebserver | ||||
| 
 | ||||
| podman container checkpoint 860a4b23 | ||||
| 
 | ||||
| ## SEE ALSO | ||||
| podman(1), podman-container-restore(1) | ||||
| 
 | ||||
| ## HISTORY | ||||
| September 2018, Originally compiled by Adrian Reber <areber@redhat.com> | ||||
|  | @ -0,0 +1,37 @@ | |||
| % podman-container-restore(1) | ||||
| 
 | ||||
| ## NAME | ||||
| podman\-container\-restore - Restores one or more containers from a checkpoint | ||||
| 
 | ||||
| ## SYNOPSIS | ||||
| **podman container restore** [*options*] *container* ... | ||||
| 
 | ||||
| ## DESCRIPTION | ||||
| Restores a container from a checkpoint. You may use container IDs or names as input. | ||||
| 
 | ||||
| ## OPTIONS | ||||
| **-k**, **--keep** | ||||
| 
 | ||||
| Keep all temporary log and statistics files created by CRIU during | ||||
| checkpointing as well as restoring. If restoring fails, these files are | ||||
| never deleted so that they remain available for debugging. If restoring | ||||
| succeeds, they are normally not needed and are removed unless this option | ||||
| is given, in which case Podman keeps them for further analysis. This | ||||
| includes the checkpoint directory with all files created during | ||||
| checkpointing. The size required by the checkpoint directory is roughly the | ||||
| same as the amount of memory required by the processes in the checkpointed container. | ||||
| 
 | ||||
| Without the **-k**, **--keep** option the checkpoint will be consumed and cannot be used | ||||
| again. | ||||
| 
 | ||||
| ## EXAMPLE | ||||
| 
 | ||||
| podman container restore mywebserver | ||||
| 
 | ||||
| podman container restore 860a4b23 | ||||
| 
 | ||||
| ## SEE ALSO | ||||
| podman(1), podman-container-checkpoint(1) | ||||
| 
 | ||||
| ## HISTORY | ||||
| September 2018, Originally compiled by Adrian Reber <areber@redhat.com> | ||||
|  | @ -14,6 +14,7 @@ The container command allows you to manage containers | |||
| | Command  | Man Page                                            | Description                                                                  | | ||||
| | -------  | --------------------------------------------------- | ---------------------------------------------------------------------------- | | ||||
| | attach   | [podman-attach(1)](podman-attach.1.md)              | Attach to a running container.                                               | | ||||
| | checkpoint | [podman-container-checkpoint(1)](podman-container-checkpoint.1.md)  | Checkpoints one or more containers.                        | | ||||
| | cleanup  | [podman-container-cleanup(1)](podman-container-cleanup.1.md)    | Cleanup containers network and mountpoints.                               | | ||||
| | commit   | [podman-commit(1)](podman-commit.1.md)              | Create new image based on the changed container.                             | | ||||
| | create   | [podman-create(1)](podman-create.1.md)              | Create a new container.                                                      | | ||||
|  | @ -29,6 +30,7 @@ The container command allows you to manage containers | |||
| | port     | [podman-port(1)](podman-port.1.md)                  | List port mappings for the container.                                        | | ||||
| | refresh  | [podman-refresh(1)](podman-container-refresh.1.md)  | Refresh the state of all containers                                          | | ||||
| | restart  | [podman-restart(1)](podman-restart.1.md)            | Restart one or more containers.                                              | | ||||
| | restore  | [podman-container-restore(1)](podman-container-restore.1.md)  | Restores one or more containers from a checkpoint.                 | | ||||
| | rm       | [podman-rm(1)](podman-rm.1.md)                      | Remove one or more containers.                                               | | ||||
| | run      | [podman-run(1)](podman-run.1.md)                    | Run a command in a container.                                                | | ||||
| | start    | [podman-start(1)](podman-start.1.md)                | Starts one or more containers.                                               | | ||||
|  |  | |||
|  | @ -157,6 +157,28 @@ $ sudo podman top <container_id> | |||
|   101 31889 31873  0 09:21 ?        00:00:00 nginx: worker process | ||||
| ``` | ||||
| 
 | ||||
| ### Checkpointing the container | ||||
| Checkpointing a container stops the container while writing the state of all processes in the container to disk. | ||||
| With this a container can later be restored and continue running at exactly the same point in time as the | ||||
| checkpoint. This capability requires CRIU 3.11 or later installed on the system. | ||||
| To checkpoint the container use: | ||||
| ```console | ||||
| $ sudo podman container checkpoint <container_id> | ||||
| ``` | ||||
| 
 | ||||
| ### Restoring the container | ||||
| Restoring a container is only possible for a previously checkpointed container. The restored container will | ||||
| continue to run at exactly the same point in time it was checkpointed. | ||||
| To restore the container use: | ||||
| ```console | ||||
| $ sudo podman container restore <container_id> | ||||
| ``` | ||||
| 
 | ||||
| After being restored, the container will answer requests again as it did before checkpointing. | ||||
| ```console | ||||
| # curl http://<IP_address>:8080 | ||||
| ``` | ||||
| 
 | ||||
| ### Stopping the container | ||||
| To stop the httpd container: | ||||
| ```console | ||||
|  |  | |||
|  | @ -832,3 +832,33 @@ func (c *Container) Refresh(ctx context.Context) error { | |||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // Checkpoint checkpoints a container
 | ||||
| func (c *Container) Checkpoint(ctx context.Context, keep bool) error { | ||||
| 	logrus.Debugf("Trying to checkpoint container %s", c) | ||||
| 	if !c.batched { | ||||
| 		c.lock.Lock() | ||||
| 		defer c.lock.Unlock() | ||||
| 
 | ||||
| 		if err := c.syncContainer(); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return c.checkpoint(ctx, keep) | ||||
| } | ||||
| 
 | ||||
| // Restore restores a container
 | ||||
| func (c *Container) Restore(ctx context.Context, keep bool) (err error) { | ||||
| 	logrus.Debugf("Trying to restore container %s", c) | ||||
| 	if !c.batched { | ||||
| 		c.lock.Lock() | ||||
| 		defer c.lock.Unlock() | ||||
| 
 | ||||
| 		if err := c.syncContainer(); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return c.restore(ctx, keep) | ||||
| } | ||||
|  |  | |||
|  | @ -129,6 +129,11 @@ func (c *Container) ControlSocketPath() string { | |||
| 	return filepath.Join(c.bundlePath(), "ctl") | ||||
| } | ||||
| 
 | ||||
| // CheckpointPath returns the path to the directory containing the checkpoint
 | ||||
| func (c *Container) CheckpointPath() string { | ||||
| 	return filepath.Join(c.bundlePath(), "checkpoint") | ||||
| } | ||||
| 
 | ||||
| // AttachSocketPath retrieves the path of the container's attach socket
 | ||||
| func (c *Container) AttachSocketPath() string { | ||||
| 	return filepath.Join(c.runtime.ociRuntime.socketsDir, c.ID(), "attach") | ||||
|  | @ -523,7 +528,7 @@ func (c *Container) init(ctx context.Context) error { | |||
| 	} | ||||
| 
 | ||||
| 	// With the spec complete, do an OCI create
 | ||||
| 	if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent); err != nil { | ||||
| 	if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, false); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
|  | @ -4,12 +4,18 @@ package libpod | |||
| 
 | ||||
| import ( | ||||
| 	"context" | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 	"net" | ||||
| 	"os" | ||||
| 	"path" | ||||
| 	"path/filepath" | ||||
| 	"strings" | ||||
| 	"syscall" | ||||
| 	"time" | ||||
| 
 | ||||
| 	cnitypes "github.com/containernetworking/cni/pkg/types/current" | ||||
| 	crioAnnotations "github.com/containers/libpod/pkg/annotations" | ||||
| 	"github.com/containers/libpod/pkg/chrootuser" | ||||
| 	"github.com/containers/libpod/pkg/rootless" | ||||
|  | @ -307,3 +313,155 @@ func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr | |||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (c *Container) checkpoint(ctx context.Context, keep bool) (err error) { | ||||
| 
 | ||||
| 	if c.state.State != ContainerStateRunning { | ||||
| 		return errors.Wrapf(ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State) | ||||
| 	} | ||||
| 	if err := c.runtime.ociRuntime.checkpointContainer(c); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	// Save network.status. This is needed to restore the container with
 | ||||
| 	// the same IP. Currently limited to one IP address in a container
 | ||||
| 	// with one interface.
 | ||||
| 	formatJSON, err := json.MarshalIndent(c.state.NetworkStatus, "", "	") | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	if err := ioutil.WriteFile(filepath.Join(c.bundlePath(), "network.status"), formatJSON, 0644); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	logrus.Debugf("Checkpointed container %s", c.ID()) | ||||
| 
 | ||||
| 	c.state.State = ContainerStateStopped | ||||
| 
 | ||||
| 	// Cleanup Storage and Network
 | ||||
| 	if err := c.cleanup(ctx); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	if !keep { | ||||
| 		// Remove log file
 | ||||
| 		os.Remove(filepath.Join(c.bundlePath(), "dump.log")) | ||||
| 		// Remove statistic file
 | ||||
| 		os.Remove(filepath.Join(c.bundlePath(), "stats-dump")) | ||||
| 	} | ||||
| 
 | ||||
| 	return c.save() | ||||
| } | ||||
| 
 | ||||
| func (c *Container) restore(ctx context.Context, keep bool) (err error) { | ||||
| 
 | ||||
| 	if (c.state.State != ContainerStateConfigured) && (c.state.State != ContainerStateExited) { | ||||
| 		return errors.Wrapf(ErrCtrStateInvalid, "container %s is running or paused, cannot restore", c.ID()) | ||||
| 	} | ||||
| 
 | ||||
| 	// Let's try to stat() CRIU's inventory file. If it does not exist, it makes
 | ||||
| 	// no sense to try a restore. This is a minimal check if a checkpoint exist.
 | ||||
| 	if _, err := os.Stat(filepath.Join(c.CheckpointPath(), "inventory.img")); os.IsNotExist(err) { | ||||
| 		return errors.Wrapf(err, "A complete checkpoint for this container cannot be found, cannot restore") | ||||
| 	} | ||||
| 
 | ||||
| 	// Read network configuration from checkpoint
 | ||||
| 	// Currently only one interface with one IP is supported.
 | ||||
| 	networkStatusFile, err := os.Open(filepath.Join(c.bundlePath(), "network.status")) | ||||
| 	if err == nil { | ||||
| 		// The file with the network.status does exist. Let's restore the
 | ||||
| 		// container with the same IP address as during checkpointing.
 | ||||
| 		defer networkStatusFile.Close() | ||||
| 		var networkStatus []*cnitypes.Result | ||||
| 		networkJSON, err := ioutil.ReadAll(networkStatusFile) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		json.Unmarshal(networkJSON, &networkStatus) | ||||
| 		// Take the first IP address
 | ||||
| 		var IP net.IP | ||||
| 		if len(networkStatus) > 0 { | ||||
| 			if len(networkStatus[0].IPs) > 0 { | ||||
| 				IP = networkStatus[0].IPs[0].Address.IP | ||||
| 			} | ||||
| 		} | ||||
| 		if IP != nil { | ||||
| 			env := fmt.Sprintf("IP=%s", IP) | ||||
| 			// Tell CNI which IP address we want.
 | ||||
| 			os.Setenv("CNI_ARGS", env) | ||||
| 			logrus.Debugf("Restoring container with %s", env) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if err := c.prepare(); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	defer func() { | ||||
| 		if err != nil { | ||||
| 			if err2 := c.cleanup(ctx); err2 != nil { | ||||
| 				logrus.Errorf("error cleaning up container %s: %v", c.ID(), err2) | ||||
| 			} | ||||
| 		} | ||||
| 	}() | ||||
| 
 | ||||
| 	// TODO: use existing way to request static IPs, once it is merged in ocicni
 | ||||
| 	// https://github.com/cri-o/ocicni/pull/23/
 | ||||
| 
 | ||||
| 	// CNI_ARGS was used to request a certain IP address. Unconditionally remove it.
 | ||||
| 	os.Unsetenv("CNI_ARGS") | ||||
| 
 | ||||
| 	// Read config
 | ||||
| 	jsonPath := filepath.Join(c.bundlePath(), "config.json") | ||||
| 	logrus.Debugf("generate.NewFromFile at %v", jsonPath) | ||||
| 	g, err := generate.NewFromFile(jsonPath) | ||||
| 	if err != nil { | ||||
| 		logrus.Debugf("generate.NewFromFile failed with %v", err) | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	// We want to have the same network namespace as before.
 | ||||
| 	if c.config.CreateNetNS { | ||||
| 		g.AddOrReplaceLinuxNamespace(spec.NetworkNamespace, c.state.NetNS.Path()) | ||||
| 	} | ||||
| 
 | ||||
| 	// Save the OCI spec to disk
 | ||||
| 	if err := c.saveSpec(g.Spec()); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	if err := c.makeBindMounts(); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	// Cleanup for a working restore.
 | ||||
| 	c.removeConmonFiles() | ||||
| 
 | ||||
| 	if err := c.runtime.ociRuntime.createContainer(c, c.config.CgroupParent, true); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 
 | ||||
| 	logrus.Debugf("Restored container %s", c.ID()) | ||||
| 
 | ||||
| 	c.state.State = ContainerStateRunning | ||||
| 
 | ||||
| 	if !keep { | ||||
| 		// Delete all checkpoint related files. At this point, in theory, all files
 | ||||
| 		// should exist. Still ignoring errors for now as the container should be
 | ||||
| 		// restored and running. Not erroring out just because some cleanup operation
 | ||||
| 		// failed. Starting with the checkpoint directory
 | ||||
| 		err = os.RemoveAll(c.CheckpointPath()) | ||||
| 		if err != nil { | ||||
| 			logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err) | ||||
| 		} | ||||
| 		cleanup := [...]string{"restore.log", "dump.log", "stats-dump", "stats-restore", "network.status"} | ||||
| 		for _, delete := range cleanup { | ||||
| 			file := filepath.Join(c.bundlePath(), delete) | ||||
| 			err = os.Remove(file) | ||||
| 			if err != nil { | ||||
| 				logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return c.save() | ||||
| } | ||||
|  |  | |||
|  | @ -27,3 +27,11 @@ func (c *Container) cleanupNetwork() error { | |||
| func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { | ||||
| 	return nil, ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // checkpoint is a stub in this file: it ignores its arguments and always returns ErrNotImplemented.
 | ||||
| func (c *Container) checkpoint(ctx context.Context, keep bool) error { | ||||
| 	return ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| // restore is a stub in this file: it ignores its arguments and always returns ErrNotImplemented.
 | ||||
| func (c *Container) restore(ctx context.Context, keep bool) error { | ||||
| 	return ErrNotImplemented | ||||
| } | ||||
|  |  | |||
|  | @ -227,7 +227,7 @@ func bindPorts(ports []ocicni.PortMapping) ([]*os.File, error) { | |||
| 	return files, nil | ||||
| } | ||||
| 
 | ||||
| func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string) (err error) { | ||||
| func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) { | ||||
| 	var stderrBuf bytes.Buffer | ||||
| 
 | ||||
| 	runtimeDir, err := GetRootlessRuntimeDir() | ||||
|  | @ -289,6 +289,10 @@ func (r *OCIRuntime) createOCIContainer(ctr *Container, cgroupParent string) (er | |||
| 		args = append(args, "--syslog") | ||||
| 	} | ||||
| 
 | ||||
| 	if restoreContainer { | ||||
| 		args = append(args, "--restore", ctr.CheckpointPath()) | ||||
| 	} | ||||
| 
 | ||||
| 	logrus.WithFields(logrus.Fields{ | ||||
| 		"args": args, | ||||
| 	}).Debugf("running conmon: %s", r.conmonPath) | ||||
|  | @ -766,3 +770,15 @@ func (r *OCIRuntime) execStopContainer(ctr *Container, timeout uint) error { | |||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // checkpointContainer checkpoints the given container
 | ||||
| func (r *OCIRuntime) checkpointContainer(ctr *Container) error { | ||||
| 	// imagePath is used by CRIU to store the actual checkpoint files
 | ||||
| 	imagePath := ctr.CheckpointPath() | ||||
| 	// workPath will be used to store dump.log and stats-dump
 | ||||
| 	workPath := ctr.bundlePath() | ||||
| 	logrus.Debugf("Writing checkpoint to %s", imagePath) | ||||
| 	logrus.Debugf("Writing checkpoint logs to %s", workPath) | ||||
| 	return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, "checkpoint", | ||||
| 		"--image-path", imagePath, "--work-path", workPath, ctr.ID()) | ||||
| } | ||||
|  |  | |||
|  | @ -63,10 +63,10 @@ func newPipe() (parent *os.File, child *os.File, err error) { | |||
| // CreateContainer creates a container in the OCI runtime
 | ||||
| // TODO terminal support for container
 | ||||
| // Presently just ignoring conmon opts related to it
 | ||||
| func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) (err error) { | ||||
| func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) { | ||||
| 	if ctr.state.UserNSRoot == "" { | ||||
| 		// no need of an intermediate mount ns
 | ||||
| 		return r.createOCIContainer(ctr, cgroupParent) | ||||
| 		return r.createOCIContainer(ctr, cgroupParent, restoreContainer) | ||||
| 	} | ||||
| 	var wg sync.WaitGroup | ||||
| 	wg.Add(1) | ||||
|  | @ -103,7 +103,7 @@ func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) (err e | |||
| 		if err != nil { | ||||
| 			return | ||||
| 		} | ||||
| 		err = r.createOCIContainer(ctr, cgroupParent) | ||||
| 		err = r.createOCIContainer(ctr, cgroupParent, restoreContainer) | ||||
| 	}() | ||||
| 	wg.Wait() | ||||
| 
 | ||||
|  |  | |||
|  | @ -15,7 +15,7 @@ func newPipe() (parent *os.File, child *os.File, err error) { | |||
| 	return nil, nil, ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
| func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string) (err error) { | ||||
| func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreContainer bool) (err error) { | ||||
| 	return ErrNotImplemented | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -0,0 +1,129 @@ | |||
| package integration | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 
 | ||||
| 	. "github.com/onsi/ginkgo" | ||||
| 	. "github.com/onsi/gomega" | ||||
| ) | ||||
| 
 | ||||
| var _ = Describe("Podman checkpoint", func() { | ||||
| 	var ( | ||||
| 		tempdir    string | ||||
| 		err        error | ||||
| 		podmanTest PodmanTest | ||||
| 	) | ||||
| 
 | ||||
| 	BeforeEach(func() { | ||||
| 		tempdir, err = CreateTempDirInTempDir() | ||||
| 		if err != nil { | ||||
| 			os.Exit(1) | ||||
| 		} | ||||
| 		podmanTest = PodmanCreate(tempdir) | ||||
| 		podmanTest.RestoreAllArtifacts() | ||||
| 		// At least CRIU 3.11 is needed
 | ||||
| 		skip, err := podmanTest.isCriuAtLeast(31100) | ||||
| 		if err != nil || skip { | ||||
| 			Skip("CRIU missing or too old.") | ||||
| 		} | ||||
| 	}) | ||||
| 
 | ||||
| 	AfterEach(func() { | ||||
| 		podmanTest.Cleanup() | ||||
| 		f := CurrentGinkgoTestDescription() | ||||
| 		timedResult := fmt.Sprintf("Test: %s completed in %f seconds", f.TestText, f.Duration.Seconds()) | ||||
| 		GinkgoWriter.Write([]byte(timedResult)) | ||||
| 	}) | ||||
| 
 | ||||
| 	It("podman checkpoint bogus container", func() { | ||||
| 		session := podmanTest.Podman([]string{"container", "checkpoint", "foobar"}) | ||||
| 		session.WaitWithDefaultTimeout() | ||||
| 		Expect(session.ExitCode()).To(Not(Equal(0))) | ||||
| 	}) | ||||
| 
 | ||||
| 	It("podman restore bogus container", func() { | ||||
| 		session := podmanTest.Podman([]string{"container", "restore", "foobar"}) | ||||
| 		session.WaitWithDefaultTimeout() | ||||
| 		Expect(session.ExitCode()).To(Not(Equal(0))) | ||||
| 	}) | ||||
| 
 | ||||
| 	It("podman checkpoint a running container by id", func() { | ||||
| 		// CRIU does not work with seccomp correctly on RHEL7
 | ||||
| 		session := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "-d", ALPINE, "top"}) | ||||
| 		session.WaitWithDefaultTimeout() | ||||
| 		Expect(session.ExitCode()).To(Equal(0)) | ||||
| 		cid := session.OutputToString() | ||||
| 
 | ||||
| 		result := podmanTest.Podman([]string{"container", "checkpoint", cid}) | ||||
| 		result.WaitWithDefaultTimeout() | ||||
| 
 | ||||
| 		Expect(result.ExitCode()).To(Equal(0)) | ||||
| 		Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) | ||||
| 		Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited")) | ||||
| 
 | ||||
| 		result = podmanTest.Podman([]string{"container", "restore", cid}) | ||||
| 		result.WaitWithDefaultTimeout() | ||||
| 
 | ||||
| 		Expect(result.ExitCode()).To(Equal(0)) | ||||
| 		Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1)) | ||||
| 		Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up")) | ||||
| 	}) | ||||
| 
 | ||||
| 	It("podman checkpoint a running container by name", func() { | ||||
| 		session := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "--name", "test_name", "-d", ALPINE, "top"}) | ||||
| 		session.WaitWithDefaultTimeout() | ||||
| 		Expect(session.ExitCode()).To(Equal(0)) | ||||
| 
 | ||||
| 		result := podmanTest.Podman([]string{"container", "checkpoint", "test_name"}) | ||||
| 		result.WaitWithDefaultTimeout() | ||||
| 
 | ||||
| 		Expect(result.ExitCode()).To(Equal(0)) | ||||
| 		Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) | ||||
| 		Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited")) | ||||
| 
 | ||||
| 		result = podmanTest.Podman([]string{"container", "restore", "test_name"}) | ||||
| 		result.WaitWithDefaultTimeout() | ||||
| 
 | ||||
| 		Expect(result.ExitCode()).To(Equal(0)) | ||||
| 		Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1)) | ||||
| 		Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up")) | ||||
| 	}) | ||||
| 
 | ||||
| 	It("podman pause a checkpointed container by id", func() { | ||||
| 		session := podmanTest.Podman([]string{"run", "-it", "--security-opt", "seccomp=unconfined", "-d", ALPINE, "top"}) | ||||
| 		session.WaitWithDefaultTimeout() | ||||
| 		Expect(session.ExitCode()).To(Equal(0)) | ||||
| 		cid := session.OutputToString() | ||||
| 
 | ||||
| 		result := podmanTest.Podman([]string{"container", "checkpoint", cid}) | ||||
| 		result.WaitWithDefaultTimeout() | ||||
| 
 | ||||
| 		Expect(result.ExitCode()).To(Equal(0)) | ||||
| 		Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) | ||||
| 		Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited")) | ||||
| 
 | ||||
| 		result = podmanTest.Podman([]string{"pause", cid}) | ||||
| 		result.WaitWithDefaultTimeout() | ||||
| 
 | ||||
| 		Expect(result.ExitCode()).To(Equal(125)) | ||||
| 		Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) | ||||
| 		Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited")) | ||||
| 
 | ||||
| 		result = podmanTest.Podman([]string{"container", "restore", cid}) | ||||
| 		result.WaitWithDefaultTimeout() | ||||
| 		Expect(result.ExitCode()).To(Equal(0)) | ||||
| 		Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1)) | ||||
| 
 | ||||
| 		result = podmanTest.Podman([]string{"rm", cid}) | ||||
| 		result.WaitWithDefaultTimeout() | ||||
| 		Expect(result.ExitCode()).To(Equal(125)) | ||||
| 		Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1)) | ||||
| 
 | ||||
| 		result = podmanTest.Podman([]string{"rm", "-f", cid}) | ||||
| 		result.WaitWithDefaultTimeout() | ||||
| 		Expect(result.ExitCode()).To(Equal(0)) | ||||
| 		Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0)) | ||||
| 
 | ||||
| 	}) | ||||
| }) | ||||
|  | @ -2,6 +2,7 @@ package integration | |||
| 
 | ||||
| import ( | ||||
| 	"bufio" | ||||
| 	"bytes" | ||||
| 	"context" | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
|  | @ -64,6 +65,7 @@ type PodmanTest struct { | |||
| 	TempDir             string | ||||
| 	CgroupManager       string | ||||
| 	Host                HostOS | ||||
| 	CriuBinary          string | ||||
| } | ||||
| 
 | ||||
| // HostOS is a simple struct for the test os
 | ||||
|  | @ -164,6 +166,7 @@ func PodmanCreate(tempDir string) PodmanTest { | |||
| 		runCBinary = "/usr/bin/runc" | ||||
| 	} | ||||
| 
 | ||||
| 	criuBinary := "/usr/sbin/criu" | ||||
| 	CNIConfigDir := "/etc/cni/net.d" | ||||
| 
 | ||||
| 	p := PodmanTest{ | ||||
|  | @ -179,6 +182,7 @@ func PodmanCreate(tempDir string) PodmanTest { | |||
| 		TempDir:             tempDir, | ||||
| 		CgroupManager:       cgroupManager, | ||||
| 		Host:                host, | ||||
| 		CriuBinary:          criuBinary, | ||||
| 	} | ||||
| 
 | ||||
| 	// Setup registries.conf ENV variable
 | ||||
|  | @ -678,6 +682,39 @@ func (p *PodmanTest) setRegistriesConfigEnv(b []byte) { | |||
| 	ioutil.WriteFile(outfile, b, 0644) | ||||
| } | ||||
| 
 | ||||
| func (p *PodmanTest) isCriuAtLeast(version int) (bool, error) { | ||||
| 	cmd := exec.Command(p.CriuBinary, "-V") | ||||
| 	var out bytes.Buffer | ||||
| 	cmd.Stdout = &out | ||||
| 	err := cmd.Run() | ||||
| 	if err != nil { | ||||
| 		return false, err | ||||
| 	} | ||||
| 
 | ||||
| 	var x int | ||||
| 	var y int | ||||
| 	var z int | ||||
| 
 | ||||
| 	fmt.Sscanf(out.String(), "Version: %d.%d.%d", &x, &y, &z) | ||||
| 
 | ||||
| 	if strings.Contains(out.String(), "GitID") { | ||||
| 		// If CRIU is built from git it contains a git ID.
 | ||||
| 		// If that is the case, increase minor by one as this
 | ||||
| 		// could mean we are running a development version.
 | ||||
| 		y = y + 1 | ||||
| 	} | ||||
| 
 | ||||
| 	parsed_version := x*10000 + y*100 + z | ||||
| 
 | ||||
| 	fmt.Println(parsed_version) | ||||
| 
 | ||||
| 	if parsed_version >= version { | ||||
| 		return false, nil | ||||
| 	} else { | ||||
| 		return true, nil | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
// resetRegistriesConfigEnv sets the REGISTRIES_CONFIG_PATH environment
// variable to the empty string. The variable stays defined (it is not
// unset), and any error from os.Setenv is deliberately ignored.
func resetRegistriesConfigEnv() {
	const registriesConfigEnv = "REGISTRIES_CONFIG_PATH"
	_ = os.Setenv(registriesConfigEnv, "")
}
|  |  | |||
		Loading…
	
		Reference in New Issue