mirror of https://github.com/containers/podman.git
Added optional container checkpointing statistics
This adds the parameter '--print-stats' to 'podman container checkpoint'. With '--print-stats', Podman will measure how long Podman itself, the OCI runtime and CRIU require to create a checkpoint and print out this information. CRIU already creates checkpointing statistics which are just read in addition to the added measurements. In contrast to just printing out the ID of the checkpointed container, Podman will now print out JSON: # podman container checkpoint --latest --print-stats { "podman_checkpoint_duration": 360749, "container_statistics": [ { "Id": "25244244bf2efbef30fb6857ddea8cb2e5489f07eb6659e20dda117f0c466808", "runtime_checkpoint_duration": 177222, "criu_statistics": { "freezing_time": 100657, "frozen_time": 60700, "memdump_time": 8162, "memwrite_time": 4224, "pages_scanned": 20561, "pages_written": 2129 } } ] } The output contains 'podman_checkpoint_duration' which contains the number of microseconds Podman required to create the checkpoint. The output also includes 'runtime_checkpoint_duration' which is the time the runtime needed to checkpoint that specific container. Each container also includes 'criu_statistics' which displays the timing information collected by CRIU. Signed-off-by: Adrian Reber <areber@redhat.com>
This commit is contained in:
parent
cca6df428c
commit
6202e8102b
|
@ -4,6 +4,7 @@ import (
|
|||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/containers/common/pkg/completion"
|
||||
"github.com/containers/podman/v3/cmd/podman/common"
|
||||
|
@ -40,6 +41,11 @@ var (
|
|||
|
||||
var checkpointOptions entities.CheckpointOptions
|
||||
|
||||
// checkpointStatistics is the JSON document printed by the checkpoint
// command when --print-stats is set.
type checkpointStatistics struct {
|
||||
// PodmanDuration is the time in microseconds measured from the start of
// the checkpoint command until just before the responses are processed.
PodmanDuration int64 `json:"podman_checkpoint_duration"`
|
||||
// ContainerStatistics collects the reports of the containers that were
// checkpointed without an error.
ContainerStatistics []*entities.CheckpointReport `json:"container_statistics"`
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.Commands = append(registry.Commands, registry.CliCommand{
|
||||
Command: checkpointCommand,
|
||||
|
@ -63,11 +69,19 @@ func init() {
|
|||
flags.StringP("compress", "c", "zstd", "Select compression algorithm (gzip, none, zstd) for checkpoint archive.")
|
||||
_ = checkpointCommand.RegisterFlagCompletionFunc("compress", common.AutocompleteCheckpointCompressType)
|
||||
|
||||
flags.BoolVar(
|
||||
&checkpointOptions.PrintStats,
|
||||
"print-stats",
|
||||
false,
|
||||
"Display checkpoint statistics",
|
||||
)
|
||||
|
||||
validate.AddLatestFlag(checkpointCommand, &checkpointOptions.Latest)
|
||||
}
|
||||
|
||||
func checkpoint(cmd *cobra.Command, args []string) error {
|
||||
var errs utils.OutputErrors
|
||||
podmanStart := time.Now()
|
||||
if cmd.Flags().Changed("compress") {
|
||||
if checkpointOptions.Export == "" {
|
||||
return errors.Errorf("--compress can only be used with --export")
|
||||
|
@ -102,12 +116,30 @@ func checkpoint(cmd *cobra.Command, args []string) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
podmanFinished := time.Now()
|
||||
|
||||
var statistics checkpointStatistics
|
||||
|
||||
for _, r := range responses {
|
||||
if r.Err == nil {
|
||||
fmt.Println(r.Id)
|
||||
if checkpointOptions.PrintStats {
|
||||
statistics.ContainerStatistics = append(statistics.ContainerStatistics, r)
|
||||
} else {
|
||||
fmt.Println(r.Id)
|
||||
}
|
||||
} else {
|
||||
errs = append(errs, r.Err)
|
||||
}
|
||||
}
|
||||
|
||||
if checkpointOptions.PrintStats {
|
||||
statistics.PodmanDuration = podmanFinished.Sub(podmanStart).Microseconds()
|
||||
j, err := json.MarshalIndent(statistics, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Println(string(j))
|
||||
}
|
||||
|
||||
return errs.PrintErrors()
|
||||
}
|
||||
|
|
|
@ -794,21 +794,29 @@ type ContainerCheckpointOptions struct {
|
|||
// container no PID 1 will be in the namespace and that is not
|
||||
// possible.
|
||||
Pod string
|
||||
// PrintStats tells the API to fill out the statistics about
|
||||
// how much time each component in the stack requires to
|
||||
// checkpoint a container.
|
||||
PrintStats bool
|
||||
}
|
||||
|
||||
// Checkpoint checkpoints a container
|
||||
func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointOptions) error {
|
||||
// The return values *define.CRIUCheckpointRestoreStatistics and int64 (time
|
||||
// the runtime needs to checkpoint the container) are only set if
|
||||
// options.PrintStats is set to true. Not setting options.PrintStats to true
|
||||
// will return nil and 0.
|
||||
func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) {
|
||||
logrus.Debugf("Trying to checkpoint container %s", c.ID())
|
||||
|
||||
if options.TargetFile != "" {
|
||||
if err := c.prepareCheckpointExport(); err != nil {
|
||||
return err
|
||||
return nil, 0, err
|
||||
}
|
||||
}
|
||||
|
||||
if options.WithPrevious {
|
||||
if err := c.canWithPrevious(); err != nil {
|
||||
return err
|
||||
return nil, 0, err
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -817,7 +825,7 @@ func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointO
|
|||
defer c.lock.Unlock()
|
||||
|
||||
if err := c.syncContainer(); err != nil {
|
||||
return err
|
||||
return nil, 0, err
|
||||
}
|
||||
}
|
||||
return c.checkpoint(ctx, options)
|
||||
|
|
|
@ -1129,25 +1129,26 @@ func (c *Container) checkpointRestoreSupported(version int) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) error {
|
||||
func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) {
|
||||
if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil {
|
||||
return err
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
if c.state.State != define.ContainerStateRunning {
|
||||
return errors.Wrapf(define.ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State)
|
||||
return nil, 0, errors.Wrapf(define.ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State)
|
||||
}
|
||||
|
||||
if c.AutoRemove() && options.TargetFile == "" {
|
||||
return errors.Errorf("cannot checkpoint containers that have been started with '--rm' unless '--export' is used")
|
||||
return nil, 0, errors.Errorf("cannot checkpoint containers that have been started with '--rm' unless '--export' is used")
|
||||
}
|
||||
|
||||
if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "dump.log", c.MountLabel()); err != nil {
|
||||
return err
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
if err := c.ociRuntime.CheckpointContainer(c, options); err != nil {
|
||||
return err
|
||||
runtimeCheckpointDuration, err := c.ociRuntime.CheckpointContainer(c, options)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
// Save network.status. This is needed to restore the container with
|
||||
|
@ -1155,7 +1156,7 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO
|
|||
// with one interface.
|
||||
// FIXME: will this break something?
|
||||
if _, err := metadata.WriteJSONFile(c.getNetworkStatus(), c.bundlePath(), metadata.NetworkStatusFile); err != nil {
|
||||
return err
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
defer c.newContainerEvent(events.Checkpoint)
|
||||
|
@ -1165,13 +1166,13 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO
|
|||
if options.WithPrevious {
|
||||
os.Remove(path.Join(c.CheckpointPath(), "parent"))
|
||||
if err := os.Symlink("../pre-checkpoint", path.Join(c.CheckpointPath(), "parent")); err != nil {
|
||||
return err
|
||||
return nil, 0, err
|
||||
}
|
||||
}
|
||||
|
||||
if options.TargetFile != "" {
|
||||
if err := c.exportCheckpoint(options); err != nil {
|
||||
return err
|
||||
return nil, 0, err
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1183,10 +1184,37 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO
|
|||
|
||||
// Cleanup Storage and Network
|
||||
if err := c.cleanup(ctx); err != nil {
|
||||
return err
|
||||
return nil, 0, err
|
||||
}
|
||||
}
|
||||
|
||||
// Read the statistics CRIU wrote into the bundle directory. This is only
// done when the caller asked for statistics; otherwise the closure yields
// (nil, nil) and no file is touched.
criuStatistics, err := func() (*define.CRIUCheckpointRestoreStatistics, error) {
	if !options.PrintStats {
		return nil, nil
	}
	statsDirectory, err := os.Open(c.bundlePath())
	if err != nil {
		return nil, errors.Wrapf(err, "Not able to open %q", c.bundlePath())
	}
	// The directory handle is only needed while the statistics are read.
	// Close it when the closure returns so the file descriptor is not
	// leaked. The handle is read-only, so a close error carries no
	// information worth reporting.
	defer statsDirectory.Close()

	dumpStatistics, err := stats.CriuGetDumpStats(statsDirectory)
	if err != nil {
		return nil, errors.Wrap(err, "Displaying checkpointing statistics not possible")
	}

	// Only the checkpoint-related values are filled in here; the
	// restore-related fields keep their zero value.
	return &define.CRIUCheckpointRestoreStatistics{
		FreezingTime: dumpStatistics.GetFreezingTime(),
		FrozenTime:   dumpStatistics.GetFrozenTime(),
		MemdumpTime:  dumpStatistics.GetMemdumpTime(),
		MemwriteTime: dumpStatistics.GetMemwriteTime(),
		PagesScanned: dumpStatistics.GetPagesScanned(),
		PagesWritten: dumpStatistics.GetPagesWritten(),
	}, nil
}()
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
if !options.Keep && !options.PreCheckPoint {
|
||||
cleanup := []string{
|
||||
"dump.log",
|
||||
|
@ -1203,7 +1231,7 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO
|
|||
}
|
||||
|
||||
c.state.FinishedTime = time.Now()
|
||||
return c.save()
|
||||
return criuStatistics, runtimeCheckpointDuration, c.save()
|
||||
}
|
||||
|
||||
func (c *Container) importCheckpoint(input string) error {
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
package define
|
||||
|
||||
// CRIUCheckpointRestoreStatistics contains values reported by CRIU during
|
||||
// checkpointing or restoring.
|
||||
// All names are the same as reported by CRIU.
// Every field is tagged omitempty, so zero-valued fields are left out of
// the JSON output.
|
||||
type CRIUCheckpointRestoreStatistics struct {
|
||||
// Checkpoint values
|
||||
// Time required to freeze/pause/quiesce the processes
|
||||
FreezingTime uint32 `json:"freezing_time,omitempty"`
|
||||
// Time the processes are actually not running during checkpointing
|
||||
FrozenTime uint32 `json:"frozen_time,omitempty"`
|
||||
// Time required to extract memory pages from the processes
|
||||
MemdumpTime uint32 `json:"memdump_time,omitempty"`
|
||||
// Time required to write memory pages to disk
|
||||
MemwriteTime uint32 `json:"memwrite_time,omitempty"`
|
||||
// Number of memory pages CRIU analyzed
|
||||
PagesScanned uint64 `json:"pages_scanned,omitempty"`
|
||||
// Number of memory pages written
|
||||
PagesWritten uint64 `json:"pages_written,omitempty"`
|
||||
|
||||
// Restore values
|
||||
// Number of pages compared during restore
|
||||
PagesCompared uint64 `json:"pages_compared,omitempty"`
|
||||
// Number of COW pages skipped during restore
|
||||
PagesSkippedCow uint64 `json:"pages_skipped_cow,omitempty"`
|
||||
// Time required to fork processes
|
||||
ForkingTime uint32 `json:"forking_time,omitempty"`
|
||||
// Time required to restore
|
||||
RestoreTime uint32 `json:"restore_time,omitempty"`
|
||||
// Number of memory pages restored
|
||||
PagesRestored uint64 `json:"pages_restored,omitempty"`
|
||||
}
|
|
@ -101,8 +101,10 @@ type OCIRuntime interface {
|
|||
// CheckpointContainer checkpoints the given container.
|
||||
// Some OCI runtimes may not support this - if SupportsCheckpoint()
|
||||
// returns false, this is not implemented, and will always return an
|
||||
// error.
|
||||
CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error
|
||||
// error. If ContainerCheckpointOptions.PrintStats is true the first return value
|
||||
// contains the number of microseconds the runtime needed to checkpoint
|
||||
// the given container.
|
||||
CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error)
|
||||
|
||||
// CheckConmonRunning verifies that the given container's Conmon
|
||||
// instance is still running. Runtimes without Conmon, or systems where
|
||||
|
|
|
@ -760,9 +760,9 @@ func (r *ConmonOCIRuntime) AttachResize(ctr *Container, newSize define.TerminalS
|
|||
}
|
||||
|
||||
// CheckpointContainer checkpoints the given container.
|
||||
func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error {
|
||||
func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) {
|
||||
if err := label.SetSocketLabel(ctr.ProcessLabel()); err != nil {
|
||||
return err
|
||||
return 0, err
|
||||
}
|
||||
// imagePath is used by CRIU to store the actual checkpoint files
|
||||
imagePath := ctr.CheckpointPath()
|
||||
|
@ -802,14 +802,25 @@ func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options Container
|
|||
}
|
||||
runtimeDir, err := util.GetRuntimeDir()
|
||||
if err != nil {
|
||||
return err
|
||||
return 0, err
|
||||
}
|
||||
if err = os.Setenv("XDG_RUNTIME_DIR", runtimeDir); err != nil {
|
||||
return errors.Wrapf(err, "cannot set XDG_RUNTIME_DIR")
|
||||
return 0, errors.Wrapf(err, "cannot set XDG_RUNTIME_DIR")
|
||||
}
|
||||
args = append(args, ctr.ID())
|
||||
logrus.Debugf("the args to checkpoint: %s %s", r.path, strings.Join(args, " "))
|
||||
return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...)
|
||||
|
||||
runtimeCheckpointStarted := time.Now()
|
||||
err = utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...)
|
||||
|
||||
runtimeCheckpointDuration := func() int64 {
|
||||
if options.PrintStats {
|
||||
return time.Since(runtimeCheckpointStarted).Microseconds()
|
||||
}
|
||||
return 0
|
||||
}()
|
||||
|
||||
return runtimeCheckpointDuration, err
|
||||
}
|
||||
|
||||
func (r *ConmonOCIRuntime) CheckConmonRunning(ctr *Container) (bool, error) {
|
||||
|
|
|
@ -153,8 +153,8 @@ func (r *MissingRuntime) ExecUpdateStatus(ctr *Container, sessionID string) (boo
|
|||
}
|
||||
|
||||
// CheckpointContainer is not available as the runtime is missing.
// It always returns the error produced by printError; the reported
// duration is always 0.
|
||||
func (r *MissingRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error {
|
||||
return r.printError()
|
||||
func (r *MissingRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) {
|
||||
return 0, r.printError()
|
||||
}
|
||||
|
||||
// CheckConmonRunning is not available as the runtime is missing
|
||||
|
|
|
@ -214,6 +214,7 @@ func Checkpoint(w http.ResponseWriter, r *http.Request) {
|
|||
TCPEstablished bool `schema:"tcpEstablished"`
|
||||
Export bool `schema:"export"`
|
||||
IgnoreRootFS bool `schema:"ignoreRootFS"`
|
||||
PrintStats bool `schema:"printStats"`
|
||||
}{
|
||||
// override any golang type defaults
|
||||
}
|
||||
|
@ -248,11 +249,12 @@ func Checkpoint(w http.ResponseWriter, r *http.Request) {
|
|||
KeepRunning: query.LeaveRunning,
|
||||
TCPEstablished: query.TCPEstablished,
|
||||
IgnoreRootfs: query.IgnoreRootFS,
|
||||
PrintStats: query.PrintStats,
|
||||
}
|
||||
if query.Export {
|
||||
options.TargetFile = targetFile
|
||||
}
|
||||
err = ctr.Checkpoint(r.Context(), options)
|
||||
criuStatistics, runtimeCheckpointDuration, err := ctr.Checkpoint(r.Context(), options)
|
||||
if err != nil {
|
||||
utils.InternalServerError(w, err)
|
||||
return
|
||||
|
@ -267,7 +269,15 @@ func Checkpoint(w http.ResponseWriter, r *http.Request) {
|
|||
utils.WriteResponse(w, http.StatusOK, f)
|
||||
return
|
||||
}
|
||||
utils.WriteResponse(w, http.StatusOK, entities.CheckpointReport{Id: ctr.ID()})
|
||||
utils.WriteResponse(
|
||||
w,
|
||||
http.StatusOK,
|
||||
entities.CheckpointReport{
|
||||
Id: ctr.ID(),
|
||||
RuntimeDuration: runtimeCheckpointDuration,
|
||||
CRIUStatistics: criuStatistics,
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
func Restore(w http.ResponseWriter, r *http.Request) {
|
||||
|
|
|
@ -1441,6 +1441,10 @@ func (s *APIServer) registerContainersHandlers(r *mux.Router) error {
|
|||
// name: ignoreRootFS
|
||||
// type: boolean
|
||||
// description: do not include root file-system changes when exporting
|
||||
// - in: query
|
||||
// name: printStats
|
||||
// type: boolean
|
||||
// description: add checkpoint statistics to the returned CheckpointReport
|
||||
// produces:
|
||||
// - application/json
|
||||
// responses:
|
||||
|
|
|
@ -190,11 +190,14 @@ type CheckpointOptions struct {
|
|||
PreCheckPoint bool
|
||||
WithPrevious bool
|
||||
Compression archive.Compression
|
||||
PrintStats bool
|
||||
}
|
||||
|
||||
type CheckpointReport struct {
|
||||
Err error
|
||||
Id string //nolint
|
||||
Err error `json:"-"`
|
||||
Id string `json:"Id` //nolint
|
||||
RuntimeDuration int64 `json:"runtime_checkpoint_duration"`
|
||||
CRIUStatistics *define.CRIUCheckpointRestoreStatistics `json:"criu_statistics"`
|
||||
}
|
||||
|
||||
type RestoreOptions struct {
|
||||
|
|
|
@ -515,6 +515,7 @@ func (ic *ContainerEngine) ContainerCheckpoint(ctx context.Context, namesOrIds [
|
|||
PreCheckPoint: options.PreCheckPoint,
|
||||
WithPrevious: options.WithPrevious,
|
||||
Compression: options.Compression,
|
||||
PrintStats: options.PrintStats,
|
||||
}
|
||||
|
||||
if options.All {
|
||||
|
@ -531,10 +532,12 @@ func (ic *ContainerEngine) ContainerCheckpoint(ctx context.Context, namesOrIds [
|
|||
}
|
||||
reports := make([]*entities.CheckpointReport, 0, len(cons))
|
||||
for _, con := range cons {
|
||||
err = con.Checkpoint(ctx, checkOpts)
|
||||
criuStatistics, runtimeCheckpointDuration, err := con.Checkpoint(ctx, checkOpts)
|
||||
reports = append(reports, &entities.CheckpointReport{
|
||||
Err: err,
|
||||
Id: con.ID(),
|
||||
Err: err,
|
||||
Id: con.ID(),
|
||||
RuntimeDuration: runtimeCheckpointDuration,
|
||||
CRIUStatistics: criuStatistics,
|
||||
})
|
||||
}
|
||||
return reports, nil
|
||||
|
|
Loading…
Reference in New Issue