Added optional container checkpointing statistics

This adds the parameter '--print-stats' to 'podman container checkpoint'.
With '--print-stats' Podman will measure how long Podman itself, the OCI
runtime and CRIU require to create a checkpoint and print out this
information. CRIU already creates checkpointing statistics, which are
simply read and reported in addition to the new measurements. Instead of
just printing out the ID of the checkpointed container, Podman will now
print out JSON:

 # podman container checkpoint --latest --print-stats
 {
     "podman_checkpoint_duration": 360749,
     "container_statistics": [
         {
             "Id": "25244244bf2efbef30fb6857ddea8cb2e5489f07eb6659e20dda117f0c466808",
             "runtime_checkpoint_duration": 177222,
             "criu_statistics": {
                 "freezing_time": 100657,
                 "frozen_time": 60700,
                 "memdump_time": 8162,
                 "memwrite_time": 4224,
                 "pages_scanned": 20561,
                 "pages_written": 2129
             }
         }
     ]
 }

The output contains 'podman_checkpoint_duration' which contains the
number of microseconds Podman required to create the checkpoint. The
output also includes 'runtime_checkpoint_duration' which is the time
the runtime needed to checkpoint that specific container. Each container
also includes 'criu_statistics' which displays the timing information
collected by CRIU.

Signed-off-by: Adrian Reber <areber@redhat.com>
This commit is contained in:
Adrian Reber 2021-11-09 09:15:50 +00:00
parent cca6df428c
commit 6202e8102b
No known key found for this signature in database
GPG Key ID: 82C9378ED3C4906A
11 changed files with 166 additions and 33 deletions

View File

@ -4,6 +4,7 @@ import (
"context" "context"
"fmt" "fmt"
"strings" "strings"
"time"
"github.com/containers/common/pkg/completion" "github.com/containers/common/pkg/completion"
"github.com/containers/podman/v3/cmd/podman/common" "github.com/containers/podman/v3/cmd/podman/common"
@ -40,6 +41,11 @@ var (
var checkpointOptions entities.CheckpointOptions var checkpointOptions entities.CheckpointOptions
// checkpointStatistics is the JSON document printed by
// 'podman container checkpoint' when '--print-stats' is given.
type checkpointStatistics struct {
// PodmanDuration is the elapsed time, in microseconds, that Podman itself
// measured for the whole checkpoint command.
PodmanDuration int64 `json:"podman_checkpoint_duration"`
// ContainerStatistics holds one report for every container that was
// checkpointed without an error.
ContainerStatistics []*entities.CheckpointReport `json:"container_statistics"`
}
func init() { func init() {
registry.Commands = append(registry.Commands, registry.CliCommand{ registry.Commands = append(registry.Commands, registry.CliCommand{
Command: checkpointCommand, Command: checkpointCommand,
@ -63,11 +69,19 @@ func init() {
flags.StringP("compress", "c", "zstd", "Select compression algorithm (gzip, none, zstd) for checkpoint archive.") flags.StringP("compress", "c", "zstd", "Select compression algorithm (gzip, none, zstd) for checkpoint archive.")
_ = checkpointCommand.RegisterFlagCompletionFunc("compress", common.AutocompleteCheckpointCompressType) _ = checkpointCommand.RegisterFlagCompletionFunc("compress", common.AutocompleteCheckpointCompressType)
flags.BoolVar(
&checkpointOptions.PrintStats,
"print-stats",
false,
"Display checkpoint statistics",
)
validate.AddLatestFlag(checkpointCommand, &checkpointOptions.Latest) validate.AddLatestFlag(checkpointCommand, &checkpointOptions.Latest)
} }
func checkpoint(cmd *cobra.Command, args []string) error { func checkpoint(cmd *cobra.Command, args []string) error {
var errs utils.OutputErrors var errs utils.OutputErrors
podmanStart := time.Now()
if cmd.Flags().Changed("compress") { if cmd.Flags().Changed("compress") {
if checkpointOptions.Export == "" { if checkpointOptions.Export == "" {
return errors.Errorf("--compress can only be used with --export") return errors.Errorf("--compress can only be used with --export")
@ -102,12 +116,30 @@ func checkpoint(cmd *cobra.Command, args []string) error {
if err != nil { if err != nil {
return err return err
} }
podmanFinished := time.Now()
var statistics checkpointStatistics
for _, r := range responses { for _, r := range responses {
if r.Err == nil { if r.Err == nil {
if checkpointOptions.PrintStats {
statistics.ContainerStatistics = append(statistics.ContainerStatistics, r)
} else {
fmt.Println(r.Id) fmt.Println(r.Id)
}
} else { } else {
errs = append(errs, r.Err) errs = append(errs, r.Err)
} }
} }
if checkpointOptions.PrintStats {
statistics.PodmanDuration = podmanFinished.Sub(podmanStart).Microseconds()
j, err := json.MarshalIndent(statistics, "", " ")
if err != nil {
return err
}
fmt.Println(string(j))
}
return errs.PrintErrors() return errs.PrintErrors()
} }

View File

@ -794,21 +794,29 @@ type ContainerCheckpointOptions struct {
// container no PID 1 will be in the namespace and that is not // container no PID 1 will be in the namespace and that is not
// possible. // possible.
Pod string Pod string
// PrintStats tells the API to fill out the statistics about
// how much time each component in the stack requires to
// checkpoint a container.
PrintStats bool
} }
// Checkpoint checkpoints a container // Checkpoint checkpoints a container
func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointOptions) error { // The return values *define.CRIUCheckpointRestoreStatistics and int64 (time
// the runtime needs to checkpoint the container) are only set if
// options.PrintStats is set to true. Not setting options.PrintStats to true
// will return nil and 0.
func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) {
logrus.Debugf("Trying to checkpoint container %s", c.ID()) logrus.Debugf("Trying to checkpoint container %s", c.ID())
if options.TargetFile != "" { if options.TargetFile != "" {
if err := c.prepareCheckpointExport(); err != nil { if err := c.prepareCheckpointExport(); err != nil {
return err return nil, 0, err
} }
} }
if options.WithPrevious { if options.WithPrevious {
if err := c.canWithPrevious(); err != nil { if err := c.canWithPrevious(); err != nil {
return err return nil, 0, err
} }
} }
@ -817,7 +825,7 @@ func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointO
defer c.lock.Unlock() defer c.lock.Unlock()
if err := c.syncContainer(); err != nil { if err := c.syncContainer(); err != nil {
return err return nil, 0, err
} }
} }
return c.checkpoint(ctx, options) return c.checkpoint(ctx, options)

View File

@ -1129,25 +1129,26 @@ func (c *Container) checkpointRestoreSupported(version int) error {
return nil return nil
} }
func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) error { func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) {
if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil { if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil {
return err return nil, 0, err
} }
if c.state.State != define.ContainerStateRunning { if c.state.State != define.ContainerStateRunning {
return errors.Wrapf(define.ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State) return nil, 0, errors.Wrapf(define.ErrCtrStateInvalid, "%q is not running, cannot checkpoint", c.state.State)
} }
if c.AutoRemove() && options.TargetFile == "" { if c.AutoRemove() && options.TargetFile == "" {
return errors.Errorf("cannot checkpoint containers that have been started with '--rm' unless '--export' is used") return nil, 0, errors.Errorf("cannot checkpoint containers that have been started with '--rm' unless '--export' is used")
} }
if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "dump.log", c.MountLabel()); err != nil { if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "dump.log", c.MountLabel()); err != nil {
return err return nil, 0, err
} }
if err := c.ociRuntime.CheckpointContainer(c, options); err != nil { runtimeCheckpointDuration, err := c.ociRuntime.CheckpointContainer(c, options)
return err if err != nil {
return nil, 0, err
} }
// Save network.status. This is needed to restore the container with // Save network.status. This is needed to restore the container with
@ -1155,7 +1156,7 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO
// with one interface. // with one interface.
// FIXME: will this break something? // FIXME: will this break something?
if _, err := metadata.WriteJSONFile(c.getNetworkStatus(), c.bundlePath(), metadata.NetworkStatusFile); err != nil { if _, err := metadata.WriteJSONFile(c.getNetworkStatus(), c.bundlePath(), metadata.NetworkStatusFile); err != nil {
return err return nil, 0, err
} }
defer c.newContainerEvent(events.Checkpoint) defer c.newContainerEvent(events.Checkpoint)
@ -1165,13 +1166,13 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO
if options.WithPrevious { if options.WithPrevious {
os.Remove(path.Join(c.CheckpointPath(), "parent")) os.Remove(path.Join(c.CheckpointPath(), "parent"))
if err := os.Symlink("../pre-checkpoint", path.Join(c.CheckpointPath(), "parent")); err != nil { if err := os.Symlink("../pre-checkpoint", path.Join(c.CheckpointPath(), "parent")); err != nil {
return err return nil, 0, err
} }
} }
if options.TargetFile != "" { if options.TargetFile != "" {
if err := c.exportCheckpoint(options); err != nil { if err := c.exportCheckpoint(options); err != nil {
return err return nil, 0, err
} }
} }
@ -1183,10 +1184,37 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO
// Cleanup Storage and Network // Cleanup Storage and Network
if err := c.cleanup(ctx); err != nil { if err := c.cleanup(ctx); err != nil {
return err return nil, 0, err
} }
} }
criuStatistics, err := func() (*define.CRIUCheckpointRestoreStatistics, error) {
if !options.PrintStats {
return nil, nil
}
statsDirectory, err := os.Open(c.bundlePath())
if err != nil {
return nil, errors.Wrapf(err, "Not able to open %q", c.bundlePath())
}
dumpStatistics, err := stats.CriuGetDumpStats(statsDirectory)
if err != nil {
return nil, errors.Wrap(err, "Displaying checkpointing statistics not possible")
}
return &define.CRIUCheckpointRestoreStatistics{
FreezingTime: dumpStatistics.GetFreezingTime(),
FrozenTime: dumpStatistics.GetFrozenTime(),
MemdumpTime: dumpStatistics.GetMemdumpTime(),
MemwriteTime: dumpStatistics.GetMemwriteTime(),
PagesScanned: dumpStatistics.GetPagesScanned(),
PagesWritten: dumpStatistics.GetPagesWritten(),
}, nil
}()
if err != nil {
return nil, 0, err
}
if !options.Keep && !options.PreCheckPoint { if !options.Keep && !options.PreCheckPoint {
cleanup := []string{ cleanup := []string{
"dump.log", "dump.log",
@ -1203,7 +1231,7 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO
} }
c.state.FinishedTime = time.Now() c.state.FinishedTime = time.Now()
return c.save() return criuStatistics, runtimeCheckpointDuration, c.save()
} }
func (c *Container) importCheckpoint(input string) error { func (c *Container) importCheckpoint(input string) error {

View File

@ -0,0 +1,32 @@
package define
// CRIUCheckpointRestoreStatistics contains values reported by CRIU during
// checkpointing or restoring.
// All names are the same as reported by CRIU.
// Every field is tagged with omitempty, so values that are zero (for example
// the restore values after a checkpoint) are left out of the JSON output.
// NOTE(review): the time values are presumably in microseconds, matching the
// other durations printed by Podman - confirm against the CRIU documentation.
type CRIUCheckpointRestoreStatistics struct {
// Checkpoint values
// FreezingTime is the time required to freeze/pause/quiesce the processes
FreezingTime uint32 `json:"freezing_time,omitempty"`
// FrozenTime is the time the processes are actually not running during
// checkpointing
FrozenTime uint32 `json:"frozen_time,omitempty"`
// MemdumpTime is the time required to extract memory pages from the
// processes
MemdumpTime uint32 `json:"memdump_time,omitempty"`
// MemwriteTime is the time required to write memory pages to disk
MemwriteTime uint32 `json:"memwrite_time,omitempty"`
// PagesScanned is the number of memory pages CRIU analyzed
PagesScanned uint64 `json:"pages_scanned,omitempty"`
// PagesWritten is the number of memory pages written
PagesWritten uint64 `json:"pages_written,omitempty"`
// Restore values
// PagesCompared is the number of pages compared during restore
PagesCompared uint64 `json:"pages_compared,omitempty"`
// PagesSkippedCow is the number of COW pages skipped during restore
PagesSkippedCow uint64 `json:"pages_skipped_cow,omitempty"`
// ForkingTime is the time required to fork processes
ForkingTime uint32 `json:"forking_time,omitempty"`
// RestoreTime is the time required to restore
RestoreTime uint32 `json:"restore_time,omitempty"`
// PagesRestored is the number of memory pages restored
PagesRestored uint64 `json:"pages_restored,omitempty"`
}

View File

@ -101,8 +101,10 @@ type OCIRuntime interface {
// CheckpointContainer checkpoints the given container. // CheckpointContainer checkpoints the given container.
// Some OCI runtimes may not support this - if SupportsCheckpoint() // Some OCI runtimes may not support this - if SupportsCheckpoint()
// returns false, this is not implemented, and will always return an // returns false, this is not implemented, and will always return an
// error. // error. If CheckpointOptions.PrintStats is true the first return parameter
CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error // contains the number of microseconds the runtime needed to checkpoint
// the given container.
CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error)
// CheckConmonRunning verifies that the given container's Conmon // CheckConmonRunning verifies that the given container's Conmon
// instance is still running. Runtimes without Conmon, or systems where // instance is still running. Runtimes without Conmon, or systems where

View File

@ -760,9 +760,9 @@ func (r *ConmonOCIRuntime) AttachResize(ctr *Container, newSize define.TerminalS
} }
// CheckpointContainer checkpoints the given container. // CheckpointContainer checkpoints the given container.
func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error { func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) {
if err := label.SetSocketLabel(ctr.ProcessLabel()); err != nil { if err := label.SetSocketLabel(ctr.ProcessLabel()); err != nil {
return err return 0, err
} }
// imagePath is used by CRIU to store the actual checkpoint files // imagePath is used by CRIU to store the actual checkpoint files
imagePath := ctr.CheckpointPath() imagePath := ctr.CheckpointPath()
@ -802,14 +802,25 @@ func (r *ConmonOCIRuntime) CheckpointContainer(ctr *Container, options Container
} }
runtimeDir, err := util.GetRuntimeDir() runtimeDir, err := util.GetRuntimeDir()
if err != nil { if err != nil {
return err return 0, err
} }
if err = os.Setenv("XDG_RUNTIME_DIR", runtimeDir); err != nil { if err = os.Setenv("XDG_RUNTIME_DIR", runtimeDir); err != nil {
return errors.Wrapf(err, "cannot set XDG_RUNTIME_DIR") return 0, errors.Wrapf(err, "cannot set XDG_RUNTIME_DIR")
} }
args = append(args, ctr.ID()) args = append(args, ctr.ID())
logrus.Debugf("the args to checkpoint: %s %s", r.path, strings.Join(args, " ")) logrus.Debugf("the args to checkpoint: %s %s", r.path, strings.Join(args, " "))
return utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...)
runtimeCheckpointStarted := time.Now()
err = utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, nil, r.path, args...)
runtimeCheckpointDuration := func() int64 {
if options.PrintStats {
return time.Since(runtimeCheckpointStarted).Microseconds()
}
return 0
}()
return runtimeCheckpointDuration, err
} }
func (r *ConmonOCIRuntime) CheckConmonRunning(ctr *Container) (bool, error) { func (r *ConmonOCIRuntime) CheckConmonRunning(ctr *Container) (bool, error) {

View File

@ -153,8 +153,8 @@ func (r *MissingRuntime) ExecUpdateStatus(ctr *Container, sessionID string) (boo
} }
// CheckpointContainer is not available as the runtime is missing // CheckpointContainer is not available as the runtime is missing
func (r *MissingRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) error { func (r *MissingRuntime) CheckpointContainer(ctr *Container, options ContainerCheckpointOptions) (int64, error) {
return r.printError() return 0, r.printError()
} }
// CheckConmonRunning is not available as the runtime is missing // CheckConmonRunning is not available as the runtime is missing

View File

@ -214,6 +214,7 @@ func Checkpoint(w http.ResponseWriter, r *http.Request) {
TCPEstablished bool `schema:"tcpEstablished"` TCPEstablished bool `schema:"tcpEstablished"`
Export bool `schema:"export"` Export bool `schema:"export"`
IgnoreRootFS bool `schema:"ignoreRootFS"` IgnoreRootFS bool `schema:"ignoreRootFS"`
PrintStats bool `schema:"printStats"`
}{ }{
// override any golang type defaults // override any golang type defaults
} }
@ -248,11 +249,12 @@ func Checkpoint(w http.ResponseWriter, r *http.Request) {
KeepRunning: query.LeaveRunning, KeepRunning: query.LeaveRunning,
TCPEstablished: query.TCPEstablished, TCPEstablished: query.TCPEstablished,
IgnoreRootfs: query.IgnoreRootFS, IgnoreRootfs: query.IgnoreRootFS,
PrintStats: query.PrintStats,
} }
if query.Export { if query.Export {
options.TargetFile = targetFile options.TargetFile = targetFile
} }
err = ctr.Checkpoint(r.Context(), options) criuStatistics, runtimeCheckpointDuration, err := ctr.Checkpoint(r.Context(), options)
if err != nil { if err != nil {
utils.InternalServerError(w, err) utils.InternalServerError(w, err)
return return
@ -267,7 +269,15 @@ func Checkpoint(w http.ResponseWriter, r *http.Request) {
utils.WriteResponse(w, http.StatusOK, f) utils.WriteResponse(w, http.StatusOK, f)
return return
} }
utils.WriteResponse(w, http.StatusOK, entities.CheckpointReport{Id: ctr.ID()}) utils.WriteResponse(
w,
http.StatusOK,
entities.CheckpointReport{
Id: ctr.ID(),
RuntimeDuration: runtimeCheckpointDuration,
CRIUStatistics: criuStatistics,
},
)
} }
func Restore(w http.ResponseWriter, r *http.Request) { func Restore(w http.ResponseWriter, r *http.Request) {

View File

@ -1441,6 +1441,10 @@ func (s *APIServer) registerContainersHandlers(r *mux.Router) error {
// name: ignoreRootFS // name: ignoreRootFS
// type: boolean // type: boolean
// description: do not include root file-system changes when exporting // description: do not include root file-system changes when exporting
// - in: query
// name: printStats
// type: boolean
// description: add checkpoint statistics to the returned CheckpointReport
// produces: // produces:
// - application/json // - application/json
// responses: // responses:

View File

@ -190,11 +190,14 @@ type CheckpointOptions struct {
PreCheckPoint bool PreCheckPoint bool
WithPrevious bool WithPrevious bool
Compression archive.Compression Compression archive.Compression
PrintStats bool
} }
type CheckpointReport struct { type CheckpointReport struct {
Err error Err error `json:"-"`
Id string //nolint Id string `json:"Id` //nolint
RuntimeDuration int64 `json:"runtime_checkpoint_duration"`
CRIUStatistics *define.CRIUCheckpointRestoreStatistics `json:"criu_statistics"`
} }
type RestoreOptions struct { type RestoreOptions struct {

View File

@ -515,6 +515,7 @@ func (ic *ContainerEngine) ContainerCheckpoint(ctx context.Context, namesOrIds [
PreCheckPoint: options.PreCheckPoint, PreCheckPoint: options.PreCheckPoint,
WithPrevious: options.WithPrevious, WithPrevious: options.WithPrevious,
Compression: options.Compression, Compression: options.Compression,
PrintStats: options.PrintStats,
} }
if options.All { if options.All {
@ -531,10 +532,12 @@ func (ic *ContainerEngine) ContainerCheckpoint(ctx context.Context, namesOrIds [
} }
reports := make([]*entities.CheckpointReport, 0, len(cons)) reports := make([]*entities.CheckpointReport, 0, len(cons))
for _, con := range cons { for _, con := range cons {
err = con.Checkpoint(ctx, checkOpts) criuStatistics, runtimeCheckpointDuration, err := con.Checkpoint(ctx, checkOpts)
reports = append(reports, &entities.CheckpointReport{ reports = append(reports, &entities.CheckpointReport{
Err: err, Err: err,
Id: con.ID(), Id: con.ID(),
RuntimeDuration: runtimeCheckpointDuration,
CRIUStatistics: criuStatistics,
}) })
} }
return reports, nil return reports, nil