Merge pull request #12625 from adrianreber/2021-12-16-podman-inspect

Add more checkpoint/restore information to 'inspect'
This commit is contained in:
OpenShift Merge Robot 2021-12-17 16:04:26 +01:00 committed by GitHub
commit 59766ebe76
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 158 additions and 34 deletions

View File

@ -213,6 +213,15 @@ type ContainerState struct {
// containerPlatformState holds platform-specific container state.
containerPlatformState
// Following checkpoint/restore related information is displayed
// if the container has been checkpointed or restored.
CheckpointedTime time.Time `json:"checkpointedTime,omitempty"`
RestoredTime time.Time `json:"restoredTime,omitempty"`
CheckpointLog string `json:"checkpointLog,omitempty"`
CheckpointPath string `json:"checkpointPath,omitempty"`
RestoreLog string `json:"restoreLog,omitempty"`
Restored bool `json:"restored,omitempty"`
}
// ContainerNamedVolume is a named volume that will be mounted into the

View File

@ -113,20 +113,26 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver
Path: path,
Args: args,
State: &define.InspectContainerState{
OciVersion: ctrSpec.Version,
Status: runtimeInfo.State.String(),
Running: runtimeInfo.State == define.ContainerStateRunning,
Paused: runtimeInfo.State == define.ContainerStatePaused,
OOMKilled: runtimeInfo.OOMKilled,
Dead: runtimeInfo.State.String() == "bad state",
Pid: runtimeInfo.PID,
ConmonPid: runtimeInfo.ConmonPID,
ExitCode: runtimeInfo.ExitCode,
Error: "", // can't get yet
StartedAt: runtimeInfo.StartedTime,
FinishedAt: runtimeInfo.FinishedTime,
Checkpointed: runtimeInfo.Checkpointed,
CgroupPath: cgroupPath,
OciVersion: ctrSpec.Version,
Status: runtimeInfo.State.String(),
Running: runtimeInfo.State == define.ContainerStateRunning,
Paused: runtimeInfo.State == define.ContainerStatePaused,
OOMKilled: runtimeInfo.OOMKilled,
Dead: runtimeInfo.State.String() == "bad state",
Pid: runtimeInfo.PID,
ConmonPid: runtimeInfo.ConmonPID,
ExitCode: runtimeInfo.ExitCode,
Error: "", // can't get yet
StartedAt: runtimeInfo.StartedTime,
FinishedAt: runtimeInfo.FinishedTime,
Checkpointed: runtimeInfo.Checkpointed,
CgroupPath: cgroupPath,
RestoredAt: runtimeInfo.RestoredTime,
CheckpointedAt: runtimeInfo.CheckpointedTime,
Restored: runtimeInfo.Restored,
CheckpointPath: runtimeInfo.CheckpointPath,
CheckpointLog: runtimeInfo.CheckpointLog,
RestoreLog: runtimeInfo.RestoreLog,
},
Image: config.RootfsImageID,
ImageName: config.RootfsImageName,

View File

@ -634,6 +634,12 @@ func resetState(state *ContainerState) {
state.RestartPolicyMatch = false
state.RestartCount = 0
state.Checkpointed = false
state.Restored = false
state.CheckpointedTime = time.Time{}
state.RestoredTime = time.Time{}
state.CheckpointPath = ""
state.CheckpointLog = ""
state.RestoreLog = ""
}
// Refresh refreshes the container's state after a restart.
@ -1111,6 +1117,12 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error {
}
c.state.Checkpointed = false
c.state.Restored = false
c.state.CheckpointedTime = time.Time{}
c.state.RestoredTime = time.Time{}
c.state.CheckpointPath = ""
c.state.CheckpointLog = ""
c.state.RestoreLog = ""
c.state.ExitCode = 0
c.state.Exited = false
c.state.State = define.ContainerStateCreated

View File

@ -1134,6 +1134,10 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO
return nil, 0, err
}
// Setting CheckpointLog early in case there is a failure.
c.state.CheckpointLog = path.Join(c.bundlePath(), "dump.log")
c.state.CheckpointPath = c.CheckpointPath()
runtimeCheckpointDuration, err := c.ociRuntime.CheckpointContainer(c, options)
if err != nil {
return nil, 0, err
@ -1169,6 +1173,9 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO
if !options.KeepRunning && !options.PreCheckPoint {
c.state.State = define.ContainerStateStopped
c.state.Checkpointed = true
c.state.CheckpointedTime = time.Now()
c.state.Restored = false
c.state.RestoredTime = time.Time{}
// Cleanup Storage and Network
if err := c.cleanup(ctx); err != nil {
@ -1216,6 +1223,8 @@ func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointO
logrus.Debugf("Unable to remove file %s", file)
}
}
// The file has been deleted. Do not mention it.
c.state.CheckpointLog = ""
}
c.state.FinishedTime = time.Now()
@ -1293,6 +1302,10 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
return nil, 0, err
}
// Setting RestoreLog early in case there is a failure.
c.state.RestoreLog = path.Join(c.bundlePath(), "restore.log")
c.state.CheckpointPath = c.CheckpointPath()
// Read network configuration from checkpoint
var netStatus map[string]types.StatusBlock
_, err := metadata.ReadJSONFile(&netStatus, c.bundlePath(), metadata.NetworkStatusFile)
@ -1559,6 +1572,9 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
c.state.State = define.ContainerStateRunning
c.state.Checkpointed = false
c.state.Restored = true
c.state.CheckpointedTime = time.Time{}
c.state.RestoredTime = time.Now()
if !options.Keep {
// Delete all checkpoint related files. At this point, in theory, all files
@ -1569,6 +1585,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
if err != nil {
logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err)
}
c.state.CheckpointPath = ""
err = os.RemoveAll(c.PreCheckPointPath())
if err != nil {
logrus.Debugf("Non-fatal: removal of pre-checkpoint directory (%s) failed: %v", c.PreCheckPointPath(), err)
@ -1589,6 +1606,8 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err)
}
}
c.state.CheckpointLog = ""
c.state.RestoreLog = ""
}
return criuStatistics, runtimeRestoreDuration, c.save()

View File

@ -189,22 +189,28 @@ type InspectMount struct {
// Docker, but here we see more fields that are unused (nonsensical in the
// context of Libpod).
type InspectContainerState struct {
OciVersion string `json:"OciVersion"`
Status string `json:"Status"`
Running bool `json:"Running"`
Paused bool `json:"Paused"`
Restarting bool `json:"Restarting"` // TODO
OOMKilled bool `json:"OOMKilled"`
Dead bool `json:"Dead"`
Pid int `json:"Pid"`
ConmonPid int `json:"ConmonPid,omitempty"`
ExitCode int32 `json:"ExitCode"`
Error string `json:"Error"` // TODO
StartedAt time.Time `json:"StartedAt"`
FinishedAt time.Time `json:"FinishedAt"`
Health HealthCheckResults `json:"Health,omitempty"`
Checkpointed bool `json:"Checkpointed,omitempty"`
CgroupPath string `json:"CgroupPath,omitempty"`
OciVersion string `json:"OciVersion"`
Status string `json:"Status"`
Running bool `json:"Running"`
Paused bool `json:"Paused"`
Restarting bool `json:"Restarting"` // TODO
OOMKilled bool `json:"OOMKilled"`
Dead bool `json:"Dead"`
Pid int `json:"Pid"`
ConmonPid int `json:"ConmonPid,omitempty"`
ExitCode int32 `json:"ExitCode"`
Error string `json:"Error"` // TODO
StartedAt time.Time `json:"StartedAt"`
FinishedAt time.Time `json:"FinishedAt"`
Health HealthCheckResults `json:"Health,omitempty"`
Checkpointed bool `json:"Checkpointed,omitempty"`
CgroupPath string `json:"CgroupPath,omitempty"`
CheckpointedAt time.Time `json:"CheckpointedAt,omitempty"`
RestoredAt time.Time `json:"RestoredAt,omitempty"`
CheckpointLog string `json:"CheckpointLog,omitempty"`
CheckpointPath string `json:"CheckpointPath,omitempty"`
RestoreLog string `json:"RestoreLog,omitempty"`
Restored bool `json:"Restored,omitempty"`
}
// Healthcheck returns the HealthCheckResults. This is used for old podman compat

View File

@ -91,25 +91,97 @@ var _ = Describe("Podman checkpoint", func() {
Expect(session).Should(Exit(0))
cid := session.OutputToString()
result := podmanTest.Podman([]string{"container", "checkpoint", cid})
// Check if none of the checkpoint/restore specific information is displayed
// for newly started containers.
inspect := podmanTest.Podman([]string{"inspect", cid})
inspect.WaitWithDefaultTimeout()
Expect(inspect).Should(Exit(0))
inspectOut := inspect.InspectContainerToJSON()
Expect(inspectOut[0].State.Checkpointed).To(BeFalse(), ".State.Checkpointed")
Expect(inspectOut[0].State.Restored).To(BeFalse(), ".State.Restored")
Expect(inspectOut[0].State.CheckpointPath).To(Equal(""))
Expect(inspectOut[0].State.CheckpointLog).To(Equal(""))
Expect(inspectOut[0].State.RestoreLog).To(Equal(""))
result := podmanTest.Podman([]string{
"container",
"checkpoint",
"--keep",
cid,
})
result.WaitWithDefaultTimeout()
Expect(result).Should(Exit(0))
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Exited"))
inspect := podmanTest.Podman([]string{"inspect", cid})
// For a checkpointed container we expect the checkpoint related information
// to be populated.
inspect = podmanTest.Podman([]string{"inspect", cid})
inspect.WaitWithDefaultTimeout()
Expect(inspect).Should(Exit(0))
inspectOut := inspect.InspectContainerToJSON()
inspectOut = inspect.InspectContainerToJSON()
Expect(inspectOut[0].State.Checkpointed).To(BeTrue(), ".State.Checkpointed")
Expect(inspectOut[0].State.Restored).To(BeFalse(), ".State.Restored")
Expect(inspectOut[0].State.CheckpointPath).To(ContainSubstring("userdata/checkpoint"))
Expect(inspectOut[0].State.CheckpointLog).To(ContainSubstring("userdata/dump.log"))
Expect(inspectOut[0].State.RestoreLog).To(Equal(""))
result = podmanTest.Podman([]string{"container", "restore", cid})
result = podmanTest.Podman([]string{
"container",
"restore",
"--keep",
cid,
})
result.WaitWithDefaultTimeout()
Expect(result).Should(Exit(0))
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
Expect(podmanTest.GetContainerStatus()).To(ContainSubstring("Up"))
inspect = podmanTest.Podman([]string{"inspect", cid})
inspect.WaitWithDefaultTimeout()
Expect(inspect).Should(Exit(0))
inspectOut = inspect.InspectContainerToJSON()
Expect(inspectOut[0].State.Restored).To(BeTrue(), ".State.Restored")
Expect(inspectOut[0].State.Checkpointed).To(BeFalse(), ".State.Checkpointed")
Expect(inspectOut[0].State.CheckpointPath).To(ContainSubstring("userdata/checkpoint"))
Expect(inspectOut[0].State.CheckpointLog).To(ContainSubstring("userdata/dump.log"))
Expect(inspectOut[0].State.RestoreLog).To(ContainSubstring("userdata/restore.log"))
result = podmanTest.Podman([]string{
"container",
"stop",
"--timeout",
"0",
cid,
})
result.WaitWithDefaultTimeout()
Expect(result).Should(Exit(0))
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(0))
result = podmanTest.Podman([]string{
"container",
"start",
cid,
})
result.WaitWithDefaultTimeout()
Expect(result).Should(Exit(0))
Expect(podmanTest.NumberOfContainersRunning()).To(Equal(1))
// Stopping and starting the container should remove all checkpoint
// related information from inspect again.
inspect = podmanTest.Podman([]string{"inspect", cid})
inspect.WaitWithDefaultTimeout()
Expect(inspect).Should(Exit(0))
inspectOut = inspect.InspectContainerToJSON()
Expect(inspectOut[0].State.Checkpointed).To(BeFalse(), ".State.Checkpointed")
Expect(inspectOut[0].State.Restored).To(BeFalse(), ".State.Restored")
Expect(inspectOut[0].State.CheckpointPath).To(Equal(""))
Expect(inspectOut[0].State.CheckpointLog).To(Equal(""))
Expect(inspectOut[0].State.RestoreLog).To(Equal(""))
})
It("podman checkpoint a running container by name", func() {