Fix: Prevent the OCI runtime directory from being left behind

This bug was introduced in https://github.com/containers/podman/pull/8906.

When a command such as 'podman rm/restart/stop/kill' is run against
a container that was started with --rm, the OCI runtime directory
is left behind at /run/<runtime name> (root user) or
/run/user/<user id>/<runtime name> (rootless user).

This bug can cause other bugs.
For example, when a container running with --rm is checkpointed
(podman checkpoint --export) and then restored
(podman restore --import) with crun, the error message
"Error: OCI runtime error: crun: container `<container id>`
already exists" is printed.
The error occurs because the restore reuses the original container ID,
which still exists in the leftover OCI runtime directory.

Therefore, this change makes sure that cleanupRuntime() is called to
remove the OCI runtime directory
even if the container has already been removed by the --rm option.

Signed-off-by: Toshiki Sonoda <sonoda.toshiki@fujitsu.com>
This commit is contained in:
Toshiki Sonoda 2022-06-24 09:29:24 +09:00
parent 8e88abda85
commit 3619f0be95
5 changed files with 40 additions and 1 deletions

View File

@ -621,6 +621,15 @@ func (c *Container) Cleanup(ctx context.Context) error {
defer c.lock.Unlock()
if err := c.syncContainer(); err != nil {
switch errors.Cause(err) {
// When the container has already been removed, the OCI runtime directory remains.
case define.ErrNoSuchCtr, define.ErrCtrRemoved:
if err := c.cleanupRuntime(ctx); err != nil {
return errors.Wrapf(err, "error cleaning up container %s from OCI runtime", c.ID())
}
default:
logrus.Errorf("Syncing container %s status: %v", c.ID(), err)
}
return err
}
}

View File

@ -1309,8 +1309,9 @@ func (c *Container) stop(timeout uint) error {
if err := c.syncContainer(); err != nil {
switch errors.Cause(err) {
// If the container has already been removed (e.g., via
// the cleanup process), there's nothing left to do.
// the cleanup process), set the container state to "stopped".
case define.ErrNoSuchCtr, define.ErrCtrRemoved:
c.state.State = define.ContainerStateStopped
return stopErr
default:
if stopErr != nil {

View File

@ -715,6 +715,10 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
// Do a quick ping of the database to check if the container
// still exists.
if ok, _ := r.state.HasContainer(c.ID()); !ok {
// When the container has already been removed, the OCI runtime directory remains.
if err := c.cleanupRuntime(ctx); err != nil {
return errors.Wrapf(err, "error cleaning up container %s from OCI runtime", c.ID())
}
return nil
}
}

View File

@ -171,4 +171,19 @@ load helpers
run_podman --noout stop -t 0 stopme
is "$output" "" "output should be empty"
}
@test "podman stop, with --rm container" {
    # Regression test: stopping a container that was started with --rm must
    # not leave an entry for it behind in the OCI runtime's directory.

    # Directory in which the test looks for leftover runtime state:
    # /run/<runtime> as root, /run/user/<uid>/<runtime> when rootless.
    # (Not declared 'local' on purpose: 'local var=$(cmd)' would hide a
    # failing podman_runtime call.)
    OCIDir=/run/$(podman_runtime)
    if is_rootless; then
        OCIDir=/run/user/$(id -u)/$(podman_runtime)
    fi

    # Start a detached --rm container and remember the ID it printed.
    run_podman run --rm -d --name rmstop $IMAGE sleep infinity
    local cid="$output"
    run_podman stop rmstop

    # Check that the OCI runtime directory has been removed.
    # The expansions are quoted so that each is passed as exactly one
    # argument; in particular an empty ID can no longer turn this into a
    # pattern-less grep that prints nothing and lets the check pass.
    is "$(ls "$OCIDir" | grep "$cid")" "" "The OCI runtime directory should have been removed"
}
# vim: filetype=sh

View File

@ -52,10 +52,20 @@ load helpers
}
@test "podman rm <-> run --rm race" {
    # Directory in which the test looks for leftover runtime state:
    # /run/<runtime> as root, /run/user/<uid>/<runtime> when rootless.
    # (Not declared 'local' on purpose: 'local var=$(cmd)' would hide a
    # failing podman_runtime call.)
    OCIDir=/run/$(podman_runtime)
    if is_rootless; then
        OCIDir=/run/user/$(id -u)/$(podman_runtime)
    fi

    # A container's lock is released before attempting to stop it. This opens
    # the window for race conditions that led to #9479.
    run_podman run --rm -d $IMAGE sleep infinity
    local cid="$output"
    run_podman rm -af

    # Check that the OCI runtime directory has been removed.
    # The expansions are quoted so that each is passed as exactly one
    # argument; in particular an empty ID can no longer turn this into a
    # pattern-less grep that prints nothing and lets the check pass.
    is "$(ls "$OCIDir" | grep "$cid")" "" "The OCI runtime directory should have been removed"
}
@test "podman rm --depend" {