cgroup: retry rmdir up to 5 seconds on EBUSY

On a busy system, the conmon process can take longer to exit or to be
reaped by its parent, leaving the cgroup busy.  If the rmdir fails
with EBUSY, keep retrying for up to 5 seconds before reporting an
error.

Closes: https://github.com/containers/podman/issues/11946

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
Giuseppe Scrivano 2021-12-14 16:09:33 +01:00
parent 9555cb154f
commit cbc695fed4
1 changed file with 17 additions and 4 deletions

View File

@ -11,6 +11,7 @@ import (
"path/filepath"
"strconv"
"strings"
"time"
"github.com/containers/storage/pkg/unshare"
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
@ -18,6 +19,7 @@ import (
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
var (
@ -527,12 +529,23 @@ func rmDirRecursively(path string) error {
}
}
}
if err := os.Remove(path); err != nil {
if !os.IsNotExist(err) {
return errors.Wrapf(err, "remove %s", path)
attempts := 0
for {
err := os.Remove(path)
if err == nil || os.IsNotExist(err) {
return nil
}
if errors.Is(err, unix.EBUSY) {
// attempt up to 5 seconds if the cgroup is busy
if attempts < 500 {
time.Sleep(time.Millisecond * 10)
attempts++
continue
}
}
return errors.Wrapf(err, "remove %s", path)
}
return nil
}
// DeleteByPathConn deletes the specified cgroup path using the specified