cgroupns: private cgroupns on cgroupv1 breaks --systemd
On cgroup v1 we need to mount only the systemd named hierarchy as writeable, so we configure the OCI runtime to mount /sys/fs/cgroup as read-only and on top of that bind mount /sys/fs/cgroup/systemd. But when we use a private cgroupns, we cannot do that since we don't know the final cgroup path. Also, do not override the mount if there is already one for /sys/fs/cgroup/systemd. Closes: https://github.com/containers/podman/issues/17727 Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
parent
01fd5bcc30
commit
2d1f4a8bff
|
|
@ -242,17 +242,17 @@ func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) erro
|
|||
return err
|
||||
}
|
||||
|
||||
hasCgroupNs := false
|
||||
for _, ns := range c.config.Spec.Linux.Namespaces {
|
||||
if ns.Type == spec.CgroupNamespace {
|
||||
hasCgroupNs = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if unified {
|
||||
g.RemoveMount("/sys/fs/cgroup")
|
||||
|
||||
hasCgroupNs := false
|
||||
for _, ns := range c.config.Spec.Linux.Namespaces {
|
||||
if ns.Type == spec.CgroupNamespace {
|
||||
hasCgroupNs = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
var systemdMnt spec.Mount
|
||||
if hasCgroupNs {
|
||||
systemdMnt = spec.Mount{
|
||||
|
|
@ -271,40 +271,46 @@ func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) erro
|
|||
}
|
||||
g.AddMount(systemdMnt)
|
||||
} else {
|
||||
mountOptions := []string{"bind", "rprivate"}
|
||||
skipMount := false
|
||||
|
||||
var statfs unix.Statfs_t
|
||||
if err := unix.Statfs("/sys/fs/cgroup/systemd", &statfs); err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
// If the mount is missing on the host, we cannot bind mount it so
|
||||
// just skip it.
|
||||
skipMount = true
|
||||
}
|
||||
mountOptions = append(mountOptions, "nodev", "noexec", "nosuid")
|
||||
} else {
|
||||
if statfs.Flags&unix.MS_NODEV == unix.MS_NODEV {
|
||||
mountOptions = append(mountOptions, "nodev")
|
||||
}
|
||||
if statfs.Flags&unix.MS_NOEXEC == unix.MS_NOEXEC {
|
||||
mountOptions = append(mountOptions, "noexec")
|
||||
}
|
||||
if statfs.Flags&unix.MS_NOSUID == unix.MS_NOSUID {
|
||||
mountOptions = append(mountOptions, "nosuid")
|
||||
}
|
||||
if statfs.Flags&unix.MS_RDONLY == unix.MS_RDONLY {
|
||||
mountOptions = append(mountOptions, "ro")
|
||||
}
|
||||
hasSystemdMount := MountExists(mounts, "/sys/fs/cgroup/systemd")
|
||||
if hasCgroupNs && !hasSystemdMount {
|
||||
return errors.New("cgroup namespace is not supported with cgroup v1 and systemd mode")
|
||||
}
|
||||
if !skipMount {
|
||||
systemdMnt := spec.Mount{
|
||||
Destination: "/sys/fs/cgroup/systemd",
|
||||
Type: "bind",
|
||||
Source: "/sys/fs/cgroup/systemd",
|
||||
Options: mountOptions,
|
||||
mountOptions := []string{"bind", "rprivate"}
|
||||
|
||||
if !hasSystemdMount {
|
||||
skipMount := hasSystemdMount
|
||||
var statfs unix.Statfs_t
|
||||
if err := unix.Statfs("/sys/fs/cgroup/systemd", &statfs); err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
// If the mount is missing on the host, we cannot bind mount it so
|
||||
// just skip it.
|
||||
skipMount = true
|
||||
}
|
||||
mountOptions = append(mountOptions, "nodev", "noexec", "nosuid")
|
||||
} else {
|
||||
if statfs.Flags&unix.MS_NODEV == unix.MS_NODEV {
|
||||
mountOptions = append(mountOptions, "nodev")
|
||||
}
|
||||
if statfs.Flags&unix.MS_NOEXEC == unix.MS_NOEXEC {
|
||||
mountOptions = append(mountOptions, "noexec")
|
||||
}
|
||||
if statfs.Flags&unix.MS_NOSUID == unix.MS_NOSUID {
|
||||
mountOptions = append(mountOptions, "nosuid")
|
||||
}
|
||||
if statfs.Flags&unix.MS_RDONLY == unix.MS_RDONLY {
|
||||
mountOptions = append(mountOptions, "ro")
|
||||
}
|
||||
}
|
||||
if !skipMount {
|
||||
systemdMnt := spec.Mount{
|
||||
Destination: "/sys/fs/cgroup/systemd",
|
||||
Type: "bind",
|
||||
Source: "/sys/fs/cgroup/systemd",
|
||||
Options: mountOptions,
|
||||
}
|
||||
g.AddMount(systemdMnt)
|
||||
g.AddLinuxMaskedPaths("/sys/fs/cgroup/systemd/release_agent")
|
||||
}
|
||||
g.AddMount(systemdMnt)
|
||||
g.AddLinuxMaskedPaths("/sys/fs/cgroup/systemd/release_agent")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -280,6 +280,13 @@ LISTEN_FDNAMES=listen_fdnames" | sort)
|
|||
is "${container_uuid}" "${output:0:32}" "UUID should be first 32 chars of Container id"
|
||||
}
|
||||
|
||||
@test "podman --systemd fails on cgroup v1 with a private cgroupns" {
|
||||
skip_if_cgroupsv2
|
||||
|
||||
run_podman 126 run --systemd=always --cgroupns=private $IMAGE true
|
||||
assert "$output" =~ ".*cgroup namespace is not supported with cgroup v1 and systemd mode"
|
||||
}
|
||||
|
||||
# https://github.com/containers/podman/issues/13153
|
||||
@test "podman rootless-netns slirp4netns process should be in different cgroup" {
|
||||
is_rootless || skip "only meaningful for rootless"
|
||||
|
|
|
|||
|
|
@ -497,6 +497,15 @@ function skip_if_cgroupsv1() {
|
|||
fi
|
||||
}
|
||||
|
||||
#######################
|
||||
# skip_if_cgroupsv2 # ...with an optional message
|
||||
#######################
|
||||
function skip_if_cgroupsv2() {
|
||||
if is_cgroupsv2; then
|
||||
skip "${1:-test requires cgroupsv1}"
|
||||
fi
|
||||
}
|
||||
|
||||
######################
|
||||
# skip_if_rootless_cgroupsv1 # ...with an optional message
|
||||
######################
|
||||
|
|
|
|||
Loading…
Reference in New Issue