libpod: do not move podman with --cgroups=disabled

The expectation with --cgroups=disabled is that the current cgroup is
used by the container.

Currently, --cgroups=disabled is passed directly to the OCI runtime,
but that alone does not stop Podman from creating a new cgroup for
itself when it doesn't own the current one.

Closes: https://github.com/containers/podman/issues/20910

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
Giuseppe Scrivano 2024-05-30 10:44:18 +02:00
parent 939cb83a04
commit 900e29549a
No known key found for this signature in database
GPG Key ID: 67E38F7A8BA21772
7 changed files with 50 additions and 23 deletions

View File

@ -58,8 +58,13 @@ func setupContainerEngine(cmd *cobra.Command) (entities.ContainerEngine, error)
} }
if !registry.IsRemote() { if !registry.IsRemote() {
_, noMoveProcess := cmd.Annotations[registry.NoMoveProcess] _, noMoveProcess := cmd.Annotations[registry.NoMoveProcess]
cgroupMode := ""
err := containerEngine.SetupRootless(registry.Context(), noMoveProcess) if flag := cmd.LocalFlags().Lookup("cgroups"); flag != nil {
cgroupMode = flag.Value.String()
}
err := containerEngine.SetupRootless(registry.Context(), noMoveProcess, cgroupMode)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -362,8 +362,12 @@ func persistentPreRunE(cmd *cobra.Command, args []string) error {
// 3) command doesn't require Parent Namespace // 3) command doesn't require Parent Namespace
_, found := cmd.Annotations[registry.ParentNSRequired] _, found := cmd.Annotations[registry.ParentNSRequired]
if !registry.IsRemote() && !found { if !registry.IsRemote() && !found {
cgroupMode := ""
_, noMoveProcess := cmd.Annotations[registry.NoMoveProcess] _, noMoveProcess := cmd.Annotations[registry.NoMoveProcess]
err := registry.ContainerEngine().SetupRootless(registry.Context(), noMoveProcess) if flag := cmd.LocalFlags().Lookup("cgroups"); flag != nil {
cgroupMode = flag.Value.String()
}
err := registry.ContainerEngine().SetupRootless(registry.Context(), noMoveProcess, cgroupMode)
if err != nil { if err != nil {
return err return err
} }

View File

@ -95,7 +95,7 @@ type ContainerEngine interface { //nolint:interfacebloat
PodUnpause(ctx context.Context, namesOrIds []string, options PodunpauseOptions) ([]*PodUnpauseReport, error) PodUnpause(ctx context.Context, namesOrIds []string, options PodunpauseOptions) ([]*PodUnpauseReport, error)
Renumber(ctx context.Context) error Renumber(ctx context.Context) error
Reset(ctx context.Context) error Reset(ctx context.Context) error
SetupRootless(ctx context.Context, noMoveProcess bool) error SetupRootless(ctx context.Context, noMoveProcess bool, cgroupMode string) error
SecretCreate(ctx context.Context, name string, reader io.Reader, options SecretCreateOptions) (*SecretCreateReport, error) SecretCreate(ctx context.Context, name string, reader io.Reader, options SecretCreateOptions) (*SecretCreateReport, error)
SecretInspect(ctx context.Context, nameOrIDs []string, options SecretInspectOptions) ([]*SecretInfoReport, []error, error) SecretInspect(ctx context.Context, nameOrIDs []string, options SecretInspectOptions) ([]*SecretInfoReport, []error, error)
SecretList(ctx context.Context, opts SecretListRequest) ([]*SecretInfoReport, error) SecretList(ctx context.Context, opts SecretListRequest) ([]*SecretInfoReport, error)

View File

@ -8,6 +8,6 @@ import (
const defaultRunPath = "/var/run" const defaultRunPath = "/var/run"
// SetupRootless is a NOP for freebsd as it only configures the rootless userns on linux. // SetupRootless is a NOP for freebsd as it only configures the rootless userns on linux.
// All arguments are ignored and nil is always returned.
func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool) error { func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, cgroupMode string) error {
return nil return nil
} }

View File

@ -17,7 +17,7 @@ import (
// Default path for system runtime state // Default path for system runtime state
const defaultRunPath = "/run" const defaultRunPath = "/run"
func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool) error { func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, cgroupMode string) error {
runsUnderSystemd := systemd.RunsOnSystemd() runsUnderSystemd := systemd.RunsOnSystemd()
if !runsUnderSystemd { if !runsUnderSystemd {
isPid1 := os.Getpid() == 1 isPid1 := os.Getpid() == 1
@ -30,30 +30,33 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool)
} }
} }
// do it only after podman has already re-execed and running with uid==0. configureCgroup := cgroupMode != "disabled"
hasCapSysAdmin, err := unshare.HasCapSysAdmin() if configureCgroup {
if err != nil { // do it only after podman has already re-execed and running with uid==0.
return err hasCapSysAdmin, err := unshare.HasCapSysAdmin()
}
// check for both euid == 0 and CAP_SYS_ADMIN because we may be running in a container with CAP_SYS_ADMIN set.
if os.Geteuid() == 0 && hasCapSysAdmin {
ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup()
if err != nil { if err != nil {
logrus.Infof("Failed to detect the owner for the current cgroup: %v", err) return err
} }
if !ownsCgroup { // check for both euid == 0 and CAP_SYS_ADMIN because we may be running in a container with CAP_SYS_ADMIN set.
conf, err := ic.Config(context.Background()) if os.Geteuid() == 0 && hasCapSysAdmin {
ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup()
if err != nil { if err != nil {
return err logrus.Infof("Failed to detect the owner for the current cgroup: %v", err)
} }
unitName := fmt.Sprintf("podman-%d.scope", os.Getpid()) if !ownsCgroup {
if runsUnderSystemd || conf.Engine.CgroupManager == config.SystemdCgroupsManager { conf, err := ic.Config(context.Background())
if err := systemd.RunUnderSystemdScope(os.Getpid(), "user.slice", unitName); err != nil { if err != nil {
logrus.Debugf("Failed to add podman to systemd sandbox cgroup: %v", err) return err
}
unitName := fmt.Sprintf("podman-%d.scope", os.Getpid())
if runsUnderSystemd || conf.Engine.CgroupManager == config.SystemdCgroupsManager {
if err := systemd.RunUnderSystemdScope(os.Getpid(), "user.slice", unitName); err != nil {
logrus.Debugf("Failed to add podman to systemd sandbox cgroup: %v", err)
}
} }
} }
return nil
} }
return nil
} }
pausePidPath, err := util.GetRootlessPauseProcessPidPath() pausePidPath, err := util.GetRootlessPauseProcessPidPath()

View File

@ -13,7 +13,7 @@ func (ic *ContainerEngine) Info(ctx context.Context) (*define.Info, error) {
return system.Info(ic.ClientCtx, nil) return system.Info(ic.ClientCtx, nil)
} }
// SetupRootless is not available in the tunnel engine: it ignores its
// arguments and unconditionally panics, so it never returns an error value.
func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool) error { func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, cgroupMode string) error {
panic(errors.New("rootless engine mode is not supported when tunneling")) panic(errors.New("rootless engine mode is not supported when tunneling"))
} }

View File

@ -37,4 +37,19 @@ load helpers
run_podman rm myc run_podman rm myc
} }
# Verify that a container started with --cgroups=disabled reports the same
# /proc/self/cgroup content as the test process that launched podman.
@test "podman run --cgroups=disabled keeps the current cgroup" {
    skip_if_remote "podman-remote does not support --cgroups=disabled"
    skip_if_rootless_cgroupsv1
    runtime=$(podman_runtime)
    if [[ $runtime != "crun" ]]; then
        skip "runtime is $runtime; --cgroups=disabled requires crun"
    fi

    # Snapshot the cgroup membership of the test process before running podman.
    current_cgroup=$(cat /proc/self/cgroup)

    # --cgroupns=host is required to have full visibility of the cgroup path inside the container
    run_podman run --cgroups=disabled --cgroupns=host --rm $IMAGE cat /proc/self/cgroup

    # Quote the expected value: /proc/self/cgroup may span several lines or
    # contain characters subject to word splitting/globbing, which would
    # otherwise shift the arguments passed to 'is'.
    is "$output" "$current_cgroup" "--cgroups=disabled must not change the current cgroup"
}
# vim: filetype=sh # vim: filetype=sh