mirror of https://github.com/containers/podman.git
libpod: intermediate mount if UID not mapped into the userns
if the current user is not mapped into the new user namespace, use an intermediate mount to allow the mount point to be accessible instead of opening up all the parent directories for the mountpoint. Closes: https://github.com/containers/podman/issues/23028 Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
parent
08a8429459
commit
49eb5af301
|
@ -14,6 +14,8 @@ import (
|
|||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
metadata "github.com/checkpoint-restore/checkpointctl/lib"
|
||||
|
@ -2362,6 +2364,75 @@ func (c *Container) setupOCIHooks(ctx context.Context, config *spec.Spec) (map[s
|
|||
return allHooks, nil
|
||||
}
|
||||
|
||||
// getRootPathForOCI returns the root path to use for the OCI runtime.
|
||||
// If the current user is mapped in the container user namespace, then it returns
|
||||
// the container's mountpoint directly from the storage.
|
||||
// Otherwise, it returns an intermediate mountpoint that is accessible to anyone.
|
||||
func (c *Container) getRootPathForOCI() (string, error) {
|
||||
if hasCurrentUserMapped(c) {
|
||||
return c.state.Mountpoint, nil
|
||||
}
|
||||
return c.getIntermediateMountpointUser()
|
||||
}
|
||||
|
||||
var (
|
||||
intermediateMountPoint string
|
||||
intermediateMountPointErr error
|
||||
intermediateMountPointSync sync.Mutex
|
||||
)
|
||||
|
||||
// getIntermediateMountpointUser returns a path that is accessible to everyone. It must be on TMPDIR since
|
||||
// the runroot/tmpdir used by libpod are accessible only to the owner.
|
||||
// To avoid TOCTOU issues, the path must be owned by the current user's UID and GID.
|
||||
// The path can be used by different containers, so a mount must be created only in a private mount namespace.
|
||||
func (c *Container) recreateIntermediateMountpointUser() (string, error) {
|
||||
uid := os.Geteuid()
|
||||
gid := os.Getegid()
|
||||
for i := 0; ; i++ {
|
||||
tmpDir := os.Getenv("TMPDIR")
|
||||
if tmpDir == "" {
|
||||
tmpDir = "/tmp"
|
||||
}
|
||||
dir := filepath.Join(tmpDir, fmt.Sprintf("intermediate-mountpoint-%d.%d", rootless.GetRootlessUID(), i))
|
||||
err := os.Mkdir(dir, 0755)
|
||||
if err != nil {
|
||||
if !errors.Is(err, os.ErrExist) {
|
||||
return "", err
|
||||
}
|
||||
st, err2 := os.Stat(dir)
|
||||
if err2 != nil {
|
||||
return "", err
|
||||
}
|
||||
sys := st.Sys().(*syscall.Stat_t)
|
||||
if !st.IsDir() || sys.Uid != uint32(uid) || sys.Gid != uint32(gid) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
return dir, nil
|
||||
}
|
||||
}
|
||||
|
||||
// getIntermediateMountpointUser returns a path that is accessible to everyone.
|
||||
// To avoid TOCTOU issues, the path must be owned by the current user's UID and GID.
|
||||
// The path can be used by different containers, so a mount must be created only in a private mount namespace.
|
||||
func (c *Container) getIntermediateMountpointUser() (string, error) {
|
||||
intermediateMountPointSync.Lock()
|
||||
defer intermediateMountPointSync.Unlock()
|
||||
|
||||
if intermediateMountPoint == "" || fileutils.Exists(intermediateMountPoint) != nil {
|
||||
return c.recreateIntermediateMountpointUser()
|
||||
}
|
||||
|
||||
// update the timestamp to prevent systemd-tmpfiles from removing it
|
||||
now := time.Now()
|
||||
if err := os.Chtimes(intermediateMountPoint, now, now); err != nil {
|
||||
if !errors.Is(err, os.ErrNotExist) {
|
||||
return c.recreateIntermediateMountpointUser()
|
||||
}
|
||||
}
|
||||
return intermediateMountPoint, intermediateMountPointErr
|
||||
}
|
||||
|
||||
// mount mounts the container's root filesystem
|
||||
func (c *Container) mount() (string, error) {
|
||||
if c.state.State == define.ContainerStateRemoving {
|
||||
|
|
|
@ -565,7 +565,11 @@ func (c *Container) generateSpec(ctx context.Context) (s *spec.Spec, cleanupFunc
|
|||
return nil, nil, err
|
||||
}
|
||||
|
||||
g.SetRootPath(c.state.Mountpoint)
|
||||
rootPath, err := c.getRootPathForOCI()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
g.SetRootPath(rootPath)
|
||||
g.AddAnnotation("org.opencontainers.image.stopSignal", strconv.FormatUint(uint64(c.config.StopSignal), 10))
|
||||
|
||||
if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists {
|
||||
|
|
|
@ -184,15 +184,10 @@ func hasCurrentUserMapped(ctr *Container) bool {
|
|||
// CreateContainer creates a container.
|
||||
func (r *ConmonOCIRuntime) CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
|
||||
if !hasCurrentUserMapped(ctr) {
|
||||
if err := makeAccessible(ctr.state.Mountpoint, ctr.RootUID(), ctr.RootGID()); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// if we are running a non privileged container, be sure to umount some kernel paths so they are not
|
||||
// bind mounted inside the container at all.
|
||||
if !ctr.config.Privileged && !rootless.IsRootless() {
|
||||
return r.createRootlessContainer(ctr, restoreOptions)
|
||||
}
|
||||
hideFiles := !ctr.config.Privileged && !rootless.IsRootless()
|
||||
return r.createRootlessContainer(ctr, restoreOptions, hideFiles)
|
||||
}
|
||||
return r.createOCIContainer(ctr, restoreOptions)
|
||||
}
|
||||
|
@ -972,28 +967,6 @@ func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntime
|
|||
return &conmon, &ocirt, nil
|
||||
}
|
||||
|
||||
// makeAccessible changes the path permission and each parent directory to have --x--x--x
|
||||
func makeAccessible(path string, uid, gid int) error {
|
||||
for ; path != "/"; path = filepath.Dir(path) {
|
||||
st, err := os.Stat(path)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid {
|
||||
continue
|
||||
}
|
||||
if st.Mode()&0111 != 0111 {
|
||||
if err := os.Chmod(path, st.Mode()|0111); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Wait for a container which has been sent a signal to stop
|
||||
func waitContainerStop(ctr *Container, timeout time.Duration) error {
|
||||
return waitPidStop(ctr.state.PID, timeout)
|
||||
|
|
|
@ -8,7 +8,7 @@ import (
|
|||
"os/exec"
|
||||
)
|
||||
|
||||
func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
|
||||
func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions, hideFiles bool) (int64, error) {
|
||||
return -1, errors.New("unsupported (*ConmonOCIRuntime) createRootlessContainer")
|
||||
}
|
||||
|
||||
|
|
|
@ -3,7 +3,9 @@
|
|||
package libpod
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
|
@ -25,7 +27,7 @@ import (
|
|||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) (int64, error) {
|
||||
func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions, hideFiles bool) (int64, error) {
|
||||
type result struct {
|
||||
restoreDuration int64
|
||||
err error
|
||||
|
@ -40,6 +42,11 @@ func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOption
|
|||
}
|
||||
defer errorhandling.CloseQuiet(fd)
|
||||
|
||||
rootPath, err := ctr.getRootPathForOCI()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// create a new mountns on the current thread
|
||||
if err = unix.Unshare(unix.CLONE_NEWNS); err != nil {
|
||||
return 0, err
|
||||
|
@ -54,26 +61,69 @@ func (r *ConmonOCIRuntime) createRootlessContainer(ctr *Container, restoreOption
|
|||
logrus.Errorf("Unable to reset the previous mount namespace: %q", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// don't spread our mounts around. We are setting only /sys to be slave
|
||||
// so that the cleanup process is still able to umount the storage and the
|
||||
// changes are propagated to the host.
|
||||
err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "")
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("cannot make /sys slave: %w", err)
|
||||
}
|
||||
|
||||
mounts, err := pmount.GetMounts()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
for _, m := range mounts {
|
||||
if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") {
|
||||
continue
|
||||
if rootPath != "" {
|
||||
byMountpoint := make(map[string]*pmount.Info)
|
||||
for _, m := range mounts {
|
||||
byMountpoint[m.Mountpoint] = m
|
||||
}
|
||||
err = unix.Unmount(m.Mountpoint, 0)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return 0, fmt.Errorf("cannot unmount %s: %w", m.Mountpoint, err)
|
||||
isShared := false
|
||||
var parentMount string
|
||||
for dir := filepath.Dir(rootPath); ; dir = filepath.Dir(dir) {
|
||||
if m, found := byMountpoint[dir]; found {
|
||||
parentMount = dir
|
||||
for _, o := range strings.Split(m.Optional, ",") {
|
||||
opt := strings.Split(o, ":")
|
||||
if opt[0] == "shared" {
|
||||
isShared = true
|
||||
break
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
if dir == "/" {
|
||||
return 0, fmt.Errorf("cannot find mountpoint for the root path")
|
||||
}
|
||||
}
|
||||
|
||||
// do not propagate the bind mount on the parent mount namespace
|
||||
if err := unix.Mount("", parentMount, "", unix.MS_SLAVE, ""); err != nil {
|
||||
return 0, fmt.Errorf("failed to make %s slave: %w", parentMount, err)
|
||||
}
|
||||
|
||||
// bind mount the containers' mount path to the path where the OCI runtime expects it to be
|
||||
if err := unix.Mount(ctr.state.Mountpoint, rootPath, "", unix.MS_BIND, ""); err != nil {
|
||||
return 0, fmt.Errorf("failed to bind mount %s to %s: %w", ctr.state.Mountpoint, rootPath, err)
|
||||
}
|
||||
|
||||
if isShared {
|
||||
// we need to restore the shared propagation of the parent mount so that we don't break -v $SRC:$DST:shared in the container
|
||||
// if $SRC is on the same mount as the root path
|
||||
if err := unix.Mount("", parentMount, "", unix.MS_SHARED, ""); err != nil {
|
||||
return 0, fmt.Errorf("failed to restore MS_SHARED propagation for %s: %w", parentMount, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if hideFiles {
|
||||
// don't spread our mounts around. We are setting only /sys to be slave
|
||||
// so that the cleanup process is still able to umount the storage and the
|
||||
// changes are propagated to the host.
|
||||
err = unix.Mount("/sys", "/sys", "none", unix.MS_REC|unix.MS_SLAVE, "")
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("cannot make /sys slave: %w", err)
|
||||
}
|
||||
for _, m := range mounts {
|
||||
if !strings.HasPrefix(m.Mountpoint, "/sys/kernel") {
|
||||
continue
|
||||
}
|
||||
err = unix.Unmount(m.Mountpoint, 0)
|
||||
if err != nil && !errors.Is(err, fs.ErrNotExist) {
|
||||
return 0, fmt.Errorf("cannot unmount %s: %w", m.Mountpoint, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return r.createOCIContainer(ctr, restoreOptions)
|
||||
|
|
Loading…
Reference in New Issue