rootless: new function to join existing conmon processes

Move the logic for joining existing namespaces down to the rootless
package.  In main_local we still retrieve the list of conmon pid files
and pass it to the rootless package.

In addition, create a temporary user namespace for reading these
files, as the unprivileged user might not have enough privileges to
read the conmon pid file, for example when the container runs with a
different uidmap and root in the container is not the rootless user.

Closes: https://github.com/containers/libpod/issues/3187

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
Giuseppe Scrivano 2019-05-23 11:57:51 +02:00
parent ce26aa701f
commit ee11f3bce9
No known key found for this signature in database
GPG Key ID: E4730F97F60286ED
4 changed files with 207 additions and 74 deletions

View File

@ -4,11 +4,9 @@ package main
import ( import (
"context" "context"
"io/ioutil"
"log/syslog" "log/syslog"
"os" "os"
"runtime/pprof" "runtime/pprof"
"strconv"
"strings" "strings"
"syscall" "syscall"
@ -120,18 +118,10 @@ func setupRootless(cmd *cobra.Command, args []string) error {
return errors.Wrapf(err, "could not get pause process pid file path") return errors.Wrapf(err, "could not get pause process pid file path")
} }
data, err := ioutil.ReadFile(pausePidPath) if _, err := os.Stat(pausePidPath); err == nil {
if err != nil && !os.IsNotExist(err) { became, ret, err := rootless.TryJoinFromFilePaths("", false, []string{pausePidPath})
return errors.Wrapf(err, "cannot read pause process pid file %s", pausePidPath)
}
if err == nil {
pausePid, err := strconv.Atoi(string(data))
if err != nil { if err != nil {
return errors.Wrapf(err, "cannot parse pause pid file %s", pausePidPath) logrus.Errorf("cannot join pause process. You may need to remove %s and stop all containers", pausePidPath)
}
became, ret, err := rootless.JoinUserAndMountNS(uint(pausePid), "")
if err != nil {
logrus.Errorf("cannot join pause process pid %d. You may need to remove %s and stop all containers", pausePid, pausePidPath)
logrus.Errorf("you can use `system migrate` to recreate the pause process") logrus.Errorf("you can use `system migrate` to recreate the pause process")
logrus.Errorf(err.Error()) logrus.Errorf(err.Error())
os.Exit(1) os.Exit(1)
@ -154,28 +144,13 @@ func setupRootless(cmd *cobra.Command, args []string) error {
logrus.Errorf(err.Error()) logrus.Errorf(err.Error())
os.Exit(1) os.Exit(1)
} }
var became bool
var ret int paths := []string{}
if len(ctrs) == 0 { for _, ctr := range ctrs {
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath) paths = append(paths, ctr.Config().ConmonPidFile)
} else {
for _, ctr := range ctrs {
data, err := ioutil.ReadFile(ctr.Config().ConmonPidFile)
if err != nil {
logrus.Errorf(err.Error())
continue
}
conmonPid, err := strconv.Atoi(string(data))
if err != nil {
logrus.Errorf(err.Error())
continue
}
became, ret, err = rootless.JoinUserAndMountNS(uint(conmonPid), pausePidPath)
if err == nil {
break
}
}
} }
became, ret, err := rootless.TryJoinFromFilePaths(pausePidPath, true, paths)
if err != nil { if err != nil {
logrus.Errorf(err.Error()) logrus.Errorf(err.Error())
os.Exit(1) os.Exit(1)
@ -185,6 +160,7 @@ func setupRootless(cmd *cobra.Command, args []string) error {
} }
return nil return nil
} }
func setRLimits() error { func setRLimits() error {
rlimits := new(syscall.Rlimit) rlimits := new(syscall.Rlimit)
rlimits.Cur = 1048576 rlimits.Cur = 1048576

View File

@ -346,6 +346,26 @@ syscall_clone (unsigned long flags, void *child_stack)
#endif #endif
} }
/* Wait for the child process PID to change state and translate its wait
   status into an exit code.

   The wait is restarted whenever it is interrupted by a signal (EINTR).
   Return the child's exit status if it exited normally, 128 plus the signal
   number if it was terminated by a signal, and -1 if waiting failed or the
   status is neither of those.

   OPTIONS is accepted but not used: waitpid is always called with no option
   flags.  */
int
reexec_in_user_namespace_wait (int pid, int options)
{
  int wstatus;
  pid_t reaped;

  for (;;)
    {
      reaped = waitpid (pid, &wstatus, 0);
      if (reaped >= 0)
        break;
      if (errno != EINTR)
        return -1;
    }

  if (WIFEXITED (wstatus))
    return WEXITSTATUS (wstatus);

  return WIFSIGNALED (wstatus) ? 128 + WTERMSIG (wstatus) : -1;
}
static int static int
create_pause_process (const char *pause_pid_file_path, char **argv) create_pause_process (const char *pause_pid_file_path, char **argv)
{ {
@ -369,6 +389,8 @@ create_pause_process (const char *pause_pid_file_path, char **argv)
while (r < 0 && errno == EINTR); while (r < 0 && errno == EINTR);
close (p[0]); close (p[0]);
reexec_in_user_namespace_wait(r, 0);
return r == 1 && b == '0' ? 0 : -1; return r == 1 && b == '0' ? 0 : -1;
} }
else else
@ -573,8 +595,51 @@ check_proc_sys_userns_file (const char *path)
} }
} }
/* Copy the whole content of FILE_TO_READ to the descriptor OUTFD.

   The file is read in chunks of up to 512 bytes.  Reads and writes that are
   interrupted by a signal (EINTR) are retried, and a short write is
   completed before the next chunk is read.

   Return 0 once end of file is reached, or a negative value if the file
   cannot be opened or a read or write fails.  OUTFD is not closed here.  */
static int
copy_file_to_fd (const char *file_to_read, int outfd)
{
  char chunk[512];
  int src;

  src = open (file_to_read, O_RDONLY);
  if (src < 0)
    return src;

  while (1)
    {
      ssize_t got, put, done;

      do
        got = read (src, chunk, sizeof chunk);
      while (got < 0 && errno == EINTR);

      /* Either end of file (0) or a read error (negative): stop copying.  */
      if (got <= 0)
        {
          close (src);
          return got;
        }

      /* Push out the whole chunk, resuming after any partial write.  */
      for (done = 0; done < got; done += put)
        {
          do
            put = write (outfd, &chunk[done], got - done);
          while (put < 0 && errno == EINTR);

          if (put < 0)
            {
              close (src);
              return put;
            }
        }
    }
}
int int
reexec_in_user_namespace (int ready, char *pause_pid_file_path) reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_read, int outputfd)
{ {
int ret; int ret;
pid_t pid; pid_t pid;
@ -598,11 +663,11 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path)
listen_pid = getenv("LISTEN_PID"); listen_pid = getenv("LISTEN_PID");
listen_fds = getenv("LISTEN_FDS"); listen_fds = getenv("LISTEN_FDS");
if (listen_pid != NULL && listen_fds != NULL) { if (listen_pid != NULL && listen_fds != NULL)
if (strtol(listen_pid, NULL, 10) == getpid()) { {
do_socket_activation = true; if (strtol(listen_pid, NULL, 10) == getpid())
do_socket_activation = true;
} }
}
sprintf (uid, "%d", geteuid ()); sprintf (uid, "%d", geteuid ());
sprintf (gid, "%d", getegid ()); sprintf (gid, "%d", getegid ());
@ -658,11 +723,12 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path)
_exit (EXIT_FAILURE); _exit (EXIT_FAILURE);
} }
if (do_socket_activation) { if (do_socket_activation)
char s[32]; {
sprintf (s, "%d", getpid()); char s[32];
setenv ("LISTEN_PID", s, true); sprintf (s, "%d", getpid());
} setenv ("LISTEN_PID", s, true);
}
setenv ("_CONTAINERS_USERNS_CONFIGURED", "init", 1); setenv ("_CONTAINERS_USERNS_CONFIGURED", "init", 1);
setenv ("_CONTAINERS_ROOTLESS_UID", uid, 1); setenv ("_CONTAINERS_ROOTLESS_UID", uid, 1);
@ -721,27 +787,14 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path)
_exit (EXIT_FAILURE); _exit (EXIT_FAILURE);
} }
if (file_to_read && file_to_read[0])
{
ret = copy_file_to_fd (file_to_read, outputfd);
close (outputfd);
_exit (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
execvp (argv[0], argv); execvp (argv[0], argv);
_exit (EXIT_FAILURE); _exit (EXIT_FAILURE);
} }
/* Wait for the child process PID and turn its wait status into an exit
   code: the exit status for a normal exit, 128 plus the signal number when
   the child was killed by a signal, and -1 when waiting failed or the status
   is neither.  A wait interrupted by a signal (EINTR) is retried.  */
int
reexec_in_user_namespace_wait (int pid)
{
  int wstatus;
  pid_t rc = waitpid (pid, &wstatus, 0);

  while (rc < 0 && errno == EINTR)
    rc = waitpid (pid, &wstatus, 0);

  if (rc < 0)
    return -1;

  if (WIFEXITED (wstatus))
    return WEXITSTATUS (wstatus);

  if (WIFSIGNALED (wstatus))
    return 128 + WTERMSIG (wstatus);

  return -1;
}

View File

@ -26,8 +26,8 @@ import (
#include <stdlib.h> #include <stdlib.h>
extern uid_t rootless_uid(); extern uid_t rootless_uid();
extern uid_t rootless_gid(); extern uid_t rootless_gid();
extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path); extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path, char *file_to_read, int fd);
extern int reexec_in_user_namespace_wait(int pid); extern int reexec_in_user_namespace_wait(int pid, int options);
extern int reexec_userns_join(int userns, int mountns, char *pause_pid_file_path); extern int reexec_userns_join(int userns, int mountns, char *pause_pid_file_path);
*/ */
import "C" import "C"
@ -226,7 +226,7 @@ func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
return false, -1, errors.Errorf("cannot re-exec process") return false, -1, errors.Errorf("cannot re-exec process")
} }
ret := C.reexec_in_user_namespace_wait(pidC) ret := C.reexec_in_user_namespace_wait(pidC, 0)
if ret < 0 { if ret < 0 {
return false, -1, errors.New("error waiting for the re-exec process") return false, -1, errors.New("error waiting for the re-exec process")
} }
@ -234,11 +234,7 @@ func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
return true, int(ret), nil return true, int(ret), nil
} }
// BecomeRootInUserNS re-exec podman in a new userNS. It returns whether podman was re-executed func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (bool, int, error) {
// into a new user namespace and the return code from the re-executed podman process.
// If podman was re-executed the caller needs to propagate the error code returned by the child
// process.
func BecomeRootInUserNS(pausePid string) (bool, int, error) {
if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" { if os.Geteuid() == 0 || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" { if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" {
return false, 0, runInUser() return false, 0, runInUser()
@ -249,6 +245,13 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
cPausePid := C.CString(pausePid) cPausePid := C.CString(pausePid)
defer C.free(unsafe.Pointer(cPausePid)) defer C.free(unsafe.Pointer(cPausePid))
cFileToRead := C.CString(fileToRead)
defer C.free(unsafe.Pointer(cFileToRead))
var fileOutputFD C.int
if fileOutput != nil {
fileOutputFD = C.int(fileOutput.Fd())
}
runtime.LockOSThread() runtime.LockOSThread()
defer runtime.UnlockOSThread() defer runtime.UnlockOSThread()
@ -262,7 +265,7 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
defer w.Close() defer w.Close()
defer w.Write([]byte("0")) defer w.Write([]byte("0"))
pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid) pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid, cFileToRead, fileOutputFD)
pid := int(pidC) pid := int(pidC)
if pid < 0 { if pid < 0 {
return false, -1, errors.Errorf("cannot re-exec process") return false, -1, errors.Errorf("cannot re-exec process")
@ -328,6 +331,10 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
return false, -1, errors.Wrapf(err, "read from sync pipe") return false, -1, errors.Wrapf(err, "read from sync pipe")
} }
if fileOutput != nil {
return true, 0, nil
}
if b[0] == '2' { if b[0] == '2' {
// We have lost the race for writing the PID file, as probably another // We have lost the race for writing the PID file, as probably another
// process created a namespace and wrote the PID. // process created a namespace and wrote the PID.
@ -368,10 +375,95 @@ func BecomeRootInUserNS(pausePid string) (bool, int, error) {
} }
}() }()
ret := C.reexec_in_user_namespace_wait(pidC) ret := C.reexec_in_user_namespace_wait(pidC, 0)
if ret < 0 { if ret < 0 {
return false, -1, errors.New("error waiting for the re-exec process") return false, -1, errors.New("error waiting for the re-exec process")
} }
return true, int(ret), nil return true, int(ret), nil
} }
// BecomeRootInUserNS re-executes podman in a new user namespace.
// The boolean result reports whether podman was re-executed into the new
// user namespace; the integer is the return code of the re-executed podman
// process, which the caller must propagate when the re-exec happened.
// It delegates to becomeRootInUserNS without a file to read and without an
// output file.
func BecomeRootInUserNS(pausePid string) (bool, int, error) {
	const noFileToRead = ""
	return becomeRootInUserNS(pausePid, noFileToRead, nil)
}
// TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths.
// This is useful when there are already running containers and we
// don't have a pause process yet. We can use the paths to the conmon
// processes to attempt joining their namespaces.
// If needNewNamespace is set, the file is read from a temporary user
// namespace, this is useful for containers that are running with a
// different uidmap and the unprivileged user has no way to read the
// file owned by the root in the container.
//
// When paths is empty the call is equivalent to BecomeRootInUserNS(pausePidPath).
// Otherwise the paths are tried in order and the first one that yields a
// parsable pid is passed to JoinUserAndMountNS together with pausePidPath.
// If every path fails, the error of the last attempt is returned with
// (false, 0).
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
	if len(paths) == 0 {
		return BecomeRootInUserNS(pausePidPath)
	}

	var lastErr error
	var pausePid int

	for _, path := range paths {
		if !needNewNamespace {
			// The pid file is readable by the current user: read it directly.
			data, err := ioutil.ReadFile(path)
			if err != nil {
				lastErr = err
				continue
			}

			pausePid, err = strconv.Atoi(string(data))
			if err != nil {
				lastErr = errors.Wrapf(err, "cannot parse file %s", path)
				continue
			}

			lastErr = nil
			break
		} else {
			// Let a helper process running in a temporary user namespace
			// read the file and send its content back over a socket pair.
			fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_DGRAM, 0)
			if err != nil {
				lastErr = err
				continue
			}

			r, w := os.NewFile(uintptr(fds[0]), "read file"), os.NewFile(uintptr(fds[1]), "write file")

			// These defers run when the function returns, not at the end
			// of the iteration.
			defer w.Close()
			defer r.Close()

			if _, _, err := becomeRootInUserNS("", path, w); err != nil {
				lastErr = err
				continue
			}

			// Drop our copy of the write end so that only the helper
			// process holds it.
			w.Close()
			defer func() {
				r.Close()
				// A pid of -1 makes waitpid wait for any child process.
				C.reexec_in_user_namespace_wait(-1, 0)
			}()

			b := make([]byte, 32)

			n, err := r.Read(b)
			if err != nil {
				lastErr = errors.Wrapf(err, "cannot read %s\n", path)
				continue
			}

			pausePid, err = strconv.Atoi(string(b[:n]))
			if err != nil {
				// Record the failure; otherwise a stale (possibly nil)
				// lastErr would let us fall through and join pid 0.
				lastErr = errors.Wrapf(err, "cannot parse file %s", path)
				continue
			}

			lastErr = nil
			break
		}
	}
	if lastErr != nil {
		return false, 0, lastErr
	}

	return JoinUserAndMountNS(uint(pausePid), pausePidPath)
}

View File

@ -36,3 +36,15 @@ func GetRootlessGID() int {
func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) { func JoinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
return false, -1, errors.New("this function is not supported on this os") return false, -1, errors.New("this function is not supported on this os")
} }
// TryJoinFromFilePaths is the counterpart, for this OS, of the function that
// attempts to join the namespaces of the processes whose pid files are listed
// in paths. Here it does nothing with pausePidPath, needNewNamespace or paths
// and always reports failure: it returns false, -1 and an error stating that
// the function is not supported on this os.
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
	err := errors.New("this function is not supported on this os")
	return false, -1, err
}