use rootless netns from c/common
Use the new rootlessnetns logic from c/common, drop the podman code here and make use of the new much simpler API.

ref: https://github.com/containers/common/pull/1761

[NO NEW TESTS NEEDED]

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
This commit is contained in:
parent
605a29a714
commit
a687c38860
|
@ -18,7 +18,6 @@ import (
|
||||||
"github.com/containers/podman/v4/pkg/domain/entities"
|
"github.com/containers/podman/v4/pkg/domain/entities"
|
||||||
"github.com/containers/podman/v4/pkg/domain/infra"
|
"github.com/containers/podman/v4/pkg/domain/infra"
|
||||||
"github.com/containers/podman/v4/pkg/rootless"
|
"github.com/containers/podman/v4/pkg/rootless"
|
||||||
"github.com/containers/podman/v4/utils"
|
|
||||||
"github.com/coreos/go-systemd/v22/activation"
|
"github.com/coreos/go-systemd/v22/activation"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/spf13/pflag"
|
"github.com/spf13/pflag"
|
||||||
|
@ -131,7 +130,7 @@ func restService(flags *pflag.FlagSet, cfg *entities.PodmanConfig, opts entities
|
||||||
logrus.Warnf("Running 'system service' in rootless mode without cgroup v2, containers won't survive a 'system service' restart")
|
logrus.Warnf("Running 'system service' in rootless mode without cgroup v2, containers won't survive a 'system service' restart")
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := utils.MaybeMoveToSubCgroup(); err != nil {
|
if err := cgroups.MaybeMoveToSubCgroup(); err != nil {
|
||||||
// it is a best effort operation, so just print the
|
// it is a best effort operation, so just print the
|
||||||
// error for debugging purposes.
|
// error for debugging purposes.
|
||||||
logrus.Debugf("Could not move to subcgroup: %v", err)
|
logrus.Debugf("Could not move to subcgroup: %v", err)
|
||||||
|
|
2
go.mod
2
go.mod
|
@ -12,7 +12,7 @@ require (
|
||||||
github.com/containernetworking/cni v1.1.2
|
github.com/containernetworking/cni v1.1.2
|
||||||
github.com/containernetworking/plugins v1.4.0
|
github.com/containernetworking/plugins v1.4.0
|
||||||
github.com/containers/buildah v1.33.2-0.20231121195905-d1a1c53c8e1c
|
github.com/containers/buildah v1.33.2-0.20231121195905-d1a1c53c8e1c
|
||||||
github.com/containers/common v0.57.1-0.20231130092720-630c929caef9
|
github.com/containers/common v0.57.1-0.20231206135104-b647eb3a5eea
|
||||||
github.com/containers/conmon v2.0.20+incompatible
|
github.com/containers/conmon v2.0.20+incompatible
|
||||||
github.com/containers/gvisor-tap-vsock v0.7.1
|
github.com/containers/gvisor-tap-vsock v0.7.1
|
||||||
github.com/containers/image/v5 v5.29.1-0.20231201205726-671ab94a09ea
|
github.com/containers/image/v5 v5.29.1-0.20231201205726-671ab94a09ea
|
||||||
|
|
4
go.sum
4
go.sum
|
@ -256,8 +256,8 @@ github.com/containernetworking/plugins v1.4.0 h1:+w22VPYgk7nQHw7KT92lsRmuToHvb7w
|
||||||
github.com/containernetworking/plugins v1.4.0/go.mod h1:UYhcOyjefnrQvKvmmyEKsUA+M9Nfn7tqULPpH0Pkcj0=
|
github.com/containernetworking/plugins v1.4.0/go.mod h1:UYhcOyjefnrQvKvmmyEKsUA+M9Nfn7tqULPpH0Pkcj0=
|
||||||
github.com/containers/buildah v1.33.2-0.20231121195905-d1a1c53c8e1c h1:E7nxvH3N3kpyson0waJv1X+eY9hAs+x2zQswsK+//yY=
|
github.com/containers/buildah v1.33.2-0.20231121195905-d1a1c53c8e1c h1:E7nxvH3N3kpyson0waJv1X+eY9hAs+x2zQswsK+//yY=
|
||||||
github.com/containers/buildah v1.33.2-0.20231121195905-d1a1c53c8e1c/go.mod h1:oMNfVrZGEfWVOxXTNOYPMdZzDfSo2umURK/TO0d8TRk=
|
github.com/containers/buildah v1.33.2-0.20231121195905-d1a1c53c8e1c/go.mod h1:oMNfVrZGEfWVOxXTNOYPMdZzDfSo2umURK/TO0d8TRk=
|
||||||
github.com/containers/common v0.57.1-0.20231130092720-630c929caef9 h1:56pMgYcYyhTlmPPhRmG34NBmT5S/IwMMmOq0o4LJAMo=
|
github.com/containers/common v0.57.1-0.20231206135104-b647eb3a5eea h1:PI6EWt76Df+v4KrZ6Wn1Fvz/zQvbAYO+2gAQeBGzj3s=
|
||||||
github.com/containers/common v0.57.1-0.20231130092720-630c929caef9/go.mod h1:1TyelTjZvU4ZVSq6tGl0ImFlMKIbE8QkzpACQCdcs4U=
|
github.com/containers/common v0.57.1-0.20231206135104-b647eb3a5eea/go.mod h1:WbO7Tl8eLCt/+b35lsuc1NkWy7cZsdgF84EJ7VKhgOU=
|
||||||
github.com/containers/conmon v2.0.20+incompatible h1:YbCVSFSCqFjjVwHTPINGdMX1F6JXHGTUje2ZYobNrkg=
|
github.com/containers/conmon v2.0.20+incompatible h1:YbCVSFSCqFjjVwHTPINGdMX1F6JXHGTUje2ZYobNrkg=
|
||||||
github.com/containers/conmon v2.0.20+incompatible/go.mod h1:hgwZ2mtuDrppv78a/cOBNiCm6O0UMWGx1mu7P00nu5I=
|
github.com/containers/conmon v2.0.20+incompatible/go.mod h1:hgwZ2mtuDrppv78a/cOBNiCm6O0UMWGx1mu7P00nu5I=
|
||||||
github.com/containers/gvisor-tap-vsock v0.7.1 h1:+Rc+sOPplrkQb/BUXeN0ug8TxjgyrIqo/9P/eNS2A4c=
|
github.com/containers/gvisor-tap-vsock v0.7.1 h1:+Rc+sOPplrkQb/BUXeN0ug8TxjgyrIqo/9P/eNS2A4c=
|
||||||
|
|
|
@ -21,7 +21,6 @@ import (
|
||||||
"github.com/containers/common/pkg/config"
|
"github.com/containers/common/pkg/config"
|
||||||
"github.com/containers/podman/v4/libpod/define"
|
"github.com/containers/podman/v4/libpod/define"
|
||||||
"github.com/containers/podman/v4/pkg/rootless"
|
"github.com/containers/podman/v4/pkg/rootless"
|
||||||
"github.com/containers/podman/v4/utils"
|
|
||||||
spec "github.com/opencontainers/runtime-spec/specs-go"
|
spec "github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"github.com/opencontainers/runtime-tools/generate"
|
"github.com/opencontainers/runtime-tools/generate"
|
||||||
"github.com/opencontainers/selinux/go-selinux/label"
|
"github.com/opencontainers/selinux/go-selinux/label"
|
||||||
|
@ -390,7 +389,7 @@ func (c *Container) getOCICgroupPath() (string, error) {
|
||||||
case c.config.NoCgroups:
|
case c.config.NoCgroups:
|
||||||
return "", nil
|
return "", nil
|
||||||
case c.config.CgroupsMode == cgroupSplit:
|
case c.config.CgroupsMode == cgroupSplit:
|
||||||
selfCgroup, err := utils.GetOwnCgroupDisallowRoot()
|
selfCgroup, err := cgroups.GetOwnCgroupDisallowRoot()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,10 +10,10 @@ import (
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
systemdCommon "github.com/containers/common/pkg/systemd"
|
||||||
"github.com/containers/podman/v4/pkg/errorhandling"
|
"github.com/containers/podman/v4/pkg/errorhandling"
|
||||||
"github.com/containers/podman/v4/pkg/rootless"
|
"github.com/containers/podman/v4/pkg/rootless"
|
||||||
"github.com/containers/podman/v4/pkg/systemd"
|
"github.com/containers/podman/v4/pkg/systemd"
|
||||||
"github.com/containers/podman/v4/utils"
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -138,7 +138,7 @@ func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool) er
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Container) disableHealthCheckSystemd(isStartup bool) bool {
|
func (c *Container) disableHealthCheckSystemd(isStartup bool) bool {
|
||||||
if !utils.RunsOnSystemd() || os.Getenv("DISABLE_HC_SYSTEMD") == "true" {
|
if !systemdCommon.RunsOnSystemd() || os.Getenv("DISABLE_HC_SYSTEMD") == "true" {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if isStartup {
|
if isStartup {
|
||||||
|
|
|
@ -65,24 +65,7 @@ func (c *Container) getNetworkOptions(networkOpts map[string]types.PerNetworkOpt
|
||||||
// setUpNetwork will set up the networks, on error it will also tear down the cni
|
// setUpNetwork will set up the networks, on error it will also tear down the cni
|
||||||
// networks. If rootless it will join/create the rootless network namespace.
|
// networks. If rootless it will join/create the rootless network namespace.
|
||||||
func (r *Runtime) setUpNetwork(ns string, opts types.NetworkOptions) (map[string]types.StatusBlock, error) {
|
func (r *Runtime) setUpNetwork(ns string, opts types.NetworkOptions) (map[string]types.StatusBlock, error) {
|
||||||
rootlessNetNS, err := r.GetRootlessNetNs(true)
|
return r.network.Setup(ns, types.SetupOptions{NetworkOptions: opts})
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
var results map[string]types.StatusBlock
|
|
||||||
setUpPod := func() error {
|
|
||||||
results, err = r.network.Setup(ns, types.SetupOptions{NetworkOptions: opts})
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
// rootlessNetNS is nil if we are root
|
|
||||||
if rootlessNetNS != nil {
|
|
||||||
// execute the setup in the rootless net ns
|
|
||||||
err = rootlessNetNS.Do(setUpPod)
|
|
||||||
rootlessNetNS.Lock.Unlock()
|
|
||||||
} else {
|
|
||||||
err = setUpPod()
|
|
||||||
}
|
|
||||||
return results, err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// getNetworkPodName return the pod name (hostname) used by dns backend.
|
// getNetworkPodName return the pod name (hostname) used by dns backend.
|
||||||
|
@ -100,29 +83,7 @@ func getNetworkPodName(c *Container) string {
|
||||||
// Tear down a container's network configuration and joins the
|
// Tear down a container's network configuration and joins the
|
||||||
// rootless net ns as rootless user
|
// rootless net ns as rootless user
|
||||||
func (r *Runtime) teardownNetworkBackend(ns string, opts types.NetworkOptions) error {
|
func (r *Runtime) teardownNetworkBackend(ns string, opts types.NetworkOptions) error {
|
||||||
rootlessNetNS, err := r.GetRootlessNetNs(false)
|
return r.network.Teardown(ns, types.TeardownOptions{NetworkOptions: opts})
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
tearDownPod := func() error {
|
|
||||||
if err := r.network.Teardown(ns, types.TeardownOptions{NetworkOptions: opts}); err != nil {
|
|
||||||
return fmt.Errorf("tearing down network namespace configuration for container %s: %w", opts.ContainerID, err)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// rootlessNetNS is nil if we are root
|
|
||||||
if rootlessNetNS != nil {
|
|
||||||
// execute the network setup in the rootless net ns
|
|
||||||
err = rootlessNetNS.Do(tearDownPod)
|
|
||||||
if cerr := rootlessNetNS.Cleanup(r); cerr != nil {
|
|
||||||
logrus.WithError(cerr).Error("failed to clean up rootless netns")
|
|
||||||
}
|
|
||||||
rootlessNetNS.Lock.Unlock()
|
|
||||||
} else {
|
|
||||||
err = tearDownPod()
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tear down a container's network backend configuration, but do not tear down the
|
// Tear down a container's network backend configuration, but do not tear down the
|
||||||
|
|
|
@ -5,479 +5,22 @@ package libpod
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"crypto/rand"
|
"crypto/rand"
|
||||||
"crypto/sha256"
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
"syscall"
|
|
||||||
|
|
||||||
"github.com/containernetworking/plugins/pkg/ns"
|
"github.com/containernetworking/plugins/pkg/ns"
|
||||||
"github.com/containers/common/libnetwork/resolvconf"
|
|
||||||
"github.com/containers/common/libnetwork/slirp4netns"
|
|
||||||
"github.com/containers/common/libnetwork/types"
|
"github.com/containers/common/libnetwork/types"
|
||||||
netUtil "github.com/containers/common/libnetwork/util"
|
netUtil "github.com/containers/common/libnetwork/util"
|
||||||
"github.com/containers/common/pkg/netns"
|
"github.com/containers/common/pkg/netns"
|
||||||
"github.com/containers/podman/v4/libpod/define"
|
|
||||||
"github.com/containers/podman/v4/pkg/rootless"
|
"github.com/containers/podman/v4/pkg/rootless"
|
||||||
"github.com/containers/podman/v4/pkg/util"
|
|
||||||
"github.com/containers/podman/v4/utils"
|
|
||||||
"github.com/containers/storage/pkg/lockfile"
|
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"github.com/opencontainers/selinux/go-selinux/label"
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/vishvananda/netlink"
|
"github.com/vishvananda/netlink"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
|
||||||
// rootlessNetNsName is the file name for the rootless network namespace bind mount
|
|
||||||
rootlessNetNsName = "rootless-netns"
|
|
||||||
|
|
||||||
// rootlessNetNsSilrp4netnsPidFile is the name of the rootless netns slirp4netns pid file
|
|
||||||
rootlessNetNsSilrp4netnsPidFile = "rootless-netns-slirp4netns.pid"
|
|
||||||
|
|
||||||
// persistentCNIDir is the directory where the CNI files are stored
|
|
||||||
persistentCNIDir = "/var/lib/cni"
|
|
||||||
)
|
|
||||||
|
|
||||||
type RootlessNetNS struct {
|
|
||||||
ns ns.NetNS
|
|
||||||
dir string
|
|
||||||
Lock *lockfile.LockFile
|
|
||||||
}
|
|
||||||
|
|
||||||
// getPath will join the given path to the rootless netns dir
|
|
||||||
func (r *RootlessNetNS) getPath(path string) string {
|
|
||||||
return filepath.Join(r.dir, path)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Do - run the given function in the rootless netns.
|
|
||||||
// It does not lock the rootlessCNI lock, the caller
|
|
||||||
// should only lock when needed, e.g. for network operations.
|
|
||||||
func (r *RootlessNetNS) Do(toRun func() error) error {
|
|
||||||
err := r.ns.Do(func(_ ns.NetNS) error {
|
|
||||||
// Before we can run the given function,
|
|
||||||
// we have to set up all mounts correctly.
|
|
||||||
|
|
||||||
// The order of the mounts is IMPORTANT.
|
|
||||||
// The idea of the extra mount ns is to make /run and /var/lib/cni writeable
|
|
||||||
// for the cni plugins but not affecting the podman user namespace.
|
|
||||||
// Because the plugins also need access to XDG_RUNTIME_DIR/netns some special setup is needed.
|
|
||||||
|
|
||||||
// The following bind mounts are needed
|
|
||||||
// 1. XDG_RUNTIME_DIR -> XDG_RUNTIME_DIR/rootless-netns/XDG_RUNTIME_DIR
|
|
||||||
// 2. /run/systemd -> XDG_RUNTIME_DIR/rootless-netns/run/systemd (only if it exists)
|
|
||||||
// 3. XDG_RUNTIME_DIR/rootless-netns/resolv.conf -> /etc/resolv.conf or XDG_RUNTIME_DIR/rootless-netns/run/symlink/target
|
|
||||||
// 4. XDG_RUNTIME_DIR/rootless-netns/var/lib/cni -> /var/lib/cni (if /var/lib/cni does not exist, use the parent dir)
|
|
||||||
// 5. XDG_RUNTIME_DIR/rootless-netns/run -> /run
|
|
||||||
|
|
||||||
// Create a new mount namespace,
|
|
||||||
// this must happen inside the netns thread.
|
|
||||||
err := unix.Unshare(unix.CLONE_NEWNS)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("cannot create a new mount namespace: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
xdgRuntimeDir, err := util.GetRootlessRuntimeDir()
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("could not get runtime directory: %w", err)
|
|
||||||
}
|
|
||||||
newXDGRuntimeDir := r.getPath(xdgRuntimeDir)
|
|
||||||
// 1. Mount the netns into the new run to keep them accessible.
|
|
||||||
// Otherwise cni setup will fail because it cannot access the netns files.
|
|
||||||
err = unix.Mount(xdgRuntimeDir, newXDGRuntimeDir, "none", unix.MS_BIND|unix.MS_SHARED|unix.MS_REC, "")
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to mount runtime directory for rootless netns: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2. Also keep /run/systemd if it exists.
|
|
||||||
// Many files are symlinked into this dir, for example /dev/log.
|
|
||||||
runSystemd := "/run/systemd"
|
|
||||||
_, err = os.Stat(runSystemd)
|
|
||||||
if err == nil {
|
|
||||||
newRunSystemd := r.getPath(runSystemd)
|
|
||||||
err = unix.Mount(runSystemd, newRunSystemd, "none", unix.MS_BIND|unix.MS_REC, "")
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to mount /run/systemd directory for rootless netns: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 3. On some distros /etc/resolv.conf is symlinked to somewhere under /run.
|
|
||||||
// Because the kernel will follow the symlink before mounting, it is not
|
|
||||||
// possible to mount a file at /etc/resolv.conf. We have to ensure that
|
|
||||||
// the link target will be available in the mount ns.
|
|
||||||
// see: https://github.com/containers/podman/issues/10855
|
|
||||||
resolvePath := "/etc/resolv.conf"
|
|
||||||
linkCount := 0
|
|
||||||
for i := 1; i < len(resolvePath); i++ {
|
|
||||||
// Do not use filepath.EvalSymlinks, we only want the first symlink under /run.
|
|
||||||
// If /etc/resolv.conf has more than one symlink under /run, e.g.
|
|
||||||
// -> /run/systemd/resolve/stub-resolv.conf -> /run/systemd/resolve/resolv.conf
|
|
||||||
// we would put the netns resolv.conf file to the last path. However this will
|
|
||||||
// break dns because the second link does not exist in the mount ns.
|
|
||||||
// see https://github.com/containers/podman/issues/11222
|
|
||||||
//
|
|
||||||
// We also need to resolve all path components not just the last file.
|
|
||||||
// see https://github.com/containers/podman/issues/12461
|
|
||||||
|
|
||||||
if resolvePath[i] != '/' {
|
|
||||||
// if we are at the last char we need to inc i by one because there is no final slash
|
|
||||||
if i == len(resolvePath)-1 {
|
|
||||||
i++
|
|
||||||
} else {
|
|
||||||
// not the end of path, keep going
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
path := resolvePath[:i]
|
|
||||||
|
|
||||||
fi, err := os.Lstat(path)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to stat resolv.conf path: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// no link, just continue
|
|
||||||
if fi.Mode()&os.ModeSymlink == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
link, err := os.Readlink(path)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to read resolv.conf symlink: %w", err)
|
|
||||||
}
|
|
||||||
linkCount++
|
|
||||||
if filepath.IsAbs(link) {
|
|
||||||
// link is an absolute path
|
|
||||||
resolvePath = filepath.Join(link, resolvePath[i:])
|
|
||||||
} else {
|
|
||||||
// link is a relative path, join it with the previous path
|
|
||||||
base := filepath.Dir(path)
|
|
||||||
resolvePath = filepath.Join(base, link, resolvePath[i:])
|
|
||||||
}
|
|
||||||
// set i back to zero since we now have a new base path
|
|
||||||
i = 0
|
|
||||||
|
|
||||||
// we have to stop at the first path under /run because we will have an empty /run and will create the path anyway
|
|
||||||
// if we would continue we would need to recreate all links under /run
|
|
||||||
if strings.HasPrefix(resolvePath, "/run/") {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
// make sure we do not loop forever
|
|
||||||
if linkCount == 255 {
|
|
||||||
return errors.New("too many symlinks while resolving /etc/resolv.conf")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
logrus.Debugf("The path of /etc/resolv.conf in the mount ns is %q", resolvePath)
|
|
||||||
// When /etc/resolv.conf on the host is a symlink to /run/systemd/resolve/stub-resolv.conf,
|
|
||||||
// we have to mount an empty filesystem on /run/systemd/resolve in the child namespace,
|
|
||||||
// so as to isolate the directory from the host mount namespace.
|
|
||||||
//
|
|
||||||
// Otherwise our bind-mount for /run/systemd/resolve/stub-resolv.conf is unmounted
|
|
||||||
// when systemd-resolved unlinks and recreates /run/systemd/resolve/stub-resolv.conf on the host.
|
|
||||||
// see: https://github.com/containers/podman/issues/10929
|
|
||||||
if strings.HasPrefix(resolvePath, "/run/systemd/resolve/") {
|
|
||||||
rsr := r.getPath("/run/systemd/resolve")
|
|
||||||
err = unix.Mount("", rsr, define.TypeTmpfs, unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, "")
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to mount tmpfs on %q for rootless netns: %w", rsr, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if strings.HasPrefix(resolvePath, "/run/") {
|
|
||||||
resolvePath = r.getPath(resolvePath)
|
|
||||||
err = os.MkdirAll(filepath.Dir(resolvePath), 0700)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to create rootless-netns resolv.conf directory: %w", err)
|
|
||||||
}
|
|
||||||
// we want to bind mount on this file so we have to create the file first
|
|
||||||
_, err = os.OpenFile(resolvePath, os.O_CREATE|os.O_RDONLY, 0700)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to create rootless-netns resolv.conf file: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// mount resolv.conf to make use of the host dns
|
|
||||||
err = unix.Mount(r.getPath("resolv.conf"), resolvePath, "none", unix.MS_BIND, "")
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to mount resolv.conf for rootless netns: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// 4. CNI plugins need access to /var/lib/cni and /run
|
|
||||||
varDir := ""
|
|
||||||
varTarget := persistentCNIDir
|
|
||||||
// we can only mount to a target dir which exists, check /var/lib/cni recursively
|
|
||||||
// while we could always use /var there are cases where a user might store the cni
|
|
||||||
// configs under /var/custom and this would break
|
|
||||||
for {
|
|
||||||
if _, err := os.Stat(varTarget); err == nil {
|
|
||||||
varDir = r.getPath(varTarget)
|
|
||||||
break
|
|
||||||
}
|
|
||||||
varTarget = filepath.Dir(varTarget)
|
|
||||||
if varTarget == "/" {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if varDir == "" {
|
|
||||||
return errors.New("failed to stat /var directory")
|
|
||||||
}
|
|
||||||
// make sure to mount var first
|
|
||||||
err = unix.Mount(varDir, varTarget, "none", unix.MS_BIND, "")
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to mount %s for rootless netns: %w", varTarget, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// 5. Mount the new prepared run dir to /run, it has to be recursive to keep the other bind mounts.
|
|
||||||
runDir := r.getPath("run")
|
|
||||||
err = unix.Mount(runDir, "/run", "none", unix.MS_BIND|unix.MS_REC, "")
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to mount /run for rootless netns: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// run the given function in the correct namespace
|
|
||||||
err = toRun()
|
|
||||||
return err
|
|
||||||
})
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clean up the rootless network namespace if needed.
|
|
||||||
// It checks if we have running containers with the bridge network mode.
|
|
||||||
// Cleanup() expects that r.Lock is locked
|
|
||||||
func (r *RootlessNetNS) Cleanup(runtime *Runtime) error {
|
|
||||||
_, err := os.Stat(r.dir)
|
|
||||||
if os.IsNotExist(err) {
|
|
||||||
// the directory does not exist, so no need for cleanup
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
activeNetns := func(c *Container) bool {
|
|
||||||
// no bridge => no need to check
|
|
||||||
if !c.config.NetMode.IsBridge() {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// we cannot use c.state() because it will try to lock the container
|
|
||||||
// locking is a problem because cleanup is called after net teardown
|
|
||||||
// at this stage the container is already locked.
|
|
||||||
// also do not try to lock only containers which are not currently in net
|
|
||||||
// teardown because this will result in an ABBA deadlock between the rootless
|
|
||||||
// netns lock and the container lock
|
|
||||||
// because we need to get the state we have to sync otherwise this will not
|
|
||||||
// work because the state is empty by default
|
|
||||||
// I do not like this but I do not see a better way at the moment
|
|
||||||
err := c.syncContainer()
|
|
||||||
if err != nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// only check for an active netns, we cannot use the container state
|
|
||||||
// because not running does not mean that the netns does not need cleanup
|
|
||||||
// only if the netns is empty we know that we do not need cleanup
|
|
||||||
return c.state.NetNS != ""
|
|
||||||
}
|
|
||||||
ctrs, err := runtime.GetContainers(false, activeNetns)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
// no cleanup if we found no other containers with a netns
|
|
||||||
// we will always find one container (the container cleanup that is currently calling us)
|
|
||||||
if len(ctrs) > 1 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
logrus.Debug("Cleaning up rootless network namespace")
|
|
||||||
err = netns.UnmountNS(r.ns.Path())
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
// make the following errors not fatal
|
|
||||||
err = r.ns.Close()
|
|
||||||
if err != nil {
|
|
||||||
logrus.Error(err)
|
|
||||||
}
|
|
||||||
b, err := os.ReadFile(r.getPath(rootlessNetNsSilrp4netnsPidFile))
|
|
||||||
if err == nil {
|
|
||||||
var i int
|
|
||||||
i, err = strconv.Atoi(string(b))
|
|
||||||
if err == nil {
|
|
||||||
// kill the slirp process so we do not leak it
|
|
||||||
err = syscall.Kill(i, syscall.SIGTERM)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
logrus.Errorf("Failed to kill slirp4netns process: %v", err)
|
|
||||||
}
|
|
||||||
err = os.RemoveAll(r.dir)
|
|
||||||
if err != nil {
|
|
||||||
logrus.Error(err)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetRootlessNetNs returns the rootless netns object. If new is set to true
|
|
||||||
// the rootless network namespace will be created if it does not already exist.
|
|
||||||
// If called as root it always returns nil.
|
|
||||||
// On success the returned RootlessCNI lock is locked and must be unlocked by the caller.
|
|
||||||
func (r *Runtime) GetRootlessNetNs(new bool) (*RootlessNetNS, error) {
|
|
||||||
if !rootless.IsRootless() {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
var rootlessNetNS *RootlessNetNS
|
|
||||||
runDir := r.config.Engine.TmpDir
|
|
||||||
|
|
||||||
lfile := filepath.Join(runDir, "rootless-netns.lock")
|
|
||||||
lock, err := lockfile.GetLockFile(lfile)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to get rootless-netns lockfile: %w", err)
|
|
||||||
}
|
|
||||||
lock.Lock()
|
|
||||||
defer func() {
|
|
||||||
// In case of an error (early exit) rootlessNetNS will be nil.
|
|
||||||
// Make sure to unlock otherwise we could deadlock.
|
|
||||||
if rootlessNetNS == nil {
|
|
||||||
lock.Unlock()
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
rootlessNetNsDir := filepath.Join(runDir, rootlessNetNsName)
|
|
||||||
err = os.MkdirAll(rootlessNetNsDir, 0700)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("could not create rootless-netns directory: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
nsDir, err := netns.GetNSRunDir()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// create a hash from the static dir
|
|
||||||
// the cleanup will check if there are running containers
|
|
||||||
// if you run several libpod instances with different root/runroot directories this check will fail
|
|
||||||
// we want one netns for each libpod static dir so we use the hash to prevent name collisions
|
|
||||||
hash := sha256.Sum256([]byte(r.config.Engine.StaticDir))
|
|
||||||
netnsName := fmt.Sprintf("%s-%x", rootlessNetNsName, hash[:10])
|
|
||||||
|
|
||||||
path := filepath.Join(nsDir, netnsName)
|
|
||||||
nsReference, err := ns.GetNS(path)
|
|
||||||
if err != nil {
|
|
||||||
if !new {
|
|
||||||
// return an error if we could not get the namespace and should not create one
|
|
||||||
return nil, fmt.Errorf("getting rootless network namespace: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// When the netns is not valid but the file exists we have to remove it first,
|
|
||||||
// https://github.com/containers/common/pull/1381 changed the behavior from
|
|
||||||
// NewNSWithName() so it will now error when the file already exists.
|
|
||||||
// https://github.com/containers/podman/issues/17903#issuecomment-1494329622
|
|
||||||
if errors.As(err, &ns.NSPathNotNSErr{}) {
|
|
||||||
logrus.Infof("rootless netns is no longer valid: %v", err)
|
|
||||||
// ignore errors, if something is wrong NewNSWithName() will fail below anyway
|
|
||||||
_ = os.Remove(path)
|
|
||||||
}
|
|
||||||
|
|
||||||
// create a new namespace
|
|
||||||
logrus.Debugf("creating rootless network namespace with name %q", netnsName)
|
|
||||||
nsReference, err = netns.NewNSWithName(netnsName)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("creating rootless network namespace: %w", err)
|
|
||||||
}
|
|
||||||
res, err := slirp4netns.Setup(&slirp4netns.SetupOptions{
|
|
||||||
Config: r.config,
|
|
||||||
ContainerID: "rootless-netns",
|
|
||||||
Netns: nsReference.Path(),
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to start rootless-netns slirp4netns: %w", err)
|
|
||||||
}
|
|
||||||
// create pid file for the slirp4netns process
|
|
||||||
// this is needed to kill the process in the cleanup
|
|
||||||
pid := strconv.Itoa(res.Pid)
|
|
||||||
err = os.WriteFile(filepath.Join(rootlessNetNsDir, rootlessNetNsSilrp4netnsPidFile), []byte(pid), 0700)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("unable to write rootless-netns slirp4netns pid file: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if utils.RunsOnSystemd() {
|
|
||||||
// move to systemd scope to prevent systemd from killing it
|
|
||||||
err = utils.MoveRootlessNetnsSlirpProcessToUserSlice(res.Pid)
|
|
||||||
if err != nil {
|
|
||||||
// only log this, it is not fatal but can lead to issues when running podman inside systemd units
|
|
||||||
logrus.Errorf("failed to move the rootless netns slirp4netns process to the systemd user.slice: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// build a new resolv.conf file which uses the slirp4netns dns server address
|
|
||||||
resolveIP, err := slirp4netns.GetDNS(res.Subnet)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to determine default slirp4netns DNS address: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := resolvconf.New(&resolvconf.Params{
|
|
||||||
Path: filepath.Join(rootlessNetNsDir, "resolv.conf"),
|
|
||||||
// fake the netns since we want to filter localhost
|
|
||||||
Namespaces: []specs.LinuxNamespace{
|
|
||||||
{Type: specs.NetworkNamespace},
|
|
||||||
},
|
|
||||||
IPv6Enabled: res.IPv6,
|
|
||||||
KeepHostServers: true,
|
|
||||||
Nameservers: []string{resolveIP.String()},
|
|
||||||
}); err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to create rootless netns resolv.conf: %w", err)
|
|
||||||
}
|
|
||||||
// create cni directories to store files
|
|
||||||
// they will be bind mounted to the correct location in an extra mount ns
|
|
||||||
err = os.MkdirAll(filepath.Join(rootlessNetNsDir, persistentCNIDir), 0700)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("could not create rootless-netns var directory: %w", err)
|
|
||||||
}
|
|
||||||
runDir := filepath.Join(rootlessNetNsDir, "run")
|
|
||||||
err = os.MkdirAll(runDir, 0700)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("could not create rootless-netns run directory: %w", err)
|
|
||||||
}
|
|
||||||
// relabel the new run directory to the iptables /run label
|
|
||||||
// this is important, otherwise the iptables command will fail
|
|
||||||
err = label.Relabel(runDir, "system_u:object_r:iptables_var_run_t:s0", false)
|
|
||||||
if err != nil {
|
|
||||||
if !errors.Is(err, unix.ENOTSUP) {
|
|
||||||
return nil, fmt.Errorf("could not create relabel rootless-netns run directory: %w", err)
|
|
||||||
}
|
|
||||||
logrus.Debugf("Labeling not supported on %q", runDir)
|
|
||||||
}
|
|
||||||
// create systemd run directory
|
|
||||||
err = os.MkdirAll(filepath.Join(runDir, "systemd"), 0700)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("could not create rootless-netns systemd directory: %w", err)
|
|
||||||
}
|
|
||||||
// create the directory for the netns files at the same location
|
|
||||||
// relative to the rootless-netns location
|
|
||||||
err = os.MkdirAll(filepath.Join(rootlessNetNsDir, nsDir), 0700)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("could not create rootless-netns netns directory: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// The CNI plugins and netavark need access to iptables in $PATH. As it turns out debian doesn't put
|
|
||||||
// /usr/sbin in $PATH for rootless users. This will break rootless networking completely.
|
|
||||||
// We might break existing users and we cannot expect everyone to change their $PATH so
|
|
||||||
// let's add /usr/sbin to $PATH ourselves.
|
|
||||||
path = os.Getenv("PATH")
|
|
||||||
if !strings.Contains(path, "/usr/sbin") {
|
|
||||||
path += ":/usr/sbin"
|
|
||||||
os.Setenv("PATH", path)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Important set rootlessNetNS as last step.
|
|
||||||
// Do not return any errors after this.
|
|
||||||
rootlessNetNS = &RootlessNetNS{
|
|
||||||
ns: nsReference,
|
|
||||||
dir: rootlessNetNsDir,
|
|
||||||
Lock: lock,
|
|
||||||
}
|
|
||||||
return rootlessNetNS, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create and configure a new network namespace for a container
|
// Create and configure a new network namespace for a container
|
||||||
func (r *Runtime) configureNetNS(ctr *Container, ctrNS string) (status map[string]types.StatusBlock, rerr error) {
|
func (r *Runtime) configureNetNS(ctr *Container, ctrNS string) (status map[string]types.StatusBlock, rerr error) {
|
||||||
if err := r.exposeMachinePorts(ctr.config.PortMappings); err != nil {
|
if err := r.exposeMachinePorts(ctr.config.PortMappings); err != nil {
|
||||||
|
|
|
@ -23,6 +23,7 @@ import (
|
||||||
"text/template"
|
"text/template"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/containers/common/pkg/cgroups"
|
||||||
"github.com/containers/common/pkg/config"
|
"github.com/containers/common/pkg/config"
|
||||||
"github.com/containers/common/pkg/detach"
|
"github.com/containers/common/pkg/detach"
|
||||||
"github.com/containers/common/pkg/resize"
|
"github.com/containers/common/pkg/resize"
|
||||||
|
@ -1099,7 +1100,7 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
|
||||||
}
|
}
|
||||||
|
|
||||||
if ctr.config.CgroupsMode == cgroupSplit {
|
if ctr.config.CgroupsMode == cgroupSplit {
|
||||||
if err := utils.MoveUnderCgroupSubtree("runtime"); err != nil {
|
if err := cgroups.MoveUnderCgroupSubtree("runtime"); err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,9 +16,9 @@ import (
|
||||||
|
|
||||||
"github.com/containers/common/pkg/cgroups"
|
"github.com/containers/common/pkg/cgroups"
|
||||||
"github.com/containers/common/pkg/config"
|
"github.com/containers/common/pkg/config"
|
||||||
|
"github.com/containers/common/pkg/systemd"
|
||||||
"github.com/containers/podman/v4/pkg/errorhandling"
|
"github.com/containers/podman/v4/pkg/errorhandling"
|
||||||
"github.com/containers/podman/v4/pkg/rootless"
|
"github.com/containers/podman/v4/pkg/rootless"
|
||||||
"github.com/containers/podman/v4/utils"
|
|
||||||
pmount "github.com/containers/storage/pkg/mount"
|
pmount "github.com/containers/storage/pkg/mount"
|
||||||
spec "github.com/opencontainers/runtime-spec/specs-go"
|
spec "github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"github.com/opencontainers/selinux/go-selinux/label"
|
"github.com/opencontainers/selinux/go-selinux/label"
|
||||||
|
@ -149,7 +149,7 @@ func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec
|
||||||
}
|
}
|
||||||
|
|
||||||
logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName)
|
logrus.Infof("Running conmon under slice %s and unitName %s", realCgroupParent, unitName)
|
||||||
if err := utils.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil {
|
if err := systemd.RunUnderSystemdScope(cmd.Process.Pid, realCgroupParent, unitName); err != nil {
|
||||||
logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to systemd sandbox cgroup: %v", err)
|
logrus.StandardLogger().Logf(logLevel, "Failed to add conmon to systemd sandbox cgroup: %v", err)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -25,6 +25,7 @@ import (
|
||||||
"github.com/containers/common/pkg/cgroups"
|
"github.com/containers/common/pkg/cgroups"
|
||||||
"github.com/containers/common/pkg/config"
|
"github.com/containers/common/pkg/config"
|
||||||
"github.com/containers/common/pkg/secrets"
|
"github.com/containers/common/pkg/secrets"
|
||||||
|
systemdCommon "github.com/containers/common/pkg/systemd"
|
||||||
"github.com/containers/image/v5/pkg/sysregistriesv2"
|
"github.com/containers/image/v5/pkg/sysregistriesv2"
|
||||||
is "github.com/containers/image/v5/storage"
|
is "github.com/containers/image/v5/storage"
|
||||||
"github.com/containers/image/v5/types"
|
"github.com/containers/image/v5/types"
|
||||||
|
@ -36,7 +37,6 @@ import (
|
||||||
"github.com/containers/podman/v4/pkg/rootless"
|
"github.com/containers/podman/v4/pkg/rootless"
|
||||||
"github.com/containers/podman/v4/pkg/systemd"
|
"github.com/containers/podman/v4/pkg/systemd"
|
||||||
"github.com/containers/podman/v4/pkg/util"
|
"github.com/containers/podman/v4/pkg/util"
|
||||||
"github.com/containers/podman/v4/utils"
|
|
||||||
"github.com/containers/storage"
|
"github.com/containers/storage"
|
||||||
"github.com/containers/storage/pkg/lockfile"
|
"github.com/containers/storage/pkg/lockfile"
|
||||||
"github.com/containers/storage/pkg/unshare"
|
"github.com/containers/storage/pkg/unshare"
|
||||||
|
@ -608,7 +608,7 @@ func makeRuntime(runtime *Runtime) (retErr error) {
|
||||||
if became {
|
if became {
|
||||||
// Check if the pause process was created. If it was created, then
|
// Check if the pause process was created. If it was created, then
|
||||||
// move it to its own systemd scope.
|
// move it to its own systemd scope.
|
||||||
utils.MovePauseProcessToScope(pausePid)
|
systemdCommon.MovePauseProcessToScope(pausePid)
|
||||||
|
|
||||||
// gocritic complains because defer is not run on os.Exit()
|
// gocritic complains because defer is not run on os.Exit()
|
||||||
// However this is fine because the lock is released anyway when the process exits
|
// However this is fine because the lock is released anyway when the process exits
|
||||||
|
|
|
@ -13,7 +13,6 @@ import (
|
||||||
"github.com/containers/common/pkg/config"
|
"github.com/containers/common/pkg/config"
|
||||||
"github.com/containers/podman/v4/libpod/define"
|
"github.com/containers/podman/v4/libpod/define"
|
||||||
"github.com/containers/podman/v4/pkg/rootless"
|
"github.com/containers/podman/v4/pkg/rootless"
|
||||||
"github.com/containers/podman/v4/utils"
|
|
||||||
spec "github.com/opencontainers/runtime-spec/specs-go"
|
spec "github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
@ -97,7 +96,7 @@ func (p *Pod) removePodCgroup() error {
|
||||||
}
|
}
|
||||||
logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)
|
logrus.Debugf("Removing pod cgroup %s", p.state.CgroupPath)
|
||||||
|
|
||||||
cgroup, err := utils.GetOwnCgroup()
|
cgroup, err := cgroups.GetOwnCgroup()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -106,7 +105,7 @@ func (p *Pod) removePodCgroup() error {
|
||||||
// current process out of it before the cgroup is destroyed.
|
// current process out of it before the cgroup is destroyed.
|
||||||
if isSubDir(cgroup, string(filepath.Separator)+p.state.CgroupPath) {
|
if isSubDir(cgroup, string(filepath.Separator)+p.state.CgroupPath) {
|
||||||
parent := path.Dir(p.state.CgroupPath)
|
parent := path.Dir(p.state.CgroupPath)
|
||||||
if err := utils.MoveUnderCgroup(parent, "cleanup", nil); err != nil {
|
if err := cgroups.MoveUnderCgroup(parent, "cleanup", nil); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,12 +11,12 @@ import (
|
||||||
|
|
||||||
"github.com/containers/common/pkg/cgroups"
|
"github.com/containers/common/pkg/cgroups"
|
||||||
"github.com/containers/common/pkg/config"
|
"github.com/containers/common/pkg/config"
|
||||||
|
"github.com/containers/common/pkg/systemd"
|
||||||
"github.com/containers/podman/v4/libpod/define"
|
"github.com/containers/podman/v4/libpod/define"
|
||||||
"github.com/containers/podman/v4/pkg/domain/entities"
|
"github.com/containers/podman/v4/pkg/domain/entities"
|
||||||
"github.com/containers/podman/v4/pkg/domain/entities/reports"
|
"github.com/containers/podman/v4/pkg/domain/entities/reports"
|
||||||
"github.com/containers/podman/v4/pkg/rootless"
|
"github.com/containers/podman/v4/pkg/rootless"
|
||||||
"github.com/containers/podman/v4/pkg/util"
|
"github.com/containers/podman/v4/pkg/util"
|
||||||
"github.com/containers/podman/v4/utils"
|
|
||||||
"github.com/containers/storage"
|
"github.com/containers/storage"
|
||||||
"github.com/containers/storage/pkg/directory"
|
"github.com/containers/storage/pkg/directory"
|
||||||
"github.com/containers/storage/pkg/unshare"
|
"github.com/containers/storage/pkg/unshare"
|
||||||
|
@ -67,11 +67,11 @@ func (ic *ContainerEngine) Info(ctx context.Context) (*define.Info, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool) error {
|
func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool) error {
|
||||||
runsUnderSystemd := utils.RunsOnSystemd()
|
runsUnderSystemd := systemd.RunsOnSystemd()
|
||||||
if !runsUnderSystemd {
|
if !runsUnderSystemd {
|
||||||
isPid1 := os.Getpid() == 1
|
isPid1 := os.Getpid() == 1
|
||||||
if _, found := os.LookupEnv("container"); isPid1 || found {
|
if _, found := os.LookupEnv("container"); isPid1 || found {
|
||||||
if err := utils.MaybeMoveToSubCgroup(); err != nil {
|
if err := cgroups.MaybeMoveToSubCgroup(); err != nil {
|
||||||
// it is a best effort operation, so just print the
|
// it is a best effort operation, so just print the
|
||||||
// error for debugging purposes.
|
// error for debugging purposes.
|
||||||
logrus.Debugf("Could not move to subcgroup: %v", err)
|
logrus.Debugf("Could not move to subcgroup: %v", err)
|
||||||
|
@ -101,7 +101,7 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool)
|
||||||
}
|
}
|
||||||
unitName := fmt.Sprintf("podman-%d.scope", os.Getpid())
|
unitName := fmt.Sprintf("podman-%d.scope", os.Getpid())
|
||||||
if runsUnderSystemd || conf.Engine.CgroupManager == config.SystemdCgroupsManager {
|
if runsUnderSystemd || conf.Engine.CgroupManager == config.SystemdCgroupsManager {
|
||||||
if err := utils.RunUnderSystemdScope(os.Getpid(), "user.slice", unitName); err != nil {
|
if err := systemd.RunUnderSystemdScope(os.Getpid(), "user.slice", unitName); err != nil {
|
||||||
logrus.Debugf("Failed to add podman to systemd sandbox cgroup: %v", err)
|
logrus.Debugf("Failed to add podman to systemd sandbox cgroup: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -142,7 +142,7 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool)
|
||||||
} else {
|
} else {
|
||||||
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
|
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
utils.MovePauseProcessToScope(pausePidPath)
|
systemd.MovePauseProcessToScope(pausePidPath)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -406,17 +406,7 @@ func (ic *ContainerEngine) Unshare(ctx context.Context, args []string, options e
|
||||||
}
|
}
|
||||||
|
|
||||||
if options.RootlessNetNS {
|
if options.RootlessNetNS {
|
||||||
rootlessNetNS, err := ic.Libpod.GetRootlessNetNs(true)
|
return ic.Libpod.Network().RunInRootlessNetns(unshare)
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
// Make sure to unlock, unshare can run for a long time.
|
|
||||||
rootlessNetNS.Lock.Unlock()
|
|
||||||
// We do not want to clean up the netns after unshare.
|
|
||||||
// The problem is that we cannot know if we need to clean up and
|
|
||||||
// secondly unshare should allow user to set up the namespace with
|
|
||||||
// special things, e.g. potentially macvlan or something like that.
|
|
||||||
return rootlessNetNS.Do(unshare)
|
|
||||||
}
|
}
|
||||||
return unshare()
|
return unshare()
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,7 +14,6 @@ import (
|
||||||
"github.com/containers/common/pkg/sysinfo"
|
"github.com/containers/common/pkg/sysinfo"
|
||||||
"github.com/containers/podman/v4/pkg/rootless"
|
"github.com/containers/podman/v4/pkg/rootless"
|
||||||
"github.com/containers/podman/v4/pkg/specgen"
|
"github.com/containers/podman/v4/pkg/specgen"
|
||||||
"github.com/containers/podman/v4/utils"
|
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -179,7 +178,7 @@ func verifyContainerResourcesCgroupV2(s *specgen.SpecGenerator) ([]string, error
|
||||||
|
|
||||||
// Memory checks
|
// Memory checks
|
||||||
if s.ResourceLimits.Memory != nil && s.ResourceLimits.Memory.Swap != nil {
|
if s.ResourceLimits.Memory != nil && s.ResourceLimits.Memory.Swap != nil {
|
||||||
own, err := utils.GetOwnCgroup()
|
own, err := cgroups.GetOwnCgroup()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return warnings, err
|
return warnings, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
0::/other
|
|
|
@ -1 +0,0 @@
|
||||||
0::/
|
|
119
utils/utils.go
119
utils/utils.go
|
@ -2,20 +2,16 @@ package utils
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"crypto/rand"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/containers/common/pkg/cgroups"
|
|
||||||
"github.com/containers/storage/pkg/archive"
|
"github.com/containers/storage/pkg/archive"
|
||||||
"github.com/containers/storage/pkg/chrootarchive"
|
"github.com/containers/storage/pkg/chrootarchive"
|
||||||
"github.com/godbus/dbus/v5"
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/vbauerster/mpb/v8"
|
"github.com/vbauerster/mpb/v8"
|
||||||
"github.com/vbauerster/mpb/v8/decor"
|
"github.com/vbauerster/mpb/v8/decor"
|
||||||
|
@ -133,121 +129,6 @@ func RemoveScientificNotationFromFloat(x float64) (float64, error) {
|
||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
|
||||||
runsOnSystemdOnce sync.Once
|
|
||||||
runsOnSystemd bool
|
|
||||||
)
|
|
||||||
|
|
||||||
// RunsOnSystemd returns whether the system is using systemd
|
|
||||||
func RunsOnSystemd() bool {
|
|
||||||
runsOnSystemdOnce.Do(func() {
|
|
||||||
// per sd_booted(3), check for this dir
|
|
||||||
fd, err := os.Stat("/run/systemd/system")
|
|
||||||
runsOnSystemd = err == nil && fd.IsDir()
|
|
||||||
})
|
|
||||||
return runsOnSystemd
|
|
||||||
}
|
|
||||||
|
|
||||||
func moveProcessPIDFileToScope(pidPath, slice, scope string) error {
|
|
||||||
data, err := os.ReadFile(pidPath)
|
|
||||||
if err != nil {
|
|
||||||
// do not raise an error if the file doesn't exist
|
|
||||||
if os.IsNotExist(err) {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return fmt.Errorf("cannot read pid file: %w", err)
|
|
||||||
}
|
|
||||||
pid, err := strconv.ParseUint(string(data), 10, 0)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("cannot parse pid file %s: %w", pidPath, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return moveProcessToScope(int(pid), slice, scope)
|
|
||||||
}
|
|
||||||
|
|
||||||
func moveProcessToScope(pid int, slice, scope string) error {
|
|
||||||
err := RunUnderSystemdScope(pid, slice, scope)
|
|
||||||
// If the PID is not valid anymore, do not return an error.
|
|
||||||
if dbusErr, ok := err.(dbus.Error); ok {
|
|
||||||
if dbusErr.Name == "org.freedesktop.DBus.Error.UnixProcessIdUnknown" {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// MoveRootlessNetnsSlirpProcessToUserSlice moves the slirp4netns process for the rootless netns
|
|
||||||
// into a different scope so that systemd does not kill it with a container.
|
|
||||||
func MoveRootlessNetnsSlirpProcessToUserSlice(pid int) error {
|
|
||||||
randBytes := make([]byte, 4)
|
|
||||||
_, err := rand.Read(randBytes)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return moveProcessToScope(pid, "user.slice", fmt.Sprintf("rootless-netns-%x.scope", randBytes))
|
|
||||||
}
|
|
||||||
|
|
||||||
// MovePauseProcessToScope moves the pause process used for rootless mode to keep the namespaces alive to
|
|
||||||
// a separate scope.
|
|
||||||
func MovePauseProcessToScope(pausePidPath string) {
|
|
||||||
var err error
|
|
||||||
|
|
||||||
for i := 0; i < 10; i++ {
|
|
||||||
randBytes := make([]byte, 4)
|
|
||||||
_, err = rand.Read(randBytes)
|
|
||||||
if err != nil {
|
|
||||||
logrus.Errorf("failed to read random bytes: %v", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
err = moveProcessPIDFileToScope(pausePidPath, "user.slice", fmt.Sprintf("podman-pause-%x.scope", randBytes))
|
|
||||||
if err == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
unified, err2 := cgroups.IsCgroup2UnifiedMode()
|
|
||||||
if err2 != nil {
|
|
||||||
logrus.Warnf("Failed to detect if running with cgroup unified: %v", err)
|
|
||||||
}
|
|
||||||
if RunsOnSystemd() && unified {
|
|
||||||
logrus.Warnf("Failed to add pause process to systemd sandbox cgroup: %v", err)
|
|
||||||
} else {
|
|
||||||
logrus.Debugf("Failed to add pause process to systemd sandbox cgroup: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var (
|
|
||||||
maybeMoveToSubCgroupSync sync.Once
|
|
||||||
maybeMoveToSubCgroupSyncErr error
|
|
||||||
)
|
|
||||||
|
|
||||||
// MaybeMoveToSubCgroup moves the current process in a sub cgroup when
|
|
||||||
// it is running in the root cgroup on a system that uses cgroupv2.
|
|
||||||
func MaybeMoveToSubCgroup() error {
|
|
||||||
maybeMoveToSubCgroupSync.Do(func() {
|
|
||||||
unifiedMode, err := cgroups.IsCgroup2UnifiedMode()
|
|
||||||
if err != nil {
|
|
||||||
maybeMoveToSubCgroupSyncErr = err
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if !unifiedMode {
|
|
||||||
maybeMoveToSubCgroupSyncErr = nil
|
|
||||||
return
|
|
||||||
}
|
|
||||||
cgroup, err := GetOwnCgroup()
|
|
||||||
if err != nil {
|
|
||||||
maybeMoveToSubCgroupSyncErr = err
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if cgroup == "/" {
|
|
||||||
maybeMoveToSubCgroupSyncErr = MoveUnderCgroupSubtree("init")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
return maybeMoveToSubCgroupSyncErr
|
|
||||||
}
|
|
||||||
|
|
||||||
// GuardedRemoveAll functions much like os.RemoveAll but
|
// GuardedRemoveAll functions much like os.RemoveAll but
|
||||||
// will not delete certain catastrophic paths.
|
// will not delete certain catastrophic paths.
|
||||||
func GuardedRemoveAll(path string) error {
|
func GuardedRemoveAll(path string) error {
|
||||||
|
|
|
@ -1,205 +0,0 @@
|
||||||
//go:build linux || darwin || freebsd
|
|
||||||
// +build linux darwin freebsd
|
|
||||||
|
|
||||||
package utils
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bufio"
|
|
||||||
"bytes"
|
|
||||||
"context"
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/containers/common/pkg/cgroups"
|
|
||||||
"github.com/containers/podman/v4/pkg/rootless"
|
|
||||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
|
||||||
"github.com/godbus/dbus/v5"
|
|
||||||
"github.com/sirupsen/logrus"
|
|
||||||
)
|
|
||||||
|
|
||||||
// RunUnderSystemdScope adds the specified pid to a systemd scope
|
|
||||||
func RunUnderSystemdScope(pid int, slice string, unitName string) error {
|
|
||||||
var properties []systemdDbus.Property
|
|
||||||
var conn *systemdDbus.Conn
|
|
||||||
var err error
|
|
||||||
|
|
||||||
if rootless.IsRootless() {
|
|
||||||
conn, err = cgroups.UserConnection(rootless.GetRootlessUID())
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
conn, err = systemdDbus.NewWithContext(context.Background())
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
defer conn.Close()
|
|
||||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
|
||||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
|
||||||
properties = append(properties, newProp("Delegate", true))
|
|
||||||
properties = append(properties, newProp("DefaultDependencies", false))
|
|
||||||
ch := make(chan string)
|
|
||||||
_, err = conn.StartTransientUnitContext(context.Background(), unitName, "replace", properties, ch)
|
|
||||||
if err != nil {
|
|
||||||
// On errors check if the cgroup already exists, if it does move the process there
|
|
||||||
if props, err := conn.GetUnitTypePropertiesContext(context.Background(), unitName, "Scope"); err == nil {
|
|
||||||
if cgroup, ok := props["ControlGroup"].(string); ok && cgroup != "" {
|
|
||||||
if err := MoveUnderCgroup(cgroup, "", []uint32{uint32(pid)}); err == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
// On errors return the original error message we got from StartTransientUnit.
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Block until job is started
|
|
||||||
<-ch
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func getCgroupProcess(procFile string, allowRoot bool) (string, error) {
|
|
||||||
f, err := os.Open(procFile)
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
scanner := bufio.NewScanner(f)
|
|
||||||
cgroup := ""
|
|
||||||
for scanner.Scan() {
|
|
||||||
line := scanner.Text()
|
|
||||||
parts := strings.SplitN(line, ":", 3)
|
|
||||||
if len(parts) != 3 {
|
|
||||||
return "", fmt.Errorf("cannot parse cgroup line %q", line)
|
|
||||||
}
|
|
||||||
if strings.HasPrefix(line, "0::") {
|
|
||||||
cgroup = line[3:]
|
|
||||||
break
|
|
||||||
}
|
|
||||||
if len(parts[2]) > len(cgroup) {
|
|
||||||
cgroup = parts[2]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if len(cgroup) == 0 || (!allowRoot && cgroup == "/") {
|
|
||||||
return "", fmt.Errorf("could not find cgroup mount in %q", procFile)
|
|
||||||
}
|
|
||||||
return cgroup, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetOwnCgroup returns the cgroup for the current process.
|
|
||||||
func GetOwnCgroup() (string, error) {
|
|
||||||
return getCgroupProcess("/proc/self/cgroup", true)
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetOwnCgroupDisallowRoot() (string, error) {
|
|
||||||
return getCgroupProcess("/proc/self/cgroup", false)
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetCgroupProcess returns the cgroup for the specified process process.
|
|
||||||
func GetCgroupProcess(pid int) (string, error) {
|
|
||||||
return getCgroupProcess(fmt.Sprintf("/proc/%d/cgroup", pid), true)
|
|
||||||
}
|
|
||||||
|
|
||||||
// MoveUnderCgroupSubtree moves the PID under a cgroup subtree.
|
|
||||||
func MoveUnderCgroupSubtree(subtree string) error {
|
|
||||||
return MoveUnderCgroup("", subtree, nil)
|
|
||||||
}
|
|
||||||
|
|
||||||
// MoveUnderCgroup moves a group of processes to a new cgroup.
|
|
||||||
// If cgroup is the empty string, then the current calling process cgroup is used.
|
|
||||||
// If processes is empty, then the processes from the current cgroup are moved.
|
|
||||||
func MoveUnderCgroup(cgroup, subtree string, processes []uint32) error {
|
|
||||||
procFile := "/proc/self/cgroup"
|
|
||||||
f, err := os.Open(procFile)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
unifiedMode, err := cgroups.IsCgroup2UnifiedMode()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
scanner := bufio.NewScanner(f)
|
|
||||||
for scanner.Scan() {
|
|
||||||
line := scanner.Text()
|
|
||||||
parts := strings.SplitN(line, ":", 3)
|
|
||||||
if len(parts) != 3 {
|
|
||||||
return fmt.Errorf("cannot parse cgroup line %q", line)
|
|
||||||
}
|
|
||||||
|
|
||||||
// root cgroup, skip it
|
|
||||||
if parts[2] == "/" && !(unifiedMode && parts[1] == "") {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
cgroupRoot := "/sys/fs/cgroup"
|
|
||||||
// Special case the unified mount on hybrid cgroup and named hierarchies.
|
|
||||||
// This works on Fedora 31, but we should really parse the mounts to see
|
|
||||||
// where the cgroup hierarchy is mounted.
|
|
||||||
if parts[1] == "" && !unifiedMode {
|
|
||||||
// If it is not using unified mode, the cgroup v2 hierarchy is
|
|
||||||
// usually mounted under /sys/fs/cgroup/unified
|
|
||||||
cgroupRoot = filepath.Join(cgroupRoot, "unified")
|
|
||||||
|
|
||||||
// Ignore the unified mount if it doesn't exist
|
|
||||||
if _, err := os.Stat(cgroupRoot); err != nil && os.IsNotExist(err) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
} else if parts[1] != "" {
|
|
||||||
// Assume the controller is mounted at /sys/fs/cgroup/$CONTROLLER.
|
|
||||||
controller := strings.TrimPrefix(parts[1], "name=")
|
|
||||||
cgroupRoot = filepath.Join(cgroupRoot, controller)
|
|
||||||
}
|
|
||||||
|
|
||||||
parentCgroup := cgroup
|
|
||||||
if parentCgroup == "" {
|
|
||||||
parentCgroup = parts[2]
|
|
||||||
}
|
|
||||||
newCgroup := filepath.Join(cgroupRoot, parentCgroup, subtree)
|
|
||||||
if err := os.MkdirAll(newCgroup, 0755); err != nil && !os.IsExist(err) {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
f, err := os.OpenFile(filepath.Join(newCgroup, "cgroup.procs"), os.O_RDWR, 0755)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
if len(processes) > 0 {
|
|
||||||
for _, pid := range processes {
|
|
||||||
if _, err := f.WriteString(fmt.Sprintf("%d\n", pid)); err != nil {
|
|
||||||
logrus.Debugf("Cannot move process %d to cgroup %q: %v", pid, newCgroup, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
processesData, err := os.ReadFile(filepath.Join(cgroupRoot, parts[2], "cgroup.procs"))
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
for _, pid := range bytes.Split(processesData, []byte("\n")) {
|
|
||||||
if len(pid) == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if _, err := f.Write(pid); err != nil {
|
|
||||||
logrus.Debugf("Cannot move process %s to cgroup %q: %v", string(pid), newCgroup, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func newProp(name string, units interface{}) systemdDbus.Property {
|
|
||||||
return systemdDbus.Property{
|
|
||||||
Name: name,
|
|
||||||
Value: dbus.MakeVariant(units),
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,26 +0,0 @@
|
||||||
//go:build linux || darwin || freebsd
|
|
||||||
// +build linux darwin freebsd
|
|
||||||
|
|
||||||
package utils
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestCgroupProcess(t *testing.T) {
|
|
||||||
val, err := getCgroupProcess("testdata/cgroup.root", true)
|
|
||||||
assert.Nil(t, err)
|
|
||||||
assert.Equal(t, "/", val)
|
|
||||||
|
|
||||||
_, err = getCgroupProcess("testdata/cgroup.root", false)
|
|
||||||
assert.NotNil(t, err)
|
|
||||||
|
|
||||||
val, err = getCgroupProcess("testdata/cgroup.other", true)
|
|
||||||
assert.Nil(t, err)
|
|
||||||
assert.Equal(t, "/other", val)
|
|
||||||
|
|
||||||
_, err = getCgroupProcess("testdata/cgroup.empty", true)
|
|
||||||
assert.NotNil(t, err)
|
|
||||||
}
|
|
|
@ -1,26 +0,0 @@
|
||||||
//go:build windows
|
|
||||||
// +build windows
|
|
||||||
|
|
||||||
package utils
|
|
||||||
|
|
||||||
import "errors"
|
|
||||||
|
|
||||||
func RunUnderSystemdScope(pid int, slice string, unitName string) error {
|
|
||||||
return errors.New("not implemented for windows")
|
|
||||||
}
|
|
||||||
|
|
||||||
func MoveUnderCgroupSubtree(subtree string) error {
|
|
||||||
return errors.New("not implemented for windows")
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetOwnCgroup() (string, error) {
|
|
||||||
return "", errors.New("not implemented for windows")
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetOwnCgroupDisallowRoot() (string, error) {
|
|
||||||
return "", errors.New("not implemented for windows")
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetCgroupProcess(pid int) (string, error) {
|
|
||||||
return "", errors.New("not implemented for windows")
|
|
||||||
}
|
|
|
@ -364,11 +364,13 @@ func (c *copier) copy(ctx context.Context, source, destination types.ImageRefere
|
||||||
defer cancel()
|
defer cancel()
|
||||||
defer timer.Stop()
|
defer timer.Stop()
|
||||||
|
|
||||||
fmt.Fprintf(c.imageCopyOptions.ReportWriter,
|
if c.imageCopyOptions.ReportWriter != nil {
|
||||||
"Pulling image %s inside systemd: setting pull timeout to %s\n",
|
fmt.Fprintf(c.imageCopyOptions.ReportWriter,
|
||||||
source.StringWithinTransport(),
|
"Pulling image %s inside systemd: setting pull timeout to %s\n",
|
||||||
time.Duration(numExtensions)*extension,
|
source.StringWithinTransport(),
|
||||||
)
|
time.Duration(numExtensions)*extension,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
// From `man systemd.service(5)`:
|
// From `man systemd.service(5)`:
|
||||||
//
|
//
|
||||||
|
|
|
@ -26,8 +26,10 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/containernetworking/cni/pkg/invoke"
|
"github.com/containernetworking/cni/pkg/invoke"
|
||||||
"github.com/containernetworking/cni/pkg/version"
|
"github.com/containernetworking/cni/pkg/version"
|
||||||
|
@ -80,6 +82,16 @@ func (e *cniExec) ExecPlugin(ctx context.Context, pluginPath string, stdinData [
|
||||||
c.Env = append(c.Env, "XDG_RUNTIME_DIR=")
|
c.Env = append(c.Env, "XDG_RUNTIME_DIR=")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The CNI plugins need access to iptables in $PATH. As it turns out debian doesn't put
|
||||||
|
// /usr/sbin in $PATH for rootless users. This will break rootless networking completely.
|
||||||
|
// We might break existing users and we cannot expect everyone to change their $PATH so
|
||||||
|
// let's add /usr/sbin to $PATH ourselves.
|
||||||
|
path := os.Getenv("PATH")
|
||||||
|
if !strings.Contains(path, "/usr/sbin") {
|
||||||
|
path += ":/usr/sbin"
|
||||||
|
c.Env = append(c.Env, "PATH="+path)
|
||||||
|
}
|
||||||
|
|
||||||
err := c.Run()
|
err := c.Run()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, annotatePluginError(err, pluginPath, stdout.Bytes(), stderr.Bytes())
|
return nil, annotatePluginError(err, pluginPath, stdout.Bytes(), stderr.Bytes())
|
||||||
|
|
|
@ -16,6 +16,7 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/containernetworking/cni/libcni"
|
"github.com/containernetworking/cni/libcni"
|
||||||
|
"github.com/containers/common/libnetwork/internal/rootlessnetns"
|
||||||
"github.com/containers/common/libnetwork/types"
|
"github.com/containers/common/libnetwork/types"
|
||||||
"github.com/containers/common/pkg/config"
|
"github.com/containers/common/pkg/config"
|
||||||
"github.com/containers/common/pkg/version"
|
"github.com/containers/common/pkg/version"
|
||||||
|
@ -53,6 +54,9 @@ type cniNetwork struct {
|
||||||
|
|
||||||
// networks is a map with loaded networks, the key is the network name
|
// networks is a map with loaded networks, the key is the network name
|
||||||
networks map[string]*network
|
networks map[string]*network
|
||||||
|
|
||||||
|
// rootlessNetns is used for the rootless network setup/teardown
|
||||||
|
rootlessNetns *rootlessnetns.Netns
|
||||||
}
|
}
|
||||||
|
|
||||||
type network struct {
|
type network struct {
|
||||||
|
@ -65,21 +69,14 @@ type network struct {
|
||||||
type InitConfig struct {
|
type InitConfig struct {
|
||||||
// CNIConfigDir is directory where the cni config files are stored.
|
// CNIConfigDir is directory where the cni config files are stored.
|
||||||
CNIConfigDir string
|
CNIConfigDir string
|
||||||
// CNIPluginDirs is a list of directories where cni should look for the plugins.
|
|
||||||
CNIPluginDirs []string
|
|
||||||
// RunDir is a directory where temporary files can be stored.
|
// RunDir is a directory where temporary files can be stored.
|
||||||
RunDir string
|
RunDir string
|
||||||
|
|
||||||
// DefaultNetwork is the name for the default network.
|
|
||||||
DefaultNetwork string
|
|
||||||
// DefaultSubnet is the default subnet for the default network.
|
|
||||||
DefaultSubnet string
|
|
||||||
|
|
||||||
// DefaultsubnetPools contains the subnets which must be used to allocate a free subnet by network create
|
|
||||||
DefaultsubnetPools []config.SubnetPool
|
|
||||||
|
|
||||||
// IsMachine describes whenever podman runs in a podman machine environment.
|
// IsMachine describes whenever podman runs in a podman machine environment.
|
||||||
IsMachine bool
|
IsMachine bool
|
||||||
|
|
||||||
|
// Config containers.conf options
|
||||||
|
Config *config.Config
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewCNINetworkInterface creates the ContainerNetwork interface for the CNI backend.
|
// NewCNINetworkInterface creates the ContainerNetwork interface for the CNI backend.
|
||||||
|
@ -96,12 +93,12 @@ func NewCNINetworkInterface(conf *InitConfig) (types.ContainerNetwork, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
defaultNetworkName := conf.DefaultNetwork
|
defaultNetworkName := conf.Config.Network.DefaultNetwork
|
||||||
if defaultNetworkName == "" {
|
if defaultNetworkName == "" {
|
||||||
defaultNetworkName = types.DefaultNetworkName
|
defaultNetworkName = types.DefaultNetworkName
|
||||||
}
|
}
|
||||||
|
|
||||||
defaultSubnet := conf.DefaultSubnet
|
defaultSubnet := conf.Config.Network.DefaultSubnet
|
||||||
if defaultSubnet == "" {
|
if defaultSubnet == "" {
|
||||||
defaultSubnet = types.DefaultSubnet
|
defaultSubnet = types.DefaultSubnet
|
||||||
}
|
}
|
||||||
|
@ -110,21 +107,30 @@ func NewCNINetworkInterface(conf *InitConfig) (types.ContainerNetwork, error) {
|
||||||
return nil, fmt.Errorf("failed to parse default subnet: %w", err)
|
return nil, fmt.Errorf("failed to parse default subnet: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
defaultSubnetPools := conf.DefaultsubnetPools
|
defaultSubnetPools := conf.Config.Network.DefaultSubnetPools
|
||||||
if defaultSubnetPools == nil {
|
if defaultSubnetPools == nil {
|
||||||
defaultSubnetPools = config.DefaultSubnetPools
|
defaultSubnetPools = config.DefaultSubnetPools
|
||||||
}
|
}
|
||||||
|
|
||||||
cni := libcni.NewCNIConfig(conf.CNIPluginDirs, &cniExec{})
|
var netns *rootlessnetns.Netns
|
||||||
|
if unshare.IsRootless() {
|
||||||
|
netns, err = rootlessnetns.New(conf.RunDir, rootlessnetns.CNI, conf.Config)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cni := libcni.NewCNIConfig(conf.Config.Network.CNIPluginDirs.Values, &cniExec{})
|
||||||
n := &cniNetwork{
|
n := &cniNetwork{
|
||||||
cniConfigDir: conf.CNIConfigDir,
|
cniConfigDir: conf.CNIConfigDir,
|
||||||
cniPluginDirs: conf.CNIPluginDirs,
|
cniPluginDirs: conf.Config.Network.CNIPluginDirs.Get(),
|
||||||
cniConf: cni,
|
cniConf: cni,
|
||||||
defaultNetwork: defaultNetworkName,
|
defaultNetwork: defaultNetworkName,
|
||||||
defaultSubnet: defaultNet,
|
defaultSubnet: defaultNet,
|
||||||
defaultsubnetPools: defaultSubnetPools,
|
defaultsubnetPools: defaultSubnetPools,
|
||||||
isMachine: conf.IsMachine,
|
isMachine: conf.IsMachine,
|
||||||
lock: lock,
|
lock: lock,
|
||||||
|
rootlessNetns: netns,
|
||||||
}
|
}
|
||||||
|
|
||||||
return n, nil
|
return n, nil
|
||||||
|
|
|
@ -39,61 +39,71 @@ func (n *cniNetwork) Setup(namespacePath string, options types.SetupOptions) (ma
|
||||||
return nil, fmt.Errorf("failed to set the loopback adapter up: %w", err)
|
return nil, fmt.Errorf("failed to set the loopback adapter up: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
var retErr error
|
results := make(map[string]types.StatusBlock, len(options.Networks))
|
||||||
teardownOpts := options
|
|
||||||
teardownOpts.Networks = map[string]types.PerNetworkOptions{}
|
setup := func() error {
|
||||||
// make sure to teardown the already connected networks on error
|
var retErr error
|
||||||
defer func() {
|
teardownOpts := options
|
||||||
if retErr != nil {
|
teardownOpts.Networks = map[string]types.PerNetworkOptions{}
|
||||||
if len(teardownOpts.Networks) > 0 {
|
// make sure to teardown the already connected networks on error
|
||||||
err := n.teardown(namespacePath, types.TeardownOptions(teardownOpts))
|
defer func() {
|
||||||
if err != nil {
|
if retErr != nil {
|
||||||
logrus.Warn(err)
|
if len(teardownOpts.Networks) > 0 {
|
||||||
|
err := n.teardown(namespacePath, types.TeardownOptions(teardownOpts))
|
||||||
|
if err != nil {
|
||||||
|
logrus.Warn(err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
ports, err := convertSpecgenPortsToCNIPorts(options.PortMappings)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
}()
|
|
||||||
|
|
||||||
ports, err := convertSpecgenPortsToCNIPorts(options.PortMappings)
|
for name, netOpts := range options.Networks {
|
||||||
if err != nil {
|
netOpts := netOpts
|
||||||
return nil, err
|
network := n.networks[name]
|
||||||
}
|
rt := getRuntimeConfig(namespacePath, options.ContainerName, options.ContainerID, name, ports, &netOpts)
|
||||||
|
|
||||||
results := make(map[string]types.StatusBlock, len(options.Networks))
|
// If we have more than one static ip we need parse the ips via runtime config,
|
||||||
for name, netOpts := range options.Networks {
|
// make sure to add the ips capability to the first plugin otherwise it doesn't get the ips
|
||||||
netOpts := netOpts
|
if len(netOpts.StaticIPs) > 0 && !network.cniNet.Plugins[0].Network.Capabilities["ips"] {
|
||||||
network := n.networks[name]
|
caps := make(map[string]interface{})
|
||||||
rt := getRuntimeConfig(namespacePath, options.ContainerName, options.ContainerID, name, ports, &netOpts)
|
caps["capabilities"] = map[string]bool{"ips": true}
|
||||||
|
network.cniNet.Plugins[0], retErr = libcni.InjectConf(network.cniNet.Plugins[0], caps)
|
||||||
// If we have more than one static ip we need parse the ips via runtime config,
|
if retErr != nil {
|
||||||
// make sure to add the ips capability to the first plugin otherwise it doesn't get the ips
|
return retErr
|
||||||
if len(netOpts.StaticIPs) > 0 && !network.cniNet.Plugins[0].Network.Capabilities["ips"] {
|
}
|
||||||
caps := make(map[string]interface{})
|
|
||||||
caps["capabilities"] = map[string]bool{"ips": true}
|
|
||||||
network.cniNet.Plugins[0], retErr = libcni.InjectConf(network.cniNet.Plugins[0], caps)
|
|
||||||
if retErr != nil {
|
|
||||||
return nil, retErr
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
var res cnitypes.Result
|
var res cnitypes.Result
|
||||||
res, retErr = n.cniConf.AddNetworkList(context.Background(), network.cniNet, rt)
|
res, retErr = n.cniConf.AddNetworkList(context.Background(), network.cniNet, rt)
|
||||||
// Add this network to teardown opts since it is now connected.
|
// Add this network to teardown opts since it is now connected.
|
||||||
// Also add this if an errors was returned since we want to call teardown on this regardless.
|
// Also add this if an errors was returned since we want to call teardown on this regardless.
|
||||||
teardownOpts.Networks[name] = netOpts
|
teardownOpts.Networks[name] = netOpts
|
||||||
if retErr != nil {
|
if retErr != nil {
|
||||||
return nil, retErr
|
return retErr
|
||||||
}
|
}
|
||||||
|
|
||||||
logrus.Debugf("cni result for container %s network %s: %v", options.ContainerID, name, res)
|
logrus.Debugf("cni result for container %s network %s: %v", options.ContainerID, name, res)
|
||||||
var status types.StatusBlock
|
var status types.StatusBlock
|
||||||
status, retErr = CNIResultToStatus(res)
|
status, retErr = CNIResultToStatus(res)
|
||||||
if retErr != nil {
|
if retErr != nil {
|
||||||
return nil, retErr
|
return retErr
|
||||||
|
}
|
||||||
|
results[name] = status
|
||||||
}
|
}
|
||||||
results[name] = status
|
return nil
|
||||||
}
|
}
|
||||||
return results, nil
|
|
||||||
|
if n.rootlessNetns != nil {
|
||||||
|
err = n.rootlessNetns.Setup(len(options.Networks), setup)
|
||||||
|
} else {
|
||||||
|
err = setup()
|
||||||
|
}
|
||||||
|
return results, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// CNIResultToStatus convert the cni result to status block
|
// CNIResultToStatus convert the cni result to status block
|
||||||
|
@ -225,28 +235,39 @@ func (n *cniNetwork) teardown(namespacePath string, options types.TeardownOption
|
||||||
}
|
}
|
||||||
|
|
||||||
var multiErr *multierror.Error
|
var multiErr *multierror.Error
|
||||||
for name, netOpts := range options.Networks {
|
teardown := func() error {
|
||||||
netOpts := netOpts
|
for name, netOpts := range options.Networks {
|
||||||
rt := getRuntimeConfig(namespacePath, options.ContainerName, options.ContainerID, name, ports, &netOpts)
|
netOpts := netOpts
|
||||||
|
rt := getRuntimeConfig(namespacePath, options.ContainerName, options.ContainerID, name, ports, &netOpts)
|
||||||
|
|
||||||
cniConfList, newRt, err := getCachedNetworkConfig(n.cniConf, name, rt)
|
cniConfList, newRt, err := getCachedNetworkConfig(n.cniConf, name, rt)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
rt = newRt
|
rt = newRt
|
||||||
} else {
|
} else {
|
||||||
logrus.Warnf("Failed to load cached network config: %v, falling back to loading network %s from disk", err, name)
|
logrus.Warnf("Failed to load cached network config: %v, falling back to loading network %s from disk", err, name)
|
||||||
network := n.networks[name]
|
network := n.networks[name]
|
||||||
if network == nil {
|
if network == nil {
|
||||||
multiErr = multierror.Append(multiErr, fmt.Errorf("network %s: %w", name, types.ErrNoSuchNetwork))
|
multiErr = multierror.Append(multiErr, fmt.Errorf("network %s: %w", name, types.ErrNoSuchNetwork))
|
||||||
continue
|
continue
|
||||||
|
}
|
||||||
|
cniConfList = network.cniNet
|
||||||
}
|
}
|
||||||
cniConfList = network.cniNet
|
|
||||||
}
|
|
||||||
|
|
||||||
err = n.cniConf.DelNetworkList(context.Background(), cniConfList, rt)
|
err = n.cniConf.DelNetworkList(context.Background(), cniConfList, rt)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
multiErr = multierror.Append(multiErr, err)
|
multiErr = multierror.Append(multiErr, err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if n.rootlessNetns != nil {
|
||||||
|
err = n.rootlessNetns.Teardown(len(options.Networks), teardown)
|
||||||
|
} else {
|
||||||
|
err = teardown()
|
||||||
|
}
|
||||||
|
multiErr = multierror.Append(multiErr, err)
|
||||||
|
|
||||||
return multiErr.ErrorOrNil()
|
return multiErr.ErrorOrNil()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -267,3 +288,10 @@ func getCachedNetworkConfig(cniConf *libcni.CNIConfig, name string, rt *libcni.R
|
||||||
}
|
}
|
||||||
return cniConfList, rt, nil
|
return cniConfList, rt, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (n *cniNetwork) RunInRootlessNetns(toRun func() error) error {
|
||||||
|
if n.rootlessNetns == nil {
|
||||||
|
return types.ErrNotRootlessNetns
|
||||||
|
}
|
||||||
|
return n.rootlessNetns.Run(n.lock, toRun)
|
||||||
|
}
|
||||||
|
|
8
vendor/github.com/containers/common/libnetwork/internal/rootlessnetns/netns.go
generated
vendored
Normal file
8
vendor/github.com/containers/common/libnetwork/internal/rootlessnetns/netns.go
generated
vendored
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
package rootlessnetns
|
||||||
|
|
||||||
|
type NetworkBackend int
|
||||||
|
|
||||||
|
const (
|
||||||
|
Netavark NetworkBackend = iota
|
||||||
|
CNI
|
||||||
|
)
|
28
vendor/github.com/containers/common/libnetwork/internal/rootlessnetns/netns_freebsd.go
generated
vendored
Normal file
28
vendor/github.com/containers/common/libnetwork/internal/rootlessnetns/netns_freebsd.go
generated
vendored
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
package rootlessnetns
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
|
||||||
|
"github.com/containers/common/pkg/config"
|
||||||
|
"github.com/containers/storage/pkg/lockfile"
|
||||||
|
)
|
||||||
|
|
||||||
|
var ErrNotSupported = errors.New("rootless netns only supported on linux")
|
||||||
|
|
||||||
|
type Netns struct{}
|
||||||
|
|
||||||
|
func New(dir string, backend NetworkBackend, conf *config.Config) (*Netns, error) {
|
||||||
|
return nil, ErrNotSupported
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Netns) Setup(nets int, toRun func() error) error {
|
||||||
|
return ErrNotSupported
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Netns) Teardown(nets int, toRun func() error) error {
|
||||||
|
return ErrNotSupported
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Netns) Run(lock *lockfile.LockFile, toRun func() error) error {
|
||||||
|
return ErrNotSupported
|
||||||
|
}
|
545
vendor/github.com/containers/common/libnetwork/internal/rootlessnetns/netns_linux.go
generated
vendored
Normal file
545
vendor/github.com/containers/common/libnetwork/internal/rootlessnetns/netns_linux.go
generated
vendored
Normal file
|
@ -0,0 +1,545 @@
|
||||||
|
package rootlessnetns
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io/fs"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"syscall"
|
||||||
|
|
||||||
|
"github.com/containernetworking/plugins/pkg/ns"
|
||||||
|
"github.com/containers/common/libnetwork/resolvconf"
|
||||||
|
"github.com/containers/common/libnetwork/slirp4netns"
|
||||||
|
"github.com/containers/common/pkg/config"
|
||||||
|
"github.com/containers/common/pkg/netns"
|
||||||
|
"github.com/containers/common/pkg/systemd"
|
||||||
|
"github.com/containers/storage/pkg/homedir"
|
||||||
|
"github.com/containers/storage/pkg/lockfile"
|
||||||
|
"github.com/hashicorp/go-multierror"
|
||||||
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
|
"github.com/opencontainers/selinux/go-selinux/label"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// rootlessNetnsDir is the directory name
|
||||||
|
rootlessNetnsDir = "rootless-netns"
|
||||||
|
// refCountFile file name for the ref count file
|
||||||
|
refCountFile = "ref-count"
|
||||||
|
|
||||||
|
// rootlessNetNsSilrp4netnsPidFile is the name of the rootless netns slirp4netns pid file
|
||||||
|
rootlessNetNsSilrp4netnsPidFile = "rootless-netns-slirp4netns.pid"
|
||||||
|
|
||||||
|
// persistentCNIDir is the directory where the CNI files are stored
|
||||||
|
persistentCNIDir = "/var/lib/cni"
|
||||||
|
|
||||||
|
tmpfs = "tmpfs"
|
||||||
|
none = "none"
|
||||||
|
resolvConfName = "resolv.conf"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Netns struct {
|
||||||
|
// dir used for the rootless netns
|
||||||
|
dir string
|
||||||
|
// backend used for the network setup/teardown
|
||||||
|
backend NetworkBackend
|
||||||
|
|
||||||
|
// config contains containers.conf options.
|
||||||
|
config *config.Config
|
||||||
|
}
|
||||||
|
|
||||||
|
// rootlessNetnsError is the error type produced by wrapError. It pairs an
// optional context message with the underlying error and always renders
// with a "rootless netns:" prefix.
type rootlessNetnsError struct {
	// msg is optional extra context, it may be empty.
	msg string
	// err is the wrapped underlying error.
	err error
}

// Error implements the error interface.
// The result is "rootless netns: <msg>: <err>", or "rootless netns: <err>"
// when no message was given, so an empty msg does not leave a dangling ": ".
func (e *rootlessNetnsError) Error() string {
	if e.msg == "" {
		return fmt.Sprintf("rootless netns: %v", e.err)
	}
	return fmt.Sprintf("rootless netns: %s: %v", e.msg, e.err)
}

// Unwrap returns the underlying error so errors.Is/errors.As can inspect it.
func (e *rootlessNetnsError) Unwrap() error {
	return e.err
}

// wrapError wraps the error with extra context.
// It will always include "rootless netns:" so the msg should not mention it again,
// msg can be empty to just include the rootless netns part.
// err must be non nil.
func wrapError(msg string, err error) *rootlessNetnsError {
	return &rootlessNetnsError{
		msg: msg,
		err: err,
	}
}
|
||||||
|
|
||||||
|
func New(dir string, backend NetworkBackend, conf *config.Config) (*Netns, error) {
|
||||||
|
netnsDir := filepath.Join(dir, rootlessNetnsDir)
|
||||||
|
if err := os.MkdirAll(netnsDir, 0o700); err != nil {
|
||||||
|
return nil, wrapError("", err)
|
||||||
|
}
|
||||||
|
return &Netns{
|
||||||
|
dir: netnsDir,
|
||||||
|
backend: backend,
|
||||||
|
config: conf,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getPath is a small wrapper around filepath.Join() to have a bit less code
|
||||||
|
func (n *Netns) getPath(path string) string {
|
||||||
|
return filepath.Join(n.dir, path)
|
||||||
|
}
|
||||||
|
|
||||||
|
// getOrCreateNetns returns the rootless netns, if it created a new one the
|
||||||
|
// returned bool is set to true.
|
||||||
|
func (n *Netns) getOrCreateNetns() (ns.NetNS, bool, error) {
|
||||||
|
nsPath := n.getPath(rootlessNetnsDir)
|
||||||
|
nsRef, err := ns.GetNS(nsPath)
|
||||||
|
if err == nil {
|
||||||
|
// TODO check if slirp4netns is alive
|
||||||
|
return nsRef, false, nil
|
||||||
|
}
|
||||||
|
logrus.Debugf("Creating rootless network namespace at %q", nsPath)
|
||||||
|
// We have to create the netns dir again here because it is possible
|
||||||
|
// that cleanup() removed it.
|
||||||
|
if err := os.MkdirAll(n.dir, 0o700); err != nil {
|
||||||
|
return nil, false, wrapError("", err)
|
||||||
|
}
|
||||||
|
netns, err := netns.NewNSAtPath(nsPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, false, wrapError("create netns", err)
|
||||||
|
}
|
||||||
|
err = n.setupSlirp4netns(nsPath)
|
||||||
|
return netns, true, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Netns) cleanup() error {
|
||||||
|
if _, err := os.Stat(n.dir); err != nil {
|
||||||
|
if errors.Is(err, fs.ErrNotExist) {
|
||||||
|
// dir does not exists no need for cleanup
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
logrus.Debug("Cleaning up rootless network namespace")
|
||||||
|
|
||||||
|
nsPath := n.getPath(rootlessNetnsDir)
|
||||||
|
var multiErr *multierror.Error
|
||||||
|
if err := netns.UnmountNS(nsPath); err != nil {
|
||||||
|
multiErr = multierror.Append(multiErr, err)
|
||||||
|
}
|
||||||
|
if err := n.cleanupSlirp4netns(); err != nil {
|
||||||
|
multiErr = multierror.Append(multiErr, wrapError("kill slirp4netns", err))
|
||||||
|
}
|
||||||
|
if err := os.RemoveAll(n.dir); err != nil {
|
||||||
|
multiErr = multierror.Append(multiErr, wrapError("remove rootless netns dir", err))
|
||||||
|
}
|
||||||
|
|
||||||
|
return multiErr.ErrorOrNil()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Netns) setupSlirp4netns(nsPath string) error {
|
||||||
|
res, err := slirp4netns.Setup(&slirp4netns.SetupOptions{
|
||||||
|
Config: n.config,
|
||||||
|
ContainerID: "rootless-netns",
|
||||||
|
Netns: nsPath,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return wrapError("start slirp4netns", err)
|
||||||
|
}
|
||||||
|
// create pid file for the slirp4netns process
|
||||||
|
// this is need to kill the process in the cleanup
|
||||||
|
pid := strconv.Itoa(res.Pid)
|
||||||
|
err = os.WriteFile(n.getPath(rootlessNetNsSilrp4netnsPidFile), []byte(pid), 0o600)
|
||||||
|
if err != nil {
|
||||||
|
return wrapError("write slirp4netns pid file", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if systemd.RunsOnSystemd() {
|
||||||
|
// move to systemd scope to prevent systemd from killing it
|
||||||
|
err = systemd.MoveRootlessNetnsSlirpProcessToUserSlice(res.Pid)
|
||||||
|
if err != nil {
|
||||||
|
// only log this, it is not fatal but can lead to issues when running podman inside systemd units
|
||||||
|
logrus.Errorf("failed to move the rootless netns slirp4netns process to the systemd user.slice: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// build a new resolv.conf file which uses the slirp4netns dns server address
|
||||||
|
resolveIP, err := slirp4netns.GetDNS(res.Subnet)
|
||||||
|
if err != nil {
|
||||||
|
return wrapError("determine default slirp4netns DNS address", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := resolvconf.New(&resolvconf.Params{
|
||||||
|
Path: n.getPath(resolvConfName),
|
||||||
|
// fake the netns since we want to filter localhost
|
||||||
|
Namespaces: []specs.LinuxNamespace{
|
||||||
|
{Type: specs.NetworkNamespace},
|
||||||
|
},
|
||||||
|
IPv6Enabled: res.IPv6,
|
||||||
|
KeepHostServers: true,
|
||||||
|
Nameservers: []string{resolveIP.String()},
|
||||||
|
}); err != nil {
|
||||||
|
return wrapError("create resolv.conf", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Netns) cleanupSlirp4netns() error {
|
||||||
|
pidFile := n.getPath(rootlessNetNsSilrp4netnsPidFile)
|
||||||
|
b, err := os.ReadFile(pidFile)
|
||||||
|
if err == nil {
|
||||||
|
var i int
|
||||||
|
i, err = strconv.Atoi(string(b))
|
||||||
|
if err == nil {
|
||||||
|
// kill the slirp process so we do not leak it
|
||||||
|
err = syscall.Kill(i, syscall.SIGTERM)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// mountAndMkdirDest convenience wrapper for mount and mkdir
|
||||||
|
func mountAndMkdirDest(source string, target string, fstype string, flags uintptr) error {
|
||||||
|
if err := os.MkdirAll(target, 0o700); err != nil {
|
||||||
|
return wrapError("create mount point", err)
|
||||||
|
}
|
||||||
|
if err := unix.Mount(source, target, fstype, flags, ""); err != nil {
|
||||||
|
return wrapError(fmt.Sprintf("mount %q to %q", source, target), err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Netns) setupMounts() error {
|
||||||
|
// Before we can run the given function,
|
||||||
|
// we have to set up all mounts correctly.
|
||||||
|
|
||||||
|
// The order of the mounts is IMPORTANT.
|
||||||
|
// The idea of the extra mount ns is to make /run and /var/lib/cni writeable
|
||||||
|
// for the cni plugins but not affecting the podman user namespace.
|
||||||
|
// Because the plugins also need access to XDG_RUNTIME_DIR/netns some special setup is needed.
|
||||||
|
|
||||||
|
// The following bind mounts are needed
|
||||||
|
// 1. XDG_RUNTIME_DIR -> XDG_RUNTIME_DIR/rootless-netns/XDG_RUNTIME_DIR
|
||||||
|
// 2. /run/systemd -> XDG_RUNTIME_DIR/rootless-netns/run/systemd (only if it exists)
|
||||||
|
// 3. XDG_RUNTIME_DIR/rootless-netns/resolv.conf -> /etc/resolv.conf or XDG_RUNTIME_DIR/rootless-netns/run/symlink/target
|
||||||
|
// 4. XDG_RUNTIME_DIR/rootless-netns/var/lib/cni -> /var/lib/cni (if /var/lib/cni does not exist, use the parent dir)
|
||||||
|
// 5. XDG_RUNTIME_DIR/rootless-netns/run -> /run
|
||||||
|
|
||||||
|
// Create a new mount namespace,
|
||||||
|
// this must happen inside the netns thread.
|
||||||
|
err := unix.Unshare(unix.CLONE_NEWNS)
|
||||||
|
if err != nil {
|
||||||
|
return wrapError("create new mount namespace", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
xdgRuntimeDir, err := homedir.GetRuntimeDir()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("could not get runtime directory: %w", err)
|
||||||
|
}
|
||||||
|
newXDGRuntimeDir := n.getPath(xdgRuntimeDir)
|
||||||
|
// 1. Mount the netns into the new run to keep them accessible.
|
||||||
|
// Otherwise cni setup will fail because it cannot access the netns files.
|
||||||
|
err = mountAndMkdirDest(xdgRuntimeDir, newXDGRuntimeDir, none, unix.MS_BIND|unix.MS_SHARED|unix.MS_REC)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Also keep /run/systemd if it exists.
|
||||||
|
// Many files are symlinked into this dir, for example /dev/log.
|
||||||
|
runSystemd := "/run/systemd"
|
||||||
|
_, err = os.Stat(runSystemd)
|
||||||
|
if err == nil {
|
||||||
|
newRunSystemd := n.getPath(runSystemd)
|
||||||
|
err = mountAndMkdirDest(runSystemd, newRunSystemd, none, unix.MS_BIND|unix.MS_REC)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. On some distros /etc/resolv.conf is symlinked to somewhere under /run.
|
||||||
|
// Because the kernel will follow the symlink before mounting, it is not
|
||||||
|
// possible to mount a file at /etc/resolv.conf. We have to ensure that
|
||||||
|
// the link target will be available in the mount ns.
|
||||||
|
// see: https://github.com/containers/podman/issues/10855
|
||||||
|
resolvePath := resolvconf.DefaultResolvConf
|
||||||
|
linkCount := 0
|
||||||
|
for i := 1; i < len(resolvePath); i++ {
|
||||||
|
// Do not use filepath.EvalSymlinks, we only want the first symlink under /run.
|
||||||
|
// If /etc/resolv.conf has more than one symlink under /run, e.g.
|
||||||
|
// -> /run/systemd/resolve/stub-resolv.conf -> /run/systemd/resolve/resolv.conf
|
||||||
|
// we would put the netns resolv.conf file to the last path. However this will
|
||||||
|
// break dns because the second link does not exist in the mount ns.
|
||||||
|
// see https://github.com/containers/podman/issues/11222
|
||||||
|
//
|
||||||
|
// We also need to resolve all path components not just the last file.
|
||||||
|
// see https://github.com/containers/podman/issues/12461
|
||||||
|
|
||||||
|
if resolvePath[i] != '/' {
|
||||||
|
// if we are at the last char we need to inc i by one because there is no final slash
|
||||||
|
if i == len(resolvePath)-1 {
|
||||||
|
i++
|
||||||
|
} else {
|
||||||
|
// not the end of path, keep going
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
path := resolvePath[:i]
|
||||||
|
|
||||||
|
fi, err := os.Lstat(path)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to stat resolv.conf path: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// no link, just continue
|
||||||
|
if fi.Mode()&os.ModeSymlink == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
link, err := os.Readlink(path)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to read resolv.conf symlink: %w", err)
|
||||||
|
}
|
||||||
|
linkCount++
|
||||||
|
if filepath.IsAbs(link) {
|
||||||
|
// link is as an absolute path
|
||||||
|
resolvePath = filepath.Join(link, resolvePath[i:])
|
||||||
|
} else {
|
||||||
|
// link is as a relative, join it with the previous path
|
||||||
|
base := filepath.Dir(path)
|
||||||
|
resolvePath = filepath.Join(base, link, resolvePath[i:])
|
||||||
|
}
|
||||||
|
// set i back to zero since we now have a new base path
|
||||||
|
i = 0
|
||||||
|
|
||||||
|
// we have to stop at the first path under /run because we will have an empty /run and will create the path anyway
|
||||||
|
// if we would continue we would need to recreate all links under /run
|
||||||
|
if strings.HasPrefix(resolvePath, "/run/") {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// make sure wo do not loop forever
|
||||||
|
if linkCount == 255 {
|
||||||
|
return errors.New("too many symlinks while resolving /etc/resolv.conf")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
logrus.Debugf("The path of /etc/resolv.conf in the mount ns is %q", resolvePath)
|
||||||
|
// When /etc/resolv.conf on the host is a symlink to /run/systemd/resolve/stub-resolv.conf,
|
||||||
|
// we have to mount an empty filesystem on /run/systemd/resolve in the child namespace,
|
||||||
|
// so as to isolate the directory from the host mount namespace.
|
||||||
|
//
|
||||||
|
// Otherwise our bind-mount for /run/systemd/resolve/stub-resolv.conf is unmounted
|
||||||
|
// when systemd-resolved unlinks and recreates /run/systemd/resolve/stub-resolv.conf on the host.
|
||||||
|
// see: https://github.com/containers/podman/issues/10929
|
||||||
|
if strings.HasPrefix(resolvePath, "/run/systemd/resolve/") {
|
||||||
|
rsr := n.getPath("/run/systemd/resolve")
|
||||||
|
err = mountAndMkdirDest("", rsr, tmpfs, unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if strings.HasPrefix(resolvePath, "/run/") {
|
||||||
|
resolvePath = n.getPath(resolvePath)
|
||||||
|
err = os.MkdirAll(filepath.Dir(resolvePath), 0o700)
|
||||||
|
if err != nil {
|
||||||
|
return wrapError("create resolv.conf directory", err)
|
||||||
|
}
|
||||||
|
// we want to bind mount on this file so we have to create the file first
|
||||||
|
_, err = os.OpenFile(resolvePath, os.O_CREATE|os.O_RDONLY, 0o600)
|
||||||
|
if err != nil {
|
||||||
|
return wrapError("create resolv.conf file: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// mount resolv.conf to make use of the host dns
|
||||||
|
err = unix.Mount(n.getPath(resolvConfName), resolvePath, none, unix.MS_BIND, "")
|
||||||
|
if err != nil {
|
||||||
|
return wrapError(fmt.Sprintf("mount resolv.conf to %q", resolvePath), err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. CNI plugins need access to /var/lib/cni
|
||||||
|
if n.backend == CNI {
|
||||||
|
if err := n.mountCNIVarDir(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5. Mount the new prepared run dir to /run, it has to be recursive to keep the other bind mounts.
|
||||||
|
runDir := n.getPath("run")
|
||||||
|
// relabel the new run directory to the iptables /run label
|
||||||
|
// this is important, otherwise the iptables command will fail
|
||||||
|
err = label.Relabel(runDir, "system_u:object_r:iptables_var_run_t:s0", false)
|
||||||
|
if err != nil {
|
||||||
|
if !errors.Is(err, unix.ENOTSUP) {
|
||||||
|
return wrapError("relabel iptables_var_run_t", err)
|
||||||
|
}
|
||||||
|
logrus.Debugf("Labeling not supported on %q", runDir)
|
||||||
|
}
|
||||||
|
err = mountAndMkdirDest(runDir, "/run", none, unix.MS_BIND|unix.MS_REC)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Netns) mountCNIVarDir() error {
|
||||||
|
varDir := ""
|
||||||
|
varTarget := persistentCNIDir
|
||||||
|
// we can only mount to a target dir which exists, check /var/lib/cni recursively
|
||||||
|
// while we could always use /var there are cases where a user might store the cni
|
||||||
|
// configs under /var/custom and this would break
|
||||||
|
for {
|
||||||
|
if _, err := os.Stat(varTarget); err == nil {
|
||||||
|
varDir = n.getPath(varTarget)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
varTarget = filepath.Dir(varTarget)
|
||||||
|
if varTarget == "/" {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if varDir == "" {
|
||||||
|
return errors.New("failed to stat /var directory")
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(varDir, 0o700); err != nil {
|
||||||
|
return wrapError("create var dir", err)
|
||||||
|
}
|
||||||
|
// make sure to mount var first
|
||||||
|
err := unix.Mount(varDir, varTarget, none, unix.MS_BIND, "")
|
||||||
|
if err != nil {
|
||||||
|
return wrapError(fmt.Sprintf("mount %q to %q", varDir, varTarget), err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Netns) runInner(toRun func() error) (err error) {
|
||||||
|
nsRef, newNs, err := n.getOrCreateNetns()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer nsRef.Close()
|
||||||
|
// If a new netns was created make sure to clean it up again on an error to not leak it.
|
||||||
|
if newNs {
|
||||||
|
defer func() {
|
||||||
|
if err != nil {
|
||||||
|
if err := n.cleanup(); err != nil {
|
||||||
|
logrus.Errorf("Rootless netns cleanup error after failed setup: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
return nsRef.Do(func(_ ns.NetNS) error {
|
||||||
|
if err := n.setupMounts(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return toRun()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Netns) Setup(nets int, toRun func() error) error {
|
||||||
|
err := n.runInner(toRun)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
_, err = refCount(n.dir, nets)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Netns) Teardown(nets int, toRun func() error) error {
|
||||||
|
var multiErr *multierror.Error
|
||||||
|
count, countErr := refCount(n.dir, -nets)
|
||||||
|
if countErr != nil {
|
||||||
|
multiErr = multierror.Append(multiErr, countErr)
|
||||||
|
}
|
||||||
|
err := n.runInner(toRun)
|
||||||
|
if err != nil {
|
||||||
|
multiErr = multierror.Append(multiErr, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// only cleanup if the ref count did not throw an error
|
||||||
|
if count == 0 && countErr == nil {
|
||||||
|
err = n.cleanup()
|
||||||
|
if err != nil {
|
||||||
|
multiErr = multierror.Append(multiErr, wrapError("cleanup", err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return multiErr.ErrorOrNil()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run any long running function in the userns.
|
||||||
|
// We need to ensure that during setup/cleanup we are locked to avoid races.
|
||||||
|
// However because the given function could be running a long time we must
|
||||||
|
// unlock in between, i.e. this is used by podman unshare --rootless-nets
|
||||||
|
// and we do not want to keep it locked for the lifetime of the given command.
|
||||||
|
func (n *Netns) Run(lock *lockfile.LockFile, toRun func() error) error {
|
||||||
|
lock.Lock()
|
||||||
|
defer lock.Unlock()
|
||||||
|
_, err := refCount(n.dir, 1)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
inner := func() error {
|
||||||
|
lock.Unlock()
|
||||||
|
err = toRun()
|
||||||
|
lock.Lock()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
inErr := n.runInner(inner)
|
||||||
|
// make sure to always reset the ref counter afterwards
|
||||||
|
count, err := refCount(n.dir, -1)
|
||||||
|
if err != nil {
|
||||||
|
if inErr == nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
logrus.Errorf("Failed to decrement ref count: %v", err)
|
||||||
|
return inErr
|
||||||
|
}
|
||||||
|
if count == 0 {
|
||||||
|
err = n.cleanup()
|
||||||
|
if err != nil {
|
||||||
|
err = wrapError("cleanup", err)
|
||||||
|
if inErr == nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
logrus.Errorf("Failed to cleanup rootless netns: %v", err)
|
||||||
|
return inErr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return inErr
|
||||||
|
}
|
||||||
|
|
||||||
|
func refCount(dir string, inc int) (int, error) {
|
||||||
|
file := filepath.Join(dir, refCountFile)
|
||||||
|
content, err := os.ReadFile(file)
|
||||||
|
if err != nil && !errors.Is(err, fs.ErrNotExist) {
|
||||||
|
return -1, wrapError("read ref counter", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
currentCount := 0
|
||||||
|
if len(content) > 0 {
|
||||||
|
currentCount, err = strconv.Atoi(string(content))
|
||||||
|
if err != nil {
|
||||||
|
return -1, wrapError("parse ref counter", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
currentCount += inc
|
||||||
|
if currentCount < 0 {
|
||||||
|
logrus.Errorf("rootless netns ref counter out of sync, counter is at %d, resetting it back to 0", currentCount)
|
||||||
|
currentCount = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
newNum := strconv.Itoa(currentCount)
|
||||||
|
if err = os.WriteFile(file, []byte(newNum), 0o600); err != nil {
|
||||||
|
return -1, wrapError("write ref counter", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return currentCount, nil
|
||||||
|
}
|
|
@ -10,6 +10,7 @@ import (
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
@ -79,6 +80,15 @@ func getRustLogEnv() string {
|
||||||
func (n *netavarkNetwork) execNetavark(args []string, needPlugin bool, stdin, result interface{}) error {
|
func (n *netavarkNetwork) execNetavark(args []string, needPlugin bool, stdin, result interface{}) error {
|
||||||
// set the netavark log level to the same as the podman
|
// set the netavark log level to the same as the podman
|
||||||
env := append(os.Environ(), getRustLogEnv())
|
env := append(os.Environ(), getRustLogEnv())
|
||||||
|
// Netavark needs access to iptables in $PATH. As it turns out Debian doesn't put
|
||||||
|
// /usr/sbin in $PATH for rootless users. This will break rootless networking completely.
|
||||||
|
// We might break existing users and we cannot expect everyone to change their $PATH so
|
||||||
|
// let's add /usr/sbin to $PATH ourselves.
|
||||||
|
path := os.Getenv("PATH")
|
||||||
|
if !strings.Contains(path, "/usr/sbin") {
|
||||||
|
path += ":/usr/sbin"
|
||||||
|
env = append(env, "PATH="+path)
|
||||||
|
}
|
||||||
// if we run with debug log level lets also set RUST_BACKTRACE=1 so we can get the full stack trace in case of panics
|
// if we run with debug log level lets also set RUST_BACKTRACE=1 so we can get the full stack trace in case of panics
|
||||||
if logrus.IsLevelEnabled(logrus.DebugLevel) {
|
if logrus.IsLevelEnabled(logrus.DebugLevel) {
|
||||||
env = append(env, "RUST_BACKTRACE=1")
|
env = append(env, "RUST_BACKTRACE=1")
|
||||||
|
|
|
@ -12,6 +12,7 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/containers/common/libnetwork/internal/rootlessnetns"
|
||||||
"github.com/containers/common/libnetwork/internal/util"
|
"github.com/containers/common/libnetwork/internal/util"
|
||||||
"github.com/containers/common/libnetwork/types"
|
"github.com/containers/common/libnetwork/types"
|
||||||
"github.com/containers/common/pkg/config"
|
"github.com/containers/common/pkg/config"
|
||||||
|
@ -68,6 +69,9 @@ type netavarkNetwork struct {
|
||||||
|
|
||||||
// networks is a map with loaded networks, the key is the network name
|
// networks is a map with loaded networks, the key is the network name
|
||||||
networks map[string]*types.Network
|
networks map[string]*types.Network
|
||||||
|
|
||||||
|
// rootlessNetns is used for the rootless network setup/teardown
|
||||||
|
rootlessNetns *rootlessnetns.Netns
|
||||||
}
|
}
|
||||||
|
|
||||||
type InitConfig struct {
|
type InitConfig struct {
|
||||||
|
@ -82,26 +86,12 @@ type InitConfig struct {
|
||||||
// NetworkRunDir is where temporary files are stored, i.e.the ipam db, aardvark config
|
// NetworkRunDir is where temporary files are stored, i.e.the ipam db, aardvark config
|
||||||
NetworkRunDir string
|
NetworkRunDir string
|
||||||
|
|
||||||
// FirewallDriver sets the firewall driver to use
|
|
||||||
FirewallDriver string
|
|
||||||
|
|
||||||
// DefaultNetwork is the name for the default network.
|
|
||||||
DefaultNetwork string
|
|
||||||
// DefaultSubnet is the default subnet for the default network.
|
|
||||||
DefaultSubnet string
|
|
||||||
|
|
||||||
// DefaultsubnetPools contains the subnets which must be used to allocate a free subnet by network create
|
|
||||||
DefaultsubnetPools []config.SubnetPool
|
|
||||||
|
|
||||||
// DNSBindPort is set the port to pass to netavark for aardvark
|
|
||||||
DNSBindPort uint16
|
|
||||||
|
|
||||||
// PluginDirs list of directories were netavark plugins are located
|
|
||||||
PluginDirs []string
|
|
||||||
|
|
||||||
// Syslog describes whenever the netavark debug output should be log to the syslog as well.
|
// Syslog describes whenever the netavark debug output should be log to the syslog as well.
|
||||||
// This will use logrus to do so, make sure logrus is set up to log to the syslog.
|
// This will use logrus to do so, make sure logrus is set up to log to the syslog.
|
||||||
Syslog bool
|
Syslog bool
|
||||||
|
|
||||||
|
// Config containers.conf options
|
||||||
|
Config *config.Config
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewNetworkInterface creates the ContainerNetwork interface for the netavark backend.
|
// NewNetworkInterface creates the ContainerNetwork interface for the netavark backend.
|
||||||
|
@ -118,12 +108,12 @@ func NewNetworkInterface(conf *InitConfig) (types.ContainerNetwork, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
defaultNetworkName := conf.DefaultNetwork
|
defaultNetworkName := conf.Config.Network.DefaultNetwork
|
||||||
if defaultNetworkName == "" {
|
if defaultNetworkName == "" {
|
||||||
defaultNetworkName = types.DefaultNetworkName
|
defaultNetworkName = types.DefaultNetworkName
|
||||||
}
|
}
|
||||||
|
|
||||||
defaultSubnet := conf.DefaultSubnet
|
defaultSubnet := conf.Config.Network.DefaultSubnet
|
||||||
if defaultSubnet == "" {
|
if defaultSubnet == "" {
|
||||||
defaultSubnet = types.DefaultSubnet
|
defaultSubnet = types.DefaultSubnet
|
||||||
}
|
}
|
||||||
|
@ -140,11 +130,19 @@ func NewNetworkInterface(conf *InitConfig) (types.ContainerNetwork, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
defaultSubnetPools := conf.DefaultsubnetPools
|
defaultSubnetPools := conf.Config.Network.DefaultSubnetPools
|
||||||
if defaultSubnetPools == nil {
|
if defaultSubnetPools == nil {
|
||||||
defaultSubnetPools = config.DefaultSubnetPools
|
defaultSubnetPools = config.DefaultSubnetPools
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var netns *rootlessnetns.Netns
|
||||||
|
if unshare.IsRootless() {
|
||||||
|
netns, err = rootlessnetns.New(conf.NetworkRunDir, rootlessnetns.Netavark, conf.Config)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
n := &netavarkNetwork{
|
n := &netavarkNetwork{
|
||||||
networkConfigDir: conf.NetworkConfigDir,
|
networkConfigDir: conf.NetworkConfigDir,
|
||||||
networkRunDir: conf.NetworkRunDir,
|
networkRunDir: conf.NetworkRunDir,
|
||||||
|
@ -152,14 +150,15 @@ func NewNetworkInterface(conf *InitConfig) (types.ContainerNetwork, error) {
|
||||||
aardvarkBinary: conf.AardvarkBinary,
|
aardvarkBinary: conf.AardvarkBinary,
|
||||||
networkRootless: unshare.IsRootless(),
|
networkRootless: unshare.IsRootless(),
|
||||||
ipamDBPath: filepath.Join(conf.NetworkRunDir, "ipam.db"),
|
ipamDBPath: filepath.Join(conf.NetworkRunDir, "ipam.db"),
|
||||||
firewallDriver: conf.FirewallDriver,
|
firewallDriver: conf.Config.Network.FirewallDriver,
|
||||||
defaultNetwork: defaultNetworkName,
|
defaultNetwork: defaultNetworkName,
|
||||||
defaultSubnet: defaultNet,
|
defaultSubnet: defaultNet,
|
||||||
defaultsubnetPools: defaultSubnetPools,
|
defaultsubnetPools: defaultSubnetPools,
|
||||||
dnsBindPort: conf.DNSBindPort,
|
dnsBindPort: conf.Config.Network.DNSBindPort,
|
||||||
pluginDirs: conf.PluginDirs,
|
pluginDirs: conf.Config.Network.NetavarkPluginDirs.Get(),
|
||||||
lock: lock,
|
lock: lock,
|
||||||
syslog: conf.Syslog,
|
syslog: conf.Syslog,
|
||||||
|
rootlessNetns: netns,
|
||||||
}
|
}
|
||||||
|
|
||||||
return n, nil
|
return n, nil
|
||||||
|
|
|
@ -72,12 +72,24 @@ func (n *netavarkNetwork) Setup(namespacePath string, options types.SetupOptions
|
||||||
}
|
}
|
||||||
|
|
||||||
result := map[string]types.StatusBlock{}
|
result := map[string]types.StatusBlock{}
|
||||||
err = n.execNetavark([]string{"setup", namespacePath}, needPlugin, netavarkOpts, &result)
|
setup := func() error {
|
||||||
if err != nil {
|
err := n.execNetavark([]string{"setup", namespacePath}, needPlugin, netavarkOpts, &result)
|
||||||
// lets dealloc ips to prevent leaking
|
if err != nil {
|
||||||
if err := n.deallocIPs(&options.NetworkOptions); err != nil {
|
// lets dealloc ips to prevent leaking
|
||||||
logrus.Error(err)
|
if err := n.deallocIPs(&options.NetworkOptions); err != nil {
|
||||||
|
logrus.Error(err)
|
||||||
|
}
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if n.rootlessNetns != nil {
|
||||||
|
err = n.rootlessNetns.Setup(len(options.Networks), setup)
|
||||||
|
} else {
|
||||||
|
err = setup()
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -112,7 +124,16 @@ func (n *netavarkNetwork) Teardown(namespacePath string, options types.TeardownO
|
||||||
return fmt.Errorf("failed to convert net opts: %w", err)
|
return fmt.Errorf("failed to convert net opts: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
retErr := n.execNetavark([]string{"teardown", namespacePath}, needPlugin, netavarkOpts, nil)
|
var retErr error
|
||||||
|
teardown := func() error {
|
||||||
|
return n.execNetavark([]string{"teardown", namespacePath}, needPlugin, netavarkOpts, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
if n.rootlessNetns != nil {
|
||||||
|
retErr = n.rootlessNetns.Teardown(len(options.Networks), teardown)
|
||||||
|
} else {
|
||||||
|
retErr = teardown()
|
||||||
|
}
|
||||||
|
|
||||||
// when netavark returned an error we still free the used ips
|
// when netavark returned an error we still free the used ips
|
||||||
// otherwise we could end up in a state where block the ips forever
|
// otherwise we could end up in a state where block the ips forever
|
||||||
|
@ -160,3 +181,10 @@ func (n *netavarkNetwork) convertNetOpts(opts types.NetworkOptions) (*netavarkOp
|
||||||
}
|
}
|
||||||
return &netavarkOptions, needsPlugin, nil
|
return &netavarkOptions, needsPlugin, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (n *netavarkNetwork) RunInRootlessNetns(toRun func() error) error {
|
||||||
|
if n.rootlessNetns == nil {
|
||||||
|
return types.ErrNotRootlessNetns
|
||||||
|
}
|
||||||
|
return n.rootlessNetns.Run(n.lock, toRun)
|
||||||
|
}
|
||||||
|
|
|
@ -77,17 +77,12 @@ func NetworkBackend(store storage.Store, conf *config.Config, syslog bool) (type
|
||||||
}
|
}
|
||||||
|
|
||||||
netInt, err := netavark.NewNetworkInterface(&netavark.InitConfig{
|
netInt, err := netavark.NewNetworkInterface(&netavark.InitConfig{
|
||||||
NetworkConfigDir: confDir,
|
Config: conf,
|
||||||
NetworkRunDir: runDir,
|
NetworkConfigDir: confDir,
|
||||||
NetavarkBinary: netavarkBin,
|
NetworkRunDir: runDir,
|
||||||
AardvarkBinary: aardvarkBin,
|
NetavarkBinary: netavarkBin,
|
||||||
PluginDirs: conf.Network.NetavarkPluginDirs.Get(),
|
AardvarkBinary: aardvarkBin,
|
||||||
FirewallDriver: conf.Network.FirewallDriver,
|
Syslog: syslog,
|
||||||
DefaultNetwork: conf.Network.DefaultNetwork,
|
|
||||||
DefaultSubnet: conf.Network.DefaultSubnet,
|
|
||||||
DefaultsubnetPools: conf.Network.DefaultSubnetPools,
|
|
||||||
DNSBindPort: conf.Network.DNSBindPort,
|
|
||||||
Syslog: syslog,
|
|
||||||
})
|
})
|
||||||
return types.Netavark, netInt, err
|
return types.Netavark, netInt, err
|
||||||
case types.CNI:
|
case types.CNI:
|
||||||
|
@ -181,13 +176,10 @@ func getCniInterface(conf *config.Config) (types.ContainerNetwork, error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return cni.NewCNINetworkInterface(&cni.InitConfig{
|
return cni.NewCNINetworkInterface(&cni.InitConfig{
|
||||||
CNIConfigDir: confDir,
|
Config: conf,
|
||||||
CNIPluginDirs: conf.Network.CNIPluginDirs.Get(),
|
CNIConfigDir: confDir,
|
||||||
RunDir: conf.Engine.TmpDir,
|
RunDir: conf.Engine.TmpDir,
|
||||||
DefaultNetwork: conf.Network.DefaultNetwork,
|
IsMachine: machine.IsGvProxyBased(),
|
||||||
DefaultSubnet: conf.Network.DefaultSubnet,
|
|
||||||
DefaultsubnetPools: conf.Network.DefaultSubnetPools,
|
|
||||||
IsMachine: machine.IsGvProxyBased(),
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,9 @@ var (
|
||||||
// exists.
|
// exists.
|
||||||
ErrNetworkExists = errors.New("network already exists")
|
ErrNetworkExists = errors.New("network already exists")
|
||||||
|
|
||||||
|
// ErrNotRootlessNetns indicates the rootless netns can only be used when running rootless, not as root
|
||||||
|
ErrNotRootlessNetns = errors.New("rootless netns cannot be used as root")
|
||||||
|
|
||||||
// NameRegex is a regular expression to validate names.
|
// NameRegex is a regular expression to validate names.
|
||||||
// This must NOT be changed.
|
// This must NOT be changed.
|
||||||
NameRegex = regexp.Delayed("^[a-zA-Z0-9][a-zA-Z0-9_.-]*$")
|
NameRegex = regexp.Delayed("^[a-zA-Z0-9][a-zA-Z0-9_.-]*$")
|
||||||
|
|
|
@ -27,6 +27,10 @@ type ContainerNetwork interface {
|
||||||
// Teardown will teardown the container network namespace.
|
// Teardown will teardown the container network namespace.
|
||||||
Teardown(namespacePath string, options TeardownOptions) error
|
Teardown(namespacePath string, options TeardownOptions) error
|
||||||
|
|
||||||
|
// RunInRootlessNetns is used to run the given function in the rootless netns.
|
||||||
|
// Only used as rootless and should return an error as root.
|
||||||
|
RunInRootlessNetns(toRun func() error) error
|
||||||
|
|
||||||
// Drivers will return the list of supported network drivers
|
// Drivers will return the list of supported network drivers
|
||||||
// for this interface.
|
// for this interface.
|
||||||
Drivers() []string
|
Drivers() []string
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
package cgroups
|
package cgroups
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
@ -11,6 +12,7 @@ import (
|
||||||
"path"
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
@ -143,3 +145,171 @@ func SetBlkioThrottle(res *configs.Resources, cgroupPath string) error {
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Code below was moved from podman/utils/utils_supported.go and should probably be better
|
||||||
|
// integrated here as some parts may be redundant.
|
||||||
|
|
||||||
|
// getCgroupProcess parses a /proc/<pid>/cgroup style file and returns the
// cgroup path found in it.
//
// Every line must have the form "ID:CONTROLLERS:PATH". A line starting with
// "0::" wins immediately and its path is returned. Otherwise the longest path
// of all lines is used.
//
// An error is returned when the file cannot be opened or read completely, when
// a line cannot be parsed, when no cgroup was found, or when the result is the
// root cgroup "/" and allowRoot is false.
func getCgroupProcess(procFile string, allowRoot bool) (string, error) {
	f, err := os.Open(procFile)
	if err != nil {
		return "", err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	cgroup := ""
	for scanner.Scan() {
		line := scanner.Text()
		parts := strings.SplitN(line, ":", 3)
		if len(parts) != 3 {
			return "", fmt.Errorf("cannot parse cgroup line %q", line)
		}
		if strings.HasPrefix(line, "0::") {
			cgroup = line[3:]
			break
		}
		if len(parts[2]) > len(cgroup) {
			cgroup = parts[2]
		}
	}
	// Scan also stops on read errors and over-long lines; do not hand out a
	// result that was computed from only a part of the file.
	if err := scanner.Err(); err != nil {
		return "", fmt.Errorf("reading %q: %w", procFile, err)
	}
	if len(cgroup) == 0 || (!allowRoot && cgroup == "/") {
		return "", fmt.Errorf("could not find cgroup mount in %q", procFile)
	}
	return cgroup, nil
}
|
||||||
|
|
||||||
|
// GetOwnCgroup returns the cgroup for the current process.
|
||||||
|
func GetOwnCgroup() (string, error) {
|
||||||
|
return getCgroupProcess("/proc/self/cgroup", true)
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetOwnCgroupDisallowRoot() (string, error) {
|
||||||
|
return getCgroupProcess("/proc/self/cgroup", false)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetCgroupProcess returns the cgroup for the specified process process.
|
||||||
|
func GetCgroupProcess(pid int) (string, error) {
|
||||||
|
return getCgroupProcess(fmt.Sprintf("/proc/%d/cgroup", pid), true)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MoveUnderCgroupSubtree moves the PID under a cgroup subtree.
|
||||||
|
func MoveUnderCgroupSubtree(subtree string) error {
|
||||||
|
return MoveUnderCgroup("", subtree, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MoveUnderCgroup moves a group of processes to a new cgroup.
|
||||||
|
// If cgroup is the empty string, then the current calling process cgroup is used.
|
||||||
|
// If processes is empty, then the processes from the current cgroup are moved.
|
||||||
|
func MoveUnderCgroup(cgroup, subtree string, processes []uint32) error {
|
||||||
|
procFile := "/proc/self/cgroup"
|
||||||
|
f, err := os.Open(procFile)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
unifiedMode, err := IsCgroup2UnifiedMode()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(f)
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := scanner.Text()
|
||||||
|
parts := strings.SplitN(line, ":", 3)
|
||||||
|
if len(parts) != 3 {
|
||||||
|
return fmt.Errorf("cannot parse cgroup line %q", line)
|
||||||
|
}
|
||||||
|
|
||||||
|
// root cgroup, skip it
|
||||||
|
if parts[2] == "/" && !(unifiedMode && parts[1] == "") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
cgroupRoot := "/sys/fs/cgroup"
|
||||||
|
// Special case the unified mount on hybrid cgroup and named hierarchies.
|
||||||
|
// This works on Fedora 31, but we should really parse the mounts to see
|
||||||
|
// where the cgroup hierarchy is mounted.
|
||||||
|
if parts[1] == "" && !unifiedMode {
|
||||||
|
// If it is not using unified mode, the cgroup v2 hierarchy is
|
||||||
|
// usually mounted under /sys/fs/cgroup/unified
|
||||||
|
cgroupRoot = filepath.Join(cgroupRoot, "unified")
|
||||||
|
|
||||||
|
// Ignore the unified mount if it doesn't exist
|
||||||
|
if _, err := os.Stat(cgroupRoot); err != nil && os.IsNotExist(err) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
} else if parts[1] != "" {
|
||||||
|
// Assume the controller is mounted at /sys/fs/cgroup/$CONTROLLER.
|
||||||
|
controller := strings.TrimPrefix(parts[1], "name=")
|
||||||
|
cgroupRoot = filepath.Join(cgroupRoot, controller)
|
||||||
|
}
|
||||||
|
|
||||||
|
parentCgroup := cgroup
|
||||||
|
if parentCgroup == "" {
|
||||||
|
parentCgroup = parts[2]
|
||||||
|
}
|
||||||
|
newCgroup := filepath.Join(cgroupRoot, parentCgroup, subtree)
|
||||||
|
if err := os.MkdirAll(newCgroup, 0o755); err != nil && !os.IsExist(err) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err := os.OpenFile(filepath.Join(newCgroup, "cgroup.procs"), os.O_RDWR, 0o755)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
if len(processes) > 0 {
|
||||||
|
for _, pid := range processes {
|
||||||
|
if _, err := f.WriteString(fmt.Sprintf("%d\n", pid)); err != nil {
|
||||||
|
logrus.Debugf("Cannot move process %d to cgroup %q: %v", pid, newCgroup, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
processesData, err := os.ReadFile(filepath.Join(cgroupRoot, parts[2], "cgroup.procs"))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for _, pid := range bytes.Split(processesData, []byte("\n")) {
|
||||||
|
if len(pid) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, err := f.Write(pid); err != nil {
|
||||||
|
logrus.Debugf("Cannot move process %s to cgroup %q: %v", string(pid), newCgroup, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
maybeMoveToSubCgroupSync sync.Once
|
||||||
|
maybeMoveToSubCgroupSyncErr error
|
||||||
|
)
|
||||||
|
|
||||||
|
// MaybeMoveToSubCgroup moves the current process in a sub cgroup when
|
||||||
|
// it is running in the root cgroup on a system that uses cgroupv2.
|
||||||
|
func MaybeMoveToSubCgroup() error {
|
||||||
|
maybeMoveToSubCgroupSync.Do(func() {
|
||||||
|
unifiedMode, err := IsCgroup2UnifiedMode()
|
||||||
|
if err != nil {
|
||||||
|
maybeMoveToSubCgroupSyncErr = err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if !unifiedMode {
|
||||||
|
maybeMoveToSubCgroupSyncErr = nil
|
||||||
|
return
|
||||||
|
}
|
||||||
|
cgroup, err := GetOwnCgroup()
|
||||||
|
if err != nil {
|
||||||
|
maybeMoveToSubCgroupSyncErr = err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if cgroup == "/" {
|
||||||
|
maybeMoveToSubCgroupSyncErr = MoveUnderCgroupSubtree("init")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return maybeMoveToSubCgroupSyncErr
|
||||||
|
}
|
||||||
|
|
|
@ -32,10 +32,12 @@ import (
|
||||||
"github.com/containernetworking/plugins/pkg/ns"
|
"github.com/containernetworking/plugins/pkg/ns"
|
||||||
"github.com/containers/storage/pkg/homedir"
|
"github.com/containers/storage/pkg/homedir"
|
||||||
"github.com/containers/storage/pkg/unshare"
|
"github.com/containers/storage/pkg/unshare"
|
||||||
"github.com/sirupsen/logrus"
|
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// threadNsPath is the /proc path to the current netns handle for the current thread
|
||||||
|
const threadNsPath = "/proc/thread-self/ns/net"
|
||||||
|
|
||||||
// GetNSRunDir returns the dir of where to create the netNS. When running
|
// GetNSRunDir returns the dir of where to create the netNS. When running
|
||||||
// rootless, it needs to be at a location writable by user.
|
// rootless, it needs to be at a location writable by user.
|
||||||
func GetNSRunDir() (string, error) {
|
func GetNSRunDir() (string, error) {
|
||||||
|
@ -49,6 +51,10 @@ func GetNSRunDir() (string, error) {
|
||||||
return "/run/netns", nil
|
return "/run/netns", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func NewNSAtPath(nsPath string) (ns.NetNS, error) {
|
||||||
|
return newNSPath(nsPath)
|
||||||
|
}
|
||||||
|
|
||||||
// NewNS creates a new persistent (bind-mounted) network namespace and returns
|
// NewNS creates a new persistent (bind-mounted) network namespace and returns
|
||||||
// an object representing that namespace, without switching to it.
|
// an object representing that namespace, without switching to it.
|
||||||
func NewNS() (ns.NetNS, error) {
|
func NewNS() (ns.NetNS, error) {
|
||||||
|
@ -111,8 +117,12 @@ func NewNSWithName(name string) (ns.NetNS, error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// create an empty file at the mount point
|
|
||||||
nsPath := path.Join(nsRunDir, name)
|
nsPath := path.Join(nsRunDir, name)
|
||||||
|
return newNSPath(nsPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
func newNSPath(nsPath string) (ns.NetNS, error) {
|
||||||
|
// create an empty file at the mount point
|
||||||
mountPointFd, err := os.OpenFile(nsPath, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o600)
|
mountPointFd, err := os.OpenFile(nsPath, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o600)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -140,24 +150,10 @@ func NewNSWithName(name string) (ns.NetNS, error) {
|
||||||
// Don't unlock. By not unlocking, golang will kill the OS thread when the
|
// Don't unlock. By not unlocking, golang will kill the OS thread when the
|
||||||
// goroutine is done (for go1.10+)
|
// goroutine is done (for go1.10+)
|
||||||
|
|
||||||
threadNsPath := getCurrentThreadNetNSPath()
|
|
||||||
|
|
||||||
var origNS ns.NetNS
|
|
||||||
origNS, err = ns.GetNS(threadNsPath)
|
|
||||||
if err != nil {
|
|
||||||
logrus.Warnf("Cannot open current network namespace %s: %q", threadNsPath, err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
if err := origNS.Close(); err != nil {
|
|
||||||
logrus.Errorf("Unable to close namespace: %q", err)
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
// create a new netns on the current thread
|
// create a new netns on the current thread
|
||||||
err = unix.Unshare(unix.CLONE_NEWNET)
|
err = unix.Unshare(unix.CLONE_NEWNET)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logrus.Warnf("Cannot create a new network namespace: %q", err)
|
err = fmt.Errorf("unshare network namespace: %w", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -181,13 +177,8 @@ func NewNSWithName(name string) (ns.NetNS, error) {
|
||||||
|
|
||||||
// UnmountNS unmounts the given netns path
|
// UnmountNS unmounts the given netns path
|
||||||
func UnmountNS(nsPath string) error {
|
func UnmountNS(nsPath string) error {
|
||||||
nsRunDir, err := GetNSRunDir()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only unmount if it's been bind-mounted (don't touch namespaces in /proc...)
|
// Only unmount if it's been bind-mounted (don't touch namespaces in /proc...)
|
||||||
if strings.HasPrefix(nsPath, nsRunDir) {
|
if !strings.HasPrefix(nsPath, "/proc/") {
|
||||||
if err := unix.Unmount(nsPath, unix.MNT_DETACH); err != nil {
|
if err := unix.Unmount(nsPath, unix.MNT_DETACH); err != nil {
|
||||||
return fmt.Errorf("failed to unmount NS: at %s: %v", nsPath, err)
|
return fmt.Errorf("failed to unmount NS: at %s: %v", nsPath, err)
|
||||||
}
|
}
|
||||||
|
@ -199,11 +190,3 @@ func UnmountNS(nsPath string) error {
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// getCurrentThreadNetNSPath copied from pkg/ns
|
|
||||||
func getCurrentThreadNetNSPath() string {
|
|
||||||
// /proc/self/ns/net returns the namespace of the main thread, not
|
|
||||||
// of whatever thread this goroutine is running on. Make sure we
|
|
||||||
// use the thread's net namespace since the thread is switching around
|
|
||||||
return fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
|
|
||||||
}
|
|
||||||
|
|
|
@ -0,0 +1,151 @@
|
||||||
|
package systemd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/rand"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/containers/common/pkg/cgroups"
|
||||||
|
"github.com/containers/storage/pkg/unshare"
|
||||||
|
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||||
|
"github.com/godbus/dbus/v5"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
	// runsOnSystemdOnce guards the one time detection done by RunsOnSystemd.
	runsOnSystemdOnce sync.Once
	// runsOnSystemd caches the detection result.
	runsOnSystemd bool
)

// RunsOnSystemd reports whether the system is using systemd. The check is
// performed on the first call only and the result is cached.
func RunsOnSystemd() bool {
	runsOnSystemdOnce.Do(func() {
		// per sd_booted(3), check for this dir
		info, statErr := os.Stat("/run/systemd/system")
		if statErr != nil {
			// runsOnSystemd keeps its zero value, false
			return
		}
		runsOnSystemd = info.IsDir()
	})
	return runsOnSystemd
}
|
||||||
|
|
||||||
|
func moveProcessPIDFileToScope(pidPath, slice, scope string) error {
|
||||||
|
data, err := os.ReadFile(pidPath)
|
||||||
|
if err != nil {
|
||||||
|
// do not raise an error if the file doesn't exist
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return fmt.Errorf("cannot read pid file: %w", err)
|
||||||
|
}
|
||||||
|
pid, err := strconv.ParseUint(string(data), 10, 0)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("cannot parse pid file %s: %w", pidPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return moveProcessToScope(int(pid), slice, scope)
|
||||||
|
}
|
||||||
|
|
||||||
|
func moveProcessToScope(pid int, slice, scope string) error {
|
||||||
|
err := RunUnderSystemdScope(pid, slice, scope)
|
||||||
|
// If the PID is not valid anymore, do not return an error.
|
||||||
|
if dbusErr, ok := err.(dbus.Error); ok {
|
||||||
|
if dbusErr.Name == "org.freedesktop.DBus.Error.UnixProcessIdUnknown" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// MoveRootlessNetnsSlirpProcessToUserSlice moves the slirp4netns process for the rootless netns
|
||||||
|
// into a different scope so that systemd does not kill it with a container.
|
||||||
|
func MoveRootlessNetnsSlirpProcessToUserSlice(pid int) error {
|
||||||
|
randBytes := make([]byte, 4)
|
||||||
|
_, err := rand.Read(randBytes)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return moveProcessToScope(pid, "user.slice", fmt.Sprintf("rootless-netns-%x.scope", randBytes))
|
||||||
|
}
|
||||||
|
|
||||||
|
// MovePauseProcessToScope moves the pause process used for rootless mode to keep the namespaces alive to
|
||||||
|
// a separate scope.
|
||||||
|
func MovePauseProcessToScope(pausePidPath string) {
|
||||||
|
var err error
|
||||||
|
|
||||||
|
for i := 0; i < 10; i++ {
|
||||||
|
randBytes := make([]byte, 4)
|
||||||
|
_, err = rand.Read(randBytes)
|
||||||
|
if err != nil {
|
||||||
|
logrus.Errorf("failed to read random bytes: %v", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
err = moveProcessPIDFileToScope(pausePidPath, "user.slice", fmt.Sprintf("podman-pause-%x.scope", randBytes))
|
||||||
|
if err == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
unified, err2 := cgroups.IsCgroup2UnifiedMode()
|
||||||
|
if err2 != nil {
|
||||||
|
logrus.Warnf("Failed to detect if running with cgroup unified: %v", err)
|
||||||
|
}
|
||||||
|
if RunsOnSystemd() && unified {
|
||||||
|
logrus.Warnf("Failed to add pause process to systemd sandbox cgroup: %v", err)
|
||||||
|
} else {
|
||||||
|
logrus.Debugf("Failed to add pause process to systemd sandbox cgroup: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RunUnderSystemdScope adds the specified pid to a systemd scope
|
||||||
|
func RunUnderSystemdScope(pid int, slice string, unitName string) error {
|
||||||
|
var properties []systemdDbus.Property
|
||||||
|
var conn *systemdDbus.Conn
|
||||||
|
var err error
|
||||||
|
|
||||||
|
if unshare.GetRootlessUID() != 0 {
|
||||||
|
conn, err = cgroups.UserConnection(unshare.GetRootlessUID())
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
conn, err = systemdDbus.NewWithContext(context.Background())
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
defer conn.Close()
|
||||||
|
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||||
|
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||||
|
properties = append(properties, newProp("Delegate", true))
|
||||||
|
properties = append(properties, newProp("DefaultDependencies", false))
|
||||||
|
ch := make(chan string)
|
||||||
|
_, err = conn.StartTransientUnitContext(context.Background(), unitName, "replace", properties, ch)
|
||||||
|
if err != nil {
|
||||||
|
// On errors check if the cgroup already exists, if it does move the process there
|
||||||
|
if props, err := conn.GetUnitTypePropertiesContext(context.Background(), unitName, "Scope"); err == nil {
|
||||||
|
if cgroup, ok := props["ControlGroup"].(string); ok && cgroup != "" {
|
||||||
|
if err := cgroups.MoveUnderCgroup(cgroup, "", []uint32{uint32(pid)}); err == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
// On errors return the original error message we got from StartTransientUnit.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Block until job is started
|
||||||
|
<-ch
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func newProp(name string, units interface{}) systemdDbus.Property {
|
||||||
|
return systemdDbus.Property{
|
||||||
|
Name: name,
|
||||||
|
Value: dbus.MakeVariant(units),
|
||||||
|
}
|
||||||
|
}
|
15
vendor/github.com/containers/common/pkg/systemd/systemd_unsupported.go
generated
vendored
Normal file
15
vendor/github.com/containers/common/pkg/systemd/systemd_unsupported.go
generated
vendored
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
//go:build !linux
|
||||||
|
|
||||||
|
package systemd
|
||||||
|
|
||||||
|
import "errors"
|
||||||
|
|
||||||
|
// RunsOnSystemd always reports false in this build of the package.
func RunsOnSystemd() bool {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// MovePauseProcessToScope is a no-op in this build of the package; pausePidPath is ignored.
func MovePauseProcessToScope(pausePidPath string) {}
|
||||||
|
|
||||||
|
// RunUnderSystemdScope is not implemented in this build of the package and
// always returns an error; all arguments are ignored.
func RunUnderSystemdScope(pid int, slice string, unitName string) error {
|
||||||
|
return errors.New("RunUnderSystemdScope not supported on this OS")
|
||||||
|
}
|
|
@ -167,7 +167,7 @@ github.com/containers/buildah/pkg/sshagent
|
||||||
github.com/containers/buildah/pkg/util
|
github.com/containers/buildah/pkg/util
|
||||||
github.com/containers/buildah/pkg/volumes
|
github.com/containers/buildah/pkg/volumes
|
||||||
github.com/containers/buildah/util
|
github.com/containers/buildah/util
|
||||||
# github.com/containers/common v0.57.1-0.20231130092720-630c929caef9
|
# github.com/containers/common v0.57.1-0.20231206135104-b647eb3a5eea
|
||||||
## explicit; go 1.18
|
## explicit; go 1.18
|
||||||
github.com/containers/common/internal/attributedstring
|
github.com/containers/common/internal/attributedstring
|
||||||
github.com/containers/common/libimage
|
github.com/containers/common/libimage
|
||||||
|
@ -177,6 +177,7 @@ github.com/containers/common/libimage/manifests
|
||||||
github.com/containers/common/libimage/platform
|
github.com/containers/common/libimage/platform
|
||||||
github.com/containers/common/libnetwork/cni
|
github.com/containers/common/libnetwork/cni
|
||||||
github.com/containers/common/libnetwork/etchosts
|
github.com/containers/common/libnetwork/etchosts
|
||||||
|
github.com/containers/common/libnetwork/internal/rootlessnetns
|
||||||
github.com/containers/common/libnetwork/internal/util
|
github.com/containers/common/libnetwork/internal/util
|
||||||
github.com/containers/common/libnetwork/netavark
|
github.com/containers/common/libnetwork/netavark
|
||||||
github.com/containers/common/libnetwork/network
|
github.com/containers/common/libnetwork/network
|
||||||
|
@ -223,6 +224,7 @@ github.com/containers/common/pkg/ssh
|
||||||
github.com/containers/common/pkg/subscriptions
|
github.com/containers/common/pkg/subscriptions
|
||||||
github.com/containers/common/pkg/supplemented
|
github.com/containers/common/pkg/supplemented
|
||||||
github.com/containers/common/pkg/sysinfo
|
github.com/containers/common/pkg/sysinfo
|
||||||
|
github.com/containers/common/pkg/systemd
|
||||||
github.com/containers/common/pkg/timetype
|
github.com/containers/common/pkg/timetype
|
||||||
github.com/containers/common/pkg/umask
|
github.com/containers/common/pkg/umask
|
||||||
github.com/containers/common/pkg/util
|
github.com/containers/common/pkg/util
|
||||||
|
|
Loading…
Reference in New Issue