rootlessport: reduce memory usage of the process

Don't use reexec for the rootlessport process, instead make it a
separate binary to reduce the memory usage. The problem with reexec is
that it will import all packages that podman uses and therefore loads a
lot of stuff into the heap. The rootlessport process however only needs
the rootlesskit library.
The memory usage is a concern since rootlessport will spawn two
processes for every container that has ports forwarded. The processes stay
until the container dies. On my laptop the current reexec version uses
47800 KB RSS. The new separate binary only uses 4540 KB RSS. This is
more than a 90% improvement.

The Makefile has been updated to compile the new binary and install it
to the libexec directory.

Fixes #10790

[NO TESTS NEEDED]

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
This commit is contained in:
Paul Holzinger 2021-09-02 14:14:59 +02:00
parent c90beedbe1
commit 3ba69dccf7
No known key found for this signature in database
GPG Key ID: EB145DD938A3CAF2
8 changed files with 394 additions and 361 deletions

View File

@ -39,6 +39,7 @@ LIBPOD_INSTANCE := libpod_dev
PREFIX ?= /usr/local PREFIX ?= /usr/local
BINDIR ?= ${PREFIX}/bin BINDIR ?= ${PREFIX}/bin
LIBEXECDIR ?= ${PREFIX}/libexec LIBEXECDIR ?= ${PREFIX}/libexec
LIBEXECPODMAN ?= ${LIBEXECDIR}/podman
MANDIR ?= ${PREFIX}/share/man MANDIR ?= ${PREFIX}/share/man
SHAREDIR_CONTAINERS ?= ${PREFIX}/share/containers SHAREDIR_CONTAINERS ?= ${PREFIX}/share/containers
ETCDIR ?= ${PREFIX}/etc ETCDIR ?= ${PREFIX}/etc
@ -186,7 +187,7 @@ default: all
all: binaries docs all: binaries docs
.PHONY: binaries .PHONY: binaries
binaries: podman podman-remote ## Build podman and podman-remote binaries binaries: podman podman-remote rootlessport ## Build podman, podman-remote and rootlessport binaries
# Extract text following double-# for targets, as their description for # Extract text following double-# for targets, as their description for
# the `help` target. Otherwise These simple-substitutions are resolved # the `help` target. Otherwise These simple-substitutions are resolved
@ -355,6 +356,15 @@ podman-remote-darwin: ## Build podman-remote for macOS
GOARCH=$(GOARCH) \ GOARCH=$(GOARCH) \
bin/darwin/podman bin/darwin/podman
# Build the standalone rootlessport helper from ./cmd/rootlessport.
# It uses the same CGO_ENABLED, GO and BUILDFLAGS settings that are passed
# in here and writes the result to the target path ($@ = bin/rootlessport).
bin/rootlessport: .gopathok $(SOURCES) go.mod go.sum
CGO_ENABLED=$(CGO_ENABLED) \
$(GO) build \
$(BUILDFLAGS) \
-o $@ ./cmd/rootlessport
# Phony alias so that `make rootlessport` builds bin/rootlessport.
.PHONY: rootlessport
rootlessport: bin/rootlessport
### ###
### Secondary binary-build targets ### Secondary binary-build targets
### ###
@ -718,11 +728,14 @@ install.bin-nobuild:
install ${SELINUXOPT} -d -m 755 $(DESTDIR)$(BINDIR) install ${SELINUXOPT} -d -m 755 $(DESTDIR)$(BINDIR)
install ${SELINUXOPT} -m 755 bin/podman $(DESTDIR)$(BINDIR)/podman install ${SELINUXOPT} -m 755 bin/podman $(DESTDIR)$(BINDIR)/podman
test -z "${SELINUXOPT}" || chcon --verbose --reference=$(DESTDIR)$(BINDIR)/podman bin/podman test -z "${SELINUXOPT}" || chcon --verbose --reference=$(DESTDIR)$(BINDIR)/podman bin/podman
install ${SELINUXOPT} -d -m 755 $(DESTDIR)$(LIBEXECPODMAN)
install ${SELINUXOPT} -m 755 bin/rootlessport $(DESTDIR)$(LIBEXECPODMAN)/rootlessport
test -z "${SELINUXOPT}" || chcon --verbose --reference=$(DESTDIR)$(LIBEXECPODMAN)/rootlessport bin/rootlessport
install ${SELINUXOPT} -m 755 -d ${DESTDIR}${TMPFILESDIR} install ${SELINUXOPT} -m 755 -d ${DESTDIR}${TMPFILESDIR}
install ${SELINUXOPT} -m 644 contrib/tmpfile/podman.conf ${DESTDIR}${TMPFILESDIR}/podman.conf install ${SELINUXOPT} -m 644 contrib/tmpfile/podman.conf ${DESTDIR}${TMPFILESDIR}/podman.conf
.PHONY: install.bin .PHONY: install.bin
install.bin: podman install.bin-nobuild install.bin: podman rootlessport install.bin-nobuild
.PHONY: install.man-nobuild .PHONY: install.man-nobuild
install.man-nobuild: install.man-nobuild:

353
cmd/rootlessport/main.go Normal file
View File

@ -0,0 +1,353 @@
package main
import (
"context"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net"
"os"
"os/exec"
"path/filepath"
"github.com/containernetworking/plugins/pkg/ns"
"github.com/containers/podman/v3/libpod/network/types"
"github.com/containers/podman/v3/pkg/rootlessport"
"github.com/pkg/errors"
rkport "github.com/rootless-containers/rootlesskit/pkg/port"
rkbuiltin "github.com/rootless-containers/rootlesskit/pkg/port/builtin"
rkportutil "github.com/rootless-containers/rootlesskit/pkg/port/portutil"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
const (
// ReexecChildKey is the argv[0] value that selects the child role: parent()
// re-executes /proc/self/exe with this as its only argument and main()
// calls child() when it sees it.
ReexecChildKey = "rootlessport-child"
// reexecChildEnvOpaque is the name of the environment variable through
// which parent() passes the JSON-encoded opaque driver data to child().
reexecChildEnvOpaque = "_CONTAINERS_ROOTLESSPORT_CHILD_OPAQUE"
)
func main() {
if len(os.Args) > 1 {
fmt.Fprintln(os.Stderr, `too many arguments, rootlessport expects a json config via STDIN`)
os.Exit(1)
}
var err error
if os.Args[0] == ReexecChildKey {
err = child()
} else {
err = parent()
}
if err != nil {
fmt.Println(err)
os.Exit(1)
}
}
// loadConfig reads r until EOF, decodes the content as a JSON
// rootlessport.Config and validates the fields the parent cannot work
// without.
//
// On success it returns the config together with the file wrapping
// cfg.ExitFD (used as a reader) and the file wrapping cfg.ReadyFD (used as
// a writer). It returns an error when reading or decoding fails, when
// NetNSPath is empty, or when ExitFD/ReadyFD are not positive or cannot be
// wrapped by os.NewFile.
func loadConfig(r io.Reader) (*rootlessport.Config, io.ReadCloser, io.WriteCloser, error) {
	raw, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, nil, nil, err
	}

	cfg := new(rootlessport.Config)
	if err = json.Unmarshal(raw, cfg); err != nil {
		return nil, nil, nil, err
	}

	if cfg.NetNSPath == "" {
		return nil, nil, nil, errors.New("missing NetNSPath")
	}

	// The exit fd is validated and wrapped before the ready fd is looked at.
	if cfg.ExitFD <= 0 {
		return nil, nil, nil, errors.New("missing ExitFD")
	}
	exitR := os.NewFile(uintptr(cfg.ExitFD), "exitfile")
	if exitR == nil {
		return nil, nil, nil, errors.New("invalid ExitFD")
	}

	if cfg.ReadyFD <= 0 {
		return nil, nil, nil, errors.New("missing ReadyFD")
	}
	readyW := os.NewFile(uintptr(cfg.ReadyFD), "readyfile")
	if readyW == nil {
		return nil, nil, nil, errors.New("invalid ReadyFD")
	}

	return cfg, exitR, readyW, nil
}
// parent implements the host-side half of rootlessport.
//
// It reads the JSON config from stdin, starts the rootlesskit parent driver,
// re-executes this binary as the child inside the network namespace named by
// cfg.NetNSPath, exposes the configured port mappings once both sides are
// connected, writes "1" to ReadyFD and then blocks until ExitFD reaches EOF.
//
// Cleanup is performed by deferred calls, which run in reverse registration
// order when the function returns: stop the parent driver, SIGTERM the
// child, close the child's quit pipe, remove the state directory.
//
// It returns the first error that prevents setup, or an error from reading
// ExitFD; errors during teardown are only logged.
func parent() error {
	// load config from stdin
	cfg, exitR, readyW, err := loadConfig(os.Stdin)
	if err != nil {
		return err
	}

	socketDir := filepath.Join(cfg.TmpDir, "rp")
	err = os.MkdirAll(socketDir, 0700)
	if err != nil {
		return err
	}

	// create the parent driver
	stateDir, err := ioutil.TempDir(cfg.TmpDir, "rootlessport")
	if err != nil {
		return err
	}
	defer os.RemoveAll(stateDir)
	driver, err := rkbuiltin.NewParentDriver(&logrusWriter{prefix: "parent: "}, stateDir)
	if err != nil {
		return err
	}

	initComplete := make(chan struct{})
	quit := make(chan struct{})
	errCh := make(chan error)
	// start the parent driver. initComplete will be closed when the child connected to the parent.
	logrus.Infof("Starting parent driver")
	go func() {
		driverErr := driver.RunParentDriver(initComplete, quit, nil)
		if driverErr != nil {
			logrus.WithError(driverErr).Warn("Parent driver exited")
		}
		errCh <- driverErr
		close(errCh)
	}()

	opaque := driver.OpaqueForChild()
	logrus.Infof("opaque=%+v", opaque)
	opaqueJSON, err := json.Marshal(opaque)
	if err != nil {
		return err
	}

	// The child treats EOF on its stdin as the request to shut down, so
	// closing the write end of this pipe is what stops it.
	childQuitR, childQuitW, err := os.Pipe()
	if err != nil {
		return err
	}
	defer func() {
		// stop the child
		logrus.Info("Stopping child driver")
		if err := childQuitW.Close(); err != nil {
			logrus.WithError(err).Warn("Unable to close childQuitW")
		}
	}()

	// reexec the child process in the child netns
	cmd := exec.Command("/proc/self/exe")
	cmd.Args = []string{ReexecChildKey}
	cmd.Stdin = childQuitR
	// same "<role>: " prefix format as the parent driver's writer
	cmd.Stdout = &logrusWriter{prefix: "child: "}
	cmd.Stderr = cmd.Stdout
	cmd.Env = append(os.Environ(), reexecChildEnvOpaque+"="+string(opaqueJSON))
	childNS, err := ns.GetNS(cfg.NetNSPath)
	if err != nil {
		return err
	}
	if err := childNS.Do(func(_ ns.NetNS) error {
		logrus.Infof("Starting child driver in child netns (%q %v)", cmd.Path, cmd.Args)
		return cmd.Start()
	}); err != nil {
		return err
	}

	childErrCh := make(chan error)
	go func() {
		err := cmd.Wait()
		childErrCh <- err
		close(childErrCh)
	}()

	defer func() {
		if err := unix.Kill(cmd.Process.Pid, unix.SIGTERM); err != nil {
			logrus.WithError(err).Warn("Kill child process")
		}
	}()

	logrus.Info("Waiting for initComplete")
	// wait for the child to connect to the parent
	select {
	case <-initComplete:
		logrus.Infof("initComplete is closed; parent and child established the communication channel")
	case err := <-childErrCh:
		// The child is gone, so initComplete can never fire. A nil exit status
		// must not be ignored: the channel is closed after the first value and
		// receiving from it again would return immediately forever.
		if err == nil {
			err = errors.New("child process exited before the communication channel was established")
		}
		return err
	case err := <-errCh:
		// Same reasoning for the parent driver goroutine.
		if err == nil {
			err = errors.New("parent driver exited before the communication channel was established")
		}
		return err
	}

	defer func() {
		logrus.Info("Stopping parent driver")
		quit <- struct{}{}
		if err := <-errCh; err != nil {
			logrus.WithError(err).Warn("Parent driver returned error on exit")
		}
	}()

	// let parent expose ports
	logrus.Infof("Exposing ports %v", cfg.Mappings)
	if err := exposePorts(driver, cfg.Mappings, cfg.ChildIP); err != nil {
		return err
	}

	// we only need to have a socket to reload ports when we run under rootless cni
	if cfg.RootlessCNI {
		socketfile := filepath.Join(socketDir, cfg.ContainerID)
		// make sure to remove the file if it exists to prevent EADDRINUSE
		_ = os.Remove(socketfile)
		// workaround to bypass the 108 char socket path limit
		// open the fd and use the path to the fd as bind argument
		fd, err := unix.Open(socketDir, unix.O_PATH, 0)
		if err != nil {
			return err
		}
		socket, err := net.ListenUnix("unixpacket", &net.UnixAddr{Name: fmt.Sprintf("/proc/self/fd/%d/%s", fd, cfg.ContainerID), Net: "unixpacket"})
		// The fd was only needed to build the bind path; close it before
		// looking at the listen result so it is released on failure as well.
		closeErr := unix.Close(fd)
		if err != nil {
			return err
		}
		// remove the socket file on exit
		defer os.Remove(socketfile)
		if closeErr != nil {
			logrus.Warnf("Failed to close the socketDir fd: %v", closeErr)
		}
		defer socket.Close()
		go serve(socket, driver)
	}

	logrus.Info("Ready")

	// https://github.com/containers/podman/issues/11248
	// Copy /dev/null to stdout and stderr to prevent SIGPIPE errors
	if f, err := os.OpenFile("/dev/null", os.O_WRONLY, 0755); err == nil {
		unix.Dup2(int(f.Fd()), 1) // nolint:errcheck
		unix.Dup2(int(f.Fd()), 2) // nolint:errcheck
		f.Close()
	}
	// write and close ReadyFD (convention is same as slirp4netns --ready-fd)
	if _, err := readyW.Write([]byte("1")); err != nil {
		return err
	}
	if err := readyW.Close(); err != nil {
		return err
	}

	// wait for ExitFD to be closed
	logrus.Info("Waiting for exitfd to be closed")
	if _, err := ioutil.ReadAll(exitR); err != nil {
		return err
	}
	return nil
}
// serve accepts connections on listener one at a time and passes each to
// handler, which re-adds the forwarded ports with the child IP sent by the
// client. The reply written back is "OK" on success or the error text on
// failure; the connection is closed after each request.
//
// serve returns when Accept fails with a non-temporary error (for example
// because the listener was closed). Retrying such an error would never
// succeed and would only spin the CPU. Temporary errors are retried.
func serve(listener net.Listener, pm rkport.Manager) {
	for {
		conn, err := listener.Accept()
		if err != nil {
			// we cannot log this error, stderr is already closed
			if ne, ok := err.(net.Error); ok && ne.Temporary() {
				continue
			}
			return
		}

		reply := "OK"
		if err := handler(context.TODO(), conn, pm); err != nil {
			reply = err.Error()
		}
		// Best effort: there is nowhere to report a failed reply or close.
		_, _ = conn.Write([]byte(reply))
		_ = conn.Close()
	}
}
// handler processes one reload request read from conn.
//
// The request is a single JSON string holding the new child IP. Every port
// currently known to pm is removed first and then added again with its
// ChildIP replaced by the new value. The first failing step aborts the
// request and its error is returned wrapped with a short description; ports
// handled before the failure are not rolled back.
func handler(ctx context.Context, conn io.Reader, pm rkport.Manager) error {
	var newChildIP string
	if err := json.NewDecoder(conn).Decode(&newChildIP); err != nil {
		return errors.Wrap(err, "rootless port failed to decode ports")
	}

	current, err := pm.ListPorts(ctx)
	if err != nil {
		return errors.Wrap(err, "rootless port failed to list ports")
	}

	// Drop all existing forwards before any of them is re-created.
	for _, st := range current {
		if err := pm.RemovePort(ctx, st.ID); err != nil {
			return errors.Wrap(err, "rootless port failed to remove port")
		}
	}

	// add the ports with the new child IP
	for _, st := range current {
		st.Spec.ChildIP = newChildIP
		if _, err := pm.AddPort(ctx, st.Spec); err != nil {
			return errors.Wrap(err, "rootless port failed to add port")
		}
	}
	return nil
}
// exposePorts registers one forward with pm for every entry of
// portMappings, all pointing at childIP. An empty HostIP is replaced by
// "0.0.0.0". Each spec is validated with rkportutil.ValidatePortSpec before
// it is added; the first validation or AddPort error stops the loop and is
// returned.
func exposePorts(pm rkport.Manager, portMappings []types.OCICNIPortMapping, childIP string) error {
	ctx := context.TODO()
	for _, mapping := range portMappings {
		spec := rkport.Spec{
			Proto:      mapping.Protocol,
			ParentIP:   mapping.HostIP,
			ParentPort: int(mapping.HostPort),
			ChildPort:  int(mapping.ContainerPort),
			ChildIP:    childIP,
		}
		if spec.ParentIP == "" {
			spec.ParentIP = "0.0.0.0"
		}

		err := rkportutil.ValidatePortSpec(spec, nil)
		if err == nil {
			_, err = pm.AddPort(ctx, spec)
		}
		if err != nil {
			return err
		}
	}
	return nil
}
// child implements the half of rootlessport that the parent re-executes
// inside the container network namespace.
//
// It decodes the opaque driver data from the reexecChildEnvOpaque
// environment variable, runs the rootlesskit child driver in a goroutine and
// waits for stdin to reach EOF, which is the signal to shut down. On that
// signal the driver is asked to quit and an error it returns is logged as a
// warning; the result of reading stdin is returned.
//
// If the driver stops before a shutdown was requested, child returns right
// away with the driver's error (or a generic one if the driver returned nil)
// so that the failure becomes visible through the process exit status
// instead of leaving the process blocked on stdin.
func child() error {
	// load the config from the parent
	var opaque map[string]string
	if err := json.Unmarshal([]byte(os.Getenv(reexecChildEnvOpaque)), &opaque); err != nil {
		return err
	}

	// start the child driver
	quit := make(chan struct{})
	// Buffered so the goroutine can always deliver its result and finish.
	errCh := make(chan error, 1)
	go func() {
		d := rkbuiltin.NewChildDriver(os.Stderr)
		errCh <- d.RunChildDriver(opaque, quit)
	}()

	// wait for stdin to be closed
	stdinCh := make(chan error, 1)
	go func() {
		_, err := ioutil.ReadAll(os.Stdin)
		stdinCh <- err
	}()

	select {
	case err := <-errCh:
		// The driver ended although nobody asked it to.
		if err == nil {
			err = errors.New("child driver exited unexpectedly")
		}
		return err
	case stdinErr := <-stdinCh:
		logrus.Info("Stopping child driver")
		var driverErr error
		// Either the driver accepts the quit request and then reports its
		// result, or it has already finished and only the result is pending.
		select {
		case quit <- struct{}{}:
			driverErr = <-errCh
		case driverErr = <-errCh:
		}
		if driverErr != nil {
			logrus.WithError(driverErr).Warn("Child driver returned error on exit")
		}
		return stdinErr
	}
}
// logrusWriter is an io.Writer that forwards everything written to it to
// logrus at info level, with prefix put in front of each chunk.
type logrusWriter struct {
	prefix string
}

// Write logs p, preceded by the writer's prefix, as a single info message.
// It always reports the whole slice as written and never returns an error.
func (w *logrusWriter) Write(p []byte) (int, error) {
	written := len(p)
	logrus.Infof("%s%s", w.prefix, p)
	return written, nil
}

View File

@ -236,9 +236,19 @@ case "$TEST_FLAVOR" in
# Use existing host bits when testing is to happen inside a container # Use existing host bits when testing is to happen inside a container
# since this script will run again in that environment. # since this script will run again in that environment.
# shellcheck disable=SC2154 # shellcheck disable=SC2154
if ((CONTAINER==0)) && [[ "$TEST_ENVIRON" == "host" ]]; then if [[ "$TEST_ENVIRON" == "host" ]]; then
if ((CONTAINER)); then
die "Refusing to config. host-test in container";
fi
remove_packaged_podman_files remove_packaged_podman_files
make install PREFIX=/usr ETCDIR=/etc make install PREFIX=/usr ETCDIR=/etc
elif [[ "$TEST_ENVIRON" == "container" ]]; then
if ((CONTAINER)); then
remove_packaged_podman_files
make install PREFIX=/usr ETCDIR=/etc
fi
else
# The first "$" is escaped so the variable's name is printed literally;
# an unescaped "$$" would expand to the PID of the shell instead.
die "Invalid value for \$TEST_ENVIRON=$TEST_ENVIRON"
fi fi
install_test_configs install_test_configs

View File

@ -3,18 +3,8 @@
%global with_check 0 %global with_check 0
%global with_unit_test 0 %global with_unit_test 0
%bcond_without doc %bcond_without doc
%bcond_without debug
%if %{with debug}
%global _find_debuginfo_dwz_opts %{nil}
%global _dwz_low_mem_die_limit 0
%else
%global debug_package %{nil} %global debug_package %{nil}
%endif
%if ! 0%{?gobuild:1}
%define gobuild(o:) go build -buildmode pie -compiler gc -tags="rpm_crashtraceback ${BUILDTAGS:-}" -ldflags "${LDFLAGS:-} -B 0x$(head -c20 /dev/urandom|od -An -tx1|tr -d ' \n') -extldflags '-Wl,-z,relro -Wl,--as-needed -Wl,-z,now -specs=/usr/lib/rpm/redhat/redhat-hardened-ld '" -a -v -x %{?**};
%endif
# podman hack directory # podman hack directory
%define hackdir %{_builddir}/%{repo}-%{shortcommit0} %define hackdir %{_builddir}/%{repo}-%{shortcommit0}
@ -536,6 +526,8 @@ export GOPATH=%{buildroot}/%{gopath}:$(pwd)/vendor:%{gopath}
%{_usr}/lib/systemd/user/podman-auto-update.timer %{_usr}/lib/systemd/user/podman-auto-update.timer
%{_usr}/lib/systemd/user/podman-restart.service %{_usr}/lib/systemd/user/podman-restart.service
%{_usr}/lib/tmpfiles.d/podman.conf %{_usr}/lib/tmpfiles.d/podman.conf
%dir %{_libexecdir}/%{name}
%{_libexecdir}/%{name}/rootlessport
%if 0%{?with_devel} %if 0%{?with_devel}
%files -n libpod-devel -f devel.file-list %files -n libpod-devel -f devel.file-list

View File

@ -484,10 +484,14 @@ func (r *Runtime) setupRootlessPortMappingViaRLK(ctr *Container, netnsPath strin
} }
cfgR := bytes.NewReader(cfgJSON) cfgR := bytes.NewReader(cfgJSON)
var stdout bytes.Buffer var stdout bytes.Buffer
cmd := exec.Command(fmt.Sprintf("/proc/%d/exe", os.Getpid())) path, err := r.config.FindHelperBinary(rootlessport.BinaryName, false)
cmd.Args = []string{rootlessport.ReexecKey} if err != nil {
// Leak one end of the pipe in rootlessport process, the other will be sent to conmon return err
}
cmd := exec.Command(path)
cmd.Args = []string{rootlessport.BinaryName}
// Leak one end of the pipe in rootlessport process, the other will be sent to conmon
if ctr.rootlessPortSyncR != nil { if ctr.rootlessPortSyncR != nil {
defer errorhandling.CloseQuiet(ctr.rootlessPortSyncR) defer errorhandling.CloseQuiet(ctr.rootlessPortSyncR)
} }

View File

@ -77,10 +77,12 @@ let
patchShebangs . patchShebangs .
make bin/podman make bin/podman
make bin/podman-remote make bin/podman-remote
make bin/rootlessport
''; '';
installPhase = '' installPhase = ''
install -Dm755 bin/podman $out/bin/podman install -Dm755 bin/podman $out/bin/podman
install -Dm755 bin/podman-remote $out/bin/podman-remote install -Dm755 bin/podman-remote $out/bin/podman-remote
install -Dm755 bin/rootlessport $out/libexec/podman/rootlessport
''; '';
}; };
in in

View File

@ -75,10 +75,12 @@ let
patchShebangs . patchShebangs .
make bin/podman make bin/podman
make bin/podman-remote make bin/podman-remote
make bin/rootlessport
''; '';
installPhase = '' installPhase = ''
install -Dm755 bin/podman $out/bin/podman install -Dm755 bin/podman $out/bin/podman
install -Dm755 bin/podman-remote $out/bin/podman-remote install -Dm755 bin/podman-remote $out/bin/podman-remote
install -Dm755 bin/rootlessport $out/libexec/podman/rootlessport
''; '';
}; };
in in

View File

@ -12,33 +12,12 @@
package rootlessport package rootlessport
import ( import (
"context"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net"
"os"
"os/exec"
"path/filepath"
"github.com/containernetworking/plugins/pkg/ns"
"github.com/containers/podman/v3/libpod/network/types" "github.com/containers/podman/v3/libpod/network/types"
"github.com/containers/storage/pkg/reexec"
"github.com/pkg/errors"
rkport "github.com/rootless-containers/rootlesskit/pkg/port"
rkbuiltin "github.com/rootless-containers/rootlesskit/pkg/port/builtin"
rkportutil "github.com/rootless-containers/rootlesskit/pkg/port/portutil"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
) )
const ( const (
// ReexecKey is the reexec key for the parent process. // BinaryName is the binary name for the parent process.
ReexecKey = "containers-rootlessport" BinaryName = "rootlessport"
// reexecChildKey is used internally for the second reexec
reexecChildKey = "containers-rootlessport-child"
reexecChildEnvOpaque = "_CONTAINERS_ROOTLESSPORT_CHILD_OPAQUE"
) )
// Config needs to be provided to the process via stdin as a JSON string. // Config needs to be provided to the process via stdin as a JSON string.
@ -53,325 +32,3 @@ type Config struct {
ContainerID string ContainerID string
RootlessCNI bool RootlessCNI bool
} }
// init registers the two reexec entry points of this package: ReexecKey
// runs parent() and reexecChildKey runs child(). When the selected function
// returns an error, it is printed to stdout and the process exits with
// status 1.
func init() {
	// register wraps run with the error handling shared by both roles.
	register := func(key string, run func() error) {
		reexec.Register(key, func() {
			if err := run(); err != nil {
				fmt.Println(err)
				os.Exit(1)
			}
		})
	}

	register(ReexecKey, parent)
	register(reexecChildKey, child)
}
// loadConfig reads r until EOF, decodes the content as a JSON Config and
// validates the fields the parent cannot work without.
//
// On success it returns the config together with the file wrapping
// cfg.ExitFD (used as a reader) and the file wrapping cfg.ReadyFD (used as
// a writer). It returns an error when reading or decoding fails, when
// NetNSPath is empty, or when ExitFD/ReadyFD are not positive or cannot be
// wrapped by os.NewFile.
func loadConfig(r io.Reader) (*Config, io.ReadCloser, io.WriteCloser, error) {
	raw, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, nil, nil, err
	}

	cfg := new(Config)
	if err = json.Unmarshal(raw, cfg); err != nil {
		return nil, nil, nil, err
	}

	if cfg.NetNSPath == "" {
		return nil, nil, nil, errors.New("missing NetNSPath")
	}

	// The exit fd is validated and wrapped before the ready fd is looked at.
	if cfg.ExitFD <= 0 {
		return nil, nil, nil, errors.New("missing ExitFD")
	}
	exitR := os.NewFile(uintptr(cfg.ExitFD), "exitfile")
	if exitR == nil {
		return nil, nil, nil, errors.New("invalid ExitFD")
	}

	if cfg.ReadyFD <= 0 {
		return nil, nil, nil, errors.New("missing ReadyFD")
	}
	readyW := os.NewFile(uintptr(cfg.ReadyFD), "readyfile")
	if readyW == nil {
		return nil, nil, nil, errors.New("invalid ReadyFD")
	}

	return cfg, exitR, readyW, nil
}
// parent implements the host-side half of rootlessport.
//
// It reads the JSON config from stdin, starts the rootlesskit parent driver,
// re-executes this binary as the child inside the network namespace named by
// cfg.NetNSPath, exposes the configured port mappings once both sides are
// connected, writes "1" to ReadyFD and then blocks until ExitFD reaches EOF.
//
// Cleanup is performed by deferred calls, which run in reverse registration
// order when the function returns: stop the parent driver, SIGTERM the
// child, close the child's quit pipe, remove the state directory.
//
// It returns the first error that prevents setup, or an error from reading
// ExitFD; errors during teardown are only logged.
func parent() error {
	// load config from stdin
	cfg, exitR, readyW, err := loadConfig(os.Stdin)
	if err != nil {
		return err
	}

	socketDir := filepath.Join(cfg.TmpDir, "rp")
	err = os.MkdirAll(socketDir, 0700)
	if err != nil {
		return err
	}

	// create the parent driver
	stateDir, err := ioutil.TempDir(cfg.TmpDir, "rootlessport")
	if err != nil {
		return err
	}
	defer os.RemoveAll(stateDir)
	driver, err := rkbuiltin.NewParentDriver(&logrusWriter{prefix: "parent: "}, stateDir)
	if err != nil {
		return err
	}

	initComplete := make(chan struct{})
	quit := make(chan struct{})
	errCh := make(chan error)
	// start the parent driver. initComplete will be closed when the child connected to the parent.
	logrus.Infof("Starting parent driver")
	go func() {
		driverErr := driver.RunParentDriver(initComplete, quit, nil)
		if driverErr != nil {
			logrus.WithError(driverErr).Warn("Parent driver exited")
		}
		errCh <- driverErr
		close(errCh)
	}()

	opaque := driver.OpaqueForChild()
	logrus.Infof("Opaque=%+v", opaque)
	opaqueJSON, err := json.Marshal(opaque)
	if err != nil {
		return err
	}

	// The child treats EOF on its stdin as the request to shut down, so
	// closing the write end of this pipe is what stops it.
	childQuitR, childQuitW, err := os.Pipe()
	if err != nil {
		return err
	}
	defer func() {
		// stop the child
		logrus.Info("Stopping child driver")
		if err := childQuitW.Close(); err != nil {
			logrus.WithError(err).Warn("Unable to close childQuitW")
		}
	}()

	// reexec the child process in the child netns
	cmd := exec.Command("/proc/self/exe")
	cmd.Args = []string{reexecChildKey}
	cmd.Stdin = childQuitR
	// same "<role>: " prefix format as the parent driver's writer
	cmd.Stdout = &logrusWriter{prefix: "child: "}
	cmd.Stderr = cmd.Stdout
	cmd.Env = append(os.Environ(), reexecChildEnvOpaque+"="+string(opaqueJSON))
	childNS, err := ns.GetNS(cfg.NetNSPath)
	if err != nil {
		return err
	}
	if err := childNS.Do(func(_ ns.NetNS) error {
		logrus.Infof("Starting child driver in child netns (%q %v)", cmd.Path, cmd.Args)
		return cmd.Start()
	}); err != nil {
		return err
	}

	childErrCh := make(chan error)
	go func() {
		err := cmd.Wait()
		childErrCh <- err
		close(childErrCh)
	}()

	defer func() {
		if err := unix.Kill(cmd.Process.Pid, unix.SIGTERM); err != nil {
			logrus.WithError(err).Warn("Kill child process")
		}
	}()

	logrus.Info("Waiting for initComplete")
	// wait for the child to connect to the parent
	select {
	case <-initComplete:
		logrus.Infof("initComplete is closed; parent and child established the communication channel")
	case err := <-childErrCh:
		// The child is gone, so initComplete can never fire. A nil exit status
		// must not be ignored: the channel is closed after the first value and
		// receiving from it again would return immediately forever.
		if err == nil {
			err = errors.New("child process exited before the communication channel was established")
		}
		return err
	case err := <-errCh:
		// Same reasoning for the parent driver goroutine.
		if err == nil {
			err = errors.New("parent driver exited before the communication channel was established")
		}
		return err
	}

	defer func() {
		logrus.Info("Stopping parent driver")
		quit <- struct{}{}
		if err := <-errCh; err != nil {
			logrus.WithError(err).Warn("Parent driver returned error on exit")
		}
	}()

	// let parent expose ports
	logrus.Infof("Exposing ports %v", cfg.Mappings)
	if err := exposePorts(driver, cfg.Mappings, cfg.ChildIP); err != nil {
		return err
	}

	// we only need to have a socket to reload ports when we run under rootless cni
	if cfg.RootlessCNI {
		socketfile := filepath.Join(socketDir, cfg.ContainerID)
		// make sure to remove the file if it exists to prevent EADDRINUSE
		_ = os.Remove(socketfile)
		// workaround to bypass the 108 char socket path limit
		// open the fd and use the path to the fd as bind argument
		fd, err := unix.Open(socketDir, unix.O_PATH, 0)
		if err != nil {
			return err
		}
		socket, err := net.ListenUnix("unixpacket", &net.UnixAddr{Name: fmt.Sprintf("/proc/self/fd/%d/%s", fd, cfg.ContainerID), Net: "unixpacket"})
		// The fd was only needed to build the bind path; close it before
		// looking at the listen result so it is released on failure as well.
		closeErr := unix.Close(fd)
		if err != nil {
			return err
		}
		// remove the socket file on exit
		defer os.Remove(socketfile)
		if closeErr != nil {
			logrus.Warnf("Failed to close the socketDir fd: %v", closeErr)
		}
		defer socket.Close()
		go serve(socket, driver)
	}

	logrus.Info("Ready")

	// https://github.com/containers/podman/issues/11248
	// Copy /dev/null to stdout and stderr to prevent SIGPIPE errors
	if f, err := os.OpenFile("/dev/null", os.O_WRONLY, 0755); err == nil {
		unix.Dup2(int(f.Fd()), 1) // nolint:errcheck
		unix.Dup2(int(f.Fd()), 2) // nolint:errcheck
		f.Close()
	}
	// write and close ReadyFD (convention is same as slirp4netns --ready-fd)
	if _, err := readyW.Write([]byte("1")); err != nil {
		return err
	}
	if err := readyW.Close(); err != nil {
		return err
	}

	// wait for ExitFD to be closed
	logrus.Info("Waiting for exitfd to be closed")
	if _, err := ioutil.ReadAll(exitR); err != nil {
		return err
	}
	return nil
}
// serve accepts connections on listener one at a time and passes each to
// handler, which re-adds the forwarded ports with the child IP sent by the
// client. The reply written back is "OK" on success or the error text on
// failure; the connection is closed after each request.
//
// serve returns when Accept fails with a non-temporary error (for example
// because the listener was closed). Retrying such an error would never
// succeed and would only spin the CPU. Temporary errors are retried.
func serve(listener net.Listener, pm rkport.Manager) {
	for {
		conn, err := listener.Accept()
		if err != nil {
			// we cannot log this error, stderr is already closed
			if ne, ok := err.(net.Error); ok && ne.Temporary() {
				continue
			}
			return
		}

		reply := "OK"
		if err := handler(context.TODO(), conn, pm); err != nil {
			reply = err.Error()
		}
		// Best effort: there is nowhere to report a failed reply or close.
		_, _ = conn.Write([]byte(reply))
		_ = conn.Close()
	}
}
// handler processes one reload request read from conn.
//
// The request is a single JSON string holding the new child IP. Every port
// currently known to pm is removed first and then added again with its
// ChildIP replaced by the new value. The first failing step aborts the
// request and its error is returned wrapped with a short description; ports
// handled before the failure are not rolled back.
func handler(ctx context.Context, conn io.Reader, pm rkport.Manager) error {
	var newChildIP string
	if err := json.NewDecoder(conn).Decode(&newChildIP); err != nil {
		return errors.Wrap(err, "rootless port failed to decode ports")
	}

	current, err := pm.ListPorts(ctx)
	if err != nil {
		return errors.Wrap(err, "rootless port failed to list ports")
	}

	// Drop all existing forwards before any of them is re-created.
	for _, st := range current {
		if err := pm.RemovePort(ctx, st.ID); err != nil {
			return errors.Wrap(err, "rootless port failed to remove port")
		}
	}

	// add the ports with the new child IP
	for _, st := range current {
		st.Spec.ChildIP = newChildIP
		if _, err := pm.AddPort(ctx, st.Spec); err != nil {
			return errors.Wrap(err, "rootless port failed to add port")
		}
	}
	return nil
}
// exposePorts registers one forward with pm for every entry of
// portMappings, all pointing at childIP. An empty HostIP is replaced by
// "0.0.0.0". Each spec is validated with rkportutil.ValidatePortSpec before
// it is added; the first validation or AddPort error stops the loop and is
// returned.
func exposePorts(pm rkport.Manager, portMappings []types.OCICNIPortMapping, childIP string) error {
	ctx := context.TODO()
	for _, mapping := range portMappings {
		spec := rkport.Spec{
			Proto:      mapping.Protocol,
			ParentIP:   mapping.HostIP,
			ParentPort: int(mapping.HostPort),
			ChildPort:  int(mapping.ContainerPort),
			ChildIP:    childIP,
		}
		if spec.ParentIP == "" {
			spec.ParentIP = "0.0.0.0"
		}

		err := rkportutil.ValidatePortSpec(spec, nil)
		if err == nil {
			_, err = pm.AddPort(ctx, spec)
		}
		if err != nil {
			return err
		}
	}
	return nil
}
// child implements the half of rootlessport that the parent re-executes
// inside the container network namespace.
//
// It decodes the opaque driver data from the reexecChildEnvOpaque
// environment variable, runs the rootlesskit child driver in a goroutine and
// waits for stdin to reach EOF, which is the signal to shut down. On that
// signal the driver is asked to quit and an error it returns is logged as a
// warning; the result of reading stdin is returned.
//
// If the driver stops before a shutdown was requested, child returns right
// away with the driver's error (or a generic one if the driver returned nil)
// so that the failure becomes visible through the process exit status
// instead of leaving the process blocked on stdin.
func child() error {
	// load the config from the parent
	var opaque map[string]string
	if err := json.Unmarshal([]byte(os.Getenv(reexecChildEnvOpaque)), &opaque); err != nil {
		return err
	}

	// start the child driver
	quit := make(chan struct{})
	// Buffered so the goroutine can always deliver its result and finish.
	errCh := make(chan error, 1)
	go func() {
		d := rkbuiltin.NewChildDriver(os.Stderr)
		errCh <- d.RunChildDriver(opaque, quit)
	}()

	// wait for stdin to be closed
	stdinCh := make(chan error, 1)
	go func() {
		_, err := ioutil.ReadAll(os.Stdin)
		stdinCh <- err
	}()

	select {
	case err := <-errCh:
		// The driver ended although nobody asked it to.
		if err == nil {
			err = errors.New("child driver exited unexpectedly")
		}
		return err
	case stdinErr := <-stdinCh:
		logrus.Info("Stopping child driver")
		var driverErr error
		// Either the driver accepts the quit request and then reports its
		// result, or it has already finished and only the result is pending.
		select {
		case quit <- struct{}{}:
			driverErr = <-errCh
		case driverErr = <-errCh:
		}
		if driverErr != nil {
			logrus.WithError(driverErr).Warn("Child driver returned error on exit")
		}
		return stdinErr
	}
}
// logrusWriter is an io.Writer that forwards everything written to it to
// logrus at info level, with prefix put in front of each chunk.
type logrusWriter struct {
	prefix string
}

// Write logs p, preceded by the writer's prefix, as a single info message.
// It always reports the whole slice as written and never returns an error.
func (w *logrusWriter) Write(p []byte) (int, error) {
	written := len(p)
	logrus.Infof("%s%s", w.prefix, p)
	return written, nil
}