Merge pull request #11246 from vrothberg/sdnotify=container
Implement SD-NOTIFY proxy in conmon
This commit is contained in:
		
						commit
						7d8650cce8
					
				|  | @ -126,6 +126,10 @@ type Container struct { | |||
| 	// This is true if a container is restored from a checkpoint.
 | ||||
| 	restoreFromCheckpoint bool | ||||
| 
 | ||||
| 	// Used to query the NOTIFY_SOCKET once along with setting up
 | ||||
| 	// mounts etc.
 | ||||
| 	notifySocket string | ||||
| 
 | ||||
| 	slirp4netnsSubnet *net.IPNet | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -352,6 +352,10 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { | |||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	if err := c.mountNotifySocket(g); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	// Get host UID and GID based on the container process UID and GID.
 | ||||
| 	hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid)) | ||||
| 	if err != nil { | ||||
|  | @ -777,6 +781,41 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { | |||
| 	return g.Config, nil | ||||
| } | ||||
| 
 | ||||
| // mountNotifySocket mounts the NOTIFY_SOCKET into the container if it's set
 | ||||
| // and if the sdnotify mode is set to container.  It also sets c.notifySocket
 | ||||
| // to avoid redundantly looking up the env variable.
 | ||||
| func (c *Container) mountNotifySocket(g generate.Generator) error { | ||||
| 	notify, ok := os.LookupEnv("NOTIFY_SOCKET") | ||||
| 	if !ok { | ||||
| 		return nil | ||||
| 	} | ||||
| 	c.notifySocket = notify | ||||
| 
 | ||||
| 	if c.config.SdNotifyMode != define.SdNotifyModeContainer { | ||||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
| 	notifyDir := filepath.Join(c.bundlePath(), "notify") | ||||
| 	logrus.Debugf("checking notify %q dir", notifyDir) | ||||
| 	if err := os.MkdirAll(notifyDir, 0755); err != nil { | ||||
| 		if !os.IsExist(err) { | ||||
| 			return errors.Wrapf(err, "unable to create notify %q dir", notifyDir) | ||||
| 		} | ||||
| 	} | ||||
| 	if err := label.Relabel(notifyDir, c.MountLabel(), true); err != nil { | ||||
| 		return errors.Wrapf(err, "relabel failed %q", notifyDir) | ||||
| 	} | ||||
| 	logrus.Debugf("add bindmount notify %q dir", notifyDir) | ||||
| 	if _, ok := c.state.BindMounts["/run/notify"]; !ok { | ||||
| 		c.state.BindMounts["/run/notify"] = notifyDir | ||||
| 	} | ||||
| 
 | ||||
| 	// Set the container's notify socket to the proxy socket created by conmon
 | ||||
| 	g.AddProcessEnv("NOTIFY_SOCKET", "/run/notify/notify.sock") | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // systemd expects to have /run, /run/lock and /tmp on tmpfs
 | ||||
| // It also expects to be able to write to /sys/fs/cgroup/systemd and /var/log/journal
 | ||||
| func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) error { | ||||
|  | @ -1730,6 +1769,7 @@ rootless=%d | |||
| 			c.state.BindMounts[dest] = src | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -14,6 +14,7 @@ var initInodes = map[string]bool{ | |||
| 	"/etc/resolv.conf":   true, | ||||
| 	"/proc":              true, | ||||
| 	"/run":               true, | ||||
| 	"/run/notify":        true, | ||||
| 	"/run/.containerenv": true, | ||||
| 	"/run/secrets":       true, | ||||
| 	"/sys":               true, | ||||
|  |  | |||
|  | @ -462,7 +462,7 @@ func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *Ex | |||
| 		Setpgid: true, | ||||
| 	} | ||||
| 
 | ||||
| 	err = startCommandGivenSelinux(execCmd) | ||||
| 	err = startCommandGivenSelinux(execCmd, c) | ||||
| 
 | ||||
| 	// We don't need children pipes  on the parent side
 | ||||
| 	errorhandling.CloseQuiet(childSyncPipe) | ||||
|  |  | |||
|  | @ -364,11 +364,6 @@ func (r *ConmonOCIRuntime) StartContainer(ctr *Container) error { | |||
| 		return err | ||||
| 	} | ||||
| 	env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)} | ||||
| 	if ctr.config.SdNotifyMode == define.SdNotifyModeContainer { | ||||
| 		if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok { | ||||
| 			env = append(env, fmt.Sprintf("NOTIFY_SOCKET=%s", notify)) | ||||
| 		} | ||||
| 	} | ||||
| 	if path, ok := os.LookupEnv("PATH"); ok { | ||||
| 		env = append(env, fmt.Sprintf("PATH=%s", path)) | ||||
| 	} | ||||
|  | @ -1014,12 +1009,6 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if ctr.config.SdNotifyMode == define.SdNotifyModeIgnore { | ||||
| 		if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil { | ||||
| 			logrus.Warnf("Error unsetting NOTIFY_SOCKET %v", err) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	pidfile := ctr.config.PidFile | ||||
| 	if pidfile == "" { | ||||
| 		pidfile = filepath.Join(ctr.state.RunDir, "pidfile") | ||||
|  | @ -1027,6 +1016,10 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co | |||
| 
 | ||||
| 	args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag) | ||||
| 
 | ||||
| 	if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.notifySocket != "" { | ||||
| 		args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.notifySocket)) | ||||
| 	} | ||||
| 
 | ||||
| 	if ctr.config.Spec.Process.Terminal { | ||||
| 		args = append(args, "-t") | ||||
| 	} else if ctr.config.Stdin { | ||||
|  | @ -1171,7 +1164,8 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co | |||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	err = startCommandGivenSelinux(cmd) | ||||
| 	err = startCommandGivenSelinux(cmd, ctr) | ||||
| 
 | ||||
| 	// regardless of whether we errored or not, we no longer need the children pipes
 | ||||
| 	childSyncPipe.Close() | ||||
| 	childStartPipe.Close() | ||||
|  | @ -1203,7 +1197,13 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co | |||
| 		// conmon not having a pid file is a valid state, so don't set it if we don't have it
 | ||||
| 		logrus.Infof("Got Conmon PID as %d", conmonPID) | ||||
| 		ctr.state.ConmonPID = conmonPID | ||||
| 		if ctr.config.SdNotifyMode != define.SdNotifyModeIgnore { | ||||
| 
 | ||||
| 		// Send the MAINPID via sdnotify if needed.
 | ||||
| 		switch ctr.config.SdNotifyMode { | ||||
| 		case define.SdNotifyModeContainer, define.SdNotifyModeIgnore: | ||||
| 		// Nothing to do or conmon takes care of it already.
 | ||||
| 
 | ||||
| 		default: | ||||
| 			if sent, err := daemon.SdNotify(false, fmt.Sprintf("MAINPID=%d", conmonPID)); err != nil { | ||||
| 				logrus.Errorf("Error notifying systemd of Conmon PID: %v", err) | ||||
| 			} else if sent { | ||||
|  | @ -1239,11 +1239,6 @@ func (r *ConmonOCIRuntime) configureConmonEnv(ctr *Container, runtimeDir string) | |||
| 	} | ||||
| 
 | ||||
| 	extraFiles := make([]*os.File, 0) | ||||
| 	if ctr.config.SdNotifyMode == define.SdNotifyModeContainer { | ||||
| 		if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok { | ||||
| 			env = append(env, fmt.Sprintf("NOTIFY_SOCKET=%s", notify)) | ||||
| 		} | ||||
| 	} | ||||
| 	if !r.sdNotify { | ||||
| 		if listenfds, ok := os.LookupEnv("LISTEN_FDS"); ok { | ||||
| 			env = append(env, fmt.Sprintf("LISTEN_FDS=%s", listenfds), "LISTEN_PID=1") | ||||
|  | @ -1335,7 +1330,23 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p | |||
| 
 | ||||
| // startCommandGivenSelinux starts a container ensuring to set the labels of
 | ||||
| // the process to make sure SELinux doesn't block conmon communication, if SELinux is enabled
 | ||||
| func startCommandGivenSelinux(cmd *exec.Cmd) error { | ||||
| func startCommandGivenSelinux(cmd *exec.Cmd, ctr *Container) error { | ||||
| 	// Make sure to unset the NOTIFY_SOCKET and reset if afterwards if needed.
 | ||||
| 	switch ctr.config.SdNotifyMode { | ||||
| 	case define.SdNotifyModeContainer, define.SdNotifyModeIgnore: | ||||
| 		if ctr.notifySocket != "" { | ||||
| 			if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil { | ||||
| 				logrus.Warnf("Error unsetting NOTIFY_SOCKET %v", err) | ||||
| 			} | ||||
| 
 | ||||
| 			defer func() { | ||||
| 				if err := os.Setenv("NOTIFY_SOCKET", ctr.notifySocket); err != nil { | ||||
| 					logrus.Errorf("Error resetting NOTIFY_SOCKET=%s", ctr.notifySocket) | ||||
| 				} | ||||
| 			}() | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if !selinux.GetEnabled() { | ||||
| 		return cmd.Start() | ||||
| 	} | ||||
|  |  | |||
|  | @ -221,7 +221,6 @@ function _confirm_update() { | |||
| } | ||||
| 
 | ||||
| @test "podman auto-update - label io.containers.autoupdate=local with rollback" { | ||||
|     skip "This test flakes way too often, see #11175" | ||||
|     # sdnotify fails with runc 1.0.0-3-dev2 on Ubuntu. Let's just | ||||
|     # assume that we work only with crun, nothing else. | ||||
|     # [copied from 260-sdnotify.bats] | ||||
|  |  | |||
|  | @ -130,6 +130,8 @@ function _assert_mainpid_is_conmon() { | |||
|     _stop_socat | ||||
| } | ||||
| 
 | ||||
| # These tests can fail in dev. environment because of SELinux. | ||||
| # quick fix: chcon -t container_runtime_exec_t ./bin/podman | ||||
| @test "sdnotify : container" { | ||||
|     # Sigh... we need to pull a humongous image because it has systemd-notify. | ||||
|     # (IMPORTANT: fedora:32 and above silently removed systemd-notify; this | ||||
|  | @ -150,7 +152,7 @@ function _assert_mainpid_is_conmon() { | |||
|     wait_for_ready $cid | ||||
| 
 | ||||
|     run_podman logs $cid | ||||
|     is "${lines[0]}" "/.*/container\.sock/notify" "NOTIFY_SOCKET is passed to container" | ||||
|     is "${lines[0]}" "/run/notify/notify.sock" "NOTIFY_SOCKET is passed to container" | ||||
| 
 | ||||
|     # With container, READY=1 isn't necessarily the last message received; | ||||
|     # just look for it anywhere in received messages | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue