diff --git a/cmd/podman/containers/create.go b/cmd/podman/containers/create.go index a6550f37f2..e25c90f4d7 100644 --- a/cmd/podman/containers/create.go +++ b/cmd/podman/containers/create.go @@ -257,7 +257,7 @@ func CreateInit(c *cobra.Command, vals entities.ContainerCreateOptions, isInfra if registry.IsRemote() { return vals, errors.New("the '--group-add keep-groups' option is not supported in remote mode") } - vals.Annotation = append(vals.Annotation, "run.oci.keep_original_groups=1") + vals.Annotation = append(vals.Annotation, fmt.Sprintf("%s=1", define.RunOCIKeepOriginalGroups)) } else { groups = append(groups, g) } diff --git a/docs/source/markdown/options/security-opt.md b/docs/source/markdown/options/security-opt.md index 252bede2ff..8ac58b124a 100644 --- a/docs/source/markdown/options/security-opt.md +++ b/docs/source/markdown/options/security-opt.md @@ -18,6 +18,8 @@ Security Options Note: Labeling can be disabled for all <<|pods/>>containers by setting label=false in the **containers.conf** (`/etc/containers/containers.conf` or `$HOME/.config/containers/containers.conf`) file. +- **label=nested**: Allows SELinux modifications within the container. Containers are allowed to modify SELinux labels on files and processes, as long as SELinux policy allows it. Without **nested**, containers view SELinux as disabled, even when it is enabled on the host, and are prevented from setting any labels. + - **mask**=_/path/1:/path/2_: The paths to mask separated by a colon. A masked path cannot be accessed inside the container<>. - **no-new-privileges**: Disable container processes from gaining additional privileges. 
diff --git a/libpod/container_config.go b/libpod/container_config.go index 81c912aabb..6aabc817ac 100644 --- a/libpod/container_config.go +++ b/libpod/container_config.go @@ -219,6 +219,8 @@ type ContainerSecurityConfig struct { // Libpod - mostly used in rootless containers where the user running // Libpod wants to retain their UID inside the container. AddCurrentUserPasswdEntry bool `json:"addCurrentUserPasswdEntry,omitempty"` + // LabelNested allows labeling separation from within a container + LabelNested bool `json:"label_nested"` } // ContainerNameSpaceConfig is an embedded sub-config providing diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 6ca63f9e20..d2d0e953ab 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -32,8 +32,13 @@ var ( ) func (c *Container) mountSHM(shmOptions string) error { + contextType := "context" + if c.config.LabelNested { + contextType = "rootcontext" + } + if err := unix.Mount("shm", c.config.ShmDir, "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, - label.FormatMountLabel(shmOptions, c.config.MountLabel)); err != nil { + label.FormatMountLabelByType(shmOptions, c.config.MountLabel, contextType)); err != nil { return fmt.Errorf("failed to mount shm tmpfs %q: %w", c.config.ShmDir, err) } return nil diff --git a/libpod/define/annotations.go b/libpod/define/annotations.go index a70f83f785..72b5b18aa2 100644 --- a/libpod/define/annotations.go +++ b/libpod/define/annotations.go @@ -1,6 +1,12 @@ package define const ( + // RunOCIMountContextType tells the OCI runtime which context mount + // type to use. 
context, rootcontext, fscontext, defcontext + RunOCIMountContextType = "run.oci.mount_context_type" + // RunOCIKeepOriginalGroups tells the OCI runtime to leak the user's + // current groups into the container + RunOCIKeepOriginalGroups = "run.oci.keep_original_groups" // InspectAnnotationCIDFile is used by Inspect to determine if a // container ID file was created for the container. // If an annotation with this key is found in the OCI spec, it will be @@ -58,7 +64,6 @@ const ( // If an annotation with this key is found in the OCI spec, it will be // used in the output of Inspect(). InspectAnnotationApparmor = "io.podman.annotations.apparmor" - // InspectResponseTrue is a boolean True response for an inspect // annotation. InspectResponseTrue = "TRUE" diff --git a/libpod/options.go b/libpod/options.go index 13ee549478..bc70e4a32c 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -2341,3 +2341,16 @@ func WithMountAllDevices() CtrCreateOption { return nil } } + +// WithLabelNested sets the LabelNested flag allowing label separation within container +func WithLabelNested(nested bool) CtrCreateOption { + return func(ctr *Container) error { + if ctr.valid { + return define.ErrCtrFinalized + } + + ctr.config.LabelNested = nested + + return nil + } +} diff --git a/libpod/runtime_volume_common.go b/libpod/runtime_volume_common.go index 54fc158be0..81fb00f32d 100644 --- a/libpod/runtime_volume_common.go +++ b/libpod/runtime_volume_common.go @@ -120,15 +120,13 @@ func (r *Runtime) newVolume(ctx context.Context, noCreatePluginVolume bool, opti volume.config.StorageImageID = image.ID() // Create a backing container in c/storage. 
- storageConfig := storage.ContainerOptions{ - LabelOpts: []string{"filetype:container_file_t:s0"}, - } + storageConfig := storage.ContainerOptions{} if len(volume.config.MountLabel) > 0 { context, err := selinux.NewContext(volume.config.MountLabel) if err != nil { return nil, fmt.Errorf("failed to get SELinux context from %s: %w", volume.config.MountLabel, err) } - storageConfig.LabelOpts = []string{fmt.Sprintf("filetype:%s:s0", context["type"])} + storageConfig.LabelOpts = []string{fmt.Sprintf("filetype:%s", context["type"])} } if _, err := r.storageService.CreateContainerStorage(ctx, r.imageContext, imgString, image.ID(), volume.config.StorageName, volume.config.StorageID, storageConfig); err != nil { return nil, fmt.Errorf("creating backing storage for image driver: %w", err) diff --git a/pkg/specgen/generate/container_create.go b/pkg/specgen/generate/container_create.go index 55483b8a26..d73abced7e 100644 --- a/pkg/specgen/generate/container_create.go +++ b/pkg/specgen/generate/container_create.go @@ -482,6 +482,9 @@ func createContainerOptions(rt *libpod.Runtime, s *specgen.SpecGenerator, pod *l options = append(options, libpod.WithLogDriver(s.LogConfiguration.Driver)) } } + if s.ContainerSecurityConfig.LabelNested { + options = append(options, libpod.WithLabelNested(s.ContainerSecurityConfig.LabelNested)) + } // Security options if len(s.SelinuxOpts) > 0 { options = append(options, libpod.WithSecLabels(s.SelinuxOpts)) diff --git a/pkg/specgen/specgen.go b/pkg/specgen/specgen.go index c62f274689..ff91489dbf 100644 --- a/pkg/specgen/specgen.go +++ b/pkg/specgen/specgen.go @@ -396,6 +396,10 @@ type ContainerSecurityConfig struct { // mount temporary file systems ReadWriteTmpfs bool `json:"read_write_tmpfs,omitempty"` + // LabelNested indicates whether or not the container is allowed to + // run fully nested containers including labelling + LabelNested bool `json:"label_nested,omitempty"` + // Umask is the umask the init process of the container will be run 
with. Umask string `json:"umask,omitempty"` // ProcOpts are the options used for the proc mount. diff --git a/pkg/specgenutil/specgen.go b/pkg/specgenutil/specgen.go index 6cfc14d9fd..4bf889fec9 100644 --- a/pkg/specgenutil/specgen.go +++ b/pkg/specgenutil/specgen.go @@ -620,53 +620,57 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions } for _, opt := range c.SecurityOpt { - if opt == "no-new-privileges" { - s.ContainerSecurityConfig.NoNewPrivileges = true + // Docker deprecated the ":" syntax but still supports it, + // so we need to as well + var con []string + if strings.Contains(opt, "=") { + con = strings.SplitN(opt, "=", 2) } else { - // Docker deprecated the ":" syntax but still supports it, - // so we need to as well - var con []string - if strings.Contains(opt, "=") { - con = strings.SplitN(opt, "=", 2) - } else { - con = strings.SplitN(opt, ":", 2) + con = strings.SplitN(opt, ":", 2) + } + if len(con) != 2 && + con[0] != "no-new-privileges" { + return fmt.Errorf("invalid --security-opt 1: %q", opt) + } + switch con[0] { + case "apparmor": + s.ContainerSecurityConfig.ApparmorProfile = con[1] + s.Annotations[define.InspectAnnotationApparmor] = con[1] + case "label": + if con[1] == "nested" { + s.ContainerSecurityConfig.LabelNested = true + continue } - if len(con) != 2 { - return fmt.Errorf("invalid --security-opt 1: %q", opt) - } - switch con[0] { - case "apparmor": - s.ContainerSecurityConfig.ApparmorProfile = con[1] - s.Annotations[define.InspectAnnotationApparmor] = con[1] - case "label": - // TODO selinux opts and label opts are the same thing - s.ContainerSecurityConfig.SelinuxOpts = append(s.ContainerSecurityConfig.SelinuxOpts, con[1]) - s.Annotations[define.InspectAnnotationLabel] = strings.Join(s.ContainerSecurityConfig.SelinuxOpts, ",label=") - case "mask": - s.ContainerSecurityConfig.Mask = append(s.ContainerSecurityConfig.Mask, strings.Split(con[1], ":")...) 
- case "proc-opts": - s.ProcOpts = strings.Split(con[1], ",") - case "seccomp": - s.SeccompProfilePath = con[1] - s.Annotations[define.InspectAnnotationSeccomp] = con[1] + // TODO selinux opts and label opts are the same thing + s.ContainerSecurityConfig.SelinuxOpts = append(s.ContainerSecurityConfig.SelinuxOpts, con[1]) + s.Annotations[define.InspectAnnotationLabel] = strings.Join(s.ContainerSecurityConfig.SelinuxOpts, ",label=") + case "mask": + s.ContainerSecurityConfig.Mask = append(s.ContainerSecurityConfig.Mask, strings.Split(con[1], ":")...) + case "proc-opts": + s.ProcOpts = strings.Split(con[1], ",") + case "seccomp": + s.SeccompProfilePath = con[1] + s.Annotations[define.InspectAnnotationSeccomp] = con[1] // this option is for docker compatibility, it is the same as unmask=ALL - case "systempaths": - if con[1] == "unconfined" { - s.ContainerSecurityConfig.Unmask = append(s.ContainerSecurityConfig.Unmask, []string{"ALL"}...) - } else { - return fmt.Errorf("invalid systempaths option %q, only `unconfined` is supported", con[1]) - } - case "unmask": - s.ContainerSecurityConfig.Unmask = append(s.ContainerSecurityConfig.Unmask, con[1:]...) - case "no-new-privileges": - noNewPrivileges, err := strconv.ParseBool(con[1]) + case "systempaths": + if con[1] == "unconfined" { + s.ContainerSecurityConfig.Unmask = append(s.ContainerSecurityConfig.Unmask, []string{"ALL"}...) + } else { + return fmt.Errorf("invalid systempaths option %q, only `unconfined` is supported", con[1]) + } + case "unmask": + s.ContainerSecurityConfig.Unmask = append(s.ContainerSecurityConfig.Unmask, con[1:]...) 
+ case "no-new-privileges": + noNewPrivileges := true + if len(con) == 2 { + noNewPrivileges, err = strconv.ParseBool(con[1]) if err != nil { return fmt.Errorf("invalid --security-opt 2: %q", opt) } - s.ContainerSecurityConfig.NoNewPrivileges = noNewPrivileges - default: - return fmt.Errorf("invalid --security-opt 2: %q", opt) } + s.ContainerSecurityConfig.NoNewPrivileges = noNewPrivileges + default: + return fmt.Errorf("invalid --security-opt 2: %q", opt) } } @@ -690,6 +694,17 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions if len(s.Volumes) == 0 || len(c.Volume) != 0 { s.Volumes = volumes } + + if s.ContainerSecurityConfig.LabelNested { + // Need to unmask the SELinux file system + s.Unmask = append(s.Unmask, "/sys/fs/selinux", "/proc") + s.Mounts = append(s.Mounts, specs.Mount{ + Source: "/sys/fs/selinux", + Destination: "/sys/fs/selinux", + Type: define.TypeBind, + }) + s.Annotations[define.RunOCIMountContextType] = "rootcontext" + } // TODO make sure these work in clone if len(s.OverlayVolumes) == 0 { s.OverlayVolumes = overlayVolumes diff --git a/test/e2e/containers_conf_test.go b/test/e2e/containers_conf_test.go index 399824badd..90c4c7b42f 100644 --- a/test/e2e/containers_conf_test.go +++ b/test/e2e/containers_conf_test.go @@ -7,6 +7,7 @@ import ( "path/filepath" "strings" + "github.com/containers/podman/v4/libpod/define" . "github.com/containers/podman/v4/test/utils" . "github.com/onsi/ginkgo" . 
"github.com/onsi/gomega" @@ -111,7 +112,7 @@ var _ = Describe("Verify podman containers.conf usage", func() { result := podmanTest.Podman([]string{"top", "test1", "capeff"}) result.WaitWithDefaultTimeout() Expect(result).Should(Exit(0)) - Expect(result.Out.Contents()).To( + Expect(result.OutputToString()).To( And( ContainSubstring("FOWNER"), ContainSubstring("SETFCAP"), @@ -128,7 +129,7 @@ var _ = Describe("Verify podman containers.conf usage", func() { result := podmanTest.Podman([]string{"container", "top", "test1", "capeff"}) result.WaitWithDefaultTimeout() Expect(result).Should(Exit(0)) - Expect(result.Out.Contents()).ToNot( + Expect(result.OutputToString()).ToNot( And( ContainSubstring("SETUID"), ContainSubstring("FOWNER"), @@ -266,7 +267,7 @@ var _ = Describe("Verify podman containers.conf usage", func() { session.WaitWithDefaultTimeout() Expect(session).Should(Exit(0)) Expect(session.OutputToStringArray()).To(ContainElement(HavePrefix("search"))) - Expect(session.Out.Contents()).To( + Expect(session.OutputToString()).To( And( ContainSubstring("foobar.com"), ContainSubstring("1.2.3.4"), @@ -322,7 +323,7 @@ var _ = Describe("Verify podman containers.conf usage", func() { session.WaitWithDefaultTimeout() Expect(session).Should(Exit(0)) Expect(session.OutputToStringArray()).To(ContainElement(HavePrefix("search"))) - Expect(session.Out.Contents()).To( + Expect(session.OutputToString()).To( And( ContainSubstring("foobar.com"), ContainSubstring("1.2.3.4"), @@ -333,26 +334,26 @@ var _ = Describe("Verify podman containers.conf usage", func() { session = podmanTest.Podman([]string{"run", "--rm", ALPINE, "cat", "/proc/sys/net/ipv4/ping_group_range"}) session.WaitWithDefaultTimeout() Expect(session).Should(Exit(0)) - Expect(session.Out.Contents()).To(ContainSubstring("1000")) + Expect(session.OutputToString()).To(ContainSubstring("1000")) // shm-size session = podmanTest.Podman([]string{"run", ALPINE, "grep", "shm", "/proc/self/mounts"}) session.WaitWithDefaultTimeout() 
Expect(session).Should(Exit(0)) - Expect(session.Out.Contents()).To(ContainSubstring("size=200k")) + Expect(session.OutputToString()).To(ContainSubstring("size=200k")) // ulimits session = podmanTest.Podman([]string{"run", "--rm", fedoraMinimal, "ulimit", "-n"}) session.WaitWithDefaultTimeout() Expect(session).Should(Exit(0)) - Expect(session.Out.Contents()).To(ContainSubstring("500")) + Expect(session.OutputToString()).To(ContainSubstring("500")) // Configuration that comes from remote client // Timezone session = podmanTest.Podman([]string{"run", ALPINE, "date", "+'%H %Z'"}) session.WaitWithDefaultTimeout() Expect(session).Should(Exit(0)) - Expect(session.Out.Contents()).To( + Expect(session.OutputToString()).To( Or( ContainSubstring("EST"), ContainSubstring("EDT"), @@ -366,21 +367,21 @@ var _ = Describe("Verify podman containers.conf usage", func() { }) It("add annotations", func() { - // containers.conf is set to "run.oci.keep_original_groups=1" + // containers.conf is set to "run.oci.keep_original_groups=1" session := podmanTest.Podman([]string{"create", "--rm", "--name", "test", fedoraMinimal}) session.WaitWithDefaultTimeout() Expect(session).Should(Exit(0)) inspect := podmanTest.Podman([]string{"inspect", "--format", "{{ .Config.Annotations }}", "test"}) inspect.WaitWithDefaultTimeout() - Expect(inspect.Out.Contents()).To(ContainSubstring("run.oci.keep_original_groups:1")) + Expect(inspect.OutputToString()).To(ContainSubstring(fmt.Sprintf("%s:1", define.RunOCIKeepOriginalGroups))) }) It("--add-host and no-hosts=true fails", func() { session := podmanTest.Podman([]string{"run", "-dt", "--add-host", "test1:127.0.0.1", ALPINE, "top"}) session.WaitWithDefaultTimeout() Expect(session).To(ExitWithError()) - Expect(session.Err.Contents()).To(ContainSubstring("--no-hosts and --add-host cannot be set together")) + Expect(session.ErrorToString()).To(ContainSubstring("--no-hosts and --add-host cannot be set together")) session = podmanTest.Podman([]string{"run", "-dt", 
"--add-host", "test1:127.0.0.1", "--no-hosts=false", ALPINE, "top"}) session.WaitWithDefaultTimeout() @@ -391,12 +392,12 @@ var _ = Describe("Verify podman containers.conf usage", func() { session := podmanTest.Podman([]string{"run", "--rm", "--name", "test", ALPINE, "cat", "/etc/hosts"}) session.WaitWithDefaultTimeout() Expect(session).Should(Exit(0)) - Expect(session.Out.Contents()).ToNot(ContainSubstring("test")) + Expect(session.OutputToString()).ToNot(ContainSubstring("test")) session = podmanTest.Podman([]string{"run", "--rm", "--name", "test", "--no-hosts=false", ALPINE, "cat", "/etc/hosts"}) session.WaitWithDefaultTimeout() Expect(session).Should(Exit(0)) - Expect(session.Out.Contents()).To(ContainSubstring("test")) + Expect(session.OutputToString()).To(ContainSubstring("test")) }) It("seccomp profile path", func() { @@ -462,7 +463,7 @@ var _ = Describe("Verify podman containers.conf usage", func() { session = podmanTest.Podman([]string{"info", "--format", "{{.Store.ImageCopyTmpDir}}"}) session.WaitWithDefaultTimeout() Expect(session).Should(Exit(0)) - Expect(session.Out.Contents()).To(ContainSubstring(storagePath)) + Expect(session.OutputToString()).To(ContainSubstring(storagePath)) containersConf = []byte("[engine]\nimage_copy_tmp_dir=\"storage1\"") err = os.WriteFile(configPath, containersConf, os.ModePerm) @@ -472,7 +473,7 @@ var _ = Describe("Verify podman containers.conf usage", func() { session = podmanTest.Podman([]string{"info", "--format", "{{.Store.ImageCopyTmpDir}}"}) session.WaitWithDefaultTimeout() Expect(session).Should(Exit(125)) - Expect(session.Err.Contents()).To(ContainSubstring("invalid image_copy_tmp_dir value \"storage1\" (relative paths are not accepted)")) + Expect(session.ErrorToString()).To(ContainSubstring("invalid image_copy_tmp_dir value \"storage1\" (relative paths are not accepted)")) os.Setenv("TMPDIR", "/hoge") session = podmanTest.Podman([]string{"info", "--format", "{{.Store.ImageCopyTmpDir}}"}) @@ -490,7 +491,7 @@ var _ = 
Describe("Verify podman containers.conf usage", func() { result := podmanTest.Podman([]string{"system", "service", "--help"}) result.WaitWithDefaultTimeout() Expect(result).Should(Exit(0)) - Expect(result.Out.Contents()).To(ContainSubstring("(default 1234)")) + Expect(result.OutputToString()).To(ContainSubstring("(default 1234)")) }) It("bad infra_image name", func() { @@ -512,17 +513,17 @@ var _ = Describe("Verify podman containers.conf usage", func() { result := podmanTest.Podman([]string{"pod", "create", "--infra-image", infra2}) result.WaitWithDefaultTimeout() Expect(result).Should(Exit(125)) - Expect(result.Err.Contents()).To(ContainSubstring(error2String)) + Expect(result.ErrorToString()).To(ContainSubstring(error2String)) result = podmanTest.Podman([]string{"pod", "create"}) result.WaitWithDefaultTimeout() Expect(result).Should(Exit(125)) - Expect(result.Err.Contents()).To(ContainSubstring(errorString)) + Expect(result.ErrorToString()).To(ContainSubstring(errorString)) result = podmanTest.Podman([]string{"create", "--pod", "new:pod1", ALPINE}) result.WaitWithDefaultTimeout() Expect(result).Should(Exit(125)) - Expect(result.Err.Contents()).To(ContainSubstring(errorString)) + Expect(result.ErrorToString()).To(ContainSubstring(errorString)) }) It("set .engine.remote=true", func() { diff --git a/test/system/410-selinux.bats b/test/system/410-selinux.bats index d23e687b29..2347fcc447 100644 --- a/test/system/410-selinux.bats +++ b/test/system/410-selinux.bats @@ -277,4 +277,20 @@ function check_label() { is "$output" "${RELABEL} $tmpdir" "Shared Relabel Correctly" } +@test "podman selinux nested" { + skip_if_no_selinux + + ROOTCONTEXT='rw,rootcontext="system_u:object_r:container_file_t:s0:c1,c2"' + SELINUXMNT="selinuxfs.*(rw,nosuid,noexec,relatime)" + + SELINUXMNT="tmpfs.*selinux.*\(ro" + run_podman run --rm --security-opt label=level:s0:c1,c2 $IMAGE mount + assert "$output" !~ "${ROOTCONTEXT}" "Don't use rootcontext" + assert "$output" =~ "${SELINUXMNT}" "Mount 
SELinux file system readwrite" + + run_podman run --rm --security-opt label=nested --security-opt label=level:s0:c1,c2 $IMAGE mount + assert "$output" =~ "${ROOTCONTEXT}" "Uses rootcontext" + assert "$output" =~ "${SELINUXMNT}" "Mount SELinux file system readwrite" +} + # vim: filetype=sh