405 lines
12 KiB
Rust
405 lines
12 KiB
Rust
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
use std::ffi::{c_char, CString, OsStr};
|
|
use std::fs::{self, OpenOptions, Permissions};
|
|
use std::io::{self, ErrorKind};
|
|
use std::os::unix::ffi::OsStrExt;
|
|
use std::os::unix::fs::{MetadataExt, PermissionsExt};
|
|
use std::process::{Command, Stdio};
|
|
use std::str;
|
|
|
|
use anyhow::{anyhow, bail, ensure, Result};
|
|
use camino::{Utf8Path, Utf8PathBuf};
|
|
use nix::mount::{MntFlags, MsFlags};
|
|
use serde::Deserialize;
|
|
|
|
// When the container image's entrypoint is /sbin/init or similar, Podman gives the entrypoint (and
|
|
// exec entrypoint) process an SELinux label of, for instance:
|
|
//
|
|
// system_u:system_r:container_init_t:s0:c276,c638
|
|
//
|
|
// However, we are going to change our entrypoint to something else, so we need to use the
|
|
// "standard" label that Podman otherwise gives, which in this case would be:
|
|
//
|
|
// system_u:system_r:container_t:s0:c276,c638
|
|
//
|
|
// This function performs that mapping.
|
|
pub fn fix_selinux_label(process: &mut oci_spec::runtime::Process) {
|
|
if let Some(label) = process.selinux_label() {
|
|
let new_label = label.replace("container_init_t", "container_t");
|
|
process.set_selinux_label(Some(new_label));
|
|
}
|
|
}
|
|
|
|
pub fn set_file_context(path: impl AsRef<Utf8Path>, context: &str) -> Result<()> {
|
|
extern "C" {
|
|
fn lsetfilecon(path: *const c_char, con: *const c_char) -> i32;
|
|
}
|
|
|
|
let path = CString::new(path.as_ref().as_os_str().as_bytes())?;
|
|
let context = CString::new(context.as_bytes())?;
|
|
|
|
if unsafe { lsetfilecon(path.as_ptr(), context.as_ptr()) } != 0 {
|
|
return Err(io::Error::last_os_error().into());
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub fn is_mountpoint(path: impl AsRef<Utf8Path>) -> Result<bool> {
|
|
let parent = path
|
|
.as_ref()
|
|
.parent()
|
|
.ok_or_else(|| anyhow!("path does not have a parent"))?;
|
|
|
|
let path_dev = match fs::symlink_metadata(path.as_ref()) {
|
|
Ok(meta) => meta.dev(),
|
|
Err(e) if e.kind() == ErrorKind::NotFound => return Ok(false),
|
|
Err(e) => return Err(e.into()),
|
|
};
|
|
|
|
let parent_dev = fs::symlink_metadata(parent)?.dev();
|
|
|
|
Ok(path_dev != parent_dev)
|
|
}
|
|
|
|
pub fn bind_mount_file(from: impl AsRef<Utf8Path>, to: impl AsRef<Utf8Path>) -> Result<()> {
|
|
// ensure target exists
|
|
|
|
if let Some(parent) = to.as_ref().parent() {
|
|
fs::create_dir_all(parent)?;
|
|
}
|
|
|
|
OpenOptions::new()
|
|
.create(true)
|
|
.append(true)
|
|
.open(to.as_ref())?;
|
|
|
|
// bind mount file
|
|
|
|
if let Err(e) = nix::mount::mount(
|
|
Some(from.as_ref().as_std_path()),
|
|
to.as_ref().as_std_path(),
|
|
Option::<&str>::None,
|
|
MsFlags::MS_BIND,
|
|
Option::<&str>::None,
|
|
) {
|
|
bail!(
|
|
"mount({:?}, {:?}, NULL, MS_BIND, NULL) failed: {}",
|
|
from.as_ref(),
|
|
to.as_ref(),
|
|
e
|
|
);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn escape_path(mount_option: impl AsRef<Utf8Path>) -> String {
|
|
mount_option
|
|
.as_ref()
|
|
.as_str()
|
|
.replace('\\', "\\\\")
|
|
.replace(',', "\\,")
|
|
}
|
|
|
|
/// Wrap an SELinux context in double quotes so that it can be used as a mount option value.
///
/// # Panics
///
/// Panics if `mount_option` itself contains a double quote.
fn escape_context(mount_option: &str) -> String {
    assert!(!mount_option.contains('"'));

    let mut quoted = String::with_capacity(mount_option.len() + 2);
    quoted.push('"');
    quoted.push_str(mount_option);
    quoted.push('"');
    quoted
}
|
|
|
|
/// Expose directory `from` at `to` with the given SELinux `context`, if any, recursively applied.
|
|
///
|
|
/// This does *not* modify the SELinux context of `from` nor of files under `from`.
|
|
///
|
|
/// If `read_only` is false, `scratch_dir` must belong to the same file system as `from` and be a
|
|
/// separate subtree.
|
|
///
|
|
/// TODO: Is this a neat relabeling trick or simply a bad hack?
|
|
pub fn bind_mount_dir_with_different_context(
|
|
from: impl AsRef<Utf8Path>,
|
|
to: impl AsRef<Utf8Path>,
|
|
scratch_dir: impl AsRef<Utf8Path>,
|
|
context: Option<&str>,
|
|
read_only: bool,
|
|
) -> Result<()> {
|
|
fs::create_dir_all(to.as_ref())?;
|
|
|
|
let mut options = if read_only {
|
|
fs::create_dir_all(scratch_dir.as_ref())?;
|
|
|
|
format!(
|
|
"lowerdir={}:{}",
|
|
escape_path(scratch_dir.as_ref()),
|
|
escape_path(from)
|
|
)
|
|
} else {
|
|
let layer_dir = scratch_dir.as_ref().join("layer");
|
|
let work_dir = scratch_dir.as_ref().join("work");
|
|
|
|
fs::create_dir_all(&layer_dir)?;
|
|
fs::create_dir_all(&work_dir)?;
|
|
|
|
format!(
|
|
"lowerdir={},upperdir={},workdir={}",
|
|
escape_path(layer_dir),
|
|
escape_path(from),
|
|
escape_path(&work_dir),
|
|
)
|
|
};
|
|
|
|
if let Some(context) = context {
|
|
options = format!("{},context={}", options, escape_context(context));
|
|
}
|
|
|
|
if let Err(e) = nix::mount::mount(
|
|
Some("overlay"),
|
|
to.as_ref().as_std_path(),
|
|
Some("overlay"),
|
|
MsFlags::empty(),
|
|
Some(options.as_str()),
|
|
) {
|
|
bail!(
|
|
"mount(\"overlay\", {:?}, \"overlay\", 0, {:?}) failed: {}",
|
|
to.as_ref(),
|
|
options,
|
|
e,
|
|
);
|
|
}
|
|
|
|
if !read_only {
|
|
// Make any necessary manual cleanup a bit easier by ensuring the workdir is accessible to
|
|
// the user that Podman is running under.
|
|
fs::set_permissions(
|
|
scratch_dir.as_ref().join("work/work"),
|
|
Permissions::from_mode(0o700),
|
|
)?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
pub fn ensure_unmounted(path: impl AsRef<Utf8Path>) -> Result<()> {
|
|
while is_mountpoint(&path)? {
|
|
nix::mount::umount2(path.as_ref().as_std_path(), MntFlags::MNT_DETACH)?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Convenience accessors and mutators for an OCI runtime spec.
pub trait SpecExt {
    /// Return the path of the container's root file system as a UTF-8 path.
    ///
    /// Fails if the path is not valid UTF-8.
    fn root_path(&self) -> Result<&Utf8Path>;

    /// Return the SELinux mount label from the Linux section, if there is one.
    fn mount_label(&self) -> Option<&str>;

    /// Return the devices listed in the Linux section, or an empty slice when there are none.
    fn linux_devices(&self) -> &[oci_spec::runtime::LinuxDevice];

    /// Append `mount` to the list of mounts.
    fn mounts_push(&mut self, mount: oci_spec::runtime::Mount);

    /// Append `linux_device_cgroup` to the device rules under the Linux resources.
    fn linux_resources_devices_push(
        &mut self,
        linux_device_cgroup: oci_spec::runtime::LinuxDeviceCgroup,
    );

    /// Add `capability` to the process's bounding, effective, inheritable, and permitted
    /// capability sets (hence "beip").
    fn process_capabilities_insert_beip(&mut self, capability: oci_spec::runtime::Capability);

    /// Insert `linux_syscall` at the front of the seccomp syscall rules.
    fn linux_seccomp_syscalls_push_front(&mut self, linux_syscall: oci_spec::runtime::LinuxSyscall);
}
|
|
|
|
impl SpecExt for oci_spec::runtime::Spec {
|
|
fn root_path(&self) -> Result<&Utf8Path> {
|
|
let path = self.root().as_ref().unwrap().path().as_path().try_into()?;
|
|
Ok(path)
|
|
}
|
|
|
|
fn mount_label(&self) -> Option<&str> {
|
|
self.linux().as_ref()?.mount_label().as_deref()
|
|
}
|
|
|
|
fn linux_devices(&self) -> &[oci_spec::runtime::LinuxDevice] {
|
|
let linux = match self.linux().as_ref() {
|
|
Some(linux) => linux,
|
|
None => return &[],
|
|
};
|
|
|
|
let devices = match linux.devices() {
|
|
Some(devices) => devices,
|
|
None => return &[],
|
|
};
|
|
|
|
devices.as_slice()
|
|
}
|
|
|
|
fn mounts_push(&mut self, mount: oci_spec::runtime::Mount) {
|
|
let mut mounts = self.mounts().clone().unwrap_or_default();
|
|
mounts.push(mount);
|
|
self.set_mounts(Some(mounts));
|
|
}
|
|
|
|
fn linux_resources_devices_push(
|
|
&mut self,
|
|
linux_device_cgroup: oci_spec::runtime::LinuxDeviceCgroup,
|
|
) {
|
|
self.set_linux({
|
|
let mut linux = self.linux().clone().expect("linux config");
|
|
linux.set_resources({
|
|
let mut resources = linux.resources().clone().unwrap_or_default();
|
|
resources.set_devices({
|
|
let mut devices = resources.devices().clone().unwrap_or_default();
|
|
devices.push(linux_device_cgroup);
|
|
Some(devices)
|
|
});
|
|
Some(resources)
|
|
});
|
|
Some(linux)
|
|
});
|
|
}
|
|
|
|
fn process_capabilities_insert_beip(&mut self, capability: oci_spec::runtime::Capability) {
|
|
self.set_process({
|
|
let mut process = self.process().clone().expect("process config");
|
|
process.set_capabilities({
|
|
let mut capabilities = process.capabilities().clone().unwrap_or_default();
|
|
|
|
fn insert(
|
|
cap: oci_spec::runtime::Capability,
|
|
to: &Option<oci_spec::runtime::Capabilities>,
|
|
) -> Option<oci_spec::runtime::Capabilities> {
|
|
let mut caps = to.clone().unwrap_or_default();
|
|
caps.insert(cap);
|
|
Some(caps)
|
|
}
|
|
|
|
capabilities.set_bounding(insert(capability, capabilities.bounding()));
|
|
capabilities.set_effective(insert(capability, capabilities.effective()));
|
|
capabilities.set_inheritable(insert(capability, capabilities.inheritable()));
|
|
capabilities.set_permitted(insert(capability, capabilities.permitted()));
|
|
|
|
Some(capabilities)
|
|
});
|
|
Some(process)
|
|
});
|
|
}
|
|
|
|
fn linux_seccomp_syscalls_push_front(
|
|
&mut self,
|
|
linux_syscall: oci_spec::runtime::LinuxSyscall,
|
|
) {
|
|
self.set_linux({
|
|
let mut linux = self.linux().clone().expect("linux config");
|
|
linux.set_seccomp({
|
|
let mut seccomp = linux.seccomp().clone();
|
|
if let Some(seccomp) = &mut seccomp {
|
|
seccomp.set_syscalls({
|
|
let mut syscalls = seccomp.syscalls().clone().unwrap_or_default();
|
|
syscalls.insert(0, linux_syscall);
|
|
Some(syscalls)
|
|
});
|
|
}
|
|
seccomp
|
|
});
|
|
Some(linux)
|
|
});
|
|
}
|
|
}
|
|
|
|
pub fn find_single_file_in_dirs(
|
|
dir_paths: impl IntoIterator<Item = impl AsRef<Utf8Path>>,
|
|
ignore_files: &[impl AsRef<Utf8Path>],
|
|
) -> Result<Utf8PathBuf> {
|
|
let mut candidate: Option<Utf8PathBuf> = None;
|
|
|
|
for dir_path in dir_paths {
|
|
let dir_path = dir_path.as_ref();
|
|
|
|
if dir_path.is_dir() {
|
|
for entry in dir_path.read_dir()? {
|
|
let e = entry?;
|
|
|
|
if !e.file_type()?.is_file() {
|
|
continue; // we only care about regular files
|
|
}
|
|
|
|
let path: Utf8PathBuf = e.path().try_into()?;
|
|
|
|
if ignore_files.iter().any(|f| path == f.as_ref()) {
|
|
continue; // file is in `ignore_files`
|
|
}
|
|
|
|
ensure!(candidate.is_none(), "more than one file found");
|
|
|
|
candidate = Some(path);
|
|
}
|
|
}
|
|
}
|
|
|
|
candidate.ok_or_else(|| anyhow!("no files found"))
|
|
}
|
|
|
|
#[derive(Deserialize)]
|
|
pub struct VmImageInfo {
|
|
#[serde(skip)]
|
|
pub path: Utf8PathBuf,
|
|
|
|
#[serde(rename = "virtual-size")]
|
|
pub size: u64,
|
|
|
|
pub format: String,
|
|
}
|
|
|
|
impl VmImageInfo {
|
|
pub fn of(vm_image_path: impl AsRef<Utf8Path>) -> Result<VmImageInfo> {
|
|
let vm_image_path = vm_image_path.as_ref().to_path_buf();
|
|
|
|
let output = Command::new("qemu-img")
|
|
.arg("info")
|
|
.arg("--output=json")
|
|
.arg(vm_image_path.as_os_str())
|
|
.stdout(Stdio::piped())
|
|
.output()?;
|
|
|
|
ensure!(
|
|
output.status.success(),
|
|
"`qemu-img info` failed: {}",
|
|
String::from_utf8_lossy(&output.stderr)
|
|
);
|
|
|
|
let mut info: VmImageInfo = serde_json::from_slice(&output.stdout)?;
|
|
info.path = vm_image_path;
|
|
|
|
Ok(info)
|
|
}
|
|
}
|
|
|
|
pub fn create_overlay_vm_image(
|
|
overlay_vm_image_path: &Utf8Path,
|
|
base_vm_image_info: &VmImageInfo,
|
|
) -> Result<()> {
|
|
let output = Command::new("qemu-img")
|
|
.arg("create")
|
|
.arg("-q")
|
|
.arg("-f")
|
|
.arg("qcow2")
|
|
.arg("-u")
|
|
.arg("-F")
|
|
.arg(&base_vm_image_info.format)
|
|
.arg("-b")
|
|
.arg(&base_vm_image_info.path)
|
|
.arg(overlay_vm_image_path)
|
|
.arg(base_vm_image_info.size.to_string())
|
|
.output()?;
|
|
|
|
ensure!(
|
|
output.status.success(),
|
|
"`qemu-img create` failed: {}",
|
|
String::from_utf8_lossy(&output.stderr)
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Run `crun`.
|
|
///
|
|
/// `crun` will inherit this process' standard streams.
|
|
pub fn crun(args: impl IntoIterator<Item = impl AsRef<OsStr>>) -> Result<()> {
|
|
let status = Command::new("crun").args(args).spawn()?.wait()?;
|
|
ensure!(status.success(), "crun failed");
|
|
|
|
Ok(())
|
|
}
|