diff --git a/crates/runc-shim/src/runc.rs b/crates/runc-shim/src/runc.rs index 1e6146a..97903e8 100644 --- a/crates/runc-shim/src/runc.rs +++ b/crates/runc-shim/src/runc.rs @@ -14,6 +14,8 @@ limitations under the License. */ +#[cfg(target_os = "linux")] +use std::sync::RwLock; use std::{ convert::TryFrom, os::{ @@ -29,6 +31,8 @@ use std::{ }; use async_trait::async_trait; +#[cfg(target_os = "linux")] +use cgroups_rs::fs::Cgroup; use containerd_shim::{ api::{CreateTaskRequest, ExecProcessRequest, Options, Status}, asynchronous::monitor::{monitor_subscribe, monitor_unsubscribe, Subscription}, @@ -66,19 +70,6 @@ use crate::{ io::Stdio, }; -/// check the process is zombie -#[cfg(target_os = "linux")] -fn is_zombie_process(pid: i32) -> bool { - if let Ok(status) = std::fs::read_to_string(format!("/proc/{}/status", pid)) { - for line in status.lines() { - if line.starts_with("State:") && line.contains('Z') { - return true; - } - } - } - false -} - pub type ExecProcess = ProcessTemplate; pub type InitProcess = ProcessTemplate; @@ -139,6 +130,12 @@ impl ContainerFactory for RuncFactory { let config = CreateConfig::default(); self.do_create(&mut init, config).await?; + #[cfg(target_os = "linux")] + { + *init.lifecycle.cgroup_cache.write().unwrap() = + containerd_shim::cgroup::get_cgroup(init.pid as u32).ok(); + } + let container = RuncContainer { id: id.to_string(), bundle: bundle.to_string(), @@ -151,6 +148,7 @@ impl ContainerFactory for RuncFactory { }, processes: Default::default(), }; + Ok(container) } @@ -270,6 +268,9 @@ pub struct RuncInitLifecycle { opts: Options, bundle: String, exit_signal: Arc, + /// Cache for cgroup paths to avoid repeated /proc//cgroup parsing + #[cfg(target_os = "linux")] + cgroup_cache: RwLock>, } #[async_trait] @@ -327,15 +328,18 @@ impl ProcessLifecycle for RuncInitLifecycle { )); } - // check the process is zombie - if is_zombie_process(p.pid) { + // Check if cgroup still exists before attempting update + if !self.ensure_init_cgroup_exists().await { return Err(other!( - "failed to update resources because process {} is a zombie", + "failed to update resources because cgroup for process {} has been released", p.pid )); } - - containerd_shim::cgroup::update_resources(p.pid as u32, resources) + let cgroup_guard = p.lifecycle.cgroup_cache.read().unwrap(); + let cgroup = cgroup_guard + .as_ref() + .ok_or_else(|| other!("cgroup cache is empty for process {}", p.pid))?; + containerd_shim::cgroup::update_resources(cgroup, resources) } #[cfg(not(target_os = "linux"))] @@ -352,15 +356,18 @@ impl ProcessLifecycle for RuncInitLifecycle { )); } - // check the process is zombie - if is_zombie_process(p.pid) { + // Check if cgroup still exists before attempting to collect stats + if !self.ensure_init_cgroup_exists().await { return Err(other!( - "failed to collect metrics because process {} is a zombie", + "failed to collect metrics because cgroup for process {} has been released", p.pid )); } - - containerd_shim::cgroup::collect_metrics(p.pid as u32) + let cgroup_guard = p.lifecycle.cgroup_cache.read().unwrap(); + let cgroup = cgroup_guard + .as_ref() + .ok_or_else(|| other!("cgroup cache is empty for process {}", p.pid))?; + containerd_shim::cgroup::collect_metrics(cgroup) } #[cfg(not(target_os = "linux"))] @@ -436,6 +443,20 @@ impl RuncInitLifecycle { opts, bundle: bundle.to_string(), exit_signal: Default::default(), + #[cfg(target_os = "linux")] + cgroup_cache: RwLock::new(None), + } + } + + /// Ensure cgroup exists and cache the path information + /// Returns true if cgroup exists, false if released + #[cfg(target_os = "linux")] + async fn ensure_init_cgroup_exists(&self) -> bool { + let cache = self.cgroup_cache.read().unwrap(); + if let Some(ref cached) = *cache { + cached.exists() + } else { + false } } } diff --git a/crates/shim/src/cgroup.rs b/crates/shim/src/cgroup.rs index 1c8b912..c901ee8 100644 --- a/crates/shim/src/cgroup.rs +++ b/crates/shim/src/cgroup.rs @@ -109,13 +109,11 @@ fn write_process_oom_score(pid: u32, score: i64) -> Result<()> { /// Collect process cgroup stats, return only necessary parts of it #[cfg_attr(feature = "tracing", tracing::instrument(level = "info"))] -pub fn collect_metrics(pid: u32) -> Result { +pub fn collect_metrics(cgroup: &Cgroup) -> Result { let mut metrics = Metrics::new(); - let cgroup = get_cgroup(pid)?; - // to make it easy, fill the necessary metrics only. - for sub_system in Cgroup::subsystems(&cgroup) { + for sub_system in Cgroup::subsystems(cgroup) { match sub_system { Subsystem::Cpu(cpu_ctr) => { let mut cpu_usage = CPUUsage::new(); @@ -204,7 +202,7 @@ pub fn collect_metrics(pid: u32) -> Result { // get_cgroup will return either cgroup v1 or v2 depending on system configuration #[cfg_attr(feature = "tracing", tracing::instrument(level = "info"))] -fn get_cgroup(pid: u32) -> Result { +pub fn get_cgroup(pid: u32) -> Result { let hierarchies = hierarchies::auto(); let cgroup = if hierarchies.v2() { let path = get_cgroups_v2_path_by_pid(pid)?; @@ -250,11 +248,8 @@ fn parse_cgroups_v2_path(content: &str) -> Result { /// Update process cgroup limits #[cfg_attr(feature = "tracing", tracing::instrument(level = "info"))] -pub fn update_resources(pid: u32, resources: &LinuxResources) -> Result<()> { - // get container main process cgroup - let cgroup = get_cgroup(pid)?; - - for sub_system in Cgroup::subsystems(&cgroup) { +pub fn update_resources(cgroup: &Cgroup, resources: &LinuxResources) -> Result<()> { + for sub_system in Cgroup::subsystems(cgroup) { match sub_system { Subsystem::Pid(pid_ctr) => { // set maximum number of PIDs