Fully encapsulate process metrics in `mod process` (#41)

The `process` module exposes a `Sensor` type that is different from
other types called `Sensor`. Most `Sensor` types instrument other
types with telemetry. The `process::Sensor` type, on the other hand,
is used to read system metrics from the `/proc` filesystem, returning
a metrics summary.

Furthermore, `telemetry::metrics::Root` owns the process start time
metric.

In the interest of making the telemetry system more modular, this moves
all process-related telemetry concerns into the `process` module.
Instead of exposing a `Sensor` that produces metrics, a single public
`Process` type implements `fmt::Display` directly.

This removes process-related concerns from `telemetry/metrics/mod.rs` to
set up further refactoring along these lines.
This commit is contained in:
Oliver Gould 2018-08-06 14:09:33 -07:00 committed by GitHub
parent 1774c87400
commit 4e79348af7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 145 additions and 124 deletions

View File

@ -31,7 +31,7 @@ use std::fmt::{self, Display};
use std::hash::Hash; use std::hash::Hash;
use std::marker::PhantomData; use std::marker::PhantomData;
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use std::time::{UNIX_EPOCH, Duration, Instant}; use std::time::{Duration, Instant};
use indexmap::IndexMap; use indexmap::IndexMap;
@ -116,9 +116,7 @@ struct Root {
tls_config: TlsConfigScopes, tls_config: TlsConfigScopes,
tls_config_last_reload_seconds: Option<Gauge>, tls_config_last_reload_seconds: Option<Gauge>,
process_metrics: Option<process::Sensor>, process: process::Process,
start_time: Gauge,
} }
@ -183,9 +181,6 @@ impl<'a, M: FmtMetric> Metric<'a, M> {
impl Root { impl Root {
metrics! { metrics! {
process_start_time_seconds: Gauge {
"Time that the process started (in seconds since the UNIX epoch)"
},
tls_config_last_reload_seconds: Gauge { tls_config_last_reload_seconds: Gauge {
"Timestamp of when the TLS configuration files were last reloaded \ "Timestamp of when the TLS configuration files were last reloaded \
successfully (in seconds since the UNIX epoch)" successfully (in seconds since the UNIX epoch)"
@ -193,18 +188,8 @@ impl Root {
} }
pub fn new(process: &Arc<ctx::Process>) -> Self { pub fn new(process: &Arc<ctx::Process>) -> Self {
let t0 = process.start_time
.duration_since(UNIX_EPOCH)
.expect("process start time")
.as_secs();
let process_metrics = process::Sensor::new()
.map_err(|e| info!("{}", e))
.ok();
Self { Self {
start_time: t0.into(), process: process::Process::new(&process),
process_metrics,
.. Root::default() .. Root::default()
} }
} }
@ -259,15 +244,7 @@ impl fmt::Display for Root {
Self::tls_config_last_reload_seconds.fmt_metric(f, timestamp)?; Self::tls_config_last_reload_seconds.fmt_metric(f, timestamp)?;
} }
if let Some(ref process_metrics) = self.process_metrics { self.process.fmt(f)?;
match process_metrics.metrics() {
Ok(process) => process.fmt(f)?,
Err(e) => warn!("error collecting process metrics: {:?}", e),
}
};
Self::process_start_time_seconds.fmt_help(f)?;
Self::process_start_time_seconds.fmt_metric(f, self.start_time)?;
Ok(()) Ok(())
} }

View File

@ -1,18 +1,74 @@
use std::fmt; use std::fmt;
use super::{Counter, Gauge, Metric}; use std::time::UNIX_EPOCH;
pub use self::imp::Sensor; use ctx;
use super::{Gauge, Metric};
#[derive(Copy, Clone)] use self::system::System;
pub struct ProcessMetrics {
cpu_seconds_total: Counter, #[derive(Debug, Default)]
open_fds: Gauge, pub struct Process {
max_fds: Option<Gauge>, start_time: Gauge,
virtual_memory_bytes: Gauge, system: Option<System>,
resident_memory_bytes: Gauge,
} }
impl ProcessMetrics { impl Process {
metrics! {
process_start_time_seconds: Gauge {
"Time that the process started (in seconds since the UNIX epoch)"
}
}
pub fn new(process: &ctx::Process) -> Self {
let t0 = process.start_time
.duration_since(UNIX_EPOCH)
.expect("process start time")
.as_secs();
let system = match System::new() {
Ok(s) => Some(s),
Err(err) => {
debug!("failed to load system stats: {}", err);
None
}
};
Self {
start_time: t0.into(),
system,
}
}
}
impl fmt::Display for Process {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Self::process_start_time_seconds.fmt_help(f)?;
Self::process_start_time_seconds.fmt_metric(f, self.start_time)?;
if let Some(ref sys) = self.system {
sys.fmt(f)?;
}
Ok(())
}
}
#[cfg(target_os = "linux")]
mod system {
use super::*;
use super::super::{Counter, Gauge};
use std::{io, fs};
use procinfo::pid;
use libc::{self, pid_t};
#[derive(Debug)]
pub(super) struct System {
page_size: u64,
clock_ticks_per_sec: u64,
}
impl System {
metrics! { metrics! {
process_cpu_seconds_total: Counter { process_cpu_seconds_total: Counter {
"Total user and system CPU time spent in seconds." "Total user and system CPU time spent in seconds."
@ -28,95 +84,16 @@ impl ProcessMetrics {
} }
} }
impl fmt::Display for ProcessMetrics { impl System {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { pub fn new() -> io::Result<Self> {
Self::process_cpu_seconds_total.fmt_help(f)?; let page_size = Self::sysconf(libc::_SC_PAGESIZE, "page size")?;
Self::process_cpu_seconds_total.fmt_metric( let clock_ticks_per_sec = Self::sysconf(libc::_SC_CLK_TCK, "clock ticks per second")?;
f, Ok(Self {
self.cpu_seconds_total
)?;
Self::process_open_fds.fmt_help(f)?;
Self::process_open_fds.fmt_metric(f, self.open_fds)?;
if let Some(ref max_fds) = self.max_fds {
Self::process_max_fds.fmt_help(f)?;
Self::process_max_fds.fmt_metric(f, *max_fds)?;
}
Self::process_virtual_memory_bytes.fmt_help(f)?;
Self::process_virtual_memory_bytes.fmt_metric(
f,
self.virtual_memory_bytes
)?;
Self::process_resident_memory_bytes.fmt_help(f)?;
Self::process_resident_memory_bytes.fmt_metric(
f,
self.resident_memory_bytes
)
}
}
#[cfg(target_os = "linux")]
mod imp {
use super::*;
use super::super::{Counter, Gauge};
use std::{io, fs};
use procinfo::pid;
use libc::{self, pid_t};
#[derive(Debug)]
pub struct Sensor {
page_size: u64,
clock_ticks_per_sec: u64,
}
/// Queries `libc::sysconf` for the configuration value `num`.
///
/// A result that is zero or negative is treated as a failure: the last OS
/// error is logged (labelled with `name`) and returned. Otherwise the
/// value is returned as a `u64`.
fn sysconf(num: libc::c_int, name: &'static str) -> Result<u64, io::Error> {
    // SAFETY: `sysconf` receives a plain integer by value and returns an
    // integer; no pointers or borrowed memory are involved.
    let raw = unsafe { libc::sysconf(num) };
    if raw <= 0 {
        let error = io::Error::last_os_error();
        error!("error getting {}: {:?}", name, error);
        return Err(error);
    }
    Ok(raw as u64)
}
impl Sensor {
pub fn new() -> io::Result<Sensor> {
let page_size = sysconf(libc::_SC_PAGESIZE, "page size")?;
let clock_ticks_per_sec = sysconf(libc::_SC_CLK_TCK, "clock ticks per second")?;
Ok(Sensor {
page_size, page_size,
clock_ticks_per_sec, clock_ticks_per_sec,
}) })
} }
/// Reads the current process's stats and packages them as `ProcessMetrics`.
///
/// # Errors
///
/// Returns the I/O error from reading the process stats, from counting
/// open file descriptors, or from looking up the descriptor limit.
pub fn metrics(&self) -> io::Result<ProcessMetrics> {
    // XXX potentially blocking call
    let stat = pid::stat_self()?;

    // CPU time is the sum of user and system ticks, converted to seconds.
    let cpu_ticks = stat.utime as u64 + stat.stime as u64;
    // Resident size is scaled by the page size captured at construction.
    let rss_bytes = stat.rss as u64 * self.page_size;

    Ok(ProcessMetrics {
        cpu_seconds_total: Counter::from(cpu_ticks / self.clock_ticks_per_sec),
        virtual_memory_bytes: Gauge::from(stat.vsize as u64),
        resident_memory_bytes: Gauge::from(rss_bytes),
        open_fds: open_fds(stat.pid)?,
        max_fds: max_fds()?,
    })
}
}
fn open_fds(pid: pid_t) -> io::Result<Gauge> { fn open_fds(pid: pid_t) -> io::Result<Gauge> {
let mut open = 0; let mut open = 0;
for f in fs::read_dir(format!("/proc/{}/fd", pid))? { for f in fs::read_dir(format!("/proc/{}/fd", pid))? {
@ -133,27 +110,94 @@ mod imp {
.map(|max| Gauge::from(max as u64)); .map(|max| Gauge::from(max as u64));
Ok(max_fds) Ok(max_fds)
} }
fn sysconf(num: libc::c_int, name: &'static str) -> Result<u64, io::Error> {
match unsafe { libc::sysconf(num) } {
e if e <= 0 => {
let error = io::Error::last_os_error();
error!("error getting {}: {:?}", name, error);
Err(error)
},
val => Ok(val as u64),
}
}
}
impl fmt::Display for System {
    /// Reads this process's stats and writes, in order, the CPU time,
    /// open file descriptor, maximum file descriptor (when one is
    /// reported), virtual memory, and resident memory metrics, each
    /// preceded by its help text.
    ///
    /// A failure to read the stats or either descriptor value is logged as
    /// a warning and ends the output at that point with `Ok(())`; only
    /// errors from the formatter itself are returned as `Err`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // XXX potentially blocking call
        let stat = match pid::stat_self() {
            Err(err) => {
                warn!("failed to read process stats: {}", err);
                return Ok(());
            }
            Ok(stat) => stat,
        };

        // CPU time: user plus system ticks, converted to whole seconds.
        let cpu_ticks = stat.utime as u64 + stat.stime as u64;
        Self::process_cpu_seconds_total.fmt_help(f)?;
        let cpu_seconds = Counter::from(cpu_ticks / self.clock_ticks_per_sec);
        Self::process_cpu_seconds_total.fmt_metric(f, cpu_seconds)?;

        let open_fds = match Self::open_fds(stat.pid) {
            Ok(gauge) => gauge,
            Err(err) => {
                warn!("could not determine process_open_fds: {}", err);
                return Ok(());
            }
        };
        Self::process_open_fds.fmt_help(f)?;
        Self::process_open_fds.fmt_metric(f, open_fds)?;

        let max_fds = match Self::max_fds() {
            Ok(limit) => limit,
            Err(err) => {
                warn!("could not determine process_max_fds: {}", err);
                return Ok(());
            }
        };
        // The limit is optional; nothing is written when it is absent.
        if let Some(limit) = max_fds {
            Self::process_max_fds.fmt_help(f)?;
            Self::process_max_fds.fmt_metric(f, limit)?;
        }

        let virtual_bytes = Gauge::from(stat.vsize as u64);
        Self::process_virtual_memory_bytes.fmt_help(f)?;
        Self::process_virtual_memory_bytes.fmt_metric(f, virtual_bytes)?;

        // Resident size is scaled by the page size captured at construction.
        let resident_bytes = Gauge::from(stat.rss as u64 * self.page_size);
        Self::process_resident_memory_bytes.fmt_help(f)?;
        Self::process_resident_memory_bytes.fmt_metric(f, resident_bytes)
    }
}
} }
#[cfg(not(target_os = "linux"))] #[cfg(not(target_os = "linux"))]
mod imp { mod system {
use super::*; use std::{fmt, io};
use std::io;
#[derive(Debug)] #[derive(Debug)]
pub struct Sensor {} pub(super) struct System {}
impl Sensor { impl System {
pub fn new() -> io::Result<Sensor> { pub fn new() -> io::Result<Self> {
Err(io::Error::new( Err(io::Error::new(
io::ErrorKind::Other, io::ErrorKind::Other,
"procinfo not supported on this operating system" "procinfo not supported on this operating system"
)) ))
} }
/// Placeholder for platforms without process-stat support.
///
/// Calling this panics via `unreachable!`.
// NOTE(review): presumably no `Sensor` can exist here because `new()` on
// this platform returns `Err` — confirm against the constructor.
pub fn metrics(&self) -> io::Result<ProcessMetrics> {
unreachable!("process::Sensor::metrics() on unsupported OS!")
}
} }
impl fmt::Display for System {
    /// Writes nothing and always succeeds; this variant of `System` has no
    /// metrics to report.
    fn fmt(&self, _f: &mut fmt::Formatter) -> fmt::Result {
        Ok(())
    }
}
} }