Fully encapsulate process metrics in `mod process` (#41)

The `process` module exposes a `Sensor` type that is different from
other types called `Sensor`. Most `Sensor` types instrument other
types with telemetry. The `process::Sensor` type, on the other hand,
is used to read system metrics from the `/proc` filesystem, returning
a metrics summary.

Furthermore, `telemetry::metrics::Root` owns the process start time
metric.

In the interest of making the telemetry system more modular, this moves
all process-related telemetry concerns into the `process` module.
Instead of exposing a `Sensor` that produces metrics, a single public
`Process` type implements `fmt::Display` directly.

This removes process-related concerns from `telemetry/metrics/mod.rs` to
set up further refactoring along these lines.
This commit is contained in:
Oliver Gould 2018-08-06 14:09:33 -07:00 committed by GitHub
parent 1774c87400
commit 4e79348af7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 145 additions and 124 deletions

View File

@ -31,7 +31,7 @@ use std::fmt::{self, Display};
use std::hash::Hash;
use std::marker::PhantomData;
use std::sync::{Arc, Mutex};
use std::time::{UNIX_EPOCH, Duration, Instant};
use std::time::{Duration, Instant};
use indexmap::IndexMap;
@ -116,9 +116,7 @@ struct Root {
tls_config: TlsConfigScopes,
tls_config_last_reload_seconds: Option<Gauge>,
process_metrics: Option<process::Sensor>,
start_time: Gauge,
process: process::Process,
}
@ -183,9 +181,6 @@ impl<'a, M: FmtMetric> Metric<'a, M> {
impl Root {
metrics! {
process_start_time_seconds: Gauge {
"Time that the process started (in seconds since the UNIX epoch)"
},
tls_config_last_reload_seconds: Gauge {
"Timestamp of when the TLS configuration files were last reloaded \
successfully (in seconds since the UNIX epoch)"
@ -193,18 +188,8 @@ impl Root {
}
pub fn new(process: &Arc<ctx::Process>) -> Self {
let t0 = process.start_time
.duration_since(UNIX_EPOCH)
.expect("process start time")
.as_secs();
let process_metrics = process::Sensor::new()
.map_err(|e| info!("{}", e))
.ok();
Self {
start_time: t0.into(),
process_metrics,
process: process::Process::new(&process),
.. Root::default()
}
}
@ -259,15 +244,7 @@ impl fmt::Display for Root {
Self::tls_config_last_reload_seconds.fmt_metric(f, timestamp)?;
}
if let Some(ref process_metrics) = self.process_metrics {
match process_metrics.metrics() {
Ok(process) => process.fmt(f)?,
Err(e) => warn!("error collecting process metrics: {:?}", e),
}
};
Self::process_start_time_seconds.fmt_help(f)?;
Self::process_start_time_seconds.fmt_metric(f, self.start_time)?;
self.process.fmt(f)?;
Ok(())
}

View File

@ -1,65 +1,59 @@
use std::fmt;
use super::{Counter, Gauge, Metric};
use std::time::UNIX_EPOCH;
pub use self::imp::Sensor;
use ctx;
use super::{Gauge, Metric};
#[derive(Copy, Clone)]
pub struct ProcessMetrics {
cpu_seconds_total: Counter,
open_fds: Gauge,
max_fds: Option<Gauge>,
virtual_memory_bytes: Gauge,
resident_memory_bytes: Gauge,
use self::system::System;
/// Process-level telemetry that is rendered through its `fmt::Display` impl.
///
/// Holds the process start time plus an optional source of system metrics.
#[derive(Debug, Default)]
pub struct Process {
/// Process start time; `Process::new` stores it as whole seconds since the
/// UNIX epoch.
start_time: Gauge,
/// Source of system metrics; `None` when `System::new()` returned an error.
system: Option<System>,
}
impl ProcessMetrics {
impl Process {
metrics! {
process_cpu_seconds_total: Counter {
"Total user and system CPU time spent in seconds."
},
process_open_fds: Gauge { "Number of open file descriptors." },
process_max_fds: Gauge { "Maximum number of open file descriptors." },
process_virtual_memory_bytes: Gauge {
"Virtual memory size in bytes."
},
process_resident_memory_bytes: Gauge {
"Resident memory size in bytes."
process_start_time_seconds: Gauge {
"Time that the process started (in seconds since the UNIX epoch)"
}
}
/// Builds the process metrics from the given process context.
///
/// The start time is recorded as whole seconds since the UNIX epoch. System
/// metrics are optional: when `System::new()` fails, the error is logged at
/// debug level and no system metrics source is kept.
///
/// # Panics
///
/// Panics with "process start time" if `process.start_time` is earlier than
/// the UNIX epoch.
pub fn new(process: &ctx::Process) -> Self {
    let since_epoch = process.start_time
        .duration_since(UNIX_EPOCH)
        .expect("process start time");
    let secs = since_epoch.as_secs();

    // A missing system metrics source is not fatal; note it and carry on.
    let system = System::new()
        .map_err(|err| debug!("failed to load system stats: {}", err))
        .ok();

    Self {
        start_time: secs.into(),
        system,
    }
}
}
impl fmt::Display for ProcessMetrics {
impl fmt::Display for Process {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Self::process_cpu_seconds_total.fmt_help(f)?;
Self::process_cpu_seconds_total.fmt_metric(
f,
self.cpu_seconds_total
)?;
Self::process_start_time_seconds.fmt_help(f)?;
Self::process_start_time_seconds.fmt_metric(f, self.start_time)?;
Self::process_open_fds.fmt_help(f)?;
Self::process_open_fds.fmt_metric(f, self.open_fds)?;
if let Some(ref max_fds) = self.max_fds {
Self::process_max_fds.fmt_help(f)?;
Self::process_max_fds.fmt_metric(f, *max_fds)?;
if let Some(ref sys) = self.system {
sys.fmt(f)?;
}
Self::process_virtual_memory_bytes.fmt_help(f)?;
Self::process_virtual_memory_bytes.fmt_metric(
f,
self.virtual_memory_bytes
)?;
Self::process_resident_memory_bytes.fmt_help(f)?;
Self::process_resident_memory_bytes.fmt_metric(
f,
self.resident_memory_bytes
)
Ok(())
}
}
#[cfg(target_os = "linux")]
mod imp {
mod system {
use super::*;
use super::super::{Counter, Gauge};
@ -69,91 +63,141 @@ mod imp {
use libc::{self, pid_t};
#[derive(Debug)]
pub struct Sensor {
/// System configuration values captured once in `System::new()` and reused
/// each time the system metrics are formatted.
pub(super) struct System {
/// Value of `sysconf(_SC_PAGESIZE)`; multiplied with `stat.rss` to report
/// resident memory in bytes.
page_size: u64,
/// Value of `sysconf(_SC_CLK_TCK)`; divides the process's clock ticks to
/// report CPU time in seconds.
clock_ticks_per_sec: u64,
}
fn sysconf(num: libc::c_int, name: &'static str) -> Result<u64, io::Error> {
match unsafe { libc::sysconf(num) } {
e if e <= 0 => {
let error = io::Error::last_os_error();
error!("error getting {}: {:?}", name, error);
Err(error)
impl System {
metrics! {
process_cpu_seconds_total: Counter {
"Total user and system CPU time spent in seconds."
},
val => Ok(val as u64),
process_open_fds: Gauge { "Number of open file descriptors." },
process_max_fds: Gauge { "Maximum number of open file descriptors." },
process_virtual_memory_bytes: Gauge {
"Virtual memory size in bytes."
},
process_resident_memory_bytes: Gauge {
"Resident memory size in bytes."
}
}
}
impl Sensor {
pub fn new() -> io::Result<Sensor> {
let page_size = sysconf(libc::_SC_PAGESIZE, "page size")?;
let clock_ticks_per_sec = sysconf(libc::_SC_CLK_TCK, "clock ticks per second")?;
Ok(Sensor {
impl System {
/// Reads the page size and the clock tick rate via `sysconf`.
///
/// # Errors
///
/// Returns the first error reported by `Self::sysconf`; the page size is
/// queried before the clock tick rate.
pub fn new() -> io::Result<Self> {
    Self::sysconf(libc::_SC_PAGESIZE, "page size").and_then(|page_size| {
        Self::sysconf(libc::_SC_CLK_TCK, "clock ticks per second").map(
            |clock_ticks_per_sec| Self {
                page_size,
                clock_ticks_per_sec,
            },
        )
    })
}
pub fn metrics(&self) -> io::Result<ProcessMetrics> {
// XXX potentially blocking call
let stat = pid::stat_self()?;
let clock_ticks = stat.utime as u64 + stat.stime as u64;
let cpu_seconds_total = Counter::from(clock_ticks / self.clock_ticks_per_sec);
let virtual_memory_bytes = Gauge::from(stat.vsize as u64);
let resident_memory_bytes = Gauge::from(stat.rss as u64 * self.page_size);
let metrics = ProcessMetrics {
cpu_seconds_total,
virtual_memory_bytes,
resident_memory_bytes,
open_fds: open_fds(stat.pid)?,
max_fds: max_fds()?,
};
Ok(metrics)
/// Counts the entries under `/proc/<pid>/fd` that are not directories.
///
/// # Errors
///
/// Returns the first I/O error hit while opening the directory, reading an
/// entry, or querying an entry's file type.
fn open_fds(pid: pid_t) -> io::Result<Gauge> {
    let fd_dir = format!("/proc/{}/fd", pid);
    let mut count = 0;
    for entry in fs::read_dir(fd_dir)? {
        let kind = entry?.file_type()?;
        if kind.is_dir() {
            continue;
        }
        count += 1;
    }
    Ok(Gauge::from(count))
}
}
/// Reads this process's open-file limit.
///
/// Uses the soft limit when one is set and falls back to the hard limit
/// otherwise; yields `Ok(None)` when neither is set.
///
/// # Errors
///
/// Returns the error from `pid::limits_self()` if the limits cannot be read.
fn max_fds() -> io::Result<Option<Gauge>> {
    let open_files = pid::limits_self()?.max_open_files;
    let gauge = match open_files.soft.or(open_files.hard) {
        Some(max) => Some(Gauge::from(max as u64)),
        None => None,
    };
    Ok(gauge)
}
fn open_fds(pid: pid_t) -> io::Result<Gauge> {
let mut open = 0;
for f in fs::read_dir(format!("/proc/{}/fd", pid))? {
if !f?.file_type()?.is_dir() {
open += 1;
/// Queries a system configuration value through `libc::sysconf`.
///
/// `name` is only used to label the error log line.
///
/// # Errors
///
/// A result of zero or less is treated as a failure: the last OS error is
/// logged at error level and returned.
fn sysconf(num: libc::c_int, name: &'static str) -> Result<u64, io::Error> {
    // NOTE(review): presumably sound because `sysconf` only takes an integer
    // selector and returns an integer -- confirm against the libc docs.
    let value = unsafe { libc::sysconf(num) };
    if value <= 0 {
        let error = io::Error::last_os_error();
        error!("error getting {}: {:?}", name, error);
        return Err(error);
    }
    Ok(value as u64)
}
Ok(Gauge::from(open))
}
fn max_fds() -> io::Result<Option<Gauge>> {
let limit = pid::limits_self()?.max_open_files;
let max_fds = limit.soft.or(limit.hard)
.map(|max| Gauge::from(max as u64));
Ok(max_fds)
impl fmt::Display for System {
    /// Writes the system metrics for the current process.
    ///
    /// Emits, in order: total CPU seconds, open file descriptors, maximum
    /// file descriptors (when a limit is set), virtual memory bytes and
    /// resident memory bytes.
    ///
    /// Collection is best-effort. If the process stats cannot be read at all,
    /// a warning is logged and nothing is written. If only the open-fd count
    /// or the fd limit cannot be determined, a warning is logged and that one
    /// metric is skipped; the remaining metrics are still written.
    ///
    /// # Errors
    ///
    /// Only errors from writing to the formatter are returned; collection
    /// failures are logged and never surface as `fmt::Error`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // XXX potentially blocking call
        let stat = match pid::stat_self() {
            Ok(stat) => stat,
            Err(err) => {
                // Every metric below except the fd limit is derived from
                // `stat`, so there is nothing useful to report without it.
                warn!("failed to read process stats: {}", err);
                return Ok(());
            }
        };

        // CPU time is the sum of user and system clock ticks, converted to
        // whole seconds by integer division.
        let clock_ticks = stat.utime as u64 + stat.stime as u64;
        Self::process_cpu_seconds_total.fmt_help(f)?;
        Self::process_cpu_seconds_total.fmt_metric(
            f,
            Counter::from(clock_ticks / self.clock_ticks_per_sec),
        )?;

        match Self::open_fds(stat.pid) {
            Ok(open_fds) => {
                Self::process_open_fds.fmt_help(f)?;
                Self::process_open_fds.fmt_metric(f, open_fds)?;
            }
            Err(err) => {
                // Skip just this metric: the memory metrics below come from
                // `stat`, which has already been read successfully.
                warn!("could not determine process_open_fds: {}", err);
            }
        }

        match Self::max_fds() {
            Ok(None) => {}
            Ok(Some(ref max_fds)) => {
                Self::process_max_fds.fmt_help(f)?;
                Self::process_max_fds.fmt_metric(f, *max_fds)?;
            }
            Err(err) => {
                // As above, a failed limit lookup must not suppress the
                // memory metrics.
                warn!("could not determine process_max_fds: {}", err);
            }
        }

        Self::process_virtual_memory_bytes.fmt_help(f)?;
        Self::process_virtual_memory_bytes.fmt_metric(
            f,
            Gauge::from(stat.vsize as u64),
        )?;

        // `stat.rss` is scaled by the page size to report bytes.
        Self::process_resident_memory_bytes.fmt_help(f)?;
        Self::process_resident_memory_bytes.fmt_metric(
            f,
            Gauge::from(stat.rss as u64 * self.page_size),
        )
    }
}
}
#[cfg(not(target_os = "linux"))]
mod imp {
use super::*;
use std::io;
mod system {
use std::{fmt, io};
#[derive(Debug)]
pub struct Sensor {}
/// Placeholder used on non-Linux targets; its `new()` always returns an error.
pub(super) struct System {}
impl Sensor {
pub fn new() -> io::Result<Sensor> {
impl System {
/// Always fails: system metrics are only collected on Linux.
///
/// # Errors
///
/// Returns an `io::ErrorKind::Other` error on every call.
pub fn new() -> io::Result<Self> {
    let unsupported = io::Error::new(
        io::ErrorKind::Other,
        "procinfo not supported on this operating system",
    );
    Err(unsupported)
}
pub fn metrics(&self) -> io::Result<ProcessMetrics> {
unreachable!("process::Sensor::metrics() on unsupported OS!")
}
}
// Formatting counterpart for targets other than Linux: there are no system
// metrics to report, so nothing is written.
impl fmt::Display for System {
// Ignores the formatter and always succeeds.
fn fmt(&self, _: &mut fmt::Formatter) -> fmt::Result {
Ok(())
}
}
}