mirror of https://github.com/linkerd/linkerd2.git
proxy: Add process stats to proxy metrics (on Linux) (#1128)
This branch adds process stats to the proxy's metrics, as described in https://prometheus.io/docs/instrumenting/writing_clientlibs/#process-metrics. In particular, it adds metrics for the process's total CPU time, number of open file descriptors and max file descriptors, virtual memory size, and resident set size. This branch adds a dependency on the `procinfo` crate. Since this crate and the syscalls it wraps are Linux-specific, these stats are only reported on Linux. On other operating systems, they aren't reported. Manual testing Metrics scrape: ``` eliza@ares:~$ curl http://localhost:4191/metrics # HELP process_cpu_seconds_total Total user and system CPU time spent in seconds. # TYPE process_cpu_seconds_total counter process_cpu_seconds_total 0 # HELP process_open_fds Number of open file descriptors. # TYPE process_open_fds gauge process_open_fds 19 # HELP process_max_fds Maximum number of open file descriptors. # TYPE process_max_fds gauge process_max_fds 1024 # HELP process_virtual_memory_bytes Virtual memory size in bytes. # TYPE process_virtual_memory_bytes gauge process_virtual_memory_bytes 45252608 # HELP process_resident_memory_bytes Resident memory size in bytes. # TYPE process_resident_memory_bytes gauge process_resident_memory_bytes 12132352 # HELP process_start_time_seconds Time that the process started (in seconds since the UNIX epoch) # TYPE process_start_time_seconds gauge process_start_time_seconds 1529017536 ``` Note that the `process_cpu_seconds_total` stat is 0 because I just launched this conduit instance and it's not seeing any load; it does go up after I sent a few requests to it.
Confirm RSS & virtual memory stats w/ `ps`, and get Conduit's pid so we can check the fd stats (note that `ps` reports virt/rss in kb while Conduit's metrics reports them in bytes): ``` eliza@ares:~$ ps aux | grep conduit | grep -v grep eliza 16766 0.0 0.0 44192 12956 pts/2 Sl+ 16:05 0:00 target/debug/conduit-proxy ``` Count conduit process's open fds: ``` eliza@ares:~$ cd /proc/16766/fd eliza@ares:/proc/16766/fd$ ls -l | wc -l 18 ``` Signed-off-by: Eliza Weisman <eliza@buoyant.io>
This commit is contained in:
parent
aee845e40f
commit
2f0ad66257
43
Cargo.lock
43
Cargo.lock
|
@ -141,6 +141,7 @@ dependencies = [
|
|||
"libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"net2 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"procinfo 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"prost 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"prost-types 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quickcheck 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -648,6 +649,11 @@ name = "nodrop"
|
|||
version = "0.1.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "2.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "num"
|
||||
version = "0.1.42"
|
||||
|
@ -717,6 +723,17 @@ dependencies = [
|
|||
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "procinfo"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"byteorder 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"nom 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustc_version 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost"
|
||||
version = "0.4.0"
|
||||
|
@ -885,6 +902,14 @@ name = "rustc-demangle"
|
|||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "rustc_version"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustls"
|
||||
version = "0.12.0"
|
||||
|
@ -917,6 +942,19 @@ dependencies = [
|
|||
"untrusted 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "semver-parser"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.4.0"
|
||||
|
@ -1562,6 +1600,7 @@ dependencies = [
|
|||
"checksum multimap 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2eb04b9f127583ed176e163fb9ec6f3e793b87e21deedd5734a69386a18a0151"
|
||||
"checksum net2 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)" = "9044faf1413a1057267be51b5afba8eb1090bd2231c693664aa1db716fe1eae0"
|
||||
"checksum nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "9a2228dca57108069a5262f2ed8bd2e82496d2e074a06d1ccc7ce1687b6ae0a2"
|
||||
"checksum nom 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf51a729ecf40266a2368ad335a5fdde43471f545a967109cd62146ecf8b66ff"
|
||||
"checksum num 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "4703ad64153382334aa8db57c637364c322d3372e097840c72000dabdcf6156e"
|
||||
"checksum num-integer 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "f8d26da319fb45674985c78f1d1caf99aa4941f785d384a2ae36d0740bc3e2fe"
|
||||
"checksum num-iter 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)" = "4b226df12c5a59b63569dd57fafb926d91b385dfce33d8074a412411b689d593"
|
||||
|
@ -1571,6 +1610,7 @@ dependencies = [
|
|||
"checksum percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831"
|
||||
"checksum petgraph 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)" = "7a7e5234c228fbfa874c86a77f685886127f82e0aef602ad1d48333fcac6ad61"
|
||||
"checksum proc-macro2 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "effdb53b25cdad54f8f48843d67398f7ef2e14f12c1b4cb4effc549a6462a4d6"
|
||||
"checksum procinfo 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6ab1427f3d2635891f842892dda177883dca0639e05fe66796a62c9d2f23b49c"
|
||||
"checksum prost 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b9f36c478cd43382388dfc3a3679af175c03d19ed8039e79a3e4447e944cd3f3"
|
||||
"checksum prost-build 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b6325275b85605f58f576456a47af44417edf5956a6f670bb59fbe12aff69597"
|
||||
"checksum prost-derive 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9787d1977ea72e8066d58e46ae66100324a2815e677897fe78dfe54958f48252"
|
||||
|
@ -1590,10 +1630,13 @@ dependencies = [
|
|||
"checksum resolv-conf 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8e1b086bb6a2659d6ba66e4aa21bde8a53ec03587cd5c80b83bdc3a330f35cab"
|
||||
"checksum ring 0.13.0-alpha5 (registry+https://github.com/rust-lang/crates.io-index)" = "3845516753f91b4511f9b17c917ea6fa4bc5a7853a9947b0f66731aff51cdef5"
|
||||
"checksum rustc-demangle 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "aee45432acc62f7b9a108cc054142dac51f979e69e71ddce7d6fc7adf29e817e"
|
||||
"checksum rustc_version 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a54aa04a10c68c1c4eacb4337fd883b435997ede17a9385784b990777686b09a"
|
||||
"checksum rustls 0.12.0 (git+https://github.com/ctz/rustls)" = "<none>"
|
||||
"checksum safemem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e27a8b19b835f7aea908818e871f5cc3a5a186550c30773be987e155e8163d8f"
|
||||
"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27"
|
||||
"checksum sct 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b4540aed8d71a5de961a8902cf356e28122bd62695eb5be1c214f84d8704097c"
|
||||
"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
|
||||
"checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
|
||||
"checksum slab 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fdeff4cd9ecff59ec7e3744cbca73dfe5ac35c2aedb2cfba8a1c715a18912e9d"
|
||||
"checksum smallvec 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "03dab98ab5ded3a8b43b2c80751194608d0b2aa0f1d46cf95d1c35e192844aa7"
|
||||
"checksum socket2 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ff606e0486e88f5fc6cfeb3966e434fb409abbc7a3ab495238f70a1ca97f789d"
|
||||
|
|
|
@ -63,6 +63,7 @@ untrusted = "0.6.1"
|
|||
libc = "0.2"
|
||||
# We can use the `crates.io` version of `inotify` once 0.5.2 has been released.
|
||||
inotify = { git = "https://github.com/inotify-rs/inotify" }
|
||||
procinfo = "0.4.2"
|
||||
|
||||
[dev-dependencies]
|
||||
net2 = "0.2"
|
||||
|
|
|
@ -23,6 +23,8 @@ extern crate libc;
|
|||
extern crate log;
|
||||
#[cfg_attr(test, macro_use)]
|
||||
extern crate indexmap;
|
||||
#[cfg(target_os = "linux")]
|
||||
extern crate procinfo;
|
||||
extern crate prost;
|
||||
extern crate prost_types;
|
||||
#[cfg(test)]
|
||||
|
|
|
@ -43,6 +43,12 @@ impl Into<u64> for Counter {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<u64> for Counter {
|
||||
fn from(value: u64) -> Self {
|
||||
Counter(Wrapping(value))
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Add for Counter {
|
||||
type Output = Self;
|
||||
fn add(self, Counter(rhs): Self) -> Self::Output {
|
||||
|
|
|
@ -56,6 +56,7 @@ mod histogram;
|
|||
mod http;
|
||||
mod labels;
|
||||
mod latency;
|
||||
mod process;
|
||||
mod record;
|
||||
mod serve;
|
||||
mod transport;
|
||||
|
@ -109,6 +110,8 @@ struct Root {
|
|||
transports: transport::OpenScopes,
|
||||
transport_closes: transport::CloseScopes,
|
||||
|
||||
process_metrics: Option<process::Sensor>,
|
||||
|
||||
start_time: Gauge,
|
||||
}
|
||||
|
||||
|
@ -182,8 +185,13 @@ impl Root {
|
|||
.expect("process start time")
|
||||
.as_secs();
|
||||
|
||||
let process_metrics = process::Sensor::new()
|
||||
.map_err(|e| info!("{}", e))
|
||||
.ok();
|
||||
|
||||
Self {
|
||||
start_time: t0.into(),
|
||||
process_metrics,
|
||||
.. Root::default()
|
||||
}
|
||||
}
|
||||
|
@ -227,6 +235,13 @@ impl fmt::Display for Root {
|
|||
self.transports.fmt(f)?;
|
||||
self.transport_closes.fmt(f)?;
|
||||
|
||||
if let Some(ref process_metrics) = self.process_metrics {
|
||||
match process_metrics.metrics() {
|
||||
Ok(process) => process.fmt(f)?,
|
||||
Err(e) => warn!("error collecting process metrics: {:?}", e),
|
||||
}
|
||||
};
|
||||
|
||||
Self::process_start_time_seconds.fmt_help(f)?;
|
||||
Self::process_start_time_seconds.fmt_metric(f, self.start_time)?;
|
||||
|
||||
|
|
|
@ -0,0 +1,151 @@
|
|||
use std::fmt;
|
||||
use super::{Counter, Gauge, Metric};
|
||||
|
||||
pub use self::imp::Sensor;
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct ProcessMetrics {
|
||||
cpu_seconds_total: Counter,
|
||||
open_fds: Gauge,
|
||||
max_fds: Option<Gauge>,
|
||||
virtual_memory_bytes: Gauge,
|
||||
resident_memory_bytes: Gauge,
|
||||
}
|
||||
|
||||
impl ProcessMetrics {
|
||||
metrics! {
|
||||
process_cpu_seconds_total: Counter {
|
||||
"Total user and system CPU time spent in seconds."
|
||||
},
|
||||
process_open_fds: Gauge { "Number of open file descriptors." },
|
||||
process_max_fds: Gauge { "Maximum number of open file descriptors." },
|
||||
process_virtual_memory_bytes: Gauge {
|
||||
"Virtual memory size in bytes."
|
||||
},
|
||||
process_resident_memory_bytes: Gauge {
|
||||
"Resident memory size in bytes."
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for ProcessMetrics {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
Self::process_cpu_seconds_total.fmt_help(f)?;
|
||||
Self::process_cpu_seconds_total.fmt_metric(
|
||||
f,
|
||||
self.cpu_seconds_total
|
||||
)?;
|
||||
|
||||
Self::process_open_fds.fmt_help(f)?;
|
||||
Self::process_open_fds.fmt_metric(f, self.open_fds)?;
|
||||
|
||||
if let Some(ref max_fds) = self.max_fds {
|
||||
Self::process_max_fds.fmt_help(f)?;
|
||||
Self::process_max_fds.fmt_metric(f, *max_fds)?;
|
||||
}
|
||||
|
||||
Self::process_virtual_memory_bytes.fmt_help(f)?;
|
||||
Self::process_virtual_memory_bytes.fmt_metric(
|
||||
f,
|
||||
self.virtual_memory_bytes
|
||||
)?;
|
||||
|
||||
Self::process_resident_memory_bytes.fmt_help(f)?;
|
||||
Self::process_resident_memory_bytes.fmt_metric(
|
||||
f,
|
||||
self.resident_memory_bytes
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
mod imp {
|
||||
use super::*;
|
||||
use super::super::{Counter, Gauge};
|
||||
|
||||
use std::{io, fs};
|
||||
|
||||
use procinfo::pid;
|
||||
use libc::{self, pid_t};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Sensor {
|
||||
page_size: usize,
|
||||
}
|
||||
|
||||
impl Sensor {
|
||||
pub fn new() -> io::Result<Sensor> {
|
||||
let page_size = match unsafe { libc::sysconf(libc::_SC_PAGESIZE) } {
|
||||
e if e < 0 => {
|
||||
let error = io::Error::last_os_error();
|
||||
error!("error getting page size: {:?}", error);
|
||||
return Err(error);
|
||||
},
|
||||
page_size => page_size as usize,
|
||||
};
|
||||
Ok(Sensor {
|
||||
page_size,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn metrics(&self) -> io::Result<ProcessMetrics> {
|
||||
// XXX potentially blocking call
|
||||
let stat = pid::stat_self()?;
|
||||
|
||||
let cpu_seconds_total = Counter::from((stat.utime + stat.stime) as u64);
|
||||
let virtual_memory_bytes = Gauge::from(stat.vsize as u64);
|
||||
let resident_memory_bytes = Gauge::from((stat.rss * self.page_size) as u64);
|
||||
|
||||
let metrics = ProcessMetrics {
|
||||
cpu_seconds_total,
|
||||
virtual_memory_bytes,
|
||||
resident_memory_bytes,
|
||||
open_fds: open_fds(stat.pid)?,
|
||||
max_fds: max_fds()?,
|
||||
};
|
||||
|
||||
Ok(metrics)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn open_fds(pid: pid_t) -> io::Result<Gauge> {
|
||||
let mut open = 0;
|
||||
for f in fs::read_dir(format!("/proc/{}/fd", pid))? {
|
||||
if !f?.file_type()?.is_dir() {
|
||||
open += 1;
|
||||
}
|
||||
}
|
||||
Ok(Gauge::from(open))
|
||||
}
|
||||
|
||||
fn max_fds() -> io::Result<Option<Gauge>> {
|
||||
let limit = pid::limits_self()?.max_open_files;
|
||||
let max_fds = limit.soft.or(limit.hard)
|
||||
.map(|max| Gauge::from(max as u64));
|
||||
Ok(max_fds)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "linux"))]
mod imp {
    use super::*;
    use std::io;

    /// Stand-in sensor for operating systems other than Linux.
    ///
    /// `new` always fails, so no value of this type is ever produced by it.
    #[derive(Debug)]
    pub struct Sensor {}

    impl Sensor {
        /// Always returns an error stating that process stats are not
        /// supported on this operating system.
        pub fn new() -> io::Result<Sensor> {
            let unsupported = io::Error::new(
                io::ErrorKind::Other,
                "procinfo not supported on this operating system",
            );
            Err(unsupported)
        }

        /// Panics if called; `new` never yields a `Sensor` to call it on.
        pub fn metrics(&self) -> io::Result<ProcessMetrics> {
            unreachable!("process::Sensor::metrics() on unsupported OS!")
        }
    }
}
|
Loading…
Reference in New Issue