diff --git a/proxy/src/telemetry/metrics/prometheus.rs b/proxy/src/telemetry/metrics/prometheus.rs index 9c241192d..db7823ff4 100644 --- a/proxy/src/telemetry/metrics/prometheus.rs +++ b/proxy/src/telemetry/metrics/prometheus.rs @@ -413,7 +413,10 @@ impl fmt::Display for RequestLabels { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "authority=\"{}\",", self.authority)?; if let Some(ref outbound) = self.outbound_labels { - write!(f, "direction=\"outbound\",{}", outbound)?; + write!(f, "direction=\"outbound\"{comma}{dst}", + comma = if !outbound.is_empty() { "," } else { "" }, + dst = outbound + )?; } else { write!(f, "direction=\"inbound\"")?; } @@ -425,16 +428,25 @@ impl fmt::Display for RequestLabels { } + +// ===== impl OutboundLabels ===== + +impl OutboundLabels { + fn is_empty(&self) -> bool { + self.namespace.is_none() && self.dst.is_none() + } +} + impl fmt::Display for OutboundLabels { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match *self { OutboundLabels { namespace: Some(ref ns), dst: Some(ref dst) } => - write!(f, "dst_namespace=\"{}\",dst_{},", ns, dst), + write!(f, "dst_namespace=\"{}\",dst_{}", ns, dst), OutboundLabels { namespace: None, dst: Some(ref dst), } => - write!(f, "dst_{},", dst), + write!(f, "dst_{}", dst), OutboundLabels { namespace: Some(ref ns), dst: None, } => - write!(f, "dst_namespace=\"{}\",", ns), + write!(f, "dst_namespace=\"{}\"", ns), OutboundLabels { namespace: None, dst: None, } => write!(f, ""), } diff --git a/proxy/tests/telemetry.rs b/proxy/tests/telemetry.rs index 24fc25048..b608b8cdc 100644 --- a/proxy/tests/telemetry.rs +++ b/proxy/tests/telemetry.rs @@ -267,7 +267,7 @@ macro_rules! assert_contains { } #[test] -fn metrics_endpoint_request_count() { +fn metrics_endpoint_inbound_request_count() { let _ = env_logger::try_init(); info!("running test server"); @@ -296,7 +296,38 @@ fn metrics_endpoint_request_count() { } #[test] -fn metrics_endpoint_response_latency() { +fn metrics_endpoint_outbound_request_count() { + let _ = env_logger::try_init(); + + info!("running test server"); + let srv = server::new().route("/hey", "hello").run(); + + let ctrl = controller::new() + .destination("tele.test.svc.cluster.local", srv.addr) + .run(); + let proxy = proxy::new() + .controller(ctrl) + .outbound(srv) + .metrics_flush_interval(Duration::from_millis(500)) + .run(); + let client = client::new(proxy.outbound, "tele.test.svc.cluster.local"); + let metrics = client::http1(proxy.metrics, "localhost"); + + // prior to seeing any requests, request count should be empty. + assert!(!metrics.get("/metrics") + .contains("request_total{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\"}")); + + info!("client.get(/hey)"); + assert_eq!(client.get("/hey"), "hello"); + + let scrape = metrics.get("/metrics"); + // after seeing a request, the request count should be 1. + assert_contains!(scrape, "request_total{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\"} 1"); + +} + +#[test] +fn metrics_endpoint_inbound_response_latency() { let _ = env_logger::try_init(); info!("running test server"); @@ -376,3 +407,128 @@ fn metrics_endpoint_response_latency() { assert_contains!(scrape, "response_latency_ms_count{authority=\"tele.test.svc.cluster.local\",direction=\"inbound\",status_code=\"200\"} 4"); } + + +#[test] +fn metrics_endpoint_outbound_response_latency() { + let _ = env_logger::try_init(); + + info!("running test server"); + let srv = server::new() + .route_with_latency("/hey", "hello", Duration::from_millis(500)) + .route_with_latency("/hi", "good morning", Duration::from_millis(40)) + .run(); + + let ctrl = controller::new() + .destination("tele.test.svc.cluster.local", srv.addr) + .run(); + let proxy = proxy::new() + .controller(ctrl) + .outbound(srv) + .metrics_flush_interval(Duration::from_millis(500)) + .run(); + let client = client::new(proxy.outbound, "tele.test.svc.cluster.local"); + let metrics = client::http1(proxy.metrics, "localhost"); + + info!("client.get(/hey)"); + assert_eq!(client.get("/hey"), "hello"); + + let scrape = metrics.get("/metrics"); + // assert the >=1000ms bucket is incremented by our request with 500ms + // extra latency. + assert_contains!(scrape, + "response_latency_ms_bucket{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\",status_code=\"200\",le=\"1000\"} 1"); + // the histogram's count should be 1. + assert_contains!(scrape, + "response_latency_ms_count{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\",status_code=\"200\"} 1"); + // TODO: we're not going to make any assertions about the + // response_latency_ms_sum stat, since its granularity depends on the actual + // observed latencies, which may vary a bit. we could make more reliable + // assertions about that stat if we were using a mock timer, though, as the + // observed latency values would be predictable. + + info!("client.get(/hi)"); + assert_eq!(client.get("/hi"), "good morning"); + + let scrape = metrics.get("/metrics"); + + // request with 40ms extra latency should fall into the 50ms bucket. + assert_contains!(scrape, + "response_latency_ms_bucket{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\",status_code=\"200\",le=\"50\"} 1"); + // 1000ms bucket should be incremented as well, since it counts *all* + // bservations less than or equal to 1000ms, even if they also increment + // other buckets. + assert_contains!(scrape, + "response_latency_ms_bucket{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\",status_code=\"200\",le=\"1000\"} 2"); + // the histogram's total count should be 2. + assert_contains!(scrape, + "response_latency_ms_count{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\",status_code=\"200\"} 2"); + + info!("client.get(/hi)"); + assert_eq!(client.get("/hi"), "good morning"); + + let scrape = metrics.get("/metrics"); + // request with 40ms extra latency should fall into the 50ms bucket. + assert_contains!(scrape, + "response_latency_ms_bucket{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\",status_code=\"200\",le=\"50\"} 2"); + // 1000ms bucket should be incremented as well. + assert_contains!(scrape, + "response_latency_ms_bucket{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\",status_code=\"200\",le=\"1000\"} 3"); + // the histogram's total count should be 3. + assert_contains!(scrape, + "response_latency_ms_count{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\",status_code=\"200\"} 3"); + + info!("client.get(/hey)"); + assert_eq!(client.get("/hey"), "hello"); + + let scrape = metrics.get("/metrics"); + // 50ms bucket should be un-changed by the request with 500ms latency. + assert_contains!(scrape, + "response_latency_ms_bucket{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\",status_code=\"200\",le=\"50\"} 2"); + // 1000ms bucket should be incremented. + assert_contains!(scrape, + "response_latency_ms_bucket{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\",status_code=\"200\",le=\"1000\"} 4"); + // the histogram's total count should be 4. + assert_contains!(scrape, + "response_latency_ms_count{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\",status_code=\"200\"} 4"); +} + +#[test] +fn metrics_have_no_double_commas() { + // Test for regressions to runconduit/conduit#600. + let _ = env_logger::try_init(); + + info!("running test server"); + let inbound_srv = server::new().route("/hey", "hello").run(); + let outbound_srv = server::new().route("/hey", "hello").run(); + + let ctrl = controller::new() + .destination("tele.test.svc.cluster.local", outbound_srv.addr) + .run(); + let proxy = proxy::new() + .controller(ctrl) + .inbound(inbound_srv) + .outbound(outbound_srv) + .metrics_flush_interval(Duration::from_millis(500)) + .run(); + let client = client::new(proxy.inbound, "tele.test.svc.cluster.local"); + let metrics = client::http1(proxy.metrics, "localhost"); + + let scrape = metrics.get("/metrics"); + assert!(!scrape.contains(",,")); + + info!("inbound.get(/hey)"); + assert_eq!(client.get("/hey"), "hello"); + + let scrape = metrics.get("/metrics"); + assert!(!scrape.contains(",,"), "inbound metrics had double comma"); + + let client = client::new(proxy.outbound, "tele.test.svc.cluster.local"); + + info!("outbound.get(/hey)"); + assert_eq!(client.get("/hey"), "hello"); + + let scrape = metrics.get("/metrics"); + assert!(!scrape.contains(",,"), "outbound metrics had double comma"); + +}