diff --git a/controller/script/simulate-proxy/main.go b/controller/script/simulate-proxy/main.go index ba5ac6080..c50c1c2cc 100644 --- a/controller/script/simulate-proxy/main.go +++ b/controller/script/simulate-proxy/main.go @@ -35,9 +35,7 @@ type simulatedProxy struct { type proxyMetricCollectors struct { requestTotals *prom.CounterVec responseTotals *prom.CounterVec - requestDurationMs *prom.HistogramVec responseLatencyMs *prom.HistogramVec - responseDurationMs *prom.HistogramVec tcpAcceptOpenTotal prom.Counter tcpAcceptCloseTotal *prom.CounterVec tcpConnectOpenTotal prom.Counter @@ -159,9 +157,6 @@ func (s *simulatedProxy) generateProxyTraffic() { // inbound requests s.inbound.requestTotals.With(prom.Labels{}).Add(float64(inboundRandomCount)) - for _, latency := range randomLatencies(inboundRandomCount) { - s.inbound.requestDurationMs.With(prom.Labels{}).Observe(latency) - } // inbound responses inboundResponseLabels := randomResponseLabels() @@ -169,9 +164,6 @@ func (s *simulatedProxy) generateProxyTraffic() { for _, latency := range randomLatencies(inboundRandomCount) { s.inbound.responseLatencyMs.With(inboundResponseLabels).Observe(latency) } - for _, latency := range randomLatencies(inboundRandomCount) { - s.inbound.responseDurationMs.With(inboundResponseLabels).Observe(latency) - } s.inbound.generateTCPStats(inboundRandomCount) @@ -186,9 +178,6 @@ func (s *simulatedProxy) generateProxyTraffic() { // outbound requests s.outbound.requestTotals.With(outboundLabels).Add(float64(outboundRandomCount)) - for _, latency := range randomLatencies(outboundRandomCount) { - s.outbound.requestDurationMs.With(outboundLabels).Observe(latency) - } // outbound resposnes outboundResponseLabels := outboundLabels @@ -199,9 +188,6 @@ func (s *simulatedProxy) generateProxyTraffic() { for _, latency := range randomLatencies(outboundRandomCount) { s.outbound.responseLatencyMs.With(outboundResponseLabels).Observe(latency) } - for _, latency := range randomLatencies(outboundRandomCount) { - s.outbound.responseDurationMs.With(outboundResponseLabels).Observe(latency) - } s.outbound.generateTCPStats(outboundRandomCount) } @@ -449,13 +435,6 @@ func newSimulatedProxy(pod v1.Pod, deployments []string, replicaSets *k8s.Replic Help: "A counter of the number of responses the proxy has received", ConstLabels: constLabels, }, responseLabels) - requestDurationMs := prom.NewHistogramVec( - prom.HistogramOpts{ - Name: "request_duration_ms", - Help: "A histogram of the duration of a request", - ConstLabels: constLabels, - Buckets: latencyBucketBounds, - }, requestLabels) responseLatencyMs := prom.NewHistogramVec( prom.HistogramOpts{ Name: "response_latency_ms", @@ -463,13 +442,6 @@ func newSimulatedProxy(pod v1.Pod, deployments []string, replicaSets *k8s.Replic ConstLabels: constLabels, Buckets: latencyBucketBounds, }, responseLabels) - responseDurationMs := prom.NewHistogramVec( - prom.HistogramOpts{ - Name: "response_duration_ms", - Help: "A histogram of the duration of a response", - ConstLabels: constLabels, - Buckets: latencyBucketBounds, - }, responseLabels) tcpAcceptOpenTotal := prom.NewCounterVec( prom.CounterOpts{ Name: "tcp_accept_open_total", @@ -545,9 +517,7 @@ func newSimulatedProxy(pod v1.Pod, deployments []string, replicaSets *k8s.Replic inbound: &proxyMetricCollectors{ requestTotals: requestTotals.MustCurryWith(inboundLabels), responseTotals: responseTotals.MustCurryWith(inboundLabels), - requestDurationMs: requestDurationMs.MustCurryWith(inboundLabels).(*prom.HistogramVec), responseLatencyMs: responseLatencyMs.MustCurryWith(inboundLabels).(*prom.HistogramVec), - responseDurationMs: responseDurationMs.MustCurryWith(inboundLabels).(*prom.HistogramVec), tcpAcceptOpenTotal: tcpAcceptOpenTotal.With(inboundTCPLabels), tcpAcceptCloseTotal: tcpAcceptCloseTotal.MustCurryWith(inboundTCPLabels), tcpConnectOpenTotal: tcpConnectOpenTotal.With(inboundTCPLabels), @@ -560,9 +530,7 @@ func newSimulatedProxy(pod v1.Pod, deployments []string, replicaSets *k8s.Replic outbound: &proxyMetricCollectors{ requestTotals: requestTotals.MustCurryWith(outboundLabels), responseTotals: responseTotals.MustCurryWith(outboundLabels), - requestDurationMs: requestDurationMs.MustCurryWith(outboundLabels).(*prom.HistogramVec), responseLatencyMs: responseLatencyMs.MustCurryWith(outboundLabels).(*prom.HistogramVec), - responseDurationMs: responseDurationMs.MustCurryWith(outboundLabels).(*prom.HistogramVec), tcpAcceptOpenTotal: tcpAcceptOpenTotal.With(outboundLabels), tcpAcceptCloseTotal: tcpAcceptCloseTotal.MustCurryWith(outboundLabels), tcpConnectOpenTotal: tcpConnectOpenTotal.With(outboundLabels), @@ -577,9 +545,7 @@ func newSimulatedProxy(pod v1.Pod, deployments []string, replicaSets *k8s.Replic proxy.registerer.MustRegister( requestTotals, responseTotals, - requestDurationMs, responseLatencyMs, - responseDurationMs, tcpAcceptOpenTotal, tcpAcceptCloseTotal, tcpConnectOpenTotal, diff --git a/doc/proxy-metrics.md b/doc/proxy-metrics.md index 6df0e8687..b970fe740 100644 --- a/doc/proxy-metrics.md +++ b/doc/proxy-metrics.md @@ -14,21 +14,11 @@ port (default: `:4191`) in the [Prometheus format][prom-format]: A counter of the number of requests the proxy has received. This is incremented when the request stream begins. -### `request_duration_ms` - -A histogram of the duration of a request. This is measured from when the -request headers are received to when the request stream has completed. - ### `response_total` A counter of the number of responses the proxy has received. This is incremented when the response stream ends. -### `response_duration_ms` - -A histogram of the duration of a response. This is measured from when the -response headers are received to when the response stream has completed. - ### `response_latency_ms` A histogram of the total latency of a response. This is measured from when the diff --git a/proxy/src/telemetry/metrics/mod.rs b/proxy/src/telemetry/metrics/mod.rs index 2fbf20275..f961e03d3 100644 --- a/proxy/src/telemetry/metrics/mod.rs +++ b/proxy/src/telemetry/metrics/mod.rs @@ -61,10 +61,8 @@ pub use self::labels::DstLabels; #[derive(Debug, Clone)] struct Metrics { request_total: Metric>, - request_duration: Metric>, response_total: Metric>, - response_duration: Metric>, response_latency: Metric>, tcp_accept_open_total: Metric>, @@ -152,25 +150,11 @@ impl Metrics { "A counter of the number of requests the proxy has received.", ); - let request_duration = Metric::>::new( - "request_duration_ms", - "A histogram of the duration of a request. This is measured from \ - when the request headers are received to when the request \ - stream has completed.", - ); - let response_total = Metric::>::new( "response_total", "A counter of the number of responses the proxy has received.", ); - let response_duration = Metric::>::new( - "response_duration_ms", - "A histogram of the duration of a response. This is measured from \ - when the response headers are received to when the response \ - stream has completed.", - ); - let response_latency = Metric::>::new( "response_latency_ms", "A histogram of the total latency of a response. This is measured \ @@ -219,9 +203,7 @@ impl Metrics { Metrics { request_total, - request_duration, response_total, - response_duration, response_latency, tcp_accept_open_total, tcp_accept_close_total, @@ -242,22 +224,6 @@ impl Metrics { .or_insert_with(Counter::default) } - fn request_duration(&mut self, - labels: &Arc) - -> &mut Histogram { - self.request_duration.values - .entry(labels.clone()) - .or_insert_with(Histogram::default) - } - - fn response_duration(&mut self, - labels: &Arc) - -> &mut Histogram { - self.response_duration.values - .entry(labels.clone()) - .or_insert_with(Histogram::default) - } - fn response_latency(&mut self, labels: &Arc) -> &mut Histogram { @@ -334,9 +300,7 @@ impl Metrics { impl fmt::Display for Metrics { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { writeln!(f, "{}", self.request_total)?; - writeln!(f, "{}", self.request_duration)?; writeln!(f, "{}", self.response_total)?; - writeln!(f, "{}", self.response_duration)?; writeln!(f, "{}", self.response_latency)?; writeln!(f, "{}", self.tcp_accept_open_total)?; writeln!(f, "{}", self.tcp_accept_close_total)?; @@ -504,21 +468,17 @@ impl Aggregate { // when the stream *finishes*. }, - Event::StreamRequestFail(ref req, ref fail) => { + Event::StreamRequestFail(ref req, _) => { let labels = Arc::new(RequestLabels::new(req)); self.update(|metrics| { - *metrics.request_duration(&labels) += - fail.since_request_open; *metrics.request_total(&labels).incr(); }) }, - Event::StreamRequestEnd(ref req, ref end) => { + Event::StreamRequestEnd(ref req, _) => { let labels = Arc::new(RequestLabels::new(req)); self.update(|metrics| { *metrics.request_total(&labels).incr(); - *metrics.request_duration(&labels) += - end.since_request_open; }) }, @@ -529,7 +489,6 @@ impl Aggregate { )); self.update(|metrics| { *metrics.response_total(&labels).incr(); - *metrics.response_duration(&labels) += end.since_response_open; *metrics.response_latency(&labels) += end.since_request_open; }); }, @@ -539,7 +498,6 @@ impl Aggregate { let labels = Arc::new(ResponseLabels::fail(res)); self.update(|metrics| { *metrics.response_total(&labels).incr(); - *metrics.response_duration(&labels) += fail.since_response_open; *metrics.response_latency(&labels) += fail.since_request_open; }); }, diff --git a/proxy/tests/telemetry.rs b/proxy/tests/telemetry.rs index b61da1f95..9621d0dbe 100644 --- a/proxy/tests/telemetry.rs +++ b/proxy/tests/telemetry.rs @@ -403,48 +403,6 @@ fn metrics_endpoint_outbound_response_latency() { "response_latency_ms_count{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\",classification=\"success\",status_code=\"200\"} 4"); } -// https://github.com/runconduit/conduit/issues/613 -#[test] -#[cfg_attr(not(feature = "flaky_tests"), ignore)] -fn metrics_endpoint_inbound_request_duration() { - let _ = env_logger::try_init(); - let Fixture { client, metrics, proxy: _proxy } = Fixture::inbound(); - - // request with body should increment request_duration - info!("client.get(/)"); - assert_eq!(client.get("/"), "hello"); - - assert_contains!(metrics.get("/metrics"), - "request_duration_ms_count{authority=\"tele.test.svc.cluster.local\",direction=\"inbound\"} 1"); - - // request without body should also increment request_duration - info!("client.get(/)"); - assert_eq!(client.get("/"), "hello"); - - assert_contains!(metrics.get("/metrics"), - "request_duration_ms_count{authority=\"tele.test.svc.cluster.local\",direction=\"inbound\"} 2"); -} - -// https://github.com/runconduit/conduit/issues/613 -#[test] -#[cfg_attr(not(feature = "flaky_tests"), ignore)] -fn metrics_endpoint_outbound_request_duration() { - let _ = env_logger::try_init(); - let Fixture { client, metrics, proxy: _proxy } = Fixture::outbound(); - - info!("client.get(/)"); - assert_eq!(client.get("/"), "hello"); - - assert_contains!(metrics.get("/metrics"), - "request_duration_ms_count{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\"} 1"); - - info!("client.get(/)"); - assert_eq!(client.get("/"), "hello"); - - assert_contains!(metrics.get("/metrics"), - "request_duration_ms_count{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\"} 2"); -} - // Tests for destination labels provided by control plane service discovery. mod outbound_dst_labels { use super::support::*; @@ -544,9 +502,7 @@ mod outbound_dst_labels { info!("client.get(/)"); assert_eq!(client.get("/"), "hello"); assert_contains!(metrics.get("/metrics"), - "request_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"bar\"} 1"); - assert_contains!(metrics.get("/metrics"), - "response_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"bar\",classification=\"success\",status_code=\"200\"} 1"); + "response_latency_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"bar\",classification=\"success\",status_code=\"200\"} 1"); assert_contains!(metrics.get("/metrics"), "request_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"bar\"} 1"); assert_contains!(metrics.get("/metrics"), @@ -578,9 +534,7 @@ mod outbound_dst_labels { assert_eq!(client.get("/"), "hello"); // the first request should be labeled with `dst_addr_label="foo"` assert_contains!(metrics.get("/metrics"), - "request_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"unchanged\"} 1"); - assert_contains!(metrics.get("/metrics"), - "response_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"unchanged\",classification=\"success\",status_code=\"200\"} 1"); + "response_latency_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"unchanged\",classification=\"success\",status_code=\"200\"} 1"); assert_contains!(metrics.get("/metrics"), "request_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"unchanged\"} 1"); assert_contains!(metrics.get("/metrics"), @@ -598,18 +552,14 @@ mod outbound_dst_labels { assert_eq!(client.get("/"), "hello"); // the second request should increment stats labeled with `dst_addr_label="bar"` assert_contains!(metrics.get("/metrics"), - "request_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"bar\",dst_set_label=\"unchanged\"} 1"); - assert_contains!(metrics.get("/metrics"), - "response_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"bar\",dst_set_label=\"unchanged\",classification=\"success\",status_code=\"200\"} 1"); + "response_latency_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"bar\",dst_set_label=\"unchanged\",classification=\"success\",status_code=\"200\"} 1"); assert_contains!(metrics.get("/metrics"), "request_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"bar\",dst_set_label=\"unchanged\"} 1"); assert_contains!(metrics.get("/metrics"), "response_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"bar\",dst_set_label=\"unchanged\",classification=\"success\",status_code=\"200\"} 1"); // stats recorded from the first request should still be present. assert_contains!(metrics.get("/metrics"), - "request_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"unchanged\"} 1"); - assert_contains!(metrics.get("/metrics"), - "response_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"unchanged\",classification=\"success\",status_code=\"200\"} 1"); + "response_latency_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"unchanged\",classification=\"success\",status_code=\"200\"} 1"); assert_contains!(metrics.get("/metrics"), "request_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"unchanged\"} 1"); assert_contains!(metrics.get("/metrics"), @@ -639,9 +589,7 @@ mod outbound_dst_labels { assert_eq!(client.get("/"), "hello"); // the first request should be labeled with `dst_addr_label="foo"` assert_contains!(metrics.get("/metrics"), - "request_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"foo\"} 1"); - assert_contains!(metrics.get("/metrics"), - "response_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"foo\",classification=\"success\",status_code=\"200\"} 1"); + "response_latency_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"foo\",classification=\"success\",status_code=\"200\"} 1"); assert_contains!(metrics.get("/metrics"), "request_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"foo\"} 1"); assert_contains!(metrics.get("/metrics"), @@ -658,18 +606,14 @@ mod outbound_dst_labels { assert_eq!(client.get("/"), "hello"); // the second request should increment stats labeled with `dst_addr_label="bar"` assert_contains!(metrics.get("/metrics"), - "request_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"bar\"} 1"); - assert_contains!(metrics.get("/metrics"), - "response_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"bar\",classification=\"success\",status_code=\"200\"} 1"); + "response_latency_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"bar\",classification=\"success\",status_code=\"200\"} 1"); assert_contains!(metrics.get("/metrics"), "request_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"bar\"} 1"); assert_contains!(metrics.get("/metrics"), "response_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"bar\",classification=\"success\",status_code=\"200\"} 1"); // stats recorded from the first request should still be present. assert_contains!(metrics.get("/metrics"), - "request_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"foo\"} 1"); - assert_contains!(metrics.get("/metrics"), - "response_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"foo\",classification=\"success\",status_code=\"200\"} 1"); + "response_latency_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"foo\",classification=\"success\",status_code=\"200\"} 1"); assert_contains!(metrics.get("/metrics"), "request_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"foo\"} 1"); assert_contains!(metrics.get("/metrics"),