proxy: remove unused metrics (#826)

This PR removes the unused `request_duration_ms` and `response_duration_ms` histogram metrics from the proxy. It also removes them from the `simulate-proxy` script's output and from `docs/proxy-metrics.md`.

Closes #821
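
For orientation, the deleted collectors are ordinary client_golang histogram vectors. A minimal, self-contained sketch of the same pattern; the bucket bounds and label names here are stand-ins, not the script's actual values:

```go
package main

import "github.com/prometheus/client_golang/prometheus"

func main() {
	// Stand-in bucket bounds; simulate-proxy defines its own latencyBucketBounds.
	buckets := []float64{1, 10, 100, 1000, 10000}

	// Same shape as the request_duration_ms collector removed below.
	requestDurationMs := prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "request_duration_ms",
			Help:    "A histogram of the duration of a request",
			Buckets: buckets,
		},
		[]string{"direction"}, // stand-in for the script's requestLabels
	)
	prometheus.MustRegister(requestDurationMs)

	// The script recorded one observation per simulated request:
	requestDurationMs.With(prometheus.Labels{"direction": "inbound"}).Observe(42)
}
```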
Eliza Weisman 2018-04-23 16:05:20 -07:00 committed by GitHub
parent 39eccb09e2
commit d9112abc93
4 changed files with 9 additions and 151 deletions

File 1 of 4: the `simulate-proxy` script (Go)

@@ -35,9 +35,7 @@ type simulatedProxy struct {
type proxyMetricCollectors struct {
requestTotals *prom.CounterVec
responseTotals *prom.CounterVec
- requestDurationMs *prom.HistogramVec
responseLatencyMs *prom.HistogramVec
- responseDurationMs *prom.HistogramVec
tcpAcceptOpenTotal prom.Counter
tcpAcceptCloseTotal *prom.CounterVec
tcpConnectOpenTotal prom.Counter
@@ -159,9 +157,6 @@ func (s *simulatedProxy) generateProxyTraffic() {
// inbound requests
s.inbound.requestTotals.With(prom.Labels{}).Add(float64(inboundRandomCount))
- for _, latency := range randomLatencies(inboundRandomCount) {
- s.inbound.requestDurationMs.With(prom.Labels{}).Observe(latency)
- }
// inbound responses
inboundResponseLabels := randomResponseLabels()
@@ -169,9 +164,6 @@ func (s *simulatedProxy) generateProxyTraffic() {
for _, latency := range randomLatencies(inboundRandomCount) {
s.inbound.responseLatencyMs.With(inboundResponseLabels).Observe(latency)
}
- for _, latency := range randomLatencies(inboundRandomCount) {
- s.inbound.responseDurationMs.With(inboundResponseLabels).Observe(latency)
- }
s.inbound.generateTCPStats(inboundRandomCount)
@@ -186,9 +178,6 @@ func (s *simulatedProxy) generateProxyTraffic() {
// outbound requests
s.outbound.requestTotals.With(outboundLabels).Add(float64(outboundRandomCount))
- for _, latency := range randomLatencies(outboundRandomCount) {
- s.outbound.requestDurationMs.With(outboundLabels).Observe(latency)
- }
// outbound resposnes
outboundResponseLabels := outboundLabels
@@ -199,9 +188,6 @@ func (s *simulatedProxy) generateProxyTraffic() {
for _, latency := range randomLatencies(outboundRandomCount) {
s.outbound.responseLatencyMs.With(outboundResponseLabels).Observe(latency)
}
- for _, latency := range randomLatencies(outboundRandomCount) {
- s.outbound.responseDurationMs.With(outboundResponseLabels).Observe(latency)
- }
s.outbound.generateTCPStats(outboundRandomCount)
}
@@ -449,13 +435,6 @@ func newSimulatedProxy(pod v1.Pod, deployments []string, replicaSets *k8s.Replic
Help: "A counter of the number of responses the proxy has received",
ConstLabels: constLabels,
}, responseLabels)
- requestDurationMs := prom.NewHistogramVec(
- prom.HistogramOpts{
- Name: "request_duration_ms",
- Help: "A histogram of the duration of a request",
- ConstLabels: constLabels,
- Buckets: latencyBucketBounds,
- }, requestLabels)
responseLatencyMs := prom.NewHistogramVec(
prom.HistogramOpts{
Name: "response_latency_ms",
@@ -463,13 +442,6 @@ func newSimulatedProxy(pod v1.Pod, deployments []string, replicaSets *k8s.Replic
ConstLabels: constLabels,
Buckets: latencyBucketBounds,
}, responseLabels)
- responseDurationMs := prom.NewHistogramVec(
- prom.HistogramOpts{
- Name: "response_duration_ms",
- Help: "A histogram of the duration of a response",
- ConstLabels: constLabels,
- Buckets: latencyBucketBounds,
- }, responseLabels)
tcpAcceptOpenTotal := prom.NewCounterVec(
prom.CounterOpts{
Name: "tcp_accept_open_total",
@@ -545,9 +517,7 @@ func newSimulatedProxy(pod v1.Pod, deployments []string, replicaSets *k8s.Replic
inbound: &proxyMetricCollectors{
requestTotals: requestTotals.MustCurryWith(inboundLabels),
responseTotals: responseTotals.MustCurryWith(inboundLabels),
- requestDurationMs: requestDurationMs.MustCurryWith(inboundLabels).(*prom.HistogramVec),
responseLatencyMs: responseLatencyMs.MustCurryWith(inboundLabels).(*prom.HistogramVec),
- responseDurationMs: responseDurationMs.MustCurryWith(inboundLabels).(*prom.HistogramVec),
tcpAcceptOpenTotal: tcpAcceptOpenTotal.With(inboundTCPLabels),
tcpAcceptCloseTotal: tcpAcceptCloseTotal.MustCurryWith(inboundTCPLabels),
tcpConnectOpenTotal: tcpConnectOpenTotal.With(inboundTCPLabels),
@@ -560,9 +530,7 @@ func newSimulatedProxy(pod v1.Pod, deployments []string, replicaSets *k8s.Replic
outbound: &proxyMetricCollectors{
requestTotals: requestTotals.MustCurryWith(outboundLabels),
responseTotals: responseTotals.MustCurryWith(outboundLabels),
- requestDurationMs: requestDurationMs.MustCurryWith(outboundLabels).(*prom.HistogramVec),
responseLatencyMs: responseLatencyMs.MustCurryWith(outboundLabels).(*prom.HistogramVec),
- responseDurationMs: responseDurationMs.MustCurryWith(outboundLabels).(*prom.HistogramVec),
tcpAcceptOpenTotal: tcpAcceptOpenTotal.With(outboundLabels),
tcpAcceptCloseTotal: tcpAcceptCloseTotal.MustCurryWith(outboundLabels),
tcpConnectOpenTotal: tcpConnectOpenTotal.With(outboundLabels),
@@ -577,9 +545,7 @@ func newSimulatedProxy(pod v1.Pod, deployments []string, replicaSets *k8s.Replic
proxy.registerer.MustRegister(
requestTotals,
responseTotals,
- requestDurationMs,
responseLatencyMs,
- responseDurationMs,
tcpAcceptOpenTotal,
tcpAcceptCloseTotal,
tcpConnectOpenTotal,
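
Note the `.(*prom.HistogramVec)` assertions in the hunks above: `MustCurryWith` on a `*HistogramVec` returns the `prometheus.ObserverVec` interface, so the script asserts back to the concrete type to store it in `proxyMetricCollectors`. A minimal sketch of that pattern; the label names are stand-ins:

```go
package main

import "github.com/prometheus/client_golang/prometheus"

func main() {
	responseLatencyMs := prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "response_latency_ms",
			Help:    "A histogram of the total latency of a response",
			Buckets: []float64{1, 10, 100, 1000},
		},
		[]string{"direction", "status_code"}, // stand-in label set
	)

	// MustCurryWith pre-binds the direction label and returns an
	// ObserverVec; the type assertion recovers *prometheus.HistogramVec.
	inbound := responseLatencyMs.
		MustCurryWith(prometheus.Labels{"direction": "inbound"}).(*prometheus.HistogramVec)

	// Only the remaining labels are supplied at observation time.
	inbound.With(prometheus.Labels{"status_code": "200"}).Observe(3.5)
}
```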

File 2 of 4: `docs/proxy-metrics.md`

@@ -14,21 +14,11 @@ port (default: `:4191`) in the [Prometheus format][prom-format]:
A counter of the number of requests the proxy has received. This is incremented
when the request stream begins.
- ### `request_duration_ms`
- A histogram of the duration of a request. This is measured from when the
- request headers are received to when the request stream has completed.
### `response_total`
A counter of the number of responses the proxy has received. This is
incremented when the response stream ends.
- ### `response_duration_ms`
- A histogram of the duration of a response. This is measured from when the
- response headers are received to when the response stream has completed.
### `response_latency_ms`
A histogram of the total latency of a response. This is measured from when the
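
With the duration histograms gone, `response_latency_ms` is the only request/response histogram the proxy exports. As with any Prometheus histogram, it surfaces as `_bucket`, `_count`, and `_sum` series on the metrics endpoint. A hypothetical scrape excerpt in the shape the tests below assert against (the `le` bound and the `_sum` value are illustrative, not from the source):

```
response_latency_ms_bucket{authority="tele.test.svc.cluster.local",direction="outbound",classification="success",status_code="200",le="10"} 4
response_latency_ms_count{authority="tele.test.svc.cluster.local",direction="outbound",classification="success",status_code="200"} 4
response_latency_ms_sum{authority="tele.test.svc.cluster.local",direction="outbound",classification="success",status_code="200"} 12
```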

File 3 of 4: the proxy's telemetry metrics module (Rust)

@@ -61,10 +61,8 @@ pub use self::labels::DstLabels;
#[derive(Debug, Clone)]
struct Metrics {
request_total: Metric<Counter, Arc<RequestLabels>>,
- request_duration: Metric<Histogram, Arc<RequestLabels>>,
response_total: Metric<Counter, Arc<ResponseLabels>>,
- response_duration: Metric<Histogram, Arc<ResponseLabels>>,
response_latency: Metric<Histogram, Arc<ResponseLabels>>,
tcp_accept_open_total: Metric<Counter, Arc<TransportLabels>>,
@@ -152,25 +150,11 @@ impl Metrics {
"A counter of the number of requests the proxy has received.",
);
- let request_duration = Metric::<Histogram, Arc<RequestLabels>>::new(
- "request_duration_ms",
- "A histogram of the duration of a request. This is measured from \
- when the request headers are received to when the request \
- stream has completed.",
- );
let response_total = Metric::<Counter, Arc<ResponseLabels>>::new(
"response_total",
"A counter of the number of responses the proxy has received.",
);
- let response_duration = Metric::<Histogram, Arc<ResponseLabels>>::new(
- "response_duration_ms",
- "A histogram of the duration of a response. This is measured from \
- when the response headers are received to when the response \
- stream has completed.",
- );
let response_latency = Metric::<Histogram, Arc<ResponseLabels>>::new(
"response_latency_ms",
"A histogram of the total latency of a response. This is measured \
@@ -219,9 +203,7 @@ impl Metrics {
Metrics {
request_total,
- request_duration,
response_total,
- response_duration,
response_latency,
tcp_accept_open_total,
tcp_accept_close_total,
@@ -242,22 +224,6 @@ impl Metrics {
.or_insert_with(Counter::default)
}
- fn request_duration(&mut self,
- labels: &Arc<RequestLabels>)
- -> &mut Histogram {
- self.request_duration.values
- .entry(labels.clone())
- .or_insert_with(Histogram::default)
- }
- fn response_duration(&mut self,
- labels: &Arc<ResponseLabels>)
- -> &mut Histogram {
- self.response_duration.values
- .entry(labels.clone())
- .or_insert_with(Histogram::default)
- }
fn response_latency(&mut self,
labels: &Arc<ResponseLabels>)
-> &mut Histogram {
@@ -334,9 +300,7 @@ impl Metrics {
impl fmt::Display for Metrics {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
writeln!(f, "{}", self.request_total)?;
- writeln!(f, "{}", self.request_duration)?;
writeln!(f, "{}", self.response_total)?;
- writeln!(f, "{}", self.response_duration)?;
writeln!(f, "{}", self.response_latency)?;
writeln!(f, "{}", self.tcp_accept_open_total)?;
writeln!(f, "{}", self.tcp_accept_close_total)?;
@@ -504,21 +468,17 @@ impl Aggregate {
// when the stream *finishes*.
},
- Event::StreamRequestFail(ref req, ref fail) => {
+ Event::StreamRequestFail(ref req, _) => {
let labels = Arc::new(RequestLabels::new(req));
self.update(|metrics| {
- *metrics.request_duration(&labels) +=
- fail.since_request_open;
*metrics.request_total(&labels).incr();
})
},
- Event::StreamRequestEnd(ref req, ref end) => {
+ Event::StreamRequestEnd(ref req, _) => {
let labels = Arc::new(RequestLabels::new(req));
self.update(|metrics| {
*metrics.request_total(&labels).incr();
- *metrics.request_duration(&labels) +=
- end.since_request_open;
})
},
@@ -529,7 +489,6 @@ impl Aggregate {
));
self.update(|metrics| {
*metrics.response_total(&labels).incr();
- *metrics.response_duration(&labels) += end.since_response_open;
*metrics.response_latency(&labels) += end.since_request_open;
});
},
@@ -539,7 +498,6 @@
let labels = Arc::new(ResponseLabels::fail(res));
self.update(|metrics| {
*metrics.response_total(&labels).incr();
- *metrics.response_duration(&labels) += fail.since_response_open;
*metrics.response_latency(&labels) += fail.since_request_open;
});
},

File 4 of 4: the proxy's telemetry integration tests (Rust)

@@ -403,48 +403,6 @@ fn metrics_endpoint_outbound_response_latency() {
"response_latency_ms_count{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\",classification=\"success\",status_code=\"200\"} 4");
}
- // https://github.com/runconduit/conduit/issues/613
- #[test]
- #[cfg_attr(not(feature = "flaky_tests"), ignore)]
- fn metrics_endpoint_inbound_request_duration() {
- let _ = env_logger::try_init();
- let Fixture { client, metrics, proxy: _proxy } = Fixture::inbound();
- // request with body should increment request_duration
- info!("client.get(/)");
- assert_eq!(client.get("/"), "hello");
- assert_contains!(metrics.get("/metrics"),
- "request_duration_ms_count{authority=\"tele.test.svc.cluster.local\",direction=\"inbound\"} 1");
- // request without body should also increment request_duration
- info!("client.get(/)");
- assert_eq!(client.get("/"), "hello");
- assert_contains!(metrics.get("/metrics"),
- "request_duration_ms_count{authority=\"tele.test.svc.cluster.local\",direction=\"inbound\"} 2");
- }
- // https://github.com/runconduit/conduit/issues/613
- #[test]
- #[cfg_attr(not(feature = "flaky_tests"), ignore)]
- fn metrics_endpoint_outbound_request_duration() {
- let _ = env_logger::try_init();
- let Fixture { client, metrics, proxy: _proxy } = Fixture::outbound();
- info!("client.get(/)");
- assert_eq!(client.get("/"), "hello");
- assert_contains!(metrics.get("/metrics"),
- "request_duration_ms_count{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\"} 1");
- info!("client.get(/)");
- assert_eq!(client.get("/"), "hello");
- assert_contains!(metrics.get("/metrics"),
- "request_duration_ms_count{authority=\"tele.test.svc.cluster.local\",direction=\"outbound\"} 2");
- }
// Tests for destination labels provided by control plane service discovery.
mod outbound_dst_labels {
use super::support::*;
@@ -544,9 +502,7 @@ mod outbound_dst_labels {
info!("client.get(/)");
assert_eq!(client.get("/"), "hello");
- assert_contains!(metrics.get("/metrics"),
- "request_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"bar\"} 1");
assert_contains!(metrics.get("/metrics"),
- "response_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"bar\",classification=\"success\",status_code=\"200\"} 1");
+ "response_latency_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"bar\",classification=\"success\",status_code=\"200\"} 1");
assert_contains!(metrics.get("/metrics"),
"request_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"bar\"} 1");
assert_contains!(metrics.get("/metrics"),
@@ -578,9 +534,7 @@ mod outbound_dst_labels {
assert_eq!(client.get("/"), "hello");
// the first request should be labeled with `dst_addr_label="foo"`
- assert_contains!(metrics.get("/metrics"),
- "request_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"unchanged\"} 1");
assert_contains!(metrics.get("/metrics"),
- "response_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"unchanged\",classification=\"success\",status_code=\"200\"} 1");
+ "response_latency_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"unchanged\",classification=\"success\",status_code=\"200\"} 1");
assert_contains!(metrics.get("/metrics"),
"request_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"unchanged\"} 1");
assert_contains!(metrics.get("/metrics"),
@@ -598,18 +552,14 @@ mod outbound_dst_labels {
assert_eq!(client.get("/"), "hello");
// the second request should increment stats labeled with `dst_addr_label="bar"`
- assert_contains!(metrics.get("/metrics"),
- "request_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"bar\",dst_set_label=\"unchanged\"} 1");
assert_contains!(metrics.get("/metrics"),
- "response_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"bar\",dst_set_label=\"unchanged\",classification=\"success\",status_code=\"200\"} 1");
+ "response_latency_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"bar\",dst_set_label=\"unchanged\",classification=\"success\",status_code=\"200\"} 1");
assert_contains!(metrics.get("/metrics"),
"request_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"bar\",dst_set_label=\"unchanged\"} 1");
assert_contains!(metrics.get("/metrics"),
"response_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"bar\",dst_set_label=\"unchanged\",classification=\"success\",status_code=\"200\"} 1");
// stats recorded from the first request should still be present.
- assert_contains!(metrics.get("/metrics"),
- "request_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"unchanged\"} 1");
assert_contains!(metrics.get("/metrics"),
- "response_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"unchanged\",classification=\"success\",status_code=\"200\"} 1");
+ "response_latency_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"unchanged\",classification=\"success\",status_code=\"200\"} 1");
assert_contains!(metrics.get("/metrics"),
"request_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_addr_label=\"foo\",dst_set_label=\"unchanged\"} 1");
assert_contains!(metrics.get("/metrics"),
@@ -639,9 +589,7 @@ mod outbound_dst_labels {
assert_eq!(client.get("/"), "hello");
// the first request should be labeled with `dst_addr_label="foo"`
- assert_contains!(metrics.get("/metrics"),
- "request_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"foo\"} 1");
assert_contains!(metrics.get("/metrics"),
- "response_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"foo\",classification=\"success\",status_code=\"200\"} 1");
+ "response_latency_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"foo\",classification=\"success\",status_code=\"200\"} 1");
assert_contains!(metrics.get("/metrics"),
"request_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"foo\"} 1");
assert_contains!(metrics.get("/metrics"),
@@ -658,18 +606,14 @@ mod outbound_dst_labels {
assert_eq!(client.get("/"), "hello");
// the second request should increment stats labeled with `dst_addr_label="bar"`
- assert_contains!(metrics.get("/metrics"),
- "request_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"bar\"} 1");
assert_contains!(metrics.get("/metrics"),
- "response_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"bar\",classification=\"success\",status_code=\"200\"} 1");
+ "response_latency_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"bar\",classification=\"success\",status_code=\"200\"} 1");
assert_contains!(metrics.get("/metrics"),
"request_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"bar\"} 1");
assert_contains!(metrics.get("/metrics"),
"response_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"bar\",classification=\"success\",status_code=\"200\"} 1");
// stats recorded from the first request should still be present.
- assert_contains!(metrics.get("/metrics"),
- "request_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"foo\"} 1");
assert_contains!(metrics.get("/metrics"),
- "response_duration_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"foo\",classification=\"success\",status_code=\"200\"} 1");
+ "response_latency_ms_count{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"foo\",classification=\"success\",status_code=\"200\"} 1");
assert_contains!(metrics.get("/metrics"),
"request_total{authority=\"labeled.test.svc.cluster.local\",direction=\"outbound\",dst_set_label=\"foo\"} 1");
assert_contains!(metrics.get("/metrics"),