add metrics rpc conventions implement (#4838)

* add metrics rpc conventions

* handle format violations

* handle format violations

* resolve the comments suggestion

* update RpcClientMetrics format

* update RpcServerMetrics format

* resolve time precision and cardinality issue

* invoke buildServerFallbackView method

* add RpcServerMetricsTest and RpcClientMetricsTest

* server metrics attibutes remove net.perr* and add net.host

* Update instrumentation-api/src/main/java/io/opentelemetry/instrumentation/api/instrumenter/rpc/RpcServerMetrics.java

* Update instrumentation-api/src/main/java/io/opentelemetry/instrumentation/api/instrumenter/rpc/RpcClientMetrics.java

Co-authored-by: Trask Stalnaker <trask.stalnaker@gmail.com>
This commit is contained in:
yangtaoran 2021-12-11 08:29:57 +08:00 committed by GitHub
parent e5ca1275d4
commit c59bebf6e2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 605 additions and 0 deletions

View File

@ -0,0 +1,115 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/
package io.opentelemetry.instrumentation.api.instrumenter.rpc;
import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.common.AttributesBuilder;
import io.opentelemetry.semconv.trace.attributes.SemanticAttributes;
import java.util.HashSet;
import java.util.Set;
import java.util.function.BiConsumer;
// this is temporary, see
// https://github.com/open-telemetry/opentelemetry-java-instrumentation/issues/3962#issuecomment-906606325
@SuppressWarnings("rawtypes")
final class MetricsView {
private static final Set<AttributeKey> alwaysInclude = buildAlwaysInclude();
private static final Set<AttributeKey> clientView = buildClientView();
private static final Set<AttributeKey> clientFallbackView = buildClientFallbackView();
private static final Set<AttributeKey> serverView = buildServerView();
private static final Set<AttributeKey> serverFallbackView = buildServerFallbackView();
private static Set<AttributeKey> buildAlwaysInclude() {
// the list of recommended metrics attributes is from
// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/semantic_conventions/rpc.md#attributes
Set<AttributeKey> view = new HashSet<>();
view.add(SemanticAttributes.RPC_SYSTEM);
view.add(SemanticAttributes.RPC_SERVICE);
view.add(SemanticAttributes.RPC_METHOD);
return view;
}
private static Set<AttributeKey> buildClientView() {
// the list of rpc client metrics attributes is from
// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/semantic_conventions/rpc.md#attributes
Set<AttributeKey> view = new HashSet<>(alwaysInclude);
view.add(SemanticAttributes.NET_PEER_NAME);
view.add(SemanticAttributes.NET_PEER_PORT);
view.add(SemanticAttributes.NET_TRANSPORT);
return view;
}
private static Set<AttributeKey> buildClientFallbackView() {
// the list of rpc client metrics attributes is from
// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/semantic_conventions/rpc.md#attributes
Set<AttributeKey> view = new HashSet<>(alwaysInclude);
view.add(SemanticAttributes.NET_PEER_IP);
view.add(SemanticAttributes.NET_PEER_PORT);
view.add(SemanticAttributes.NET_TRANSPORT);
return view;
}
private static Set<AttributeKey> buildServerView() {
// the list of rpc server metrics attributes is from
// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/semantic_conventions/rpc.md#attributes
Set<AttributeKey> view = new HashSet<>(alwaysInclude);
view.add(SemanticAttributes.NET_HOST_NAME);
view.add(SemanticAttributes.NET_TRANSPORT);
return view;
}
private static Set<AttributeKey> buildServerFallbackView() {
// the list of rpc server metrics attributes is from
// https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/semantic_conventions/rpc.md#attributes
Set<AttributeKey> view = new HashSet<>(alwaysInclude);
view.add(SemanticAttributes.NET_HOST_IP);
view.add(SemanticAttributes.NET_TRANSPORT);
return view;
}
private static <T> boolean containsAttribute(
AttributeKey<T> key, Attributes startAttributes, Attributes endAttributes) {
return startAttributes.get(key) != null || endAttributes.get(key) != null;
}
static Attributes applyClientView(Attributes startAttributes, Attributes endAttributes) {
Set<AttributeKey> fullSet = clientView;
if (!containsAttribute(SemanticAttributes.NET_PEER_NAME, startAttributes, endAttributes)) {
fullSet = clientFallbackView;
}
return applyView(fullSet, startAttributes, endAttributes);
}
static Attributes applyServerView(Attributes startAttributes, Attributes endAttributes) {
Set<AttributeKey> fullSet = serverView;
if (!containsAttribute(SemanticAttributes.NET_HOST_NAME, startAttributes, endAttributes)) {
fullSet = serverFallbackView;
}
return applyView(fullSet, startAttributes, endAttributes);
}
static Attributes applyView(
Set<AttributeKey> view, Attributes startAttributes, Attributes endAttributes) {
AttributesBuilder filtered = Attributes.builder();
applyView(filtered, startAttributes, view);
applyView(filtered, endAttributes, view);
return filtered.build();
}
@SuppressWarnings("unchecked")
private static void applyView(
AttributesBuilder filtered, Attributes attributes, Set<AttributeKey> view) {
attributes.forEach(
(BiConsumer<AttributeKey, Object>)
(key, value) -> {
if (view.contains(key)) {
filtered.put(key, value);
}
});
}
}

View File

@ -0,0 +1,90 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/
package io.opentelemetry.instrumentation.api.instrumenter.rpc;
import static io.opentelemetry.instrumentation.api.instrumenter.rpc.MetricsView.applyClientView;
import com.google.auto.value.AutoValue;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.metrics.DoubleHistogram;
import io.opentelemetry.api.metrics.Meter;
import io.opentelemetry.context.Context;
import io.opentelemetry.context.ContextKey;
import io.opentelemetry.instrumentation.api.annotations.UnstableApi;
import io.opentelemetry.instrumentation.api.instrumenter.RequestListener;
import io.opentelemetry.instrumentation.api.instrumenter.RequestMetrics;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* {@link RequestListener} which keeps track of <a
* href="https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/semantic_conventions/rpc.md#rpc-client">RPC
* client metrics</a>.
*
* <p>To use this class, you may need to add the {@code opentelemetry-api-metrics} artifact to your
* dependencies.
*/
@UnstableApi
public final class RpcClientMetrics implements RequestListener {
private static final double NANOS_PER_MS = TimeUnit.MILLISECONDS.toNanos(1);
private static final ContextKey<RpcClientMetrics.State> RPC_CLIENT_REQUEST_METRICS_STATE =
ContextKey.named("rpc-client-request-metrics-state");
private static final Logger logger = LoggerFactory.getLogger(RpcClientMetrics.class);
private final DoubleHistogram clientDurationHistogram;
private RpcClientMetrics(Meter meter) {
clientDurationHistogram =
meter
.histogramBuilder("rpc.client.duration")
.setDescription("The duration of an outbound RPC invocation")
.setUnit("milliseconds")
.build();
}
/**
* Returns a {@link RequestMetrics} which can be used to enable recording of {@link
* RpcClientMetrics} on an {@link
* io.opentelemetry.instrumentation.api.instrumenter.InstrumenterBuilder}.
*/
@UnstableApi
public static RequestMetrics get() {
return RpcClientMetrics::new;
}
@Override
public Context start(Context context, Attributes startAttributes, long startNanos) {
return context.with(
RPC_CLIENT_REQUEST_METRICS_STATE,
new AutoValue_RpcClientMetrics_State(startAttributes, startNanos));
}
@Override
public void end(Context context, Attributes endAttributes, long endNanos) {
State state = context.get(RPC_CLIENT_REQUEST_METRICS_STATE);
if (state == null) {
logger.debug(
"No state present when ending context {}. Cannot record RPC request metrics.", context);
return;
}
clientDurationHistogram.record(
(endNanos - state.startTimeNanos()) / NANOS_PER_MS,
applyClientView(state.startAttributes(), endAttributes),
context);
}
@AutoValue
abstract static class State {
abstract Attributes startAttributes();
abstract long startTimeNanos();
}
}

View File

@ -0,0 +1,90 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/
package io.opentelemetry.instrumentation.api.instrumenter.rpc;
import static io.opentelemetry.instrumentation.api.instrumenter.rpc.MetricsView.applyServerView;
import com.google.auto.value.AutoValue;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.metrics.DoubleHistogram;
import io.opentelemetry.api.metrics.Meter;
import io.opentelemetry.context.Context;
import io.opentelemetry.context.ContextKey;
import io.opentelemetry.instrumentation.api.annotations.UnstableApi;
import io.opentelemetry.instrumentation.api.instrumenter.RequestListener;
import io.opentelemetry.instrumentation.api.instrumenter.RequestMetrics;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* {@link RequestListener} which keeps track of <a
* href="https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/semantic_conventions/rpc.md#rpc-server">RPC
* server metrics</a>.
*
* <p>To use this class, you may need to add the {@code opentelemetry-api-metrics} artifact to your
* dependencies.
*/
@UnstableApi
public final class RpcServerMetrics implements RequestListener {
private static final double NANOS_PER_MS = TimeUnit.MILLISECONDS.toNanos(1);
private static final ContextKey<RpcServerMetrics.State> RPC_SERVER_REQUEST_METRICS_STATE =
ContextKey.named("rpc-server-request-metrics-state");
private static final Logger logger = LoggerFactory.getLogger(RpcServerMetrics.class);
private final DoubleHistogram serverDurationHistogram;
private RpcServerMetrics(Meter meter) {
serverDurationHistogram =
meter
.histogramBuilder("rpc.server.duration")
.setDescription("The duration of an inbound RPC invocation")
.setUnit("milliseconds")
.build();
}
/**
* Returns a {@link RequestMetrics} which can be used to enable recording of {@link
* RpcServerMetrics} on an {@link
* io.opentelemetry.instrumentation.api.instrumenter.InstrumenterBuilder}.
*/
@UnstableApi
public static RequestMetrics get() {
return RpcServerMetrics::new;
}
@Override
public Context start(Context context, Attributes startAttributes, long startNanos) {
return context.with(
RPC_SERVER_REQUEST_METRICS_STATE,
new AutoValue_RpcServerMetrics_State(startAttributes, startNanos));
}
@Override
public void end(Context context, Attributes endAttributes, long endNanos) {
State state = context.get(RPC_SERVER_REQUEST_METRICS_STATE);
if (state == null) {
logger.debug(
"No state present when ending context {}. Cannot record RPC request metrics.", context);
return;
}
serverDurationHistogram.record(
(endNanos - state.startTimeNanos()) / NANOS_PER_MS,
applyServerView(state.startAttributes(), endAttributes),
context);
}
@AutoValue
abstract static class State {
abstract Attributes startAttributes();
abstract long startTimeNanos();
}
}

View File

@ -0,0 +1,156 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/
package io.opentelemetry.instrumentation.api.instrumenter.rpc;
import static io.opentelemetry.sdk.testing.assertj.OpenTelemetryAssertions.attributeEntry;
import static org.assertj.core.api.Assertions.assertThat;
import static org.awaitility.Awaitility.await;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.SpanContext;
import io.opentelemetry.api.trace.TraceFlags;
import io.opentelemetry.api.trace.TraceState;
import io.opentelemetry.context.Context;
import io.opentelemetry.instrumentation.api.instrumenter.RequestListener;
import io.opentelemetry.sdk.metrics.SdkMeterProvider;
import io.opentelemetry.sdk.metrics.data.MetricData;
import io.opentelemetry.sdk.metrics.testing.InMemoryMetricReader;
import io.opentelemetry.sdk.testing.assertj.metrics.MetricAssertions;
import io.opentelemetry.semconv.trace.attributes.SemanticAttributes;
import java.util.Collection;
import java.util.concurrent.TimeUnit;
import org.junit.jupiter.api.Test;
class RpcClientMetricsTest {
@Test
void collectsMetrics() {
InMemoryMetricReader metricReader = new InMemoryMetricReader();
SdkMeterProvider meterProvider =
SdkMeterProvider.builder().registerMetricReader(metricReader).build();
RequestListener listener = RpcClientMetrics.get().create(meterProvider.get("test"));
Attributes requestAttributes =
Attributes.builder()
.put(SemanticAttributes.RPC_SYSTEM, "grpc")
.put(SemanticAttributes.RPC_SERVICE, "myservice.EchoService")
.put(SemanticAttributes.RPC_METHOD, "exampleMethod")
.build();
Attributes responseAttributes1 =
Attributes.builder()
.put(SemanticAttributes.NET_PEER_NAME, "example.com")
.put(SemanticAttributes.NET_PEER_IP, "127.0.0.1")
.put(SemanticAttributes.NET_PEER_PORT, 8080)
.put(SemanticAttributes.NET_TRANSPORT, "ip_tcp")
.build();
Attributes responseAttributes2 =
Attributes.builder()
.put(SemanticAttributes.NET_PEER_IP, "127.0.0.1")
.put(SemanticAttributes.NET_PEER_PORT, 8080)
.put(SemanticAttributes.NET_TRANSPORT, "ip_tcp")
.build();
Context parent =
Context.root()
.with(
Span.wrap(
SpanContext.create(
"ff01020304050600ff0a0b0c0d0e0f00",
"090a0b0c0d0e0f00",
TraceFlags.getSampled(),
TraceState.getDefault())));
Context context1 = listener.start(parent, requestAttributes, nanos(100));
// TODO(anuraaga): Remove await from this file after 1.8.0 hopefully fixes
// https://github.com/open-telemetry/opentelemetry-java/issues/3725
await()
.untilAsserted(
() -> {
Collection<MetricData> metrics = metricReader.collectAllMetrics();
assertThat(metrics).isEmpty();
});
Context context2 = listener.start(Context.root(), requestAttributes, nanos(150));
await()
.untilAsserted(
() -> {
Collection<MetricData> metrics = metricReader.collectAllMetrics();
assertThat(metrics).isEmpty();
});
listener.end(context1, responseAttributes1, nanos(250));
await()
.untilAsserted(
() -> {
Collection<MetricData> metrics = metricReader.collectAllMetrics();
assertThat(metrics).hasSize(1);
assertThat(metrics)
.anySatisfy(
metric ->
MetricAssertions.assertThat(metric)
.hasName("rpc.client.duration")
.hasDoubleHistogram()
.points()
.satisfiesExactly(
point -> {
MetricAssertions.assertThat(point)
.hasSum(150 /* millis */)
.attributes()
.containsOnly(
attributeEntry("rpc.system", "grpc"),
attributeEntry("rpc.service", "myservice.EchoService"),
attributeEntry("rpc.method", "exampleMethod"),
attributeEntry("net.peer.name", "example.com"),
attributeEntry("net.peer.port", 8080),
attributeEntry("net.transport", "ip_tcp"));
MetricAssertions.assertThat(point).exemplars().hasSize(1);
MetricAssertions.assertThat(point.getExemplars().get(0))
.hasTraceId("ff01020304050600ff0a0b0c0d0e0f00")
.hasSpanId("090a0b0c0d0e0f00");
}));
});
listener.end(context2, responseAttributes2, nanos(300));
await()
.untilAsserted(
() -> {
Collection<MetricData> metrics = metricReader.collectAllMetrics();
assertThat(metrics).hasSize(1);
assertThat(metrics)
.anySatisfy(
metric ->
MetricAssertions.assertThat(metric)
.hasName("rpc.client.duration")
.hasDoubleHistogram()
.points()
.satisfiesExactly(
point -> {
MetricAssertions.assertThat(point)
.hasSum(150 /* millis */)
.attributes()
.containsOnly(
attributeEntry("rpc.system", "grpc"),
attributeEntry("rpc.service", "myservice.EchoService"),
attributeEntry("rpc.method", "exampleMethod"),
attributeEntry("net.peer.ip", "127.0.0.1"),
attributeEntry("net.peer.port", 8080),
attributeEntry("net.transport", "ip_tcp"));
}));
});
}
private static long nanos(int millis) {
return TimeUnit.MILLISECONDS.toNanos(millis);
}
}

View File

@ -0,0 +1,154 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/
package io.opentelemetry.instrumentation.api.instrumenter.rpc;
import static io.opentelemetry.sdk.testing.assertj.OpenTelemetryAssertions.attributeEntry;
import static org.assertj.core.api.Assertions.assertThat;
import static org.awaitility.Awaitility.await;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.SpanContext;
import io.opentelemetry.api.trace.TraceFlags;
import io.opentelemetry.api.trace.TraceState;
import io.opentelemetry.context.Context;
import io.opentelemetry.instrumentation.api.instrumenter.RequestListener;
import io.opentelemetry.sdk.metrics.SdkMeterProvider;
import io.opentelemetry.sdk.metrics.data.MetricData;
import io.opentelemetry.sdk.metrics.testing.InMemoryMetricReader;
import io.opentelemetry.sdk.testing.assertj.metrics.MetricAssertions;
import io.opentelemetry.semconv.trace.attributes.SemanticAttributes;
import java.util.Collection;
import java.util.concurrent.TimeUnit;
import org.junit.jupiter.api.Test;
class RpcServerMetricsTest {
@Test
void collectsMetrics() {
InMemoryMetricReader metricReader = new InMemoryMetricReader();
SdkMeterProvider meterProvider =
SdkMeterProvider.builder().registerMetricReader(metricReader).build();
RequestListener listener = RpcServerMetrics.get().create(meterProvider.get("test"));
Attributes requestAttributes =
Attributes.builder()
.put(SemanticAttributes.RPC_SYSTEM, "grpc")
.put(SemanticAttributes.RPC_SERVICE, "myservice.EchoService")
.put(SemanticAttributes.RPC_METHOD, "exampleMethod")
.build();
Attributes responseAttributes1 =
Attributes.builder()
.put(SemanticAttributes.NET_HOST_NAME, "example.com")
.put(SemanticAttributes.NET_HOST_IP, "127.0.0.1")
.put(SemanticAttributes.NET_HOST_PORT, 8080)
.put(SemanticAttributes.NET_TRANSPORT, "ip_tcp")
.build();
Attributes responseAttributes2 =
Attributes.builder()
.put(SemanticAttributes.NET_HOST_IP, "127.0.0.1")
.put(SemanticAttributes.NET_HOST_PORT, 8080)
.put(SemanticAttributes.NET_TRANSPORT, "ip_tcp")
.build();
Context parent =
Context.root()
.with(
Span.wrap(
SpanContext.create(
"ff01020304050600ff0a0b0c0d0e0f00",
"090a0b0c0d0e0f00",
TraceFlags.getSampled(),
TraceState.getDefault())));
Context context1 = listener.start(parent, requestAttributes, nanos(100));
// TODO(anuraaga): Remove await from this file after 1.8.0 hopefully fixes
// https://github.com/open-telemetry/opentelemetry-java/issues/3725
await()
.untilAsserted(
() -> {
Collection<MetricData> metrics = metricReader.collectAllMetrics();
assertThat(metrics).isEmpty();
});
Context context2 = listener.start(Context.root(), requestAttributes, nanos(150));
await()
.untilAsserted(
() -> {
Collection<MetricData> metrics = metricReader.collectAllMetrics();
assertThat(metrics).isEmpty();
});
listener.end(context1, responseAttributes1, nanos(250));
await()
.untilAsserted(
() -> {
Collection<MetricData> metrics = metricReader.collectAllMetrics();
assertThat(metrics).hasSize(1);
assertThat(metrics)
.anySatisfy(
metric ->
MetricAssertions.assertThat(metric)
.hasName("rpc.server.duration")
.hasDoubleHistogram()
.points()
.satisfiesExactly(
point -> {
MetricAssertions.assertThat(point)
.hasSum(150 /* millis */)
.attributes()
.containsOnly(
attributeEntry("rpc.system", "grpc"),
attributeEntry("rpc.service", "myservice.EchoService"),
attributeEntry("rpc.method", "exampleMethod"),
attributeEntry("net.host.name", "example.com"),
attributeEntry("net.transport", "ip_tcp"));
MetricAssertions.assertThat(point).exemplars().hasSize(1);
MetricAssertions.assertThat(point.getExemplars().get(0))
.hasTraceId("ff01020304050600ff0a0b0c0d0e0f00")
.hasSpanId("090a0b0c0d0e0f00");
}));
});
listener.end(context2, responseAttributes2, nanos(300));
await()
.untilAsserted(
() -> {
Collection<MetricData> metrics = metricReader.collectAllMetrics();
assertThat(metrics).hasSize(1);
assertThat(metrics)
.anySatisfy(
metric ->
MetricAssertions.assertThat(metric)
.hasName("rpc.server.duration")
.hasDoubleHistogram()
.points()
.satisfiesExactly(
point -> {
MetricAssertions.assertThat(point)
.hasSum(150 /* millis */)
.attributes()
.containsOnly(
attributeEntry("rpc.system", "grpc"),
attributeEntry("rpc.service", "myservice.EchoService"),
attributeEntry("rpc.method", "exampleMethod"),
attributeEntry("net.host.ip", "127.0.0.1"),
attributeEntry("net.transport", "ip_tcp"));
}));
});
}
private static long nanos(int millis) {
return TimeUnit.MILLISECONDS.toNanos(millis);
}
}