Make sure AWS SDK spans suppress client spans all the time. (#1837)

* Make sure AWS SDK spans suppress client spans all the time.

* Make more consistent with other instrumentation

* Update instrumentation/netty/netty-4.1/javaagent/src/main/java/io/opentelemetry/javaagent/instrumentation/netty/v4_1/client/NettyHttpClientTracer.java

Co-authored-by: Trask Stalnaker <trask.stalnaker@gmail.com>

* more dragons

* Grammar

* README

Co-authored-by: Trask Stalnaker <trask.stalnaker@gmail.com>
This commit is contained in:
Anuraag Agrawal 2020-12-07 17:10:43 +09:00 committed by GitHub
parent 7199595730
commit c5a257619a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 104 additions and 153 deletions

View File

@ -9,9 +9,16 @@ muzzle {
}
dependencies {
implementation project(':instrumentation:aws-sdk:aws-sdk-2.2:library')
implementation(project(':instrumentation:aws-sdk:aws-sdk-2.2:library')) {
exclude group: 'io.opentelemetry', module: 'opentelemetry-extension-trace-propagators'
}
library group: 'software.amazon.awssdk', name: 'aws-core', version: '2.2.0'
testImplementation project(':instrumentation:aws-sdk:aws-sdk-2.2:testing')
// Make sure these don't add HTTP headers
testImplementation project(':instrumentation:apache-httpclient:apache-httpclient-4.0:javaagent')
testImplementation project(':instrumentation:netty:netty-4.1:javaagent')
testImplementation deps.opentelemetryTraceProps
}

View File

@ -1,100 +0,0 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/
package io.opentelemetry.javaagent.instrumentation.awssdk.v2_2;
import static io.opentelemetry.javaagent.instrumentation.awssdk.v2_2.TracingExecutionInterceptor.ScopeHolder.CURRENT;
import static io.opentelemetry.javaagent.tooling.ClassLoaderMatcher.hasClassesNamed;
import static io.opentelemetry.javaagent.tooling.bytebuddy.matcher.AgentElementMatchers.extendsClass;
import static io.opentelemetry.javaagent.tooling.matcher.NameMatchers.namedOneOf;
import static java.util.Collections.singletonMap;
import static net.bytebuddy.matcher.ElementMatchers.isMethod;
import static net.bytebuddy.matcher.ElementMatchers.isPublic;
import static net.bytebuddy.matcher.ElementMatchers.nameStartsWith;
import static net.bytebuddy.matcher.ElementMatchers.named;
import io.opentelemetry.context.Scope;
import io.opentelemetry.javaagent.tooling.TypeInstrumentation;
import java.util.Map;
import net.bytebuddy.asm.Advice;
import net.bytebuddy.description.method.MethodDescription;
import net.bytebuddy.description.type.TypeDescription;
import net.bytebuddy.matcher.ElementMatcher;
import software.amazon.awssdk.core.internal.http.pipeline.stages.MakeAsyncHttpRequestStage;
/**
* Separate instrumentation class to close aws request scope right after request has been submitted
* for execution for Sync clients.
*/
public class AwsHttpClientInstrumentation implements TypeInstrumentation {
@Override
public ElementMatcher<ClassLoader> classLoaderOptimization() {
return hasClassesNamed(
"software.amazon.awssdk.core.internal.http.pipeline.stages.MakeHttpRequestStage");
}
@Override
public ElementMatcher<TypeDescription> typeMatcher() {
return nameStartsWith("software.amazon.awssdk.")
.and(
extendsClass(
namedOneOf(
"software.amazon.awssdk.core.internal.http.pipeline.stages.MakeHttpRequestStage",
"software.amazon.awssdk.core.internal.http.pipeline.stages.MakeAsyncHttpRequestStage")));
}
@Override
public Map<? extends ElementMatcher<? super MethodDescription>, String> transformers() {
return singletonMap(
isMethod().and(isPublic()).and(named("execute")),
AwsHttpClientInstrumentation.class.getName() + "$AwsHttpClientAdvice");
}
public static class AwsHttpClientAdvice {
// scope.close here doesn't actually close the span.
/**
* FIXME: This is a hack to prevent netty instrumentation from messing things up.
*
* <p>Currently netty instrumentation cannot handle way AWS SDK makes http requests. If AWS SDK
* make a netty call with active scope then continuation will be created that would never be
* closed preventing whole trace from reporting. This happens because netty switches channels
* between connection and request stages and netty instrumentation cannot find continuation
* stored in channel attributes.
*/
@Advice.OnMethodEnter(suppress = Throwable.class)
public static boolean methodEnter(@Advice.This Object thiz) {
if (thiz instanceof MakeAsyncHttpRequestStage) {
Scope scope = CURRENT.get();
if (scope != null) {
CURRENT.set(null);
scope.close();
return true;
}
}
return false;
}
@Advice.OnMethodExit(onThrowable = Throwable.class, suppress = Throwable.class)
public static void methodExit(@Advice.Enter boolean scopeAlreadyClosed) {
if (!scopeAlreadyClosed) {
Scope scope = CURRENT.get();
if (scope != null) {
CURRENT.set(null);
scope.close();
}
}
}
/**
* This is to make muzzle think we need TracingExecutionInterceptor to make sure we do not apply
* this instrumentation when TracingExecutionInterceptor would not work.
*/
public static void muzzleCheck() {
TracingExecutionInterceptor.muzzleCheck();
}
}
}

View File

@ -10,7 +10,7 @@ import static io.opentelemetry.javaagent.tooling.ClassLoaderMatcher.hasClassesNa
import com.google.auto.service.AutoService;
import io.opentelemetry.javaagent.tooling.InstrumentationModule;
import io.opentelemetry.javaagent.tooling.TypeInstrumentation;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import net.bytebuddy.matcher.ElementMatcher;
@ -40,7 +40,6 @@ public class AwsSdkInstrumentationModule extends InstrumentationModule {
@Override
public List<TypeInstrumentation> typeInstrumentations() {
return Arrays.asList(
new AwsHttpClientInstrumentation(), new AwsSdkInitializationInstrumentation());
return Collections.singletonList(new AwsSdkInitializationInstrumentation());
}
}

View File

@ -5,8 +5,6 @@
package io.opentelemetry.javaagent.instrumentation.awssdk.v2_2;
import io.opentelemetry.context.Context;
import io.opentelemetry.context.Scope;
import io.opentelemetry.instrumentation.awssdk.v2_2.AwsSdk;
import java.io.InputStream;
import java.nio.ByteBuffer;
@ -41,10 +39,6 @@ import software.amazon.awssdk.http.SdkHttpResponse;
*/
public class TracingExecutionInterceptor implements ExecutionInterceptor {
public static class ScopeHolder {
public static final ThreadLocal<Scope> CURRENT = new ThreadLocal<>();
}
private final ExecutionInterceptor delegate;
public TracingExecutionInterceptor() {
@ -98,12 +92,6 @@ public class TracingExecutionInterceptor implements ExecutionInterceptor {
public void beforeTransmission(
BeforeTransmission context, ExecutionAttributes executionAttributes) {
delegate.beforeTransmission(context, executionAttributes);
Context parentContext = AwsSdk.getContextFromAttributes(executionAttributes);
if (parentContext != null) {
// This scope will be closed by AwsHttpClientInstrumentation since ExecutionInterceptor API
// doesn't provide a way to run code in the same thread after transmission has been scheduled.
ScopeHolder.CURRENT.set(parentContext.makeCurrent());
}
}
@Override

View File

@ -0,0 +1,24 @@
# AWS Java SDK v2 Instrumentation
Instrumentation for [AWS Java SDK v2](https://github.com/aws/aws-sdk-java-v2).
## Usage
To register instrumentation on an SDK client, register the interceptor when creating it.
```java
DynamoDbClient client = DynamoDbClient.builder()
.overrideConfiguration(ClientOverrideConfiguration.builder()
.addExecutionInterceptor(AwsSdk.newInterceptor()))
.build())
.build();
```
## Trace propagation
The AWS SDK instrumentation currently only supports injecting the trace header into the request
using the [AWS Trace Header](https://docs.aws.amazon.com/xray/latest/devguide/xray-concepts.html#xray-concepts-tracingheader) format.
This format is the only format recognized by AWS managed services, and populating will allow
propagating the trace through them. If this does not fulfill your use case, perhaps because you are
using the same SDK with a different non-AWS managed service, let us know so we can provide
configuration for this behavior.

View File

@ -1,6 +1,8 @@
apply from: "$rootDir/gradle/instrumentation-library.gradle"
dependencies {
implementation deps.opentelemetryTraceProps
library group: 'software.amazon.awssdk', name: 'aws-core', version: '2.2.0'
testImplementation project(':instrumentation:aws-sdk:aws-sdk-2.2:testing')

View File

@ -9,6 +9,7 @@ import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.Span.Kind;
import io.opentelemetry.api.trace.Tracer;
import io.opentelemetry.api.trace.attributes.SemanticAttributes;
import io.opentelemetry.context.Context;
import io.opentelemetry.context.propagation.TextMapPropagator.Setter;
import io.opentelemetry.instrumentation.api.tracer.BaseTracer;
import io.opentelemetry.instrumentation.api.tracer.HttpClientTracer;
@ -77,15 +78,9 @@ final class AwsSdkHttpClientTracer
return super.onRequest(span, sdkHttpRequest);
}
public Span getOrCreateSpan(String name, Tracer tracer, Kind kind) {
io.opentelemetry.context.Context context = io.opentelemetry.context.Context.current();
Span clientSpan = context.get(BaseTracer.CONTEXT_CLIENT_SPAN_KEY);
if (clientSpan != null) {
// We don't want to create two client spans for a given client call, suppress inner spans.
return Span.getInvalid();
}
return tracer.spanBuilder(name).setSpanKind(kind).setParent(context).startSpan();
public Context startSpan(Context parentContext, String name, Tracer tracer, Kind kind) {
Span clientSpan =
tracer.spanBuilder(name).setSpanKind(kind).setParent(parentContext).startSpan();
return parentContext.with(clientSpan).with(BaseTracer.CONTEXT_CLIENT_SPAN_KEY, clientSpan);
}
}

View File

@ -9,14 +9,16 @@ import static io.opentelemetry.instrumentation.awssdk.v2_2.AwsSdk.getContextFrom
import static io.opentelemetry.instrumentation.awssdk.v2_2.AwsSdkHttpClientTracer.tracer;
import static io.opentelemetry.instrumentation.awssdk.v2_2.RequestType.ofSdkRequest;
import io.opentelemetry.api.OpenTelemetry;
import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.Span.Kind;
import io.opentelemetry.context.Scope;
import io.opentelemetry.extension.trace.propagation.AwsXRayPropagator;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.Map;
import org.checkerframework.checker.nullness.qual.Nullable;
import software.amazon.awssdk.awscore.AwsResponse;
import software.amazon.awssdk.core.ClientType;
import software.amazon.awssdk.core.SdkRequest;
import software.amazon.awssdk.core.SdkResponse;
import software.amazon.awssdk.core.interceptor.Context;
@ -33,9 +35,8 @@ final class TracingExecutionInterceptor implements ExecutionInterceptor {
// instrumentation, and won't conflict with usage outside javaagent instrumentation
static final ExecutionAttribute<io.opentelemetry.context.Context> CONTEXT_ATTRIBUTE =
new ExecutionAttribute<>(TracingExecutionInterceptor.class.getName() + ".Context");
// the class name is part of the attribute name, so that it will be shaded when used in javaagent
// instrumentation, and won't conflict with usage outside javaagent instrumentation
static final ExecutionAttribute<Scope> SCOPE_ATTRIBUTE =
new ExecutionAttribute<>(TracingExecutionInterceptor.class.getName() + ".Scope");
static final ExecutionAttribute<RequestType> REQUEST_TYPE_ATTRIBUTE =
new ExecutionAttribute<>(TracingExecutionInterceptor.class.getName() + ".RequestType");
@ -79,29 +80,36 @@ final class TracingExecutionInterceptor implements ExecutionInterceptor {
@Override
public void beforeExecution(
Context.BeforeExecution context, ExecutionAttributes executionAttributes) {
Span span = tracer().getOrCreateSpan(spanName(executionAttributes), AwsSdk.tracer(), kind);
executionAttributes.putAttribute(
CONTEXT_ATTRIBUTE, io.opentelemetry.context.Context.current().with(span));
io.opentelemetry.context.Context parentContext = io.opentelemetry.context.Context.current();
if (!tracer().shouldStartSpan(parentContext)) {
return;
}
io.opentelemetry.context.Context otelContext =
tracer().startSpan(parentContext, spanName(executionAttributes), AwsSdk.tracer(), kind);
executionAttributes.putAttribute(CONTEXT_ATTRIBUTE, otelContext);
RequestType type = ofSdkRequest(context.request());
if (type != null) {
executionAttributes.putAttribute(REQUEST_TYPE_ATTRIBUTE, type);
}
if (executionAttributes
.getAttribute(SdkExecutionAttribute.CLIENT_TYPE)
.equals(ClientType.SYNC)) {
// We can only activate context for synchronous clients, which allows downstream
// instrumentation like Apache to know about the SDK span.
executionAttributes.putAttribute(SCOPE_ATTRIBUTE, otelContext.makeCurrent());
}
}
@Override
public SdkHttpRequest modifyHttpRequest(
Context.ModifyHttpRequest context, ExecutionAttributes executionAttributes) {
io.opentelemetry.context.Context otelContext =
executionAttributes.getAttribute(CONTEXT_ATTRIBUTE);
// Never null in practice unless another interceptor cleared out the attribute, which
// is theoretically possible.
io.opentelemetry.context.Context otelContext = getContextFromAttributes(executionAttributes);
if (otelContext == null) {
return context.httpRequest();
}
SdkHttpRequest.Builder builder = context.httpRequest().toBuilder();
OpenTelemetry.getGlobalPropagators()
.getTextMapPropagator()
.inject(otelContext, builder, AwsSdkInjectAdapter.INSTANCE);
AwsXRayPropagator.getInstance().inject(otelContext, builder, AwsSdkInjectAdapter.INSTANCE);
return builder.build();
}
@ -109,16 +117,18 @@ final class TracingExecutionInterceptor implements ExecutionInterceptor {
public void afterMarshalling(
Context.AfterMarshalling context, ExecutionAttributes executionAttributes) {
io.opentelemetry.context.Context otelContext = getContextFromAttributes(executionAttributes);
if (otelContext != null) {
Span span = Span.fromContext(otelContext);
tracer().onRequest(span, context.httpRequest());
SdkRequestDecorator decorator = decorator(executionAttributes);
if (decorator != null) {
decorator.decorate(span, context.request(), executionAttributes);
}
decorateWithGenericRequestData(span, context.request());
decorateWithExAttributesData(span, executionAttributes);
if (otelContext == null) {
return;
}
Span span = Span.fromContext(otelContext);
tracer().onRequest(span, context.httpRequest());
SdkRequestDecorator decorator = decorator(executionAttributes);
if (decorator != null) {
decorator.decorate(span, context.request(), executionAttributes);
}
decorateWithGenericRequestData(span, context.request());
decorateWithExAttributesData(span, executionAttributes);
}
private void decorateWithGenericRequestData(Span span, SdkRequest request) {
@ -146,6 +156,10 @@ final class TracingExecutionInterceptor implements ExecutionInterceptor {
@Override
public void afterExecution(
Context.AfterExecution context, ExecutionAttributes executionAttributes) {
Scope scope = executionAttributes.getAttribute(SCOPE_ATTRIBUTE);
if (scope != null) {
scope.close();
}
io.opentelemetry.context.Context otelContext = getContextFromAttributes(executionAttributes);
if (otelContext != null) {
clearAttributes(executionAttributes);

View File

@ -148,7 +148,8 @@ abstract class AbstractAws2ClientTest extends InstrumentationSpecification {
}
}
}
server.lastRequest.headers.get("traceparent") != null
server.lastRequest.headers.get("X-Amzn-Trace-Id") != null
server.lastRequest.headers.get("traceparent") == null
}
static dynamoDbRequestDataTable(client) {
@ -226,7 +227,8 @@ abstract class AbstractAws2ClientTest extends InstrumentationSpecification {
}
}
}
server.lastRequest.headers.get("traceparent") != null
server.lastRequest.headers.get("X-Amzn-Trace-Id") != null
server.lastRequest.headers.get("traceparent") == null
where:
service | operation | method | path | requestId | builder | call | body
@ -314,7 +316,8 @@ abstract class AbstractAws2ClientTest extends InstrumentationSpecification {
}
}
}
server.lastRequest.headers.get("traceparent") != null
server.lastRequest.headers.get("X-Amzn-Trace-Id") != null
server.lastRequest.headers.get("traceparent") == null
where:
service | operation | method | path | requestId | builder | call | body

View File

@ -33,7 +33,7 @@ public class HttpClientRequestTracingHandler extends ChannelOutboundHandlerAdapt
parentContext = Context.current();
}
if (!tracer().shouldStartSpan(parentContext)) {
if (!tracer().shouldStartSpan(parentContext, request)) {
ctx.write(msg, prm);
return;
}

View File

@ -11,6 +11,7 @@ import static io.opentelemetry.javaagent.instrumentation.netty.v4_1.client.Netty
import io.netty.handler.codec.http.HttpHeaders;
import io.netty.handler.codec.http.HttpRequest;
import io.netty.handler.codec.http.HttpResponse;
import io.opentelemetry.context.Context;
import io.opentelemetry.context.propagation.TextMapPropagator.Setter;
import io.opentelemetry.instrumentation.api.tracer.HttpClientTracer;
import java.net.URI;
@ -65,6 +66,24 @@ public class NettyHttpClientTracer
return SETTER;
}
public boolean shouldStartSpan(Context parentContext, HttpRequest request) {
if (!super.shouldStartSpan(parentContext)) {
return false;
}
// The AWS SDK uses Netty for asynchronous clients but constructs a request signature before
// beginning transport. This means we MUST suppress Netty spans we would normally create or
// they will inject their own trace header, which does not match what was present when the
// signature was computed, breaking the SDK request completely. We have not found how to
// cleanly propagate context from the SDK instrumentation, which executes on an application
// thread, to Netty instrumentation, which executes on event loops. If it's possible, it may
// require instrumenting internal classes. Using a header which is more or less guaranteed to
// always exist is arguably more stable.
if (request.headers().contains("amz-sdk-invocation-id")) {
return false;
}
return true;
}
@Override
protected String getInstrumentationName() {
return "io.opentelemetry.javaagent.netty";