Merge pull request #1335 from DataDog/revert-1160-tyler/stacktrace

Revert "Add config to capture stacktrace when a span duration exceeds threshold"
This commit is contained in:
Laplie Anderson 2020-03-24 10:20:32 -04:00 committed by GitHub
commit d4ce0ad558
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 12 additions and 114 deletions

View File

@ -12,7 +12,6 @@ class TagsAssert {
private final BigInteger spanParentId private final BigInteger spanParentId
private final Map<String, Object> tags private final Map<String, Object> tags
private final Set<String> assertedTags = new TreeSet<>() private final Set<String> assertedTags = new TreeSet<>()
private final Set<String> ignoredTags = ["slow.stack"] // Don't error if this tag isn't checked.
private TagsAssert(DDSpan span) { private TagsAssert(DDSpan span) {
this.spanParentId = span.parentId this.spanParentId = span.parentId
@ -103,7 +102,6 @@ class TagsAssert {
void assertTagsAllVerified() { void assertTagsAllVerified() {
def set = new TreeMap<>(tags).keySet() def set = new TreeMap<>(tags).keySet()
set.removeAll(assertedTags) set.removeAll(assertedTags)
set.removeAll(ignoredTags)
// The primary goal is to ensure the set is empty. // The primary goal is to ensure the set is empty.
// tags and assertedTags are included via an "always true" comparison // tags and assertedTags are included via an "always true" comparison
// so they provide better context in the error message. // so they provide better context in the error message.

View File

@ -23,7 +23,6 @@ import java.util.Properties;
import java.util.Set; import java.util.Set;
import java.util.SortedSet; import java.util.SortedSet;
import java.util.UUID; import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import lombok.Getter; import lombok.Getter;
import lombok.ToString; import lombok.ToString;
@ -86,8 +85,6 @@ public class Config {
public static final String SPLIT_BY_TAGS = "trace.split-by-tags"; public static final String SPLIT_BY_TAGS = "trace.split-by-tags";
public static final String SCOPE_DEPTH_LIMIT = "trace.scope.depth.limit"; public static final String SCOPE_DEPTH_LIMIT = "trace.scope.depth.limit";
public static final String PARTIAL_FLUSH_MIN_SPANS = "trace.partial.flush.min.spans"; public static final String PARTIAL_FLUSH_MIN_SPANS = "trace.partial.flush.min.spans";
public static final String SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS =
"trace.span.duration-above-average.stacktrace.millis";
public static final String RUNTIME_CONTEXT_FIELD_INJECTION = public static final String RUNTIME_CONTEXT_FIELD_INJECTION =
"trace.runtime.context.field.injection"; "trace.runtime.context.field.injection";
public static final String PROPAGATION_STYLE_EXTRACT = "propagation.style.extract"; public static final String PROPAGATION_STYLE_EXTRACT = "propagation.style.extract";
@ -164,8 +161,6 @@ public class Config {
private static final String DEFAULT_SPLIT_BY_TAGS = ""; private static final String DEFAULT_SPLIT_BY_TAGS = "";
private static final int DEFAULT_SCOPE_DEPTH_LIMIT = 100; private static final int DEFAULT_SCOPE_DEPTH_LIMIT = 100;
private static final int DEFAULT_PARTIAL_FLUSH_MIN_SPANS = 1000; private static final int DEFAULT_PARTIAL_FLUSH_MIN_SPANS = 1000;
private static final int DEFAULT_SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS =
(int) TimeUnit.SECONDS.toMillis(1);
private static final String DEFAULT_PROPAGATION_STYLE_EXTRACT = PropagationStyle.DATADOG.name(); private static final String DEFAULT_PROPAGATION_STYLE_EXTRACT = PropagationStyle.DATADOG.name();
private static final String DEFAULT_PROPAGATION_STYLE_INJECT = PropagationStyle.DATADOG.name(); private static final String DEFAULT_PROPAGATION_STYLE_INJECT = PropagationStyle.DATADOG.name();
private static final boolean DEFAULT_JMX_FETCH_ENABLED = true; private static final boolean DEFAULT_JMX_FETCH_ENABLED = true;
@ -238,7 +233,6 @@ public class Config {
@Getter private final Set<String> splitByTags; @Getter private final Set<String> splitByTags;
@Getter private final Integer scopeDepthLimit; @Getter private final Integer scopeDepthLimit;
@Getter private final Integer partialFlushMinSpans; @Getter private final Integer partialFlushMinSpans;
@Getter private final long spanDurationAboveAverageStacktraceNanos;
@Getter private final boolean runtimeContextFieldInjection; @Getter private final boolean runtimeContextFieldInjection;
@Getter private final Set<PropagationStyle> propagationStylesToExtract; @Getter private final Set<PropagationStyle> propagationStylesToExtract;
@Getter private final Set<PropagationStyle> propagationStylesToInject; @Getter private final Set<PropagationStyle> propagationStylesToInject;
@ -361,13 +355,6 @@ public class Config {
partialFlushMinSpans = partialFlushMinSpans =
getIntegerSettingFromEnvironment(PARTIAL_FLUSH_MIN_SPANS, DEFAULT_PARTIAL_FLUSH_MIN_SPANS); getIntegerSettingFromEnvironment(PARTIAL_FLUSH_MIN_SPANS, DEFAULT_PARTIAL_FLUSH_MIN_SPANS);
spanDurationAboveAverageStacktraceNanos =
TimeUnit.MILLISECONDS.toNanos(
getIntegerSettingFromEnvironment(
SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS,
DEFAULT_SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS)
.longValue());
runtimeContextFieldInjection = runtimeContextFieldInjection =
getBooleanSettingFromEnvironment( getBooleanSettingFromEnvironment(
RUNTIME_CONTEXT_FIELD_INJECTION, DEFAULT_RUNTIME_CONTEXT_FIELD_INJECTION); RUNTIME_CONTEXT_FIELD_INJECTION, DEFAULT_RUNTIME_CONTEXT_FIELD_INJECTION);
@ -555,15 +542,6 @@ public class Config {
partialFlushMinSpans = partialFlushMinSpans =
getPropertyIntegerValue(properties, PARTIAL_FLUSH_MIN_SPANS, parent.partialFlushMinSpans); getPropertyIntegerValue(properties, PARTIAL_FLUSH_MIN_SPANS, parent.partialFlushMinSpans);
// do we care about the integer downcast here?
spanDurationAboveAverageStacktraceNanos =
TimeUnit.MILLISECONDS.toNanos(
getPropertyIntegerValue(
properties,
SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS,
(int)
TimeUnit.NANOSECONDS.toMillis(parent.spanDurationAboveAverageStacktraceNanos)));
runtimeContextFieldInjection = runtimeContextFieldInjection =
getPropertyBooleanValue( getPropertyBooleanValue(
properties, RUNTIME_CONTEXT_FIELD_INJECTION, parent.runtimeContextFieldInjection); properties, RUNTIME_CONTEXT_FIELD_INJECTION, parent.runtimeContextFieldInjection);

View File

@ -1,6 +1,5 @@
package datadog.opentracing; package datadog.opentracing;
import datadog.trace.api.Config;
import datadog.trace.api.DDTags; import datadog.trace.api.DDTags;
import datadog.trace.api.interceptor.MutableSpan; import datadog.trace.api.interceptor.MutableSpan;
import datadog.trace.api.sampling.PrioritySampling; import datadog.trace.api.sampling.PrioritySampling;
@ -92,6 +91,16 @@ public class DDSpan implements Span, MutableSpan {
return durationNano.get() != 0; return durationNano.get() != 0;
} }
private void finishAndAddToTrace(final long durationNano) {
// ensure a min duration of 1
if (this.durationNano.compareAndSet(0, Math.max(1, durationNano))) {
log.debug("Finished: {}", this);
context.getTrace().addSpan(this);
} else {
log.debug("{} - already finished!", this);
}
}
@Override @Override
public final void finish() { public final void finish() {
if (startTimeNano > 0) { if (startTimeNano > 0) {
@ -107,69 +116,6 @@ public class DDSpan implements Span, MutableSpan {
finishAndAddToTrace(TimeUnit.MICROSECONDS.toNanos(stoptimeMicros - startTimeMicro)); finishAndAddToTrace(TimeUnit.MICROSECONDS.toNanos(stoptimeMicros - startTimeMicro));
} }
private void finishAndAddToTrace(final long durationNano) {
// ensure a min duration of 1
if (this.durationNano.compareAndSet(0, Math.max(1, durationNano))) {
log.debug("Finished: {}", this);
addStacktraceIfThresholdExceeded();
context.getTrace().addSpan(this);
} else {
log.debug("{} - already finished!", this);
}
}
// Track the average span duration per thread.
private static final ThreadLocal<Long> SPAN_COUNT = new ThreadLocal();
private static final ThreadLocal<Long> AVG_DURATION = new ThreadLocal();
private static final ThreadLocal<Long> AVG_REMAINDER = new ThreadLocal();
private long getOr0(final ThreadLocal<Long> threadLocal) {
final Long value = threadLocal.get();
return value == null ? 0 : value;
}
private void addStacktraceIfThresholdExceeded() {
final long spanDurationStacktraceNanos =
Config.get().getSpanDurationAboveAverageStacktraceNanos();
if (spanDurationStacktraceNanos <= 0
// If this span was finished async, then the stacktrace will be less meaningful.
|| context.threadId != Thread.currentThread().getId()) {
return;
}
final long duration = durationNano.get();
final long count = getOr0(SPAN_COUNT) + 1;
long average = getOr0(AVG_DURATION);
long remainder = getOr0(AVG_REMAINDER);
// http://www.onemanclapping.org/2012/03/calculating-long-running-averages.html
average += (duration + remainder) / count;
remainder = (duration + remainder) % count;
SPAN_COUNT.set(count);
AVG_DURATION.set(average);
AVG_REMAINDER.set(remainder);
if (isError() || duration <= average + spanDurationStacktraceNanos) {
return;
}
final StringBuilder stack = new StringBuilder();
// TODO: use StackWalker api for java 9+ jvm's?
boolean skipNext = true;
for (final StackTraceElement element : Thread.currentThread().getStackTrace()) {
final boolean tracingClass = element.getClassName().startsWith("datadog.opentracing.");
if (skipNext || tracingClass) {
skipNext = tracingClass;
continue;
}
stack.append("\tat ");
stack.append(element);
stack.append("\n");
}
// Prob not worth dealing with the trailing newline.
setTag("slow.stack", stack.toString());
}
@Override @Override
public DDSpan setError(final boolean error) { public DDSpan setError(final boolean error) {
context.setErrorFlag(error); context.setErrorFlag(error);

View File

@ -70,8 +70,7 @@ public class DDSpanContext implements io.opentracing.SpanContext {
// Additional Metadata // Additional Metadata
private final String threadName = Thread.currentThread().getName(); private final String threadName = Thread.currentThread().getName();
// Visible for use in DDSpan private final long threadId = Thread.currentThread().getId();
final long threadId = Thread.currentThread().getId();
private final Map<String, String> serviceNameMappings; private final Map<String, String> serviceNameMappings;

View File

@ -1,14 +1,13 @@
package datadog.opentracing package datadog.opentracing
import datadog.opentracing.propagation.ExtractedContext import datadog.opentracing.propagation.ExtractedContext
import datadog.opentracing.propagation.TagContext import datadog.opentracing.propagation.TagContext
import datadog.trace.api.Config
import datadog.trace.api.sampling.PrioritySampling import datadog.trace.api.sampling.PrioritySampling
import datadog.trace.common.sampling.RateByServiceSampler import datadog.trace.common.sampling.RateByServiceSampler
import datadog.trace.common.writer.ListWriter import datadog.trace.common.writer.ListWriter
import datadog.trace.util.test.DDSpecification import datadog.trace.util.test.DDSpecification
import io.opentracing.SpanContext import io.opentracing.SpanContext
import org.spockframework.util.ReflectionUtil
import java.util.concurrent.TimeUnit import java.util.concurrent.TimeUnit
@ -170,28 +169,6 @@ class DDSpanTest extends DDSpecification {
span.durationNano == 1 span.durationNano == 1
} }
def "stacktrace captured when duration exceeds average + configured threshold"() {
setup:
// Get the part of the stack before this test is called.
def acceptRemaining = false
def originalStack = Thread.currentThread().stackTrace
def stackTraceElements = originalStack.dropWhile {
if (it.className == ReflectionUtil.name) {
acceptRemaining = true
}
return !acceptRemaining
}
def stack = "\tat " + stackTraceElements.join("\n\tat ") + "\n"
def span = tracer.buildSpan("test").start()
span.finish(span.startTimeMicro + DDSpan.AVG_DURATION.get() + TimeUnit.NANOSECONDS.toMicros(Config.get().spanDurationAboveAverageStacktraceNanos) + 1)
def actual = span.tags["slow.stack"].toString()
expect:
!stack.isEmpty()
actual.endsWith(stack)
}
def "priority sampling metric set only on root span"() { def "priority sampling metric set only on root span"() {
setup: setup:
def parent = tracer.buildSpan("testParent").start() def parent = tracer.buildSpan("testParent").start()