diff --git a/dd-trace-api/src/main/java/datadog/trace/api/Config.java b/dd-trace-api/src/main/java/datadog/trace/api/Config.java index 1160966ebd..7d0ea7a49d 100644 --- a/dd-trace-api/src/main/java/datadog/trace/api/Config.java +++ b/dd-trace-api/src/main/java/datadog/trace/api/Config.java @@ -86,8 +86,8 @@ public class Config { public static final String SPLIT_BY_TAGS = "trace.split-by-tags"; public static final String SCOPE_DEPTH_LIMIT = "trace.scope.depth.limit"; public static final String PARTIAL_FLUSH_MIN_SPANS = "trace.partial.flush.min.spans"; - public static final String SPAN_DURATION_STACKTRACE_MILLIS = - "trace.span.duration.stacktrace.millis"; + public static final String SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS = + "trace.span.duration-above-average.stacktrace.millis"; public static final String RUNTIME_CONTEXT_FIELD_INJECTION = "trace.runtime.context.field.injection"; public static final String PROPAGATION_STYLE_EXTRACT = "propagation.style.extract"; @@ -164,7 +164,7 @@ public class Config { private static final String DEFAULT_SPLIT_BY_TAGS = ""; private static final int DEFAULT_SCOPE_DEPTH_LIMIT = 100; private static final int DEFAULT_PARTIAL_FLUSH_MIN_SPANS = 1000; - private static final int DEFAULT_SPAN_DURATION_STACKTRACE_MILLIS = + private static final int DEFAULT_SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS = (int) TimeUnit.SECONDS.toMillis(1); private static final String DEFAULT_PROPAGATION_STYLE_EXTRACT = PropagationStyle.DATADOG.name(); private static final String DEFAULT_PROPAGATION_STYLE_INJECT = PropagationStyle.DATADOG.name(); @@ -238,7 +238,7 @@ public class Config { @Getter private final Set splitByTags; @Getter private final Integer scopeDepthLimit; @Getter private final Integer partialFlushMinSpans; - @Getter private final long spanDurationStacktraceNanos; + @Getter private final long spanDurationAboveAverageStacktraceNanos; @Getter private final boolean runtimeContextFieldInjection; @Getter private final Set propagationStylesToExtract; @Getter private final Set propagationStylesToInject; @@ -361,10 +361,11 @@ public class Config { partialFlushMinSpans = getIntegerSettingFromEnvironment(PARTIAL_FLUSH_MIN_SPANS, DEFAULT_PARTIAL_FLUSH_MIN_SPANS); - spanDurationStacktraceNanos = + spanDurationAboveAverageStacktraceNanos = TimeUnit.MILLISECONDS.toNanos( getIntegerSettingFromEnvironment( - SPAN_DURATION_STACKTRACE_MILLIS, DEFAULT_SPAN_DURATION_STACKTRACE_MILLIS) + SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS, + DEFAULT_SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS) .longValue()); runtimeContextFieldInjection = @@ -555,12 +556,13 @@ public class Config { getPropertyIntegerValue(properties, PARTIAL_FLUSH_MIN_SPANS, parent.partialFlushMinSpans); // do we care about the integer downcast here? - spanDurationStacktraceNanos = + spanDurationAboveAverageStacktraceNanos = TimeUnit.MILLISECONDS.toNanos( getPropertyIntegerValue( properties, - SPAN_DURATION_STACKTRACE_MILLIS, - (int) TimeUnit.NANOSECONDS.toMillis(parent.spanDurationStacktraceNanos))); + SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS, + (int) + TimeUnit.NANOSECONDS.toMillis(parent.spanDurationAboveAverageStacktraceNanos))); runtimeContextFieldInjection = getPropertyBooleanValue( diff --git a/dd-trace-ot/src/main/java/datadog/opentracing/DDSpan.java b/dd-trace-ot/src/main/java/datadog/opentracing/DDSpan.java index f0413032fc..c526d8caf5 100644 --- a/dd-trace-ot/src/main/java/datadog/opentracing/DDSpan.java +++ b/dd-trace-ot/src/main/java/datadog/opentracing/DDSpan.java @@ -92,6 +92,21 @@ public class DDSpan implements Span, MutableSpan { return durationNano.get() != 0; } + @Override + public final void finish() { + if (startTimeNano > 0) { + // no external clock was used, so we can rely on nano time + finishAndAddToTrace(context.getTrace().getCurrentTimeNano() - startTimeNano); + } else { + finish(Clock.currentMicroTime()); + } + } + + @Override + public final void finish(final long stoptimeMicros) { + finishAndAddToTrace(TimeUnit.MICROSECONDS.toNanos(stoptimeMicros - startTimeMicro)); + } + private void finishAndAddToTrace(final long durationNano) { // ensure a min duration of 1 if (this.durationNano.compareAndSet(0, Math.max(1, durationNano))) { @@ -103,15 +118,41 @@ public class DDSpan implements Span, MutableSpan { } } + // Track the average span duration per thread. + private static final ThreadLocal SPAN_COUNT = new ThreadLocal(); + private static final ThreadLocal AVG_DURATION = new ThreadLocal(); + private static final ThreadLocal AVG_REMAINDER = new ThreadLocal(); + + private long getOr0(final ThreadLocal threadLocal) { + final Long value = threadLocal.get(); + return value == null ? 0 : value; + } + private void addStacktraceIfThresholdExceeded() { - final long spanDurationStacktraceNanos = Config.get().getSpanDurationStacktraceNanos(); - if (isError() - || spanDurationStacktraceNanos <= 0 - || durationNano.get() <= spanDurationStacktraceNanos + final long spanDurationStacktraceNanos = + Config.get().getSpanDurationAboveAverageStacktraceNanos(); + if (spanDurationStacktraceNanos <= 0 // If this span was finished async, then the stacktrace will be less meaningful. || context.threadId != Thread.currentThread().getId()) { return; } + + final long duration = durationNano.get(); + final long count = getOr0(SPAN_COUNT) + 1; + long average = getOr0(AVG_DURATION); + long remainder = getOr0(AVG_REMAINDER); + + // http://www.onemanclapping.org/2012/03/calculating-long-running-averages.html + average += (duration + remainder) / count; + remainder = (duration + remainder) % count; + + SPAN_COUNT.set(count); + AVG_DURATION.set(average); + AVG_REMAINDER.set(remainder); + + if (isError() || duration <= average + spanDurationStacktraceNanos) { + return; + } final StringBuilder stack = new StringBuilder(); // TODO: use StackWalker api for java 9+ jvm's? boolean skipNext = true; @@ -129,21 +170,6 @@ public class DDSpan implements Span, MutableSpan { setTag("slow.stack", stack.toString()); } - @Override - public final void finish() { - if (startTimeNano > 0) { - // no external clock was used, so we can rely on nano time - finishAndAddToTrace(context.getTrace().getCurrentTimeNano() - startTimeNano); - } else { - finish(Clock.currentMicroTime()); - } - } - - @Override - public final void finish(final long stoptimeMicros) { - finishAndAddToTrace(TimeUnit.MICROSECONDS.toNanos(stoptimeMicros - startTimeMicro)); - } - @Override public DDSpan setError(final boolean error) { context.setErrorFlag(error); diff --git a/dd-trace-ot/src/test/groovy/datadog/opentracing/DDSpanTest.groovy b/dd-trace-ot/src/test/groovy/datadog/opentracing/DDSpanTest.groovy index 82d5d196c7..cf79ee5769 100644 --- a/dd-trace-ot/src/test/groovy/datadog/opentracing/DDSpanTest.groovy +++ b/dd-trace-ot/src/test/groovy/datadog/opentracing/DDSpanTest.groovy @@ -170,7 +170,7 @@ class DDSpanTest extends DDSpecification { span.durationNano == 1 } - def "stacktrace captured when duration exceeds configured threshold"() { + def "stacktrace captured when duration exceeds average + configured threshold"() { setup: // Get the part of the stack before this test is called. def acceptRemaining = false @@ -184,7 +184,7 @@ class DDSpanTest extends DDSpecification { def stack = "\tat " + stackTraceElements.join("\n\tat ") + "\n" def span = tracer.buildSpan("test").start() - span.finish(span.startTimeMicro + TimeUnit.NANOSECONDS.toMicros(Config.get().spanDurationStacktraceNanos) + 1) + span.finish(span.startTimeMicro + DDSpan.AVG_DURATION.get() + TimeUnit.NANOSECONDS.toMicros(Config.get().spanDurationAboveAverageStacktraceNanos) + 1) def actual = span.tags["slow.stack"].toString() expect: