Make threshold based on average duration

This should help avoid too frequent of stacktraces when the configured threshold is a normal duration.
Alternatively, we could make it a percentage above the average.
This commit is contained in:
Tyler Benson 2020-03-10 15:08:12 -07:00
parent 1cbb874c7c
commit 2f64edaea4
3 changed files with 58 additions and 30 deletions

View File

@ -86,8 +86,8 @@ public class Config {
public static final String SPLIT_BY_TAGS = "trace.split-by-tags"; public static final String SPLIT_BY_TAGS = "trace.split-by-tags";
public static final String SCOPE_DEPTH_LIMIT = "trace.scope.depth.limit"; public static final String SCOPE_DEPTH_LIMIT = "trace.scope.depth.limit";
public static final String PARTIAL_FLUSH_MIN_SPANS = "trace.partial.flush.min.spans"; public static final String PARTIAL_FLUSH_MIN_SPANS = "trace.partial.flush.min.spans";
public static final String SPAN_DURATION_STACKTRACE_MILLIS = public static final String SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS =
"trace.span.duration.stacktrace.millis"; "trace.span.duration-above-average.stacktrace.millis";
public static final String RUNTIME_CONTEXT_FIELD_INJECTION = public static final String RUNTIME_CONTEXT_FIELD_INJECTION =
"trace.runtime.context.field.injection"; "trace.runtime.context.field.injection";
public static final String PROPAGATION_STYLE_EXTRACT = "propagation.style.extract"; public static final String PROPAGATION_STYLE_EXTRACT = "propagation.style.extract";
@ -164,7 +164,7 @@ public class Config {
private static final String DEFAULT_SPLIT_BY_TAGS = ""; private static final String DEFAULT_SPLIT_BY_TAGS = "";
private static final int DEFAULT_SCOPE_DEPTH_LIMIT = 100; private static final int DEFAULT_SCOPE_DEPTH_LIMIT = 100;
private static final int DEFAULT_PARTIAL_FLUSH_MIN_SPANS = 1000; private static final int DEFAULT_PARTIAL_FLUSH_MIN_SPANS = 1000;
private static final int DEFAULT_SPAN_DURATION_STACKTRACE_MILLIS = private static final int DEFAULT_SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS =
(int) TimeUnit.SECONDS.toMillis(1); (int) TimeUnit.SECONDS.toMillis(1);
private static final String DEFAULT_PROPAGATION_STYLE_EXTRACT = PropagationStyle.DATADOG.name(); private static final String DEFAULT_PROPAGATION_STYLE_EXTRACT = PropagationStyle.DATADOG.name();
private static final String DEFAULT_PROPAGATION_STYLE_INJECT = PropagationStyle.DATADOG.name(); private static final String DEFAULT_PROPAGATION_STYLE_INJECT = PropagationStyle.DATADOG.name();
@ -238,7 +238,7 @@ public class Config {
@Getter private final Set<String> splitByTags; @Getter private final Set<String> splitByTags;
@Getter private final Integer scopeDepthLimit; @Getter private final Integer scopeDepthLimit;
@Getter private final Integer partialFlushMinSpans; @Getter private final Integer partialFlushMinSpans;
@Getter private final long spanDurationStacktraceNanos; @Getter private final long spanDurationAboveAverageStacktraceNanos;
@Getter private final boolean runtimeContextFieldInjection; @Getter private final boolean runtimeContextFieldInjection;
@Getter private final Set<PropagationStyle> propagationStylesToExtract; @Getter private final Set<PropagationStyle> propagationStylesToExtract;
@Getter private final Set<PropagationStyle> propagationStylesToInject; @Getter private final Set<PropagationStyle> propagationStylesToInject;
@ -361,10 +361,11 @@ public class Config {
partialFlushMinSpans = partialFlushMinSpans =
getIntegerSettingFromEnvironment(PARTIAL_FLUSH_MIN_SPANS, DEFAULT_PARTIAL_FLUSH_MIN_SPANS); getIntegerSettingFromEnvironment(PARTIAL_FLUSH_MIN_SPANS, DEFAULT_PARTIAL_FLUSH_MIN_SPANS);
spanDurationStacktraceNanos = spanDurationAboveAverageStacktraceNanos =
TimeUnit.MILLISECONDS.toNanos( TimeUnit.MILLISECONDS.toNanos(
getIntegerSettingFromEnvironment( getIntegerSettingFromEnvironment(
SPAN_DURATION_STACKTRACE_MILLIS, DEFAULT_SPAN_DURATION_STACKTRACE_MILLIS) SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS,
DEFAULT_SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS)
.longValue()); .longValue());
runtimeContextFieldInjection = runtimeContextFieldInjection =
@ -555,12 +556,13 @@ public class Config {
getPropertyIntegerValue(properties, PARTIAL_FLUSH_MIN_SPANS, parent.partialFlushMinSpans); getPropertyIntegerValue(properties, PARTIAL_FLUSH_MIN_SPANS, parent.partialFlushMinSpans);
// do we care about the integer downcast here? // do we care about the integer downcast here?
spanDurationStacktraceNanos = spanDurationAboveAverageStacktraceNanos =
TimeUnit.MILLISECONDS.toNanos( TimeUnit.MILLISECONDS.toNanos(
getPropertyIntegerValue( getPropertyIntegerValue(
properties, properties,
SPAN_DURATION_STACKTRACE_MILLIS, SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS,
(int) TimeUnit.NANOSECONDS.toMillis(parent.spanDurationStacktraceNanos))); (int)
TimeUnit.NANOSECONDS.toMillis(parent.spanDurationAboveAverageStacktraceNanos)));
runtimeContextFieldInjection = runtimeContextFieldInjection =
getPropertyBooleanValue( getPropertyBooleanValue(

View File

@ -92,6 +92,21 @@ public class DDSpan implements Span, MutableSpan {
return durationNano.get() != 0; return durationNano.get() != 0;
} }
@Override
public final void finish() {
if (startTimeNano > 0) {
// no external clock was used, so we can rely on nano time
finishAndAddToTrace(context.getTrace().getCurrentTimeNano() - startTimeNano);
} else {
finish(Clock.currentMicroTime());
}
}
@Override
public final void finish(final long stoptimeMicros) {
finishAndAddToTrace(TimeUnit.MICROSECONDS.toNanos(stoptimeMicros - startTimeMicro));
}
private void finishAndAddToTrace(final long durationNano) { private void finishAndAddToTrace(final long durationNano) {
// ensure a min duration of 1 // ensure a min duration of 1
if (this.durationNano.compareAndSet(0, Math.max(1, durationNano))) { if (this.durationNano.compareAndSet(0, Math.max(1, durationNano))) {
@ -103,15 +118,41 @@ public class DDSpan implements Span, MutableSpan {
} }
} }
// Track the average span duration per thread.
private static final ThreadLocal<Long> SPAN_COUNT = new ThreadLocal();
private static final ThreadLocal<Long> AVG_DURATION = new ThreadLocal();
private static final ThreadLocal<Long> AVG_REMAINDER = new ThreadLocal();
private long getOr0(final ThreadLocal<Long> threadLocal) {
final Long value = threadLocal.get();
return value == null ? 0 : value;
}
private void addStacktraceIfThresholdExceeded() { private void addStacktraceIfThresholdExceeded() {
final long spanDurationStacktraceNanos = Config.get().getSpanDurationStacktraceNanos(); final long spanDurationStacktraceNanos =
if (isError() Config.get().getSpanDurationAboveAverageStacktraceNanos();
|| spanDurationStacktraceNanos <= 0 if (spanDurationStacktraceNanos <= 0
|| durationNano.get() <= spanDurationStacktraceNanos
// If this span was finished async, then the stacktrace will be less meaningful. // If this span was finished async, then the stacktrace will be less meaningful.
|| context.threadId != Thread.currentThread().getId()) { || context.threadId != Thread.currentThread().getId()) {
return; return;
} }
final long duration = durationNano.get();
final long count = getOr0(SPAN_COUNT) + 1;
long average = getOr0(AVG_DURATION);
long remainder = getOr0(AVG_REMAINDER);
// http://www.onemanclapping.org/2012/03/calculating-long-running-averages.html
average += (duration + remainder) / count;
remainder = (duration + remainder) % count;
SPAN_COUNT.set(count);
AVG_DURATION.set(average);
AVG_REMAINDER.set(remainder);
if (isError() || duration <= average + spanDurationStacktraceNanos) {
return;
}
final StringBuilder stack = new StringBuilder(); final StringBuilder stack = new StringBuilder();
// TODO: use StackWalker api for java 9+ jvm's? // TODO: use StackWalker api for java 9+ jvm's?
boolean skipNext = true; boolean skipNext = true;
@ -129,21 +170,6 @@ public class DDSpan implements Span, MutableSpan {
setTag("slow.stack", stack.toString()); setTag("slow.stack", stack.toString());
} }
@Override
public final void finish() {
if (startTimeNano > 0) {
// no external clock was used, so we can rely on nano time
finishAndAddToTrace(context.getTrace().getCurrentTimeNano() - startTimeNano);
} else {
finish(Clock.currentMicroTime());
}
}
@Override
public final void finish(final long stoptimeMicros) {
finishAndAddToTrace(TimeUnit.MICROSECONDS.toNanos(stoptimeMicros - startTimeMicro));
}
@Override @Override
public DDSpan setError(final boolean error) { public DDSpan setError(final boolean error) {
context.setErrorFlag(error); context.setErrorFlag(error);

View File

@ -170,7 +170,7 @@ class DDSpanTest extends DDSpecification {
span.durationNano == 1 span.durationNano == 1
} }
def "stacktrace captured when duration exceeds configured threshold"() { def "stacktrace captured when duration exceeds average + configured threshold"() {
setup: setup:
// Get the part of the stack before this test is called. // Get the part of the stack before this test is called.
def acceptRemaining = false def acceptRemaining = false
@ -184,7 +184,7 @@ class DDSpanTest extends DDSpecification {
def stack = "\tat " + stackTraceElements.join("\n\tat ") + "\n" def stack = "\tat " + stackTraceElements.join("\n\tat ") + "\n"
def span = tracer.buildSpan("test").start() def span = tracer.buildSpan("test").start()
span.finish(span.startTimeMicro + TimeUnit.NANOSECONDS.toMicros(Config.get().spanDurationStacktraceNanos) + 1) span.finish(span.startTimeMicro + DDSpan.AVG_DURATION.get() + TimeUnit.NANOSECONDS.toMicros(Config.get().spanDurationAboveAverageStacktraceNanos) + 1)
def actual = span.tags["slow.stack"].toString() def actual = span.tags["slow.stack"].toString()
expect: expect: