Make threshold based on average duration
This should help avoid too frequent of stacktraces when the configured threshold is a normal duration. Alternatively, we could make it a percentage above the average.
This commit is contained in:
parent
1cbb874c7c
commit
2f64edaea4
|
@ -86,8 +86,8 @@ public class Config {
|
||||||
public static final String SPLIT_BY_TAGS = "trace.split-by-tags";
|
public static final String SPLIT_BY_TAGS = "trace.split-by-tags";
|
||||||
public static final String SCOPE_DEPTH_LIMIT = "trace.scope.depth.limit";
|
public static final String SCOPE_DEPTH_LIMIT = "trace.scope.depth.limit";
|
||||||
public static final String PARTIAL_FLUSH_MIN_SPANS = "trace.partial.flush.min.spans";
|
public static final String PARTIAL_FLUSH_MIN_SPANS = "trace.partial.flush.min.spans";
|
||||||
public static final String SPAN_DURATION_STACKTRACE_MILLIS =
|
public static final String SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS =
|
||||||
"trace.span.duration.stacktrace.millis";
|
"trace.span.duration-above-average.stacktrace.millis";
|
||||||
public static final String RUNTIME_CONTEXT_FIELD_INJECTION =
|
public static final String RUNTIME_CONTEXT_FIELD_INJECTION =
|
||||||
"trace.runtime.context.field.injection";
|
"trace.runtime.context.field.injection";
|
||||||
public static final String PROPAGATION_STYLE_EXTRACT = "propagation.style.extract";
|
public static final String PROPAGATION_STYLE_EXTRACT = "propagation.style.extract";
|
||||||
|
@ -164,7 +164,7 @@ public class Config {
|
||||||
private static final String DEFAULT_SPLIT_BY_TAGS = "";
|
private static final String DEFAULT_SPLIT_BY_TAGS = "";
|
||||||
private static final int DEFAULT_SCOPE_DEPTH_LIMIT = 100;
|
private static final int DEFAULT_SCOPE_DEPTH_LIMIT = 100;
|
||||||
private static final int DEFAULT_PARTIAL_FLUSH_MIN_SPANS = 1000;
|
private static final int DEFAULT_PARTIAL_FLUSH_MIN_SPANS = 1000;
|
||||||
private static final int DEFAULT_SPAN_DURATION_STACKTRACE_MILLIS =
|
private static final int DEFAULT_SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS =
|
||||||
(int) TimeUnit.SECONDS.toMillis(1);
|
(int) TimeUnit.SECONDS.toMillis(1);
|
||||||
private static final String DEFAULT_PROPAGATION_STYLE_EXTRACT = PropagationStyle.DATADOG.name();
|
private static final String DEFAULT_PROPAGATION_STYLE_EXTRACT = PropagationStyle.DATADOG.name();
|
||||||
private static final String DEFAULT_PROPAGATION_STYLE_INJECT = PropagationStyle.DATADOG.name();
|
private static final String DEFAULT_PROPAGATION_STYLE_INJECT = PropagationStyle.DATADOG.name();
|
||||||
|
@ -238,7 +238,7 @@ public class Config {
|
||||||
@Getter private final Set<String> splitByTags;
|
@Getter private final Set<String> splitByTags;
|
||||||
@Getter private final Integer scopeDepthLimit;
|
@Getter private final Integer scopeDepthLimit;
|
||||||
@Getter private final Integer partialFlushMinSpans;
|
@Getter private final Integer partialFlushMinSpans;
|
||||||
@Getter private final long spanDurationStacktraceNanos;
|
@Getter private final long spanDurationAboveAverageStacktraceNanos;
|
||||||
@Getter private final boolean runtimeContextFieldInjection;
|
@Getter private final boolean runtimeContextFieldInjection;
|
||||||
@Getter private final Set<PropagationStyle> propagationStylesToExtract;
|
@Getter private final Set<PropagationStyle> propagationStylesToExtract;
|
||||||
@Getter private final Set<PropagationStyle> propagationStylesToInject;
|
@Getter private final Set<PropagationStyle> propagationStylesToInject;
|
||||||
|
@ -361,10 +361,11 @@ public class Config {
|
||||||
partialFlushMinSpans =
|
partialFlushMinSpans =
|
||||||
getIntegerSettingFromEnvironment(PARTIAL_FLUSH_MIN_SPANS, DEFAULT_PARTIAL_FLUSH_MIN_SPANS);
|
getIntegerSettingFromEnvironment(PARTIAL_FLUSH_MIN_SPANS, DEFAULT_PARTIAL_FLUSH_MIN_SPANS);
|
||||||
|
|
||||||
spanDurationStacktraceNanos =
|
spanDurationAboveAverageStacktraceNanos =
|
||||||
TimeUnit.MILLISECONDS.toNanos(
|
TimeUnit.MILLISECONDS.toNanos(
|
||||||
getIntegerSettingFromEnvironment(
|
getIntegerSettingFromEnvironment(
|
||||||
SPAN_DURATION_STACKTRACE_MILLIS, DEFAULT_SPAN_DURATION_STACKTRACE_MILLIS)
|
SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS,
|
||||||
|
DEFAULT_SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS)
|
||||||
.longValue());
|
.longValue());
|
||||||
|
|
||||||
runtimeContextFieldInjection =
|
runtimeContextFieldInjection =
|
||||||
|
@ -555,12 +556,13 @@ public class Config {
|
||||||
getPropertyIntegerValue(properties, PARTIAL_FLUSH_MIN_SPANS, parent.partialFlushMinSpans);
|
getPropertyIntegerValue(properties, PARTIAL_FLUSH_MIN_SPANS, parent.partialFlushMinSpans);
|
||||||
|
|
||||||
// do we care about the integer downcast here?
|
// do we care about the integer downcast here?
|
||||||
spanDurationStacktraceNanos =
|
spanDurationAboveAverageStacktraceNanos =
|
||||||
TimeUnit.MILLISECONDS.toNanos(
|
TimeUnit.MILLISECONDS.toNanos(
|
||||||
getPropertyIntegerValue(
|
getPropertyIntegerValue(
|
||||||
properties,
|
properties,
|
||||||
SPAN_DURATION_STACKTRACE_MILLIS,
|
SPAN_DURATION_ABOVE_AVERAGE_STACKTRACE_MILLIS,
|
||||||
(int) TimeUnit.NANOSECONDS.toMillis(parent.spanDurationStacktraceNanos)));
|
(int)
|
||||||
|
TimeUnit.NANOSECONDS.toMillis(parent.spanDurationAboveAverageStacktraceNanos)));
|
||||||
|
|
||||||
runtimeContextFieldInjection =
|
runtimeContextFieldInjection =
|
||||||
getPropertyBooleanValue(
|
getPropertyBooleanValue(
|
||||||
|
|
|
@ -92,6 +92,21 @@ public class DDSpan implements Span, MutableSpan {
|
||||||
return durationNano.get() != 0;
|
return durationNano.get() != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final void finish() {
|
||||||
|
if (startTimeNano > 0) {
|
||||||
|
// no external clock was used, so we can rely on nano time
|
||||||
|
finishAndAddToTrace(context.getTrace().getCurrentTimeNano() - startTimeNano);
|
||||||
|
} else {
|
||||||
|
finish(Clock.currentMicroTime());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final void finish(final long stoptimeMicros) {
|
||||||
|
finishAndAddToTrace(TimeUnit.MICROSECONDS.toNanos(stoptimeMicros - startTimeMicro));
|
||||||
|
}
|
||||||
|
|
||||||
private void finishAndAddToTrace(final long durationNano) {
|
private void finishAndAddToTrace(final long durationNano) {
|
||||||
// ensure a min duration of 1
|
// ensure a min duration of 1
|
||||||
if (this.durationNano.compareAndSet(0, Math.max(1, durationNano))) {
|
if (this.durationNano.compareAndSet(0, Math.max(1, durationNano))) {
|
||||||
|
@ -103,15 +118,41 @@ public class DDSpan implements Span, MutableSpan {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Track the average span duration per thread.
|
||||||
|
private static final ThreadLocal<Long> SPAN_COUNT = new ThreadLocal();
|
||||||
|
private static final ThreadLocal<Long> AVG_DURATION = new ThreadLocal();
|
||||||
|
private static final ThreadLocal<Long> AVG_REMAINDER = new ThreadLocal();
|
||||||
|
|
||||||
|
private long getOr0(final ThreadLocal<Long> threadLocal) {
|
||||||
|
final Long value = threadLocal.get();
|
||||||
|
return value == null ? 0 : value;
|
||||||
|
}
|
||||||
|
|
||||||
private void addStacktraceIfThresholdExceeded() {
|
private void addStacktraceIfThresholdExceeded() {
|
||||||
final long spanDurationStacktraceNanos = Config.get().getSpanDurationStacktraceNanos();
|
final long spanDurationStacktraceNanos =
|
||||||
if (isError()
|
Config.get().getSpanDurationAboveAverageStacktraceNanos();
|
||||||
|| spanDurationStacktraceNanos <= 0
|
if (spanDurationStacktraceNanos <= 0
|
||||||
|| durationNano.get() <= spanDurationStacktraceNanos
|
|
||||||
// If this span was finished async, then the stacktrace will be less meaningful.
|
// If this span was finished async, then the stacktrace will be less meaningful.
|
||||||
|| context.threadId != Thread.currentThread().getId()) {
|
|| context.threadId != Thread.currentThread().getId()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final long duration = durationNano.get();
|
||||||
|
final long count = getOr0(SPAN_COUNT) + 1;
|
||||||
|
long average = getOr0(AVG_DURATION);
|
||||||
|
long remainder = getOr0(AVG_REMAINDER);
|
||||||
|
|
||||||
|
// http://www.onemanclapping.org/2012/03/calculating-long-running-averages.html
|
||||||
|
average += (duration + remainder) / count;
|
||||||
|
remainder = (duration + remainder) % count;
|
||||||
|
|
||||||
|
SPAN_COUNT.set(count);
|
||||||
|
AVG_DURATION.set(average);
|
||||||
|
AVG_REMAINDER.set(remainder);
|
||||||
|
|
||||||
|
if (isError() || duration <= average + spanDurationStacktraceNanos) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
final StringBuilder stack = new StringBuilder();
|
final StringBuilder stack = new StringBuilder();
|
||||||
// TODO: use StackWalker api for java 9+ jvm's?
|
// TODO: use StackWalker api for java 9+ jvm's?
|
||||||
boolean skipNext = true;
|
boolean skipNext = true;
|
||||||
|
@ -129,21 +170,6 @@ public class DDSpan implements Span, MutableSpan {
|
||||||
setTag("slow.stack", stack.toString());
|
setTag("slow.stack", stack.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public final void finish() {
|
|
||||||
if (startTimeNano > 0) {
|
|
||||||
// no external clock was used, so we can rely on nano time
|
|
||||||
finishAndAddToTrace(context.getTrace().getCurrentTimeNano() - startTimeNano);
|
|
||||||
} else {
|
|
||||||
finish(Clock.currentMicroTime());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final void finish(final long stoptimeMicros) {
|
|
||||||
finishAndAddToTrace(TimeUnit.MICROSECONDS.toNanos(stoptimeMicros - startTimeMicro));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DDSpan setError(final boolean error) {
|
public DDSpan setError(final boolean error) {
|
||||||
context.setErrorFlag(error);
|
context.setErrorFlag(error);
|
||||||
|
|
|
@ -170,7 +170,7 @@ class DDSpanTest extends DDSpecification {
|
||||||
span.durationNano == 1
|
span.durationNano == 1
|
||||||
}
|
}
|
||||||
|
|
||||||
def "stacktrace captured when duration exceeds configured threshold"() {
|
def "stacktrace captured when duration exceeds average + configured threshold"() {
|
||||||
setup:
|
setup:
|
||||||
// Get the part of the stack before this test is called.
|
// Get the part of the stack before this test is called.
|
||||||
def acceptRemaining = false
|
def acceptRemaining = false
|
||||||
|
@ -184,7 +184,7 @@ class DDSpanTest extends DDSpecification {
|
||||||
def stack = "\tat " + stackTraceElements.join("\n\tat ") + "\n"
|
def stack = "\tat " + stackTraceElements.join("\n\tat ") + "\n"
|
||||||
|
|
||||||
def span = tracer.buildSpan("test").start()
|
def span = tracer.buildSpan("test").start()
|
||||||
span.finish(span.startTimeMicro + TimeUnit.NANOSECONDS.toMicros(Config.get().spanDurationStacktraceNanos) + 1)
|
span.finish(span.startTimeMicro + DDSpan.AVG_DURATION.get() + TimeUnit.NANOSECONDS.toMicros(Config.get().spanDurationAboveAverageStacktraceNanos) + 1)
|
||||||
def actual = span.tags["slow.stack"].toString()
|
def actual = span.tags["slow.stack"].toString()
|
||||||
|
|
||||||
expect:
|
expect:
|
||||||
|
|
Loading…
Reference in New Issue