From 2bdf54bd6e643b66d781836beb4ffd1c7d78f9d4 Mon Sep 17 00:00:00 2001 From: Nikolay Martynov Date: Fri, 15 Jun 2018 10:29:06 -0400 Subject: [PATCH 1/2] Fix span start boundaries Before: span start had millisecond precision because we just use `currentTimeMillis` to get span start time. This creates weirdly looking traces where spans 'fly' way out of parent spans if they have sub-ms length. After: we establish 'trace start time' with millisecond precision and measure all span start and stop times from that. This means all relative times are maintained with nanosecond precision (or whateve OS clock fives us). This is POC and some things are not yet fixed. E.g. JMS1 instrumentation injects time into span manually - and it is not apparent how to pake it do so relative to trace clock. Note: going forward this should allow us to completely get rid of 'double time keeping' we currently have in `DDSpan`. --- .../main/java/datadog/opentracing/DDSpan.java | 57 ++++++++++--------- .../datadog/opentracing/PendingTrace.java | 24 ++++++++ .../java/datadog/trace/common/util/Clock.java | 14 ++++- .../datadog/opentracing/DDSpanTest.groovy | 6 ++ .../opentracing/PendingTraceTest.groovy | 10 +++- 5 files changed, 82 insertions(+), 29 deletions(-) diff --git a/dd-trace-ot/src/main/java/datadog/opentracing/DDSpan.java b/dd-trace-ot/src/main/java/datadog/opentracing/DDSpan.java index fa979c6912..b8a555c984 100644 --- a/dd-trace-ot/src/main/java/datadog/opentracing/DDSpan.java +++ b/dd-trace-ot/src/main/java/datadog/opentracing/DDSpan.java @@ -32,10 +32,17 @@ public class DDSpan implements Span, MutableSpan { /** The context attached to the span */ private final DDSpanContext context; - /** Creation time of the span in microseconds. Must be greater than zero. */ + /** + * Creation time of the span in microseconds provided by external clock. Must be greater than + * zero. + */ private final long startTimeMicro; - /** Creation time of span in system relative nanotime (may be negative) */ + /** + * Creation time of span in nanoseconds. We use combination of millisecond-precision clock and + * nanosecond-precision offset from start of the trace. See {@link PendingTrace} for details. Must + * be greater than zero. + */ private final long startTimeNano; /** @@ -54,19 +61,19 @@ public class DDSpan implements Span, MutableSpan { * @param context the context used for the span */ DDSpan(final long timestampMicro, final DDSpanContext context) { - this.context = context; - // record the start time in nano (current milli + nano delta) if (timestampMicro <= 0L) { - this.startTimeMicro = Clock.currentMicroTime(); - this.startTimeNano = Clock.currentNanoTicks(); + // record the start time + startTimeMicro = Clock.currentMicroTime(); + startTimeNano = context.getTrace().getCurrentTimeNano(); } else { - this.startTimeMicro = timestampMicro; - // timestamp might have come from an external clock, so don't bother with nanotime. - this.startTimeNano = 0; + startTimeMicro = timestampMicro; + // Timestamp have come from an external clock, so use startTimeNano as a flag + startTimeNano = 0; } - this.context.getTrace().registerSpan(this); + + context.getTrace().registerSpan(this); } @JsonIgnore @@ -74,16 +81,20 @@ public class DDSpan implements Span, MutableSpan { return durationNano.get() != 0; } + private void finishAndAddToTrace(final long durationNano) { + // ensure a min duration of 1 + if (this.durationNano.compareAndSet(0, Math.max(1, durationNano))) { + context.getTrace().addSpan(this); + } else { + log.debug("{} - already finished!", this); + } + } + @Override public final void finish() { - if (startTimeNano != 0) { - // no external clock was used, so we can rely on nanotime, but still ensure a min duration of 1. - if (this.durationNano.compareAndSet( - 0, Math.max(1, Clock.currentNanoTicks() - startTimeNano))) { - context.getTrace().addSpan(this); - } else { - log.debug("{} - already finished!", this); - } + if (startTimeNano > 0) { + // no external clock was used, so we can rely on nano time + finishAndAddToTrace(context.getTrace().getCurrentTimeNano() - startTimeNano); } else { finish(Clock.currentMicroTime()); } @@ -91,13 +102,7 @@ public class DDSpan implements Span, MutableSpan { @Override public final void finish(final long stoptimeMicros) { - // Ensure that duration is at least 1. Less than 1 is possible due to our use of system clock instead of nano time. - if (this.durationNano.compareAndSet( - 0, Math.max(1, TimeUnit.MICROSECONDS.toNanos(stoptimeMicros - this.startTimeMicro)))) { - context.getTrace().addSpan(this); - } else { - log.debug("{} - already finished!", this); - } + finishAndAddToTrace(TimeUnit.MICROSECONDS.toNanos(stoptimeMicros - startTimeMicro)); } /** @@ -288,7 +293,7 @@ public class DDSpan implements Span, MutableSpan { @Override @JsonGetter("start") public long getStartTime() { - return startTimeMicro * 1000L; + return startTimeNano > 0 ? startTimeNano : TimeUnit.MICROSECONDS.toNanos(startTimeMicro); } @Override diff --git a/dd-trace-ot/src/main/java/datadog/opentracing/PendingTrace.java b/dd-trace-ot/src/main/java/datadog/opentracing/PendingTrace.java index 24446c766a..5e3eb8533e 100644 --- a/dd-trace-ot/src/main/java/datadog/opentracing/PendingTrace.java +++ b/dd-trace-ot/src/main/java/datadog/opentracing/PendingTrace.java @@ -1,6 +1,7 @@ package datadog.opentracing; import datadog.opentracing.scopemanager.ContinuableScope; +import datadog.trace.common.util.Clock; import java.io.Closeable; import java.lang.ref.Reference; import java.lang.ref.ReferenceQueue; @@ -29,6 +30,11 @@ public class PendingTrace extends ConcurrentLinkedDeque { private final DDTracer tracer; private final long traceId; + /** Trace start time in nano seconds measured up to a millisecond accuracy */ + private final long startTimeNano; + /** Nano second ticks value at trace start */ + private final long startNanoTicks; + private final ReferenceQueue referenceQueue = new ReferenceQueue(); private final Set> weakReferences = Collections.newSetFromMap(new ConcurrentHashMap, Boolean>()); @@ -41,9 +47,27 @@ public class PendingTrace extends ConcurrentLinkedDeque { PendingTrace(final DDTracer tracer, final long traceId) { this.tracer = tracer; this.traceId = traceId; + + this.startTimeNano = Clock.currentNanoTime(); + this.startNanoTicks = Clock.currentNanoTicks(); + SPAN_CLEANER.pendingTraces.add(this); } + /** + * Current timestamp in nanoseconds. + * + *

Note: it is not possible to get 'real' nanosecond time. This method uses trace start time + * (which has millisecond precision) as a reference and it gets time with nanosecond precision + * after that. This means time measured within same Trace in different Spans is relatively correct + * with nanosecond precision. + * + * @return timestamp in nanoseconds + */ + public long getCurrentTimeNano() { + return startTimeNano + Math.max(0, Clock.currentNanoTicks() - startNanoTicks); + } + public void registerSpan(final DDSpan span) { if (span.context().getTraceId() != traceId) { log.debug("{} - span registered for wrong trace ({})", span, traceId); diff --git a/dd-trace-ot/src/main/java/datadog/trace/common/util/Clock.java b/dd-trace-ot/src/main/java/datadog/trace/common/util/Clock.java index 33800b2346..dc48c00871 100644 --- a/dd-trace-ot/src/main/java/datadog/trace/common/util/Clock.java +++ b/dd-trace-ot/src/main/java/datadog/trace/common/util/Clock.java @@ -26,7 +26,7 @@ public class Clock { * * @return The current nanos ticks */ - public static synchronized long currentNanoTicks() { + public static long currentNanoTicks() { return System.nanoTime(); } @@ -35,7 +35,17 @@ public class Clock { * * @return the current epoch time in micros */ - public static synchronized long currentMicroTime() { + public static long currentMicroTime() { return TimeUnit.MILLISECONDS.toMicros(System.currentTimeMillis()); } + + /** + * Get the current time in nanos. The actual precision is the millis Note: this will overflow in + * ~290 years after epoch + * + * @return the current epoch time in nanos + */ + public static long currentNanoTime() { + return TimeUnit.MILLISECONDS.toNanos(System.currentTimeMillis()); + } } diff --git a/dd-trace-ot/src/test/groovy/datadog/opentracing/DDSpanTest.groovy b/dd-trace-ot/src/test/groovy/datadog/opentracing/DDSpanTest.groovy index 2aff3c6d35..06f471b8c4 100644 --- a/dd-trace-ot/src/test/groovy/datadog/opentracing/DDSpanTest.groovy +++ b/dd-trace-ot/src/test/groovy/datadog/opentracing/DDSpanTest.groovy @@ -103,6 +103,8 @@ class DDSpanTest extends Specification { def total = System.nanoTime() - start expect: + // Generous 5 seconds to execute this test + Math.abs(TimeUnit.NANOSECONDS.toSeconds(span.startTime) - TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())) < 5 span.durationNano > betweenDur span.durationNano < total span.durationNano % mod > 0 // Very slim chance of a false negative. @@ -121,6 +123,8 @@ class DDSpanTest extends Specification { def total = Math.max(1, System.currentTimeMillis() - start) expect: + // Generous 5 seconds to execute this test + Math.abs(TimeUnit.NANOSECONDS.toSeconds(span.startTime) - TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())) < 5 span.durationNano >= TimeUnit.MILLISECONDS.toNanos(betweenDur) span.durationNano <= TimeUnit.MILLISECONDS.toNanos(total) span.durationNano % mod == 0 || span.durationNano == 1 @@ -138,6 +142,8 @@ class DDSpanTest extends Specification { def total = System.currentTimeMillis() - start + 1 expect: + // Generous 5 seconds to execute this test + Math.abs(TimeUnit.NANOSECONDS.toSeconds(span.startTime) - TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())) < 5 span.durationNano >= TimeUnit.MILLISECONDS.toNanos(betweenDur) span.durationNano <= TimeUnit.MILLISECONDS.toNanos(total) span.durationNano % mod == 0 diff --git a/dd-trace-ot/src/test/groovy/datadog/opentracing/PendingTraceTest.groovy b/dd-trace-ot/src/test/groovy/datadog/opentracing/PendingTraceTest.groovy index 6bc49376e6..2f1d7f94ca 100644 --- a/dd-trace-ot/src/test/groovy/datadog/opentracing/PendingTraceTest.groovy +++ b/dd-trace-ot/src/test/groovy/datadog/opentracing/PendingTraceTest.groovy @@ -4,6 +4,8 @@ import datadog.trace.common.writer.ListWriter import spock.lang.Specification import spock.lang.Subject +import java.util.concurrent.TimeUnit + class PendingTraceTest extends Specification { def writer = new ListWriter() @@ -150,7 +152,7 @@ class PendingTraceTest extends Specification { } - def "child spans created after trace written" () { + def "child spans created after trace written"() { setup: rootSpan.finish() // this shouldn't happen, but it's possible users of the api @@ -164,4 +166,10 @@ class PendingTraceTest extends Specification { trace.asList() == [rootSpan] writer == [[rootSpan]] } + + def "test getCurrentTimeNano"() { + expect: + // Generous 5 seconds to execute this test + Math.abs(TimeUnit.NANOSECONDS.toSeconds(trace.currentTimeNano) - TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())) < 5 + } } From d50154f15948d6d97298061933472f0d1c1114c7 Mon Sep 17 00:00:00 2001 From: Nikolay Martynov Date: Mon, 25 Jun 2018 09:28:01 -0400 Subject: [PATCH 2/2] PendingTrace: add a TODO to move time fields into DDTracer --- dd-trace-ot/src/main/java/datadog/opentracing/PendingTrace.java | 1 + 1 file changed, 1 insertion(+) diff --git a/dd-trace-ot/src/main/java/datadog/opentracing/PendingTrace.java b/dd-trace-ot/src/main/java/datadog/opentracing/PendingTrace.java index 5e3eb8533e..9d39b2e41f 100644 --- a/dd-trace-ot/src/main/java/datadog/opentracing/PendingTrace.java +++ b/dd-trace-ot/src/main/java/datadog/opentracing/PendingTrace.java @@ -30,6 +30,7 @@ public class PendingTrace extends ConcurrentLinkedDeque { private final DDTracer tracer; private final long traceId; + // TODO: consider moving these time fields into DDTracer to ensure that traces have precise relative time /** Trace start time in nano seconds measured up to a millisecond accuracy */ private final long startTimeNano; /** Nano second ticks value at trace start */