Add exception sampling event (#1297)

Jaroslav Bachorik 2020-04-20 16:31:54 +02:00 committed by GitHub
parent 5aeca66dcb
commit eb272bcc00
27 changed files with 1557 additions and 26 deletions

View File

@ -17,12 +17,7 @@ dependencies {
compile project(':dd-java-agent:agent-profiling:profiling-controller')
testCompile deps.junit5
testCompile group: 'org.mockito', name: 'mockito-core', version: '3.1.0'
testCompile group: 'org.mockito', name: 'mockito-junit-jupiter', version: '3.1.0'
// Mockito dependency above pulls older version of Bytebuddy that fails to work on java13,
// so force correct version here. Note: we can remove this once Mockito upgrades.
testCompile deps.bytebuddy
testCompile deps.bytebuddyagent
testCompile deps.mockito
testCompile group: 'org.hamcrest', name: 'hamcrest', version: '2.1'
}

View File

@ -241,3 +241,5 @@ jdk.ZStatisticsSampler#enabled=true
jdk.ZStatisticsSampler#threshold=10 ms
datadog.Scope#enabled=true
datadog.Scope#threshold=10 ms
datadog.ExceptionSample#enabled=true
datadog.ExceptionCount#enabled=true
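
These template entries switch both new event types on by default. The programmatic equivalent is jdk.jfr.Recording.enable, which is what the ExceptionHistogramTest setup further below does for datadog.ExceptionCount; a minimal sketch:

Recording recording = new Recording();
recording.enable("datadog.ExceptionCount");
recording.enable("datadog.ExceptionSample");
recording.start();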

View File

@ -17,12 +17,7 @@ dependencies {
testCompile deps.junit5
testCompile deps.guava
testCompile group: 'org.mockito', name: 'mockito-core', version: '3.1.0'
testCompile group: 'org.mockito', name: 'mockito-junit-jupiter', version: '3.1.0'
// Mockito dependency above pulls older version of Bytebuddy that fails to work on java13,
// so force correct version here. Note: we can remove this once Mockito upgrades.
testCompile deps.bytebuddy
testCompile deps.bytebuddyagent
testCompile deps.mockito
testCompile group: 'org.awaitility', name: 'awaitility', version: '4.0.1'
}

View File

@ -27,10 +27,7 @@ dependencies {
testCompile deps.junit5
testCompile project(':dd-java-agent:agent-profiling:profiling-testing')
testCompile group: 'org.mockito', name: 'mockito-core', version: '3.1.0'
testCompile group: 'org.mockito', name: 'mockito-junit-jupiter', version: '3.1.0'
testCompile deps.bytebuddy
testCompile deps.bytebuddyagent
testCompile deps.mockito
testCompile group: 'com.squareup.okhttp3', name: 'mockwebserver', version: versions.okhttp
}

View File

@ -20,7 +20,7 @@ public final class Constants {
"datadog.trace.bootstrap",
"datadog.trace.context",
"datadog.trace.instrumentation.api",
"io.opentracing",
"io.opentracing"
};
// This is used in IntegrationTestUtils.java

View File

@ -89,6 +89,9 @@ public class GlobalIgnoresMatcher<T extends TypeDescription>
}
if (name.startsWith("java.")) {
if (name.equals("java.lang.Exception")) {
return false;
}
if (name.equals("java.net.URL") || name.equals("java.net.HttpURLConnection")) {
return false;
}

View File

@ -0,0 +1,45 @@
ext {
minJavaVersionForTests = JavaVersion.VERSION_11
// Zulu has backported profiling support
forceJdk = ['ZULU8']
// By default tests will be compiled for the `minJavaVersionForTests` version,
// but here we skip that since we want to run with ZULU8 as well
skipSettingTestJavaVersion = true
enableJunitPlatform = true
}
apply from: "${rootDir}/gradle/java.gradle"
apply plugin: "idea"
dependencies {
compile project(':dd-java-agent:agent-tooling')
testCompile deps.junit5
testCompile deps.jmc
testCompile deps.commonsMath
testCompile deps.mockito
}
sourceCompatibility = JavaVersion.VERSION_1_7
targetCompatibility = JavaVersion.VERSION_1_7
// Must use Java 11 to build JFR enabled code - there is no JFR in OpenJDK 8 (revisit once JFR in Java 8 is available)
[compileMain_java11Java, compileTestJava].each {
it.sourceCompatibility = JavaVersion.VERSION_1_8
it.targetCompatibility = JavaVersion.VERSION_1_8
it.doFirst {
// Disable '-processing' because some annotations are not claimed.
// Disable '-options' because we are compiling for java8 without specifying bootstrap - intentionally.
// Disable '-path' because some of the paths seem to be missing.
// Compile to 8 compatible byte code
options.compilerArgs.addAll(['-source', '8', '-target', '8', '-Xlint:all,-processing,-options,-path'])
options.fork = true
options.forkOptions.javaHome = file(System.env.JAVA_11_HOME)
}
}
idea {
module {
jdkName = '11'
}
}

View File

@ -0,0 +1,80 @@
package datadog.exceptions.instrumentation;
import static net.bytebuddy.matcher.ElementMatchers.is;
import static net.bytebuddy.matcher.ElementMatchers.isConstructor;
import static net.bytebuddy.matcher.ElementMatchers.none;
import com.google.auto.service.AutoService;
import datadog.trace.agent.tooling.Instrumenter;
import datadog.trace.api.Config;
import java.util.Collections;
import java.util.Map;
import net.bytebuddy.description.method.MethodDescription;
import net.bytebuddy.description.type.TypeDescription;
import net.bytebuddy.matcher.ElementMatcher;
/**
* Provides instrumentation of the {@linkplain Exception} constructor. <br>
* {@linkplain Exception}, as opposed to {@linkplain Throwable}, was deliberately chosen so that we
* don't instrument the {@linkplain Error} class and its subclasses, since they are tracked by a
* native JFR event already.
*/
@AutoService(Instrumenter.class)
public final class ExceptionInstrumentation extends Instrumenter.Default {
private final boolean hasJfr;
public ExceptionInstrumentation() {
super("exceptions");
/* Check only for the open-source JFR implementation.
* If it is ever needed to also support the closed-source JDK 8 version, this check should be
* enhanced.
* Need this custom check because ClassLoaderMatcher.hasClassesNamed() does not support bootstrap class loader yet.
* Note: the downside of this is that we load some JFR classes at startup.
* Note2: we cannot check that we can load ExceptionSampleEvent because it is not available on the class path yet.
*/
hasJfr = ClassLoader.getSystemClassLoader().getResource("jdk/jfr/Event.class") != null;
}
@Override
public String[] helperClassNames() {
/*
* Since the only instrumentation target is java.lang.Exception, which is loaded by the bootstrap classloader,
* it is ok to use helper classes instead of hacking around a Java 8 specific bootstrap.
*/
return hasJfr
? new String[] {
"com.datadog.profiling.exceptions.StreamingSampler",
"com.datadog.profiling.exceptions.StreamingSampler$Counts",
"com.datadog.profiling.exceptions.StreamingSampler$RollWindowTask",
"com.datadog.profiling.exceptions.ExceptionCountEvent",
"com.datadog.profiling.exceptions.ExceptionHistogram",
"com.datadog.profiling.exceptions.ExceptionHistogram$Pair",
"com.datadog.profiling.exceptions.ExceptionProfiling",
"com.datadog.profiling.exceptions.ExceptionSampleEvent",
"com.datadog.profiling.exceptions.ExceptionSampler"
}
: new String[0];
}
@Override
public ElementMatcher<? super TypeDescription> typeMatcher() {
if (hasJfr) {
// match only java.lang.Exception since java.lang.Error is tracked by another JFR event
return is(Exception.class);
}
return none();
}
@Override
public Map<? extends ElementMatcher<? super MethodDescription>, String> transformers() {
if (hasJfr) {
return Collections.singletonMap(isConstructor(), packageName + ".ExceptionAdvice");
}
return Collections.emptyMap();
}
@Override
protected boolean defaultEnabled() {
return Config.get().isProfilingEnabled();
}
}

View File

@ -0,0 +1,30 @@
package com.datadog.profiling.exceptions;
import jdk.jfr.Category;
import jdk.jfr.Description;
import jdk.jfr.Enabled;
import jdk.jfr.Event;
import jdk.jfr.Label;
import jdk.jfr.Name;
import jdk.jfr.Period;
import jdk.jfr.StackTrace;
@Name("datadog.ExceptionCount")
@Label("ExceptionCount")
@Description("Datadog exception count event.")
@Category("Datadog")
@Period(value = "endChunk")
@StackTrace(false)
@Enabled
public class ExceptionCountEvent extends Event {
@Label("Exception type")
private String type;
@Label("Exception count")
private long count;
public ExceptionCountEvent(String type, long count) {
this.type = type;
this.count = count;
}
}
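
Note that @Period("endChunk") only declares when the event is expected to be emitted; nothing happens until a periodic hook is registered with JFR, which is exactly what ExceptionHistogram does in the next file. A minimal sketch of the pattern (the callback body here is hypothetical):

// Hypothetical hook body - ExceptionHistogram::emit plays this role in the real code.
Runnable hook = () -> {
  ExceptionCountEvent event = new ExceptionCountEvent("java.lang.NullPointerException", 42);
  if (event.shouldCommit()) {
    event.commit();
  }
};
jdk.jfr.FlightRecorder.addPeriodicEvent(ExceptionCountEvent.class, hook);
// ... and later, to detach:
jdk.jfr.FlightRecorder.removePeriodicEvent(hook);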

View File

@ -0,0 +1,149 @@
package com.datadog.profiling.exceptions;
import datadog.trace.api.Config;
import jdk.jfr.EventType;
import jdk.jfr.FlightRecorder;
import lombok.extern.slf4j.Slf4j;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder;
import java.util.stream.Stream;
/**
* A simple exception type histogram implementation.<br>
* It tracks a fixed number of exception types and, for each of them, keeps the number of instances created since
* the last {@linkplain ExceptionHistogram#emit()} call (or since the {@linkplain ExceptionHistogram} instance
* was created if {@linkplain ExceptionHistogram#emit()} hasn't been called yet).<br>
*
* An {@linkplain ExceptionHistogram} instance is registered with JFR to call {@linkplain ExceptionHistogram#emit()}
* method at chunk end, as specified in {@linkplain ExceptionCountEvent} class. This callback will then emit a number
* of {@linkplain ExceptionCountEvent} events.
*/
@Slf4j
public class ExceptionHistogram {
static final String CLIPPED_ENTRY_TYPE_NAME = "TOO-MANY-EXCEPTIONS";
private final Map<String, AtomicLong> histogram = new ConcurrentHashMap<>();
private final int maxTopItems;
private final int maxSize;
private final EventType exceptionCountEventType;
private final Runnable eventHook;
ExceptionHistogram(final Config config) {
maxTopItems = config.getProfilingExceptionHistogramTopItems();
maxSize = config.getProfilingExceptionHistogramMaxCollectionSize();
exceptionCountEventType = EventType.getEventType(ExceptionCountEvent.class);
eventHook = this::emit;
FlightRecorder.addPeriodicEvent(ExceptionCountEvent.class, eventHook);
}
/**
* Removes this instance from the JFR periodic event callbacks
*/
void deregister() {
FlightRecorder.removePeriodicEvent(eventHook);
}
/**
* Record a new exception instance
* @param exception the exception instance to record
* @return {@literal true} if this is the first record of the given exception type; {@literal false} otherwise
*/
public boolean record(final Exception exception) {
if (exception == null) {
return false;
}
return record(exception.getClass().getCanonicalName());
}
private boolean record(String typeName) {
if (!exceptionCountEventType.isEnabled()) {
return false;
}
if (!histogram.containsKey(typeName) && histogram.size() >= maxSize) {
log.debug("Histogram is too big, skipping adding new entry: {}", typeName);
// Overwrite type name to limit total number of entries in the histogram
typeName = CLIPPED_ENTRY_TYPE_NAME;
}
long count = histogram
.computeIfAbsent(
typeName,
k -> new AtomicLong()
)
.getAndIncrement();
/*
* This is supposed to signal that a particular exception type was seen for the first time in a particular time span.
* !ATTENTION! This works on a best-effort basis - namely, all overflowing exceptions which are recorded
* as 'TOO-MANY-EXCEPTIONS' will receive only one common 'first hit'.
*/
return count == 0;
}
private void emit() {
if (!exceptionCountEventType.isEnabled()) {
return;
}
doEmit();
}
void doEmit() {
Stream<Pair<String, Long>> items =
histogram
.entrySet()
.stream()
.map(e -> Pair.of(e.getKey(), e.getValue().getAndSet(0)))
.filter(p -> p.getValue() != 0)
.sorted((l1, l2) -> Long.compare(l2.getValue(), l1.getValue()));
if (maxTopItems > 0) {
items = items.limit(maxTopItems);
}
emitEvents(items);
// The stream is 'materialized' by the `forEach` call in `emitEvents` above, so we have to do the cleanup after that;
// otherwise we would keep zeroed entries around for one extra iteration
histogram.entrySet().removeIf(e -> e.getValue().get() == 0L);
}
// Important: this method is non-final and package-private to allow overriding in concurrency tests
void emitEvents(Stream<Pair<String, Long>> items) {
items.forEach(e -> createAndCommitEvent(e.getKey(), e.getValue()));
}
private void createAndCommitEvent(final String type, final long count) {
final ExceptionCountEvent event = new ExceptionCountEvent(type, count);
if (event.shouldCommit()) {
event.commit();
}
}
static class Pair<K, V> {
final K key;
final V value;
public static <K, V> Pair<K, V> of(final K key, final V value) {
return new Pair<>(key, value);
}
public Pair(final K key, final V value) {
this.key = key;
this.value = value;
}
public K getKey() {
return key;
}
public V getValue() {
return value;
}
}
}
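
Taken together, the intended call pattern is small. A hedged usage sketch, assuming the datadog.ExceptionCount event type is enabled in the active recording:

ExceptionHistogram histogram = new ExceptionHistogram(Config.get());
boolean firstHit = histogram.record(new NullPointerException()); // true for the first NPE in this time span
histogram.record(new NullPointerException());                    // false from now on
// At chunk end JFR invokes the registered hook, which emits one
// datadog.ExceptionCount event per tracked type and resets the counts.
histogram.deregister(); // detach from the JFR periodic callbacks when done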

View File

@ -0,0 +1,44 @@
package com.datadog.profiling.exceptions;
import datadog.trace.api.Config;
/**
* JVM-wide singleton exception profiling service. Uses {@linkplain Config} class to configure
* itself using either system properties, environment or properties override.
*/
public final class ExceptionProfiling {
private static final ExceptionProfiling INSTANCE = new ExceptionProfiling(Config.get());
/**
* Get a pre-configured shared instance.
*
* @return the shared instance
*/
public static ExceptionProfiling getInstance() {
return ExceptionProfiling.INSTANCE;
}
private final ExceptionHistogram histogram;
private final ExceptionSampler sampler;
private ExceptionProfiling(final Config config) {
this(new ExceptionSampler(config), new ExceptionHistogram(config));
}
ExceptionProfiling(final ExceptionSampler sampler, final ExceptionHistogram histogram) {
this.sampler = sampler;
this.histogram = histogram;
}
public ExceptionSampleEvent process(final Exception e) {
// always record the exception in histogram
final boolean firstHit = histogram.record(e);
final boolean sampled = sampler.sample();
if (firstHit || sampled) {
return new ExceptionSampleEvent(e, sampled, firstHit);
}
return null;
}
}
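
The contract of process() is that a non-null event comes back only when the exception is the first occurrence of its type or it won the sampling lottery; the caller is expected to guard the commit itself. A minimal sketch (the real caller is ExceptionAdvice further down):

final ExceptionSampleEvent event =
    ExceptionProfiling.getInstance().process(new IllegalStateException("test"));
if (event != null && event.shouldCommit()) {
  event.commit(); // records type, message, stack depth and the sampled/firstOccurrence flags
}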

View File

@ -0,0 +1,44 @@
package com.datadog.profiling.exceptions;
import jdk.jfr.Category;
import jdk.jfr.Description;
import jdk.jfr.Event;
import jdk.jfr.Label;
import jdk.jfr.Name;
@Name("datadog.ExceptionSample")
@Label("ExceptionSample")
@Description("Datadog exception sample event.")
@Category("Datadog")
public class ExceptionSampleEvent extends Event {
@Label("Exception Type")
private final String type;
@Label("Exception message")
private final String message;
/** JFR may truncate the stack trace - so store the original length as well. */
@Label("Exception stackdepth")
private final int stackDepth;
@Label("Sampled")
private final boolean sampled;
@Label("First occurrence")
private final boolean firstOccurrence;
public ExceptionSampleEvent(
final Exception e, final boolean sampled, final boolean firstOccurrence) {
/*
* TODO: we should have some tests for this class.
* Unfortunately at the moment this is not easily possible because we cannot build tests with groovy that
* are compiled against the java11 SDK - this seems to be a gradle-groovy interaction limitation.
* Writing these tests in java seems like it would introduce more noise.
*/
type = e.getClass().getName();
message = e.getMessage();
stackDepth = e.getStackTrace().length;
this.sampled = sampled;
this.firstOccurrence = firstOccurrence;
}
}

View File

@ -0,0 +1,48 @@
package com.datadog.profiling.exceptions;
import datadog.trace.api.Config;
import java.time.Duration;
import java.time.temporal.ChronoUnit;
import jdk.jfr.EventType;
final class ExceptionSampler {
/*
* Fixed 0.5 second sampling window.
* Logic in StreamingSampler relies on the sampling window being small compared to (in our case) the recording duration:
* the sampler may overshoot in one given window but should average out to samplesPerWindow in the long run.
*/
private static final Duration SAMPLING_WINDOW = Duration.of(500, ChronoUnit.MILLIS);
private final StreamingSampler sampler;
private final EventType exceptionSampleType;
ExceptionSampler(final Config config) {
this(SAMPLING_WINDOW, getSamplesPerWindow(config), samplingWindowsPerRecording(config));
}
ExceptionSampler(final Duration windowDuration, final int samplesPerWindow, final int lookback) {
sampler = new StreamingSampler(windowDuration, samplesPerWindow, lookback);
exceptionSampleType = EventType.getEventType(ExceptionSampleEvent.class);
}
private static int samplingWindowsPerRecording(final Config config) {
/*
* Java8 doesn't have Duration#dividedBy(Duration) so we have to implement a poor man's version.
* None of these durations should be big enough to warrant dealing with bigints.
* We also do not care about nanoseconds here.
*/
return (int)
Math.min(
Duration.of(config.getProfilingUploadPeriod(), ChronoUnit.SECONDS).toMillis()
/ SAMPLING_WINDOW.toMillis(),
Integer.MAX_VALUE);
}
private static int getSamplesPerWindow(final Config config) {
return config.getProfilingExceptionSampleLimit() / samplingWindowsPerRecording(config);
}
boolean sample() {
return exceptionSampleType.isEnabled() && sampler.sample();
}
}
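
Worked numbers may help here. Only the 10_000 exception sample limit is a real default (see the Config changes below); the 60-second upload period is an assumption for illustration:

// Illustration only - the upload period is an assumed value, not a verified default.
long recordingMillis = Duration.of(60, ChronoUnit.SECONDS).toMillis(); // assumed upload period
long windows = recordingMillis / SAMPLING_WINDOW.toMillis();           // 60_000 / 500 = 120 windows
long perWindow = 10_000 / windows;                                     // ~83 samples per window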

View File

@ -0,0 +1,186 @@
package com.datadog.profiling.exceptions;
import datadog.common.exec.CommonTaskExecutor;
import datadog.common.exec.CommonTaskExecutor.Task;
import java.time.Duration;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.atomic.LongAdder;
/**
* A streaming (non-remembering) sampler.
*
* <p>The sampler attempts to generate at most N samples per fixed time window in a randomized
* fashion. For this it divides the timeline into 'sampling windows' of constant duration. Each
* sampling window targets a constant number of samples which are scattered randomly (uniform
* distribution) throughout the window duration and, once the window is over, the real stats of
* incoming events and the number of gathered samples are used to recompute the target probability
* to use in the following window.
*
* <p>This will guarantee, if the windows are not excessively large, that the sampler will be able
* to adjust to the changes in the rate of incoming events.
*
* <p>However, there might be such rapid changes in the incoming event rate that we will optimistically use
* all allowed samples well before the current window has elapsed or, on the other end of the
* spectrum, there will be too few incoming events and the sampler will not be able to generate the
* target number of samples.
*
* <p>To smooth out these hiccups the sampler maintains an under-sampling budget which can be used
* to compensate for too rapid changes in the incoming events rate and maintain the target average
* number of samples per window.
*/
class StreamingSampler {
/*
* Number of windows to look back when computing carried over budget.
* This value is 'approximate' since we use an EMA to keep a running average.
*/
private static final int CARRIED_OVER_BUDGET_LOOK_BACK = 16;
private static final class Counts {
private final LongAdder testCounter = new LongAdder();
private final AtomicLong sampleCounter = new AtomicLong(0L);
void addTest() {
testCounter.increment();
}
boolean addSample(final long limit) {
return sampleCounter.getAndUpdate(s -> s + (s < limit ? 1 : 0)) < limit;
}
}
/*
* Exponential Moving Average (EMA) last element weight.
* Check out papers about using EMA for streaming data - eg.
* https://nestedsoftware.com/2018/04/04/exponential-moving-average-on-streaming-data-4hhl.24876.html
*
* Corresponds to 'lookback' of N values:
* With T being the index of the most recent value the lookback of N values means that for all values with index
* T-K, where K > N, the relative weight of that value, computed as (1 - alpha)^K, is less than or equal to the
* weight assigned by a plain arithmetic average (= 1/N).
*/
private final double emaAlpha;
private final int samplesPerWindow;
private final AtomicReference<Counts> countsRef;
// these attributes need to be volatile since they are accessed from user threads as well as the
// maintenance one
private volatile double probability = 1d;
private volatile long samplesBudget;
// these attributes are accessed solely from the window maintenance thread
private double totalCountRunningAverage = 0d;
private double avgSamples;
private final double budgetAlpha;
/**
* Create a new sampler instance
*
* @param windowDuration the sampling window duration
* @param samplesPerWindow the maximum number of samples in the sampling window
* @param lookback the number of windows to consider in averaging the sampling rate
* @param taskExecutor common task executor to use for periodic rolls
*/
StreamingSampler(
final Duration windowDuration,
final int samplesPerWindow,
final int lookback,
final CommonTaskExecutor taskExecutor) {
this.samplesPerWindow = samplesPerWindow;
samplesBudget = samplesPerWindow + CARRIED_OVER_BUDGET_LOOK_BACK * samplesPerWindow;
emaAlpha = computeIntervalAlpha(lookback);
budgetAlpha = computeIntervalAlpha(CARRIED_OVER_BUDGET_LOOK_BACK);
countsRef = new AtomicReference<>(new Counts());
taskExecutor.scheduleAtFixedRate(
RollWindowTask.INSTANCE,
this,
windowDuration.toNanos(),
windowDuration.toNanos(),
TimeUnit.NANOSECONDS,
"exception sampling window roll");
}
/**
* Create a new sampler instance with automatic window roll.
*
* @param windowDuration the sampling window duration
* @param samplesPerWindow the maximum number of samples in the sampling window
* @param lookback the number of windows to consider in averaging the sampling rate
*/
StreamingSampler(final Duration windowDuration, final int samplesPerWindow, final int lookback) {
this(windowDuration, samplesPerWindow, lookback, CommonTaskExecutor.INSTANCE);
}
/**
* Provides a binary answer as to whether the current event is to be sampled
*
* @return {@literal true} if the event should be sampled
*/
final boolean sample() {
final Counts counts = countsRef.get();
counts.addTest();
if (ThreadLocalRandom.current().nextDouble() < probability) {
return counts.addSample(samplesBudget);
}
return false;
}
private void rollWindow() {
/*
* Atomically replace the Counts instance such that sample requests during window maintenance will be
* using the newly created counts instead of the ones currently processed by the maintenance routine.
*/
final Counts counts = countsRef.getAndSet(new Counts());
final long totalCount = counts.testCounter.sum();
final long sampledCount = counts.sampleCounter.get();
samplesBudget = calculateBudgetEma(sampledCount);
if (totalCountRunningAverage == 0) {
totalCountRunningAverage = totalCount;
} else {
totalCountRunningAverage =
totalCountRunningAverage + emaAlpha * (totalCount - totalCountRunningAverage);
}
if (totalCountRunningAverage <= 0) {
probability = 1;
} else {
probability = Math.min(samplesBudget / totalCountRunningAverage, 1d);
}
}
private long calculateBudgetEma(final long sampledCount) {
avgSamples =
Double.isNaN(avgSamples)
? sampledCount
: avgSamples + budgetAlpha * (sampledCount - avgSamples);
return Math.round(Math.max(samplesPerWindow - avgSamples, 0) * CARRIED_OVER_BUDGET_LOOK_BACK);
}
private static double computeIntervalAlpha(final int lookback) {
return 1 - Math.pow(lookback, -1d / lookback);
}
/*
* It is important to use an explicit class here to avoid implicit hard references to StreamingSampler from within the scheduler
*/
private static class RollWindowTask implements Task<StreamingSampler> {
static final RollWindowTask INSTANCE = new RollWindowTask();
@Override
public void run(final StreamingSampler target) {
target.rollWindow();
}
}
}
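
A hedged usage sketch of the sampler in isolation (window size, per-window target and lookback picked arbitrarily); the three-argument constructor schedules the window roll on the shared CommonTaskExecutor:

// Sketch: 1-second windows, at most ~10 samples per window on average, 30-window lookback.
StreamingSampler sampler = new StreamingSampler(Duration.ofSeconds(1), 10, 30);
int sampled = 0;
for (int i = 0; i < 100_000; i++) {
  if (sampler.sample()) {
    sampled++; // roughly samplesPerWindow hits per elapsed window in the long run
  }
}

For the fixed CARRIED_OVER_BUDGET_LOOK_BACK of 16, the EMA weight works out to computeIntervalAlpha(16) = 1 - 16^(-1/16) ≈ 0.159, i.e. each new window contributes about 16% to the running budget average.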

View File

@ -0,0 +1,27 @@
package datadog.exceptions.instrumentation;
import com.datadog.profiling.exceptions.ExceptionProfiling;
import com.datadog.profiling.exceptions.ExceptionSampleEvent;
import net.bytebuddy.asm.Advice;
public class ExceptionAdvice {
@Advice.OnMethodExit(suppress = Throwable.class)
public static void onExit(@Advice.This final Exception e) {
/*
* We may get into a situation where this is called before ExceptionProfiling has had a chance
* to fully initialize. So despite the fact that getInstance() returns a static singleton, it may
* return null sometimes.
*/
if (ExceptionProfiling.getInstance() == null) {
return;
}
/*
* JFR will assign the stacktrace depending on the place where the event is committed.
* Therefore we need to commit the event here, right in the 'Exception' constructor.
*/
final ExceptionSampleEvent event = ExceptionProfiling.getInstance().process(e);
if (event != null && event.shouldCommit()) {
event.commit();
}
}
}

View File

@ -0,0 +1,315 @@
package com.datadog.profiling.exceptions;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSortedMap;
import datadog.trace.api.Config;
import java.io.IOException;
import java.time.Instant;
import java.util.Comparator;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.Phaser;
import java.util.stream.Stream;
import jdk.jfr.FlightRecorder;
import jdk.jfr.Recording;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.openjdk.jmc.common.item.Aggregators;
import org.openjdk.jmc.common.item.Attribute;
import org.openjdk.jmc.common.item.IAttribute;
import org.openjdk.jmc.common.item.IItemCollection;
import org.openjdk.jmc.common.item.ItemFilters;
import org.openjdk.jmc.common.unit.IQuantity;
import org.openjdk.jmc.common.unit.UnitLookup;
import org.openjdk.jmc.flightrecorder.CouldNotLoadRecordingException;
import org.openjdk.jmc.flightrecorder.JfrLoaderToolkit;
public class ExceptionHistogramTest {
private static final IAttribute<String> TYPE =
Attribute.attr("type", "type", "Exception type", UnitLookup.PLAIN_TEXT);
private static final IAttribute<IQuantity> COUNT =
Attribute.attr("count", "count", "Exception count", UnitLookup.NUMBER);
private static final Comparator<Exception> EXCEPTION_COMPARATOR =
new Comparator<Exception>() {
@Override
public int compare(final Exception e1, final Exception e2) {
return e1.getClass().getCanonicalName().compareTo(e2.getClass().getCanonicalName());
}
@Override
public boolean equals(final Object obj) {
return this == obj;
}
};
private static final int MAX_ITEMS = 2;
private static final int MAX_SIZE = 2;
private Recording recording;
private Recording snapshot;
private ExceptionHistogram instance;
@BeforeEach
public void setup() {
recording = new Recording();
recording.enable("datadog.ExceptionCount");
recording.start();
final Properties properties = new Properties();
properties.setProperty(
Config.PROFILING_EXCEPTION_HISTOGRAM_TOP_ITEMS, Integer.toString(MAX_ITEMS));
instance = new ExceptionHistogram(Config.get(properties));
}
@AfterEach
public void tearDown() {
if (snapshot != null) {
snapshot.close();
}
recording.close();
instance.deregister();
}
@Test
public void testFirstHitConcurrent() {
Phaser phaser = new Phaser(2);
ExceptionHistogram histogram =
new ExceptionHistogram(Config.get()) {
@Override
void emitEvents(Stream<ExceptionHistogram.Pair<String, Long>> items) {
super.emitEvents(items);
// #1 - histogram sums are reset but zero entries are not removed yet
phaser.arriveAndAwaitAdvance();
// #2 - safe to leave the emit() method
phaser.arriveAndAwaitAdvance();
}
};
// don't want the JFR integration active here
histogram.deregister();
for (int i = 0; i < 5; i++) {
boolean firstHit = histogram.record(new NullPointerException());
assertEquals(i == 0, firstHit);
}
// start emitting in a separate thread
new Thread(histogram::doEmit).start();
// wait for #1 - this is the point where a data race can happen if new exceptions are recorded
// during 'emit()'
phaser.arriveAndAwaitAdvance();
// make sure that any exception recorded during 'emit()' has a correct 'first hit' status
assertTrue(histogram.record(new NullPointerException()));
// unblock #2 such that 'emit()' may continue
phaser.arrive();
// the subsequent exception recording will not be a 'first hit'
assertFalse(histogram.record(new NullPointerException()));
}
@Test
public void testExceptionsRecorded()
throws IOException, CouldNotLoadRecordingException, InterruptedException {
writeExceptions(
ImmutableMap.of(
new NullPointerException(),
8,
new IllegalArgumentException(),
5,
new RuntimeException(),
1));
final Instant firstRecordingNow = Instant.now();
snapshot = FlightRecorder.getFlightRecorder().takeSnapshot();
final IItemCollection firstRecording = getEvents(snapshot, Instant.MIN, firstRecordingNow);
assertEquals(MAX_ITEMS, firstRecording.getAggregate(Aggregators.count()).longValue());
assertEquals(
8,
firstRecording
.apply(ItemFilters.equals(TYPE, NullPointerException.class.getCanonicalName()))
.getAggregate(Aggregators.sum(COUNT))
.longValue());
assertEquals(
5,
firstRecording
.apply(ItemFilters.equals(TYPE, IllegalArgumentException.class.getCanonicalName()))
.getAggregate(Aggregators.sum(COUNT))
.longValue());
snapshot.close();
// Sleep to make sure we get a new batch of exceptions only
Thread.sleep(1000);
writeExceptions(
ImmutableMap.of(
new RuntimeException(),
8,
new NullPointerException(),
5,
new IllegalArgumentException(),
1));
snapshot = FlightRecorder.getFlightRecorder().takeSnapshot();
final IItemCollection secondRecording =
getEvents(snapshot, firstRecordingNow.plusMillis(1000), Instant.MAX);
assertEquals(MAX_ITEMS, secondRecording.getAggregate(Aggregators.count()).longValue());
assertEquals(
8,
secondRecording
.apply(ItemFilters.equals(TYPE, RuntimeException.class.getCanonicalName()))
.getAggregate(Aggregators.sum(COUNT))
.longValue());
assertEquals(
5,
secondRecording
.apply(ItemFilters.equals(TYPE, NullPointerException.class.getCanonicalName()))
.getAggregate(Aggregators.sum(COUNT))
.longValue());
snapshot.close();
}
@Test
public void testHistogramSizeIsLimited()
throws IOException, CouldNotLoadRecordingException, InterruptedException {
instance.deregister();
final Properties properties = new Properties();
properties.setProperty(
Config.PROFILING_EXCEPTION_HISTOGRAM_MAX_COLLECTION_SIZE, Integer.toString(MAX_SIZE));
instance = new ExceptionHistogram(Config.get(properties));
// Exceptions are written in alphabetical order
writeExceptions(
ImmutableSortedMap.copyOf(
ImmutableMap.of(
new Exception(),
5,
new IllegalArgumentException(),
8,
new NegativeArraySizeException(),
10,
new NullPointerException(),
11),
EXCEPTION_COMPARATOR));
final Instant firstRecordingNow = Instant.now();
snapshot = FlightRecorder.getFlightRecorder().takeSnapshot();
final IItemCollection firstRecording = getEvents(snapshot, Instant.MIN, firstRecordingNow);
assertEquals(MAX_ITEMS + 1, firstRecording.getAggregate(Aggregators.count()).longValue());
assertEquals(
5,
firstRecording
.apply(ItemFilters.equals(TYPE, Exception.class.getCanonicalName()))
.getAggregate(Aggregators.sum(COUNT))
.longValue());
assertEquals(
8,
firstRecording
.apply(ItemFilters.equals(TYPE, IllegalArgumentException.class.getCanonicalName()))
.getAggregate(Aggregators.sum(COUNT))
.longValue());
assertEquals(
21,
firstRecording
.apply(ItemFilters.equals(TYPE, ExceptionHistogram.CLIPPED_ENTRY_TYPE_NAME))
.getAggregate(Aggregators.sum(COUNT))
.longValue());
snapshot.close();
// Sleep to make sure we get a new batch of exceptions only
Thread.sleep(1000);
// Exceptions are written in 'code' order
writeExceptions(
ImmutableSortedMap.copyOf(
ImmutableMap.of(
new IllegalArgumentException(),
5,
new NegativeArraySizeException(),
8,
new NullPointerException(),
10,
new RuntimeException(),
11),
EXCEPTION_COMPARATOR));
snapshot = FlightRecorder.getFlightRecorder().takeSnapshot();
final IItemCollection secondRecording =
getEvents(snapshot, firstRecordingNow.plusMillis(1000), Instant.MAX);
assertEquals(MAX_ITEMS + 1, secondRecording.getAggregate(Aggregators.count()).longValue());
assertEquals(
5,
secondRecording
.apply(ItemFilters.equals(TYPE, IllegalArgumentException.class.getCanonicalName()))
.getAggregate(Aggregators.sum(COUNT))
.longValue());
assertEquals(
8,
secondRecording
.apply(ItemFilters.equals(TYPE, NegativeArraySizeException.class.getCanonicalName()))
.getAggregate(Aggregators.sum(COUNT))
.longValue());
assertEquals(
21,
secondRecording
.apply(ItemFilters.equals(TYPE, ExceptionHistogram.CLIPPED_ENTRY_TYPE_NAME))
.getAggregate(Aggregators.sum(COUNT))
.longValue());
snapshot.close();
}
@Test
public void testDisabled() throws IOException, CouldNotLoadRecordingException {
recording.disable("datadog.ExceptionCount");
final Map<Exception, Integer> exceptions =
ImmutableMap.of(
new NullPointerException(),
8,
new IllegalArgumentException(),
5,
new RuntimeException(),
1);
for (final Map.Entry<Exception, Integer> entry : exceptions.entrySet()) {
for (int i = 0; i < entry.getValue(); i++) {
assertFalse(instance.record(entry.getKey()));
}
}
final Recording snapshot = FlightRecorder.getFlightRecorder().takeSnapshot();
final IItemCollection recording = getEvents(snapshot, Instant.MIN, Instant.MAX);
assertEquals(0, recording.getAggregate(Aggregators.count()).longValue());
snapshot.close();
}
private IItemCollection getEvents(
final Recording secondSnapshot, final Instant start, final Instant end)
throws IOException, CouldNotLoadRecordingException {
return JfrLoaderToolkit.loadEvents(secondSnapshot.getStream(start, end))
.apply(ItemFilters.type("datadog.ExceptionCount"));
}
private void writeExceptions(final Map<Exception, Integer> exceptions) {
// Just check that writing null doesn't break anything
instance.record(null);
for (final Map.Entry<Exception, Integer> entry : exceptions.entrySet()) {
for (int i = 0; i < entry.getValue(); i++) {
instance.record(entry.getKey());
}
}
}
}

View File

@ -0,0 +1,490 @@
package com.datadog.profiling.exceptions;
import static java.lang.Math.abs;
import static java.lang.Math.round;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.ArgumentMatchers.same;
import static org.mockito.Mockito.when;
import datadog.common.exec.CommonTaskExecutor;
import datadog.common.exec.CommonTaskExecutor.Task;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.math3.distribution.PoissonDistribution;
import org.apache.commons.math3.stat.descriptive.moment.Mean;
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
import org.apache.commons.math3.util.Pair;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.ArgumentCaptor;
import org.mockito.Captor;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
/**
* Tests various hand-crafted scenarios of events coming in different patterns. Tests both the
* isolated single-threaded execution and events arriving on concurrent threads.
*
* <p>The test supports a 'benchmark' mode to explore the reliability boundaries where all test cases
* can be run multiple times - the number of iterations is passed in via the {@literal
* com.datadog.profiling.exceptions.test-iterations} system property.
*/
@ExtendWith(MockitoExtension.class)
@Slf4j
class StreamingSamplerTest {
private static final Duration WINDOW_DURATION = Duration.ofSeconds(1);
/** Generates windows with numbers of events drawn from a Poisson distribution */
private static final class PoissonWindowEventsSupplier implements Supplier<Integer> {
private final PoissonDistribution distribution;
/** @param eventsPerWindowMean the average number of events per window */
PoissonWindowEventsSupplier(final int eventsPerWindowMean) {
distribution = new PoissonDistribution(eventsPerWindowMean);
distribution.reseedRandomGenerator(12345671);
}
@Override
public Integer get() {
return distribution.sample();
}
@Override
public String toString() {
return "Poisson: ("
+ "mean="
+ distribution.getMean()
+ ", variance="
+ distribution.getNumericalVariance()
+ ")";
}
}
/**
* Generates bursty windows - some of the windows have an extremely low number of events while the
* others have a very high number of events.
*/
private static final class BurstingWindowsEventsSupplier implements Supplier<Integer> {
private final Random rnd = new Random(176431);
private final double burstProbability;
private final int minEvents;
private final int maxEvents;
/**
* @param burstProbability the probability of burst window happening
* @param nonBurstEvents number of events in non-burst window
* @param burstEvents number of events in burst window
*/
BurstingWindowsEventsSupplier(
final double burstProbability, final int nonBurstEvents, final int burstEvents) {
this.burstProbability = burstProbability;
this.minEvents = nonBurstEvents;
this.maxEvents = burstEvents;
}
@Override
public Integer get() {
if (rnd.nextDouble() <= burstProbability) {
return maxEvents;
} else {
return minEvents;
}
}
@Override
public String toString() {
return "Burst: ("
+ "probability="
+ burstProbability
+ ", minEvents="
+ minEvents
+ ", maxEvents="
+ maxEvents
+ ')';
}
}
/** Generates windows with constant number of events. */
private static final class ConstantWindowsEventsSupplier implements Supplier<Integer> {
private final int events;
/** @param events number of events per window */
ConstantWindowsEventsSupplier(final int events) {
this.events = events;
}
@Override
public Integer get() {
return events;
}
@Override
public String toString() {
return "Constant: (" + "events=" + events + ')';
}
}
/** Generates a pre-configured repeating sequence of window events */
private static final class RepeatingWindowsEventsSupplier implements Supplier<Integer> {
private final int[] eventsCounts;
private int pointer = 0;
/** @param windowEvents an array of number of events per each window in the sequence */
RepeatingWindowsEventsSupplier(final int... windowEvents) {
this.eventsCounts = Arrays.copyOf(windowEvents, windowEvents.length);
}
@Override
public Integer get() {
try {
return eventsCounts[pointer];
} finally {
pointer = (pointer + 1) % eventsCounts.length;
}
}
@Override
public String toString() {
return "Repeating: (" + "definition=" + Arrays.toString(eventsCounts) + ')';
}
}
private static class WindowSamplingResult {
final int events;
final int samples;
final double sampleIndexSkew;
WindowSamplingResult(int events, int samples, double sampleIndexSkew) {
this.events = events;
this.samples = samples;
this.sampleIndexSkew = sampleIndexSkew;
}
}
private static final StandardDeviation STANDARD_DEVIATION = new StandardDeviation();
private static final Mean MEAN = new Mean();
private static final int WINDOWS = 120;
private static final int SAMPLES_PER_WINDOW = 100;
private static final int LOOKBACK = 30;
@Mock CommonTaskExecutor taskExecutor;
@Captor ArgumentCaptor<Task<StreamingSampler>> rollWindowTaskCaptor;
@Captor ArgumentCaptor<StreamingSampler> rollWindowTargetCaptor;
@Mock ScheduledFuture scheduledFuture;
@BeforeEach
public void setup() {
when(taskExecutor.scheduleAtFixedRate(
rollWindowTaskCaptor.capture(),
rollWindowTargetCaptor.capture(),
eq(WINDOW_DURATION.toNanos()),
eq(WINDOW_DURATION.toNanos()),
same(TimeUnit.NANOSECONDS),
any()))
.thenReturn(scheduledFuture);
}
@Test
public void testBurstLowProbability() throws Exception {
testSampler(new BurstingWindowsEventsSupplier(0.1d, 5, 5000), 40);
}
@Test
public void testBurstHighProbability() throws Exception {
testSampler(new BurstingWindowsEventsSupplier(0.8d, 5, 5000), 20);
}
@Test
public void testPoissonLowFrequency() throws Exception {
testSampler(new PoissonWindowEventsSupplier(153), 15);
}
@Test
public void testPoissonMidFrequency() throws Exception {
testSampler(new PoissonWindowEventsSupplier(283), 15);
}
@Test
public void testPoissonHighFrequency() throws Exception {
testSampler(new PoissonWindowEventsSupplier(1013), 15);
}
@Test
public void testConstantVeryLowLoad() throws Exception {
testSampler(new ConstantWindowsEventsSupplier(1), 10);
}
@Test
public void testConstantLowLoad() throws Exception {
testSampler(new ConstantWindowsEventsSupplier(153), 15);
}
@Test
public void testConstantMediumLoad() throws Exception {
testSampler(new ConstantWindowsEventsSupplier(713), 15);
}
@Test
public void testConstantHighLoad() throws Exception {
testSampler(new ConstantWindowsEventsSupplier(5211), 15);
}
@Test
public void testRepeatingSemiRandom() throws Exception {
testSampler(
new RepeatingWindowsEventsSupplier(180, 200, 0, 0, 0, 1500, 1000, 430, 200, 115, 115, 900),
15);
}
@Test
public void testRepeatingRegularStartWithBurst() throws Exception {
testSampler(new RepeatingWindowsEventsSupplier(1000, 0, 1000, 0, 1000, 0), 15);
}
@Test
public void testRepeatingRegularStartWithLow() throws Exception {
testSampler(new RepeatingWindowsEventsSupplier(0, 1000, 0, 1000, 0, 1000), 15);
}
private void testSampler(final Supplier<Integer> windowEventsSupplier, final int maxErrorPercent)
throws Exception {
int iterations =
Integer.parseInt(
System.getProperty("com.datadog.profiling.exceptions.test-iterations", "1"));
for (int i = 0; i < iterations; i++) {
testSamplerInline(windowEventsSupplier, maxErrorPercent);
for (int numOfThreads = 1; numOfThreads <= 64; numOfThreads *= 2) {
testSamplerConcurrently(numOfThreads, windowEventsSupplier, maxErrorPercent);
}
}
}
private void testSamplerInline(
final Supplier<Integer> windowEventsSupplier, final int maxErrorPercent) {
log.info(
"> mode: {}, windows: {}, SAMPLES_PER_WINDOW: {}, LOOKBACK: {}, max error: {}%",
windowEventsSupplier, WINDOWS, SAMPLES_PER_WINDOW, LOOKBACK, maxErrorPercent);
final StreamingSampler sampler =
new StreamingSampler(WINDOW_DURATION, SAMPLES_PER_WINDOW, LOOKBACK, taskExecutor);
// simulate event generation and sampling for the given number of sampling windows
final long expectedSamples = WINDOWS * SAMPLES_PER_WINDOW;
long allSamples = 0L;
long allEvents = 0L;
final double[] samplesPerWindow = new double[WINDOWS];
final double[] sampleIndexSkewPerWindow = new double[WINDOWS];
for (int w = 0; w < WINDOWS; w++) {
final long samplesBase = 0L;
WindowSamplingResult result = generateWindowEventsAndSample(windowEventsSupplier, sampler);
samplesPerWindow[w] =
(1 - abs((result.samples - samplesBase - expectedSamples) / (double) expectedSamples));
sampleIndexSkewPerWindow[w] = result.sampleIndexSkew;
allSamples += result.samples;
allEvents += result.events;
rollWindow();
}
/*
* Turn all events into samples if their number is <= the expected number of samples.
*/
final double targetSamples = Math.min(allEvents, expectedSamples);
/*
* Calculate the percentual error based on the expected and the observed number of samples.
*/
final double percentualError = round(((targetSamples - allSamples) / targetSamples) * 100);
reportSampleStatistics(samplesPerWindow, targetSamples, percentualError);
reportSampleIndexSkew(sampleIndexSkewPerWindow);
assertTrue(
abs(percentualError) <= maxErrorPercent,
"abs(("
+ targetSamples
+ " - "
+ allSamples
+ ") / "
+ targetSamples
+ ")% > "
+ maxErrorPercent
+ "%");
}
private void reportSampleStatistics(
double[] samplesPerWindow, double targetSamples, double percentualError) {
final double samplesPerWindowMean = MEAN.evaluate(samplesPerWindow);
final double samplesPerWindowStdev =
STANDARD_DEVIATION.evaluate(samplesPerWindow, samplesPerWindowMean);
log.info(
"\t per window samples = (avg: {}, stdev: {}, estimated total: {})",
samplesPerWindowMean,
samplesPerWindowStdev,
targetSamples);
log.info("\t percentual error = {}%", percentualError);
}
private void reportSampleIndexSkew(double[] sampleIndexSkewPerWindow) {
Pair<Double, Double> skewIndicators = calculateSkewIndicators(sampleIndexSkewPerWindow);
log.info(
"\t avg window skew interval = <-{}%, {}%>",
round(skewIndicators.getFirst() * 100), round(skewIndicators.getSecond() * 100));
}
/**
* Simulate the number of events per window. Perform sampling and capture the number of observed
* events and samples.
*
* @param windowEventsSupplier events generator implementation
* @param sampler sampler instance
* @return a {@linkplain WindowSamplingResult} instance capturing the number of observed events,
* samples and the sample index skew
*/
private WindowSamplingResult generateWindowEventsAndSample(
Supplier<Integer> windowEventsSupplier, StreamingSampler sampler) {
List<Integer> sampleIndices = new ArrayList<>();
int samples = 0;
int events = windowEventsSupplier.get();
for (int i = 0; i < events; i++) {
if (sampler.sample()) {
sampleIndices.add(i);
samples++;
}
}
double sampleIndexMean = MEAN.evaluate(toDoubleArray(sampleIndices));
double sampleIndexSkew = events != 0 ? sampleIndexMean / events : 0;
return new WindowSamplingResult(events, samples, sampleIndexSkew);
}
/**
* Calculate the sample index skew boundaries. A 'sample index skew' is defined as the distance of
* the average sample index in each window from the mean event index in the same window. Given the
* range of the event indices 0..N-1, the event index mean M calculated as (N - 1)/2 and the sample
* index mean S, the skew K is calculated as 'K = M - S'. This gives the skew range of &lt;-0.5,
* 0.5&gt;.
*
* <p>If the samples are spread out completely regularly the skew would be 0. If the beginning of
* the window is favored the skew would be negative and if the tail of the window is favored the
* skew would be positive.
*
* @param sampleIndexSkewPerWindow the index skew per window
* @return a min-max boundaries for the sample index skew
*/
private Pair<Double, Double> calculateSkewIndicators(double[] sampleIndexSkewPerWindow) {
double skewPositiveAvg = 0d;
double skewNegativeAvg = 0d;
int negativeCount = 0;
for (final double skew : sampleIndexSkewPerWindow) {
if (skew >= 0.5d) {
skewPositiveAvg += skew - 0.5d;
} else {
negativeCount++;
skewNegativeAvg += 0.5d - skew;
}
}
final int positiveCount = sampleIndexSkewPerWindow.length - negativeCount;
if (positiveCount > 0) {
skewPositiveAvg /= sampleIndexSkewPerWindow.length - negativeCount;
}
if (negativeCount > 0) {
skewNegativeAvg /= negativeCount;
}
return new Pair<>(skewNegativeAvg, skewPositiveAvg);
}
private static double[] toDoubleArray(final List<? extends Number> data) {
return data.stream().mapToDouble(Number::doubleValue).toArray();
}
private void testSamplerConcurrently(
final int threadCount,
final Supplier<Integer> windowEventsSupplier,
final int maxErrorPercent)
throws Exception {
log.info(
"> threads: {}, mode: {}, windows: {}, SAMPLES_PER_WINDOW: {}, LOOKBACK: {}, max error: {}",
threadCount,
windowEventsSupplier,
WINDOWS,
SAMPLES_PER_WINDOW,
LOOKBACK,
maxErrorPercent);
/*
* This test attempts to simulate concurrent computations by making sure that sampling requests and the window maintenance routine are run in parallel.
* It does not provide coverage of all possible execution sequences but should be good enough for getting the 'ballpark' numbers.
*/
final long expectedSamples = SAMPLES_PER_WINDOW * WINDOWS;
final AtomicLong allSamples = new AtomicLong(0);
final AtomicLong receivedEvents = new AtomicLong(0);
final StreamingSampler sampler =
new StreamingSampler(WINDOW_DURATION, SAMPLES_PER_WINDOW, LOOKBACK, taskExecutor);
for (int w = 0; w < WINDOWS; w++) {
final Thread[] threads = new Thread[threadCount];
for (int i = 0; i < threadCount; i++) {
threads[i] =
new Thread(
() -> {
WindowSamplingResult samplingResult =
generateWindowEventsAndSample(windowEventsSupplier, sampler);
allSamples.addAndGet(samplingResult.samples);
receivedEvents.addAndGet(samplingResult.events);
});
}
for (final Thread t : threads) {
t.start();
}
for (final Thread t : threads) {
t.join();
}
rollWindow();
}
final long samples = allSamples.get();
/*
* Turn all events into samples if their number is <= the expected number of samples.
*/
final long targetSamples = Math.min(expectedSamples, receivedEvents.get());
/*
* Calculate the percentual error based on the expected and the observed number of samples.
*/
final int percentualError = round(((targetSamples - samples) / (float) targetSamples) * 100);
log.info("\t percentual error = {}%", percentualError);
assertTrue(
abs(percentualError) <= maxErrorPercent,
"abs(("
+ targetSamples
+ " - "
+ samples
+ ") / "
+ targetSamples
+ ")% > "
+ maxErrorPercent
+ "%");
}
private void rollWindow() {
rollWindowTaskCaptor.getValue().run(rollWindowTargetCaptor.getValue());
}
}

View File

@ -38,7 +38,7 @@ public class SpockRunner extends Sputnik {
"datadog.trace.bootstrap",
"datadog.trace.context",
"datadog.trace.instrumentation.api",
"io.opentracing",
"io.opentracing"
};
private static final String[] TEST_BOOTSTRAP_PREFIXES;

View File

@ -6,7 +6,6 @@ ext {
minJavaVersionForTests = JavaVersion.VERSION_11
// Zulu has backported profiling support
forceJdk = ['ZULU8']
jmcVersion = '8.0.0-SNAPSHOT'
}
apply from: "${rootDir}/gradle/java.gradle"
@ -23,14 +22,11 @@ jar {
dependencies {
compile project(':dd-trace-api')
compile group: 'org.lz4', name: 'lz4-java', version: '1.7.1'
testCompile project(':dd-smoke-tests')
testCompile project(':dd-java-agent:agent-profiling:profiling-testing')
testCompile "org.openjdk.jmc:common:$jmcVersion"
testCompile "org.openjdk.jmc:flightrecorder:$jmcVersion"
testCompile "org.openjdk.jmc:flightrecorder.rules:$jmcVersion"
testCompile "org.openjdk.jmc:flightrecorder.rules.jdk:$jmcVersion"
compile group: 'org.lz4', name: 'lz4-java', version: '1.7.1'
testCompile deps.jmc
}
tasks.withType(Test).configureEach {

View File

@ -28,6 +28,10 @@ public class ProfilingTestApplication {
private static void tracedMethod() throws InterruptedException {
System.out.println("Tracing");
tracedBusyMethod();
try {
throw new IllegalStateException("test");
} catch (final IllegalStateException ignored) {
}
Thread.sleep(50);
}

View File

@ -113,5 +113,8 @@ class ProfilingIntegrationContinuousProfilesTest extends AbstractSmokeTest {
filteredScopeEvents.size() > 0
filteredScopeEvents.getAggregate(Aggregators.min("datadog.Scope", cpuTimeAttr)).longValue() >= 10_000L
IItemCollection exceptionSampleEvents = events.apply(ItemFilters.type("datadog.ExceptionSample"))
exceptionSampleEvents.size() > 0
}
}

View File

@ -15,5 +15,6 @@ excludedClassesCoverage += [
description = 'dd-trace-api'
dependencies {
compile deps.slf4j
testCompile project(':utils:test-utils')
}

View File

@ -141,6 +141,11 @@ public class Config {
public static final String PROFILING_PROXY_PORT = "profiling.proxy.port";
public static final String PROFILING_PROXY_USERNAME = "profiling.proxy.username";
public static final String PROFILING_PROXY_PASSWORD = "profiling.proxy.password";
public static final String PROFILING_EXCEPTION_SAMPLE_LIMIT = "profiling.exception.sample.limit";
public static final String PROFILING_EXCEPTION_HISTOGRAM_TOP_ITEMS =
"profiling.exception.histogram.top-items";
public static final String PROFILING_EXCEPTION_HISTOGRAM_MAX_COLLECTION_SIZE =
"profiling.exception.histogram.max-collection-size";
public static final String RUNTIME_ID_TAG = "runtime-id";
public static final String SERVICE = "service";
@ -195,6 +200,9 @@ public class Config {
public static final int DEFAULT_PROFILING_UPLOAD_TIMEOUT = 30; // seconds
public static final String DEFAULT_PROFILING_UPLOAD_COMPRESSION = "on";
public static final int DEFAULT_PROFILING_PROXY_PORT = 8080;
public static final int DEFAULT_PROFILING_EXCEPTION_SAMPLE_LIMIT = 10_000;
public static final int DEFAULT_PROFILING_EXCEPTION_HISTOGRAM_TOP_ITEMS = 50;
public static final int DEFAULT_PROFILING_EXCEPTION_HISTOGRAM_MAX_COLLECTION_SIZE = 10000;
private static final String SPLIT_BY_SPACE_OR_COMMA_REGEX = "[,\\s]+";
@ -318,6 +326,9 @@ public class Config {
@Getter private final int profilingProxyPort;
@Getter private final String profilingProxyUsername;
@Getter private final String profilingProxyPassword;
@Getter private final int profilingExceptionSampleLimit;
@Getter private final int profilingExceptionHistogramTopItems;
@Getter private final int profilingExceptionHistogramMaxCollectionSize;
// Values from an optionally provided properties file
private static Properties propertiesFromConfigFile;
@ -520,6 +531,18 @@ public class Config {
profilingProxyUsername = getSettingFromEnvironment(PROFILING_PROXY_USERNAME, null);
profilingProxyPassword = getSettingFromEnvironment(PROFILING_PROXY_PASSWORD, null);
profilingExceptionSampleLimit =
getIntegerSettingFromEnvironment(
PROFILING_EXCEPTION_SAMPLE_LIMIT, DEFAULT_PROFILING_EXCEPTION_SAMPLE_LIMIT);
profilingExceptionHistogramTopItems =
getIntegerSettingFromEnvironment(
PROFILING_EXCEPTION_HISTOGRAM_TOP_ITEMS,
DEFAULT_PROFILING_EXCEPTION_HISTOGRAM_TOP_ITEMS);
profilingExceptionHistogramMaxCollectionSize =
getIntegerSettingFromEnvironment(
PROFILING_EXCEPTION_HISTOGRAM_MAX_COLLECTION_SIZE,
DEFAULT_PROFILING_EXCEPTION_HISTOGRAM_MAX_COLLECTION_SIZE);
// Setting this last because we have a few places where this can come from
apiKey = tmpApiKey;
@ -688,6 +711,21 @@ public class Config {
profilingProxyPassword =
properties.getProperty(PROFILING_PROXY_PASSWORD, parent.profilingProxyPassword);
profilingExceptionSampleLimit =
getPropertyIntegerValue(
properties, PROFILING_EXCEPTION_SAMPLE_LIMIT, parent.profilingExceptionSampleLimit);
profilingExceptionHistogramTopItems =
getPropertyIntegerValue(
properties,
PROFILING_EXCEPTION_HISTOGRAM_TOP_ITEMS,
parent.profilingExceptionHistogramTopItems);
profilingExceptionHistogramMaxCollectionSize =
getPropertyIntegerValue(
properties,
PROFILING_EXCEPTION_HISTOGRAM_MAX_COLLECTION_SIZE,
parent.profilingExceptionHistogramMaxCollectionSize);
log.debug("New instance: {}", this);
}
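
The new knobs follow the same lookup chain as the other profiling settings, so they can be supplied as system properties or environment variables. For example, via system properties (assuming the usual dd. prefix that PREFIX in the ConfigTest below expands to):

-Ddd.profiling.exception.sample.limit=811
-Ddd.profiling.exception.histogram.top-items=1121
-Ddd.profiling.exception.histogram.max-collection-size=1122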

View File

@ -13,6 +13,9 @@ import static datadog.trace.api.Config.API_KEY_FILE
import static datadog.trace.api.Config.CONFIGURATION_FILE
import static datadog.trace.api.Config.DB_CLIENT_HOST_SPLIT_BY_INSTANCE
import static datadog.trace.api.Config.DEFAULT_JMX_FETCH_STATSD_PORT
import static datadog.trace.api.Config.DEFAULT_PROFILING_EXCEPTION_SAMPLE_LIMIT
import static datadog.trace.api.Config.DEFAULT_PROFILING_EXCEPTION_HISTOGRAM_MAX_COLLECTION_SIZE
import static datadog.trace.api.Config.DEFAULT_PROFILING_EXCEPTION_HISTOGRAM_TOP_ITEMS
import static datadog.trace.api.Config.GLOBAL_TAGS
import static datadog.trace.api.Config.HEADER_TAGS
import static datadog.trace.api.Config.HEALTH_METRICS_ENABLED
@ -37,6 +40,9 @@ import static datadog.trace.api.Config.PRIORITY_SAMPLING
import static datadog.trace.api.Config.PROFILING_API_KEY_FILE_OLD
import static datadog.trace.api.Config.PROFILING_API_KEY_FILE_VERY_OLD
import static datadog.trace.api.Config.PROFILING_ENABLED
import static datadog.trace.api.Config.PROFILING_EXCEPTION_HISTOGRAM_MAX_COLLECTION_SIZE
import static datadog.trace.api.Config.PROFILING_EXCEPTION_HISTOGRAM_TOP_ITEMS
import static datadog.trace.api.Config.PROFILING_EXCEPTION_SAMPLE_LIMIT
import static datadog.trace.api.Config.PROFILING_PROXY_HOST
import static datadog.trace.api.Config.PROFILING_PROXY_PASSWORD
import static datadog.trace.api.Config.PROFILING_PROXY_PORT
@ -152,6 +158,9 @@ class ConfigTest extends DDSpecification {
config.profilingProxyPort == Config.DEFAULT_PROFILING_PROXY_PORT
config.profilingProxyUsername == null
config.profilingProxyPassword == null
config.profilingExceptionSampleLimit == DEFAULT_PROFILING_EXCEPTION_SAMPLE_LIMIT
config.profilingExceptionHistogramTopItems == DEFAULT_PROFILING_EXCEPTION_HISTOGRAM_TOP_ITEMS
config.profilingExceptionHistogramMaxCollectionSize == DEFAULT_PROFILING_EXCEPTION_HISTOGRAM_MAX_COLLECTION_SIZE
config.toString().contains("unnamed-java-app")
@ -219,6 +228,9 @@ class ConfigTest extends DDSpecification {
prop.setProperty(PROFILING_PROXY_PORT, "1118")
prop.setProperty(PROFILING_PROXY_USERNAME, "proxy-username")
prop.setProperty(PROFILING_PROXY_PASSWORD, "proxy-password")
prop.setProperty(PROFILING_EXCEPTION_SAMPLE_LIMIT, "811")
prop.setProperty(PROFILING_EXCEPTION_HISTOGRAM_TOP_ITEMS, "1121")
prop.setProperty(PROFILING_EXCEPTION_HISTOGRAM_MAX_COLLECTION_SIZE, "1122")
when:
Config config = Config.get(prop)
@ -276,6 +288,9 @@ class ConfigTest extends DDSpecification {
config.profilingProxyPort == 1118
config.profilingProxyUsername == "proxy-username"
config.profilingProxyPassword == "proxy-password"
config.profilingExceptionSampleLimit == 811
config.profilingExceptionHistogramTopItems == 1121
config.profilingExceptionHistogramMaxCollectionSize == 1122
}
def "specify overrides via system properties"() {
@ -333,6 +348,9 @@ class ConfigTest extends DDSpecification {
System.setProperty(PREFIX + PROFILING_PROXY_PORT, "1118")
System.setProperty(PREFIX + PROFILING_PROXY_USERNAME, "proxy-username")
System.setProperty(PREFIX + PROFILING_PROXY_PASSWORD, "proxy-password")
System.setProperty(PREFIX + PROFILING_EXCEPTION_SAMPLE_LIMIT, "811")
System.setProperty(PREFIX + PROFILING_EXCEPTION_HISTOGRAM_TOP_ITEMS, "1121")
System.setProperty(PREFIX + PROFILING_EXCEPTION_HISTOGRAM_MAX_COLLECTION_SIZE, "1122")
when:
Config config = new Config()
@ -390,6 +408,9 @@ class ConfigTest extends DDSpecification {
config.profilingProxyPort == 1118
config.profilingProxyUsername == "proxy-username"
config.profilingProxyPassword == "proxy-password"
config.profilingExceptionSampleLimit == 811
config.profilingExceptionHistogramTopItems == 1121
config.profilingExceptionHistogramMaxCollectionSize == 1122
}
def "specify overrides via env vars"() {

View File

@ -19,7 +19,11 @@ ext {
kotlin : "1.3.72",
coroutines : "1.3.0",
dogstatsd : "2.9.0",
jnr_unixsocket: "0.28"
jnr_unixsocket: "0.28",
commons : "3.2",
mockito : "3.3.3",
testcontainers: "1.12.2",
jmc : "8.0.0-SNAPSHOT"
]
deps = [
@ -44,6 +48,7 @@ ext {
// These are the last versions that support guava 20.0. Upgrading has odd interactions with shadow.
dependencies.create(group: 'com.google.guava', name: 'guava', version: "${versions.guava}"),
],
commonsMath : dependencies.create(group: 'org.apache.commons', name: 'commons-math3', version: versions.commons),
// Testing
@ -56,7 +61,11 @@ ext {
],
groovy : "org.codehaus.groovy:groovy-all:${versions.groovy}",
junit5 : "org.junit.jupiter:junit-jupiter:${versions.junit5}",
testcontainers : "org.testcontainers:testcontainers:1.12.2",
mockito : [
dependencies.create(group: 'org.mockito', name: 'mockito-core', version: versions.mockito),
dependencies.create(group: 'org.mockito', name: 'mockito-junit-jupiter', version: versions.mockito)
],
testcontainers : "org.testcontainers:testcontainers:${versions.testcontainers}",
testLogging : [
dependencies.create(group: 'ch.qos.logback', name: 'logback-classic', version: versions.logback),
dependencies.create(group: 'org.slf4j', name: 'log4j-over-slf4j', version: versions.slf4j),
@ -67,6 +76,13 @@ ext {
kotlin : dependencies.create(group: 'org.jetbrains.kotlin', name: 'kotlin-stdlib', version: "${versions.kotlin}"),
coroutines : dependencies.create(group: 'org.jetbrains.kotlinx', name: 'kotlinx-coroutines-core', version: "${versions.coroutines}"),
jmc : [
dependencies.create(group: 'org.openjdk.jmc', name: 'common', version: versions.jmc),
dependencies.create(group: 'org.openjdk.jmc', name: 'flightrecorder', version: versions.jmc),
dependencies.create(group: 'org.openjdk.jmc', name: 'flightrecorder.rules', version: versions.jmc),
dependencies.create(group: 'org.openjdk.jmc', name: 'flightrecorder.rules.jdk', version: versions.jmc)
],
// Shared between agent tooling and instrumentation and JMXFetch
shared : [
dependencies.create(group: 'com.datadoghq', name: 'java-dogstatsd-client', version: "${versions.dogstatsd}"),

View File

@ -73,6 +73,7 @@ include ':dd-java-agent:instrumentation:elasticsearch:transport-2'
include ':dd-java-agent:instrumentation:elasticsearch:transport-5'
include ':dd-java-agent:instrumentation:elasticsearch:transport-5.3'
include ':dd-java-agent:instrumentation:elasticsearch:transport-6'
include ':dd-java-agent:instrumentation:exception-profiling'
include ':dd-java-agent:instrumentation:finatra-2.9'
include ':dd-java-agent:instrumentation:glassfish'
include ':dd-java-agent:instrumentation:google-http-client'