Rule based sampling and delaying sampling until necessary

This commit is contained in:
Laplie Anderson 2019-11-14 19:08:46 -05:00
parent 56e4fdebb2
commit d8afd8b2a4
6 changed files with 227 additions and 47 deletions

View File

@ -191,18 +191,20 @@ public class DDSpanContext implements io.opentracing.SpanContext {
this.spanType = spanType;
}
public void setSamplingPriority(final int newPriority) {
/** @return if sampling priority was set by this method invocation */
public boolean setSamplingPriority(final int newPriority) {
if (newPriority == PrioritySampling.UNSET) {
log.debug("{}: Refusing to set samplingPriority to UNSET", this);
return false;
}
if (trace != null) {
final DDSpan rootSpan = trace.getRootSpan();
if (null != rootSpan && rootSpan.context() != this) {
rootSpan.context().setSamplingPriority(newPriority);
return;
return rootSpan.context().setSamplingPriority(newPriority);
}
}
if (newPriority == PrioritySampling.UNSET) {
log.debug("{}: Refusing to set samplingPriority to UNSET", this);
return;
}
// sync with lockSamplingPriority
synchronized (this) {
if (samplingPriorityLocked) {
@ -210,21 +212,22 @@ public class DDSpanContext implements io.opentracing.SpanContext {
"samplingPriority locked at {}. Refusing to set to {}",
getMetrics().get(PRIORITY_SAMPLING_KEY),
newPriority);
return false;
} else {
setMetric(PRIORITY_SAMPLING_KEY, newPriority);
log.debug("Set sampling priority to {}", getMetrics().get(PRIORITY_SAMPLING_KEY));
return true;
}
}
}
/** @return the sampling priority of this span's trace, or {@link PrioritySampling#UNSET} if no priority has been set */
public int getSamplingPriority() {
if (trace != null) {
final DDSpan rootSpan = trace.getRootSpan();
if (null != rootSpan && rootSpan.context() != this) {
return rootSpan.context().getSamplingPriority();
}
}
final Number val = getMetrics().get(PRIORITY_SAMPLING_KEY);
return null == val ? PrioritySampling.UNSET : val.intValue();
}
@ -239,12 +242,11 @@ public class DDSpanContext implements io.opentracing.SpanContext {
* @return true if the sampling priority was locked.
*/
public boolean lockSamplingPriority() {
if (trace != null) {
final DDSpan rootSpan = trace.getRootSpan();
if (null != rootSpan && rootSpan.context() != this) {
return rootSpan.context().lockSamplingPriority();
}
}
// sync with setSamplingPriority
synchronized (this) {
if (getMetrics().get(PRIORITY_SAMPLING_KEY) == null) {
@ -367,13 +369,12 @@ public class DDSpanContext implements io.opentracing.SpanContext {
.append("/")
.append(getResourceName())
.append(" metrics=")
.append(new TreeMap(getMetrics()));
.append(new TreeMap<>(getMetrics()));
if (errorFlag) {
s.append(" *errored*");
}
if (tags != null) {
s.append(" tags=").append(new TreeMap(tags));
}
s.append(" tags=").append(new TreeMap<>(tags));
return s.toString();
}
}

View File

@ -11,7 +11,7 @@ import datadog.trace.api.Config;
import datadog.trace.api.interceptor.MutableSpan;
import datadog.trace.api.interceptor.TraceInterceptor;
import datadog.trace.api.sampling.PrioritySampling;
import datadog.trace.common.sampling.RateByServiceSampler;
import datadog.trace.common.sampling.PrioritySampler;
import datadog.trace.common.sampling.Sampler;
import datadog.trace.common.writer.DDAgentWriter;
import datadog.trace.common.writer.DDApi;
@ -347,7 +347,12 @@ public class DDTracer implements io.opentracing.Tracer, Closeable, datadog.trace
@Override
public <T> void inject(final SpanContext spanContext, final Format<T> format, final T carrier) {
if (carrier instanceof TextMapInject) {
injector.inject((DDSpanContext) spanContext, (TextMapInject) carrier);
final DDSpanContext ddSpanContext = (DDSpanContext) spanContext;
final DDSpan rootSpan = ddSpanContext.getTrace().getRootSpan();
setSamplingPriorityIfNecessary(rootSpan);
injector.inject(ddSpanContext, (TextMapInject) carrier);
} else {
log.debug("Unsupported format for propagation - {}", format.getClass().getName());
}
@ -389,12 +394,30 @@ public class DDTracer implements io.opentracing.Tracer, Closeable, datadog.trace
}
}
incrementTraceCount();
// TODO: current trace implementation doesn't guarantee that first span is the root span
// We may want to reconsider way this check is done.
if (!writtenTrace.isEmpty() && sampler.sample(writtenTrace.get(0))) {
if (!writtenTrace.isEmpty()) {
final DDSpan rootSpan = (DDSpan) writtenTrace.get(0).getLocalRootSpan();
setSamplingPriorityIfNecessary(rootSpan);
final DDSpan spanToSample = rootSpan == null ? writtenTrace.get(0) : rootSpan;
if (sampler.sample(spanToSample)) {
writer.write(writtenTrace);
}
}
}
/**
 * Lets the configured sampler assign a sampling priority to the trace, but only when that is
 * still needed.
 *
 * <p>Nothing happens unless the sampler is a {@link PrioritySampler}, {@code rootSpan} is
 * non-null, and the root span's context still reports {@link PrioritySampling#UNSET}.
 *
 * @param rootSpan root span of the trace whose priority may need to be decided; may be null
 */
void setSamplingPriorityIfNecessary(final DDSpan rootSpan) {
  if (!(sampler instanceof PrioritySampler) || rootSpan == null) {
    return;
  }

  // Several threads can observe UNSET here at the same time. The check is only a shortcut that
  // avoids potentially complex priority logic when we know it's redundant; the locking inside
  // DDSpanContext is what keeps the outcome correct when the race does happen.
  final boolean priorityMissing =
      rootSpan.context().getSamplingPriority() == PrioritySampling.UNSET;
  if (priorityMissing) {
    ((PrioritySampler) sampler).setSamplingPriority(rootSpan);
  }
}
/** Increment the reported trace count, but do not write a trace. */
void incrementTraceCount() {
@ -487,11 +510,7 @@ public class DDTracer implements io.opentracing.Tracer, Closeable, datadog.trace
}
private DDSpan startSpan() {
final DDSpan span = new DDSpan(timestampMicro, buildSpanContext());
if (sampler instanceof RateByServiceSampler) {
((RateByServiceSampler) sampler).initializeSamplingPriority(span);
}
return span;
return new DDSpan(timestampMicro, buildSpanContext());
}
@Override

View File

@ -0,0 +1,7 @@
package datadog.trace.common.sampling;
import datadog.opentracing.DDSpan;
/**
 * Capability of a sampler to assign a sampling priority to a span.
 *
 * <p>The implementations visible alongside this interface record their decision through
 * {@code span.context().setSamplingPriority(...)}.
 */
public interface PrioritySampler {
/**
 * Decides a sampling priority for the given span and applies it.
 *
 * @param span the span whose sampling priority should be set
 */
void setSamplingPriority(DDSpan span);
}

View File

@ -19,7 +19,9 @@ import lombok.extern.slf4j.Slf4j;
* <p>The configuration of (serviceName,env)->rate is configured by the core agent.
*/
@Slf4j
public class RateByServiceSampler implements Sampler, ResponseListener {
public class RateByServiceSampler implements Sampler, PrioritySampler, ResponseListener {
public static final String SAMPLING_AGENT_RATE = "_dd.agent_psr";
/** Key for setting the default/baseline rate */
private static final String DEFAULT_KEY = "service:,env:";
@ -36,18 +38,8 @@ public class RateByServiceSampler implements Sampler, ResponseListener {
}
/** If span is a root span, set the span context samplingPriority to keep or drop */
public void initializeSamplingPriority(final DDSpan span) {
if (span.isRootSpan()) {
// Run the priority sampler on the new span
setSamplingPriorityOnSpanContext(span);
} else if (span.getSamplingPriority() == null) {
// Edge case: If the parent context did not set the priority, run the priority sampler.
// Happens when extracted http context did not send the priority header.
setSamplingPriorityOnSpanContext(span);
}
}
private void setSamplingPriorityOnSpanContext(final DDSpan span) {
@Override
public void setSamplingPriority(final DDSpan span) {
final String serviceName = span.getServiceName();
final String env = getSpanEnv(span);
final String key = "service:" + serviceName + ",env:" + env;
@ -58,10 +50,18 @@ public class RateByServiceSampler implements Sampler, ResponseListener {
sampler = rates.get(DEFAULT_KEY);
}
final boolean priorityWasSet;
if (sampler.sample(span)) {
span.setSamplingPriority(PrioritySampling.SAMPLER_KEEP);
priorityWasSet = span.context().setSamplingPriority(PrioritySampling.SAMPLER_KEEP);
} else {
span.setSamplingPriority(PrioritySampling.SAMPLER_DROP);
priorityWasSet = span.context().setSamplingPriority(PrioritySampling.SAMPLER_DROP);
}
// Only set metrics if we actually set the sampling priority
// We don't know until the call is completed because the lock is internal to DDSpanContext
if (priorityWasSet) {
span.context().setMetric(SAMPLING_AGENT_RATE, sampler.getSampleRate());
}
}

View File

@ -0,0 +1,80 @@
package datadog.trace.common.sampling;
import com.google.common.util.concurrent.RateLimiter;
import datadog.opentracing.DDSpan;
import datadog.trace.api.sampling.PrioritySampling;
import datadog.trace.common.sampling.SamplingRule.AlwaysMatchesSamplingRule;
import datadog.trace.common.sampling.SamplingRule.OperationSamplingRule;
import datadog.trace.common.sampling.SamplingRule.ServiceSamplingRule;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import lombok.extern.slf4j.Slf4j;
/**
 * Priority sampler driven by an ordered list of {@link SamplingRule}s.
 *
 * <p>The first rule that matches a span decides whether the trace is a keep candidate. Keep
 * candidates must additionally obtain a permit from a shared rate limiter. Spans that match no
 * rule are handed to the fallback {@link PrioritySampler}.
 */
@Slf4j
public class RuleBasedSampler implements Sampler, PrioritySampler {
  /** Metric key holding the sample rate of the rule that decided the priority. */
  public static final String SAMPLING_RULE_RATE = "_dd.rule_psr";

  /** Metric key holding the configured rate limit, written when the limiter was consulted. */
  public static final String SAMPLING_LIMIT_RATE = "_dd.limit_psr";

  private final List<SamplingRule> samplingRules;
  private final PrioritySampler fallbackSampler;
  private final RateLimiter rateLimiter;
  private final double rateLimit;

  /**
   * @param samplingRules rules evaluated in list order; the first match wins
   * @param rateLimit permits per second handed to {@code RateLimiter.create}
   *     (NOTE(review): Guava documents that non-positive rates are rejected; confirm callers
   *     validate this)
   * @param fallbackSampler sampler used for spans that match none of the rules
   */
  public RuleBasedSampler(
      final List<SamplingRule> samplingRules,
      final double rateLimit,
      final PrioritySampler fallbackSampler) {
    this.samplingRules = samplingRules;
    this.rateLimit = rateLimit;
    this.rateLimiter = RateLimiter.create(rateLimit);
    this.fallbackSampler = fallbackSampler;
  }

  /**
   * Always accepts the span; presumably the real keep/drop decision is carried by the sampling
   * priority set in {@link #setSamplingPriority(DDSpan)}.
   *
   * @return {@code true} for every span
   */
  @Override
  public boolean sample(final DDSpan span) {
    return true;
  }

  /**
   * Sets the span's sampling priority from the first matching rule, or delegates to the fallback
   * sampler when no rule matches.
   *
   * @param span span whose context receives the sampling priority
   */
  @Override
  public void setSamplingPriority(final DDSpan span) {
    final SamplingRule rule = firstMatchingRule(span);
    if (rule == null) {
      fallbackSampler.setSamplingPriority(span);
      return;
    }

    // The limiter is consulted only for spans the rule itself wants to keep.
    final boolean ruleKeeps = rule.sample(span);
    final boolean keep = ruleKeeps && rateLimiter.tryAcquire();
    final int priority = keep ? PrioritySampling.SAMPLER_KEEP : PrioritySampling.SAMPLER_DROP;

    // The context reports whether this call actually set the priority. Metrics are written only
    // in that case, because the lock guarding the priority is internal to DDSpanContext.
    if (!span.context().setSamplingPriority(priority)) {
      return;
    }
    span.context().setMetric(SAMPLING_RULE_RATE, rule.getSampler().getSampleRate());
    if (ruleKeeps) {
      span.context().setMetric(SAMPLING_LIMIT_RATE, rateLimit);
    }
  }

  /** Returns the first rule, in list order, that matches the span, or null if none does. */
  private SamplingRule firstMatchingRule(final DDSpan span) {
    for (final SamplingRule candidate : samplingRules) {
      if (candidate.matches(span)) {
        return candidate;
      }
    }
    return null;
  }
}

View File

@ -0,0 +1,73 @@
package datadog.trace.common.sampling;
import datadog.opentracing.DDSpan;
import java.util.regex.Pattern;
/**
 * Couples a span-matching predicate with the {@link RateSampler} applied to spans that match.
 */
public abstract class SamplingRule {
  private final RateSampler rateSampler;

  /** @param sampler rate sampler consulted by {@link #sample(DDSpan)} */
  public SamplingRule(final RateSampler sampler) {
    rateSampler = sampler;
  }

  /**
   * @param span span to test
   * @return whether this rule applies to the span
   */
  public abstract boolean matches(DDSpan span);

  /**
   * @param span span to sample
   * @return the decision of this rule's rate sampler for the span
   */
  public boolean sample(final DDSpan span) {
    return rateSampler.sample(span);
  }

  /** @return the rate sampler this rule was created with */
  public RateSampler getSampler() {
    return rateSampler;
  }

  /** Rule that applies to every span. */
  public static class AlwaysMatchesSamplingRule extends SamplingRule {
    public AlwaysMatchesSamplingRule(final RateSampler sampler) {
      super(sampler);
    }

    @Override
    public boolean matches(final DDSpan span) {
      return true;
    }
  }

  /**
   * Rule that applies when a string taken from the span matches a regular expression in full.
   */
  public abstract static class PatternMatchSamplingRule extends SamplingRule {
    private final Pattern compiledRegex;

    /**
     * @param regex regular expression, compiled once at construction
     * @param sampler rate sampler used for matching spans
     */
    public PatternMatchSamplingRule(final String regex, final RateSampler sampler) {
      super(sampler);
      compiledRegex = Pattern.compile(regex);
    }

    @Override
    public boolean matches(final DDSpan span) {
      final String candidate = getRelevantString(span);
      if (candidate == null) {
        // Nothing to compare against, so the rule cannot match.
        return false;
      }
      return compiledRegex.matcher(candidate).matches();
    }

    /**
     * @param span span being tested
     * @return the string to match against the pattern; null means the rule does not match
     */
    protected abstract String getRelevantString(DDSpan span);
  }

  /** Pattern rule evaluated against the span's service name. */
  public static class ServiceSamplingRule extends PatternMatchSamplingRule {
    public ServiceSamplingRule(final String regex, final RateSampler sampler) {
      super(regex, sampler);
    }

    @Override
    protected String getRelevantString(final DDSpan span) {
      return span.getServiceName();
    }
  }

  /** Pattern rule evaluated against the span's operation name. */
  public static class OperationSamplingRule extends PatternMatchSamplingRule {
    public OperationSamplingRule(final String regex, final RateSampler sampler) {
      super(regex, sampler);
    }

    @Override
    protected String getRelevantString(final DDSpan span) {
      return span.getOperationName();
    }
  }
}