Rule based sampling and delaying sampling until necessary
This commit is contained in:
parent
56e4fdebb2
commit
d8afd8b2a4
|
@ -191,18 +191,20 @@ public class DDSpanContext implements io.opentracing.SpanContext {
|
|||
this.spanType = spanType;
|
||||
}
|
||||
|
||||
public void setSamplingPriority(final int newPriority) {
|
||||
/** @return if sampling priority was set by this method invocation */
|
||||
public boolean setSamplingPriority(final int newPriority) {
|
||||
if (newPriority == PrioritySampling.UNSET) {
|
||||
log.debug("{}: Refusing to set samplingPriority to UNSET", this);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (trace != null) {
|
||||
final DDSpan rootSpan = trace.getRootSpan();
|
||||
if (null != rootSpan && rootSpan.context() != this) {
|
||||
rootSpan.context().setSamplingPriority(newPriority);
|
||||
return;
|
||||
return rootSpan.context().setSamplingPriority(newPriority);
|
||||
}
|
||||
}
|
||||
if (newPriority == PrioritySampling.UNSET) {
|
||||
log.debug("{}: Refusing to set samplingPriority to UNSET", this);
|
||||
return;
|
||||
}
|
||||
|
||||
// sync with lockSamplingPriority
|
||||
synchronized (this) {
|
||||
if (samplingPriorityLocked) {
|
||||
|
@ -210,21 +212,22 @@ public class DDSpanContext implements io.opentracing.SpanContext {
|
|||
"samplingPriority locked at {}. Refusing to set to {}",
|
||||
getMetrics().get(PRIORITY_SAMPLING_KEY),
|
||||
newPriority);
|
||||
return false;
|
||||
} else {
|
||||
setMetric(PRIORITY_SAMPLING_KEY, newPriority);
|
||||
log.debug("Set sampling priority to {}", getMetrics().get(PRIORITY_SAMPLING_KEY));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** @return the sampling priority of this span's trace, or null if no priority has been set */
|
||||
public int getSamplingPriority() {
|
||||
if (trace != null) {
|
||||
final DDSpan rootSpan = trace.getRootSpan();
|
||||
if (null != rootSpan && rootSpan.context() != this) {
|
||||
return rootSpan.context().getSamplingPriority();
|
||||
}
|
||||
final DDSpan rootSpan = trace.getRootSpan();
|
||||
if (null != rootSpan && rootSpan.context() != this) {
|
||||
return rootSpan.context().getSamplingPriority();
|
||||
}
|
||||
|
||||
final Number val = getMetrics().get(PRIORITY_SAMPLING_KEY);
|
||||
return null == val ? PrioritySampling.UNSET : val.intValue();
|
||||
}
|
||||
|
@ -239,12 +242,11 @@ public class DDSpanContext implements io.opentracing.SpanContext {
|
|||
* @return true if the sampling priority was locked.
|
||||
*/
|
||||
public boolean lockSamplingPriority() {
|
||||
if (trace != null) {
|
||||
final DDSpan rootSpan = trace.getRootSpan();
|
||||
if (null != rootSpan && rootSpan.context() != this) {
|
||||
return rootSpan.context().lockSamplingPriority();
|
||||
}
|
||||
final DDSpan rootSpan = trace.getRootSpan();
|
||||
if (null != rootSpan && rootSpan.context() != this) {
|
||||
return rootSpan.context().lockSamplingPriority();
|
||||
}
|
||||
|
||||
// sync with setSamplingPriority
|
||||
synchronized (this) {
|
||||
if (getMetrics().get(PRIORITY_SAMPLING_KEY) == null) {
|
||||
|
@ -367,13 +369,12 @@ public class DDSpanContext implements io.opentracing.SpanContext {
|
|||
.append("/")
|
||||
.append(getResourceName())
|
||||
.append(" metrics=")
|
||||
.append(new TreeMap(getMetrics()));
|
||||
.append(new TreeMap<>(getMetrics()));
|
||||
if (errorFlag) {
|
||||
s.append(" *errored*");
|
||||
}
|
||||
if (tags != null) {
|
||||
s.append(" tags=").append(new TreeMap(tags));
|
||||
}
|
||||
|
||||
s.append(" tags=").append(new TreeMap<>(tags));
|
||||
return s.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,7 +11,7 @@ import datadog.trace.api.Config;
|
|||
import datadog.trace.api.interceptor.MutableSpan;
|
||||
import datadog.trace.api.interceptor.TraceInterceptor;
|
||||
import datadog.trace.api.sampling.PrioritySampling;
|
||||
import datadog.trace.common.sampling.RateByServiceSampler;
|
||||
import datadog.trace.common.sampling.PrioritySampler;
|
||||
import datadog.trace.common.sampling.Sampler;
|
||||
import datadog.trace.common.writer.DDAgentWriter;
|
||||
import datadog.trace.common.writer.DDApi;
|
||||
|
@ -347,7 +347,12 @@ public class DDTracer implements io.opentracing.Tracer, Closeable, datadog.trace
|
|||
@Override
|
||||
public <T> void inject(final SpanContext spanContext, final Format<T> format, final T carrier) {
|
||||
if (carrier instanceof TextMapInject) {
|
||||
injector.inject((DDSpanContext) spanContext, (TextMapInject) carrier);
|
||||
final DDSpanContext ddSpanContext = (DDSpanContext) spanContext;
|
||||
|
||||
final DDSpan rootSpan = ddSpanContext.getTrace().getRootSpan();
|
||||
setSamplingPriorityIfNecessary(rootSpan);
|
||||
|
||||
injector.inject(ddSpanContext, (TextMapInject) carrier);
|
||||
} else {
|
||||
log.debug("Unsupported format for propagation - {}", format.getClass().getName());
|
||||
}
|
||||
|
@ -389,10 +394,28 @@ public class DDTracer implements io.opentracing.Tracer, Closeable, datadog.trace
|
|||
}
|
||||
}
|
||||
incrementTraceCount();
|
||||
// TODO: current trace implementation doesn't guarantee that first span is the root span
|
||||
// We may want to reconsider way this check is done.
|
||||
if (!writtenTrace.isEmpty() && sampler.sample(writtenTrace.get(0))) {
|
||||
writer.write(writtenTrace);
|
||||
|
||||
if (!writtenTrace.isEmpty()) {
|
||||
final DDSpan rootSpan = (DDSpan) writtenTrace.get(0).getLocalRootSpan();
|
||||
setSamplingPriorityIfNecessary(rootSpan);
|
||||
|
||||
final DDSpan spanToSample = rootSpan == null ? writtenTrace.get(0) : rootSpan;
|
||||
if (sampler.sample(spanToSample)) {
|
||||
writer.write(writtenTrace);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void setSamplingPriorityIfNecessary(final DDSpan rootSpan) {
|
||||
// There's a race where multiple threads can see PrioritySampling.UNSET here
|
||||
// This check skips potential complex sampling priority logic when we know its redundant
|
||||
// Locks inside DDSpanContext ensure the correct behavior in the race case
|
||||
|
||||
if (sampler instanceof PrioritySampler
|
||||
&& rootSpan != null
|
||||
&& rootSpan.context().getSamplingPriority() == PrioritySampling.UNSET) {
|
||||
|
||||
((PrioritySampler) sampler).setSamplingPriority(rootSpan);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -487,11 +510,7 @@ public class DDTracer implements io.opentracing.Tracer, Closeable, datadog.trace
|
|||
}
|
||||
|
||||
private DDSpan startSpan() {
|
||||
final DDSpan span = new DDSpan(timestampMicro, buildSpanContext());
|
||||
if (sampler instanceof RateByServiceSampler) {
|
||||
((RateByServiceSampler) sampler).initializeSamplingPriority(span);
|
||||
}
|
||||
return span;
|
||||
return new DDSpan(timestampMicro, buildSpanContext());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
package datadog.trace.common.sampling;
|
||||
|
||||
import datadog.opentracing.DDSpan;
|
||||
|
||||
public interface PrioritySampler {
|
||||
void setSamplingPriority(DDSpan span);
|
||||
}
|
|
@ -19,7 +19,9 @@ import lombok.extern.slf4j.Slf4j;
|
|||
* <p>The configuration of (serviceName,env)->rate is configured by the core agent.
|
||||
*/
|
||||
@Slf4j
|
||||
public class RateByServiceSampler implements Sampler, ResponseListener {
|
||||
public class RateByServiceSampler implements Sampler, PrioritySampler, ResponseListener {
|
||||
public static final String SAMPLING_AGENT_RATE = "_dd.agent_psr";
|
||||
|
||||
/** Key for setting the default/baseline rate */
|
||||
private static final String DEFAULT_KEY = "service:,env:";
|
||||
|
||||
|
@ -36,18 +38,8 @@ public class RateByServiceSampler implements Sampler, ResponseListener {
|
|||
}
|
||||
|
||||
/** If span is a root span, set the span context samplingPriority to keep or drop */
|
||||
public void initializeSamplingPriority(final DDSpan span) {
|
||||
if (span.isRootSpan()) {
|
||||
// Run the priority sampler on the new span
|
||||
setSamplingPriorityOnSpanContext(span);
|
||||
} else if (span.getSamplingPriority() == null) {
|
||||
// Edge case: If the parent context did not set the priority, run the priority sampler.
|
||||
// Happens when extracted http context did not send the priority header.
|
||||
setSamplingPriorityOnSpanContext(span);
|
||||
}
|
||||
}
|
||||
|
||||
private void setSamplingPriorityOnSpanContext(final DDSpan span) {
|
||||
@Override
|
||||
public void setSamplingPriority(final DDSpan span) {
|
||||
final String serviceName = span.getServiceName();
|
||||
final String env = getSpanEnv(span);
|
||||
final String key = "service:" + serviceName + ",env:" + env;
|
||||
|
@ -58,10 +50,18 @@ public class RateByServiceSampler implements Sampler, ResponseListener {
|
|||
sampler = rates.get(DEFAULT_KEY);
|
||||
}
|
||||
|
||||
final boolean priorityWasSet;
|
||||
|
||||
if (sampler.sample(span)) {
|
||||
span.setSamplingPriority(PrioritySampling.SAMPLER_KEEP);
|
||||
priorityWasSet = span.context().setSamplingPriority(PrioritySampling.SAMPLER_KEEP);
|
||||
} else {
|
||||
span.setSamplingPriority(PrioritySampling.SAMPLER_DROP);
|
||||
priorityWasSet = span.context().setSamplingPriority(PrioritySampling.SAMPLER_DROP);
|
||||
}
|
||||
|
||||
// Only set metrics if we actually set the sampling priority
|
||||
// We don't know until the call is completed because the lock is internal to DDSpanContext
|
||||
if (priorityWasSet) {
|
||||
span.context().setMetric(SAMPLING_AGENT_RATE, sampler.getSampleRate());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
package datadog.trace.common.sampling;
|
||||
|
||||
import com.google.common.util.concurrent.RateLimiter;
|
||||
import datadog.opentracing.DDSpan;
|
||||
import datadog.trace.api.sampling.PrioritySampling;
|
||||
import datadog.trace.common.sampling.SamplingRule.AlwaysMatchesSamplingRule;
|
||||
import datadog.trace.common.sampling.SamplingRule.OperationSamplingRule;
|
||||
import datadog.trace.common.sampling.SamplingRule.ServiceSamplingRule;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class RuleBasedSampler implements Sampler, PrioritySampler {
|
||||
private final List<SamplingRule> samplingRules;
|
||||
private final PrioritySampler fallbackSampler;
|
||||
private final RateLimiter rateLimiter;
|
||||
private final double rateLimit;
|
||||
|
||||
public static final String SAMPLING_RULE_RATE = "_dd.rule_psr";
|
||||
public static final String SAMPLING_LIMIT_RATE = "_dd.limit_psr";
|
||||
|
||||
public RuleBasedSampler(
|
||||
final List<SamplingRule> samplingRules,
|
||||
final double rateLimit,
|
||||
final PrioritySampler fallbackSampler) {
|
||||
this.samplingRules = samplingRules;
|
||||
this.fallbackSampler = fallbackSampler;
|
||||
rateLimiter = RateLimiter.create(rateLimit);
|
||||
this.rateLimit = rateLimit;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean sample(final DDSpan span) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setSamplingPriority(final DDSpan span) {
|
||||
SamplingRule matchedRule = null;
|
||||
|
||||
for (final SamplingRule samplingRule : samplingRules) {
|
||||
if (samplingRule.matches(span)) {
|
||||
matchedRule = samplingRule;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (matchedRule == null) {
|
||||
fallbackSampler.setSamplingPriority(span);
|
||||
} else {
|
||||
final boolean priorityWasSet;
|
||||
boolean usedRateLimiter = false;
|
||||
|
||||
if (matchedRule.sample(span)) {
|
||||
usedRateLimiter = true;
|
||||
if (rateLimiter.tryAcquire()) {
|
||||
priorityWasSet = span.context().setSamplingPriority(PrioritySampling.SAMPLER_KEEP);
|
||||
} else {
|
||||
priorityWasSet = span.context().setSamplingPriority(PrioritySampling.SAMPLER_DROP);
|
||||
}
|
||||
} else {
|
||||
priorityWasSet = span.context().setSamplingPriority(PrioritySampling.SAMPLER_DROP);
|
||||
}
|
||||
|
||||
// Only set metrics if we actually set the sampling priority
|
||||
// We don't know until the call is completed because the lock is internal to DDSpanContext
|
||||
if (priorityWasSet) {
|
||||
span.context().setMetric(SAMPLING_RULE_RATE, matchedRule.getSampler().getSampleRate());
|
||||
|
||||
if (usedRateLimiter) {
|
||||
span.context().setMetric(SAMPLING_LIMIT_RATE, rateLimit);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
package datadog.trace.common.sampling;
|
||||
|
||||
import datadog.opentracing.DDSpan;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public abstract class SamplingRule {
|
||||
private final RateSampler sampler;
|
||||
|
||||
public SamplingRule(final RateSampler sampler) {
|
||||
this.sampler = sampler;
|
||||
}
|
||||
|
||||
public abstract boolean matches(DDSpan span);
|
||||
|
||||
public boolean sample(final DDSpan span) {
|
||||
return sampler.sample(span);
|
||||
}
|
||||
|
||||
public RateSampler getSampler() {
|
||||
return sampler;
|
||||
}
|
||||
|
||||
public static class AlwaysMatchesSamplingRule extends SamplingRule {
|
||||
|
||||
public AlwaysMatchesSamplingRule(final RateSampler sampler) {
|
||||
super(sampler);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean matches(final DDSpan span) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
public abstract static class PatternMatchSamplingRule extends SamplingRule {
|
||||
private final Pattern pattern;
|
||||
|
||||
public PatternMatchSamplingRule(final String regex, final RateSampler sampler) {
|
||||
super(sampler);
|
||||
this.pattern = Pattern.compile(regex);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean matches(final DDSpan span) {
|
||||
final String relevantString = getRelevantString(span);
|
||||
return relevantString != null && pattern.matcher(relevantString).matches();
|
||||
}
|
||||
|
||||
protected abstract String getRelevantString(DDSpan span);
|
||||
}
|
||||
|
||||
public static class ServiceSamplingRule extends PatternMatchSamplingRule {
|
||||
public ServiceSamplingRule(final String regex, final RateSampler sampler) {
|
||||
super(regex, sampler);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getRelevantString(final DDSpan span) {
|
||||
return span.getServiceName();
|
||||
}
|
||||
}
|
||||
|
||||
public static class OperationSamplingRule extends PatternMatchSamplingRule {
|
||||
public OperationSamplingRule(final String regex, final RateSampler sampler) {
|
||||
super(regex, sampler);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getRelevantString(final DDSpan span) {
|
||||
return span.getOperationName();
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue