Example / proof of concept to achieve a combination of head-based sampling + a basic form of tail-based sampling at a span level. (#4206)
Co-authored-by: Timothy Mothra <tilee@microsoft.com> Co-authored-by: Cijo Thomas <cithomas@microsoft.com>
This commit is contained in:
parent
a1ea6d6aef
commit
3b1ceba8e6
|
|
@ -249,6 +249,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "getting-started-console", "
|
|||
EndProject
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "getting-started-jaeger", "docs\trace\getting-started-jaeger\getting-started-jaeger.csproj", "{A0C0B77C-6C7B-4EC2-AC61-EA1F489811B9}"
|
||||
EndProject
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "tail-based-sampling-example", "docs\trace\tail-based-sampling-span-level\tail-based-sampling-example.csproj", "{800DB925-6014-4136-AC01-3356CF7CADD3}"
|
||||
EndProject
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "stratified-sampling-example", "docs\trace\stratified-sampling-example\stratified-sampling-example.csproj", "{9C99621C-343E-479C-A943-332DB6129B71}"
|
||||
EndProject
|
||||
Global
|
||||
|
|
@ -525,6 +527,10 @@ Global
|
|||
{A0C0B77C-6C7B-4EC2-AC61-EA1F489811B9}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{A0C0B77C-6C7B-4EC2-AC61-EA1F489811B9}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{A0C0B77C-6C7B-4EC2-AC61-EA1F489811B9}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{800DB925-6014-4136-AC01-3356CF7CADD3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{800DB925-6014-4136-AC01-3356CF7CADD3}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{800DB925-6014-4136-AC01-3356CF7CADD3}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{800DB925-6014-4136-AC01-3356CF7CADD3}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{9C99621C-343E-479C-A943-332DB6129B71}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{9C99621C-343E-479C-A943-332DB6129B71}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{9C99621C-343E-479C-A943-332DB6129B71}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
|
|
@ -568,6 +574,7 @@ Global
|
|||
{DEDE8442-03CA-48CF-99B9-EA224D89D148} = {5B7FB835-3FFF-4BC2-99C5-A5B5FAE3C818}
|
||||
{EF4F6280-14D1-49D4-8095-1AC36E169AA8} = {5B7FB835-3FFF-4BC2-99C5-A5B5FAE3C818}
|
||||
{A0C0B77C-6C7B-4EC2-AC61-EA1F489811B9} = {5B7FB835-3FFF-4BC2-99C5-A5B5FAE3C818}
|
||||
{800DB925-6014-4136-AC01-3356CF7CADD3} = {5B7FB835-3FFF-4BC2-99C5-A5B5FAE3C818}
|
||||
{9C99621C-343E-479C-A943-332DB6129B71} = {5B7FB835-3FFF-4BC2-99C5-A5B5FAE3C818}
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
|
|
|
|||
|
|
@ -0,0 +1,61 @@
|
|||
// <copyright file="ParentBasedElseAlwaysRecordSampler.cs" company="OpenTelemetry Authors">
|
||||
// Copyright The OpenTelemetry Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
// </copyright>
|
||||
|
||||
using OpenTelemetry.Trace;
|
||||
|
||||
namespace SDKBasedSpanLevelTailSamplingSample;
|
||||
|
||||
/// <summary>
/// Proof-of-concept composite sampler; it is not meant to be used directly in production.
/// It combines parent-based head sampling with SDK-side "span-level" tail-based sampling.
/// The wrapped parent-based sampler is consulted first, and any decision other than
/// <see cref="SamplingDecision.Drop"/> is returned unchanged. A drop decision is replaced
/// with a "record-only" result, so that a span processor can still change the outcome
/// later based on span attributes (e.g., failure) that only become available when the
/// span ends.
/// </summary>
internal class ParentBasedElseAlwaysRecordSampler : Sampler
{
    private const double DefaultSamplingProbabilityForRootSpan = 0.1;
    private readonly ParentBasedSampler parentBasedSampler;

    /// <summary>
    /// Initializes a new instance of the <see cref="ParentBasedElseAlwaysRecordSampler"/> class.
    /// </summary>
    /// <param name="samplingProbabilityForRootSpan">
    /// Probability handed to the <see cref="TraceIdRatioBasedSampler"/> that the wrapped
    /// <see cref="ParentBasedSampler"/> is built on. Defaults to 0.1.
    /// </param>
    public ParentBasedElseAlwaysRecordSampler(double samplingProbabilityForRootSpan = DefaultSamplingProbabilityForRootSpan)
    {
        var ratioSampler = new TraceIdRatioBasedSampler(samplingProbabilityForRootSpan);
        this.parentBasedSampler = new ParentBasedSampler(ratioSampler);
    }

    /// <summary>
    /// Runs the parent-based sampler and upgrades a drop decision to record-only.
    /// </summary>
    /// <param name="samplingParameters">Parameters forwarded as-is to the parent-based sampler.</param>
    /// <returns>
    /// The parent-based sampler's result when it is not a drop; otherwise a new
    /// <see cref="SamplingDecision.RecordOnly"/> result.
    /// </returns>
    public override SamplingResult ShouldSample(in SamplingParameters samplingParameters)
    {
        // Head sampling comes first: ask the parent-based sampler.
        var headResult = this.parentBasedSampler.ShouldSample(samplingParameters);

        if (headResult.Decision == SamplingDecision.Drop)
        {
            // The head sampler wants to drop this span. Answer RecordOnly instead so the
            // span filtering processors later in the pipeline can apply tail-based
            // sampling rules (e.g., to sample all failed spans). RecordOnly matters because:
            // 1. It causes the processor pipeline to be invoked.
            // 2. It causes activity.IsAllDataRequested to return true, so most
            //    instrumentations end up populating the required attributes.
            return new SamplingResult(SamplingDecision.RecordOnly);
        }

        // Anything other than a drop is kept exactly as the head sampler decided.
        return headResult;
    }
}
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
// <copyright file="Program.cs" company="OpenTelemetry Authors">
|
||||
// Copyright The OpenTelemetry Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
// </copyright>
|
||||
|
||||
using System.Diagnostics;
|
||||
using OpenTelemetry;
|
||||
using OpenTelemetry.Trace;
|
||||
|
||||
namespace SDKBasedSpanLevelTailSamplingSample;
|
||||
|
||||
/// <summary>
/// Console entry point of the sample. It wires the custom sampler, the tail-sampling
/// processor and the console exporter together, then emits a batch of activities with
/// a mix of error and ok statuses.
/// </summary>
internal class Program
{
    // Single definition of the source name, shared by the ActivitySource and AddSource.
    private const string ActivitySourceName = "SDK.TailSampling.POC";

    private static readonly ActivitySource MyActivitySource = new(ActivitySourceName);

    /// <summary>
    /// Builds the tracer provider and generates 50 "SayHello" activities.
    /// </summary>
    /// <param name="args">Command line arguments; not used by this sample.</param>
    public static void Main(string[] args)
    {
        using var tracerProvider = Sdk.CreateTracerProviderBuilder()
            .SetSampler(new ParentBasedElseAlwaysRecordSampler())
            .AddSource(ActivitySourceName)
            .AddProcessor(new TailSamplingProcessor())
            .AddConsoleExporter()
            .Build();

        // Fixed seed, so the sequence of simulated statuses is the same on every run.
        var random = new Random(2357);

        // Generate some spans
        for (var remaining = 50; remaining > 0; remaining--)
        {
            // Disposed (and therefore ended) at the end of each loop iteration.
            using var activity = MyActivitySource.StartActivity("SayHello");
            activity?.SetTag("foo", "bar");

            // Simulate a mix of failed and successful spans: a draw of 0 out of
            // the five possible values marks the span as failed.
            var status = random.Next(5) == 0
                ? ActivityStatusCode.Error
                : ActivityStatusCode.Ok;
            activity?.SetStatus(status);
        }
    }
}
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
# Tail Based Sampling at an activity (span) level: An Example
|
||||
|
||||
This document describes one possible way to achieve a form of tail-based
|
||||
sampling to include all failed activities in addition to head-based sampling.
|
||||
|
||||
It does this by leveraging the extensibility mechanisms in the OpenTelemetry
|
||||
SDK. It uses a combination of a custom sampler and an ActivityProcessor
|
||||
(span processor).
|
||||
|
||||
This is a way to achieve a combination of:
|
||||
|
||||
- Head-based sampling (probabilistic/unbiased sampling), and
|
||||
- Tail-based sampling (a non-probabilistic/biased sampling).
|
||||
|
||||
## How does this sampling example work?
|
||||
|
||||
We use a hybrid approach: we do head based sampling to get a
|
||||
probabilistic subset of all activities which includes both successful activities
|
||||
and failure activities. In addition, we want to capture all failure activities.
|
||||
To do this, if the parent based sampler's decision is to drop it, we return
|
||||
a "Record-Only" sampling result. This ensures that the activity processor
|
||||
receives that activity. In the activity processor, at the end of an activity,
|
||||
we check if it is a failure activity. If so, we change the decision from
|
||||
"Record-Only" to set the sampled flag so that the exporter receives the
|
||||
activity. In this example, each activity is filtered individually without
|
||||
consideration to any other activities.
|
||||
|
||||
This is a basic form of tail-based sampling at an activity level. If an
|
||||
activity failed, we always sample it in addition to all head-sampled
|
||||
activities.
|
||||
|
||||
## When should you consider such an option?
|
||||
|
||||
This is a good option if you want to get all failure activities in addition to
|
||||
head based sampling. With this, you get basic activity level tail-based sampling
|
||||
at an SDK level without having to install any additional components.
|
||||
|
||||
## Tradeoffs
|
||||
|
||||
Tail-sampling this way involves many tradeoffs such as:
|
||||
|
||||
1. Additional performance cost: Unlike head-based sampling where the sampling
|
||||
decision is made at activity creation time, in tail sampling the decision is made
|
||||
only at the end, so there is additional memory/processing cost.
|
||||
|
||||
2. Partial traces: Since this sampling is at an activity level, the generated trace
|
||||
will be partial. For example, if another part of the call tree is successful,
|
||||
those activities may not be exported leading to an incomplete trace.
|
||||
|
||||
3. If multiple exporters are used, this decision will impact all of them:
|
||||
[Issue 3861](https://github.com/open-telemetry/opentelemetry-dotnet/issues/3861).
|
||||
|
||||
## Sample Output
|
||||
|
||||
You should see output similar to the following when you run this example.
|
||||
|
||||
```text
|
||||
Including error activity with id
|
||||
00-404ddff248b8f9a9b21e347d68d2640e-035858bc3c168885-01 and status Error
|
||||
Activity.TraceId: 404ddff248b8f9a9b21e347d68d2640e
|
||||
Activity.SpanId: 035858bc3c168885
|
||||
Activity.TraceFlags: Recorded
|
||||
Activity.ActivitySourceName: SDK.TailSampling.POC
|
||||
Activity.DisplayName: SayHello
|
||||
Activity.Kind: Internal
|
||||
Activity.StartTime: 2023-02-09T19:05:32.5563112Z
|
||||
Activity.Duration: 00:00:00.0028144
|
||||
Activity.Tags:
|
||||
foo: bar
|
||||
StatusCode: Error
|
||||
Resource associated with Activity:
|
||||
service.name: unknown_service:Examples.TailBasedSamplingAtSpanLevel
|
||||
|
||||
Dropping activity with id 00-ea861bda268c58d328ab7cbe49851499-daba29055de80a53-00
|
||||
and status Ok
|
||||
|
||||
Including error activity with id
|
||||
00-802dea991247e2d699d943167eb546de-cc120b0bd1741b52-01 and status Error
|
||||
Activity.TraceId: 802dea991247e2d699d943167eb546de
|
||||
Activity.SpanId: cc120b0bd1741b52
|
||||
Activity.TraceFlags: Recorded
|
||||
Activity.ActivitySourceName: SDK.TailSampling.POC
|
||||
Activity.DisplayName: SayHello
|
||||
Activity.Kind: Internal
|
||||
Activity.StartTime: 2023-02-09T19:05:32.7021138Z
|
||||
Activity.Duration: 00:00:00.0000012
|
||||
Activity.Tags:
|
||||
foo: bar
|
||||
StatusCode: Error
|
||||
Resource associated with Activity:
|
||||
service.name: unknown_service:Examples.TailBasedSamplingAtSpanLevel
|
||||
|
||||
Including head-sampled activity with id
|
||||
00-f3c88010615e285c8f3cb3e2bcd70c7f-f9316215f12437c3-01 and status Ok
|
||||
Activity.TraceId: f3c88010615e285c8f3cb3e2bcd70c7f
|
||||
Activity.SpanId: f9316215f12437c3
|
||||
Activity.TraceFlags: Recorded
|
||||
Activity.ActivitySourceName: SDK.TailSampling.POC
|
||||
Activity.DisplayName: SayHello
|
||||
Activity.Kind: Internal
|
||||
Activity.StartTime: 2023-02-09T19:05:32.8519346Z
|
||||
Activity.Duration: 00:00:00.0000034
|
||||
Activity.Tags:
|
||||
foo: bar
|
||||
StatusCode: Ok
|
||||
Resource associated with Activity:
|
||||
service.name: unknown_service:Examples.TailBasedSamplingAtSpanLevel
|
||||
```
|
||||
|
|
@ -0,0 +1,73 @@
|
|||
// <copyright file="TailSamplingProcessor.cs" company="OpenTelemetry Authors">
|
||||
// Copyright The OpenTelemetry Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
// </copyright>
|
||||
|
||||
using System.Diagnostics;
|
||||
using OpenTelemetry;
|
||||
|
||||
namespace SDKBasedSpanLevelTailSamplingSample;
|
||||
|
||||
/// <summary>
/// A custom processor that filters <see cref="Activity"/> instances when they end:
/// activities that are already marked as recorded are left alone, and activities that
/// are not recorded but ended with an error status get the recorded flag set.
/// </summary>
internal sealed class TailSamplingProcessor : BaseProcessor<Activity>
{
    /// <summary>
    /// Initializes a new instance of the <see cref="TailSamplingProcessor"/> class.
    /// </summary>
    public TailSamplingProcessor()
    {
    }

    /// <summary>
    /// Applies the tail-sampling rule to an activity that has just ended.
    /// </summary>
    /// <param name="activity">The ended activity to inspect.</param>
    public override void OnEnd(Activity activity)
    {
        if (!activity.Recorded)
        {
            // Not picked by head-based sampling; give failed activities a second chance.
            this.MarkRecordedWhenFailed(activity);
        }
        else
        {
            // Head-based sampling already included this activity, so that decision
            // stands and nothing needs to change.
            Console.WriteLine($"Including head-sampled activity with id {activity.Id} and status {activity.Status}");
        }

        base.OnEnd(activity);
    }

    // Filters a span at the end of its lifetime. This is a basic form of tail-based
    // sampling at a span level: a failed span is always sampled, in addition to all
    // head-sampled spans. Each span is judged on its own, without consideration of
    // any other span.
    //
    // Tail-sampling this way involves many tradeoffs, for example:
    // 1. Performance: unlike head-based sampling, where the decision is made at span
    //    creation time, the decision here is made only at the end, so there is
    //    additional memory cost.
    // 2. Incomplete traces: because sampling happens per span, the generated trace
    //    will be partial. If another part of the call tree is successful, those
    //    spans may not be sampled in.
    // 3. If multiple exporters are used, this decision will impact all of them:
    //    https://github.com/open-telemetry/opentelemetry-dotnet/issues/3861.
    private void MarkRecordedWhenFailed(Activity activity)
    {
        if (activity.Status != ActivityStatusCode.Error)
        {
            // This span is not sampled and exporters won't see this span.
            Console.WriteLine($"Dropping activity with id {activity.Id} and status {activity.Status}");
            return;
        }

        // All failure spans are included. The recorded flag is set first, so that the
        // span will be exported; the message below is written only after that change.
        activity.ActivityTraceFlags = activity.ActivityTraceFlags | ActivityTraceFlags.Recorded;
        Console.WriteLine($"Including error activity with id {activity.Id} and status {activity.Status}");
    }
}
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="$(RepoRoot)\src\OpenTelemetry.Exporter.Console\OpenTelemetry.Exporter.Console.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
Loading…
Reference in New Issue