Honeycomb: Use a deterministic SamplerHook (#5433)

Switch from using the honeycomb beeline's built-in sampling
to a sampler hook which bases its sampling decisions on a
hash of the trace ID. This allows us to do "deterministic"
sampling, where every span in a given trace will either be
sent or not (since the trace ID is the same across all spans
in a trace), giving us more complete traces.

This preserves the same simple (single integer) configuration
of the sample rate. The sample rate can be set differently for
different Boulder components (e.g. 1 at the WFE, 100 at the
RA, and 1000 at the nonce-service), but the sampling rate
denominator should only increase towards the leaves of a
gRPC request path; otherwise a downstream component would send
spans whose parent spans were already dropped upstream.
This commit is contained in:
Aaron Gable 2021-05-27 13:13:54 -07:00 committed by GitHub
parent 229377aabc
commit e3d194f4b0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 80 additions and 7 deletions

View File

@ -6,7 +6,9 @@ import (
"encoding/json"
"errors"
"fmt"
"hash/fnv"
"io/ioutil"
"math"
"os"
"path"
"strings"
@ -272,12 +274,36 @@ type BeelineConfig struct {
Dataset string
// SampleRate is the (positive integer) denominator of the sample rate.
// Default: 1 (meaning all traces are sent). Set higher to send fewer traces.
SampleRate uint
SampleRate uint32
// Mute disables honeycomb entirely; useful in test environments.
Mute bool
// Many other fields of beeline.Config are omitted as they are not yet used.
}
// makeSampler constructs a SamplerHook which deterministically decides whether
// a span should be sampled, based on a 32-bit FNV-1 hash of its trace ID. The
// trace ID is shared by all spans within a trace, so every span of a given
// trace receives the same decision from samplers built with the same rate.
//
// rate is the denominator of the sample rate: roughly one in every rate traces
// is kept. A rate of 0 is treated as 1 (i.e. all events are sent).
//
// The returned hook reports whether the span should be kept and the sample
// rate to record for it. If the "trace.trace_id" field is missing or is not a
// string, the span is always kept and the reported rate is 0.
func makeSampler(rate uint32) func(fields map[string]interface{}) (bool, int) {
	if rate == 0 {
		rate = 1
	}
	// Hashes in the inclusive range [0, upperBound] are kept. The bound must be
	// inclusive: at rate 1 it equals math.MaxUint32, and an exclusive comparison
	// would drop a trace whose hash is exactly that value even though a rate of
	// 1 promises that everything is sent.
	upperBound := math.MaxUint32 / rate
	return func(fields map[string]interface{}) (bool, int) {
		id, ok := fields["trace.trace_id"].(string)
		if !ok {
			// Without a usable trace ID there is nothing to hash, so err on the
			// side of keeping the span.
			return true, 0
		}
		h := fnv.New32()
		_, err := h.Write([]byte(id))
		if err != nil {
			// hash.Hash documents that Write never returns an error; this check
			// is purely defensive and keeps the span if it ever does.
			return true, 0
		}
		return h.Sum32() <= upperBound, int(rate)
	}
}
// Load converts a BeelineConfig to a beeline.Config, loading the api WriteKey
// and setting the ServiceName automatically.
func (bc *BeelineConfig) Load() (beeline.Config, error) {
@ -291,16 +317,11 @@ func (bc *BeelineConfig) Load() (beeline.Config, error) {
return beeline.Config{}, fmt.Errorf("failed to get write key: %w", err)
}
samplerate := uint(1)
if bc.SampleRate > 1 {
samplerate = bc.SampleRate
}
return beeline.Config{
WriteKey: writekey,
Dataset: bc.Dataset,
ServiceName: path.Base(exec),
SampleRate: samplerate,
SamplerHook: makeSampler(bc.SampleRate),
Mute: bc.Mute,
}, nil
}

View File

@ -1,6 +1,7 @@
package cmd
import (
"fmt"
"regexp"
"strings"
"testing"
@ -96,3 +97,54 @@ func TestTLSConfigLoad(t *testing.T) {
})
}
}
// TestSampler checks that the hook returned by makeSampler makes the expected
// keep/drop decision and reports the expected sample rate for a fixed set of
// spans at several configured rates. Each rate is exercised with two
// well-formed spans (trace IDs "foo" and ""), one span whose trace ID is not a
// string, and one span with no trace ID at all.
func TestSampler(t *testing.T) {
	// subcase is a single span fed to the sampler plus the expected results.
	type subcase struct {
		span    map[string]interface{}
		sampled bool
		rate    int
	}
	testCases := []struct {
		rate  uint32
		cases []subcase
	}{
		// At sample rate 1, both of the well-formed spans should get sampled.
		{1, []subcase{
			{map[string]interface{}{"trace.trace_id": "foo"}, true, 1},
			{map[string]interface{}{"trace.trace_id": ""}, true, 1},
			{map[string]interface{}{"trace.trace_id": 1}, true, 0},
			{map[string]interface{}{}, true, 0},
		}},
		// At sample rate 0, it should behave the same as sample rate 1.
		{0, []subcase{
			{map[string]interface{}{"trace.trace_id": "foo"}, true, 1},
			{map[string]interface{}{"trace.trace_id": ""}, true, 1},
			{map[string]interface{}{"trace.trace_id": 1}, true, 0},
			{map[string]interface{}{}, true, 0},
		}},
		// At sample rate 2, only one of the well-formed spans should be sampled.
		{2, []subcase{
			{map[string]interface{}{"trace.trace_id": "foo"}, true, 2},
			{map[string]interface{}{"trace.trace_id": ""}, false, 2},
			{map[string]interface{}{"trace.trace_id": 1}, true, 0},
			{map[string]interface{}{}, true, 0},
		}},
		// At sample rate 100, neither of the well-formed spans should be sampled.
		{100, []subcase{
			{map[string]interface{}{"trace.trace_id": "foo"}, false, 100},
			{map[string]interface{}{"trace.trace_id": ""}, false, 100},
			{map[string]interface{}{"trace.trace_id": 1}, true, 0},
			{map[string]interface{}{}, true, 0},
		}},
	}
	for _, tc := range testCases {
		s := makeSampler(tc.rate)
		for _, c := range tc.cases {
			// The trace ID may be a string, an int, or absent, so format it with
			// %v; %s would render the non-string cases as "%!s(int=1)" and
			// "%!s(<nil>)" in the subtest name.
			t.Run(fmt.Sprintf("Rate(%d) Id(%v)", tc.rate, c.span["trace.trace_id"]), func(t *testing.T) {
				b, i := s(c.span)
				test.AssertEquals(t, b, c.sampled)
				test.AssertEquals(t, i, c.rate)
			})
		}
	}
}