Honeycomb: Use a deterministic SamplerHook (#5433)
Switch from using the honeycomb beeline's built-in sampling to a sampler hook which bases its sampling decisions on a hash of the trace ID. This allows us to do "deterministic" sampling, where every span in a given trace will either be sent or not (since the trace ID is the same across all spans in a trace), giving us more complete traces. This preserves the same simple (single integer) configuration of the sample rate. The sample rate can be set differently for different boulder components (e.g. 1 at the WFE, 100 at the RA, and 1000 at the nonce-service), but the sampling rate denominator should only increase towards the leaves of a gRPC request path.
This commit is contained in:
parent
229377aabc
commit
e3d194f4b0
|
@ -6,7 +6,9 @@ import (
|
|||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
|
@ -272,12 +274,36 @@ type BeelineConfig struct {
|
|||
Dataset string
|
||||
// SampleRate is the (positive integer) denominator of the sample rate.
|
||||
// Default: 1 (meaning all traces are sent). Set higher to send fewer traces.
|
||||
SampleRate uint
|
||||
SampleRate uint32
|
||||
// Mute disables honeycomb entirely; useful in test environments.
|
||||
Mute bool
|
||||
// Many other fields of beeline.Config are omitted as they are not yet used.
|
||||
}
|
||||
|
||||
// makeSampler constructs a SamplerHook which will deterministically decide if
|
||||
// any given span should be sampled based on its TraceID, which is shared by all
|
||||
// spans within a trace. If a trace_id can't be found, the span will be sampled.
|
||||
// A sample rate of 0 defaults to a sample rate of 1 (i.e. all events are sent).
|
||||
func makeSampler(rate uint32) func(fields map[string]interface{}) (bool, int) {
|
||||
if rate == 0 {
|
||||
rate = 1
|
||||
}
|
||||
upperBound := math.MaxUint32 / rate
|
||||
|
||||
return func(fields map[string]interface{}) (bool, int) {
|
||||
id, ok := fields["trace.trace_id"].(string)
|
||||
if !ok {
|
||||
return true, 0
|
||||
}
|
||||
h := fnv.New32()
|
||||
_, err := h.Write([]byte(id))
|
||||
if err != nil {
|
||||
return true, 0
|
||||
}
|
||||
return h.Sum32() < upperBound, int(rate)
|
||||
}
|
||||
}
|
||||
|
||||
// Load converts a BeelineConfig to a beeline.Config, loading the api WriteKey
|
||||
// and setting the ServiceName automatically.
|
||||
func (bc *BeelineConfig) Load() (beeline.Config, error) {
|
||||
|
@ -291,16 +317,11 @@ func (bc *BeelineConfig) Load() (beeline.Config, error) {
|
|||
return beeline.Config{}, fmt.Errorf("failed to get write key: %w", err)
|
||||
}
|
||||
|
||||
samplerate := uint(1)
|
||||
if bc.SampleRate > 1 {
|
||||
samplerate = bc.SampleRate
|
||||
}
|
||||
|
||||
return beeline.Config{
|
||||
WriteKey: writekey,
|
||||
Dataset: bc.Dataset,
|
||||
ServiceName: path.Base(exec),
|
||||
SampleRate: samplerate,
|
||||
SamplerHook: makeSampler(bc.SampleRate),
|
||||
Mute: bc.Mute,
|
||||
}, nil
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
"testing"
|
||||
|
@ -96,3 +97,54 @@ func TestTLSConfigLoad(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSampler(t *testing.T) {
|
||||
type subcase struct {
|
||||
span map[string]interface{}
|
||||
sampled bool
|
||||
rate int
|
||||
}
|
||||
testCases := []struct {
|
||||
rate uint32
|
||||
cases []subcase
|
||||
}{
|
||||
// At sample rate 1, both of the well-formed spans should get sampled.
|
||||
{1, []subcase{
|
||||
{map[string]interface{}{"trace.trace_id": "foo"}, true, 1},
|
||||
{map[string]interface{}{"trace.trace_id": ""}, true, 1},
|
||||
{map[string]interface{}{"trace.trace_id": 1}, true, 0},
|
||||
{map[string]interface{}{}, true, 0},
|
||||
}},
|
||||
// At sample rate 0, it should behave the same as sample rate 1.
|
||||
{0, []subcase{
|
||||
{map[string]interface{}{"trace.trace_id": "foo"}, true, 1},
|
||||
{map[string]interface{}{"trace.trace_id": ""}, true, 1},
|
||||
{map[string]interface{}{"trace.trace_id": 1}, true, 0},
|
||||
{map[string]interface{}{}, true, 0},
|
||||
}},
|
||||
// At sample rate 2, only one of the well-formed spans should be sampled.
|
||||
{2, []subcase{
|
||||
{map[string]interface{}{"trace.trace_id": "foo"}, true, 2},
|
||||
{map[string]interface{}{"trace.trace_id": ""}, false, 2},
|
||||
{map[string]interface{}{"trace.trace_id": 1}, true, 0},
|
||||
{map[string]interface{}{}, true, 0},
|
||||
}},
|
||||
// At sample rate 100, neither of the well-formed spans should be sampled.
|
||||
{100, []subcase{
|
||||
{map[string]interface{}{"trace.trace_id": "foo"}, false, 100},
|
||||
{map[string]interface{}{"trace.trace_id": ""}, false, 100},
|
||||
{map[string]interface{}{"trace.trace_id": 1}, true, 0},
|
||||
{map[string]interface{}{}, true, 0},
|
||||
}},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
s := makeSampler(tc.rate)
|
||||
for _, c := range tc.cases {
|
||||
t.Run(fmt.Sprintf("Rate(%d) Id(%s)", tc.rate, c.span["trace.trace_id"]), func(t *testing.T) {
|
||||
b, i := s(c.span)
|
||||
test.AssertEquals(t, b, c.sampled)
|
||||
test.AssertEquals(t, i, c.rate)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue