diff --git a/cmd/config.go b/cmd/config.go index 5093a6a7b..255e064f1 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -6,7 +6,9 @@ import ( "encoding/json" "errors" "fmt" + "hash/fnv" "io/ioutil" + "math" "os" "path" "strings" @@ -272,12 +274,36 @@ type BeelineConfig struct { Dataset string // SampleRate is the (positive integer) denominator of the sample rate. // Default: 1 (meaning all traces are sent). Set higher to send fewer traces. - SampleRate uint + SampleRate uint32 // Mute disables honeycomb entirely; useful in test environments. Mute bool // Many other fields of beeline.Config are omitted as they are not yet used. } +// makeSampler constructs a SamplerHook which will deterministically decide if +// any given span should be sampled based on its TraceID, which is shared by all +// spans within a trace. If a trace_id can't be found, the span will be sampled. +// A sample rate of 0 defaults to a sample rate of 1 (i.e. all events are sent). +func makeSampler(rate uint32) func(fields map[string]interface{}) (bool, int) { + if rate == 0 { + rate = 1 + } + upperBound := math.MaxUint32 / rate + + return func(fields map[string]interface{}) (bool, int) { + id, ok := fields["trace.trace_id"].(string) + if !ok { + return true, 0 + } + h := fnv.New32() + _, err := h.Write([]byte(id)) + if err != nil { + return true, 0 + } + return h.Sum32() < upperBound, int(rate) + } +} + // Load converts a BeelineConfig to a beeline.Config, loading the api WriteKey // and setting the ServiceName automatically. func (bc *BeelineConfig) Load() (beeline.Config, error) { @@ -291,16 +317,11 @@ func (bc *BeelineConfig) Load() (beeline.Config, error) { return beeline.Config{}, fmt.Errorf("failed to get write key: %w", err) } - samplerate := uint(1) - if bc.SampleRate > 1 { - samplerate = bc.SampleRate - } - return beeline.Config{ WriteKey: writekey, Dataset: bc.Dataset, ServiceName: path.Base(exec), - SampleRate: samplerate, + SamplerHook: makeSampler(bc.SampleRate), Mute: bc.Mute, }, nil } diff --git a/cmd/config_test.go b/cmd/config_test.go index 6988e1e77..68db60a03 100644 --- a/cmd/config_test.go +++ b/cmd/config_test.go @@ -1,6 +1,7 @@ package cmd import ( + "fmt" "regexp" "strings" "testing" @@ -96,3 +97,54 @@ func TestTLSConfigLoad(t *testing.T) { }) } } + +func TestSampler(t *testing.T) { + type subcase struct { + span map[string]interface{} + sampled bool + rate int + } + testCases := []struct { + rate uint32 + cases []subcase + }{ + // At sample rate 1, both of the well-formed spans should get sampled. + {1, []subcase{ + {map[string]interface{}{"trace.trace_id": "foo"}, true, 1}, + {map[string]interface{}{"trace.trace_id": ""}, true, 1}, + {map[string]interface{}{"trace.trace_id": 1}, true, 0}, + {map[string]interface{}{}, true, 0}, + }}, + // At sample rate 0, it should behave the same as sample rate 1. + {0, []subcase{ + {map[string]interface{}{"trace.trace_id": "foo"}, true, 1}, + {map[string]interface{}{"trace.trace_id": ""}, true, 1}, + {map[string]interface{}{"trace.trace_id": 1}, true, 0}, + {map[string]interface{}{}, true, 0}, + }}, + // At sample rate 2, only one of the well-formed spans should be sampled. + {2, []subcase{ + {map[string]interface{}{"trace.trace_id": "foo"}, true, 2}, + {map[string]interface{}{"trace.trace_id": ""}, false, 2}, + {map[string]interface{}{"trace.trace_id": 1}, true, 0}, + {map[string]interface{}{}, true, 0}, + }}, + // At sample rate 100, neither of the well-formed spans should be sampled. + {100, []subcase{ + {map[string]interface{}{"trace.trace_id": "foo"}, false, 100}, + {map[string]interface{}{"trace.trace_id": ""}, false, 100}, + {map[string]interface{}{"trace.trace_id": 1}, true, 0}, + {map[string]interface{}{}, true, 0}, + }}, + } + for _, tc := range testCases { + s := makeSampler(tc.rate) + for _, c := range tc.cases { + t.Run(fmt.Sprintf("Rate(%d) Id(%s)", tc.rate, c.span["trace.trace_id"]), func(t *testing.T) { + b, i := s(c.span) + test.AssertEquals(t, b, c.sampled) + test.AssertEquals(t, i, c.rate) + }) + } + } +}