Opentelemetry: Add option for public endpoints (#6867)

This PR adds a new configuration block specifically for the otelhttp
instrumentation. This block is separate from the existing
"opentelemetry" configuration, and is only relevant when using otelhttp
instrumentation. It does not share any codepath with the existing
configuration, so it is at the top level to indicate which services it
applies to.

There's a bit of plumbing to pass the new configuration through. I've
adapted the measured_http package to also set up OpenTelemetry instead of
just metrics, which should hopefully allow any future changes to be smaller
(just config & there) and more consistent between the wfe2 and ocsp
responder.

There's one option here now, which disables setting
[otelhttp.WithPublicEndpoint](https://pkg.go.dev/go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp#WithPublicEndpoint).
This option is designed to do exactly what we want: Don't accept
incoming spans as parents of the new span created in the server.
Previously we had a setting to disable parent-based sampling to help
with this problem, which doesn't really make sense anymore, so let's
just remove it and simplify that setup path. The default of "false" is
designed to be the safe option. It's set to "true" in the test/ configs
for integration tests that use traces, and I expect we'll likely set it
to "true" in production eventually once the LBs are configured to handle
tracing themselves.

Fixes #6851
This commit is contained in:
Matthew McPherrin 2023-05-12 15:34:34 -04:00 committed by GitHub
parent 310546a14e
commit 3aae67b8a9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 53 additions and 26 deletions

View File

@ -141,6 +141,9 @@ type Config struct {
Syslog cmd.SyslogConfig
OpenTelemetry cmd.OpenTelemetryConfig
// OpenTelemetryHTTPConfig configures tracing on incoming HTTP requests
OpenTelemetryHTTPConfig cmd.OpenTelemetryHTTPConfig
}
type CacheConfig struct {
@ -356,7 +359,7 @@ func main() {
logger.Infof("WFE using key policy: %#v", kp)
logger.Infof("Server running, listening on %s....", c.WFE.ListenAddress)
handler := wfe.Handler(stats)
handler := wfe.Handler(stats, c.OpenTelemetryHTTPConfig.Options()...)
srv := http.Server{
ReadTimeout: 30 * time.Second,

View File

@ -11,6 +11,7 @@ import (
"github.com/go-sql-driver/mysql"
"github.com/prometheus/client_golang/prometheus"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"google.golang.org/grpc/resolver"
"github.com/letsencrypt/boulder/config"
@ -483,16 +484,33 @@ type OpenTelemetryConfig struct {
Endpoint string
// SampleRatio is the ratio of new traces to head sample.
// This only affects new traces with no parent with its own sampling decision.
// This only affects new traces without a parent with its own sampling
// decision, and otherwise use the parent's sampling decision.
//
// Set to something between 0 and 1, where 1 is sampling all traces.
// See otel trace.TraceIDRatioBased for details.
// This is primarily meant as a pressure relief if the Endpoint we connect to
// is being overloaded, and we otherwise handle sampling in the collectors.
// See otel trace.ParentBased and trace.TraceIDRatioBased for details.
SampleRatio float64
}
// If true, disable the parent sampler.
// On external-facing services like the WFE, setting this true will
// ensure that any external API users don't influence our own sampling
// decisions.
DisableParentSampler bool
// OpenTelemetryHTTPConfig configures the otelhttp server tracing. Its
// Options method translates these settings into otelhttp options.
type OpenTelemetryHTTPConfig struct {
// TrustIncomingSpans should only be set true if there's a trusted service
// connecting to Boulder, such as a load balancer that's tracing-aware.
// If false, the default (and the zero value), Options includes
// otelhttp.WithPublicEndpoint, so incoming traces won't be set as the
// parent of the server's span.
// See otelhttp.WithPublicEndpoint
TrustIncomingSpans bool
}
// Options translates this configuration into the otelhttp options it implies.
// The result is suitable for otelhttp.NewHandler or for Boulder's wrapper,
// measured_http.New. When incoming spans are trusted no options are needed and
// nil is returned; otherwise the handler is marked as a public endpoint.
func (c *OpenTelemetryHTTPConfig) Options() []otelhttp.Option {
	if c.TrustIncomingSpans {
		return nil
	}
	return []otelhttp.Option{otelhttp.WithPublicEndpoint()}
}
// DNSProvider contains the configuration for a DNS provider in the bdns package

View File

@ -109,6 +109,9 @@ type Config struct {
Syslog cmd.SyslogConfig
OpenTelemetry cmd.OpenTelemetryConfig
// OpenTelemetryHTTPConfig configures tracing on incoming HTTP requests
OpenTelemetryHTTPConfig cmd.OpenTelemetryHTTPConfig
}
func main() {
@ -214,7 +217,7 @@ as generated by Boulder's ceremony command.
cmd.FailOnError(err, "Could not create filtered source")
}
m := mux(c.OCSPResponder.Path, source, c.OCSPResponder.Timeout.Duration, scope, logger, c.OCSPResponder.LogSampleRate)
m := mux(c.OCSPResponder.Path, source, c.OCSPResponder.Timeout.Duration, scope, c.OpenTelemetryHTTPConfig.Options(), logger, c.OCSPResponder.LogSampleRate)
srv := &http.Server{
ReadTimeout: 30 * time.Second,
@ -258,7 +261,7 @@ func (om *ocspMux) Handler(_ *http.Request) (http.Handler, string) {
return om.handler, "/"
}
func mux(responderPath string, source responder.Source, timeout time.Duration, stats prometheus.Registerer, logger blog.Logger, sampleRate int) http.Handler {
func mux(responderPath string, source responder.Source, timeout time.Duration, stats prometheus.Registerer, oTelHTTPOptions []otelhttp.Option, logger blog.Logger, sampleRate int) http.Handler {
stripPrefix := http.StripPrefix(responderPath, responder.NewResponder(source, timeout, stats, logger, sampleRate))
h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == "GET" && r.URL.Path == "/" {
@ -268,7 +271,7 @@ func mux(responderPath string, source responder.Source, timeout time.Duration, s
}
stripPrefix.ServeHTTP(w, r)
})
return otelhttp.NewHandler(measured_http.New(&ocspMux{h}, cmd.Clock(), stats), "server")
return measured_http.New(&ocspMux{h}, cmd.Clock(), stats, oTelHTTPOptions...)
}
func init() {

View File

@ -9,6 +9,7 @@ import (
"testing"
"time"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"golang.org/x/crypto/ocsp"
blog "github.com/letsencrypt/boulder/log"
@ -40,7 +41,7 @@ func TestMux(t *testing.T) {
src, err := responder.NewMemorySource(responses, blog.NewMock())
test.AssertNotError(t, err, "failed to create inMemorySource")
h := mux("/foobar/", src, time.Second, metrics.NoopRegisterer, blog.NewMock(), 1000)
h := mux("/foobar/", src, time.Second, metrics.NoopRegisterer, []otelhttp.Option{}, blog.NewMock(), 1000)
type muxTest struct {
method string

View File

@ -313,16 +313,11 @@ func NewOpenTelemetry(config OpenTelemetryConfig, logger blog.Logger) func(ctx c
FailOnError(err, "Could not create OpenTelemetry resource")
}
// Use a ParentBased sampler to respect the sample decisions on incoming
// traces, and TraceIDRatioBased to randomly sample new traces.
sampler := trace.TraceIDRatioBased(config.SampleRatio)
if !config.DisableParentSampler {
sampler = trace.ParentBased(sampler)
}
opts := []trace.TracerProviderOption{
trace.WithResource(r),
trace.WithSampler(sampler),
// Use a ParentBased sampler to respect the sample decisions on incoming
// traces, and TraceIDRatioBased to randomly sample new traces.
trace.WithSampler(trace.ParentBased(trace.TraceIDRatioBased(config.SampleRatio))),
}
if config.Endpoint != "" {

View File

@ -6,6 +6,7 @@ import (
"github.com/jmhodges/clock"
"github.com/prometheus/client_golang/prometheus"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
)
// responseWriterWithStatus satisfies http.ResponseWriter, but keeps track of the
@ -46,7 +47,7 @@ type MeasuredHandler struct {
stat *prometheus.HistogramVec
}
func New(m serveMux, clk clock.Clock, stats prometheus.Registerer) *MeasuredHandler {
func New(m serveMux, clk clock.Clock, stats prometheus.Registerer, opts ...otelhttp.Option) http.Handler {
responseTime := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "response_time",
@ -54,11 +55,11 @@ func New(m serveMux, clk clock.Clock, stats prometheus.Registerer) *MeasuredHand
},
[]string{"endpoint", "method", "code"})
stats.MustRegister(responseTime)
return &MeasuredHandler{
return otelhttp.NewHandler(&MeasuredHandler{
serveMux: m,
clk: clk,
stat: responseTime,
}
}, "server", opts...)
}
func (h *MeasuredHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {

View File

@ -67,5 +67,8 @@
"opentelemetry": {
"endpoint": "bjaeger:4317",
"sampleratio": 1
},
"openTelemetryHttpConfig": {
"trustIncomingSpans": true
}
}

View File

@ -109,5 +109,8 @@
"opentelemetry": {
"endpoint": "bjaeger:4317",
"sampleratio": 1
},
"openTelemetryHttpConfig": {
"trustIncomingSpans": true
}
}

View File

@ -22,7 +22,7 @@ import (
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"go.opentelemetry.io/otel/trace"
"google.golang.org/protobuf/types/known/emptypb"
jose "gopkg.in/go-jose/go-jose.v2"
"gopkg.in/go-jose/go-jose.v2"
"github.com/letsencrypt/boulder/core"
corepb "github.com/letsencrypt/boulder/core/proto"
@ -399,7 +399,7 @@ func (wfe *WebFrontEndImpl) relativeDirectory(request *http.Request, directory m
// Handler returns an http.Handler that uses various functions for
// various ACME-specified paths.
func (wfe *WebFrontEndImpl) Handler(stats prometheus.Registerer) http.Handler {
func (wfe *WebFrontEndImpl) Handler(stats prometheus.Registerer, oTelHTTPOptions ...otelhttp.Option) http.Handler {
m := http.NewServeMux()
// Boulder specific endpoints
wfe.HandleFunc(m, buildIDPath, wfe.BuildID, "GET")
@ -440,7 +440,7 @@ func (wfe *WebFrontEndImpl) Handler(stats prometheus.Registerer) http.Handler {
// meaning we can wind up returning 405 when we mean to return 404. See
// https://github.com/letsencrypt/boulder/issues/717
m.Handle("/", web.NewTopHandler(wfe.log, web.WFEHandlerFunc(wfe.Index)))
return otelhttp.NewHandler(measured_http.New(m, wfe.clk, stats), "server")
return measured_http.New(m, wfe.clk, stats, oTelHTTPOptions...)
}
// Method implementations