stats/opentelemetry: Introduce Tracing API (#7852)

This commit is contained in:
Abhishek Ranjan 2025-01-30 09:16:28 +05:30 committed by GitHub
parent 7e1c9b2029
commit 78eebff58b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 1334 additions and 59 deletions

View File

@ -0,0 +1,36 @@
/*
* Copyright 2024 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Package opentelemetry is EXPERIMENTAL and will be moved to stats/opentelemetry
// package in a later release.
package opentelemetry
import (
"go.opentelemetry.io/otel/propagation"
"go.opentelemetry.io/otel/trace"
)
// TraceOptions contains the tracing settings for OpenTelemetry instrumentation.
type TraceOptions struct {
// TracerProvider is the OpenTelemetry tracer which is required to
// record traces/trace spans for instrumentation. If unset, tracing
// will not be recorded.
TracerProvider trace.TracerProvider
// TextMapPropagator propagates span context through text map carrier.
// If unset, tracing will not be recorded.
TextMapPropagator propagation.TextMapPropagator
}

View File

@ -21,7 +21,10 @@ import (
"sync/atomic"
"time"
otelcodes "go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/trace"
"google.golang.org/grpc"
grpccodes "google.golang.org/grpc/codes"
estats "google.golang.org/grpc/experimental/stats"
istats "google.golang.org/grpc/internal/stats"
"google.golang.org/grpc/metadata"
@ -85,8 +88,12 @@ func (h *clientStatsHandler) unaryInterceptor(ctx context.Context, method string
}
startTime := time.Now()
var span trace.Span
if h.options.isTracingEnabled() {
ctx, span = h.createCallTraceSpan(ctx, method)
}
err := invoker(ctx, method, req, reply, cc, opts...)
h.perCallMetrics(ctx, err, startTime, ci)
h.perCallTracesAndMetrics(ctx, err, startTime, ci, span)
return err
}
@ -119,22 +126,37 @@ func (h *clientStatsHandler) streamInterceptor(ctx context.Context, desc *grpc.S
}
startTime := time.Now()
var span trace.Span
if h.options.isTracingEnabled() {
ctx, span = h.createCallTraceSpan(ctx, method)
}
callback := func(err error) {
h.perCallMetrics(ctx, err, startTime, ci)
h.perCallTracesAndMetrics(ctx, err, startTime, ci, span)
}
opts = append([]grpc.CallOption{grpc.OnFinish(callback)}, opts...)
return streamer(ctx, desc, cc, method, opts...)
}
func (h *clientStatsHandler) perCallMetrics(ctx context.Context, err error, startTime time.Time, ci *callInfo) {
callLatency := float64(time.Since(startTime)) / float64(time.Second) // calculate ASAP
attrs := otelmetric.WithAttributeSet(otelattribute.NewSet(
otelattribute.String("grpc.method", ci.method),
otelattribute.String("grpc.target", ci.target),
otelattribute.String("grpc.status", canonicalString(status.Code(err))),
))
h.clientMetrics.callDuration.Record(ctx, callLatency, attrs)
// perCallTracesAndMetrics records per call trace spans and metrics.
func (h *clientStatsHandler) perCallTracesAndMetrics(ctx context.Context, err error, startTime time.Time, ci *callInfo, ts trace.Span) {
if h.options.isTracingEnabled() {
s := status.Convert(err)
if s.Code() == grpccodes.OK {
ts.SetStatus(otelcodes.Ok, s.Message())
} else {
ts.SetStatus(otelcodes.Error, s.Message())
}
ts.End()
}
if h.options.isMetricsEnabled() {
callLatency := float64(time.Since(startTime)) / float64(time.Second)
attrs := otelmetric.WithAttributeSet(otelattribute.NewSet(
otelattribute.String("grpc.method", ci.method),
otelattribute.String("grpc.target", ci.target),
otelattribute.String("grpc.status", canonicalString(status.Code(err))),
))
h.clientMetrics.callDuration.Record(ctx, callLatency, attrs)
}
}
// TagConn exists to satisfy stats.Handler.
@ -163,15 +185,17 @@ func (h *clientStatsHandler) TagRPC(ctx context.Context, info *stats.RPCTagInfo)
}
ctx = istats.SetLabels(ctx, labels)
}
ai := &attemptInfo{ // populates information about RPC start.
ai := &attemptInfo{
startTime: time.Now(),
xdsLabels: labels.TelemetryLabels,
method: info.FullMethodName,
method: removeLeadingSlash(info.FullMethodName),
}
ri := &rpcInfo{
if h.options.isTracingEnabled() {
ctx, ai = h.traceTagRPC(ctx, ai)
}
return setRPCInfo(ctx, &rpcInfo{
ai: ai,
}
return setRPCInfo(ctx, ri)
})
}
func (h *clientStatsHandler) HandleRPC(ctx context.Context, rs stats.RPCStats) {
@ -180,7 +204,12 @@ func (h *clientStatsHandler) HandleRPC(ctx context.Context, rs stats.RPCStats) {
logger.Error("ctx passed into client side stats handler metrics event handling has no client attempt data present")
return
}
h.processRPCEvent(ctx, rs, ri.ai)
if h.options.isMetricsEnabled() {
h.processRPCEvent(ctx, rs, ri.ai)
}
if h.options.isTracingEnabled() {
populateSpan(rs, ri.ai)
}
}
func (h *clientStatsHandler) processRPCEvent(ctx context.Context, s stats.RPCStats, ai *attemptInfo) {

View File

@ -0,0 +1,54 @@
/*
* Copyright 2024 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opentelemetry
import (
"context"
"strings"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/trace"
otelinternaltracing "google.golang.org/grpc/stats/opentelemetry/internal/tracing"
)
// traceTagRPC populates provided context with a new span using the
// TextMapPropagator supplied in trace options and internal itracing.carrier.
// It creates a new outgoing carrier which serializes information about this
// span into gRPC Metadata, if TextMapPropagator is provided in the trace
// options. if TextMapPropagator is not provided, it returns the context as is.
func (h *clientStatsHandler) traceTagRPC(ctx context.Context, ai *attemptInfo) (context.Context, *attemptInfo) {
mn := "Attempt." + strings.Replace(ai.method, "/", ".", -1)
tracer := otel.Tracer("grpc-open-telemetry")
ctx, span := tracer.Start(ctx, mn)
carrier := otelinternaltracing.NewOutgoingCarrier(ctx)
otel.GetTextMapPropagator().Inject(ctx, carrier)
ai.traceSpan = span
return carrier.Context(), ai
}
// createCallTraceSpan creates a call span to put in the provided context using
// provided TraceProvider. If TraceProvider is nil, it returns context as is.
func (h *clientStatsHandler) createCallTraceSpan(ctx context.Context, method string) (context.Context, trace.Span) {
if h.options.TraceOptions.TracerProvider == nil {
logger.Error("TraceProvider is not provided in trace options")
return ctx, nil
}
mn := strings.Replace(removeLeadingSlash(method), "/", ".", -1)
tracer := otel.Tracer("grpc-open-telemetry")
ctx, span := tracer.Start(ctx, mn, trace.WithSpanKind(trace.SpanKindClient))
return ctx, span
}

File diff suppressed because it is too large Load Diff

View File

@ -27,35 +27,50 @@ import (
"strings"
"time"
otelattribute "go.opentelemetry.io/otel/attribute"
otelmetric "go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/metric/noop"
"go.opentelemetry.io/otel/trace"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
experimental "google.golang.org/grpc/experimental/opentelemetry"
estats "google.golang.org/grpc/experimental/stats"
"google.golang.org/grpc/grpclog"
"google.golang.org/grpc/internal"
"google.golang.org/grpc/stats"
otelinternal "google.golang.org/grpc/stats/opentelemetry/internal"
otelattribute "go.opentelemetry.io/otel/attribute"
otelmetric "go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/metric/noop"
)
func init() {
otelinternal.SetPluginOption = func(o *Options, po otelinternal.PluginOption) {
o.MetricsOptions.pluginOption = po
// Log an error if one of the options is missing.
if (o.TraceOptions.TextMapPropagator == nil) != (o.TraceOptions.TracerProvider == nil) {
logger.Warning("Tracing will not be recorded because traceOptions are not set properly: one of TextMapPropagator or TracerProvider is missing")
}
}
}
var logger = grpclog.Component("otel-plugin")
var canonicalString = internal.CanonicalString.(func(codes.Code) string)
var joinDialOptions = internal.JoinDialOptions.(func(...grpc.DialOption) grpc.DialOption)
var (
logger = grpclog.Component("otel-plugin")
canonicalString = internal.CanonicalString.(func(codes.Code) string)
joinDialOptions = internal.JoinDialOptions.(func(...grpc.DialOption) grpc.DialOption)
)
// Options are the options for OpenTelemetry instrumentation.
type Options struct {
// MetricsOptions are the metrics options for OpenTelemetry instrumentation.
MetricsOptions MetricsOptions
// TraceOptions are the tracing options for OpenTelemetry instrumentation.
TraceOptions experimental.TraceOptions
}
func (o *Options) isMetricsEnabled() bool {
return o.MetricsOptions.MeterProvider != nil
}
func (o *Options) isTracingEnabled() bool {
return o.TraceOptions.TracerProvider != nil
}
// MetricsOptions are the metrics options for OpenTelemetry instrumentation.
@ -187,6 +202,15 @@ type attemptInfo struct {
pluginOptionLabels map[string]string // pluginOptionLabels to attach to metrics emitted
xdsLabels map[string]string
// traceSpan is data used for recording traces.
traceSpan trace.Span
// message counters for sent and received messages (used for
// generating message IDs), and the number of previous RPC attempts for the
// associated call.
countSentMsg uint32
countRecvMsg uint32
previousRPCAttempts uint32
}
type clientMetrics struct {

View File

@ -201,10 +201,12 @@ func (h *serverStatsHandler) TagRPC(ctx context.Context, info *stats.RPCTagInfo)
startTime: time.Now(),
method: removeLeadingSlash(method),
}
ri := &rpcInfo{
ai: ai,
if h.options.isTracingEnabled() {
ctx, ai = h.traceTagRPC(ctx, ai)
}
return setRPCInfo(ctx, ri)
return setRPCInfo(ctx, &rpcInfo{
ai: ai,
})
}
// HandleRPC implements per RPC tracing and stats implementation.
@ -214,7 +216,12 @@ func (h *serverStatsHandler) HandleRPC(ctx context.Context, rs stats.RPCStats) {
logger.Error("ctx passed into server side stats handler metrics event handling has no server call data present")
return
}
h.processRPCData(ctx, rs, ri.ai)
if h.options.isTracingEnabled() {
populateSpan(rs, ri.ai)
}
if h.options.isMetricsEnabled() {
h.processRPCData(ctx, rs, ri.ai)
}
}
func (h *serverStatsHandler) processRPCData(ctx context.Context, s stats.RPCStats, ai *attemptInfo) {

View File

@ -0,0 +1,46 @@
/*
* Copyright 2024 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opentelemetry
import (
"context"
"strings"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/trace"
otelinternaltracing "google.golang.org/grpc/stats/opentelemetry/internal/tracing"
)
// traceTagRPC populates context with new span data using the TextMapPropagator
// supplied in trace options and internal itracing.Carrier. It creates a new
// incoming carrier which extracts an existing span context (if present) by
// deserializing from provided context. If valid span context is extracted, it
// is set as parent of the new span otherwise new span remains the root span.
// If TextMapPropagator is not provided in the trace options, it returns context
// as is.
func (h *serverStatsHandler) traceTagRPC(ctx context.Context, ai *attemptInfo) (context.Context, *attemptInfo) {
mn := strings.Replace(ai.method, "/", ".", -1)
var span trace.Span
tracer := otel.Tracer("grpc-open-telemetry")
ctx = otel.GetTextMapPropagator().Extract(ctx, otelinternaltracing.NewIncomingCarrier(ctx))
// If the context.Context provided in `ctx` to tracer.Start(), contains a
// span then the newly-created Span will be a child of that span,
// otherwise it will be a root span.
ctx, span = tracer.Start(ctx, mn, trace.WithSpanKind(trace.SpanKindServer))
ai.traceSpan = span
return ctx, ai
}

View File

@ -0,0 +1,82 @@
/*
* Copyright 2024 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opentelemetry
import (
"sync/atomic"
"go.opentelemetry.io/otel/attribute"
otelcodes "go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/trace"
"google.golang.org/grpc/stats"
"google.golang.org/grpc/status"
)
// populateSpan populates span information based on stats passed in, representing
// invariants of the RPC lifecycle. It ends the span, triggering its export.
// This function handles attempt spans on the client-side and call spans on the
// server-side.
func populateSpan(rs stats.RPCStats, ai *attemptInfo) {
if ai == nil || ai.traceSpan == nil {
// Shouldn't happen, tagRPC call comes before this function gets called
// which populates this information.
logger.Error("ctx passed into stats handler tracing event handling has no traceSpan present")
return
}
span := ai.traceSpan
switch rs := rs.(type) {
case *stats.Begin:
// Note: Go always added Client and FailFast attributes even though they are not
// defined by the OpenCensus gRPC spec. Thus, they are unimportant for
// correctness.
span.SetAttributes(
attribute.Bool("Client", rs.Client),
attribute.Bool("FailFast", rs.Client),
attribute.Int64("previous-rpc-attempts", int64(ai.previousRPCAttempts)),
attribute.Bool("transparent-retry", rs.IsTransparentRetryAttempt),
)
// increment previous rpc attempts applicable for next attempt
atomic.AddUint32(&ai.previousRPCAttempts, 1)
case *stats.PickerUpdated:
span.AddEvent("Delayed LB pick complete")
case *stats.InPayload:
// message id - "must be calculated as two different counters starting
// from one for sent messages and one for received messages."
ai.countRecvMsg++
span.AddEvent("Inbound compressed message", trace.WithAttributes(
attribute.Int64("sequence-number", int64(ai.countRecvMsg)),
attribute.Int64("message-size", int64(rs.Length)),
attribute.Int64("message-size-compressed", int64(rs.CompressedLength)),
))
case *stats.OutPayload:
ai.countSentMsg++
span.AddEvent("Outbound compressed message", trace.WithAttributes(
attribute.Int64("sequence-number", int64(ai.countSentMsg)),
attribute.Int64("message-size", int64(rs.Length)),
attribute.Int64("message-size-compressed", int64(rs.CompressedLength)),
))
case *stats.End:
if rs.Error != nil {
s := status.Convert(rs.Error)
span.SetStatus(otelcodes.Error, s.Message())
} else {
span.SetStatus(otelcodes.Ok, "Ok")
}
span.End()
}
}