mirror of https://github.com/grpc/grpc-go.git
stats/opentelemetry: Introduce Tracing API (#7852)
This commit is contained in:
parent
7e1c9b2029
commit
78eebff58b
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* Copyright 2024 gRPC authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Package opentelemetry is EXPERIMENTAL and will be moved to stats/opentelemetry
|
||||
// package in a later release.
|
||||
package opentelemetry
|
||||
|
||||
import (
|
||||
"go.opentelemetry.io/otel/propagation"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
// TraceOptions contains the tracing settings for OpenTelemetry instrumentation.
|
||||
type TraceOptions struct {
|
||||
// TracerProvider is the OpenTelemetry tracer provider which is required
|
||||
// to record traces/trace spans for instrumentation. If unset, tracing
|
||||
// will not be recorded.
|
||||
TracerProvider trace.TracerProvider
|
||||
|
||||
// TextMapPropagator propagates span context through a text map carrier.
|
||||
// If unset, tracing will not be recorded.
|
||||
TextMapPropagator propagation.TextMapPropagator
|
||||
}
|
||||
|
|
@ -21,7 +21,10 @@ import (
|
|||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
otelcodes "go.opentelemetry.io/otel/codes"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
"google.golang.org/grpc"
|
||||
grpccodes "google.golang.org/grpc/codes"
|
||||
estats "google.golang.org/grpc/experimental/stats"
|
||||
istats "google.golang.org/grpc/internal/stats"
|
||||
"google.golang.org/grpc/metadata"
|
||||
|
|
@ -85,8 +88,12 @@ func (h *clientStatsHandler) unaryInterceptor(ctx context.Context, method string
|
|||
}
|
||||
|
||||
startTime := time.Now()
|
||||
var span trace.Span
|
||||
if h.options.isTracingEnabled() {
|
||||
ctx, span = h.createCallTraceSpan(ctx, method)
|
||||
}
|
||||
err := invoker(ctx, method, req, reply, cc, opts...)
|
||||
h.perCallMetrics(ctx, err, startTime, ci)
|
||||
h.perCallTracesAndMetrics(ctx, err, startTime, ci, span)
|
||||
return err
|
||||
}
|
||||
|
||||
|
|
@ -119,22 +126,37 @@ func (h *clientStatsHandler) streamInterceptor(ctx context.Context, desc *grpc.S
|
|||
}
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
var span trace.Span
|
||||
if h.options.isTracingEnabled() {
|
||||
ctx, span = h.createCallTraceSpan(ctx, method)
|
||||
}
|
||||
callback := func(err error) {
|
||||
h.perCallMetrics(ctx, err, startTime, ci)
|
||||
h.perCallTracesAndMetrics(ctx, err, startTime, ci, span)
|
||||
}
|
||||
opts = append([]grpc.CallOption{grpc.OnFinish(callback)}, opts...)
|
||||
return streamer(ctx, desc, cc, method, opts...)
|
||||
}
|
||||
|
||||
func (h *clientStatsHandler) perCallMetrics(ctx context.Context, err error, startTime time.Time, ci *callInfo) {
|
||||
callLatency := float64(time.Since(startTime)) / float64(time.Second) // calculate ASAP
|
||||
attrs := otelmetric.WithAttributeSet(otelattribute.NewSet(
|
||||
otelattribute.String("grpc.method", ci.method),
|
||||
otelattribute.String("grpc.target", ci.target),
|
||||
otelattribute.String("grpc.status", canonicalString(status.Code(err))),
|
||||
))
|
||||
h.clientMetrics.callDuration.Record(ctx, callLatency, attrs)
|
||||
// perCallTracesAndMetrics records per call trace spans and metrics.
|
||||
func (h *clientStatsHandler) perCallTracesAndMetrics(ctx context.Context, err error, startTime time.Time, ci *callInfo, ts trace.Span) {
|
||||
if h.options.isTracingEnabled() {
|
||||
s := status.Convert(err)
|
||||
if s.Code() == grpccodes.OK {
|
||||
ts.SetStatus(otelcodes.Ok, s.Message())
|
||||
} else {
|
||||
ts.SetStatus(otelcodes.Error, s.Message())
|
||||
}
|
||||
ts.End()
|
||||
}
|
||||
if h.options.isMetricsEnabled() {
|
||||
callLatency := float64(time.Since(startTime)) / float64(time.Second)
|
||||
attrs := otelmetric.WithAttributeSet(otelattribute.NewSet(
|
||||
otelattribute.String("grpc.method", ci.method),
|
||||
otelattribute.String("grpc.target", ci.target),
|
||||
otelattribute.String("grpc.status", canonicalString(status.Code(err))),
|
||||
))
|
||||
h.clientMetrics.callDuration.Record(ctx, callLatency, attrs)
|
||||
}
|
||||
}
|
||||
|
||||
// TagConn exists to satisfy stats.Handler.
|
||||
|
|
@ -163,15 +185,17 @@ func (h *clientStatsHandler) TagRPC(ctx context.Context, info *stats.RPCTagInfo)
|
|||
}
|
||||
ctx = istats.SetLabels(ctx, labels)
|
||||
}
|
||||
ai := &attemptInfo{ // populates information about RPC start.
|
||||
ai := &attemptInfo{
|
||||
startTime: time.Now(),
|
||||
xdsLabels: labels.TelemetryLabels,
|
||||
method: info.FullMethodName,
|
||||
method: removeLeadingSlash(info.FullMethodName),
|
||||
}
|
||||
ri := &rpcInfo{
|
||||
if h.options.isTracingEnabled() {
|
||||
ctx, ai = h.traceTagRPC(ctx, ai)
|
||||
}
|
||||
return setRPCInfo(ctx, &rpcInfo{
|
||||
ai: ai,
|
||||
}
|
||||
return setRPCInfo(ctx, ri)
|
||||
})
|
||||
}
|
||||
|
||||
func (h *clientStatsHandler) HandleRPC(ctx context.Context, rs stats.RPCStats) {
|
||||
|
|
@ -180,7 +204,12 @@ func (h *clientStatsHandler) HandleRPC(ctx context.Context, rs stats.RPCStats) {
|
|||
logger.Error("ctx passed into client side stats handler metrics event handling has no client attempt data present")
|
||||
return
|
||||
}
|
||||
h.processRPCEvent(ctx, rs, ri.ai)
|
||||
if h.options.isMetricsEnabled() {
|
||||
h.processRPCEvent(ctx, rs, ri.ai)
|
||||
}
|
||||
if h.options.isTracingEnabled() {
|
||||
populateSpan(rs, ri.ai)
|
||||
}
|
||||
}
|
||||
|
||||
func (h *clientStatsHandler) processRPCEvent(ctx context.Context, s stats.RPCStats, ai *attemptInfo) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
* Copyright 2024 gRPC authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package opentelemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
otelinternaltracing "google.golang.org/grpc/stats/opentelemetry/internal/tracing"
|
||||
)
|
||||
|
||||
// traceTagRPC populates provided context with a new span using the
|
||||
// TextMapPropagator supplied in trace options and internal itracing.carrier.
|
||||
// It creates a new outgoing carrier which serializes information about this
|
||||
// span into gRPC Metadata, if TextMapPropagator is provided in the trace
|
||||
// options. if TextMapPropagator is not provided, it returns the context as is.
|
||||
func (h *clientStatsHandler) traceTagRPC(ctx context.Context, ai *attemptInfo) (context.Context, *attemptInfo) {
|
||||
mn := "Attempt." + strings.Replace(ai.method, "/", ".", -1)
|
||||
tracer := otel.Tracer("grpc-open-telemetry")
|
||||
ctx, span := tracer.Start(ctx, mn)
|
||||
carrier := otelinternaltracing.NewOutgoingCarrier(ctx)
|
||||
otel.GetTextMapPropagator().Inject(ctx, carrier)
|
||||
ai.traceSpan = span
|
||||
return carrier.Context(), ai
|
||||
}
|
||||
|
||||
// createCallTraceSpan creates a call span to put in the provided context using
|
||||
// provided TraceProvider. If TraceProvider is nil, it returns context as is.
|
||||
func (h *clientStatsHandler) createCallTraceSpan(ctx context.Context, method string) (context.Context, trace.Span) {
|
||||
if h.options.TraceOptions.TracerProvider == nil {
|
||||
logger.Error("TraceProvider is not provided in trace options")
|
||||
return ctx, nil
|
||||
}
|
||||
mn := strings.Replace(removeLeadingSlash(method), "/", ".", -1)
|
||||
tracer := otel.Tracer("grpc-open-telemetry")
|
||||
ctx, span := tracer.Start(ctx, mn, trace.WithSpanKind(trace.SpanKindClient))
|
||||
return ctx, span
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -27,35 +27,50 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
otelattribute "go.opentelemetry.io/otel/attribute"
|
||||
otelmetric "go.opentelemetry.io/otel/metric"
|
||||
"go.opentelemetry.io/otel/metric/noop"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/codes"
|
||||
experimental "google.golang.org/grpc/experimental/opentelemetry"
|
||||
estats "google.golang.org/grpc/experimental/stats"
|
||||
"google.golang.org/grpc/grpclog"
|
||||
"google.golang.org/grpc/internal"
|
||||
"google.golang.org/grpc/stats"
|
||||
otelinternal "google.golang.org/grpc/stats/opentelemetry/internal"
|
||||
|
||||
otelattribute "go.opentelemetry.io/otel/attribute"
|
||||
otelmetric "go.opentelemetry.io/otel/metric"
|
||||
"go.opentelemetry.io/otel/metric/noop"
|
||||
)
|
||||
|
||||
func init() {
|
||||
otelinternal.SetPluginOption = func(o *Options, po otelinternal.PluginOption) {
|
||||
o.MetricsOptions.pluginOption = po
|
||||
// Log an error if one of the options is missing.
|
||||
if (o.TraceOptions.TextMapPropagator == nil) != (o.TraceOptions.TracerProvider == nil) {
|
||||
logger.Warning("Tracing will not be recorded because traceOptions are not set properly: one of TextMapPropagator or TracerProvider is missing")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var logger = grpclog.Component("otel-plugin")
|
||||
|
||||
var canonicalString = internal.CanonicalString.(func(codes.Code) string)
|
||||
|
||||
var joinDialOptions = internal.JoinDialOptions.(func(...grpc.DialOption) grpc.DialOption)
|
||||
var (
|
||||
logger = grpclog.Component("otel-plugin")
|
||||
canonicalString = internal.CanonicalString.(func(codes.Code) string)
|
||||
joinDialOptions = internal.JoinDialOptions.(func(...grpc.DialOption) grpc.DialOption)
|
||||
)
|
||||
|
||||
// Options are the options for OpenTelemetry instrumentation. Metrics and
// tracing are configured independently of each other.
|
||||
type Options struct {
|
||||
// MetricsOptions are the metrics options for OpenTelemetry instrumentation.
// Metrics are treated as enabled when its MeterProvider is set.
|
||||
MetricsOptions MetricsOptions
|
||||
// TraceOptions are the tracing options for OpenTelemetry instrumentation.
// Tracing is treated as enabled when its TracerProvider is set.
|
||||
TraceOptions experimental.TraceOptions
|
||||
}
|
||||
|
||||
// isMetricsEnabled reports whether a MeterProvider has been set in the metrics
// options.
func (o *Options) isMetricsEnabled() bool {
|
||||
return o.MetricsOptions.MeterProvider != nil
|
||||
}
|
||||
|
||||
// isTracingEnabled reports whether a TracerProvider has been set in the trace
// options. The TextMapPropagator is not consulted here.
func (o *Options) isTracingEnabled() bool {
|
||||
return o.TraceOptions.TracerProvider != nil
|
||||
}
|
||||
|
||||
// MetricsOptions are the metrics options for OpenTelemetry instrumentation.
|
||||
|
|
@ -187,6 +202,15 @@ type attemptInfo struct {
|
|||
|
||||
pluginOptionLabels map[string]string // pluginOptionLabels to attach to metrics emitted
|
||||
xdsLabels map[string]string
|
||||
|
||||
// traceSpan is data used for recording traces.
|
||||
traceSpan trace.Span
|
||||
// message counters for sent and received messages (used for
|
||||
// generating message IDs), and the number of previous RPC attempts for the
|
||||
// associated call.
|
||||
countSentMsg uint32
|
||||
countRecvMsg uint32
|
||||
previousRPCAttempts uint32
|
||||
}
|
||||
|
||||
type clientMetrics struct {
|
||||
|
|
|
|||
|
|
@ -201,10 +201,12 @@ func (h *serverStatsHandler) TagRPC(ctx context.Context, info *stats.RPCTagInfo)
|
|||
startTime: time.Now(),
|
||||
method: removeLeadingSlash(method),
|
||||
}
|
||||
ri := &rpcInfo{
|
||||
ai: ai,
|
||||
if h.options.isTracingEnabled() {
|
||||
ctx, ai = h.traceTagRPC(ctx, ai)
|
||||
}
|
||||
return setRPCInfo(ctx, ri)
|
||||
return setRPCInfo(ctx, &rpcInfo{
|
||||
ai: ai,
|
||||
})
|
||||
}
|
||||
|
||||
// HandleRPC implements per RPC tracing and stats implementation.
|
||||
|
|
@ -214,7 +216,12 @@ func (h *serverStatsHandler) HandleRPC(ctx context.Context, rs stats.RPCStats) {
|
|||
logger.Error("ctx passed into server side stats handler metrics event handling has no server call data present")
|
||||
return
|
||||
}
|
||||
h.processRPCData(ctx, rs, ri.ai)
|
||||
if h.options.isTracingEnabled() {
|
||||
populateSpan(rs, ri.ai)
|
||||
}
|
||||
if h.options.isMetricsEnabled() {
|
||||
h.processRPCData(ctx, rs, ri.ai)
|
||||
}
|
||||
}
|
||||
|
||||
func (h *serverStatsHandler) processRPCData(ctx context.Context, s stats.RPCStats, ai *attemptInfo) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright 2024 gRPC authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package opentelemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
otelinternaltracing "google.golang.org/grpc/stats/opentelemetry/internal/tracing"
|
||||
)
|
||||
|
||||
// traceTagRPC populates context with new span data using the TextMapPropagator
|
||||
// supplied in trace options and internal itracing.Carrier. It creates a new
|
||||
// incoming carrier which extracts an existing span context (if present) by
|
||||
// deserializing from provided context. If valid span context is extracted, it
|
||||
// is set as parent of the new span otherwise new span remains the root span.
|
||||
// If TextMapPropagator is not provided in the trace options, it returns context
|
||||
// as is.
|
||||
func (h *serverStatsHandler) traceTagRPC(ctx context.Context, ai *attemptInfo) (context.Context, *attemptInfo) {
|
||||
mn := strings.Replace(ai.method, "/", ".", -1)
|
||||
var span trace.Span
|
||||
tracer := otel.Tracer("grpc-open-telemetry")
|
||||
ctx = otel.GetTextMapPropagator().Extract(ctx, otelinternaltracing.NewIncomingCarrier(ctx))
|
||||
// If the context.Context provided in `ctx` to tracer.Start(), contains a
|
||||
// span then the newly-created Span will be a child of that span,
|
||||
// otherwise it will be a root span.
|
||||
ctx, span = tracer.Start(ctx, mn, trace.WithSpanKind(trace.SpanKindServer))
|
||||
ai.traceSpan = span
|
||||
return ctx, ai
|
||||
}
|
||||
|
|
@ -0,0 +1,82 @@
|
|||
/*
|
||||
* Copyright 2024 gRPC authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package opentelemetry
|
||||
|
||||
import (
|
||||
"sync/atomic"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
otelcodes "go.opentelemetry.io/otel/codes"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
"google.golang.org/grpc/stats"
|
||||
"google.golang.org/grpc/status"
|
||||
)
|
||||
|
||||
// populateSpan populates span information based on stats passed in, representing
|
||||
// invariants of the RPC lifecycle. It ends the span, triggering its export.
|
||||
// This function handles attempt spans on the client-side and call spans on the
|
||||
// server-side.
|
||||
func populateSpan(rs stats.RPCStats, ai *attemptInfo) {
|
||||
if ai == nil || ai.traceSpan == nil {
|
||||
// Shouldn't happen, tagRPC call comes before this function gets called
|
||||
// which populates this information.
|
||||
logger.Error("ctx passed into stats handler tracing event handling has no traceSpan present")
|
||||
return
|
||||
}
|
||||
span := ai.traceSpan
|
||||
|
||||
switch rs := rs.(type) {
|
||||
case *stats.Begin:
|
||||
// Note: Go always added Client and FailFast attributes even though they are not
|
||||
// defined by the OpenCensus gRPC spec. Thus, they are unimportant for
|
||||
// correctness.
|
||||
span.SetAttributes(
|
||||
attribute.Bool("Client", rs.Client),
|
||||
attribute.Bool("FailFast", rs.Client),
|
||||
attribute.Int64("previous-rpc-attempts", int64(ai.previousRPCAttempts)),
|
||||
attribute.Bool("transparent-retry", rs.IsTransparentRetryAttempt),
|
||||
)
|
||||
// increment previous rpc attempts applicable for next attempt
|
||||
atomic.AddUint32(&ai.previousRPCAttempts, 1)
|
||||
case *stats.PickerUpdated:
|
||||
span.AddEvent("Delayed LB pick complete")
|
||||
case *stats.InPayload:
|
||||
// message id - "must be calculated as two different counters starting
|
||||
// from one for sent messages and one for received messages."
|
||||
ai.countRecvMsg++
|
||||
span.AddEvent("Inbound compressed message", trace.WithAttributes(
|
||||
attribute.Int64("sequence-number", int64(ai.countRecvMsg)),
|
||||
attribute.Int64("message-size", int64(rs.Length)),
|
||||
attribute.Int64("message-size-compressed", int64(rs.CompressedLength)),
|
||||
))
|
||||
case *stats.OutPayload:
|
||||
ai.countSentMsg++
|
||||
span.AddEvent("Outbound compressed message", trace.WithAttributes(
|
||||
attribute.Int64("sequence-number", int64(ai.countSentMsg)),
|
||||
attribute.Int64("message-size", int64(rs.Length)),
|
||||
attribute.Int64("message-size-compressed", int64(rs.CompressedLength)),
|
||||
))
|
||||
case *stats.End:
|
||||
if rs.Error != nil {
|
||||
s := status.Convert(rs.Error)
|
||||
span.SetStatus(otelcodes.Error, s.Message())
|
||||
} else {
|
||||
span.SetStatus(otelcodes.Ok, "Ok")
|
||||
}
|
||||
span.End()
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue