package grpc

import (
	"time"

	grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
	"golang.org/x/net/context"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/metadata"

	berrors "github.com/letsencrypt/boulder/errors"
	"github.com/letsencrypt/boulder/features"
)

// serverInterceptor is a gRPC interceptor that adds Prometheus
// metrics to requests handled by a gRPC server, and wraps Boulder-specific
// errors for transmission in a grpc/metadata trailer (see bcodes.go).
type serverInterceptor struct {
	serverMetrics *grpc_prometheus.ServerMetrics
}

// returnOverhead is the amount of time shaved off an RPC's deadline so the
// server can report timeouts in its own sub-calls before the overall deadline
// expires; meaningfulWorkOverhead is the minimum time that must remain after
// that shave for the server to start work at all.
const returnOverhead = 20 * time.Millisecond
const meaningfulWorkOverhead = 100 * time.Millisecond

func (si *serverInterceptor) intercept(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) {
	if info == nil {
		return nil, berrors.InternalServerError("passed nil *grpc.UnaryServerInfo")
	}

	if features.Enabled(features.RPCHeadroom) {
		// Shave 20 milliseconds off the deadline to ensure that if the RPC server times
		// out any sub-calls it makes (like DNS lookups, or onwards RPCs), it has a
		// chance to report that timeout to the client. This allows for more specific
		// errors, e.g. "the VA timed out looking up CAA for example.com" (when called
		// from RA.NewCertificate, which was called from WFE.NewCertificate), as
		// opposed to "RA.NewCertificate timed out" (causing a 500).
		// Once we've shaved the deadline, we ensure we have at least another
		// 100ms left to do work; otherwise we abort early.
		deadline, ok := ctx.Deadline()
		// Should never happen: there was no deadline.
		if !ok {
			deadline = time.Now().Add(100 * time.Second)
		}
		deadline = deadline.Add(-returnOverhead)
		remaining := deadline.Sub(time.Now())
		if remaining < meaningfulWorkOverhead {
			return nil, grpc.Errorf(codes.DeadlineExceeded, "not enough time left on clock: %s", remaining)
		}
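		// Worked example of the arithmetic above (illustrative numbers): if the
		// caller's deadline is 110ms away when the request arrives, shaving off
		// returnOverhead (20ms) leaves 90ms, which is below meaningfulWorkOverhead
		// (100ms), so we return DeadlineExceeded above rather than starting work
		// we could not finish and report back in time.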

		var cancel func()
		ctx, cancel = context.WithDeadline(ctx, deadline)
		defer cancel()
	}

	resp, err := si.serverMetrics.UnaryServerInterceptor()(ctx, req, info, handler)
	if err != nil {
		err = wrapError(ctx, err)
	}
	return resp, err
}
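
// Illustrative sketch (an assumption about wiring that lives outside this
// file, not a definitive reference): a serverInterceptor is typically
// constructed with a ServerMetrics instance and registered as the server's
// unary interceptor, roughly:
//
//	si := &serverInterceptor{serverMetrics: grpc_prometheus.NewServerMetrics()}
//	srv := grpc.NewServer(grpc.UnaryInterceptor(si.intercept))
//
// intercept then runs for every unary RPC the server handles.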

// clientInterceptor is a gRPC interceptor that adds Prometheus
// metrics to sent requests, and disables FailFast. We disable FailFast because
// non-FailFast mode is most similar to the old AMQP RPC layer: If a client
// makes a request while all backends are briefly down (e.g. for a restart), the
// request doesn't necessarily fail. A backend can service the request if it
// comes back up within the timeout. Under gRPC the same effect is achieved by
// retries up to the Context deadline.
type clientInterceptor struct {
	timeout       time.Duration
	clientMetrics *grpc_prometheus.ClientMetrics
}
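
// Illustrative sketch (assumed wiring outside this file; addr and creds are
// placeholders): a clientInterceptor is constructed with a per-RPC timeout and
// client metrics, and its intercept method is installed when dialing, roughly:
//
//	ci := &clientInterceptor{
//		timeout:       10 * time.Second,
//		clientMetrics: grpc_prometheus.NewClientMetrics(),
//	}
//	conn, err := grpc.Dial(addr, grpc.WithTransportCredentials(creds), grpc.WithUnaryInterceptor(ci.intercept))
//
// Every unary call on conn then gets the timeout, metrics, FailFast(false),
// and error-unwrapping behavior below.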

// intercept fulfils the grpc.UnaryClientInterceptor interface. Note that while
// this API is currently experimental, the metrics it reports should be kept as
// stable as can be, *within reason*.
func (ci *clientInterceptor) intercept(
	ctx context.Context,
	method string,
	req,
	reply interface{},
	cc *grpc.ClientConn,
	invoker grpc.UnaryInvoker,
	opts ...grpc.CallOption) error {
	localCtx, cancel := context.WithTimeout(ctx, ci.timeout)
	defer cancel()
	// Disable fail-fast so RPCs will retry until deadline, even if all backends
	// are down.
	opts = append(opts, grpc.FailFast(false))
	// Create a grpc/metadata.Metadata to capture the internal error type from
	// the response trailer, if one is returned.
	md := metadata.New(nil)
	opts = append(opts, grpc.Trailer(&md))
	err := ci.clientMetrics.UnaryClientInterceptor()(localCtx, method, req, reply, cc, invoker, opts...)
	if err != nil {
		err = unwrapError(err, md)
	}
	return err
}
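
// Note on the error round trip (a summary of the mechanism; the encoding
// details live in bcodes.go): on the server side, wrapError attaches the
// Boulder-specific error type to the RPC's trailer metadata; on the client
// side, grpc.Trailer(&md) above captures that trailer, and unwrapError uses it
// to reconstruct the original Boulder error for the caller.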