mirror of https://github.com/grpc/grpc-go.git
client: fix potential panic during RPC retries (#5323)
parent 78b13f27de
commit 799605c228
@@ -907,14 +907,10 @@ func (cc *ClientConn) healthCheckConfig() *healthCheckConfig {
 }
 
 func (cc *ClientConn) getTransport(ctx context.Context, failfast bool, method string) (transport.ClientTransport, func(balancer.DoneInfo), error) {
-	t, done, err := cc.blockingpicker.pick(ctx, failfast, balancer.PickInfo{
+	return cc.blockingpicker.pick(ctx, failfast, balancer.PickInfo{
 		Ctx:            ctx,
 		FullMethodName: method,
 	})
-	if err != nil {
-		return nil, nil, toRPCErr(err)
-	}
-	return t, done, nil
 }
 
 func (cc *ClientConn) applyServiceConfigAndBalancer(sc *ServiceConfig, configSelector iresolver.ConfigSelector, addrs []resolver.Address) {
@@ -631,8 +631,8 @@ func (t *http2Client) getCallAuthData(ctx context.Context, audience string, call
 // the wire. However, there are two notable exceptions:
 //
 // 1. If the stream headers violate the max header list size allowed by the
-//    server. In this case there is no reason to retry at all, as it is
-//    assumed the RPC would continue to fail on subsequent attempts.
+//    server. It's possible this could succeed on another transport, even if
+//    it's unlikely, but do not transparently retry.
 // 2. If the credentials errored when requesting their headers. In this case,
 //    it's possible a retry can fix the problem, but indefinitely transparently
 //    retrying is not appropriate as it is likely the credentials, if they can
@@ -640,8 +640,7 @@ func (t *http2Client) getCallAuthData(ctx context.Context, audience string, call
 type NewStreamError struct {
 	Err error
 
-	DoNotRetry            bool
-	DoNotTransparentRetry bool
+	AllowTransparentRetry bool
 }
 
 func (e NewStreamError) Error() string {
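Note on the field change above: transparent retry switches from an opt-out flag (DoNotTransparentRetry) to an opt-in flag (AllowTransparentRetry), so a zero-value NewStreamError now means "do not transparently retry". The following standalone sketch uses stand-in types, not the grpc-go source, to illustrate that default:

package main

import "fmt"

// NewStreamError here is a minimal stand-in for the transport type above:
// after the change, transparent retry must be requested explicitly.
type NewStreamError struct {
	Err                   error
	AllowTransparentRetry bool
}

// retryDecision shows how a caller would read the flag.
func retryDecision(e *NewStreamError) string {
	if e.AllowTransparentRetry {
		return "transparent retry"
	}
	return "surface the error to the retry policy"
}

func main() {
	fmt.Println(retryDecision(&NewStreamError{}))                            // surface the error to the retry policy
	fmt.Println(retryDecision(&NewStreamError{AllowTransparentRetry: true})) // transparent retry
}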
@@ -650,11 +649,11 @@ func (e NewStreamError) Error() string {
 
 // NewStream creates a stream and registers it into the transport as "active"
 // streams. All non-nil errors returned will be *NewStreamError.
-func (t *http2Client) NewStream(ctx context.Context, callHdr *CallHdr) (_ *Stream, err error) {
+func (t *http2Client) NewStream(ctx context.Context, callHdr *CallHdr) (*Stream, error) {
 	ctx = peer.NewContext(ctx, t.getPeer())
 	headerFields, err := t.createHeaderFields(ctx, callHdr)
 	if err != nil {
-		return nil, &NewStreamError{Err: err, DoNotTransparentRetry: true}
+		return nil, &NewStreamError{Err: err, AllowTransparentRetry: false}
 	}
 	s := t.newStream(ctx, callHdr)
 	cleanup := func(err error) {
@@ -754,13 +753,14 @@ func (t *http2Client) NewStream(ctx context.Context, callHdr *CallHdr) (_ *Strea
 			return true
 		}, hdr)
 		if err != nil {
-			return nil, &NewStreamError{Err: err}
+			// Connection closed.
+			return nil, &NewStreamError{Err: err, AllowTransparentRetry: true}
 		}
 		if success {
 			break
 		}
 		if hdrListSizeErr != nil {
-			return nil, &NewStreamError{Err: hdrListSizeErr, DoNotRetry: true}
+			return nil, &NewStreamError{Err: hdrListSizeErr}
 		}
 		firstTry = false
 		select {
@@ -768,9 +768,9 @@ func (t *http2Client) NewStream(ctx context.Context, callHdr *CallHdr) (_ *Strea
 		case <-ctx.Done():
 			return nil, &NewStreamError{Err: ContextErr(ctx.Err())}
 		case <-t.goAway:
-			return nil, &NewStreamError{Err: errStreamDrain}
+			return nil, &NewStreamError{Err: errStreamDrain, AllowTransparentRetry: true}
 		case <-t.ctx.Done():
-			return nil, &NewStreamError{Err: ErrConnClosing}
+			return nil, &NewStreamError{Err: ErrConnClosing, AllowTransparentRetry: true}
 		}
 	}
 	if t.statsHandler != nil {
@@ -131,7 +131,7 @@ func (pw *pickerWrapper) pick(ctx context.Context, failfast bool, info balancer.
 			}
 			if _, ok := status.FromError(err); ok {
 				// Status error: end the RPC unconditionally with this status.
-				return nil, nil, err
+				return nil, nil, dropError{error: err}
 			}
 			// For all other errors, wait for ready RPCs should block and other
 			// RPCs should fail with unavailable.
@@ -175,3 +175,9 @@ func (pw *pickerWrapper) close() {
 	pw.done = true
 	close(pw.blockingCh)
 }
+
+// dropError is a wrapper error that indicates the LB policy wishes to drop the
+// RPC and not retry it.
+type dropError struct {
+	error
+}
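To illustrate the pick-side behavior described above, here is a hedged, standalone sketch (helper names are assumptions, not the grpc-go implementation) of the classification the comments describe: a status error from the LB policy becomes a drop, while other pick errors either keep wait-for-ready RPCs blocking or fail fail-fast RPCs as UNAVAILABLE.

package main

import (
	"fmt"

	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

// dropError mirrors the wrapper introduced in the hunk above.
type dropError struct{ error }

// classifyPickErr is an illustrative stand-in for the branch inside
// pickerWrapper.pick. In this sketch, a nil return means "keep waiting for a
// new picker" (the wait-for-ready case).
func classifyPickErr(err error, failfast bool) error {
	if _, ok := status.FromError(err); ok {
		// Status error from the LB policy: drop the RPC with this status.
		return dropError{error: err}
	}
	if !failfast {
		return nil // wait-for-ready RPCs keep blocking
	}
	return status.Error(codes.Unavailable, err.Error())
}

func main() {
	fmt.Println(classifyPickErr(status.Error(codes.FailedPrecondition, "dropped by LB"), true))
	fmt.Println(classifyPickErr(fmt.Errorf("no subchannel ready"), true))
	fmt.Println(classifyPickErr(fmt.Errorf("no subchannel ready"), false))
}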
stream.go: 197 lines changed
@@ -303,14 +303,28 @@ func newClientStreamWithParams(ctx context.Context, desc *StreamDesc, cc *Client
 	}
 	cs.binlog = binarylog.GetMethodLogger(method)
 
-	if err := cs.newAttemptLocked(false /* isTransparent */); err != nil {
+	cs.attempt, err = cs.newAttemptLocked(false /* isTransparent */)
+	if err != nil {
 		cs.finish(err)
 		return nil, err
 	}
 
-	op := func(a *csAttempt) error { return a.newStream() }
+	// Pick the transport to use and create a new stream on the transport.
+	// Assign cs.attempt upon success.
+	op := func(a *csAttempt) error {
+		if err := a.getTransport(); err != nil {
+			return err
+		}
+		if err := a.newStream(); err != nil {
+			return err
+		}
+		// Because this operation is always called either here (while creating
+		// the clientStream) or by the retry code while locked when replaying
+		// the operation, it is safe to access cs.attempt directly.
+		cs.attempt = a
+		return nil
+	}
 	if err := cs.withRetry(op, func() { cs.bufferForRetryLocked(0, op) }); err != nil {
 		cs.finish(err)
 		return nil, err
 	}
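The reshaped op above is the heart of the fix: transport acquisition and stream creation now both happen inside the retried operation, and cs.attempt is only published once both succeed, so other methods can never observe an attempt without a stream. Below is a standalone sketch of that commit-on-success shape; all names are illustrative, not grpc-go source.

package main

import (
	"errors"
	"fmt"
)

// attempt is a minimal stand-in for csAttempt.
type attempt struct{ transport, stream string }

// call plays the role of clientStream: current is only updated on success.
type call struct{ current *attempt }

// runAttempt mirrors the shape of the new op: acquire everything the attempt
// needs first, and only publish it on the call once it is fully usable.
func (c *call) runAttempt(a *attempt, pick func() (string, error), open func(string) (string, error)) error {
	t, err := pick()
	if err != nil {
		return err // c.current is untouched, so readers never see a half-built attempt
	}
	a.transport = t
	s, err := open(t)
	if err != nil {
		return err
	}
	a.stream = s
	c.current = a // commit only after both steps succeeded
	return nil
}

func main() {
	c := &call{}
	err := c.runAttempt(&attempt{},
		func() (string, error) { return "", errors.New("pick failed") },
		func(string) (string, error) { return "stream-1", nil })
	fmt.Println(err, c.current) // pick failed <nil>
}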
@@ -349,9 +363,15 @@ func newClientStreamWithParams(ctx context.Context, desc *StreamDesc, cc *Client
 	return cs, nil
 }
 
-// newAttemptLocked creates a new attempt with a transport.
-// If it succeeds, then it replaces clientStream's attempt with this new attempt.
-func (cs *clientStream) newAttemptLocked(isTransparent bool) (retErr error) {
+// newAttemptLocked creates a new csAttempt without a transport or stream.
+func (cs *clientStream) newAttemptLocked(isTransparent bool) (*csAttempt, error) {
+	if err := cs.ctx.Err(); err != nil {
+		return nil, toRPCErr(err)
+	}
+	if err := cs.cc.ctx.Err(); err != nil {
+		return nil, ErrClientConnClosing
+	}
+
 	ctx := newContextWithRPCInfo(cs.ctx, cs.callInfo.failFast, cs.callInfo.codec, cs.cp, cs.comp)
 	method := cs.callHdr.Method
 	sh := cs.cc.dopts.copts.StatsHandler
@@ -385,27 +405,6 @@ func (cs *clientStream) newAttemptLocked(isTransparent bool) (retErr error) {
 		ctx = trace.NewContext(ctx, trInfo.tr)
 	}
 
-	newAttempt := &csAttempt{
-		ctx:          ctx,
-		beginTime:    beginTime,
-		cs:           cs,
-		dc:           cs.cc.dopts.dc,
-		statsHandler: sh,
-		trInfo:       trInfo,
-	}
-	defer func() {
-		if retErr != nil {
-			// This attempt is not set in the clientStream, so it's finish won't
-			// be called. Call it here for stats and trace in case they are not
-			// nil.
-			newAttempt.finish(retErr)
-		}
-	}()
-
-	if err := ctx.Err(); err != nil {
-		return toRPCErr(err)
-	}
-
 	if cs.cc.parsedTarget.Scheme == "xds" {
 		// Add extra metadata (metadata that will be added by transport) to context
 		// so the balancer can see them.
@@ -413,16 +412,32 @@ func (cs *clientStream) newAttemptLocked(isTransparent bool) (retErr error) {
 			"content-type", grpcutil.ContentType(cs.callHdr.ContentSubtype),
 		))
 	}
-	t, done, err := cs.cc.getTransport(ctx, cs.callInfo.failFast, cs.callHdr.Method)
+
+	return &csAttempt{
+		ctx:          ctx,
+		beginTime:    beginTime,
+		cs:           cs,
+		dc:           cs.cc.dopts.dc,
+		statsHandler: sh,
+		trInfo:       trInfo,
+	}, nil
+}
+
+func (a *csAttempt) getTransport() error {
+	cs := a.cs
+
+	var err error
+	a.t, a.done, err = cs.cc.getTransport(a.ctx, cs.callInfo.failFast, cs.callHdr.Method)
 	if err != nil {
+		if de, ok := err.(dropError); ok {
+			err = de.error
+			a.drop = true
+		}
 		return err
 	}
-	if trInfo != nil {
-		trInfo.firstLine.SetRemoteAddr(t.RemoteAddr())
+	if a.trInfo != nil {
+		a.trInfo.firstLine.SetRemoteAddr(a.t.RemoteAddr())
 	}
-	newAttempt.t = t
-	newAttempt.done = done
-	cs.attempt = newAttempt
 	return nil
 }
 
@@ -431,12 +446,21 @@ func (a *csAttempt) newStream() error {
 	cs.callHdr.PreviousAttempts = cs.numRetries
 	s, err := a.t.NewStream(a.ctx, cs.callHdr)
 	if err != nil {
-		// Return without converting to an RPC error so retry code can
-		// inspect.
-		return err
+		nse, ok := err.(*transport.NewStreamError)
+		if !ok {
+			// Unexpected.
+			return err
+		}
+
+		if nse.AllowTransparentRetry {
+			a.allowTransparentRetry = true
+		}
+
+		// Unwrap and convert error.
+		return toRPCErr(nse.Err)
 	}
-	cs.attempt.s = s
-	cs.attempt.p = &parser{r: s}
+	a.s = s
+	a.p = &parser{r: s}
 	return nil
 }
 
@@ -514,6 +538,11 @@ type csAttempt struct {
 
 	statsHandler stats.Handler
 	beginTime    time.Time
+
+	// set for newStream errors that may be transparently retried
+	allowTransparentRetry bool
+	// set for pick errors that are returned as a status
+	drop bool
 }
 
 func (cs *clientStream) commitAttemptLocked() {
@@ -533,41 +562,21 @@ func (cs *clientStream) commitAttempt() {
 // shouldRetry returns nil if the RPC should be retried; otherwise it returns
 // the error that should be returned by the operation. If the RPC should be
 // retried, the bool indicates whether it is being retried transparently.
-func (cs *clientStream) shouldRetry(err error) (bool, error) {
-	if cs.attempt.s == nil {
-		// Error from NewClientStream.
-		nse, ok := err.(*transport.NewStreamError)
-		if !ok {
-			// Unexpected, but assume no I/O was performed and the RPC is not
-			// fatal, so retry indefinitely.
-			return true, nil
-		}
+func (a *csAttempt) shouldRetry(err error) (bool, error) {
+	cs := a.cs
 
-		// Unwrap and convert error.
-		err = toRPCErr(nse.Err)
-
-		// Never retry DoNotRetry errors, which indicate the RPC should not be
-		// retried due to max header list size violation, etc.
-		if nse.DoNotRetry {
-			return false, err
-		}
-
-		// In the event of a non-IO operation error from NewStream, we never
-		// attempted to write anything to the wire, so we can retry
-		// indefinitely.
-		if !nse.DoNotTransparentRetry {
-			return true, nil
-		}
-	}
-	if cs.finished || cs.committed {
-		// RPC is finished or committed; cannot retry.
+	if cs.finished || cs.committed || a.drop {
+		// RPC is finished or committed or was dropped by the picker; cannot retry.
 		return false, err
 	}
+	if a.s == nil && a.allowTransparentRetry {
+		return true, nil
+	}
 	// Wait for the trailers.
 	unprocessed := false
-	if cs.attempt.s != nil {
-		<-cs.attempt.s.Done()
-		unprocessed = cs.attempt.s.Unprocessed()
+	if a.s != nil {
+		<-a.s.Done()
+		unprocessed = a.s.Unprocessed()
 	}
 	if cs.firstAttempt && unprocessed {
 		// First attempt, stream unprocessed: transparently retry.
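For orientation, the early exits of the rewritten shouldRetry can be read as a small decision ladder. The sketch below uses stand-in fields and is not grpc-go source; it only mirrors the ordering shown in the hunk above.

package main

import "fmt"

// attemptState is an illustrative stand-in for the fields the attempt and
// stream consult above.
type attemptState struct {
	finishedOrCommitted     bool // RPC already finished or committed
	drop                    bool // picker returned a status error (drop)
	haveStream              bool // a.s != nil
	allowTransparentRetry   bool // copied from NewStreamError.AllowTransparentRetry
	firstAttemptUnprocessed bool // first attempt and the server never processed the stream
}

// decide mirrors the early exits: "stop" means return the error, "transparent"
// means retry without consulting the configured retry policy.
func decide(s attemptState) string {
	if s.finishedOrCommitted || s.drop {
		return "stop"
	}
	if !s.haveStream && s.allowTransparentRetry {
		return "transparent"
	}
	if s.firstAttemptUnprocessed {
		return "transparent"
	}
	return "consult retry policy / pushback"
}

func main() {
	fmt.Println(decide(attemptState{drop: true}))                                      // stop
	fmt.Println(decide(attemptState{allowTransparentRetry: true}))                     // transparent
	fmt.Println(decide(attemptState{haveStream: true, firstAttemptUnprocessed: true})) // transparent
	fmt.Println(decide(attemptState{haveStream: true}))                                // consult retry policy / pushback
}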
@@ -579,14 +588,14 @@ func (cs *clientStream) shouldRetry(err error) (bool, error) {
 
 	pushback := 0
 	hasPushback := false
-	if cs.attempt.s != nil {
-		if !cs.attempt.s.TrailersOnly() {
+	if a.s != nil {
+		if !a.s.TrailersOnly() {
 			return false, err
 		}
 
 		// TODO(retry): Move down if the spec changes to not check server pushback
 		// before considering this a failure for throttling.
-		sps := cs.attempt.s.Trailer()["grpc-retry-pushback-ms"]
+		sps := a.s.Trailer()["grpc-retry-pushback-ms"]
 		if len(sps) == 1 {
 			var e error
 			if pushback, e = strconv.Atoi(sps[0]); e != nil || pushback < 0 {
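The trailer handling above follows the retry pushback convention: a single "grpc-retry-pushback-ms" trailer value, where a missing, duplicated, malformed, or negative value counts as no valid pushback. A minimal standalone sketch of that parse rule (stdlib only, not grpc-go source):

package main

import (
	"fmt"
	"strconv"
	"time"
)

// parsePushback returns the server-requested retry delay and whether the
// trailer carried a single valid, non-negative millisecond value.
func parsePushback(trailer map[string][]string) (time.Duration, bool) {
	sps := trailer["grpc-retry-pushback-ms"]
	if len(sps) != 1 {
		return 0, false
	}
	ms, err := strconv.Atoi(sps[0])
	if err != nil || ms < 0 {
		return 0, false
	}
	return time.Duration(ms) * time.Millisecond, true
}

func main() {
	fmt.Println(parsePushback(map[string][]string{"grpc-retry-pushback-ms": {"10"}})) // 10ms true
	fmt.Println(parsePushback(map[string][]string{"grpc-retry-pushback-ms": {"-1"}})) // 0s false
	fmt.Println(parsePushback(nil))                                                   // 0s false
}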
@@ -603,10 +612,10 @@ func (cs *clientStream) shouldRetry(err error) (bool, error) {
 	}
 
 	var code codes.Code
-	if cs.attempt.s != nil {
-		code = cs.attempt.s.Status().Code()
+	if a.s != nil {
+		code = a.s.Status().Code()
 	} else {
-		code = status.Convert(err).Code()
+		code = status.Code(err)
 	}
 
 	rp := cs.methodConfig.RetryPolicy
@@ -651,19 +660,24 @@ func (cs *clientStream) shouldRetry(err error) (bool, error) {
 }
 
 // Returns nil if a retry was performed and succeeded; error otherwise.
-func (cs *clientStream) retryLocked(lastErr error) error {
+func (cs *clientStream) retryLocked(attempt *csAttempt, lastErr error) error {
 	for {
-		cs.attempt.finish(toRPCErr(lastErr))
-		isTransparent, err := cs.shouldRetry(lastErr)
+		attempt.finish(toRPCErr(lastErr))
+		isTransparent, err := attempt.shouldRetry(lastErr)
 		if err != nil {
 			cs.commitAttemptLocked()
 			return err
 		}
 		cs.firstAttempt = false
-		if err := cs.newAttemptLocked(isTransparent); err != nil {
+		attempt, err = cs.newAttemptLocked(isTransparent)
+		if err != nil {
+			// Only returns error if the clientconn is closed or the context of
+			// the stream is canceled.
 			return err
 		}
-		if lastErr = cs.replayBufferLocked(); lastErr == nil {
+		// Note that the first op in the replay buffer always sets cs.attempt
+		// if it is able to pick a transport and create a stream.
+		if lastErr = cs.replayBufferLocked(attempt); lastErr == nil {
 			return nil
 		}
 	}
@@ -673,7 +687,10 @@ func (cs *clientStream) Context() context.Context {
 	cs.commitAttempt()
 	// No need to lock before using attempt, since we know it is committed and
 	// cannot change.
-	return cs.attempt.s.Context()
+	if cs.attempt.s != nil {
+		return cs.attempt.s.Context()
+	}
+	return cs.ctx
 }
 
 func (cs *clientStream) withRetry(op func(a *csAttempt) error, onSuccess func()) error {
@@ -703,7 +720,7 @@ func (cs *clientStream) withRetry(op func(a *csAttempt) error, onSuccess func())
 			cs.mu.Unlock()
 			return err
 		}
-		if err := cs.retryLocked(err); err != nil {
+		if err := cs.retryLocked(a, err); err != nil {
 			cs.mu.Unlock()
 			return err
 		}
@@ -734,7 +751,7 @@ func (cs *clientStream) Header() (metadata.MD, error) {
 		cs.binlog.Log(logEntry)
 		cs.serverHeaderBinlogged = true
 	}
-	return m, err
+	return m, nil
 }
 
 func (cs *clientStream) Trailer() metadata.MD {
@@ -752,10 +769,9 @@ func (cs *clientStream) Trailer() metadata.MD {
 	return cs.attempt.s.Trailer()
 }
 
-func (cs *clientStream) replayBufferLocked() error {
-	a := cs.attempt
+func (cs *clientStream) replayBufferLocked(attempt *csAttempt) error {
 	for _, f := range cs.buffer {
-		if err := f(a); err != nil {
+		if err := f(attempt); err != nil {
 			return err
 		}
 	}
@@ -803,22 +819,17 @@ func (cs *clientStream) SendMsg(m interface{}) (err error) {
 	if len(payload) > *cs.callInfo.maxSendMessageSize {
 		return status.Errorf(codes.ResourceExhausted, "trying to send message larger than max (%d vs. %d)", len(payload), *cs.callInfo.maxSendMessageSize)
 	}
-	msgBytes := data // Store the pointer before setting to nil. For binary logging.
 	op := func(a *csAttempt) error {
-		err := a.sendMsg(m, hdr, payload, data)
-		// nil out the message and uncomp when replaying; they are only needed for
-		// stats which is disabled for subsequent attempts.
-		m, data = nil, nil
-		return err
+		return a.sendMsg(m, hdr, payload, data)
 	}
 	err = cs.withRetry(op, func() { cs.bufferForRetryLocked(len(hdr)+len(payload), op) })
 	if cs.binlog != nil && err == nil {
 		cs.binlog.Log(&binarylog.ClientMessage{
 			OnClientSide: true,
-			Message:      msgBytes,
+			Message:      data,
 		})
 	}
-	return
+	return err
 }
 
 func (cs *clientStream) RecvMsg(m interface{}) error {
@@ -1508,7 +1508,7 @@ func testFailFast(t *testing.T, e env) {
 
 	cc := te.clientConn()
 	tc := testpb.NewTestServiceClient(cc)
-	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
 	defer cancel()
 	if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); err != nil {
 		t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err)
@@ -1517,9 +1517,10 @@ func testFailFast(t *testing.T, e env) {
 	te.srv.Stop()
 	// Loop until the server teardown is propagated to the client.
 	for {
-		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+		if err := ctx.Err(); err != nil {
+			t.Fatalf("EmptyCall did not return UNAVAILABLE before timeout")
+		}
 		_, err := tc.EmptyCall(ctx, &testpb.Empty{})
-		cancel()
 		if status.Code(err) == codes.Unavailable {
 			break
 		}
@@ -34,6 +34,7 @@ import (
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/credentials/insecure"
+	"google.golang.org/grpc/internal/grpcsync"
 	"google.golang.org/grpc/internal/stubserver"
 	"google.golang.org/grpc/metadata"
 	"google.golang.org/grpc/stats"
@@ -44,7 +45,8 @@ import (
 func (s) TestRetryUnary(t *testing.T) {
 	i := -1
 	ss := &stubserver.StubServer{
-		EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) {
+		EmptyCallF: func(context.Context, *testpb.Empty) (r *testpb.Empty, err error) {
+			defer func() { t.Logf("server call %v returning err %v", i, err) }()
 			i++
 			switch i {
 			case 0, 2, 5:
@@ -55,11 +57,8 @@ func (s) TestRetryUnary(t *testing.T) {
 			return nil, status.New(codes.AlreadyExists, "retryable error").Err()
 		},
 	}
-	if err := ss.Start([]grpc.ServerOption{}); err != nil {
-		t.Fatalf("Error starting endpoint server: %v", err)
-	}
-	defer ss.Stop()
-	ss.NewServiceConfig(`{
+	if err := ss.Start([]grpc.ServerOption{},
+		grpc.WithDefaultServiceConfig(`{
     "methodConfig": [{
       "name": [{"service": "grpc.testing.TestService"}],
       "waitForReady": true,
@@ -70,18 +69,10 @@ func (s) TestRetryUnary(t *testing.T) {
       "BackoffMultiplier": 1.0,
       "RetryableStatusCodes": [ "ALREADY_EXISTS" ]
     }
-  }]}`)
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
-	for {
-		if ctx.Err() != nil {
-			t.Fatalf("Timed out waiting for service config update")
-		}
-		if ss.CC.GetMethodConfig("/grpc.testing.TestService/EmptyCall").WaitForReady != nil {
-			break
-		}
-		time.Sleep(time.Millisecond)
+  }]}`)); err != nil {
+		t.Fatalf("Error starting endpoint server: %v", err)
 	}
-	cancel()
+	defer ss.Stop()
 
 	testCases := []struct {
 		code  codes.Code
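The tests now install the retry policy at dial time through grpc.WithDefaultServiceConfig instead of pushing a config through the test resolver and polling for it. A hedged, standalone example of that dial option follows; the target address and policy values are made up, and only the service config shape and the WithDefaultServiceConfig option come from the diff (the other dial helpers are standard grpc-go public API).

package main

import (
	"log"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

func main() {
	// Dial with a default service config that enables retries for one service.
	conn, err := grpc.Dial("localhost:50051",
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithDefaultServiceConfig(`{
		  "methodConfig": [{
		    "name": [{"service": "grpc.testing.TestService"}],
		    "waitForReady": true,
		    "retryPolicy": {
		      "MaxAttempts": 4,
		      "InitialBackoff": ".01s",
		      "MaxBackoff": ".01s",
		      "BackoffMultiplier": 1.0,
		      "RetryableStatusCodes": [ "ALREADY_EXISTS" ]
		    }
		  }]
		}`))
	if err != nil {
		log.Fatalf("grpc.Dial: %v", err)
	}
	defer conn.Close()
}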
@@ -95,7 +86,8 @@ func (s) TestRetryUnary(t *testing.T) {
 		{codes.Internal, 11},
 		{codes.AlreadyExists, 15},
 	}
-	for _, tc := range testCases {
+	for num, tc := range testCases {
+		t.Log("Case", num)
 		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 		_, err := ss.Client.EmptyCall(ctx, &testpb.Empty{})
 		cancel()
@@ -120,11 +112,8 @@ func (s) TestRetryThrottling(t *testing.T) {
 			return nil, status.New(codes.Unavailable, "retryable error").Err()
 		},
 	}
-	if err := ss.Start([]grpc.ServerOption{}); err != nil {
-		t.Fatalf("Error starting endpoint server: %v", err)
-	}
-	defer ss.Stop()
-	ss.NewServiceConfig(`{
+	if err := ss.Start([]grpc.ServerOption{},
+		grpc.WithDefaultServiceConfig(`{
     "methodConfig": [{
      "name": [{"service": "grpc.testing.TestService"}],
      "waitForReady": true,
@@ -140,18 +129,10 @@ func (s) TestRetryThrottling(t *testing.T) {
       "maxTokens": 10,
       "tokenRatio": 0.5
     }
-  }`)
-	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
-	for {
-		if ctx.Err() != nil {
-			t.Fatalf("Timed out waiting for service config update")
-		}
-		if ss.CC.GetMethodConfig("/grpc.testing.TestService/EmptyCall").WaitForReady != nil {
-			break
-		}
-		time.Sleep(time.Millisecond)
+  }`)); err != nil {
+		t.Fatalf("Error starting endpoint server: %v", err)
 	}
-	cancel()
+	defer ss.Stop()
 
 	testCases := []struct {
 		code  codes.Code
@@ -430,11 +411,8 @@ func (s) TestRetryStreaming(t *testing.T) {
 			return nil
 		},
 	}
-	if err := ss.Start([]grpc.ServerOption{}, grpc.WithDefaultCallOptions(grpc.MaxRetryRPCBufferSize(200))); err != nil {
-		t.Fatalf("Error starting endpoint server: %v", err)
-	}
-	defer ss.Stop()
-	ss.NewServiceConfig(`{
+	if err := ss.Start([]grpc.ServerOption{}, grpc.WithDefaultCallOptions(grpc.MaxRetryRPCBufferSize(200)),
+		grpc.WithDefaultServiceConfig(`{
     "methodConfig": [{
       "name": [{"service": "grpc.testing.TestService"}],
       "waitForReady": true,
@@ -445,7 +423,10 @@ func (s) TestRetryStreaming(t *testing.T) {
       "BackoffMultiplier": 1.0,
       "RetryableStatusCodes": [ "UNAVAILABLE" ]
     }
-  }]}`)
+  }]}`)); err != nil {
+		t.Fatalf("Error starting endpoint server: %v", err)
+	}
+	defer ss.Stop()
 	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
 	for {
 		if ctx.Err() != nil {
@@ -644,3 +625,85 @@ func (s) TestRetryStats(t *testing.T) {
 		t.Fatalf("pushback time before final attempt = %v; want ~10ms", diff)
 	}
 }
+
+func (s) TestRetryTransparentWhenCommitted(t *testing.T) {
+	// With MaxConcurrentStreams=1:
+	//
+	// 1. Create stream 1 that is retriable.
+	// 2. Stream 1 is created and fails with a retriable code.
+	// 3. Create dummy stream 2, blocking indefinitely.
+	// 4. Stream 1 retries (and blocks until stream 2 finishes)
+	// 5. Stream 1 is canceled manually.
+	//
+	// If there is no bug, the stream is done and errors with CANCELED. With a bug:
+	//
+	// 6. Stream 1 has a nil stream (attempt.s). Operations like CloseSend will panic.
+
+	first := grpcsync.NewEvent()
+	ss := &stubserver.StubServer{
+		FullDuplexCallF: func(stream testpb.TestService_FullDuplexCallServer) error {
+			// signal?
+			if !first.HasFired() {
+				first.Fire()
+				t.Log("returned first error")
+				return status.Error(codes.AlreadyExists, "first attempt fails and is retriable")
+			}
+			t.Log("blocking")
+			<-stream.Context().Done()
+			return stream.Context().Err()
+		},
+	}
+
+	if err := ss.Start([]grpc.ServerOption{grpc.MaxConcurrentStreams(1)},
+		grpc.WithDefaultServiceConfig(`{
+    "methodConfig": [{
+      "name": [{"service": "grpc.testing.TestService"}],
+      "waitForReady": true,
+      "retryPolicy": {
+        "MaxAttempts": 2,
+        "InitialBackoff": ".1s",
+        "MaxBackoff": ".1s",
+        "BackoffMultiplier": 1.0,
+        "RetryableStatusCodes": [ "ALREADY_EXISTS" ]
+      }
+    }]}`)); err != nil {
+		t.Fatalf("Error starting endpoint server: %v", err)
+	}
+	defer ss.Stop()
+
+	ctx1, cancel1 := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel1()
+	ctx2, cancel2 := context.WithTimeout(context.Background(), defaultTestTimeout)
+	defer cancel2()
+
+	stream1, err := ss.Client.FullDuplexCall(ctx1)
+	if err != nil {
+		t.Fatalf("Error creating stream 1: %v", err)
+	}
+
+	// Create dummy stream to block indefinitely.
+	_, err = ss.Client.FullDuplexCall(ctx2)
+	if err != nil {
+		t.Errorf("Error creating stream 2: %v", err)
+	}
+
+	stream1Closed := grpcsync.NewEvent()
+	go func() {
+		_, err := stream1.Recv()
+		// Will trigger a retry when it sees the ALREADY_EXISTS error
+		if status.Code(err) != codes.Canceled {
+			t.Errorf("Expected stream1 to be canceled; got error: %v", err)
+		}
+		stream1Closed.Fire()
+	}()
+
+	// Wait longer than the retry backoff timer.
+	time.Sleep(200 * time.Millisecond)
+	cancel1()
+
+	// Operations on the stream should not panic.
+	<-stream1Closed.Done()
+	stream1.CloseSend()
+	stream1.Recv()
+	stream1.Send(&testpb.StreamingOutputCallRequest{})
+}