mirror of https://github.com/grpc/grpc-java.git
census: fix NPE in calling recordFinishedAttempt() (#8706)
Fix the NPE as shown in the following stacktrace:
```
Caused by: java.lang.RuntimeException: java.lang.NullPointerException with message: null
at io.grpc.census.CensusStatsModule$ClientTracer.recordFinishedAttempt(CensusStatsModule.java:388) ~[grpc-census-1.42.0.jar:1.42.0]
at io.grpc.census.CensusStatsModule$CallAttemptsTracerFactory.recordFinishedCall(CensusStatsModule.java:525) ~[grpc-census-1.42.0.jar:1.42.0]
at io.grpc.census.CensusStatsModule$CallAttemptsTracerFactory.attemptEnded(CensusStatsModule.java:492) ~[grpc-census-1.42.0.jar:1.42.0]
at io.grpc.census.CensusStatsModule$ClientTracer.streamClosed(CensusStatsModule.java:345) ~[grpc-census-1.42.0.jar:1.42.0]
at io.grpc.internal.StatsTraceContext.streamClosed(StatsTraceContext.java:155) ~[grpc-core-1.42.0.jar:1.42.0]
at io.grpc.internal.AbstractClientStream$TransportState.closeListener(AbstractClientStream.java:458) ~[grpc-core-1.42.0.jar:1.42.0]
at io.grpc.internal.AbstractClientStream$TransportState.access$400(AbstractClientStream.java:221) ~[grpc-core-1.42.0.jar:1.42.0]
at io.grpc.internal.AbstractClientStream$TransportState$1.run(AbstractClientStream.java:442) ~[grpc-core-1.42.0.jar:1.42.0]
at io.grpc.internal.AbstractClientStream$TransportState.deframerClosed(AbstractClientStream.java:278) ~[grpc-core-1.42.0.jar:1.42.0]
at io.grpc.internal.Http2ClientStreamTransportState.deframerClosed(Http2ClientStreamTransportState.java:31) ~[grpc-core-1.42.0.jar:1.42.0]
at io.grpc.internal.MessageDeframer.close(MessageDeframer.java:233) ~[grpc-core-1.42.0.jar:1.42.0]
at io.grpc.internal.MessageDeframer.closeWhenComplete(MessageDeframer.java:191) ~[grpc-core-1.42.0.jar:1.42.0]
at io.grpc.internal.AbstractStream$TransportState.closeDeframer(AbstractStream.java:200) ~[grpc-core-1.42.0.jar:1.42.0]
at io.grpc.internal.AbstractClientStream$TransportState.transportReportStatus(AbstractClientStream.java:445) ~[grpc-core-1.42.0.jar:1.42.0]
at io.grpc.internal.AbstractClientStream$TransportState.transportReportStatus(AbstractClientStream.java:401) ~[grpc-core-1.42.0.jar:1.42.0]
at io.grpc.internal.AbstractClientStream$TransportState.inboundTrailersReceived(AbstractClientStream.java:384) ~[grpc-core-1.42.0.jar:1.42.0]
at io.grpc.internal.Http2ClientStreamTransportState.transportTrailersReceived(Http2ClientStreamTransportState.java:183) ~[grpc-core-1.42.0.jar:1.42.0]
at io.grpc.netty.shaded.io.grpc.netty.NettyClientStream$TransportState.transportHeadersReceived(NettyClientStream.java:334) ~[grpc-netty-shaded-1.42.0.jar:1.42.0]
at io.grpc.netty.shaded.io.grpc.netty.NettyClientHandler.onHeadersRead(NettyClientHandler.java:372) ~[grpc-netty-shaded-1.42.0.jar:1.42.0]
at io.grpc.netty.shaded.io.grpc.netty.NettyClientHandler.access$1200(NettyClientHandler.java:91) ~[grpc-netty-shaded-1.42.0.jar:1.42.0]
at io.grpc.netty.shaded.io.grpc.netty.NettyClientHandler$FrameListener.onHeadersRead(NettyClientHandler.java:934) ~[grpc-netty-shaded-1.42.0.jar:1.42.0]
```
The NPE can happen when `ClientCall.Listener.onClose()` and `StatsTraceContext.streamClosed()` (or `ClientStreamListener.closed()`) are invoked concurrently in different threads. Note that `CensusStatsModule$CallAttemptsTracerFactory.attemptEnded()` in the above stack trace would observe `callEnded==true` in such a race condition.
The following are the possible scenarios that the race between `ClientCall.Listener.onClose()` and `ClientStreamListener.closed()` can happen:
- Deadline exceeded but the underlying real stream is [not committed](https://github.com/grpc/grpc-java/blob/v1.42.0/core/src/main/java/io/grpc/internal/RetriableStream.java#L486-L495), the `ClientCall.Listener` may be closed earlier than the stream listener. (This is the case of the above stack trace, in which the inbound end-of-stream is received observing `callEnded==true`. Even if nothing inbound is received, there is still a chance that the NPE can happen.)
- DelayedClientTransport.PendingStream has created a realStream but `setStream(realStream)` is [not called yet](https://github.com/grpc/grpc-java/blob/v1.42.0/core/src/main/java/io/grpc/internal/DelayedClientTransport.java#L366-L372), when deadline exceeded. (This has little chance to happen, only for the very first RPC on the channel.)
- Hedging case.
In deadline-exceeded cases, the shorter the deadline is, the more likely the race can happen.
This commit is contained in:
parent
ee581bfdfa
commit
a4334eb5c3
|
|
@ -342,7 +342,6 @@ final class CensusStatsModule {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void streamClosed(Status status) {
|
public void streamClosed(Status status) {
|
||||||
attemptsState.attemptEnded();
|
|
||||||
stopwatch.stop();
|
stopwatch.stop();
|
||||||
roundtripNanos = stopwatch.elapsed(TimeUnit.NANOSECONDS);
|
roundtripNanos = stopwatch.elapsed(TimeUnit.NANOSECONDS);
|
||||||
Deadline deadline = info.getCallOptions().getDeadline();
|
Deadline deadline = info.getCallOptions().getDeadline();
|
||||||
|
|
@ -355,6 +354,7 @@ final class CensusStatsModule {
|
||||||
statusCode = Code.DEADLINE_EXCEEDED;
|
statusCode = Code.DEADLINE_EXCEEDED;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
attemptsState.attemptEnded();
|
||||||
if (inboundReceivedOrClosed.compareAndSet(false, true)) {
|
if (inboundReceivedOrClosed.compareAndSet(false, true)) {
|
||||||
if (module.recordFinishedRpcs) {
|
if (module.recordFinishedRpcs) {
|
||||||
// Stream is closed early. So no need to record metrics for any inbound events after this
|
// Stream is closed early. So no need to record metrics for any inbound events after this
|
||||||
|
|
@ -522,6 +522,8 @@ final class CensusStatsModule {
|
||||||
tracer.statusCode = status.getCode();
|
tracer.statusCode = status.getCode();
|
||||||
tracer.recordFinishedAttempt();
|
tracer.recordFinishedAttempt();
|
||||||
} else if (inboundMetricTracer != null) {
|
} else if (inboundMetricTracer != null) {
|
||||||
|
// activeStreams has been decremented to 0 by attemptEnded(),
|
||||||
|
// so inboundMetricTracer.statusCode is guaranteed to be assigned already.
|
||||||
inboundMetricTracer.recordFinishedAttempt();
|
inboundMetricTracer.recordFinishedAttempt();
|
||||||
}
|
}
|
||||||
if (!module.recordRetryMetrics) {
|
if (!module.recordRetryMetrics) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue