From a223263134c2bd4bf97b943bc52a0a128bc67268 Mon Sep 17 00:00:00 2001 From: ZHANG Dapeng Date: Wed, 22 Jan 2020 15:38:38 -0800 Subject: [PATCH] xds: better error handling to avoid RPC hangup This change will fail application RPC immediately if XdsClient encounters any error instead of retrying or getting to fallback silently. There could be optimization if the channel is currently READY while XdsClient stream just closed due to connection error, in which case we could still be using the current available subchannels while retrying, but this requires the LB knows the semantics of error status from the XdsClient. This optimization is not worth the effort for now. --- xds/src/main/java/io/grpc/xds/LookasideLb.java | 1 + xds/src/main/java/io/grpc/xds/XdsLoadBalancer2.java | 5 +++++ xds/src/test/java/io/grpc/xds/LookasideLbTest.java | 3 +++ xds/src/test/java/io/grpc/xds/XdsLoadBalancer2Test.java | 5 +++++ 4 files changed, 14 insertions(+) diff --git a/xds/src/main/java/io/grpc/xds/LookasideLb.java b/xds/src/main/java/io/grpc/xds/LookasideLb.java index 7c1c0e4837..68c834f0f6 100644 --- a/xds/src/main/java/io/grpc/xds/LookasideLb.java +++ b/xds/src/main/java/io/grpc/xds/LookasideLb.java @@ -502,6 +502,7 @@ final class LookasideLb extends LoadBalancer { @Override public void onError(Status error) { channelLogger.log(ChannelLogLevel.ERROR, "EDS load balancer received an error: {0}", error); + lookasideLbHelper.updateBalancingState(TRANSIENT_FAILURE, new ErrorPicker(error)); endpointUpdateCallback.onError(); } } diff --git a/xds/src/main/java/io/grpc/xds/XdsLoadBalancer2.java b/xds/src/main/java/io/grpc/xds/XdsLoadBalancer2.java index fdbfdf5149..564b085fdb 100644 --- a/xds/src/main/java/io/grpc/xds/XdsLoadBalancer2.java +++ b/xds/src/main/java/io/grpc/xds/XdsLoadBalancer2.java @@ -28,6 +28,7 @@ import io.grpc.Status; import io.grpc.SynchronizationContext.ScheduledHandle; import io.grpc.util.ForwardingLoadBalancerHelper; import io.grpc.xds.LookasideLb.EndpointUpdateCallback; +import io.grpc.xds.XdsSubchannelPickers.ErrorPicker; import java.util.concurrent.TimeUnit; import javax.annotation.CheckForNull; import javax.annotation.Nullable; @@ -118,6 +119,10 @@ final class XdsLoadBalancer2 extends LoadBalancer { @Override public void run() { + helper.updateBalancingState( + ConnectivityState.TRANSIENT_FAILURE, + new ErrorPicker(Status.UNAVAILABLE.withDescription( + "Channel is not ready when timeout for entering fallback mode happens"))); useFallbackPolicy(); } } diff --git a/xds/src/test/java/io/grpc/xds/LookasideLbTest.java b/xds/src/test/java/io/grpc/xds/LookasideLbTest.java index fd907d341c..7baf2ce7a7 100644 --- a/xds/src/test/java/io/grpc/xds/LookasideLbTest.java +++ b/xds/src/test/java/io/grpc/xds/LookasideLbTest.java @@ -787,8 +787,11 @@ public class LookasideLbTest { public void verifyRpcErrorPropagation() { lookasideLb.handleResolvedAddresses(defaultResolvedAddress); + verify(helper, never()).updateBalancingState( + eq(TRANSIENT_FAILURE), any(SubchannelPicker.class)); verify(edsUpdateCallback, never()).onError(); serverResponseWriter.onError(new RuntimeException()); + verify(helper).updateBalancingState(eq(TRANSIENT_FAILURE), any(SubchannelPicker.class)); verify(edsUpdateCallback).onError(); } diff --git a/xds/src/test/java/io/grpc/xds/XdsLoadBalancer2Test.java b/xds/src/test/java/io/grpc/xds/XdsLoadBalancer2Test.java index db8b82e2ff..694f5e5707 100644 --- a/xds/src/test/java/io/grpc/xds/XdsLoadBalancer2Test.java +++ b/xds/src/test/java/io/grpc/xds/XdsLoadBalancer2Test.java @@ -19,6 +19,9 @@ package io.grpc.xds; import static com.google.common.truth.Truth.assertThat; import static io.grpc.ConnectivityState.CONNECTING; import static io.grpc.ConnectivityState.READY; +import static io.grpc.ConnectivityState.TRANSIENT_FAILURE; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.ArgumentMatchers.same; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; @@ -144,7 +147,9 @@ public class XdsLoadBalancer2Test { fakeClock.forwardTime(9, TimeUnit.SECONDS); edsUpdateCallback.onWorking(); verifyNotInFallbackMode(); + fakeClock.forwardTime(1, TimeUnit.SECONDS); + verify(helper).updateBalancingState(eq(TRANSIENT_FAILURE), any(SubchannelPicker.class)); verifyInFallbackMode(); SubchannelPicker subchannelPicker = mock(SubchannelPicker.class);