xds: Fix LBs blindly propagating XdsClient errors

This is similar to 2a45524 (for #8950), but covers additional cases.
This commit is contained in:
Eric Anderson 2022-03-22 16:41:59 -07:00
parent 37904a02c0
commit 700afafb10
4 changed files with 36 additions and 5 deletions

View File

@ -246,7 +246,12 @@ final class CdsLoadBalancer2 extends LoadBalancer {
}
@Override
public void onError(final Status error) {
public void onError(Status error) {
Status status = Status.UNAVAILABLE
.withDescription(
String.format("Unable to load CDS %s. xDS server returned: %s: %s",
name, error.getCode(), error.getDescription()))
.withCause(error.getCause());
syncContext.execute(new Runnable() {
@Override
public void run() {
@ -255,7 +260,7 @@ final class CdsLoadBalancer2 extends LoadBalancer {
}
// All watchers should receive the same error, so we only propagate it once.
if (ClusterState.this == root) {
handleClusterDiscoveryError(error);
handleClusterDiscoveryError(status);
}
}
});

View File

@ -461,7 +461,11 @@ final class ClusterResolverLoadBalancer extends LoadBalancer {
if (shutdown) {
return;
}
status = error;
String resourceName = edsServiceName != null ? edsServiceName : name;
status = Status.UNAVAILABLE
.withDescription(String.format("Unable to load EDS %s. xDS server returned: %s: %s",
resourceName, error.getCode(), error.getDescription()))
.withCause(error.getCause());
logger.log(XdsLogLevel.WARNING, "Received EDS error: {0}", error);
handleEndpointResolutionError();
}

View File

@ -459,7 +459,10 @@ public class CdsLoadBalancer2Test {
xdsClient.deliverError(error);
verify(helper).updateBalancingState(
eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
assertPicker(pickerCaptor.getValue(), error, null);
Status expectedError = Status.UNAVAILABLE.withDescription(
"Unable to load CDS cluster-foo.googleapis.com. xDS server returned: "
+ "RESOURCE_EXHAUSTED: OOM");
assertPicker(pickerCaptor.getValue(), expectedError, null);
assertThat(childBalancers).isEmpty();
}
@ -481,7 +484,8 @@ public class CdsLoadBalancer2Test {
Status error = Status.RESOURCE_EXHAUSTED.withDescription("OOM");
xdsClient.deliverError(error);
assertThat(childLb.upstreamError).isEqualTo(error);
assertThat(childLb.upstreamError.getCode()).isEqualTo(Status.Code.UNAVAILABLE);
assertThat(childLb.upstreamError.getDescription()).contains("RESOURCE_EXHAUSTED: OOM");
assertThat(childLb.shutdown).isFalse(); // child LB may choose to keep working
}

View File

@ -850,6 +850,24 @@ public class ClusterResolverLoadBalancerTest {
null);
}
/**
 * With a single EDS discovery mechanism configured and no child balancer created yet, an error
 * delivered through the XdsClient must put the channel in TRANSIENT_FAILURE with a picker whose
 * status is UNAVAILABLE and whose description still mentions the original code and message.
 */
@Test
public void resolutionErrorBeforeChildLbCreated_edsOnly_returnErrorPicker() {
  // Arrange: EDS-only config; the watcher is registered but no child LB exists yet.
  deliverLbConfig(
      new ClusterResolverConfig(Arrays.asList(edsDiscoveryMechanism1), roundRobin));
  assertThat(xdsClient.watchers.keySet()).containsExactly(EDS_SERVICE_NAME1);
  assertThat(childBalancers).isEmpty();
  // Drop interactions recorded so far so the verify() below only sees the error path.
  reset(helper);

  // Act: the xDS client reports a non-UNAVAILABLE error.
  Status xdsError = Status.RESOURCE_EXHAUSTED.withDescription("OOM");
  xdsClient.deliverError(xdsError);

  // Assert: still no child LB, and the helper received an error picker.
  assertThat(childBalancers).isEmpty();
  verify(helper).updateBalancingState(
      eq(ConnectivityState.TRANSIENT_FAILURE), pickerCaptor.capture());
  Status pickStatus =
      pickerCaptor.getValue().pickSubchannel(mock(PickSubchannelArgs.class)).getStatus();
  // The code is rewritten to UNAVAILABLE; the original code and message stay in the description.
  assertThat(pickStatus.getCode()).isEqualTo(Status.Code.UNAVAILABLE);
  assertThat(pickStatus.getDescription()).contains("RESOURCE_EXHAUSTED: OOM");
}
@Test
public void handleNameResolutionErrorFromUpstream_beforeChildLbCreated_returnErrorPicker() {
ClusterResolverConfig config = new ClusterResolverConfig(