mirror of https://github.com/grpc/grpc-java.git
xds: include node ID in RPC failure status messages from the XdsClient (#9099)
This commit is contained in:
parent
3a303af02f
commit
a5829107c3
|
|
@ -2537,8 +2537,16 @@ final class ClientXdsClient extends XdsClient implements XdsResponseHandler, Res
|
||||||
respTimer.cancel();
|
respTimer.cancel();
|
||||||
respTimer = null;
|
respTimer = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Include node ID in xds failures to allow cross-referencing with control plane logs
|
||||||
|
// when debugging.
|
||||||
|
String description = error.getDescription() == null ? "" : error.getDescription() + " ";
|
||||||
|
Status errorAugmented = Status.fromCode(error.getCode())
|
||||||
|
.withDescription(description + "nodeID: " + bootstrapInfo.node().getId())
|
||||||
|
.withCause(error.getCause());
|
||||||
|
|
||||||
for (ResourceWatcher watcher : watchers) {
|
for (ResourceWatcher watcher : watchers) {
|
||||||
watcher.onError(error);
|
watcher.onError(errorAugmented);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -398,6 +398,13 @@ abstract class XdsClient {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Called when the resource discovery RPC encounters some transient error.
|
* Called when the resource discovery RPC encounters some transient error.
|
||||||
|
*
|
||||||
|
* <p>Note that we expect that the implementer to:
|
||||||
|
* - Comply with the guarantee to not generate certain statuses by the library:
|
||||||
|
* https://grpc.github.io/grpc/core/md_doc_statuscodes.html. If the code needs to be
|
||||||
|
* propagated to the channel, override it with {@link Status.Code#UNAVAILABLE}.
|
||||||
|
* - Keep {@link Status} description in one form or another, as it contains valuable debugging
|
||||||
|
* information.
|
||||||
*/
|
*/
|
||||||
void onError(Status error);
|
void onError(Status error);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -126,7 +126,8 @@ public abstract class ClientXdsClientTestBase {
|
||||||
private static final String VERSION_1 = "42";
|
private static final String VERSION_1 = "42";
|
||||||
private static final String VERSION_2 = "43";
|
private static final String VERSION_2 = "43";
|
||||||
private static final String VERSION_3 = "44";
|
private static final String VERSION_3 = "44";
|
||||||
private static final Node NODE = Node.newBuilder().build();
|
private static final String NODE_ID = "cool-node-id";
|
||||||
|
private static final Node NODE = Node.newBuilder().setId(NODE_ID).build();
|
||||||
private static final Any FAILING_ANY = MessageFactory.FAILING_ANY;
|
private static final Any FAILING_ANY = MessageFactory.FAILING_ANY;
|
||||||
private static final ChannelCredentials CHANNEL_CREDENTIALS = InsecureChannelCredentials.create();
|
private static final ChannelCredentials CHANNEL_CREDENTIALS = InsecureChannelCredentials.create();
|
||||||
private final ServerInfo lrsServerInfo =
|
private final ServerInfo lrsServerInfo =
|
||||||
|
|
@ -314,7 +315,7 @@ public abstract class ClientXdsClientTestBase {
|
||||||
Bootstrapper.BootstrapInfo.builder()
|
Bootstrapper.BootstrapInfo.builder()
|
||||||
.servers(Arrays.asList(
|
.servers(Arrays.asList(
|
||||||
Bootstrapper.ServerInfo.create(SERVER_URI, CHANNEL_CREDENTIALS, useProtocolV3())))
|
Bootstrapper.ServerInfo.create(SERVER_URI, CHANNEL_CREDENTIALS, useProtocolV3())))
|
||||||
.node(EnvoyProtoData.Node.newBuilder().build())
|
.node(NODE)
|
||||||
.authorities(ImmutableMap.of(
|
.authorities(ImmutableMap.of(
|
||||||
"authority.xds.com",
|
"authority.xds.com",
|
||||||
AuthorityInfo.create(
|
AuthorityInfo.create(
|
||||||
|
|
@ -467,6 +468,15 @@ public abstract class ClientXdsClientTestBase {
|
||||||
return metadata;
|
return metadata;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void verifyStatusWithNodeId(Status status, Code expectedCode, String expectedMsg) {
|
||||||
|
assertThat(status.getCode()).isEqualTo(expectedCode);
|
||||||
|
assertThat(status.getCause()).isNull();
|
||||||
|
// Watcher.onError propagates status description to the channel, and we want to
|
||||||
|
// augment the description with the node id.
|
||||||
|
String description = (expectedMsg.isEmpty() ? "" : expectedMsg + " ") + "nodeID: " + NODE_ID;
|
||||||
|
assertThat(status.getDescription()).isEqualTo(description);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper method to validate {@link XdsClient.EdsUpdate} created for the test CDS resource
|
* Helper method to validate {@link XdsClient.EdsUpdate} created for the test CDS resource
|
||||||
* {@link ClientXdsClientTestBase#testClusterLoadAssignment}.
|
* {@link ClientXdsClientTestBase#testClusterLoadAssignment}.
|
||||||
|
|
@ -1840,11 +1850,8 @@ public abstract class ClientXdsClientTestBase {
|
||||||
+ "io.grpc.xds.ClientXdsClient$ResourceInvalidException: "
|
+ "io.grpc.xds.ClientXdsClient$ResourceInvalidException: "
|
||||||
+ "ca_certificate_provider_instance is required in upstream-tls-context";
|
+ "ca_certificate_provider_instance is required in upstream-tls-context";
|
||||||
call.verifyRequestNack(CDS, CDS_RESOURCE, "", "0000", NODE, ImmutableList.of(errorMsg));
|
call.verifyRequestNack(CDS, CDS_RESOURCE, "", "0000", NODE, ImmutableList.of(errorMsg));
|
||||||
ArgumentCaptor<Status> captor = ArgumentCaptor.forClass(Status.class);
|
verify(cdsResourceWatcher).onError(errorCaptor.capture());
|
||||||
verify(cdsResourceWatcher).onError(captor.capture());
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, errorMsg);
|
||||||
Status errorStatus = captor.getValue();
|
|
||||||
assertThat(errorStatus.getCode()).isEqualTo(Status.UNAVAILABLE.getCode());
|
|
||||||
assertThat(errorStatus.getDescription()).isEqualTo(errorMsg);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -1866,11 +1873,8 @@ public abstract class ClientXdsClientTestBase {
|
||||||
String errorMsg = "CDS response Cluster 'cluster.googleapis.com' validation error: "
|
String errorMsg = "CDS response Cluster 'cluster.googleapis.com' validation error: "
|
||||||
+ "transport-socket with name envoy.transport_sockets.bad not supported.";
|
+ "transport-socket with name envoy.transport_sockets.bad not supported.";
|
||||||
call.verifyRequestNack(CDS, CDS_RESOURCE, "", "0000", NODE, ImmutableList.of(errorMsg));
|
call.verifyRequestNack(CDS, CDS_RESOURCE, "", "0000", NODE, ImmutableList.of(errorMsg));
|
||||||
ArgumentCaptor<Status> captor = ArgumentCaptor.forClass(Status.class);
|
verify(cdsResourceWatcher).onError(errorCaptor.capture());
|
||||||
verify(cdsResourceWatcher).onError(captor.capture());
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, errorMsg);
|
||||||
Status errorStatus = captor.getValue();
|
|
||||||
assertThat(errorStatus.getCode()).isEqualTo(Status.UNAVAILABLE.getCode());
|
|
||||||
assertThat(errorStatus.getDescription()).isEqualTo(errorMsg);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -2461,13 +2465,13 @@ public abstract class ClientXdsClientTestBase {
|
||||||
// Management server closes the RPC stream with an error.
|
// Management server closes the RPC stream with an error.
|
||||||
call.sendError(Status.UNKNOWN.asException());
|
call.sendError(Status.UNKNOWN.asException());
|
||||||
verify(ldsResourceWatcher).onError(errorCaptor.capture());
|
verify(ldsResourceWatcher).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.UNKNOWN);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNKNOWN, "");
|
||||||
verify(rdsResourceWatcher).onError(errorCaptor.capture());
|
verify(rdsResourceWatcher).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.UNKNOWN);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNKNOWN, "");
|
||||||
verify(cdsResourceWatcher).onError(errorCaptor.capture());
|
verify(cdsResourceWatcher).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.UNKNOWN);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNKNOWN, "");
|
||||||
verify(edsResourceWatcher).onError(errorCaptor.capture());
|
verify(edsResourceWatcher).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.UNKNOWN);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNKNOWN, "");
|
||||||
|
|
||||||
// Retry after backoff.
|
// Retry after backoff.
|
||||||
inOrder.verify(backoffPolicyProvider).get();
|
inOrder.verify(backoffPolicyProvider).get();
|
||||||
|
|
@ -2483,15 +2487,16 @@ public abstract class ClientXdsClientTestBase {
|
||||||
call.verifyRequest(EDS, EDS_RESOURCE, "", "", NODE);
|
call.verifyRequest(EDS, EDS_RESOURCE, "", "", NODE);
|
||||||
|
|
||||||
// Management server becomes unreachable.
|
// Management server becomes unreachable.
|
||||||
call.sendError(Status.UNAVAILABLE.asException());
|
String errorMsg = "my fault";
|
||||||
|
call.sendError(Status.UNAVAILABLE.withDescription(errorMsg).asException());
|
||||||
verify(ldsResourceWatcher, times(2)).onError(errorCaptor.capture());
|
verify(ldsResourceWatcher, times(2)).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.UNAVAILABLE);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, errorMsg);
|
||||||
verify(rdsResourceWatcher, times(2)).onError(errorCaptor.capture());
|
verify(rdsResourceWatcher, times(2)).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.UNAVAILABLE);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, errorMsg);
|
||||||
verify(cdsResourceWatcher, times(2)).onError(errorCaptor.capture());
|
verify(cdsResourceWatcher, times(2)).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.UNAVAILABLE);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, errorMsg);
|
||||||
verify(edsResourceWatcher, times(2)).onError(errorCaptor.capture());
|
verify(edsResourceWatcher, times(2)).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.UNAVAILABLE);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, errorMsg);
|
||||||
|
|
||||||
// Retry after backoff.
|
// Retry after backoff.
|
||||||
inOrder.verify(backoffPolicy1).nextBackoffNanos();
|
inOrder.verify(backoffPolicy1).nextBackoffNanos();
|
||||||
|
|
@ -2518,13 +2523,13 @@ public abstract class ClientXdsClientTestBase {
|
||||||
|
|
||||||
call.sendError(Status.DEADLINE_EXCEEDED.asException());
|
call.sendError(Status.DEADLINE_EXCEEDED.asException());
|
||||||
verify(ldsResourceWatcher, times(3)).onError(errorCaptor.capture());
|
verify(ldsResourceWatcher, times(3)).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.DEADLINE_EXCEEDED);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.DEADLINE_EXCEEDED, "");
|
||||||
verify(rdsResourceWatcher, times(3)).onError(errorCaptor.capture());
|
verify(rdsResourceWatcher, times(3)).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.DEADLINE_EXCEEDED);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.DEADLINE_EXCEEDED, "");
|
||||||
verify(cdsResourceWatcher, times(3)).onError(errorCaptor.capture());
|
verify(cdsResourceWatcher, times(3)).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.DEADLINE_EXCEEDED);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.DEADLINE_EXCEEDED, "");
|
||||||
verify(edsResourceWatcher, times(3)).onError(errorCaptor.capture());
|
verify(edsResourceWatcher, times(3)).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.DEADLINE_EXCEEDED);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.DEADLINE_EXCEEDED, "");
|
||||||
|
|
||||||
// Reset backoff sequence and retry after backoff.
|
// Reset backoff sequence and retry after backoff.
|
||||||
inOrder.verify(backoffPolicyProvider).get();
|
inOrder.verify(backoffPolicyProvider).get();
|
||||||
|
|
@ -2542,13 +2547,13 @@ public abstract class ClientXdsClientTestBase {
|
||||||
// Management server becomes unreachable again.
|
// Management server becomes unreachable again.
|
||||||
call.sendError(Status.UNAVAILABLE.asException());
|
call.sendError(Status.UNAVAILABLE.asException());
|
||||||
verify(ldsResourceWatcher, times(4)).onError(errorCaptor.capture());
|
verify(ldsResourceWatcher, times(4)).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.UNAVAILABLE);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, "");
|
||||||
verify(rdsResourceWatcher, times(4)).onError(errorCaptor.capture());
|
verify(rdsResourceWatcher, times(4)).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.UNAVAILABLE);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, "");
|
||||||
verify(cdsResourceWatcher, times(4)).onError(errorCaptor.capture());
|
verify(cdsResourceWatcher, times(4)).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.UNAVAILABLE);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, "");
|
||||||
verify(edsResourceWatcher, times(4)).onError(errorCaptor.capture());
|
verify(edsResourceWatcher, times(4)).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.UNAVAILABLE);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, "");
|
||||||
|
|
||||||
// Retry after backoff.
|
// Retry after backoff.
|
||||||
inOrder.verify(backoffPolicy2).nextBackoffNanos();
|
inOrder.verify(backoffPolicy2).nextBackoffNanos();
|
||||||
|
|
@ -2572,9 +2577,9 @@ public abstract class ClientXdsClientTestBase {
|
||||||
DiscoveryRpcCall call = resourceDiscoveryCalls.poll();
|
DiscoveryRpcCall call = resourceDiscoveryCalls.poll();
|
||||||
call.sendError(Status.UNAVAILABLE.asException());
|
call.sendError(Status.UNAVAILABLE.asException());
|
||||||
verify(ldsResourceWatcher).onError(errorCaptor.capture());
|
verify(ldsResourceWatcher).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.UNAVAILABLE);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, "");
|
||||||
verify(rdsResourceWatcher).onError(errorCaptor.capture());
|
verify(rdsResourceWatcher).onError(errorCaptor.capture());
|
||||||
assertThat(errorCaptor.getValue().getCode()).isEqualTo(Code.UNAVAILABLE);
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, "");
|
||||||
ScheduledTask retryTask =
|
ScheduledTask retryTask =
|
||||||
Iterables.getOnlyElement(fakeClock.getPendingTasks(RPC_RETRY_TASK_FILTER));
|
Iterables.getOnlyElement(fakeClock.getPendingTasks(RPC_RETRY_TASK_FILTER));
|
||||||
assertThat(retryTask.getDelay(TimeUnit.NANOSECONDS)).isEqualTo(10L);
|
assertThat(retryTask.getDelay(TimeUnit.NANOSECONDS)).isEqualTo(10L);
|
||||||
|
|
@ -2621,6 +2626,14 @@ public abstract class ClientXdsClientTestBase {
|
||||||
call.sendError(Status.UNAVAILABLE.asException());
|
call.sendError(Status.UNAVAILABLE.asException());
|
||||||
assertThat(cdsResourceTimeout.isCancelled()).isTrue();
|
assertThat(cdsResourceTimeout.isCancelled()).isTrue();
|
||||||
assertThat(edsResourceTimeout.isCancelled()).isTrue();
|
assertThat(edsResourceTimeout.isCancelled()).isTrue();
|
||||||
|
verify(ldsResourceWatcher).onError(errorCaptor.capture());
|
||||||
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, "");
|
||||||
|
verify(rdsResourceWatcher).onError(errorCaptor.capture());
|
||||||
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, "");
|
||||||
|
verify(cdsResourceWatcher).onError(errorCaptor.capture());
|
||||||
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, "");
|
||||||
|
verify(edsResourceWatcher).onError(errorCaptor.capture());
|
||||||
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, "");
|
||||||
|
|
||||||
fakeClock.forwardNanos(10L);
|
fakeClock.forwardNanos(10L);
|
||||||
assertThat(fakeClock.getPendingTasks(LDS_RESOURCE_FETCH_TIMEOUT_TASK_FILTER)).hasSize(0);
|
assertThat(fakeClock.getPendingTasks(LDS_RESOURCE_FETCH_TIMEOUT_TASK_FILTER)).hasSize(0);
|
||||||
|
|
@ -2740,11 +2753,8 @@ public abstract class ClientXdsClientTestBase {
|
||||||
+ "0.0.0.0:7000\' validation error: "
|
+ "0.0.0.0:7000\' validation error: "
|
||||||
+ "common-tls-context is required in downstream-tls-context";
|
+ "common-tls-context is required in downstream-tls-context";
|
||||||
call.verifyRequestNack(LDS, LISTENER_RESOURCE, "", "0000", NODE, ImmutableList.of(errorMsg));
|
call.verifyRequestNack(LDS, LISTENER_RESOURCE, "", "0000", NODE, ImmutableList.of(errorMsg));
|
||||||
ArgumentCaptor<Status> captor = ArgumentCaptor.forClass(Status.class);
|
verify(ldsResourceWatcher).onError(errorCaptor.capture());
|
||||||
verify(ldsResourceWatcher).onError(captor.capture());
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, errorMsg);
|
||||||
Status errorStatus = captor.getValue();
|
|
||||||
assertThat(errorStatus.getCode()).isEqualTo(Status.UNAVAILABLE.getCode());
|
|
||||||
assertThat(errorStatus.getDescription()).isEqualTo(errorMsg);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -2770,11 +2780,8 @@ public abstract class ClientXdsClientTestBase {
|
||||||
+ "transport-socket with name envoy.transport_sockets.bad1 not supported.";
|
+ "transport-socket with name envoy.transport_sockets.bad1 not supported.";
|
||||||
call.verifyRequestNack(LDS, LISTENER_RESOURCE, "", "0000", NODE, ImmutableList.of(
|
call.verifyRequestNack(LDS, LISTENER_RESOURCE, "", "0000", NODE, ImmutableList.of(
|
||||||
errorMsg));
|
errorMsg));
|
||||||
ArgumentCaptor<Status> captor = ArgumentCaptor.forClass(Status.class);
|
verify(ldsResourceWatcher).onError(errorCaptor.capture());
|
||||||
verify(ldsResourceWatcher).onError(captor.capture());
|
verifyStatusWithNodeId(errorCaptor.getValue(), Code.UNAVAILABLE, errorMsg);
|
||||||
Status errorStatus = captor.getValue();
|
|
||||||
assertThat(errorStatus.getCode()).isEqualTo(Status.UNAVAILABLE.getCode());
|
|
||||||
assertThat(errorStatus.getDescription()).isEqualTo(errorMsg);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private DiscoveryRpcCall startResourceWatcher(
|
private DiscoveryRpcCall startResourceWatcher(
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue