xds: enhance delay injection error message on DEADLINE_EXCEEDED (#8185)

When an RPC is injected with a delay and then fails with DEADLINE_EXCEEDED (partially) due to the delay, it could confuse users if the error message does not mention the existence of the delay injection, because end users normally are not the same people who configured fault injection policy in control plane.
This commit is contained in:
ZHANG Dapeng 2021-05-26 14:35:45 -07:00 committed by GitHub
parent a7792d3d14
commit 6aeeba805f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 89 additions and 16 deletions

View File

@ -35,10 +35,13 @@ import io.grpc.ClientCall;
import io.grpc.ClientInterceptor;
import io.grpc.Context;
import io.grpc.Deadline;
import io.grpc.ForwardingClientCall;
import io.grpc.ForwardingClientCallListener.SimpleForwardingClientCallListener;
import io.grpc.LoadBalancer.PickSubchannelArgs;
import io.grpc.Metadata;
import io.grpc.MethodDescriptor;
import io.grpc.Status;
import io.grpc.Status.Code;
import io.grpc.internal.DelayedClientCall;
import io.grpc.internal.GrpcUtil;
import io.grpc.xds.FaultConfig.FaultAbort;
@ -215,23 +218,62 @@ final class FaultFilter implements Filter, ClientInterceptorBuilder {
// TODO(https://github.com/grpc/grpc-java/issues/7868)
callExecutor = MoreExecutors.directExecutor();
}
if (finalDelayNanos != null && finalAbortStatus != null) {
return new DelayInjectedCall<>(
finalDelayNanos, callExecutor, scheduler, callOptions.getDeadline(),
Suppliers.ofInstance(
new FailingClientCall<ReqT, RespT>(finalAbortStatus, callExecutor)));
}
if (finalDelayNanos != null) {
Supplier<? extends ClientCall<ReqT, RespT>> callSupplier;
if (finalAbortStatus != null) {
return new FailingClientCall<>(finalAbortStatus, callExecutor);
callSupplier = Suppliers.ofInstance(
new FailingClientCall<ReqT, RespT>(finalAbortStatus, callExecutor));
} else {
return new DelayInjectedCall<>(
finalDelayNanos, callExecutor, scheduler, callOptions.getDeadline(),
new Supplier<ClientCall<ReqT, RespT>>() {
callSupplier = new Supplier<ClientCall<ReqT, RespT>>() {
@Override
public ClientCall<ReqT, RespT> get() {
return next.newCall(method, callOptions);
}
});
};
}
final DelayInjectedCall<ReqT, RespT> delayInjectedCall = new DelayInjectedCall<>(
finalDelayNanos, callExecutor, scheduler, callOptions.getDeadline(), callSupplier);
final class DeadlineInsightForwardingCall extends ForwardingClientCall<ReqT, RespT> {
@Override
protected ClientCall<ReqT, RespT> delegate() {
return delayInjectedCall;
}
@Override
public void start(Listener<RespT> listener, Metadata headers) {
Listener<RespT> finalListener =
new SimpleForwardingClientCallListener<RespT>(listener) {
@Override
public void onClose(Status status, Metadata trailers) {
if (status.getCode().equals(Code.DEADLINE_EXCEEDED)) {
// TODO(zdapeng:) check effective deadline locally, and
// do the following only if the local deadline is exceeded.
// (If the server sends DEADLINE_EXCEEDED for its own deadline, then the
// injected delay does not contribute to the error, because the request is
// only sent out after the delay. There could be a race between local and
// remote, but it is rather rare.)
String description = String.format(
"Deadline exceeded after up to %d ns of fault-injected delay",
finalDelayNanos);
if (status.getDescription() != null) {
description = description + ": " + status.getDescription();
}
status = Status.DEADLINE_EXCEEDED
.withDescription(description).withCause(status.getCause());
// Replace trailers to prevent mixing sources of status and trailers.
trailers = new Metadata();
}
delegate().onClose(status, trailers);
}
};
delegate().start(finalListener, headers);
}
}
return new DeadlineInsightForwardingCall();
} else {
return new FailingClientCall<>(finalAbortStatus, callExecutor);
}
}
}

View File

@ -42,6 +42,7 @@ import io.grpc.Channel;
import io.grpc.ClientCall;
import io.grpc.ClientInterceptor;
import io.grpc.ClientInterceptors;
import io.grpc.Deadline;
import io.grpc.InternalConfigSelector;
import io.grpc.InternalConfigSelector.Result;
import io.grpc.Metadata;
@ -1180,6 +1181,36 @@ public class XdsNameResolverTest {
verifyRpcDelayed(observer3, 5000L);
}
@Test
public void resolved_faultDelayInLdsUpdate_callWithEarlyDeadline() {
resolver.start(mockListener);
FakeXdsClient xdsClient = (FakeXdsClient) resolver.getXdsClient();
when(mockRandom.nextInt(1000_000)).thenReturn(500_000); // 50%
FaultConfig httpFilterFaultConfig = FaultConfig.create(
FaultDelay.forFixedDelay(5000L, FaultConfig.FractionalPercent.perMillion(1000_000)),
null,
null);
xdsClient.deliverLdsUpdateWithFaultInjection(cluster1, httpFilterFaultConfig, null, null, null);
verify(mockListener).onResult(resolutionResultCaptor.capture());
ResolutionResult result = resolutionResultCaptor.getValue();
InternalConfigSelector configSelector = result.getAttributes().get(InternalConfigSelector.KEY);
Deadline.Ticker fakeTicker = new Deadline.Ticker() {
@Override
public long nanoTime() {
return fakeClock.getTicker().read();
}
};
ClientCall.Listener<Void> observer = startNewCall(TestMethodDescriptors.voidMethod(),
configSelector, Collections.<String, String>emptyMap(), CallOptions.DEFAULT.withDeadline(
Deadline.after(4000, TimeUnit.NANOSECONDS, fakeTicker)));
assertThat(testCall).isNull();
verifyRpcDelayedThenAborted(observer, 4000L, Status.DEADLINE_EXCEEDED.withDescription(
"Deadline exceeded after up to 5000 ns of fault-injected delay:"
+ " Deadline exceeded after 0.000004000s. "));
}
@Test
public void resolved_withNoRouterFilter() {
resolver.start(mockListener);