From bfc0f959cd7e8889d384d5a628fb1b1291f597bd Mon Sep 17 00:00:00 2001 From: Eric Anderson Date: Fri, 23 Feb 2024 08:07:12 -0800 Subject: [PATCH] xds: Avoid nonexistent DNS resolution in XdsClientImplV3Test The DNS lookups are taking considerable time on the Windows CI (~11s), which causes the test to time out: ``` Wanted but not invoked: ldsResourceWatcher.onError(); -> at io.grpc.xds.XdsClientImplTestBase.sendToNonexistentHost(XdsClientImplTestBase.java:3733) Actually, there were zero interactions with this mock. at io.grpc.xds.XdsClientImplTestBase.sendToNonexistentHost(XdsClientImplTestBase.java:3733) ``` The ARM build, which uses an emulator, has had this test succeed, so the failure seems unrelated to CPU usage. We want to avoid external I/O anyway during tests, so removing the DNS lookup is good. The TSAN comment referenced XdsClientImplTestBase.sendToNonexistentHost, but the test no longer calls fakeClock.forwardTime so the comment was out-of-date. Change the comment to make clear the race involved. --- xds/src/main/java/io/grpc/xds/ControlPlaneClient.java | 4 +++- xds/src/test/java/io/grpc/xds/XdsClientImplTestBase.java | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/xds/src/main/java/io/grpc/xds/ControlPlaneClient.java b/xds/src/main/java/io/grpc/xds/ControlPlaneClient.java index 5f013bfd3b..3ce2815d9d 100644 --- a/xds/src/main/java/io/grpc/xds/ControlPlaneClient.java +++ b/xds/src/main/java/io/grpc/xds/ControlPlaneClient.java @@ -392,7 +392,9 @@ final class ControlPlaneClient { // has never been initialized. retryBackoffPolicy = backoffPolicyProvider.get(); } - // Need this here to avoid tsan race condition in XdsClientImplTestBase.sendToNonexistentHost + // FakeClock in tests isn't thread-safe. Schedule the retry timer before notifying callbacks + // to avoid TSAN races, since tests may wait until callbacks are called but then would run + // concurrently with the stopwatch and schedule. long elapsed = stopwatch.elapsed(TimeUnit.NANOSECONDS); long delayNanos = Math.max(0, retryBackoffPolicy.nextBackoffNanos() - elapsed); rpcRetryTimer = syncContext.schedule( diff --git a/xds/src/test/java/io/grpc/xds/XdsClientImplTestBase.java b/xds/src/test/java/io/grpc/xds/XdsClientImplTestBase.java index 9f1be942ea..fe502a2303 100644 --- a/xds/src/test/java/io/grpc/xds/XdsClientImplTestBase.java +++ b/xds/src/test/java/io/grpc/xds/XdsClientImplTestBase.java @@ -3726,9 +3726,12 @@ public abstract class XdsClientImplTestBase { } @Test - public void sendToNonexistentHost() throws Exception { + public void sendToNonexistentServer() throws Exception { // Setup xdsClient to fail on stream creation - XdsClientImpl client = createXdsClient("some.garbage"); + // The Windows CI takes ~11 seconds to resolve "doesnotexist.invalid.". That seems broken, since + // it should take no I/O, but let's limit ourselves to IP literals and hostnames in the hosts + // file. Assume localhost doesn't speak HTTP/2 on the finger port + XdsClientImpl client = createXdsClient("localhost:79"); client.watchXdsResource(XdsListenerResource.getInstance(), LDS_RESOURCE, ldsResourceWatcher); verify(ldsResourceWatcher, Mockito.timeout(5000).times(1)).onError(ArgumentMatchers.any()); assertThat(fakeClock.numPendingTasks()).isEqualTo(1); //retry