rls: Reduce RLS channel logging

The channel log is shared by many components and is poorly suited to
the noise of per-RPC events. This commit restricts RLS usage of the
logger to no more frequent than cache entry events. This may still be
too frequent, but should substantially improve the signal-to-noise and
we can do further rework as needed.

Many of the log entries were poor because they lacked enough context.
They weren't even clear they were from RLS. The cache entry events now
regularly include the request key in the logs, allowing you to follow
events for specific keys. I would have preferred using the hash code,
but NumberFormat is annoying and toString() may be acceptable given its
convenience.

This commit reverts much of eba699ad. Those logs have not proven to be
helpful as they produce more output than can be reasonably stored.
This commit is contained in:
Eric Anderson 2024-11-27 11:37:45 -08:00 committed by GitHub
parent ebb43a69e7
commit 7f9c1f39f3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 19 additions and 50 deletions

View File

@ -278,9 +278,8 @@ final class CachingRlsLbClient {
@GuardedBy("lock") @GuardedBy("lock")
private CachedRouteLookupResponse asyncRlsCall( private CachedRouteLookupResponse asyncRlsCall(
RouteLookupRequest request, @Nullable BackoffPolicy backoffPolicy) { RouteLookupRequest request, @Nullable BackoffPolicy backoffPolicy) {
logger.log(ChannelLogLevel.DEBUG, "Making an async call to RLS");
if (throttler.shouldThrottle()) { if (throttler.shouldThrottle()) {
logger.log(ChannelLogLevel.DEBUG, "Request is throttled"); logger.log(ChannelLogLevel.DEBUG, "[RLS Entry {0}] Throttled RouteLookup", request);
// Cache updated, but no need to call updateBalancingState because no RPCs were queued waiting // Cache updated, but no need to call updateBalancingState because no RPCs were queued waiting
// on this result // on this result
return CachedRouteLookupResponse.backoffEntry(createBackOffEntry( return CachedRouteLookupResponse.backoffEntry(createBackOffEntry(
@ -288,27 +287,29 @@ final class CachingRlsLbClient {
} }
final SettableFuture<RouteLookupResponse> response = SettableFuture.create(); final SettableFuture<RouteLookupResponse> response = SettableFuture.create();
io.grpc.lookup.v1.RouteLookupRequest routeLookupRequest = REQUEST_CONVERTER.convert(request); io.grpc.lookup.v1.RouteLookupRequest routeLookupRequest = REQUEST_CONVERTER.convert(request);
logger.log(ChannelLogLevel.DEBUG, "Sending RouteLookupRequest: {0}", routeLookupRequest); logger.log(ChannelLogLevel.DEBUG,
"[RLS Entry {0}] Starting RouteLookup: {1}", request, routeLookupRequest);
rlsStub.withDeadlineAfter(callTimeoutNanos, TimeUnit.NANOSECONDS) rlsStub.withDeadlineAfter(callTimeoutNanos, TimeUnit.NANOSECONDS)
.routeLookup( .routeLookup(
routeLookupRequest, routeLookupRequest,
new StreamObserver<io.grpc.lookup.v1.RouteLookupResponse>() { new StreamObserver<io.grpc.lookup.v1.RouteLookupResponse>() {
@Override @Override
public void onNext(io.grpc.lookup.v1.RouteLookupResponse value) { public void onNext(io.grpc.lookup.v1.RouteLookupResponse value) {
logger.log(ChannelLogLevel.DEBUG, "Received RouteLookupResponse: {0}", value); logger.log(ChannelLogLevel.DEBUG,
"[RLS Entry {0}] RouteLookup succeeded: {1}", request, value);
response.set(RESPONSE_CONVERTER.reverse().convert(value)); response.set(RESPONSE_CONVERTER.reverse().convert(value));
} }
@Override @Override
public void onError(Throwable t) { public void onError(Throwable t) {
logger.log(ChannelLogLevel.DEBUG, "Error looking up route:", t); logger.log(ChannelLogLevel.DEBUG,
"[RLS Entry {0}] RouteLookup failed: {1}", request, t);
response.setException(t); response.setException(t);
throttler.registerBackendResponse(true); throttler.registerBackendResponse(true);
} }
@Override @Override
public void onCompleted() { public void onCompleted() {
logger.log(ChannelLogLevel.DEBUG, "routeLookup call completed");
throttler.registerBackendResponse(false); throttler.registerBackendResponse(false);
} }
}); });
@ -323,13 +324,10 @@ final class CachingRlsLbClient {
*/ */
@CheckReturnValue @CheckReturnValue
final CachedRouteLookupResponse get(final RouteLookupRequest request) { final CachedRouteLookupResponse get(final RouteLookupRequest request) {
logger.log(ChannelLogLevel.DEBUG, "Acquiring lock to get cached entry");
synchronized (lock) { synchronized (lock) {
logger.log(ChannelLogLevel.DEBUG, "Acquired lock to get cached entry");
final CacheEntry cacheEntry; final CacheEntry cacheEntry;
cacheEntry = linkedHashLruCache.read(request); cacheEntry = linkedHashLruCache.read(request);
if (cacheEntry == null) { if (cacheEntry == null) {
logger.log(ChannelLogLevel.DEBUG, "No cache entry found, making a new RLS request");
PendingCacheEntry pendingEntry = pendingCallCache.get(request); PendingCacheEntry pendingEntry = pendingCallCache.get(request);
if (pendingEntry != null) { if (pendingEntry != null) {
return CachedRouteLookupResponse.pendingResponse(pendingEntry); return CachedRouteLookupResponse.pendingResponse(pendingEntry);
@ -339,15 +337,12 @@ final class CachingRlsLbClient {
if (cacheEntry instanceof DataCacheEntry) { if (cacheEntry instanceof DataCacheEntry) {
// cache hit, initiate async-refresh if entry is staled // cache hit, initiate async-refresh if entry is staled
logger.log(ChannelLogLevel.DEBUG, "Cache hit for the request");
DataCacheEntry dataEntry = ((DataCacheEntry) cacheEntry); DataCacheEntry dataEntry = ((DataCacheEntry) cacheEntry);
if (dataEntry.isStaled(ticker.read())) { if (dataEntry.isStaled(ticker.read())) {
logger.log(ChannelLogLevel.DEBUG, "Cache entry is stale");
dataEntry.maybeRefresh(); dataEntry.maybeRefresh();
} }
return CachedRouteLookupResponse.dataEntry((DataCacheEntry) cacheEntry); return CachedRouteLookupResponse.dataEntry((DataCacheEntry) cacheEntry);
} }
logger.log(ChannelLogLevel.DEBUG, "Cache hit for a backup entry");
return CachedRouteLookupResponse.backoffEntry((BackoffCacheEntry) cacheEntry); return CachedRouteLookupResponse.backoffEntry((BackoffCacheEntry) cacheEntry);
} }
} }
@ -409,8 +404,8 @@ final class CachingRlsLbClient {
RouteLookupRequest request, RouteLookupResponse routeLookupResponse) { RouteLookupRequest request, RouteLookupResponse routeLookupResponse) {
logger.log( logger.log(
ChannelLogLevel.DEBUG, ChannelLogLevel.DEBUG,
"Transition to data cache: routeLookupResponse={0}", "[RLS Entry {0}] Transition to data cache: routeLookupResponse={1}",
routeLookupResponse); request, routeLookupResponse);
DataCacheEntry entry = new DataCacheEntry(request, routeLookupResponse); DataCacheEntry entry = new DataCacheEntry(request, routeLookupResponse);
// Constructor for DataCacheEntry causes updateBalancingState, but the picks can't happen until // Constructor for DataCacheEntry causes updateBalancingState, but the picks can't happen until
// this cache update because the lock is held // this cache update because the lock is held
@ -421,18 +416,19 @@ final class CachingRlsLbClient {
@GuardedBy("lock") @GuardedBy("lock")
private BackoffCacheEntry createBackOffEntry( private BackoffCacheEntry createBackOffEntry(
RouteLookupRequest request, Status status, @Nullable BackoffPolicy backoffPolicy) { RouteLookupRequest request, Status status, @Nullable BackoffPolicy backoffPolicy) {
logger.log(ChannelLogLevel.DEBUG, "Transition to back off: status={0}", status);
if (backoffPolicy == null) { if (backoffPolicy == null) {
backoffPolicy = backoffProvider.get(); backoffPolicy = backoffProvider.get();
} }
long delayNanos = backoffPolicy.nextBackoffNanos(); long delayNanos = backoffPolicy.nextBackoffNanos();
logger.log(
ChannelLogLevel.DEBUG,
"[RLS Entry {0}] Transition to back off: status={1}, delayNanos={2}",
request, status, delayNanos);
BackoffCacheEntry entry = new BackoffCacheEntry(request, status, backoffPolicy); BackoffCacheEntry entry = new BackoffCacheEntry(request, status, backoffPolicy);
// Lock is held, so the task can't execute before the assignment // Lock is held, so the task can't execute before the assignment
entry.scheduledFuture = scheduledExecutorService.schedule( entry.scheduledFuture = scheduledExecutorService.schedule(
() -> refreshBackoffEntry(entry), delayNanos, TimeUnit.NANOSECONDS); () -> refreshBackoffEntry(entry), delayNanos, TimeUnit.NANOSECONDS);
linkedHashLruCache.cacheAndClean(request, entry); linkedHashLruCache.cacheAndClean(request, entry);
logger.log(ChannelLogLevel.DEBUG, "BackoffCacheEntry created with a delay of {0} nanos",
delayNanos);
return entry; return entry;
} }
@ -443,7 +439,8 @@ final class CachingRlsLbClient {
// Future was previously cancelled // Future was previously cancelled
return; return;
} }
logger.log(ChannelLogLevel.DEBUG, "Calling RLS for transition to pending"); logger.log(ChannelLogLevel.DEBUG,
"[RLS Entry {0}] Calling RLS for transition to pending", entry.request);
linkedHashLruCache.invalidate(entry.request); linkedHashLruCache.invalidate(entry.request);
asyncRlsCall(entry.request, entry.backoffPolicy); asyncRlsCall(entry.request, entry.backoffPolicy);
} }
@ -659,10 +656,10 @@ final class CachingRlsLbClient {
synchronized (lock) { // Lock is already held, but ErrorProne can't tell synchronized (lock) { // Lock is already held, but ErrorProne can't tell
if (pendingCallCache.containsKey(request)) { if (pendingCallCache.containsKey(request)) {
// pending already requested // pending already requested
logger.log(ChannelLogLevel.DEBUG,
"A pending refresh request already created, no need to proceed with refresh");
return; return;
} }
logger.log(ChannelLogLevel.DEBUG,
"[RLS Entry {0}] Cache entry is stale, refreshing", request);
asyncRlsCall(request, /* backoffPolicy= */ null); asyncRlsCall(request, /* backoffPolicy= */ null);
} }
} }
@ -943,13 +940,10 @@ final class CachingRlsLbClient {
@Override @Override
public void onStatusChanged(ConnectivityState newState) { public void onStatusChanged(ConnectivityState newState) {
logger.log(ChannelLogLevel.DEBUG, "LB status changed to: {0}", newState);
if (prevState == ConnectivityState.TRANSIENT_FAILURE if (prevState == ConnectivityState.TRANSIENT_FAILURE
&& newState == ConnectivityState.READY) { && newState == ConnectivityState.READY) {
logger.log(ChannelLogLevel.DEBUG, "Transitioning from TRANSIENT_FAILURE to READY"); logger.log(ChannelLogLevel.DEBUG, "Transitioning from TRANSIENT_FAILURE to READY");
logger.log(ChannelLogLevel.DEBUG, "Acquiring lock force refresh backoff cache entries");
synchronized (lock) { synchronized (lock) {
logger.log(ChannelLogLevel.DEBUG, "Lock acquired for refreshing backoff cache entries");
for (CacheEntry value : linkedHashLruCache.values()) { for (CacheEntry value : linkedHashLruCache.values()) {
if (value instanceof BackoffCacheEntry) { if (value instanceof BackoffCacheEntry) {
refreshBackoffEntry((BackoffCacheEntry) value); refreshBackoffEntry((BackoffCacheEntry) value);
@ -983,31 +977,22 @@ final class CachingRlsLbClient {
RouteLookupRequest request = RouteLookupRequest request =
requestFactory.create(serviceName, methodName, args.getHeaders()); requestFactory.create(serviceName, methodName, args.getHeaders());
final CachedRouteLookupResponse response = CachingRlsLbClient.this.get(request); final CachedRouteLookupResponse response = CachingRlsLbClient.this.get(request);
logger.log(ChannelLogLevel.DEBUG,
"Got route lookup cache entry for service={0}, method={1}, headers={2}:\n {3}",
new Object[]{serviceName, methodName, args.getHeaders(), response});
if (response.getHeaderData() != null && !response.getHeaderData().isEmpty()) { if (response.getHeaderData() != null && !response.getHeaderData().isEmpty()) {
logger.log(ChannelLogLevel.DEBUG, "Updating RLS metadata from the RLS response headers");
Metadata headers = args.getHeaders(); Metadata headers = args.getHeaders();
headers.discardAll(RLS_DATA_KEY); headers.discardAll(RLS_DATA_KEY);
headers.put(RLS_DATA_KEY, response.getHeaderData()); headers.put(RLS_DATA_KEY, response.getHeaderData());
} }
String defaultTarget = lbPolicyConfig.getRouteLookupConfig().defaultTarget(); String defaultTarget = lbPolicyConfig.getRouteLookupConfig().defaultTarget();
logger.log(ChannelLogLevel.DEBUG, "defaultTarget = {0}", defaultTarget);
boolean hasFallback = defaultTarget != null && !defaultTarget.isEmpty(); boolean hasFallback = defaultTarget != null && !defaultTarget.isEmpty();
if (response.hasData()) { if (response.hasData()) {
logger.log(ChannelLogLevel.DEBUG, "RLS response has data, proceed with selecting a picker");
ChildPolicyWrapper childPolicyWrapper = response.getChildPolicyWrapper(); ChildPolicyWrapper childPolicyWrapper = response.getChildPolicyWrapper();
SubchannelPicker picker = SubchannelPicker picker =
(childPolicyWrapper != null) ? childPolicyWrapper.getPicker() : null; (childPolicyWrapper != null) ? childPolicyWrapper.getPicker() : null;
if (picker == null) { if (picker == null) {
logger.log(ChannelLogLevel.DEBUG,
"Child policy wrapper didn't return a picker, returning PickResult with no results");
return PickResult.withNoResult(); return PickResult.withNoResult();
} }
// Happy path // Happy path
logger.log(ChannelLogLevel.DEBUG, "Returning PickResult");
PickResult pickResult = picker.pickSubchannel(args); PickResult pickResult = picker.pickSubchannel(args);
if (pickResult.hasResult()) { if (pickResult.hasResult()) {
helper.getMetricRecorder().addLongCounter(TARGET_PICKS_COUNTER, 1, helper.getMetricRecorder().addLongCounter(TARGET_PICKS_COUNTER, 1,
@ -1017,20 +1002,15 @@ final class CachingRlsLbClient {
} }
return pickResult; return pickResult;
} else if (response.hasError()) { } else if (response.hasError()) {
logger.log(ChannelLogLevel.DEBUG, "RLS response has errors");
if (hasFallback) { if (hasFallback) {
logger.log(ChannelLogLevel.DEBUG, "Using RLS fallback");
return useFallback(args); return useFallback(args);
} }
logger.log(ChannelLogLevel.DEBUG, "No RLS fallback, returning PickResult with an error");
helper.getMetricRecorder().addLongCounter(FAILED_PICKS_COUNTER, 1, helper.getMetricRecorder().addLongCounter(FAILED_PICKS_COUNTER, 1,
Arrays.asList(helper.getChannelTarget(), lookupService), Collections.emptyList()); Arrays.asList(helper.getChannelTarget(), lookupService), Collections.emptyList());
return PickResult.withError( return PickResult.withError(
convertRlsServerStatus(response.getStatus(), convertRlsServerStatus(response.getStatus(),
lbPolicyConfig.getRouteLookupConfig().lookupService())); lbPolicyConfig.getRouteLookupConfig().lookupService()));
} else { } else {
logger.log(ChannelLogLevel.DEBUG,
"RLS response had no data, return a PickResult with no data");
return PickResult.withNoResult(); return PickResult.withNoResult();
} }
} }
@ -1067,13 +1047,11 @@ final class CachingRlsLbClient {
private void startFallbackChildPolicy() { private void startFallbackChildPolicy() {
String defaultTarget = lbPolicyConfig.getRouteLookupConfig().defaultTarget(); String defaultTarget = lbPolicyConfig.getRouteLookupConfig().defaultTarget();
logger.log(ChannelLogLevel.DEBUG, "starting fallback to {0}", defaultTarget);
logger.log(ChannelLogLevel.DEBUG, "Acquiring lock to start fallback child policy");
synchronized (lock) { synchronized (lock) {
logger.log(ChannelLogLevel.DEBUG, "Acquired lock for starting fallback child policy");
if (fallbackChildPolicyWrapper != null) { if (fallbackChildPolicyWrapper != null) {
return; return;
} }
logger.log(ChannelLogLevel.DEBUG, "starting fallback to {0}", defaultTarget);
fallbackChildPolicyWrapper = refCountedChildPolicyWrapperFactory.createOrGet(defaultTarget); fallbackChildPolicyWrapper = refCountedChildPolicyWrapperFactory.createOrGet(defaultTarget);
} }
} }
@ -1081,7 +1059,6 @@ final class CachingRlsLbClient {
// GuardedBy CachingRlsLbClient.lock // GuardedBy CachingRlsLbClient.lock
void close() { void close() {
synchronized (lock) { // Lock is already held, but ErrorProne can't tell synchronized (lock) { // Lock is already held, but ErrorProne can't tell
logger.log(ChannelLogLevel.DEBUG, "Closing RLS picker");
if (fallbackChildPolicyWrapper != null) { if (fallbackChildPolicyWrapper != null) {
refCountedChildPolicyWrapperFactory.release(fallbackChildPolicyWrapper); refCountedChildPolicyWrapperFactory.release(fallbackChildPolicyWrapper);
} }

View File

@ -50,12 +50,11 @@ final class RlsLoadBalancer extends LoadBalancer {
@Override @Override
public Status acceptResolvedAddresses(ResolvedAddresses resolvedAddresses) { public Status acceptResolvedAddresses(ResolvedAddresses resolvedAddresses) {
logger.log(ChannelLogLevel.DEBUG, "Received resolution result: {0}", resolvedAddresses);
LbPolicyConfiguration lbPolicyConfiguration = LbPolicyConfiguration lbPolicyConfiguration =
(LbPolicyConfiguration) resolvedAddresses.getLoadBalancingPolicyConfig(); (LbPolicyConfiguration) resolvedAddresses.getLoadBalancingPolicyConfig();
checkNotNull(lbPolicyConfiguration, "Missing RLS LB config"); checkNotNull(lbPolicyConfiguration, "Missing RLS LB config");
if (!lbPolicyConfiguration.equals(this.lbPolicyConfiguration)) { if (!lbPolicyConfiguration.equals(this.lbPolicyConfiguration)) {
logger.log(ChannelLogLevel.DEBUG, "A new RLS LB config received"); logger.log(ChannelLogLevel.DEBUG, "A new RLS LB config received: {0}", lbPolicyConfiguration);
boolean needToConnect = this.lbPolicyConfiguration == null boolean needToConnect = this.lbPolicyConfiguration == null
|| !this.lbPolicyConfiguration.getRouteLookupConfig().lookupService().equals( || !this.lbPolicyConfiguration.getRouteLookupConfig().lookupService().equals(
lbPolicyConfiguration.getRouteLookupConfig().lookupService()); lbPolicyConfiguration.getRouteLookupConfig().lookupService());
@ -80,22 +79,18 @@ final class RlsLoadBalancer extends LoadBalancer {
// not required. // not required.
this.lbPolicyConfiguration = lbPolicyConfiguration; this.lbPolicyConfiguration = lbPolicyConfiguration;
} }
logger.log(ChannelLogLevel.DEBUG, "RLS LB accepted resolved addresses successfully");
return Status.OK; return Status.OK;
} }
@Override @Override
public void requestConnection() { public void requestConnection() {
logger.log(ChannelLogLevel.DEBUG, "connection requested from RLS LB");
if (routeLookupClient != null) { if (routeLookupClient != null) {
logger.log(ChannelLogLevel.DEBUG, "requesting a connection from the routeLookupClient");
routeLookupClient.requestConnection(); routeLookupClient.requestConnection();
} }
} }
@Override @Override
public void handleNameResolutionError(final Status error) { public void handleNameResolutionError(final Status error) {
logger.log(ChannelLogLevel.DEBUG, "Received resolution error: {0}", error);
class ErrorPicker extends SubchannelPicker { class ErrorPicker extends SubchannelPicker {
@Override @Override
public PickResult pickSubchannel(PickSubchannelArgs args) { public PickResult pickSubchannel(PickSubchannelArgs args) {
@ -116,14 +111,11 @@ final class RlsLoadBalancer extends LoadBalancer {
routeLookupClient = null; routeLookupClient = null;
lbPolicyConfiguration = null; lbPolicyConfiguration = null;
} }
logger.log(ChannelLogLevel.DEBUG,
"Updating balancing state to TRANSIENT_FAILURE with an error picker");
helper.updateBalancingState(ConnectivityState.TRANSIENT_FAILURE, new ErrorPicker()); helper.updateBalancingState(ConnectivityState.TRANSIENT_FAILURE, new ErrorPicker());
} }
@Override @Override
public void shutdown() { public void shutdown() {
logger.log(ChannelLogLevel.DEBUG, "Rls lb shutdown");
if (routeLookupClient != null) { if (routeLookupClient != null) {
logger.log(ChannelLogLevel.DEBUG, "closing the routeLookupClient because of RLS LB shutdown"); logger.log(ChannelLogLevel.DEBUG, "closing the routeLookupClient because of RLS LB shutdown");
routeLookupClient.close(); routeLookupClient.close();