mirror of https://github.com/grpc/grpc-node.git
Serverside keepalive error detection and cleanups
- Bugfix: Ensure that if session.ping returns false we correctly identify fail the keepalive and connection - Bugfix: Ensure that if the interval between keepalives being sent occurs faster than the prior keepalive's timeout that we do not overwrite the reference to the prior timeout. Prior implementation could have in theory prevented a valid keepalive timeout from clearing itself. This rewrite keeps every timeout as a local (vs a shared state per session). Even if the timeout outlives the lifetime of a session, we still guard against errors by checking that the parent interval is not false-y. I reckon this could result in a short-term memory leak per session which is bounded for a maximum of keepaliveTimeoutMs. On the other hand even with that potential for a short reference hold, this implementation proposed here is more correct I think. One alternative we could do is keep a list of pending timeouts.. which is complex for a rare situation that will self resolve anyhow when keepaliveTimeoutMs is reached. - Bug Fix: keepalive intervals were being cleared with an incorrect clearTimeout before. Not sure if this was causing intervals leaks in some nodejs impls or not. (v20.13.1 seems to accept this mismatch without issue) - Rename variables for clarity, to prevent future bugs like swapping clearInterval vs clearTimeout. - Implementation is repeated in two places, per warning from https://github.com/grpc/grpc-node/pull/2756#issuecomment-2136031256 - This commit supercedes the prior PR on a master branch which was out of date. https://github.com/grpc/grpc-node/pull/2756
This commit is contained in:
parent
729a3f52cf
commit
ad598ecbe4
|
@ -1376,8 +1376,7 @@ export class Server {
|
|||
|
||||
let connectionAgeTimer: NodeJS.Timeout | null = null;
|
||||
let connectionAgeGraceTimer: NodeJS.Timeout | null = null;
|
||||
let keeapliveTimeTimer: NodeJS.Timeout | null = null;
|
||||
let keepaliveTimeoutTimer: NodeJS.Timeout | null = null;
|
||||
let keepaliveInterval: NodeJS.Timeout | null = null;
|
||||
let sessionClosedByServer = false;
|
||||
|
||||
const idleTimeoutObj = this.enableIdleTimeout(session);
|
||||
|
@ -1421,41 +1420,74 @@ export class Server {
|
|||
}
|
||||
|
||||
if (this.keepaliveTimeMs < KEEPALIVE_MAX_TIME_MS) {
|
||||
keeapliveTimeTimer = setInterval(() => {
|
||||
keepaliveTimeoutTimer = setTimeout(() => {
|
||||
sessionClosedByServer = true;
|
||||
session.close();
|
||||
keepaliveInterval = setInterval(() => {
|
||||
// NOTE to self: document in PR that prior implementation would overwrite the prior pending timeout
|
||||
// if the timeout had not occurred before the prior interval had elapsed (bad bug)
|
||||
const keepaliveTimeout = setTimeout(() => {
|
||||
if (keepaliveInterval) {
|
||||
clearInterval(keepaliveInterval);
|
||||
keepaliveInterval = null;
|
||||
sessionClosedByServer = true;
|
||||
this.trace('Connection dropped by keepalive timeout');
|
||||
session.close();
|
||||
}
|
||||
}, this.keepaliveTimeoutMs);
|
||||
keepaliveTimeoutTimer.unref?.();
|
||||
keepaliveTimeout.unref?.();
|
||||
|
||||
try {
|
||||
session.ping(
|
||||
(err: Error | null, duration: number, payload: Buffer) => {
|
||||
if (keepaliveTimeoutTimer) {
|
||||
clearTimeout(keepaliveTimeoutTimer);
|
||||
if (
|
||||
!session.ping(
|
||||
(err: Error | null, duration: number, payload: Buffer) => {
|
||||
clearTimeout(keepaliveTimeout);
|
||||
if (err) {
|
||||
if (keepaliveInterval) {
|
||||
clearInterval(keepaliveInterval);
|
||||
keepaliveInterval = null;
|
||||
}
|
||||
sessionClosedByServer = true;
|
||||
this.trace(
|
||||
'Connection dropped due to error with ping frame ' +
|
||||
err.message +
|
||||
' return in ' +
|
||||
duration
|
||||
);
|
||||
session.close();
|
||||
}
|
||||
}
|
||||
|
||||
if (err) {
|
||||
sessionClosedByServer = true;
|
||||
this.trace(
|
||||
'Connection dropped due to error of a ping frame ' +
|
||||
err.message +
|
||||
' return in ' +
|
||||
duration
|
||||
);
|
||||
session.close();
|
||||
}
|
||||
}
|
||||
);
|
||||
)
|
||||
) {
|
||||
throw new Error('Server keepalive ping send failed');
|
||||
}
|
||||
} catch (e) {
|
||||
clearTimeout(keepaliveTimeoutTimer);
|
||||
// The ping can't be sent because the session is already closed
|
||||
// The ping can't be sent because the session is already closed, max outstanding pings reached, etc
|
||||
clearTimeout(keepaliveTimeout);
|
||||
if (keepaliveInterval) {
|
||||
clearInterval(keepaliveInterval);
|
||||
keepaliveInterval = null;
|
||||
}
|
||||
this.trace(
|
||||
'Connection dropped due to error sending ping frame ' +
|
||||
(e instanceof Error ? e.message : 'unknown error')
|
||||
);
|
||||
session.destroy();
|
||||
}
|
||||
}, this.keepaliveTimeMs);
|
||||
keeapliveTimeTimer.unref?.();
|
||||
keepaliveInterval.unref?.();
|
||||
}
|
||||
|
||||
session.once('goaway', (errorCode, lastStreamID, opaqueData) => {
|
||||
if (errorCode === http2.constants.NGHTTP2_ENHANCE_YOUR_CALM) {
|
||||
this.trace('Connection dropped by client due to ENHANCE_YOUR_CALM');
|
||||
} else {
|
||||
this.trace(
|
||||
'Connection dropped by client via GOAWAY with error code ' +
|
||||
errorCode
|
||||
);
|
||||
}
|
||||
sessionClosedByServer = true;
|
||||
session.destroy();
|
||||
});
|
||||
|
||||
session.on('close', () => {
|
||||
if (!sessionClosedByServer) {
|
||||
this.trace(
|
||||
|
@ -1471,11 +1503,9 @@ export class Server {
|
|||
clearTimeout(connectionAgeGraceTimer);
|
||||
}
|
||||
|
||||
if (keeapliveTimeTimer) {
|
||||
clearInterval(keeapliveTimeTimer);
|
||||
if (keepaliveTimeoutTimer) {
|
||||
clearTimeout(keepaliveTimeoutTimer);
|
||||
}
|
||||
if (keepaliveInterval) {
|
||||
clearInterval(keepaliveInterval);
|
||||
keepaliveInterval = null;
|
||||
}
|
||||
|
||||
if (idleTimeoutObj !== null) {
|
||||
|
@ -1521,8 +1551,7 @@ export class Server {
|
|||
|
||||
let connectionAgeTimer: NodeJS.Timeout | null = null;
|
||||
let connectionAgeGraceTimer: NodeJS.Timeout | null = null;
|
||||
let keeapliveTimeTimer: NodeJS.Timeout | null = null;
|
||||
let keepaliveTimeoutTimer: NodeJS.Timeout | null = null;
|
||||
let keepaliveInterval: NodeJS.Timeout | null = null;
|
||||
let sessionClosedByServer = false;
|
||||
|
||||
const idleTimeoutObj = this.enableIdleTimeout(session);
|
||||
|
@ -1565,49 +1594,85 @@ export class Server {
|
|||
}
|
||||
|
||||
if (this.keepaliveTimeMs < KEEPALIVE_MAX_TIME_MS) {
|
||||
keeapliveTimeTimer = setInterval(() => {
|
||||
keepaliveTimeoutTimer = setTimeout(() => {
|
||||
sessionClosedByServer = true;
|
||||
this.channelzTrace.addTrace(
|
||||
'CT_INFO',
|
||||
'Connection dropped by keepalive timeout from ' + clientAddress
|
||||
);
|
||||
|
||||
session.close();
|
||||
keepaliveInterval = setInterval(() => {
|
||||
const keepaliveTimeout = setTimeout(() => {
|
||||
if (keepaliveInterval) {
|
||||
clearInterval(keepaliveInterval);
|
||||
keepaliveInterval = null;
|
||||
sessionClosedByServer = true;
|
||||
this.channelzTrace.addTrace(
|
||||
'CT_INFO',
|
||||
'Connection dropped by keepalive timeout from ' + clientAddress
|
||||
);
|
||||
session.close();
|
||||
}
|
||||
}, this.keepaliveTimeoutMs);
|
||||
keepaliveTimeoutTimer.unref?.();
|
||||
keepaliveTimeout.unref?.();
|
||||
|
||||
try {
|
||||
session.ping(
|
||||
(err: Error | null, duration: number, payload: Buffer) => {
|
||||
if (keepaliveTimeoutTimer) {
|
||||
clearTimeout(keepaliveTimeoutTimer);
|
||||
if (
|
||||
!session.ping(
|
||||
(err: Error | null, duration: number, payload: Buffer) => {
|
||||
clearTimeout(keepaliveTimeout);
|
||||
if (err) {
|
||||
if (keepaliveInterval) {
|
||||
clearInterval(keepaliveInterval);
|
||||
keepaliveInterval = null;
|
||||
}
|
||||
sessionClosedByServer = true;
|
||||
this.channelzTrace.addTrace(
|
||||
'CT_INFO',
|
||||
'Connection dropped due to error with ping frame ' +
|
||||
err.message +
|
||||
' return in ' +
|
||||
duration
|
||||
);
|
||||
session.close();
|
||||
}
|
||||
}
|
||||
|
||||
if (err) {
|
||||
sessionClosedByServer = true;
|
||||
this.channelzTrace.addTrace(
|
||||
'CT_INFO',
|
||||
'Connection dropped due to error of a ping frame ' +
|
||||
err.message +
|
||||
' return in ' +
|
||||
duration
|
||||
);
|
||||
|
||||
session.close();
|
||||
}
|
||||
}
|
||||
);
|
||||
)
|
||||
) {
|
||||
throw new Error('Server keepalive ping send failed');
|
||||
}
|
||||
channelzSessionInfo.keepAlivesSent += 1;
|
||||
} catch (e) {
|
||||
clearTimeout(keepaliveTimeoutTimer);
|
||||
// The ping can't be sent because the session is already closed
|
||||
// The ping can't be sent because the session is already closed, max outstanding pings reached, etc
|
||||
clearTimeout(keepaliveTimeout);
|
||||
if (keepaliveInterval) {
|
||||
clearInterval(keepaliveInterval);
|
||||
keepaliveInterval = null;
|
||||
}
|
||||
this.channelzTrace.addTrace(
|
||||
'CT_INFO',
|
||||
'Connection dropped due to error sending ping frame ' +
|
||||
(e instanceof Error ? e.message : 'unknown error')
|
||||
);
|
||||
session.destroy();
|
||||
}
|
||||
}, this.keepaliveTimeMs);
|
||||
keeapliveTimeTimer.unref?.();
|
||||
keepaliveInterval.unref?.();
|
||||
}
|
||||
|
||||
session.once('goaway', (errorCode, lastStreamID, opaqueData) => {
|
||||
if (errorCode === http2.constants.NGHTTP2_ENHANCE_YOUR_CALM) {
|
||||
this.channelzTrace.addTrace(
|
||||
'CT_INFO',
|
||||
'Connection dropped by client due GOAWAY of ENHANCE_YOUR_CALM from ' +
|
||||
clientAddress
|
||||
);
|
||||
} else {
|
||||
this.channelzTrace.addTrace(
|
||||
'CT_INFO',
|
||||
'Connection dropped by client via GOAWAY with error code ' +
|
||||
errorCode +
|
||||
' from ' +
|
||||
clientAddress
|
||||
);
|
||||
}
|
||||
sessionClosedByServer = true;
|
||||
session.destroy();
|
||||
});
|
||||
|
||||
session.on('close', () => {
|
||||
if (!sessionClosedByServer) {
|
||||
this.channelzTrace.addTrace(
|
||||
|
@ -1627,11 +1692,9 @@ export class Server {
|
|||
clearTimeout(connectionAgeGraceTimer);
|
||||
}
|
||||
|
||||
if (keeapliveTimeTimer) {
|
||||
clearInterval(keeapliveTimeTimer);
|
||||
if (keepaliveTimeoutTimer) {
|
||||
clearTimeout(keepaliveTimeoutTimer);
|
||||
}
|
||||
if (keepaliveInterval) {
|
||||
clearInterval(keepaliveInterval);
|
||||
keepaliveInterval = null;
|
||||
}
|
||||
|
||||
if (idleTimeoutObj !== null) {
|
||||
|
|
Loading…
Reference in New Issue