[close #635] constraint the getMember timeout by inject a backoffer (#636)

* constraint the getMember timeout by inject a backoffer

Signed-off-by: iosmanthus <myosmanthustree@gmail.com>

* fix checkHealth unlimited retry

Signed-off-by: iosmanthus <myosmanthustree@gmail.com>

* ./dev/javafmt

Signed-off-by: iosmanthus <myosmanthustree@gmail.com>

* fix unstable test

Signed-off-by: iosmanthus <myosmanthustree@gmail.com>

* unify default backoffer in tryUpdateLeader

Signed-off-by: iosmanthus <myosmanthustree@gmail.com>
This commit is contained in:
iosmanthus 2022-07-27 21:03:07 +08:00 committed by GitHub
parent 2ae746d1b0
commit 3128161b1f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 94 additions and 16 deletions

View File

@ -40,7 +40,6 @@ import org.tikv.common.streaming.StreamingResponse;
import org.tikv.common.util.BackOffFunction.BackOffFuncType;
import org.tikv.common.util.BackOffer;
import org.tikv.common.util.ChannelFactory;
import org.tikv.common.util.ConcreteBackOffer;
public abstract class AbstractGRPCClient<
BlockingStubT extends AbstractStub<BlockingStubT>,
@ -198,8 +197,7 @@ public abstract class AbstractGRPCClient<
}
}
protected boolean checkHealth(String addressStr, HostMapping hostMapping) {
BackOffer backOffer = ConcreteBackOffer.newCustomBackOff((int) (timeout * 2));
protected boolean checkHealth(BackOffer backOffer, String addressStr, HostMapping hostMapping) {
try {
return doCheckHealth(backOffer, addressStr, hostMapping);
} catch (Exception e) {

View File

@ -440,6 +440,8 @@ public class PDClient extends AbstractGRPCClient<PDBlockingStub, PDFutureStub>
private GetMembersResponse doGetMembers(BackOffer backOffer, URI uri) {
while (true) {
backOffer.checkTimeout();
try {
ManagedChannel probChan = channelFactory.getChannel(uriToAddr(uri), hostMapping);
PDGrpc.PDBlockingStub stub =
@ -459,8 +461,7 @@ public class PDClient extends AbstractGRPCClient<PDBlockingStub, PDFutureStub>
}
}
private GetMembersResponse getMembers(URI uri) {
BackOffer backOffer = ConcreteBackOffer.newCustomBackOff(BackOffer.PD_INFO_BACKOFF);
private GetMembersResponse getMembers(BackOffer backOffer, URI uri) {
try {
return doGetMembers(backOffer, uri);
} catch (Exception e) {
@ -497,11 +498,12 @@ public class PDClient extends AbstractGRPCClient<PDBlockingStub, PDFutureStub>
return true;
}
synchronized boolean createFollowerClientWrapper(String followerUrlStr, String leaderUrls) {
synchronized boolean createFollowerClientWrapper(
BackOffer backOffer, String followerUrlStr, String leaderUrls) {
// TODO: Why not strip protocol info on server side since grpc does not need it
try {
if (!checkHealth(followerUrlStr, hostMapping)) {
if (!checkHealth(backOffer, followerUrlStr, hostMapping)) {
return false;
}
@ -516,13 +518,13 @@ public class PDClient extends AbstractGRPCClient<PDBlockingStub, PDFutureStub>
return true;
}
public synchronized void updateLeaderOrForwardFollower() {
public synchronized void updateLeaderOrForwardFollower(BackOffer backOffer) {
if (System.currentTimeMillis() - lastUpdateLeaderTime < MIN_TRY_UPDATE_DURATION) {
return;
}
for (URI url : this.pdAddrs) {
// since resp is null, we need update leader's address by walking through all pd server.
GetMembersResponse resp = getMembers(url);
GetMembersResponse resp = getMembers(backOffer, url);
if (resp == null) {
continue;
}
@ -534,7 +536,8 @@ public class PDClient extends AbstractGRPCClient<PDBlockingStub, PDFutureStub>
leaderUrlStr = uriToAddr(addrToUri(leaderUrlStr));
// if leader is switched, just return.
if (checkHealth(leaderUrlStr, hostMapping) && createLeaderClientWrapper(leaderUrlStr)) {
if (checkHealth(backOffer, leaderUrlStr, hostMapping)
&& createLeaderClientWrapper(leaderUrlStr)) {
lastUpdateLeaderTime = System.currentTimeMillis();
return;
}
@ -561,7 +564,8 @@ public class PDClient extends AbstractGRPCClient<PDBlockingStub, PDFutureStub>
hasReachNextMember = true;
continue;
}
if (hasReachNextMember && createFollowerClientWrapper(followerUrlStr, leaderUrlStr)) {
if (hasReachNextMember
&& createFollowerClientWrapper(backOffer, followerUrlStr, leaderUrlStr)) {
logger.warn(
String.format("forward request to pd [%s] by pd [%s]", leaderUrlStr, followerUrlStr));
return;
@ -577,8 +581,9 @@ public class PDClient extends AbstractGRPCClient<PDBlockingStub, PDFutureStub>
public void tryUpdateLeader() {
for (URI url : this.pdAddrs) {
BackOffer backOffer = defaultBackOffer();
// since resp is null, we need update leader's address by walking through all pd server.
GetMembersResponse resp = getMembers(url);
GetMembersResponse resp = getMembers(backOffer, url);
if (resp == null) {
continue;
}
@ -591,7 +596,7 @@ public class PDClient extends AbstractGRPCClient<PDBlockingStub, PDFutureStub>
leaderUrlStr = uriToAddr(addrToUri(leaderUrlStr));
// If leader is not change but becomes available, we can cancel follower forward.
if (checkHealth(leaderUrlStr, hostMapping) && trySwitchLeader(leaderUrlStr)) {
if (checkHealth(backOffer, leaderUrlStr, hostMapping) && trySwitchLeader(leaderUrlStr)) {
if (!urls.equals(this.pdAddrs)) {
tryUpdateMembers(urls);
}
@ -705,7 +710,7 @@ public class PDClient extends AbstractGRPCClient<PDBlockingStub, PDFutureStub>
this.timeout = conf.getPdFirstGetMemberTimeout();
for (URI u : pdAddrs) {
logger.info("get members with pd " + u + ": start");
resp = getMembers(u);
resp = getMembers(defaultBackOffer(), u);
logger.info("get members with pd " + u + ": end");
if (resp != null) {
break;
@ -825,4 +830,8 @@ public class PDClient extends AbstractGRPCClient<PDBlockingStub, PDFutureStub>
public RequestKeyCodec getCodec() {
return codec;
}
private static BackOffer defaultBackOffer() {
return ConcreteBackOffer.newCustomBackOff(BackOffer.PD_INFO_BACKOFF);
}
}

View File

@ -59,7 +59,7 @@ public class PDErrorHandler<RespT> implements ErrorHandler<RespT> {
case PD_ERROR:
backOffer.doBackOff(
BackOffFunction.BackOffFuncType.BoPDRPC, new GrpcException(error.toString()));
client.updateLeaderOrForwardFollower();
client.updateLeaderOrForwardFollower(backOffer);
return true;
case REGION_PEER_NOT_ELECTED:
logger.debug(error.getMessage());
@ -80,7 +80,7 @@ public class PDErrorHandler<RespT> implements ErrorHandler<RespT> {
return false;
}
backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoPDRPC, e);
client.updateLeaderOrForwardFollower();
client.updateLeaderOrForwardFollower(backOffer);
return true;
}
}

View File

@ -20,6 +20,7 @@ package org.tikv.common;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;
import org.junit.After;
import org.junit.Before;
import org.tikv.common.TiConfiguration.ApiVersion;
@ -51,6 +52,16 @@ public abstract class PDMockServerTest {
session = TiSession.create(conf);
}
void updateConf(Function<TiConfiguration, TiConfiguration> update) throws Exception {
if (session == null) {
throw new IllegalStateException("Cluster is not initialized");
}
session.close();
session = TiSession.create(update.apply(session.getConf()));
}
void setup(String addr) throws IOException {
int[] ports = new int[3];
for (int i = 0; i < ports.length; i++) {

View File

@ -0,0 +1,60 @@
/*
* Copyright 2022 TiKV Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.tikv.common;
import com.google.protobuf.ByteString;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.tikv.raw.RawKVClient;
public class TimeoutTest extends MockThreeStoresTest {
@Before
public void init() throws Exception {
updateConf(
conf -> {
conf.setEnableAtomicForCAS(true);
conf.setTimeout(150);
conf.setForwardTimeout(200);
conf.setRawKVReadTimeoutInMS(400);
conf.setRawKVWriteTimeoutInMS(400);
conf.setRawKVBatchReadTimeoutInMS(400);
conf.setRawKVBatchWriteTimeoutInMS(400);
conf.setRawKVWriteSlowLogInMS(50);
conf.setRawKVReadSlowLogInMS(50);
conf.setRawKVBatchReadSlowLogInMS(50);
conf.setRawKVBatchWriteSlowLogInMS(50);
return conf;
});
}
private RawKVClient createClient() {
return session.createRawClient();
}
@Test
public void testTimeoutInTime() {
try (RawKVClient client = createClient()) {
pdServers.get(0).stop();
long start = System.currentTimeMillis();
client.get(ByteString.copyFromUtf8("key"));
long end = System.currentTimeMillis();
Assert.assertTrue(end - start < session.getConf().getRawKVReadTimeoutInMS() * 2L);
}
}
}