mirror of https://github.com/tikv/client-java.git

Try other peer when current leader of this region is not available. (#232)

* Auto switch leader peer rather than ask PD.

Signed-off-by: Little-Wallace <bupt2013211450@gmail.com>

parent 48b104f196
commit d4d1c6ac6a
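The change below teaches the client to probe follower peers directly as candidate leaders when the current leader stops responding, instead of immediately invalidating the region and asking PD again. A minimal sketch of that idea, assuming the regionManager/region/updateClientStub members introduced in the diff below (illustrative only, not the committed code):

    // Probe each follower once; the first reachable store becomes the candidate
    // leader. Only when every follower has been tried does the client fall back
    // to invalidating the region, which forces a fresh PD lookup.
    private boolean probeCandidateLeader() {
      for (Metapb.Peer peer : region.getFollowerList()) {
        TiStore store = regionManager.getStoreById(peer.getStoreId());
        if (store != null && store.isReachable()) {
          candidateLeader = peer; // remembered until a response confirms or denies it
          targetStore = store;
          updateClientStub();     // re-point the gRPC stubs at the probed store
          return true;            // retry the pending request against this peer
        }
      }
      regionManager.onRequestFail(region); // exhausted: invalidate and ask PD
      return false;
    }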
@@ -89,9 +89,6 @@ public abstract class AbstractGRPCClient<
             stub.getChannel(), method, stub.getCallOptions(), requestFactory.get());
           },
           method.getFullMethodName());
-    if (resp != null && this.conf.getEnableGrpcForward()) {
-      tryUpdateProxy();
-    }

     if (logger.isTraceEnabled()) {
       logger.trace(String.format("leaving %s...", method.getFullMethodName()));
@@ -180,8 +177,6 @@ public abstract class AbstractGRPCClient<

   protected abstract StubT getAsyncStub();

-  protected abstract void tryUpdateProxy();
-
   protected boolean checkHealth(String addressStr, HostMapping hostMapping) {
     ManagedChannel channel = channelFactory.getChannel(addressStr, hostMapping);
     HealthGrpc.HealthBlockingStub stub =
@@ -557,9 +557,6 @@ public class PDClient extends AbstractGRPCClient<PDBlockingStub, PDStub>
     return pdClientWrapper.getAsyncStub().withDeadlineAfter(getTimeout(), TimeUnit.MILLISECONDS);
   }

-  @Override
-  protected void tryUpdateProxy() {}
-
   private void initCluster() {
     GetMembersResponse resp = null;
     List<URI> pdAddrs = getConf().getPdAddrs();
@@ -94,6 +94,8 @@ public class KVErrorHandler<RespT> implements ErrorHandler<RespT> {
     Errorpb.Error error = regionHandler.getRegionError(resp);
     if (error != null) {
       return regionHandler.handleRegionError(backOffer, error);
+    } else {
+      regionHandler.tryUpdateRegionStore();
     }

     // Key error handling logic
@@ -42,10 +42,16 @@ public class RegionErrorHandler<RespT> implements ErrorHandler<RespT> {
     Errorpb.Error error = getRegionError(resp);
     if (error != null) {
       return handleRegionError(backOffer, error);
+    } else {
+      tryUpdateRegionStore();
     }
     return false;
   }

+  public void tryUpdateRegionStore() {
+    recv.tryUpdateRegionStore();
+  }
+
   public boolean handleRegionError(BackOffer backOffer, Errorpb.Error error) {
     if (error.hasNotLeader()) {
       // this error is reported from raftstore:
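With the two hunks above, a response that carries no region error now flows back into the client through tryUpdateRegionStore(), so a pending candidate leader can be committed to the cache. A compact sketch of that confirmation path, assuming the candidateLeader field and RegionManager.updateLeader from the hunks below (the committed method also reconciles proxy stores):

    // A clean response while probing a candidate confirms the guess: the probed
    // peer answered without NotLeader, so treat it as the region's new leader.
    public void tryUpdateRegionStore() {
      if (candidateLeader != null) {
        regionManager.updateLeader(region, candidateLeader.getStoreId());
        candidateLeader = null;
      }
    }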
@@ -22,11 +22,9 @@ import static com.google.common.base.Preconditions.checkNotNull;

 import io.grpc.ManagedChannel;
 import io.grpc.Metadata;
-import io.grpc.health.v1.HealthCheckRequest;
-import io.grpc.health.v1.HealthCheckResponse;
-import io.grpc.health.v1.HealthGrpc;
 import io.grpc.stub.MetadataUtils;
 import java.util.List;
+import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -34,6 +32,7 @@ import org.tikv.common.AbstractGRPCClient;
 import org.tikv.common.TiConfiguration;
 import org.tikv.common.exception.GrpcException;
 import org.tikv.common.util.ChannelFactory;
+import org.tikv.kvproto.Kvrpcpb;
 import org.tikv.kvproto.Metapb;
 import org.tikv.kvproto.TikvGrpc;
@@ -46,7 +45,9 @@ public abstract class AbstractRegionStoreClient
   protected TiRegion region;
   protected TiStore targetStore;
   protected TiStore originStore;
-  protected long retryTimes;
+  private long retryForwardTimes;
+  private long retryLeaderTimes;
+  private Metapb.Peer candidateLeader;

   protected AbstractRegionStoreClient(
       TiConfiguration conf,
@@ -64,12 +65,17 @@ public abstract class AbstractRegionStoreClient
     this.regionManager = regionManager;
     this.targetStore = store;
     this.originStore = null;
-    this.retryTimes = 0;
+    this.candidateLeader = null;
+    this.retryForwardTimes = 0;
+    this.retryLeaderTimes = 0;
     if (this.targetStore.getProxyStore() != null) {
       this.timeout = conf.getForwardTimeout();
+    } else if (!this.targetStore.isReachable() && !this.targetStore.canForwardFirst()) {
+      onStoreUnreachable();
     }
   }

+  @Override
   public TiRegion getRegion() {
     return region;
   }
@@ -103,44 +109,155 @@ public abstract class AbstractRegionStoreClient
     if (!region.getRegionEpoch().equals(newRegion.getRegionEpoch())) {
       return false;
     }

+    // If we try one peer but find the leader has not changed, we do not need try other peers.
+    if (candidateLeader != null
+        && region.getLeader().getStoreId() == newRegion.getLeader().getStoreId()) {
+      retryLeaderTimes = newRegion.getFollowerList().size();
+      originStore = null;
+    }
+    candidateLeader = null;
     region = newRegion;
     targetStore = regionManager.getStoreById(region.getLeader().getStoreId());
-    originStore = null;
-    String addressStr = targetStore.getStore().getAddress();
-    ManagedChannel channel =
-        channelFactory.getChannel(addressStr, regionManager.getPDClient().getHostMapping());
-    blockingStub = TikvGrpc.newBlockingStub(channel);
-    asyncStub = TikvGrpc.newStub(channel);
+    updateClientStub();
     return true;
   }

   @Override
   public boolean onStoreUnreachable() {
-    if (!conf.getEnableGrpcForward()) {
-      regionManager.onRequestFail(region);
-      return false;
-    }
     if (targetStore.getProxyStore() == null) {
-      if (!targetStore.isUnreachable()) {
-        if (checkHealth(targetStore.getStore())) {
-          return true;
-        }
+      if (targetStore.isReachable()) {
+        return true;
       }
-    } else if (retryTimes > region.getFollowerList().size()) {
-      logger.warn(
-          String.format(
-              "retry time exceed for region[%d], invalid this region[%d]",
-              region.getId(), targetStore.getId()));
-      regionManager.onRequestFail(region);
-      return false;
     }
+
+    // If this store has failed to forward request too many times, we shall try other peer at
+    // first so that we can reduce the latency cost by fail requests.
+    if (targetStore.canForwardFirst()) {
+      if (conf.getEnableGrpcForward() && retryForwardTimes <= region.getFollowerList().size()) {
+        return retryOtherStoreByProxyForward();
+      }
+      if (retryOtherStoreLeader()) {
+        return true;
+      }
+    } else {
+      if (retryOtherStoreLeader()) {
+        return true;
+      }
+      if (conf.getEnableGrpcForward() && retryForwardTimes <= region.getFollowerList().size()) {
+        return retryOtherStoreByProxyForward();
+      }
+      return true;
+    }
+
+    logger.warn(
+        String.format(
+            "retry time exceed for region[%d], invalid this region[%d]",
+            region.getId(), targetStore.getId()));
+    regionManager.onRequestFail(region);
+    return false;
+  }
+
+  protected Kvrpcpb.Context makeContext(TiStoreType storeType) {
+    if (candidateLeader != null && storeType == TiStoreType.TiKV) {
+      return region.getReplicaContext(candidateLeader, java.util.Collections.emptySet());
+    } else {
+      return region.getReplicaContext(java.util.Collections.emptySet(), storeType);
+    }
+  }
+
+  protected Kvrpcpb.Context makeContext(Set<Long> resolvedLocks, TiStoreType storeType) {
+    if (candidateLeader != null && storeType == TiStoreType.TiKV) {
+      return region.getReplicaContext(candidateLeader, resolvedLocks);
+    } else {
+      return region.getReplicaContext(resolvedLocks, storeType);
+    }
+  }
+
+  @Override
+  public void tryUpdateRegionStore() {
+    if (originStore != null) {
+      if (originStore.getId() == targetStore.getId()) {
+        logger.warn(
+            String.format(
+                "update store [%s] by proxy-store [%s]",
+                targetStore.getStore().getAddress(), targetStore.getProxyStore().getAddress()));
+        // We do not need to mark the store can-forward, because if one store has grpc forward
+        // successfully, it will create a new store object, which is can-forward.
+        regionManager.updateStore(originStore, targetStore);
+      } else {
+        // If we try to forward request to leader by follower failed, it means that the store of
+        // old leader may be unavailable but the new leader has not been report to PD. So we can
+        // ban this store for a short time to avoid too many request try forward rather than try
+        // other peer.
+        originStore.forwardFail();
+      }
+    }
+    if (candidateLeader != null) {
+      logger.warn(
+          String.format(
+              "update leader to store [%d] for region[%d]",
+              candidateLeader.getStoreId(), region.getId()));
+      this.regionManager.updateLeader(region, candidateLeader.getStoreId());
+    }
+  }
+
+  private boolean retryOtherStoreLeader() {
+    List<Metapb.Peer> peers = region.getFollowerList();
+    if (retryLeaderTimes >= peers.size()) {
+      return false;
+    }
+    retryLeaderTimes += 1;
+    boolean hasVisitedStore = false;
+    for (Metapb.Peer cur : peers) {
+      if (candidateLeader == null || hasVisitedStore) {
+        TiStore store = regionManager.getStoreById(cur.getStoreId());
+        if (store != null && store.isReachable()) {
+          targetStore = store;
+          candidateLeader = cur;
+          logger.warn(
+              String.format(
+                  "try store [%d],peer[%d] for region[%d], which may be new leader",
+                  targetStore.getId(), candidateLeader.getId(), region.getId()));
+          updateClientStub();
+          return true;
+        } else {
+          continue;
+        }
+      } else if (candidateLeader.getId() == cur.getId()) {
+        hasVisitedStore = true;
+      }
+    }
+    candidateLeader = null;
+    retryLeaderTimes = peers.size();
+    return false;
+  }
+
+  private void updateClientStub() {
+    String addressStr = targetStore.getStore().getAddress();
+    ManagedChannel channel =
+        channelFactory.getChannel(addressStr, regionManager.getPDClient().getHostMapping());
+    blockingStub = TikvGrpc.newBlockingStub(channel);
+    asyncStub = TikvGrpc.newStub(channel);
+  }
+
+  private boolean retryOtherStoreByProxyForward() {
+    if (!targetStore.isValid()) {
+      targetStore = regionManager.getStoreById(targetStore.getId());
+      logger.warn(
+          String.format("store [%d] has been invalid", region.getId(), targetStore.getId()));
+      return true;
+    }

     TiStore proxyStore = switchProxyStore();
     if (proxyStore == null) {
       logger.warn(
           String.format(
               "no forward store can be selected for store [%s] and region[%d]",
               targetStore.getStore().getAddress(), region.getId()));
-      regionManager.onRequestFail(region);
       return false;
     }
     if (originStore == null) {
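The two makeContext overloads added above let every request builder address the candidate leader transparently while a probe is in flight; the RegionStoreClient hunks later in this diff switch all call sites over to them. A sketch of the call pattern (RawGetRequest is one of the kvproto builders used below):

    // Request suppliers go through makeContext(...) so a probe automatically
    // carries the candidate peer's context instead of the stale leader's.
    Supplier<RawGetRequest> factory =
        () ->
            RawGetRequest.newBuilder()
                .setContext(makeContext(storeType)) // candidate-aware context
                .setKey(key)
                .build();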
@@ -150,7 +267,7 @@ public abstract class AbstractRegionStoreClient
       }
     }
     targetStore = proxyStore;
-    retryTimes += 1;
+    retryForwardTimes += 1;
     logger.warn(
         String.format(
             "forward request to store [%s] by store [%s] for region[%d]",
@@ -167,58 +284,24 @@ public abstract class AbstractRegionStoreClient
     return true;
   }

-  @Override
-  protected void tryUpdateProxy() {
-    if (originStore != null) {
-      logger.warn(
-          String.format(
-              "update store [%s] by proxy-store [%s]",
-              targetStore.getStore().getAddress(), targetStore.getProxyStore().getAddress()));
-      regionManager.updateStore(originStore, targetStore);
-    }
-  }
-
-  private boolean checkHealth(Metapb.Store store) {
-    String addressStr = store.getAddress();
-    ManagedChannel channel =
-        channelFactory.getChannel(addressStr, regionManager.getPDClient().getHostMapping());
-    HealthGrpc.HealthBlockingStub stub =
-        HealthGrpc.newBlockingStub(channel)
-            .withDeadlineAfter(conf.getGrpcHealthCheckTimeout(), TimeUnit.MILLISECONDS);
-    HealthCheckRequest req = HealthCheckRequest.newBuilder().build();
-    try {
-      HealthCheckResponse resp = stub.check(req);
-      if (resp.getStatus() != HealthCheckResponse.ServingStatus.SERVING) {
-        return false;
-      }
-    } catch (Exception e) {
-      return false;
-    }
-    return true;
-  }
-
   private TiStore switchProxyStore() {
     boolean hasVisitedStore = false;
     List<Metapb.Peer> peers = region.getFollowerList();
-    for (int i = 0; i < peers.size() * 2; i++) {
-      int idx = i % peers.size();
-      Metapb.Peer peer = peers.get(idx);
-      if (peer.getStoreId() != region.getLeader().getStoreId()) {
-        if (targetStore.getProxyStore() == null) {
-          TiStore store = regionManager.getStoreById(peer.getStoreId());
-          if (checkHealth(store.getStore())) {
-            return targetStore.withProxy(store.getStore());
-          }
-        } else {
-          if (peer.getStoreId() == targetStore.getProxyStore().getId()) {
-            hasVisitedStore = true;
-          } else if (hasVisitedStore) {
-            TiStore proxyStore = regionManager.getStoreById(peer.getStoreId());
-            if (!proxyStore.isUnreachable() && checkHealth(proxyStore.getStore())) {
-              return targetStore.withProxy(proxyStore.getStore());
-            }
-          }
-        }
-      }
-    }
+    if (peers.isEmpty()) {
+      return null;
+    }
+    Metapb.Store proxyStore = targetStore.getProxyStore();
+    if (proxyStore == null || peers.get(peers.size() - 1).getStoreId() == proxyStore.getId()) {
+      hasVisitedStore = true;
+    }
+    for (Metapb.Peer peer : peers) {
+      if (hasVisitedStore) {
+        TiStore store = regionManager.getStoreById(peer.getStoreId());
+        if (store.isReachable()) {
+          return targetStore.withProxy(store.getStore());
+        }
+      } else if (peer.getStoreId() == proxyStore.getId()) {
+        hasVisitedStore = true;
+      }
+    }
     return null;
@@ -0,0 +1,186 @@
+package org.tikv.common.region;
+
+import static org.tikv.common.codec.KeyUtils.formatBytesUTF8;
+import static org.tikv.common.util.KeyRangeUtils.makeRange;
+
+import com.google.common.collect.RangeMap;
+import com.google.common.collect.TreeRangeMap;
+import com.google.protobuf.ByteString;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.tikv.common.key.Key;
+import org.tikv.common.util.BackOffer;
+
+public class RegionCache {
+  private static final Logger logger = LoggerFactory.getLogger(RegionCache.class);
+
+  private final Map<Long, TiRegion> regionCache;
+  private final Map<Long, TiStore> storeCache;
+  private final RangeMap<Key, Long> keyToRegionIdCache;
+
+  public RegionCache() {
+    regionCache = new HashMap<>();
+    storeCache = new HashMap<>();
+
+    keyToRegionIdCache = TreeRangeMap.create();
+  }
+
+  public synchronized TiRegion getRegionByKey(ByteString key, BackOffer backOffer) {
+    Long regionId;
+    if (key.isEmpty()) {
+      // if key is empty, it must be the start key.
+      regionId = keyToRegionIdCache.get(Key.toRawKey(key, true));
+    } else {
+      regionId = keyToRegionIdCache.get(Key.toRawKey(key));
+    }
+    if (logger.isDebugEnabled()) {
+      logger.debug(
+          String.format("getRegionByKey key[%s] -> ID[%s]", formatBytesUTF8(key), regionId));
+    }
+
+    if (regionId == null) {
+      return null;
+    }
+    TiRegion region;
+    region = regionCache.get(regionId);
+    if (logger.isDebugEnabled()) {
+      logger.debug(String.format("getRegionByKey ID[%s] -> Region[%s]", regionId, region));
+    }
+    return region;
+  }
+
+  public synchronized TiRegion putRegion(TiRegion region) {
+    if (logger.isDebugEnabled()) {
+      logger.debug("putRegion: " + region);
+    }
+    TiRegion oldRegion = regionCache.get(region.getId());
+    if (oldRegion != null) {
+      if (oldRegion.getMeta().equals(region.getMeta())) {
+        return oldRegion;
+      } else {
+        invalidateRegion(oldRegion);
+      }
+    }
+    regionCache.put(region.getId(), region);
+    keyToRegionIdCache.put(makeRange(region.getStartKey(), region.getEndKey()), region.getId());
+    return region;
+  }
+
+  @Deprecated
+  public synchronized TiRegion getRegionById(long regionId) {
+    TiRegion region = regionCache.get(regionId);
+    if (logger.isDebugEnabled()) {
+      logger.debug(String.format("getRegionByKey ID[%s] -> Region[%s]", regionId, region));
+    }
+    return region;
+  }
+
+  private synchronized TiRegion getRegionFromCache(long regionId) {
+    return regionCache.get(regionId);
+  }
+
+  /** Removes region associated with regionId from regionCache. */
+  public synchronized void invalidateRegion(TiRegion region) {
+    try {
+      if (logger.isDebugEnabled()) {
+        logger.debug(String.format("invalidateRegion ID[%s]", region.getId()));
+      }
+      TiRegion oldRegion = regionCache.get(region.getId());
+      if (oldRegion != null && oldRegion == region) {
+        keyToRegionIdCache.remove(makeRange(region.getStartKey(), region.getEndKey()));
+        regionCache.remove(region.getId());
+      }
+    } catch (Exception ignore) {
+    }
+  }
+
+  public synchronized boolean updateRegion(TiRegion expected, TiRegion region) {
+    try {
+      if (logger.isDebugEnabled()) {
+        logger.debug(String.format("invalidateRegion ID[%s]", region.getId()));
+      }
+      TiRegion oldRegion = regionCache.get(region.getId());
+      if (!expected.getMeta().equals(oldRegion.getMeta())) {
+        return false;
+      } else {
+        if (oldRegion != null) {
+          keyToRegionIdCache.remove(makeRange(oldRegion.getStartKey(), oldRegion.getEndKey()));
+        }
+        regionCache.put(region.getId(), region);
+        keyToRegionIdCache.put(makeRange(region.getStartKey(), region.getEndKey()), region.getId());
+        return true;
+      }
+    } catch (Exception ignore) {
+      return false;
+    }
+  }
+
+  public synchronized boolean updateStore(TiStore oldStore, TiStore newStore) {
+    if (!newStore.isValid()) {
+      return false;
+    }
+    TiStore originStore = storeCache.get(oldStore.getId());
+    if (originStore == oldStore) {
+      storeCache.put(newStore.getId(), newStore);
+      oldStore.markInvalid();
+      return true;
+    }
+    return false;
+  }
+
+  public synchronized void invalidateAllRegionForStore(TiStore store) {
+    TiStore oldStore = storeCache.get(store.getId());
+    if (oldStore != store) {
+      return;
+    }
+    List<TiRegion> regionToRemove = new ArrayList<>();
+    for (TiRegion r : regionCache.values()) {
+      if (r.getLeader().getStoreId() == store.getId()) {
+        if (logger.isDebugEnabled()) {
+          logger.debug(String.format("invalidateAllRegionForStore Region[%s]", r));
+        }
+        regionToRemove.add(r);
+      }
+    }
+
+    logger.warn(String.format("invalid store [%d]", store.getId()));
+    // remove region
+    for (TiRegion r : regionToRemove) {
+      keyToRegionIdCache.remove(makeRange(r.getStartKey(), r.getEndKey()));
+      regionCache.remove(r.getId());
+    }
+  }
+
+  public synchronized void invalidateStore(long storeId) {
+    TiStore store = storeCache.remove(storeId);
+    if (store != null) {
+      store.markInvalid();
+    }
+  }
+
+  public synchronized TiStore getStoreById(long id) {
+    return storeCache.get(id);
+  }
+
+  public synchronized boolean putStore(long id, TiStore store) {
+    TiStore oldStore = storeCache.get(id);
+    if (oldStore != null) {
+      if (oldStore.equals(store)) {
+        return false;
+      } else {
+        oldStore.markInvalid();
+      }
+    }
+    storeCache.put(id, store);
+    return true;
+  }
+
+  public synchronized void clearAll() {
+    keyToRegionIdCache.clear();
+    regionCache.clear();
+  }
+}
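RegionCache above is now a standalone class that RegionManager delegates to. A minimal usage sketch, assuming TiRegion/TiStore values obtained from PD responses (illustrative only):

    // The cache keeps three structures in step under one lock: region id -> region,
    // store id -> store, and a range map from key span to region id.
    RegionCache cache = new RegionCache();
    cache.putRegion(region);                              // indexes [startKey, endKey) -> region id
    TiRegion hit = cache.getRegionByKey(key, backOffer);  // range-map lookup, then id lookup
    cache.invalidateRegion(hit);                          // drops both the range entry and the id entry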
@@ -23,5 +23,7 @@ public interface RegionErrorReceiver {
   /// return whether we need to retry this request.
   boolean onStoreUnreachable();

+  void tryUpdateRegionStore();
+
   TiRegion getRegion();
 }
@@ -18,16 +18,10 @@
 package org.tikv.common.region;

 import static org.tikv.common.codec.KeyUtils.formatBytesUTF8;
-import static org.tikv.common.util.KeyRangeUtils.makeRange;

-import com.google.common.collect.RangeMap;
-import com.google.common.collect.TreeRangeMap;
 import com.google.protobuf.ByteString;
 import io.prometheus.client.Histogram;
-import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import java.util.concurrent.Executors;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
@@ -40,7 +34,6 @@ import org.tikv.common.TiConfiguration;
 import org.tikv.common.event.CacheInvalidateEvent;
 import org.tikv.common.exception.GrpcException;
 import org.tikv.common.exception.TiClientInternalException;
-import org.tikv.common.key.Key;
 import org.tikv.common.util.BackOffer;
 import org.tikv.common.util.ChannelFactory;
 import org.tikv.common.util.ConcreteBackOffer;
@@ -52,22 +45,22 @@ import org.tikv.kvproto.Metapb.StoreState;
 @SuppressWarnings("UnstableApiUsage")
 public class RegionManager {
   private static final Logger logger = LoggerFactory.getLogger(RegionManager.class);
+  public static final Histogram GET_REGION_BY_KEY_REQUEST_LATENCY =
+      Histogram.build()
+          .name("client_java_get_region_by_requests_latency")
+          .help("getRegionByKey request latency.")
+          .register();
+
   // TODO: the region cache logic need rewrite.
   // https://github.com/pingcap/tispark/issues/1170
   private final RegionCache cache;
   private final ReadOnlyPDClient pdClient;
   private final TiConfiguration conf;
   private final ScheduledExecutorService executor;
-  private final UnreachableStoreChecker storeChecker;
+  private final StoreHealthyChecker storeChecker;

   private final Function<CacheInvalidateEvent, Void> cacheInvalidateCallback;

-  public static final Histogram GET_REGION_BY_KEY_REQUEST_LATENCY =
-      Histogram.build()
-          .name("client_java_get_region_by_requests_latency")
-          .help("getRegionByKey request latency.")
-          .register();
-
   // To avoid double retrieval, we used the async version of grpc
   // When rpc not returned, instead of call again, it wait for previous one done
   public RegionManager(
@@ -94,10 +87,11 @@ public class RegionManager {
     this.conf = conf;

     if (enableGrpcForward) {
-      UnreachableStoreChecker storeChecker = new UnreachableStoreChecker(channelFactory, pdClient);
+      StoreHealthyChecker storeChecker =
+          new StoreHealthyChecker(channelFactory, pdClient, this.cache);
       this.storeChecker = storeChecker;
       this.executor = Executors.newScheduledThreadPool(1);
-      this.executor.scheduleAtFixedRate(storeChecker, 10, 10, TimeUnit.SECONDS);
+      this.executor.scheduleAtFixedRate(storeChecker, 1, 1, TimeUnit.SECONDS);
     } else {
       this.storeChecker = null;
       this.executor = null;
@@ -132,13 +126,21 @@ public class RegionManager {
   }

   public TiRegion getRegionByKey(ByteString key, BackOffer backOffer) {
+    Histogram.Timer requestTimer = GET_REGION_BY_KEY_REQUEST_LATENCY.startTimer();
     TiRegion region = cache.getRegionByKey(key, backOffer);
-    if (region == null) {
-      logger.debug("Key not found in keyToRegionIdCache:" + formatBytesUTF8(key));
-      Pair<Metapb.Region, Metapb.Peer> regionAndLeader = pdClient.getRegionByKey(backOffer, key);
-      region =
-          cache.putRegion(createRegion(regionAndLeader.first, regionAndLeader.second, backOffer));
+    try {
+      if (region == null) {
+        logger.debug("Key not found in keyToRegionIdCache:" + formatBytesUTF8(key));
+        Pair<Metapb.Region, Metapb.Peer> regionAndLeader = pdClient.getRegionByKey(backOffer, key);
+        region =
+            cache.putRegion(createRegion(regionAndLeader.first, regionAndLeader.second, backOffer));
+      }
+    } catch (Exception e) {
+      return null;
+    } finally {
+      requestTimer.observeDuration();
     }

     return region;
   }
@@ -232,7 +234,10 @@ public class RegionManager {
       if (store.getStore().getState().equals(StoreState.Tombstone)) {
         return null;
       }
-      return cache.putStore(id, store);
+      if (cache.putStore(id, store)) {
+        storeChecker.scheduleStoreHealthCheck(store);
+      }
+      return store;
     } catch (Exception e) {
       throw new GrpcException(e);
     }
@@ -246,7 +251,10 @@ public class RegionManager {
     cache.invalidateRegion(region);
   }

-  public synchronized TiRegion updateLeader(TiRegion region, long storeId) {
+  public TiRegion updateLeader(TiRegion region, long storeId) {
+    if (region.getLeader().getStoreId() == storeId) {
+      return region;
+    }
     TiRegion newRegion = region.switchPeer(storeId);
     if (cache.updateRegion(region, newRegion)) {
       return newRegion;
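updateLeader drops its synchronized modifier because RegionCache.updateRegion already provides compare-and-set semantics: the swap happens only if the cached meta still equals the expected value. A self-contained sketch of that CAS shape (hypothetical helper, not the committed code):

    // The swap succeeds only if the cached meta still matches `expected`,
    // so two threads racing to update a leader cannot clobber each other;
    // the loser simply re-reads the cache.
    static synchronized boolean casUpdate(
        Map<Long, TiRegion> cache, TiRegion expected, TiRegion next) {
      TiRegion current = cache.get(next.getId());
      if (current == null || !expected.getMeta().equals(current.getMeta())) {
        return false;
      }
      cache.put(next.getId(), next);
      return true;
    }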
@@ -259,13 +267,7 @@ public class RegionManager {

   public synchronized void updateStore(TiStore oldStore, TiStore newStore) {
     if (cache.updateStore(oldStore, newStore)) {
-      if (newStore.isUnreachable()) {
-        logger.warn(
-            String.format(
-                "check health for store [%s] in background thread",
-                newStore.getStore().getAddress()));
-        this.storeChecker.scheduleStoreHealthCheck(newStore);
-      }
+      storeChecker.scheduleStoreHealthCheck(newStore);
     }
   }
@@ -283,6 +285,24 @@ public class RegionManager {
     cache.invalidateRegion(region);
   }

+  /** If region has changed, return the new one and update cache. */
+  public TiRegion getRegionSkipCache(TiRegion region) {
+    BackOffer backOffer = ConcreteBackOffer.newGetBackOff();
+    try {
+      Pair<Metapb.Region, Metapb.Peer> regionAndLeader =
+          pdClient.getRegionByID(backOffer, region.getId());
+      if (!regionAndLeader.first.equals(region.getMeta())) {
+        region = createRegion(regionAndLeader.first, regionAndLeader.second, backOffer);
+        return cache.putRegion(region);
+      } else {
+        logger.warn("Cannot get region from PD for region id: " + region.getId());
+        return null;
+      }
+    } catch (Exception e) {
+      return null;
+    }
+  }
+
   public void invalidateStore(long storeId) {
     cache.invalidateStore(storeId);
   }
@@ -290,177 +310,4 @@ public class RegionManager {
   public void invalidateRegion(TiRegion region) {
     cache.invalidateRegion(region);
   }
-
-  public static class RegionCache {
-    private final Map<Long, TiRegion> regionCache;
-    private final Map<Long, TiStore> storeCache;
-    private final RangeMap<Key, Long> keyToRegionIdCache;
-
-    public RegionCache() {
-      regionCache = new HashMap<>();
-      storeCache = new HashMap<>();
-
-      keyToRegionIdCache = TreeRangeMap.create();
-    }
-
-    public synchronized TiRegion getRegionByKey(ByteString key, BackOffer backOffer) {
-      Histogram.Timer requestTimer = GET_REGION_BY_KEY_REQUEST_LATENCY.startTimer();
-      try {
-        Long regionId;
-        if (key.isEmpty()) {
-          // if key is empty, it must be the start key.
-          regionId = keyToRegionIdCache.get(Key.toRawKey(key, true));
-        } else {
-          regionId = keyToRegionIdCache.get(Key.toRawKey(key));
-        }
-        if (logger.isDebugEnabled()) {
-          logger.debug(
-              String.format("getRegionByKey key[%s] -> ID[%s]", formatBytesUTF8(key), regionId));
-        }
-
-        if (regionId == null) {
-          return null;
-        }
-        TiRegion region;
-        region = regionCache.get(regionId);
-        if (logger.isDebugEnabled()) {
-          logger.debug(String.format("getRegionByKey ID[%s] -> Region[%s]", regionId, region));
-        }
-
-        return region;
-      } finally {
-        requestTimer.observeDuration();
-      }
-    }
-
-    private synchronized TiRegion putRegion(TiRegion region) {
-      if (logger.isDebugEnabled()) {
-        logger.debug("putRegion: " + region);
-      }
-      TiRegion oldRegion = regionCache.get(region.getId());
-      if (oldRegion != null) {
-        if (oldRegion.getMeta().equals(region.getMeta())) {
-          return oldRegion;
-        } else {
-          invalidateRegion(oldRegion);
-        }
-      }
-      regionCache.put(region.getId(), region);
-      keyToRegionIdCache.put(makeRange(region.getStartKey(), region.getEndKey()), region.getId());
-      return region;
-    }
-
-    @Deprecated
-    private synchronized TiRegion getRegionById(long regionId) {
-      TiRegion region = regionCache.get(regionId);
-      if (logger.isDebugEnabled()) {
-        logger.debug(String.format("getRegionByKey ID[%s] -> Region[%s]", regionId, region));
-      }
-      return region;
-    }
-
-    private synchronized TiRegion getRegionFromCache(long regionId) {
-      return regionCache.get(regionId);
-    }
-
-    /** Removes region associated with regionId from regionCache. */
-    public synchronized void invalidateRegion(TiRegion region) {
-      try {
-        if (logger.isDebugEnabled()) {
-          logger.debug(String.format("invalidateRegion ID[%s]", region.getId()));
-        }
-        TiRegion oldRegion = regionCache.get(region.getId());
-        if (oldRegion != null && oldRegion == region) {
-          keyToRegionIdCache.remove(makeRange(region.getStartKey(), region.getEndKey()));
-          regionCache.remove(region.getId());
-        }
-      } catch (Exception ignore) {
-      }
-    }
-
-    public synchronized boolean updateRegion(TiRegion expected, TiRegion region) {
-      try {
-        if (logger.isDebugEnabled()) {
-          logger.debug(String.format("invalidateRegion ID[%s]", region.getId()));
-        }
-        TiRegion oldRegion = regionCache.get(region.getId());
-        if (!expected.getMeta().equals(oldRegion.getMeta())) {
-          return false;
-        } else {
-          if (oldRegion != null) {
-            keyToRegionIdCache.remove(makeRange(oldRegion.getStartKey(), oldRegion.getEndKey()));
-          }
-          regionCache.put(region.getId(), region);
-          keyToRegionIdCache.put(
-              makeRange(region.getStartKey(), region.getEndKey()), region.getId());
-          return true;
-        }
-      } catch (Exception ignore) {
-        return false;
-      }
-    }
-
-    public synchronized boolean updateStore(TiStore oldStore, TiStore newStore) {
-      TiStore originStore = storeCache.get(oldStore.getId());
-      if (originStore == oldStore) {
-        storeCache.put(newStore.getId(), newStore);
-        if (oldStore != null && oldStore.isUnreachable()) {
-          oldStore.markReachable();
-        }
-        if (newStore.getProxyStore() != null) {
-          newStore.markUnreachable();
-        }
-        return true;
-      }
-      return false;
-    }
-
-    public synchronized void invalidateAllRegionForStore(TiStore store) {
-      TiStore oldStore = storeCache.get(store.getId());
-      if (oldStore != store) {
-        return;
-      }
-      List<TiRegion> regionToRemove = new ArrayList<>();
-      for (TiRegion r : regionCache.values()) {
-        if (r.getLeader().getStoreId() == store.getId()) {
-          if (logger.isDebugEnabled()) {
-            logger.debug(String.format("invalidateAllRegionForStore Region[%s]", r));
-          }
-          regionToRemove.add(r);
-        }
-      }
-
-      logger.warn(String.format("invalid store [%d]", store.getId()));
-      // remove region
-      for (TiRegion r : regionToRemove) {
-        keyToRegionIdCache.remove(makeRange(r.getStartKey(), r.getEndKey()));
-        regionCache.remove(r.getId());
-      }
-    }
-
-    public synchronized void invalidateStore(long storeId) {
-      TiStore store = storeCache.remove(storeId);
-      if (store != null) {
-        store.markReachable();
-      }
-    }
-
-    public synchronized TiStore getStoreById(long id) {
-      return storeCache.get(id);
-    }
-
-    public synchronized TiStore putStore(long id, TiStore store) {
-      TiStore oldStore = storeCache.get(id);
-      if (oldStore != null && oldStore.getStore().equals(store.getStore())) {
-        return oldStore;
-      }
-      storeCache.put(id, store);
-      return store;
-    }
-
-    public synchronized void clearAll() {
-      keyToRegionIdCache.clear();
-      regionCache.clear();
-    }
-  }
 }
@@ -171,7 +171,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
     Supplier<GetRequest> factory =
         () ->
             GetRequest.newBuilder()
-                .setContext(region.getReplicaContext(getResolvedLocks(version), this.storeType))
+                .setContext(makeContext(getResolvedLocks(version), this.storeType))
                 .setKey(key)
                 .setVersion(version)
                 .build();
@@ -216,7 +216,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
     Supplier<BatchGetRequest> request =
         () ->
             BatchGetRequest.newBuilder()
-                .setContext(region.getReplicaContext(getResolvedLocks(version), this.storeType))
+                .setContext(makeContext(getResolvedLocks(version), this.storeType))
                 .addAllKeys(keys)
                 .setVersion(version)
                 .build();
@@ -279,7 +279,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
     Supplier<ScanRequest> request =
         () ->
             ScanRequest.newBuilder()
-                .setContext(region.getReplicaContext(getResolvedLocks(version), this.storeType))
+                .setContext(makeContext(getResolvedLocks(version), this.storeType))
                 .setStartKey(startKey)
                 .setVersion(version)
                 .setKeyOnly(keyOnly)
@@ -381,7 +381,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
         () ->
             getIsV4()
                 ? PrewriteRequest.newBuilder()
-                    .setContext(region.getReplicaContext(storeType))
+                    .setContext(makeContext(storeType))
                    .setStartVersion(startTs)
                    .setPrimaryLock(primaryLock)
                    .addAllMutations(mutations)
@@ -391,7 +391,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
                    .setTxnSize(16)
                    .build()
                 : PrewriteRequest.newBuilder()
-                    .setContext(region.getReplicaContext(storeType))
+                    .setContext(makeContext(storeType))
                    .setStartVersion(startTs)
                    .setPrimaryLock(primaryLock)
                    .addAllMutations(mutations)
@@ -471,7 +471,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
     Supplier<TxnHeartBeatRequest> factory =
         () ->
             TxnHeartBeatRequest.newBuilder()
-                .setContext(region.getReplicaContext(storeType))
+                .setContext(makeContext(storeType))
                 .setStartVersion(startTs)
                 .setPrimaryLock(primaryLock)
                 .setAdviseLockTtl(ttl)
@@ -529,7 +529,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
                 .setStartVersion(startTs)
                 .setCommitVersion(commitTs)
                 .addAllKeys(keys)
-                .setContext(region.getReplicaContext(storeType))
+                .setContext(makeContext(storeType))
                 .build();
     KVErrorHandler<CommitResponse> handler =
         new KVErrorHandler<>(
@@ -590,7 +590,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
     Supplier<Coprocessor.Request> reqToSend =
         () ->
             Coprocessor.Request.newBuilder()
-                .setContext(region.getReplicaContext(getResolvedLocks(startTs), this.storeType))
+                .setContext(makeContext(getResolvedLocks(startTs), this.storeType))
                 .setTp(REQ_TYPE_DAG.getValue())
                 .setStartTs(startTs)
                 .setData(req.toByteString())
@@ -713,7 +713,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
     Supplier<Coprocessor.Request> reqToSend =
         () ->
             Coprocessor.Request.newBuilder()
-                .setContext(region.getReplicaContext(getResolvedLocks(startTs), this.storeType))
+                .setContext(makeContext(getResolvedLocks(startTs), this.storeType))
                 // TODO: If no executors...?
                 .setTp(REQ_TYPE_DAG.getValue())
                 .setData(req.toByteString())
@@ -751,7 +751,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
     Supplier<SplitRegionRequest> request =
         () ->
             SplitRegionRequest.newBuilder()
-                .setContext(region.getReplicaContext(storeType))
+                .setContext(makeContext(storeType))
                 .addAllSplitKeys(splitKeys)
                 .build();

@@ -792,11 +792,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
         GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_get").startTimer();
     try {
       Supplier<RawGetRequest> factory =
-          () ->
-              RawGetRequest.newBuilder()
-                  .setContext(region.getReplicaContext(storeType))
-                  .setKey(key)
-                  .build();
+          () -> RawGetRequest.newBuilder().setContext(makeContext(storeType)).setKey(key).build();
       RegionErrorHandler<RawGetResponse> handler =
           new RegionErrorHandler<RawGetResponse>(
               regionManager, this, resp -> resp.hasRegionError() ? resp.getRegionError() : null);
@@ -833,7 +829,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
     Supplier<RawGetKeyTTLRequest> factory =
         () ->
             RawGetKeyTTLRequest.newBuilder()
-                .setContext(region.getReplicaContext(storeType))
+                .setContext(makeContext(storeType))
                 .setKey(key)
                 .build();
     RegionErrorHandler<RawGetKeyTTLResponse> handler =
@@ -872,7 +868,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
     Supplier<RawDeleteRequest> factory =
         () ->
             RawDeleteRequest.newBuilder()
-                .setContext(region.getReplicaContext(storeType))
+                .setContext(makeContext(storeType))
                 .setKey(key)
                 .setForCas(atomicForCAS)
                 .build();
@@ -910,7 +906,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
     Supplier<RawPutRequest> factory =
         () ->
             RawPutRequest.newBuilder()
-                .setContext(region.getReplicaContext(storeType))
+                .setContext(makeContext(storeType))
                 .setKey(key)
                 .setValue(value)
                 .setTtl(ttl)
@@ -954,7 +950,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
     Supplier<RawCASRequest> factory =
         () ->
             RawCASRequest.newBuilder()
-                .setContext(region.getReplicaContext(storeType))
+                .setContext(makeContext(storeType))
                 .setKey(key)
                 .setValue(value)
                 .setPreviousValue(prevValue.orElse(ByteString.EMPTY))
@@ -1007,7 +1003,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
     Supplier<RawBatchGetRequest> factory =
         () ->
             RawBatchGetRequest.newBuilder()
-                .setContext(region.getReplicaContext(storeType))
+                .setContext(makeContext(storeType))
                 .addAllKeys(keys)
                 .build();
     RegionErrorHandler<RawBatchGetResponse> handler =
@@ -1043,7 +1039,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
     Supplier<RawBatchPutRequest> factory =
         () ->
             RawBatchPutRequest.newBuilder()
-                .setContext(region.getReplicaContext(storeType))
+                .setContext(makeContext(storeType))
                 .addAllPairs(kvPairs)
                 .setTtl(ttl)
                 .setForCas(atomicForCAS)
@@ -1095,7 +1091,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
     Supplier<RawBatchDeleteRequest> factory =
         () ->
             RawBatchDeleteRequest.newBuilder()
-                .setContext(region.getReplicaContext(storeType))
+                .setContext(makeContext(storeType))
                 .addAllKeys(keys)
                 .setForCas(atomicForCAS)
                 .build();
@@ -1140,7 +1136,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
     Supplier<RawScanRequest> factory =
         () ->
             RawScanRequest.newBuilder()
-                .setContext(region.getReplicaContext(storeType))
+                .setContext(makeContext(storeType))
                 .setStartKey(key)
                 .setKeyOnly(keyOnly)
                 .setLimit(limit)
@@ -1186,7 +1182,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
     Supplier<RawDeleteRangeRequest> factory =
         () ->
             RawDeleteRangeRequest.newBuilder()
-                .setContext(region.getReplicaContext(storeType))
+                .setContext(makeContext(storeType))
                 .setStartKey(startKey)
                 .setEndKey(endKey)
                 .build();
@@ -1269,7 +1265,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
     TikvBlockingStub blockingStub = null;
     TikvStub asyncStub = null;

-    if (conf.getEnableGrpcForward() && store.getProxyStore() != null && store.isUnreachable()) {
+    if (conf.getEnableGrpcForward() && store.getProxyStore() != null && !store.isReachable()) {
       addressStr = store.getProxyStore().getAddress();
       channel =
           channelFactory.getChannel(addressStr, regionManager.getPDClient().getHostMapping());
@@ -1278,18 +1274,6 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
       blockingStub = MetadataUtils.attachHeaders(TikvGrpc.newBlockingStub(channel), header);
       asyncStub = MetadataUtils.attachHeaders(TikvGrpc.newStub(channel), header);
     } else {
-      // If the store is reachable, which is update by check-health thread, cancel proxy forward.
-      if (!store.isUnreachable()) {
-        if (store.getProxyStore() != null) {
-          logger.warn(
-              String.format(
-                  "cancel request to store [%s] forward by store[%s]",
-                  store.getStore().getAddress(), store.getProxyStore().getAddress()));
-          TiStore newStore = store.withProxy(null);
-          regionManager.updateStore(store, newStore);
-          store = newStore;
-        }
-      }
       channel = channelFactory.getChannel(addressStr, pdClient.getHostMapping());
       blockingStub = TikvGrpc.newBlockingStub(channel);
       asyncStub = TikvGrpc.newStub(channel);
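The new StoreHealthyChecker below replaces UnreachableStoreChecker. The probe it performs is the standard grpc.health.v1 check; a self-contained sketch of the same probe, assuming only grpc-java plus the generated health-check stubs (HealthProbe and hostPort are illustrative names):

    import io.grpc.ManagedChannel;
    import io.grpc.ManagedChannelBuilder;
    import io.grpc.health.v1.HealthCheckRequest;
    import io.grpc.health.v1.HealthCheckResponse;
    import io.grpc.health.v1.HealthGrpc;
    import java.util.concurrent.TimeUnit;

    public final class HealthProbe {
      // Call grpc.health.v1.Health/Check with a short deadline and treat anything
      // other than SERVING (including any exception) as unhealthy.
      public static boolean isServing(String hostPort) {
        ManagedChannel channel =
            ManagedChannelBuilder.forTarget(hostPort).usePlaintext().build();
        try {
          HealthCheckResponse resp =
              HealthGrpc.newBlockingStub(channel)
                  .withDeadlineAfter(200, TimeUnit.MILLISECONDS)
                  .check(HealthCheckRequest.newBuilder().build());
          return resp.getStatus() == HealthCheckResponse.ServingStatus.SERVING;
        } catch (Exception e) {
          return false;
        } finally {
          channel.shutdownNow();
        }
      }
    }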
@@ -0,0 +1,150 @@
+package org.tikv.common.region;
+
+import io.grpc.ManagedChannel;
+import io.grpc.health.v1.HealthCheckRequest;
+import io.grpc.health.v1.HealthCheckResponse;
+import io.grpc.health.v1.HealthGrpc;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.tikv.common.ReadOnlyPDClient;
+import org.tikv.common.util.ChannelFactory;
+import org.tikv.common.util.ConcreteBackOffer;
+import org.tikv.kvproto.Metapb;
+
+public class StoreHealthyChecker implements Runnable {
+  private static final Logger logger = LoggerFactory.getLogger(StoreHealthyChecker.class);
+  private static final long MAX_CHECK_STORE_TOMBSTONE_TICK = 60;
+  private static final long SLEEP_MILLI_SECONDS_AFTER_DOUBLE_CHECK = 500;
+  private BlockingQueue<TiStore> taskQueue;
+  private final ChannelFactory channelFactory;
+  private final ReadOnlyPDClient pdClient;
+  private final RegionCache cache;
+  private long checkTombstoneTick;
+
+  public StoreHealthyChecker(
+      ChannelFactory channelFactory, ReadOnlyPDClient pdClient, RegionCache cache) {
+    this.taskQueue = new LinkedBlockingQueue<>();
+    this.channelFactory = channelFactory;
+    this.pdClient = pdClient;
+    this.cache = cache;
+    this.checkTombstoneTick = 0;
+  }
+
+  public boolean scheduleStoreHealthCheck(TiStore store) {
+    if (!this.taskQueue.add(store)) {
+      // Adding to the queue failed; report it so the caller can schedule the check again.
+      return false;
+    }
+    return true;
+  }
+
+  private List<TiStore> getValidStores() {
+    List<TiStore> unhealthStore = new LinkedList<>();
+    while (!this.taskQueue.isEmpty()) {
+      try {
+        TiStore store = this.taskQueue.take();
+        if (!store.isValid()) {
+          continue;
+        }
+        unhealthStore.add(store);
+      } catch (Exception e) {
+        return unhealthStore;
+      }
+    }
+    return unhealthStore;
+  }
+
+  private boolean checkStoreHealth(TiStore store) {
+    String addressStr = store.getStore().getAddress();
+    try {
+      ManagedChannel channel = channelFactory.getChannel(addressStr, pdClient.getHostMapping());
+      HealthGrpc.HealthBlockingStub stub =
+          HealthGrpc.newBlockingStub(channel).withDeadlineAfter(200, TimeUnit.MILLISECONDS);
+      HealthCheckRequest req = HealthCheckRequest.newBuilder().build();
+      HealthCheckResponse resp = stub.check(req);
+      if (resp.getStatus() == HealthCheckResponse.ServingStatus.SERVING) {
+        return true;
+      } else {
+        return false;
+      }
+    } catch (Exception e) {
+      return false;
+    }
+  }
+
+  private boolean checkStoreTombstone(TiStore store) {
+    try {
+      Metapb.Store newStore = pdClient.getStore(ConcreteBackOffer.newRawKVBackOff(), store.getId());
+      if (newStore.getState() == Metapb.StoreState.Tombstone) {
+        return true;
+      }
+    } catch (Exception e) {
+      return false;
+    }
+    return false;
+  }
+
+  @Override
+  public void run() {
+    checkTombstoneTick += 1;
+    boolean needCheckTombstoneStore = false;
+    if (checkTombstoneTick >= MAX_CHECK_STORE_TOMBSTONE_TICK) {
+      needCheckTombstoneStore = true;
+      checkTombstoneTick = 0;
+    }
+    List<TiStore> allStores = getValidStores();
+    List<TiStore> unreachableStore = new LinkedList<>();
+    for (TiStore store : allStores) {
+      if (needCheckTombstoneStore) {
+        if (checkStoreTombstone(store)) {
+          continue;
+        }
+      }
+
+      if (checkStoreHealth(store)) {
+        if (store.getProxyStore() != null) {
+          TiStore newStore = store.withProxy(null);
+          logger.warn(String.format("store [%s] recovers to be reachable", store.getAddress()));
+          if (cache.putStore(newStore.getId(), newStore)) {
+            this.taskQueue.add(newStore);
+            continue;
+          }
+        } else {
+          if (!store.isReachable()) {
+            logger.warn(
+                String.format(
+                    "store [%s] recovers to be reachable and can forward", store.getAddress()));
+            store.markReachable();
+          }
+          if (!store.canForwardFirst()) {
+            store.markCanForward();
+          }
+        }
+      } else if (store.isReachable()) {
+        unreachableStore.add(store);
+        continue;
+      }
+      this.taskQueue.add(store);
+    }
+    if (!unreachableStore.isEmpty()) {
+      try {
+        Thread.sleep(SLEEP_MILLI_SECONDS_AFTER_DOUBLE_CHECK);
+      } catch (Exception e) {
+        this.taskQueue.addAll(unreachableStore);
+        return;
+      }
+      for (TiStore store : unreachableStore) {
+        if (!checkStoreHealth(store)) {
+          logger.warn(String.format("store [%s] is not reachable", store.getAddress()));
+          store.markUnreachable();
+        }
+        this.taskQueue.add(store);
+      }
+    }
+  }
+}
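StoreHealthyChecker is a bare Runnable: nothing in the file above schedules it. A minimal sketch of one way to drive it is shown below; the dedicated daemon thread and the one-second period are assumptions, chosen so that MAX_CHECK_STORE_TOMBSTONE_TICK (60) yields roughly one tombstone sweep per minute and SLEEP_MILLI_SECONDS_AFTER_DOUBLE_CHECK stays well inside a tick.

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import org.tikv.common.region.StoreHealthyChecker;

// Sketch only: one plausible wiring; how the real client schedules the checker may differ.
public class CheckerWiringSketch {
  public static ScheduledExecutorService start(StoreHealthyChecker checker) {
    ScheduledExecutorService exec =
        Executors.newSingleThreadScheduledExecutor(
            r -> {
              Thread t = new Thread(r, "store-health-check");
              t.setDaemon(true); // never keep the JVM alive just for health checks
              return t;
            });
    exec.scheduleAtFixedRate(checker, 1, 1, TimeUnit.SECONDS);
    return exec;
  }
}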
@@ -142,20 +142,21 @@ public class TiRegion implements Serializable {
   }

   public Kvrpcpb.Context getLeaderContext() {
-    return getContext(this.leader, java.util.Collections.emptySet(), TiStoreType.TiKV);
-  }
-
-  public Kvrpcpb.Context getReplicaContext(TiStoreType storeType) {
-    return getContext(getCurrentReplica(), java.util.Collections.emptySet(), storeType);
+    return getContext(this.leader, java.util.Collections.emptySet(), false);
   }

   public Kvrpcpb.Context getReplicaContext(Set<Long> resolvedLocks, TiStoreType storeType) {
-    return getContext(getCurrentReplica(), resolvedLocks, storeType);
+    Peer currentPeer = getCurrentReplica();
+    boolean replicaRead = !isLeader(currentPeer) && TiStoreType.TiKV.equals(storeType);
+    return getContext(currentPeer, resolvedLocks, replicaRead);
+  }
+
+  public Kvrpcpb.Context getReplicaContext(Peer currentPeer, Set<Long> resolvedLocks) {
+    return getContext(currentPeer, resolvedLocks, false);
   }

   private Kvrpcpb.Context getContext(
-      Peer currentPeer, Set<Long> resolvedLocks, TiStoreType storeType) {
-    boolean replicaRead = !isLeader(currentPeer) && TiStoreType.TiKV.equals(storeType);
+      Peer currentPeer, Set<Long> resolvedLocks, boolean replicaRead) {

     Kvrpcpb.Context.Builder builder = Kvrpcpb.Context.newBuilder();
     builder
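This refactor moves the replica-read decision out of the private getContext and into the overloads, so the new getReplicaContext(Peer, Set<Long>) can hand a specific peer to getContext without ever enabling replica reads. A sketch of what the boolean ultimately controls follows, assuming the generated Kvrpcpb.Context builder exposes setPeer and setReplicaRead for the proto's peer and replica_read fields (region epoch, term, and lock wiring elided).

import org.tikv.kvproto.Kvrpcpb;
import org.tikv.kvproto.Metapb;

// Sketch only: shows the flag computed in the hunk above flowing into the request context.
public class ReplicaReadSketch {
  static Kvrpcpb.Context contextFor(Metapb.Peer peer, boolean peerIsLeader, boolean tikvStore) {
    // Mirrors: replicaRead = !isLeader(currentPeer) && TiStoreType.TiKV.equals(storeType)
    boolean replicaRead = !peerIsLeader && tikvStore;
    return Kvrpcpb.Context.newBuilder()
        .setPeer(peer) // the replica actually serving this request
        .setReplicaRead(replicaRead) // follower reads only make sense on TiKV stores
        .build();
  }
}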
@@ -1,23 +1,60 @@
 package org.tikv.common.region;

 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicLong;
 import org.tikv.kvproto.Metapb;

 public class TiStore {
+  private static long MAX_FAIL_FORWARD_TIMES = 4;
   private final Metapb.Store store;
   private final Metapb.Store proxyStore;
-  private AtomicBoolean unreachable;
+  private AtomicBoolean reachable;
+  private AtomicBoolean valid;
+  private AtomicLong failForwardCount;
+  private AtomicBoolean canForward;

   public TiStore(Metapb.Store store) {
     this.store = store;
-    this.unreachable = new AtomicBoolean(false);
+    this.reachable = new AtomicBoolean(true);
+    this.valid = new AtomicBoolean(true);
+    this.canForward = new AtomicBoolean(true);
     this.proxyStore = null;
+    this.failForwardCount = new AtomicLong(0);
   }

   private TiStore(Metapb.Store store, Metapb.Store proxyStore) {
     this.store = store;
-    this.unreachable = new AtomicBoolean(false);
+    if (proxyStore != null) {
+      this.reachable = new AtomicBoolean(false);
+    } else {
+      this.reachable = new AtomicBoolean(true);
+    }
+    this.valid = new AtomicBoolean(true);
+    this.canForward = new AtomicBoolean(true);
     this.proxyStore = proxyStore;
+    this.failForwardCount = new AtomicLong(0);
+  }
+
+  @java.lang.Override
+  public boolean equals(final java.lang.Object obj) {
+    if (obj == this) {
+      return true;
+    }
+    if (!(obj instanceof TiStore)) {
+      return super.equals(obj);
+    }
+    TiStore other = (TiStore) obj;
+    if (!this.store.equals(other.store)) {
+      return false;
+    }
+
+    if (proxyStore == null && other.proxyStore == null) {
+      return true;
+    }
+    if (proxyStore != null && other.proxyStore != null) {
+      return proxyStore.equals(other.proxyStore);
+    }
+    return false;
   }

 public TiStore withProxy(Metapb.Store proxyStore) {
@@ -25,15 +62,40 @@ public class TiStore {
   }

   public void markUnreachable() {
-    this.unreachable.set(true);
+    this.reachable.set(false);
   }

   public void markReachable() {
-    this.unreachable.set(false);
+    this.reachable.set(true);
   }

-  public boolean isUnreachable() {
-    return this.unreachable.get();
+  public boolean isReachable() {
+    return this.reachable.get();
+  }
+
+  public boolean isValid() {
+    return this.valid.get();
+  }
+
+  public void markInvalid() {
+    this.valid.set(false);
+  }
+
+  public void forwardFail() {
+    if (this.canForward.get()) {
+      if (this.failForwardCount.addAndGet(1) >= MAX_FAIL_FORWARD_TIMES) {
+        this.canForward.set(false);
+      }
+    }
+  }
+
+  public void markCanForward() {
+    this.failForwardCount.set(0);
+    this.canForward.set(true);
+  }
+
+  public boolean canForwardFirst() {
+    return this.canForward.get();
   }

   public Metapb.Store getStore() {
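The new failForwardCount/canForward pair behaves like a small circuit breaker: MAX_FAIL_FORWARD_TIMES (4) consecutive forwardFail() calls latch canForwardFirst() to false, and only markCanForward(), invoked by the health checker once the store answers again, resets the counter. A minimal illustration, using a default-instance store as a placeholder:

package org.tikv.common.region;

import org.tikv.kvproto.Metapb;

// Sketch only: demonstrates the breaker semantics added above, not production usage.
public class ForwardBreakerSketch {
  public static void main(String[] args) {
    TiStore store = new TiStore(Metapb.Store.getDefaultInstance());
    for (int i = 0; i < 4; i++) {
      store.forwardFail(); // each failed forwarded request bumps failForwardCount
    }
    System.out.println(store.canForwardFirst()); // false: MAX_FAIL_FORWARD_TIMES reached
    store.markCanForward(); // the health checker observed the store recovering
    System.out.println(store.canForwardFirst()); // true: counter reset, breaker closed
  }
}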
@@ -1,90 +0,0 @@
-package org.tikv.common.region;
-
-import io.grpc.ManagedChannel;
-import io.grpc.health.v1.HealthCheckRequest;
-import io.grpc.health.v1.HealthCheckResponse;
-import io.grpc.health.v1.HealthGrpc;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.concurrent.BlockingQueue;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.LinkedBlockingQueue;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.tikv.common.ReadOnlyPDClient;
-import org.tikv.common.util.ChannelFactory;
-import org.tikv.common.util.ConcreteBackOffer;
-import org.tikv.kvproto.Metapb;
-
-public class UnreachableStoreChecker implements Runnable {
-  private static final Logger logger = LoggerFactory.getLogger(UnreachableStoreChecker.class);
-  private ConcurrentHashMap<Long, TiStore> stores;
-  private BlockingQueue<TiStore> taskQueue;
-  private final ChannelFactory channelFactory;
-  private final ReadOnlyPDClient pdClient;
-
-  public UnreachableStoreChecker(ChannelFactory channelFactory, ReadOnlyPDClient pdClient) {
-    this.stores = new ConcurrentHashMap();
-    this.taskQueue = new LinkedBlockingQueue<>();
-    this.channelFactory = channelFactory;
-    this.pdClient = pdClient;
-  }
-
-  public void scheduleStoreHealthCheck(TiStore store) {
-    TiStore oldStore = this.stores.get(Long.valueOf(store.getId()));
-    if (oldStore == store) {
-      return;
-    }
-    this.stores.put(Long.valueOf(store.getId()), store);
-    if (!this.taskQueue.add(store)) {
-      // Adding to the queue failed; mark it reachable so that it can be scheduled again.
-      store.markReachable();
-    }
-  }
-
-  private List<TiStore> getUnhealthStore() {
-    List<TiStore> unhealthStore = new LinkedList<>();
-    while (!this.taskQueue.isEmpty()) {
-      try {
-        TiStore store = this.taskQueue.take();
-        unhealthStore.add(store);
-      } catch (Exception e) {
-        return unhealthStore;
-      }
-    }
-    return unhealthStore;
-  }
-
-  @Override
-  public void run() {
-    List<TiStore> unhealthStore = getUnhealthStore();
-    for (TiStore store : unhealthStore) {
-      if (!store.isUnreachable()) {
-        continue;
-      }
-      String addressStr = store.getStore().getAddress();
-      ManagedChannel channel = channelFactory.getChannel(addressStr, pdClient.getHostMapping());
-      HealthGrpc.HealthBlockingStub stub = HealthGrpc.newBlockingStub(channel);
-      HealthCheckRequest req = HealthCheckRequest.newBuilder().build();
-      try {
-        HealthCheckResponse resp = stub.check(req);
-        if (resp.getStatus() == HealthCheckResponse.ServingStatus.SERVING) {
-          store.markReachable();
-          logger.warn(
-              String.format("store [%s] recovers to be reachable", store.getStore().getAddress()));
-
-          this.stores.remove(Long.valueOf(store.getId()));
-          continue;
-        }
-        Metapb.Store newStore =
-            pdClient.getStore(ConcreteBackOffer.newRawKVBackOff(), store.getId());
-        if (newStore.getState() == Metapb.StoreState.Tombstone) {
-          continue;
-        }
-        this.taskQueue.add(store);
-      } catch (Exception e) {
-        this.taskQueue.add(store);
-      }
-    }
-  }
-}
@@ -102,7 +102,7 @@ public class ConcreteBackOffer implements BackOffer {
         backOffFunction = BackOffFunction.create(200, 3000, BackOffStrategy.EqualJitter);
         break;
       case BoPDRPC:
-        backOffFunction = BackOffFunction.create(500, 3000, BackOffStrategy.EqualJitter);
+        backOffFunction = BackOffFunction.create(100, 600, BackOffStrategy.EqualJitter);
        break;
       case BoTiKVRPC:
         backOffFunction = BackOffFunction.create(100, 400, BackOffStrategy.EqualJitter);
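The BoPDRPC change tightens both the base sleep (500 ms to 100 ms) and the cap (3000 ms to 600 ms), so PD retries resolve within a few hundred milliseconds instead of seconds. The sketch below illustrates an equal-jitter schedule under the new bounds; it follows the common "half fixed, half random" formulation and is an assumption about BackOffFunction's internals, not a copy of them.

import java.util.concurrent.ThreadLocalRandom;

// Sketch only: capped exponential backoff with equal jitter under the new bounds.
public class EqualJitterSketch {
  static long sleepMillis(int attempt, long baseMs, long capMs) {
    long exp = Math.min(capMs, baseMs * (1L << Math.min(attempt, 30))); // capped doubling
    return exp / 2 + ThreadLocalRandom.current().nextLong(exp / 2 + 1); // equal jitter
  }

  public static void main(String[] args) {
    for (int attempt = 0; attempt < 5; attempt++) {
      // With (100, 600): roughly 50-100, 100-200, 200-400, then 300-600 ms per retry.
      System.out.println(sleepMillis(attempt, 100, 600));
    }
  }
}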