retry on leader for region not available (#1769)

Signed-off-by: rishabh_mittal <rishabh.mittal@airbnb.com>

Co-authored-by: rishabh_mittal <rishabh.mittal@airbnb.com>
mittalrishabh 2025-10-21 19:12:46 -07:00 committed by GitHub
parent f03cebdc32
commit d632a4c0b2
5 changed files with 48 additions and 7 deletions


@@ -1851,6 +1851,14 @@ func (c *RegionCache) BatchLoadRegionsFromKey(bo *retry.Backoffer, startKey []by
 	return regions[len(regions)-1].EndKey(), nil
 }
 
+func (c *RegionCache) AsyncInvalidateCachedRegion(id RegionVerID) {
+	cachedRegion := c.GetCachedRegionWithRLock(id)
+	if cachedRegion == nil {
+		return
+	}
+	cachedRegion.setSyncFlags(needDelayedReloadPending)
+}
+
 // InvalidateCachedRegion removes a cached Region.
 func (c *RegionCache) InvalidateCachedRegion(id RegionVerID) {
 	c.InvalidateCachedRegionWithReason(id, Other)
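Editor's note: the hunk above adds an asynchronous variant of region invalidation. Instead of dropping the cached entry, it only sets the needDelayedReloadPending flag, which suggests the region can keep being served until it is reloaded in the background. A rough, self-contained model of the two behaviours; the types and flag handling below are simplified stand-ins, not the real client-go structures.

package main

import "fmt"

// Simplified stand-ins for the real client-go types; names and flags are illustrative only.
type cachedRegion struct {
	id                uint64
	valid             bool // false once the entry has been hard-invalidated
	delayedReloadFlag bool // models the needDelayedReloadPending sync flag
}

type regionCache struct {
	regions map[uint64]*cachedRegion
}

// asyncInvalidate models AsyncInvalidateCachedRegion: keep serving the cached entry,
// but mark it so a later pass reloads it in the background.
func (c *regionCache) asyncInvalidate(id uint64) {
	if r, ok := c.regions[id]; ok {
		r.delayedReloadFlag = true
	}
}

// invalidate models InvalidateCachedRegion: the entry becomes unusable immediately,
// forcing the next request to reload the region synchronously.
func (c *regionCache) invalidate(id uint64) {
	if r, ok := c.regions[id]; ok {
		r.valid = false
	}
}

func main() {
	c := &regionCache{regions: map[uint64]*cachedRegion{1: {id: 1, valid: true}}}

	c.asyncInvalidate(1)
	fmt.Println("after async invalidate: usable =", c.regions[1].valid, "reload pending =", c.regions[1].delayedReloadFlag)

	c.invalidate(1)
	fmt.Println("after hard invalidate:  usable =", c.regions[1].valid)
}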


@@ -1956,8 +1956,12 @@ func (s *RegionRequestSender) onRegionError(
 	// This peer is removed from the region. Invalidate the region since it's too stale.
 	// if the region error is from follower, can we mark the peer unavailable and reload region asynchronously?
 	if regionErr.GetRegionNotFound() != nil {
-		s.regionCache.InvalidateCachedRegion(ctx.Region)
-		return false, nil
+		if s.replicaSelector != nil {
+			return s.replicaSelector.onRegionNotFound(bo, ctx, req)
+		} else {
+			s.regionCache.InvalidateCachedRegion(ctx.Region)
+			return false, nil
+		}
 	}
 	if regionErr.GetKeyNotInRegion() != nil {
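Editor's note: onRegionError's boolean result drives the sender's retry loop, so returning true from the new onRegionNotFound path makes the loop resend the request (now aimed at the leader), while the old path keeps returning false and leaves the caller to re-resolve the region. A minimal sketch of that contract; sendOnce and handleRegionError are invented names, not real client-go functions.

package main

import "fmt"

// Illustrative sketch of the retry contract assumed by onRegionError: as long as the
// error handler reports shouldRetry == true, the sender loop resends the request.
type result struct {
	regionNotFound bool
	value          string
}

func sendOnce(attempt int) result {
	// Pretend the first attempt hits a follower that does not know the region yet.
	if attempt == 0 {
		return result{regionNotFound: true}
	}
	return result{value: "ok from leader"}
}

func handleRegionError(r result) (shouldRetry bool) {
	// Mirrors the new branch: with a replica selector the request is redirected to the
	// leader and retried; returning false would instead surface the miss to the caller.
	return r.regionNotFound
}

func main() {
	for attempt := 0; attempt < 3; attempt++ {
		r := sendOnce(attempt)
		if r.regionNotFound && handleRegionError(r) {
			continue // retry, now targeting the leader
		}
		fmt.Println("attempt", attempt, "->", r.value)
		return
	}
}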


@@ -303,11 +303,11 @@ func testRegionCacheStaleRead(t *testing.T) {
 			leaderAsyncReload: util.Some(false),
 			leaderSuccessReplica: []string{"z1"},
 			leaderSuccessReadType: SuccessStaleRead,
-			followerRegionValid: false,
-			followerAsyncReload: util.Some(false),
+			followerRegionValid: true,
+			followerAsyncReload: util.Some(true),
 			// may async reload region and access it from leader.
-			followerSuccessReplica: []string{},
-			followerSuccessReadType: ReadFail,
+			followerSuccessReplica: []string{"z1"},
+			followerSuccessReadType: SuccessStaleRead,
 		},
 		{
 			do: evictLeader,


@@ -517,6 +517,23 @@ func (s *replicaSelector) onDataIsNotReady() {
 	}
 }
 
+func (s *replicaSelector) onRegionNotFound(
+	bo *retry.Backoffer, ctx *RPCContext, req *tikvrpc.Request,
+) (shouldRetry bool, err error) {
+	leaderIdx := s.region.getStore().workTiKVIdx
+	leader := s.replicas[leaderIdx]
+	if !leader.isExhausted(1, 0) {
+		// If the request has not been sent to the leader, retry it on the leader and invalidate the region cache asynchronously.
+		// This helps when the region was split on the leader but not yet created on a replica (e.g. because the replica was down).
+		req.SetReplicaReadType(kv.ReplicaReadLeader)
+		s.replicaReadType = kv.ReplicaReadLeader
+		s.regionCache.AsyncInvalidateCachedRegion(ctx.Region)
+		return true, nil
+	}
+	s.regionCache.InvalidateCachedRegion(ctx.Region)
+	return false, nil
+}
+
 func (s *replicaSelector) onServerIsBusy(
 	bo *retry.Backoffer, ctx *RPCContext, req *tikvrpc.Request, serverIsBusy *errorpb.ServerIsBusy,
 ) (shouldRetry bool, err error) {
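Editor's note: the comment in onRegionNotFound describes the motivating scenario: the leader has already applied a region split, while a lagging or recovering follower has not created the new region yet, so a follower or stale read gets RegionNotFound even though the leader could serve the key. The toy example below illustrates that situation; the store layout and lookup function are invented for illustration and are not TiKV APIs.

package main

import "fmt"

// Toy illustration of the motivating scenario (simplified, not TiKV structures):
// the leader has applied a split and knows the new region, while a lagging follower
// has not created it yet and answers with a "region not found" style error.
type store struct {
	name    string
	regions map[uint64]bool // region IDs this store currently knows about
}

func (s store) lookup(regionID uint64) string {
	if s.regions[regionID] {
		return "served by " + s.name
	}
	return "RegionNotFound from " + s.name
}

func main() {
	const newRegionID = 42

	leader := store{name: "leader", regions: map[uint64]bool{41: true, 42: true}} // split applied
	follower := store{name: "follower", regions: map[uint64]bool{41: true}}       // split not applied yet

	// First attempt goes to the follower (e.g. a follower or stale read) and misses.
	fmt.Println(follower.lookup(newRegionID))
	// onRegionNotFound then forces ReplicaReadLeader and retries, which succeeds.
	fmt.Println(leader.lookup(newRegionID))
}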


@@ -1149,12 +1149,24 @@ func testReplicaReadAccessPathByBasicCase(s *testReplicaSelectorSuite) {
 			respErr = "region 0 is not prepared for the flashback"
 			respRegionError = nil
 			regionIsValid = true
+		case RegionNotFoundErr:
+			regionIsValid = false
 		}
 		switch readType {
 		case kv.ReplicaReadLeader:
 			accessPath = []string{"{addr: store1, replica-read: false, stale-read: false}"}
 		case kv.ReplicaReadFollower:
-			accessPath = []string{"{addr: store2, replica-read: true, stale-read: false}"}
+			// For RegionNotFoundErr from a follower, the request is retried on the leader.
+			if tp == RegionNotFoundErr {
+				accessPath = []string{
+					"{addr: store2, replica-read: true, stale-read: false}",
+					"{addr: store1, replica-read: false, stale-read: false}",
+				}
+				respRegionError = nil
+				regionIsValid = true
+			} else {
+				accessPath = []string{"{addr: store2, replica-read: true, stale-read: false}"}
+			}
 		case kv.ReplicaReadMixed:
 			if staleRead {
 				accessPath = []string{"{addr: store1, replica-read: false, stale-read: true}"}