support witness peers (#633)

Signed-off-by: Wenbo Zhang <ethercflow@gmail.com>
This commit is contained in:
Zwb 2023-01-09 20:53:19 +08:00 committed by GitHub
parent 2d2a3fa00e
commit c20405f345
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 75 additions and 5 deletions

View File

@ -93,6 +93,8 @@ var (
ErrRegionFlashbackInProgress = errors.New("region is in the flashback progress")
// ErrRegionFlashbackNotPrepared is the error when a region is not prepared for the flashback first.
ErrRegionFlashbackNotPrepared = errors.New("region is not prepared for the flashback")
// ErrIsWitness is the error when a request is send to a witness.
ErrIsWitness = errors.New("peer is witness")
// ErrUnknown is the unknow error.
ErrUnknown = errors.New("unknow")
// ErrResultUndetermined is the error when execution result is unknown.

2
go.mod
View File

@ -13,7 +13,7 @@ require (
github.com/opentracing/opentracing-go v1.2.0
github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989
github.com/pingcap/kvproto v0.0.0-20221227030452-22819f5b377a
github.com/pingcap/kvproto v0.0.0-20230104090009-7c5d757b6e12
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.11.0

4
go.sum
View File

@ -155,8 +155,8 @@ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 h1:surzm05a8C9dN8dIUmo4Be2+pMRb6f55i+UIYrluu2E=
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989/go.mod h1:O17XtbryoCJhkKGbT62+L2OlrniwqiGLSqrmdHCMzZw=
github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
github.com/pingcap/kvproto v0.0.0-20221227030452-22819f5b377a h1:4TiR0nGKmLmu2kTC22jOhUIplmv4GGUrUD9D2DTMms0=
github.com/pingcap/kvproto v0.0.0-20221227030452-22819f5b377a/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
github.com/pingcap/kvproto v0.0.0-20230104090009-7c5d757b6e12 h1:ZgPcRwsrzcuYcgrsGB+2I9w0n6JIT+GEptLUZ1kWKZA=
github.com/pingcap/kvproto v0.0.0-20230104090009-7c5d757b6e12/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY=
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=

View File

@ -344,13 +344,19 @@ github.com/pingcap/fn v0.0.0-20200306044125-d5540d389059 h1:Pe2LbxRmbTfAoKJ65bZL
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 h1:surzm05a8C9dN8dIUmo4Be2+pMRb6f55i+UIYrluu2E=
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989/go.mod h1:O17XtbryoCJhkKGbT62+L2OlrniwqiGLSqrmdHCMzZw=
github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
github.com/pingcap/kvproto v0.0.0-20230104090009-7c5d757b6e12 h1:ZgPcRwsrzcuYcgrsGB+2I9w0n6JIT+GEptLUZ1kWKZA=
github.com/pingcap/kvproto v0.0.0-20230104090009-7c5d757b6e12/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
github.com/pingcap/kvproto v0.0.0-20230105060948-64890fa4f6c1 h1:jw4NjEiCleRJPPpHM7K6l8OKzOjnZAj62eKteCAY6ro=
github.com/pingcap/kvproto v0.0.0-20230105060948-64890fa4f6c1/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8=
github.com/pingcap/log v1.1.0/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4=
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY=
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4=
github.com/pingcap/log v1.1.1-0.20221116035753-734d527bc87c h1:crhkw6DD+07Bg1wYhW5Piw+kYNKZqFQqfC2puUf6gMI=
github.com/pingcap/log v1.1.1-0.20221116035753-734d527bc87c/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4=
github.com/pingcap/sysutil v0.0.0-20220114020952-ea68d2dbf5b4 h1:HYbcxtnkN3s5tqrZ/z3eJS4j3Db8wMphEm1q10lY/TM=
github.com/pingcap/tidb v1.1.0-beta.0.20221101102559-97add26c8f84 h1:qAougMzGGYDQ00UfIVs+M0LBaQQrL8yXdmLQEqqR8HY=
github.com/pingcap/tidb v1.1.0-beta.0.20221101102559-97add26c8f84/go.mod h1:lhtdZBXqJGbk2/Fr8JnT9KhXxEKy8h0vrDwdna/ou3g=
github.com/pingcap/tidb v1.1.0-beta.0.20230109054422-b477b1c94620 h1:153psGlYCgPaDYc3AtlDG5ePNPVE4T1c8iJGHFZInZw=
github.com/pingcap/tidb v1.1.0-beta.0.20230109054422-b477b1c94620/go.mod h1:Rounl3rQhnhOBh8e7pxAr6BhnNJh0nGvomRCRE6W98U=
github.com/pingcap/tidb/parser v0.0.0-20230109054422-b477b1c94620 h1:cEP+A7ylw8sGGOCHe8F8gto+vlkGQaYt09DHgHd2xmg=

View File

@ -267,6 +267,14 @@ func newRegion(bo *retry.Backoffer, c *RegionCache, pdRegion *pd.Region) (*Regio
if addr == "" {
continue
}
// Since the witness is read-write prohibited, it does not make sense to send requests to it
// unless it is the leader. When it is the leader and transfer leader encounters a problem,
// the backoff timeout will be triggered, and the client can give a more accurate error message.
if p.IsWitness && !isSamePeer(p, leader) {
continue
}
if isSamePeer(p, leader) {
leaderAccessIdx = AccessIndex(len(rs.accessIndex[tiKVOnly]))
}

View File

@ -1260,6 +1260,42 @@ func (s *testRegionCacheSuite) TestPeersLenChange() {
s.cache.OnSendFail(retry.NewNoopBackoff(context.Background()), ctx, false, errors.New("send fail"))
}
func (s *testRegionCacheSuite) TestPeersLenChangedByWitness() {
// 2 peers [peer1, peer2] and let peer2 become leader
loc, err := s.cache.LocateKey(s.bo, []byte("a"))
s.Nil(err)
s.cache.UpdateLeader(loc.Region, &metapb.Peer{Id: s.peer2, StoreId: s.store2}, 0)
// current leader is peer2 in [peer1, peer2]
loc, err = s.cache.LocateKey(s.bo, []byte("a"))
s.Nil(err)
ctx, err := s.cache.GetTiKVRPCContext(s.bo, loc.Region, kv.ReplicaReadLeader, 0)
s.Nil(err)
s.Equal(ctx.Peer.StoreId, s.store2)
// simulate peer1 become witness in kv heartbeat and loaded before response back.
cpMeta := &metapb.Region{
Id: ctx.Meta.Id,
StartKey: ctx.Meta.StartKey,
EndKey: ctx.Meta.EndKey,
RegionEpoch: ctx.Meta.RegionEpoch,
Peers: make([]*metapb.Peer, len(ctx.Meta.Peers)),
}
copy(cpMeta.Peers, ctx.Meta.Peers)
for _, peer := range cpMeta.Peers {
if peer.Id == s.peer1 {
peer.IsWitness = true
}
}
cpRegion := &pd.Region{Meta: cpMeta}
region, err := newRegion(s.bo, s.cache, cpRegion)
s.Nil(err)
s.cache.insertRegionToCache(region)
// OnSendFail should not panic
s.cache.OnSendFail(retry.NewNoopBackoff(context.Background()), ctx, false, errors.New("send fail"))
}
func createSampleRegion(startKey, endKey []byte) *Region {
return &Region{
meta: &metapb.Region{

View File

@ -1403,6 +1403,8 @@ func regionErrorToLabel(e *errorpb.Error) string {
return "flashback_in_progress"
} else if e.GetFlashbackNotPrepared() != nil {
return "flashback_not_prepared"
} else if e.GetIsWitness() != nil {
return "peer_is_witness"
}
return "unknown"
}
@ -1460,6 +1462,16 @@ func (s *RegionRequestSender) onRegionError(bo *retry.Backoffer, ctx *RPCContext
return false, nil
}
if regionErr.GetIsWitness() != nil {
s.regionCache.InvalidateCachedRegion(ctx.Region)
logutil.BgLogger().Debug("tikv reports `IsWitness`", zap.Stringer("ctx", ctx))
err = bo.Backoff(retry.BoIsWitness, errors.Errorf("is witness, ctx: %v", ctx))
if err != nil {
return false, err
}
return false, nil
}
// Since we expect that the workload should be stopped during the flashback progress,
// if a request meets the FlashbackInProgress error, it should stop retrying immediately
// to avoid unnecessary backoff and potential unexpected data status to the user.

View File

@ -51,8 +51,8 @@ func TestBackoffWithMax(t *testing.T) {
}
func TestBackoffErrorType(t *testing.T) {
// the actual maxSleep is multiplied by weight, which is 480ms
b := NewBackofferWithVars(context.TODO(), 210, nil)
// the actual maxSleep is multiplied by weight, which is 1600ms
b := NewBackofferWithVars(context.TODO(), 800, nil)
err := b.Backoff(BoRegionMiss, errors.New("region miss")) // 2ms sleep
assert.Nil(t, err)
// 6ms sleep at most in total
@ -67,6 +67,9 @@ func TestBackoffErrorType(t *testing.T) {
// sleep from ServerIsBusy is not counted
err = b.Backoff(BoTiKVServerBusy, errors.New("server is busy"))
assert.Nil(t, err)
// 1000ms sleep at most in total
err = b.Backoff(BoIsWitness, errors.New("peer is witness"))
assert.Nil(t, err)
// wait it exceed max sleep
for i := 0; i < 10; i++ {
err = b.Backoff(BoTxnNotFound, errors.New("txn not found"))

View File

@ -125,6 +125,7 @@ var (
BoMaxTsNotSynced = NewConfig("maxTsNotSynced", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 500, NoJitter), tikverr.ErrTiKVMaxTimestampNotSynced)
BoMaxDataNotReady = NewConfig("dataNotReady", &metrics.BackoffHistogramDataNotReady, NewBackoffFnCfg(2, 2000, NoJitter), tikverr.ErrRegionDataNotReady)
BoMaxRegionNotInitialized = NewConfig("regionNotInitialized", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 1000, NoJitter), tikverr.ErrRegionNotInitialized)
BoIsWitness = NewConfig("isWitness", &metrics.BackoffHistogramIsWitness, NewBackoffFnCfg(1000, 10000, EqualJitter), tikverr.ErrIsWitness)
// TxnLockFast's `base` load from vars.BackoffLockFast when create BackoffFn.
BoTxnLockFast = NewConfig(txnLockFastName, &metrics.BackoffHistogramLockFast, NewBackoffFnCfg(2, 3000, EqualJitter), tikverr.ErrResolveLockTimeout)
)

View File

@ -66,6 +66,7 @@ var (
BackoffHistogramRegionRecoveryInProgress prometheus.Observer
BackoffHistogramStaleCmd prometheus.Observer
BackoffHistogramDataNotReady prometheus.Observer
BackoffHistogramIsWitness prometheus.Observer
BackoffHistogramEmpty prometheus.Observer
TxnRegionsNumHistogramWithSnapshot prometheus.Observer
@ -166,6 +167,7 @@ func initShortcuts() {
BackoffHistogramRegionRecoveryInProgress = TiKVBackoffHistogram.WithLabelValues("regionRecoveryInProgress")
BackoffHistogramStaleCmd = TiKVBackoffHistogram.WithLabelValues("staleCommand")
BackoffHistogramDataNotReady = TiKVBackoffHistogram.WithLabelValues("dataNotReady")
BackoffHistogramIsWitness = TiKVBackoffHistogram.WithLabelValues("isWitness")
BackoffHistogramEmpty = TiKVBackoffHistogram.WithLabelValues("")
TxnRegionsNumHistogramWithSnapshot = TiKVTxnRegionsNumHistogram.WithLabelValues("snapshot")