mirror of https://github.com/tikv/client-go.git
support witness peers (#633)
Signed-off-by: Wenbo Zhang <ethercflow@gmail.com>
commit c20405f345 (parent 2d2a3fa00e)
@@ -93,6 +93,8 @@ var (
     ErrRegionFlashbackInProgress = errors.New("region is in the flashback progress")
     // ErrRegionFlashbackNotPrepared is the error when a region is not prepared for the flashback first.
     ErrRegionFlashbackNotPrepared = errors.New("region is not prepared for the flashback")
+    // ErrIsWitness is the error when a request is send to a witness.
+    ErrIsWitness = errors.New("peer is witness")
     // ErrUnknown is the unknow error.
     ErrUnknown = errors.New("unknow")
     // ErrResultUndetermined is the error when execution result is unknown.
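
The exported sentinel lets callers branch on the witness case with errors.Is. A minimal sketch, assuming the error package of client-go v2 imported under the tikverr alias used elsewhere in this change; the wrapping shown here is illustrative, not the exact error chain the library produces:

package main

import (
    "errors"
    "fmt"

    tikverr "github.com/tikv/client-go/v2/error"
)

func main() {
    // Pretend some request path surfaced the sentinel wrapped in extra context.
    err := fmt.Errorf("send request failed: %w", tikverr.ErrIsWitness)
    if errors.Is(err, tikverr.ErrIsWitness) {
        fmt.Println("request hit a witness peer; retry after the region cache refreshes")
    }
}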

go.mod (2 lines changed)

@@ -13,7 +13,7 @@ require (
     github.com/opentracing/opentracing-go v1.2.0
     github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00
     github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989
-    github.com/pingcap/kvproto v0.0.0-20221227030452-22819f5b377a
+    github.com/pingcap/kvproto v0.0.0-20230104090009-7c5d757b6e12
     github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81
     github.com/pkg/errors v0.9.1
     github.com/prometheus/client_golang v1.11.0
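
Review note: the kvproto bump is what the rest of the change relies on; the newer revision provides the witness-related protobuf definitions (the IsWitness flag on peers and the IsWitness region error) that the code below reads via p.IsWitness and regionErr.GetIsWitness().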

go.sum (4 lines changed)

@@ -155,8 +155,8 @@ github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB
 github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 h1:surzm05a8C9dN8dIUmo4Be2+pMRb6f55i+UIYrluu2E=
 github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989/go.mod h1:O17XtbryoCJhkKGbT62+L2OlrniwqiGLSqrmdHCMzZw=
 github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
-github.com/pingcap/kvproto v0.0.0-20221227030452-22819f5b377a h1:4TiR0nGKmLmu2kTC22jOhUIplmv4GGUrUD9D2DTMms0=
-github.com/pingcap/kvproto v0.0.0-20221227030452-22819f5b377a/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
+github.com/pingcap/kvproto v0.0.0-20230104090009-7c5d757b6e12 h1:ZgPcRwsrzcuYcgrsGB+2I9w0n6JIT+GEptLUZ1kWKZA=
+github.com/pingcap/kvproto v0.0.0-20230104090009-7c5d757b6e12/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
 github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY=
 github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4=
 github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=

@@ -344,13 +344,19 @@ github.com/pingcap/fn v0.0.0-20200306044125-d5540d389059 h1:Pe2LbxRmbTfAoKJ65bZL
 github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 h1:surzm05a8C9dN8dIUmo4Be2+pMRb6f55i+UIYrluu2E=
 github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989/go.mod h1:O17XtbryoCJhkKGbT62+L2OlrniwqiGLSqrmdHCMzZw=
 github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
 github.com/pingcap/kvproto v0.0.0-20230104090009-7c5d757b6e12 h1:ZgPcRwsrzcuYcgrsGB+2I9w0n6JIT+GEptLUZ1kWKZA=
 github.com/pingcap/kvproto v0.0.0-20230104090009-7c5d757b6e12/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
+github.com/pingcap/kvproto v0.0.0-20230105060948-64890fa4f6c1 h1:jw4NjEiCleRJPPpHM7K6l8OKzOjnZAj62eKteCAY6ro=
+github.com/pingcap/kvproto v0.0.0-20230105060948-64890fa4f6c1/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI=
 github.com/pingcap/log v0.0.0-20200511115504-543df19646ad/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8=
 github.com/pingcap/log v1.1.0/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4=
 github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 h1:URLoJ61DmmY++Sa/yyPEQHG2s/ZBeV1FbIswHEMrdoY=
 github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4=
+github.com/pingcap/log v1.1.1-0.20221116035753-734d527bc87c h1:crhkw6DD+07Bg1wYhW5Piw+kYNKZqFQqfC2puUf6gMI=
+github.com/pingcap/log v1.1.1-0.20221116035753-734d527bc87c/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4=
 github.com/pingcap/sysutil v0.0.0-20220114020952-ea68d2dbf5b4 h1:HYbcxtnkN3s5tqrZ/z3eJS4j3Db8wMphEm1q10lY/TM=
 github.com/pingcap/tidb v1.1.0-beta.0.20221101102559-97add26c8f84 h1:qAougMzGGYDQ00UfIVs+M0LBaQQrL8yXdmLQEqqR8HY=
 github.com/pingcap/tidb v1.1.0-beta.0.20221101102559-97add26c8f84/go.mod h1:lhtdZBXqJGbk2/Fr8JnT9KhXxEKy8h0vrDwdna/ou3g=
+github.com/pingcap/tidb v1.1.0-beta.0.20230109054422-b477b1c94620 h1:153psGlYCgPaDYc3AtlDG5ePNPVE4T1c8iJGHFZInZw=
+github.com/pingcap/tidb v1.1.0-beta.0.20230109054422-b477b1c94620/go.mod h1:Rounl3rQhnhOBh8e7pxAr6BhnNJh0nGvomRCRE6W98U=
 github.com/pingcap/tidb/parser v0.0.0-20230109054422-b477b1c94620 h1:cEP+A7ylw8sGGOCHe8F8gto+vlkGQaYt09DHgHd2xmg=

@@ -267,6 +267,14 @@ func newRegion(bo *retry.Backoffer, c *RegionCache, pdRegion *pd.Region) (*Regio
         if addr == "" {
             continue
         }
+
+        // Since the witness is read-write prohibited, it does not make sense to send requests to it
+        // unless it is the leader. When it is the leader and transfer leader encounters a problem,
+        // the backoff timeout will be triggered, and the client can give a more accurate error message.
+        if p.IsWitness && !isSamePeer(p, leader) {
+            continue
+        }
+
         if isSamePeer(p, leader) {
             leaderAccessIdx = AccessIndex(len(rs.accessIndex[tiKVOnly]))
         }
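
The new check skips witness peers when the region's accessible replicas are collected, keeping a witness only if it happens to be the leader. A standalone sketch of that rule, assuming only the IsWitness field that the bumped kvproto adds to metapb.Peer; this is an illustration, not the client-go implementation:

package main

import (
    "fmt"

    "github.com/pingcap/kvproto/pkg/metapb"
)

// accessiblePeers mimics the rule added above: witness peers are skipped
// unless they are the current leader, since a witness rejects reads and writes.
func accessiblePeers(peers []*metapb.Peer, leader *metapb.Peer) []*metapb.Peer {
    var out []*metapb.Peer
    for _, p := range peers {
        if p.IsWitness && (leader == nil || p.Id != leader.Id) {
            continue // witness follower: no point sending requests to it
        }
        out = append(out, p)
    }
    return out
}

func main() {
    leader := &metapb.Peer{Id: 2, StoreId: 2}
    peers := []*metapb.Peer{
        {Id: 1, StoreId: 1, IsWitness: true}, // dropped
        leader,                               // kept
        {Id: 3, StoreId: 3},                  // kept
    }
    fmt.Println(len(accessiblePeers(peers, leader))) // 2
}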

@@ -1260,6 +1260,42 @@ func (s *testRegionCacheSuite) TestPeersLenChange() {
     s.cache.OnSendFail(retry.NewNoopBackoff(context.Background()), ctx, false, errors.New("send fail"))
 }
 
+func (s *testRegionCacheSuite) TestPeersLenChangedByWitness() {
+    // 2 peers [peer1, peer2] and let peer2 become leader
+    loc, err := s.cache.LocateKey(s.bo, []byte("a"))
+    s.Nil(err)
+    s.cache.UpdateLeader(loc.Region, &metapb.Peer{Id: s.peer2, StoreId: s.store2}, 0)
+
+    // current leader is peer2 in [peer1, peer2]
+    loc, err = s.cache.LocateKey(s.bo, []byte("a"))
+    s.Nil(err)
+    ctx, err := s.cache.GetTiKVRPCContext(s.bo, loc.Region, kv.ReplicaReadLeader, 0)
+    s.Nil(err)
+    s.Equal(ctx.Peer.StoreId, s.store2)
+
+    // simulate peer1 become witness in kv heartbeat and loaded before response back.
+    cpMeta := &metapb.Region{
+        Id: ctx.Meta.Id,
+        StartKey: ctx.Meta.StartKey,
+        EndKey: ctx.Meta.EndKey,
+        RegionEpoch: ctx.Meta.RegionEpoch,
+        Peers: make([]*metapb.Peer, len(ctx.Meta.Peers)),
+    }
+    copy(cpMeta.Peers, ctx.Meta.Peers)
+    for _, peer := range cpMeta.Peers {
+        if peer.Id == s.peer1 {
+            peer.IsWitness = true
+        }
+    }
+    cpRegion := &pd.Region{Meta: cpMeta}
+    region, err := newRegion(s.bo, s.cache, cpRegion)
+    s.Nil(err)
+    s.cache.insertRegionToCache(region)
+
+    // OnSendFail should not panic
+    s.cache.OnSendFail(retry.NewNoopBackoff(context.Background()), ctx, false, errors.New("send fail"))
+}
+
 func createSampleRegion(startKey, endKey []byte) *Region {
     return &Region{
         meta: &metapb.Region{
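
Review note: the new test mirrors TestPeersLenChange but shrinks the usable peer set by flipping peer1 to a witness in a freshly loaded copy of the region meta; the final OnSendFail call is the actual assertion, checking that a cached RPC context built against the old peer layout does not panic once the witness-filtered region replaces it in the cache.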

@@ -1403,6 +1403,8 @@ func regionErrorToLabel(e *errorpb.Error) string {
         return "flashback_in_progress"
     } else if e.GetFlashbackNotPrepared() != nil {
         return "flashback_not_prepared"
+    } else if e.GetIsWitness() != nil {
+        return "peer_is_witness"
     }
     return "unknown"
 }

@@ -1460,6 +1462,16 @@ func (s *RegionRequestSender) onRegionError(bo *retry.Backoffer, ctx *RPCContext
         return false, nil
     }
 
+    if regionErr.GetIsWitness() != nil {
+        s.regionCache.InvalidateCachedRegion(ctx.Region)
+        logutil.BgLogger().Debug("tikv reports `IsWitness`", zap.Stringer("ctx", ctx))
+        err = bo.Backoff(retry.BoIsWitness, errors.Errorf("is witness, ctx: %v", ctx))
+        if err != nil {
+            return false, err
+        }
+        return false, nil
+    }
+
     // Since we expect that the workload should be stopped during the flashback progress,
     // if a request meets the FlashbackInProgress error, it should stop retrying immediately
     // to avoid unnecessary backoff and potential unexpected data status to the user.
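
When TiKV answers with the IsWitness region error, the sender invalidates the cached region and backs off under the new BoIsWitness config before returning, so the request gets re-resolved against refreshed region information (which, after the newRegion change above, will normally not route to the witness) instead of being retried against the same peer. A minimal sketch of just the classification step, assuming the IsWitness message the bumped kvproto adds to errorpb; not the client-go code itself:

package main

import (
    "fmt"

    "github.com/pingcap/kvproto/pkg/errorpb"
)

// isWitnessError reports whether a region error is the witness case that the
// new branch handles by invalidating the region cache and backing off.
func isWitnessError(e *errorpb.Error) bool {
    return e.GetIsWitness() != nil
}

func main() {
    fmt.Println(isWitnessError(&errorpb.Error{IsWitness: &errorpb.IsWitness{}})) // true
    fmt.Println(isWitnessError(&errorpb.Error{}))                                // false
}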

@@ -51,8 +51,8 @@ func TestBackoffWithMax(t *testing.T) {
 }
 
 func TestBackoffErrorType(t *testing.T) {
-    // the actual maxSleep is multiplied by weight, which is 480ms
-    b := NewBackofferWithVars(context.TODO(), 210, nil)
+    // the actual maxSleep is multiplied by weight, which is 1600ms
+    b := NewBackofferWithVars(context.TODO(), 800, nil)
     err := b.Backoff(BoRegionMiss, errors.New("region miss")) // 2ms sleep
     assert.Nil(t, err)
     // 6ms sleep at most in total

@@ -67,6 +67,9 @@ func TestBackoffErrorType(t *testing.T) {
     // sleep from ServerIsBusy is not counted
     err = b.Backoff(BoTiKVServerBusy, errors.New("server is busy"))
     assert.Nil(t, err)
+    // 1000ms sleep at most in total
+    err = b.Backoff(BoIsWitness, errors.New("peer is witness"))
+    assert.Nil(t, err)
     // wait it exceed max sleep
     for i := 0; i < 10; i++ {
         err = b.Backoff(BoTxnNotFound, errors.New("txn not found"))
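
Review note: the jump from 210 to 800 (1600ms after weighting) appears to be there to make room for the new BoIsWitness step, whose base sleep is 1000ms, while still letting the trailing BoTxnNotFound loop push the backoffer past its budget as before.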

@@ -125,6 +125,7 @@ var (
     BoMaxTsNotSynced = NewConfig("maxTsNotSynced", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 500, NoJitter), tikverr.ErrTiKVMaxTimestampNotSynced)
     BoMaxDataNotReady = NewConfig("dataNotReady", &metrics.BackoffHistogramDataNotReady, NewBackoffFnCfg(2, 2000, NoJitter), tikverr.ErrRegionDataNotReady)
     BoMaxRegionNotInitialized = NewConfig("regionNotInitialized", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 1000, NoJitter), tikverr.ErrRegionNotInitialized)
+    BoIsWitness = NewConfig("isWitness", &metrics.BackoffHistogramIsWitness, NewBackoffFnCfg(1000, 10000, EqualJitter), tikverr.ErrIsWitness)
     // TxnLockFast's `base` load from vars.BackoffLockFast when create BackoffFn.
     BoTxnLockFast = NewConfig(txnLockFastName, &metrics.BackoffHistogramLockFast, NewBackoffFnCfg(2, 3000, EqualJitter), tikverr.ErrResolveLockTimeout)
 )
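
For the new entry, NewBackoffFnCfg(1000, 10000, EqualJitter) reads as roughly a 1s first sleep growing toward a 10s cap with equal jitter, which is much coarser than the millisecond-scale region backoffs around it. A rough standalone sketch of that growth, assuming the usual double-and-cap behaviour; the jitter and exact accounting in client-go are not reproduced here:

package main

import "fmt"

func main() {
    base, maxSleep := 1000, 10000 // ms, the BoIsWitness parameters
    sleep := base
    for attempt := 1; attempt <= 6; attempt++ {
        // with EqualJitter the real sleep would land between sleep/2 and sleep
        fmt.Printf("attempt %d: up to ~%d ms\n", attempt, sleep)
        sleep *= 2
        if sleep > maxSleep {
            sleep = maxSleep
        }
    }
}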

@@ -66,6 +66,7 @@ var (
     BackoffHistogramRegionRecoveryInProgress prometheus.Observer
     BackoffHistogramStaleCmd prometheus.Observer
     BackoffHistogramDataNotReady prometheus.Observer
+    BackoffHistogramIsWitness prometheus.Observer
     BackoffHistogramEmpty prometheus.Observer
 
     TxnRegionsNumHistogramWithSnapshot prometheus.Observer

@@ -166,6 +167,7 @@ func initShortcuts() {
     BackoffHistogramRegionRecoveryInProgress = TiKVBackoffHistogram.WithLabelValues("regionRecoveryInProgress")
     BackoffHistogramStaleCmd = TiKVBackoffHistogram.WithLabelValues("staleCommand")
     BackoffHistogramDataNotReady = TiKVBackoffHistogram.WithLabelValues("dataNotReady")
+    BackoffHistogramIsWitness = TiKVBackoffHistogram.WithLabelValues("isWitness")
     BackoffHistogramEmpty = TiKVBackoffHistogram.WithLabelValues("")
 
     TxnRegionsNumHistogramWithSnapshot = TiKVTxnRegionsNumHistogram.WithLabelValues("snapshot")
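
Review note: the new BackoffHistogramIsWitness observer gives the isWitness backoff its own label in the TiKV backoff histogram, so time spent waiting out witness errors (via BoIsWitness above) shows up separately in the metrics instead of being folded into the empty label.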