mirror of https://github.com/tikv/client-go.git
Add backoff for error recovery in progress (#485)
* add backoff for error recovery in progress Signed-off-by: Connor1996 <zbk602423539@gmail.com>
This commit is contained in:
parent
3705989fa1
commit
a38ac96984
|
|
@ -86,6 +86,8 @@ var (
|
|||
ErrRegionNotInitialized = errors.New("region not Initialized")
|
||||
// ErrTiKVDiskFull is the error when tikv server disk usage is full.
|
||||
ErrTiKVDiskFull = errors.New("tikv disk full")
|
||||
// ErrRegionRecoveryInProgress is the error when region is recovering.
|
||||
ErrRegionRecoveryInProgress = errors.New("region is being online unsafe recovered")
|
||||
// ErrUnknown is the unknown error.
|
||||
ErrUnknown = errors.New("unknow")
|
||||
// ErrResultUndetermined is the error when execution result is unknown.
|
||||
|
|
|
|||
|
|
@ -1348,6 +1348,8 @@ func regionErrorToLabel(e *errorpb.Error) string {
|
|||
return "region_not_initialized"
|
||||
} else if e.GetDiskFull() != nil {
|
||||
return "disk_full"
|
||||
} else if e.GetRecoveryInProgress() != nil {
|
||||
return "recovery_in_progress"
|
||||
}
|
||||
return "unknown"
|
||||
}
|
||||
|
|
@ -1395,6 +1397,16 @@ func (s *RegionRequestSender) onRegionError(bo *retry.Backoffer, ctx *RPCContext
|
|||
return true, nil
|
||||
}
|
||||
|
||||
if regionErr.GetRecoveryInProgress() != nil {
|
||||
s.regionCache.InvalidateCachedRegion(ctx.Region)
|
||||
logutil.BgLogger().Debug("tikv reports `RecoveryInProgress`", zap.Stringer("ctx", ctx))
|
||||
err = bo.Backoff(retry.BoRegionRecoveryInProgress, errors.Errorf("region recovery in progress, ctx: %v", ctx))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// This peer is removed from the region. Invalidate the region since it's too stale.
|
||||
if regionErr.GetRegionNotFound() != nil {
|
||||
s.regionCache.InvalidateCachedRegion(ctx.Region)
|
||||
|
|
|
|||
|
|
@ -51,26 +51,32 @@ func TestBackoffWithMax(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestBackoffErrorType(t *testing.T) {
|
||||
// the actual maxSleep is multiplied by weight, which is 400ms
|
||||
b := NewBackofferWithVars(context.TODO(), 200, nil)
|
||||
// the actual maxSleep is multiplied by weight, which is 480ms
|
||||
b := NewBackofferWithVars(context.TODO(), 250, nil)
|
||||
err := b.Backoff(BoRegionMiss, errors.New("region miss")) // 2ms sleep
|
||||
assert.Nil(t, err)
|
||||
// 300 ms sleep in total
|
||||
// 300ms sleep at most in total
|
||||
for i := 0; i < 2; i++ {
|
||||
err = b.Backoff(BoMaxDataNotReady, errors.New("data not ready"))
|
||||
assert.Nil(t, err)
|
||||
}
|
||||
// 100ms sleep at most in total
|
||||
err = b.Backoff(BoRegionRecoveryInProgress, errors.New("recovery in progress"))
|
||||
assert.Nil(t, err)
|
||||
|
||||
// sleep from ServerIsBusy is not counted
|
||||
err = b.Backoff(BoTiKVServerBusy, errors.New("server is busy"))
|
||||
assert.Nil(t, err)
|
||||
// 126ms sleep in total
|
||||
for i := 0; i < 6; i++ {
|
||||
// wait until it exceeds max sleep
|
||||
for i := 0; i < 10; i++ {
|
||||
err = b.Backoff(BoTxnNotFound, errors.New("txn not found"))
|
||||
assert.Nil(t, err)
|
||||
}
|
||||
if err != nil {
|
||||
// Next backoff should return error of backoff that sleeps for longest time.
|
||||
err = b.Backoff(BoTxnNotFound, errors.New("tikv rpc"))
|
||||
assert.ErrorIs(t, err, BoMaxDataNotReady.err)
|
||||
return
|
||||
}
|
||||
}
|
||||
assert.Fail(t, "should not be here")
|
||||
}
|
||||
|
||||
func TestBackoffDeepCopy(t *testing.T) {
|
||||
|
|
|
|||
|
|
@ -118,6 +118,7 @@ var (
|
|||
BoRegionScheduling = NewConfig("regionScheduling", &metrics.BackoffHistogramRegionScheduling, NewBackoffFnCfg(2, 500, NoJitter), tikverr.ErrRegionUnavailable)
|
||||
BoTiKVServerBusy = NewConfig("tikvServerBusy", &metrics.BackoffHistogramServerBusy, NewBackoffFnCfg(2000, 10000, EqualJitter), tikverr.ErrTiKVServerBusy)
|
||||
BoTiKVDiskFull = NewConfig("tikvDiskFull", &metrics.BackoffHistogramTiKVDiskFull, NewBackoffFnCfg(500, 5000, NoJitter), tikverr.ErrTiKVDiskFull)
|
||||
BoRegionRecoveryInProgress = NewConfig("regionRecoveryInProgress", &metrics.BackoffHistogramRegionRecoveryInProgress, NewBackoffFnCfg(100, 10000, EqualJitter), tikverr.ErrRegionRecoveryInProgress)
|
||||
BoTiFlashServerBusy = NewConfig("tiflashServerBusy", &metrics.BackoffHistogramServerBusy, NewBackoffFnCfg(2000, 10000, EqualJitter), tikverr.ErrTiFlashServerBusy)
|
||||
BoTxnNotFound = NewConfig("txnNotFound", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 500, NoJitter), tikverr.ErrResolveLockTimeout)
|
||||
BoStaleCmd = NewConfig("staleCommand", &metrics.BackoffHistogramStaleCmd, NewBackoffFnCfg(2, 1000, NoJitter), tikverr.ErrTiKVStaleCommand)
|
||||
|
|
|
|||
|
|
@ -62,6 +62,7 @@ var (
|
|||
BackoffHistogramRegionScheduling prometheus.Observer
|
||||
BackoffHistogramServerBusy prometheus.Observer
|
||||
BackoffHistogramTiKVDiskFull prometheus.Observer
|
||||
BackoffHistogramRegionRecoveryInProgress prometheus.Observer
|
||||
BackoffHistogramStaleCmd prometheus.Observer
|
||||
BackoffHistogramDataNotReady prometheus.Observer
|
||||
BackoffHistogramEmpty prometheus.Observer
|
||||
|
|
@ -155,6 +156,7 @@ func initShortcuts() {
|
|||
BackoffHistogramRegionScheduling = TiKVBackoffHistogram.WithLabelValues("regionScheduling")
|
||||
BackoffHistogramServerBusy = TiKVBackoffHistogram.WithLabelValues("serverBusy")
|
||||
BackoffHistogramTiKVDiskFull = TiKVBackoffHistogram.WithLabelValues("tikvDiskFull")
|
||||
BackoffHistogramRegionRecoveryInProgress = TiKVBackoffHistogram.WithLabelValues("regionRecoveryInProgress")
|
||||
BackoffHistogramStaleCmd = TiKVBackoffHistogram.WithLabelValues("staleCommand")
|
||||
BackoffHistogramDataNotReady = TiKVBackoffHistogram.WithLabelValues("dataNotReady")
|
||||
BackoffHistogramEmpty = TiKVBackoffHistogram.WithLabelValues("")
|
||||
|
|
|
|||
Loading…
Reference in New Issue