mirror of https://github.com/tikv/client-go.git
204 lines
7.9 KiB
Go
204 lines
7.9 KiB
Go
// Copyright 2021 TiKV Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// NOTE: The code in this file is based on code from the
|
|
// TiDB project, licensed under the Apache License v 2.0
|
|
//
|
|
// https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/retry/config.go
|
|
//
|
|
|
|
// Copyright 2021 PingCAP, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package retry
|
|
|
|
import (
|
|
"context"
|
|
"math"
|
|
"math/rand"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
tikverr "github.com/tikv/client-go/v2/error"
|
|
"github.com/tikv/client-go/v2/internal/logutil"
|
|
"github.com/tikv/client-go/v2/kv"
|
|
"github.com/tikv/client-go/v2/metrics"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
// Config is the configuration of the Backoff function.
|
|
type Config struct {
|
|
name string
|
|
metric *prometheus.Observer
|
|
fnCfg *BackoffFnCfg
|
|
err error
|
|
}
|
|
|
|
// backoffFn is a backoff function: it computes the sleep time, performs the
// sleep, and returns an int (for the functions built by newBackoffFn, the
// number of milliseconds actually slept).
type backoffFn func(ctx context.Context, maxSleepMs int) int
|
|
|
|
func (c *Config) createBackoffFn(vars *kv.Variables) backoffFn {
|
|
if strings.EqualFold(c.name, txnLockFastName) {
|
|
return newBackoffFn(vars.BackoffLockFast, c.fnCfg.cap, c.fnCfg.jitter)
|
|
}
|
|
return newBackoffFn(c.fnCfg.base, c.fnCfg.cap, c.fnCfg.jitter)
|
|
}
|
|
|
|
// BackoffFnCfg is the configuration for the backoff func which implements
// exponential backoff with optional jitters.
// See http://www.awsarchitectureblog.com/2015/03/backoff.html
type BackoffFnCfg struct {
	// base is the starting sleep value fed to the exponential sequence.
	base int
	// cap is the upper bound applied to the computed sleep value.
	cap int
	// jitter selects the jitter strategy (NoJitter, FullJitter, EqualJitter
	// or DecorrJitter).
	jitter int
}

// NewBackoffFnCfg creates the config for BackoffFn from the given base, cap
// and jitter values. The values are stored as-is, without validation.
func NewBackoffFnCfg(base, cap, jitter int) *BackoffFnCfg {
	return &BackoffFnCfg{
		base:   base,
		cap:    cap,
		jitter: jitter,
	}
}
|
|
|
|
// NewConfig creates a new Config for the Backoff operation.
|
|
func NewConfig(name string, metric *prometheus.Observer, backoffFnCfg *BackoffFnCfg, err error) *Config {
|
|
return &Config{
|
|
name: name,
|
|
metric: metric,
|
|
fnCfg: backoffFnCfg,
|
|
err: err,
|
|
}
|
|
}
|
|
|
|
func (c *Config) String() string {
|
|
return c.name
|
|
}
|
|
|
|
// SetErrors sets a more detailed error instead of the default bo config.
|
|
func (c *Config) SetErrors(err error) {
|
|
c.err = err
|
|
}
|
|
|
|
// txnLockFastName is the name of the BoTxnLockFast config. createBackoffFn
// matches on it to take the base sleep from kv.Variables.
const txnLockFastName = "txnLockFast"
|
|
|
|
// Backoff Config variables.
|
|
var (
|
|
// TODO: distinguish tikv and tiflash in metrics
|
|
BoTiKVRPC = NewConfig("tikvRPC", &metrics.BackoffHistogramRPC, NewBackoffFnCfg(100, 2000, EqualJitter), tikverr.ErrTiKVServerTimeout)
|
|
BoTiFlashRPC = NewConfig("tiflashRPC", &metrics.BackoffHistogramRPC, NewBackoffFnCfg(100, 2000, EqualJitter), tikverr.ErrTiFlashServerTimeout)
|
|
BoTxnLock = NewConfig("txnLock", &metrics.BackoffHistogramLock, NewBackoffFnCfg(100, 3000, EqualJitter), tikverr.ErrResolveLockTimeout)
|
|
BoPDRPC = NewConfig("pdRPC", &metrics.BackoffHistogramPD, NewBackoffFnCfg(500, 3000, EqualJitter), tikverr.NewErrPDServerTimeout(""))
|
|
// change base time to 2ms, because it may recover soon.
|
|
BoRegionMiss = NewConfig("regionMiss", &metrics.BackoffHistogramRegionMiss, NewBackoffFnCfg(2, 500, NoJitter), tikverr.ErrRegionUnavailable)
|
|
BoRegionScheduling = NewConfig("regionScheduling", &metrics.BackoffHistogramRegionScheduling, NewBackoffFnCfg(2, 500, NoJitter), tikverr.ErrRegionUnavailable)
|
|
BoTiKVServerBusy = NewConfig("tikvServerBusy", &metrics.BackoffHistogramServerBusy, NewBackoffFnCfg(2000, 10000, EqualJitter), tikverr.ErrTiKVServerBusy)
|
|
BoTiKVDiskFull = NewConfig("tikvDiskFull", &metrics.BackoffHistogramTiKVDiskFull, NewBackoffFnCfg(500, 5000, NoJitter), tikverr.ErrTiKVDiskFull)
|
|
BoRegionRecoveryInProgress = NewConfig("regionRecoveryInProgress", &metrics.BackoffHistogramRegionRecoveryInProgress, NewBackoffFnCfg(100, 10000, EqualJitter), tikverr.ErrRegionRecoveryInProgress)
|
|
BoTiFlashServerBusy = NewConfig("tiflashServerBusy", &metrics.BackoffHistogramServerBusy, NewBackoffFnCfg(2000, 10000, EqualJitter), tikverr.ErrTiFlashServerBusy)
|
|
BoTxnNotFound = NewConfig("txnNotFound", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 500, NoJitter), tikverr.ErrResolveLockTimeout)
|
|
BoStaleCmd = NewConfig("staleCommand", &metrics.BackoffHistogramStaleCmd, NewBackoffFnCfg(2, 1000, NoJitter), tikverr.ErrTiKVStaleCommand)
|
|
BoMaxTsNotSynced = NewConfig("maxTsNotSynced", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 500, NoJitter), tikverr.ErrTiKVMaxTimestampNotSynced)
|
|
BoMaxDataNotReady = NewConfig("dataNotReady", &metrics.BackoffHistogramDataNotReady, NewBackoffFnCfg(2, 2000, NoJitter), tikverr.ErrRegionDataNotReady)
|
|
BoMaxRegionNotInitialized = NewConfig("regionNotInitialized", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 1000, NoJitter), tikverr.ErrRegionNotInitialized)
|
|
BoIsWitness = NewConfig("isWitness", &metrics.BackoffHistogramIsWitness, NewBackoffFnCfg(1000, 10000, EqualJitter), tikverr.ErrIsWitness)
|
|
// TxnLockFast's `base` load from vars.BackoffLockFast when create BackoffFn.
|
|
BoTxnLockFast = NewConfig(txnLockFastName, &metrics.BackoffHistogramLockFast, NewBackoffFnCfg(2, 3000, EqualJitter), tikverr.ErrResolveLockTimeout)
|
|
)
|
|
|
|
var isSleepExcluded = map[string]int{
|
|
BoTiKVServerBusy.name: 600000, // The max excluded limit is 10min.
|
|
// add BoTiFlashServerBusy if appropriate
|
|
}
|
|
|
|
// setBackoffExcluded is used for test only.
|
|
func setBackoffExcluded(name string, maxVal int) {
|
|
if _, ok := isSleepExcluded[name]; ok {
|
|
isSleepExcluded[name] = maxVal
|
|
}
|
|
}
|
|
|
|
// Jitter strategies understood by newBackoffFn. The values start at 1, so the
// zero value of an int does not match any of them.
const (
	// NoJitter makes the backoff sequence strict exponential.
	NoJitter = iota + 1
	// FullJitter applies random factors to strict exponential.
	FullJitter
	// EqualJitter is also randomized, but prevents very short sleeps.
	EqualJitter
	// DecorrJitter increases the maximum jitter based on the last random value.
	DecorrJitter
)
|
|
|
|
// newBackoffFn creates a backoff func which implements exponential backoff with
|
|
// optional jitters.
|
|
// See http://www.awsarchitectureblog.com/2015/03/backoff.html
|
|
func newBackoffFn(base, cap, jitter int) backoffFn {
|
|
if base < 2 {
|
|
// Top prevent panic in 'rand.Intn'.
|
|
base = 2
|
|
}
|
|
attempts := 0
|
|
lastSleep := base
|
|
return func(ctx context.Context, maxSleepMs int) int {
|
|
var sleep int
|
|
switch jitter {
|
|
case NoJitter:
|
|
sleep = expo(base, cap, attempts)
|
|
case FullJitter:
|
|
v := expo(base, cap, attempts)
|
|
sleep = rand.Intn(v)
|
|
case EqualJitter:
|
|
v := expo(base, cap, attempts)
|
|
sleep = v/2 + rand.Intn(v/2)
|
|
case DecorrJitter:
|
|
sleep = int(math.Min(float64(cap), float64(base+rand.Intn(lastSleep*3-base))))
|
|
}
|
|
logutil.BgLogger().Debug("backoff",
|
|
zap.Int("base", base),
|
|
zap.Int("sleep", sleep),
|
|
zap.Int("attempts", attempts))
|
|
|
|
realSleep := sleep
|
|
// when set maxSleepMs >= 0 in `tikv.BackoffWithMaxSleep` will force sleep maxSleepMs milliseconds.
|
|
if maxSleepMs >= 0 && realSleep > maxSleepMs {
|
|
realSleep = maxSleepMs
|
|
}
|
|
select {
|
|
case <-time.After(time.Duration(realSleep) * time.Millisecond):
|
|
attempts++
|
|
lastSleep = sleep
|
|
return realSleep
|
|
case <-ctx.Done():
|
|
return 0
|
|
}
|
|
}
|
|
}
|
|
|
|
// expo returns base * 2^n, bounded above by cap. The computation is done in
// float64 and the result is converted back to int.
func expo(base, cap, n int) int {
	grown := float64(base) * math.Pow(2.0, float64(n))
	limit := float64(cap)
	return int(math.Min(limit, grown))
}
|