// Copyright 2021 TiKV Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE: The code in this file is based on code from the
// TiDB project, licensed under the Apache License v 2.0
//
// https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/retry/config.go
//

// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package retry

import (
	"context"
	"math"
	"math/rand"
	"strings"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	tikverr "github.com/tikv/client-go/v2/error"
	"github.com/tikv/client-go/v2/internal/logutil"
	"github.com/tikv/client-go/v2/kv"
	"github.com/tikv/client-go/v2/metrics"
	"go.uber.org/zap"
)

// Config is the configuration of the Backoff function.
type Config struct {
	// name identifies the config; it is what String returns and what
	// createBackoffFn compares against txnLockFastName.
	name string
	// metric points at the Prometheus observer associated with this config.
	// NOTE(review): presumably it records the backoff sleep durations; the
	// consumer is not in this file, confirm against the Backoffer.
	metric *prometheus.Observer
	// fnCfg carries the base/cap/jitter parameters handed to newBackoffFn.
	fnCfg *BackoffFnCfg
	// err is the error attached to this kind of backoff; SetErrors replaces it.
	// NOTE(review): presumably reported once the backoff budget is exhausted;
	// verify against the caller.
	err error
}

// backoffFn is the backoff function which computes the sleep time and performs the sleep.
type backoffFn func(ctx context.Context, maxSleepMs int) int func (c *Config) createBackoffFn(vars *kv.Variables) backoffFn { if strings.EqualFold(c.name, txnLockFastName) { return newBackoffFn(vars.BackoffLockFast, c.fnCfg.cap, c.fnCfg.jitter) } return newBackoffFn(c.fnCfg.base, c.fnCfg.cap, c.fnCfg.jitter) } // BackoffFnCfg is the configuration for the backoff func which implements exponential backoff with // optional jitters. // See http://www.awsarchitectureblog.com/2015/03/backoff.html type BackoffFnCfg struct { base int cap int jitter int } // NewBackoffFnCfg creates the config for BackoffFn. func NewBackoffFnCfg(base, cap, jitter int) *BackoffFnCfg { return &BackoffFnCfg{ base, cap, jitter, } } // NewConfig creates a new Config for the Backoff operation. func NewConfig(name string, metric *prometheus.Observer, backoffFnCfg *BackoffFnCfg, err error) *Config { return &Config{ name: name, metric: metric, fnCfg: backoffFnCfg, err: err, } } func (c *Config) String() string { return c.name } // SetErrors sets a more detailed error instead of the default bo config. func (c *Config) SetErrors(err error) { c.err = err } const txnLockFastName = "txnLockFast" // Backoff Config variables. var ( // TODO: distinguish tikv and tiflash in metrics BoTiKVRPC = NewConfig("tikvRPC", &metrics.BackoffHistogramRPC, NewBackoffFnCfg(100, 2000, EqualJitter), tikverr.ErrTiKVServerTimeout) BoTiFlashRPC = NewConfig("tiflashRPC", &metrics.BackoffHistogramRPC, NewBackoffFnCfg(100, 2000, EqualJitter), tikverr.ErrTiFlashServerTimeout) BoTxnLock = NewConfig("txnLock", &metrics.BackoffHistogramLock, NewBackoffFnCfg(100, 3000, EqualJitter), tikverr.ErrResolveLockTimeout) BoPDRPC = NewConfig("pdRPC", &metrics.BackoffHistogramPD, NewBackoffFnCfg(500, 3000, EqualJitter), tikverr.NewErrPDServerTimeout("")) // change base time to 2ms, because it may recover soon. 
BoRegionMiss = NewConfig("regionMiss", &metrics.BackoffHistogramRegionMiss, NewBackoffFnCfg(2, 500, NoJitter), tikverr.ErrRegionUnavailable) BoRegionScheduling = NewConfig("regionScheduling", &metrics.BackoffHistogramRegionScheduling, NewBackoffFnCfg(2, 500, NoJitter), tikverr.ErrRegionUnavailable) BoTiKVServerBusy = NewConfig("tikvServerBusy", &metrics.BackoffHistogramServerBusy, NewBackoffFnCfg(2000, 10000, EqualJitter), tikverr.ErrTiKVServerBusy) BoTiKVDiskFull = NewConfig("tikvDiskFull", &metrics.BackoffHistogramTiKVDiskFull, NewBackoffFnCfg(500, 5000, NoJitter), tikverr.ErrTiKVDiskFull) BoRegionRecoveryInProgress = NewConfig("regionRecoveryInProgress", &metrics.BackoffHistogramRegionRecoveryInProgress, NewBackoffFnCfg(100, 10000, EqualJitter), tikverr.ErrRegionRecoveryInProgress) BoTiFlashServerBusy = NewConfig("tiflashServerBusy", &metrics.BackoffHistogramServerBusy, NewBackoffFnCfg(2000, 10000, EqualJitter), tikverr.ErrTiFlashServerBusy) BoTxnNotFound = NewConfig("txnNotFound", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 500, NoJitter), tikverr.ErrResolveLockTimeout) BoStaleCmd = NewConfig("staleCommand", &metrics.BackoffHistogramStaleCmd, NewBackoffFnCfg(2, 1000, NoJitter), tikverr.ErrTiKVStaleCommand) BoMaxTsNotSynced = NewConfig("maxTsNotSynced", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 500, NoJitter), tikverr.ErrTiKVMaxTimestampNotSynced) BoMaxDataNotReady = NewConfig("dataNotReady", &metrics.BackoffHistogramDataNotReady, NewBackoffFnCfg(2, 2000, NoJitter), tikverr.ErrRegionDataNotReady) BoMaxRegionNotInitialized = NewConfig("regionNotInitialized", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 1000, NoJitter), tikverr.ErrRegionNotInitialized) BoIsWitness = NewConfig("isWitness", &metrics.BackoffHistogramIsWitness, NewBackoffFnCfg(1000, 10000, EqualJitter), tikverr.ErrIsWitness) // TxnLockFast's `base` load from vars.BackoffLockFast when create BackoffFn. 
BoTxnLockFast = NewConfig(txnLockFastName, &metrics.BackoffHistogramLockFast, NewBackoffFnCfg(2, 3000, EqualJitter), tikverr.ErrResolveLockTimeout) ) var isSleepExcluded = map[string]int{ BoTiKVServerBusy.name: 600000, // The max excluded limit is 10min. // add BoTiFlashServerBusy if appropriate } // setBackoffExcluded is used for test only. func setBackoffExcluded(name string, maxVal int) { if _, ok := isSleepExcluded[name]; ok { isSleepExcluded[name] = maxVal } } const ( // NoJitter makes the backoff sequence strict exponential. NoJitter = 1 + iota // FullJitter applies random factors to strict exponential. FullJitter // EqualJitter is also randomized, but prevents very short sleeps. EqualJitter // DecorrJitter increases the maximum jitter based on the last random value. DecorrJitter ) // newBackoffFn creates a backoff func which implements exponential backoff with // optional jitters. // See http://www.awsarchitectureblog.com/2015/03/backoff.html func newBackoffFn(base, cap, jitter int) backoffFn { if base < 2 { // Top prevent panic in 'rand.Intn'. base = 2 } attempts := 0 lastSleep := base return func(ctx context.Context, maxSleepMs int) int { var sleep int switch jitter { case NoJitter: sleep = expo(base, cap, attempts) case FullJitter: v := expo(base, cap, attempts) sleep = rand.Intn(v) case EqualJitter: v := expo(base, cap, attempts) sleep = v/2 + rand.Intn(v/2) case DecorrJitter: sleep = int(math.Min(float64(cap), float64(base+rand.Intn(lastSleep*3-base)))) } logutil.BgLogger().Debug("backoff", zap.Int("base", base), zap.Int("sleep", sleep), zap.Int("attempts", attempts)) realSleep := sleep // when set maxSleepMs >= 0 in `tikv.BackoffWithMaxSleep` will force sleep maxSleepMs milliseconds. 
if maxSleepMs >= 0 && realSleep > maxSleepMs { realSleep = maxSleepMs } select { case <-time.After(time.Duration(realSleep) * time.Millisecond): attempts++ lastSleep = sleep return realSleep case <-ctx.Done(): return 0 } } } func expo(base, cap, n int) int { return int(math.Min(float64(cap), float64(base)*math.Pow(2.0, float64(n)))) }