client-go/internal/retry/backoff.go

309 lines
9.0 KiB
Go

// Copyright 2021 TiKV Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
// NOTE: The code in this file is based on code from the
// TiDB project, licensed under the Apache License v 2.0
//
// https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/retry/backoff.go
//
// Copyright 2016 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
package retry
import (
"context"
"fmt"
"math"
"strings"
"sync/atomic"
"time"
"github.com/opentracing/opentracing-go"
"github.com/pingcap/errors"
"github.com/pingcap/log"
tikverr "github.com/tikv/client-go/v2/error"
"github.com/tikv/client-go/v2/kv"
"github.com/tikv/client-go/v2/logutil"
"github.com/tikv/client-go/v2/util"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
)
// Backoffer is a utility for retrying queries.
// It accumulates the time slept across backoff calls and, once that total
// reaches maxSleep (when maxSleep is positive), stops sleeping and returns
// an error instead.
type Backoffer struct {
// ctx is the context bound to this Backoffer; see GetCtx and SetCtx.
ctx context.Context
// fn caches one backoff function per Config name; it is created lazily on
// the first sleep and dropped by Reset.
fn map[string]backoffFn
// maxSleep is the total sleep budget in milliseconds. The budget check is
// skipped when it is not positive.
maxSleep int
// totalSleep is the number of milliseconds slept so far.
totalSleep int
// vars holds the kv.Variables consulted for BackOffWeight and the Killed flag.
vars *kv.Variables
// noop makes every backoff call return an error instead of sleeping.
noop bool
// errors records each error passed to a backoff call, suffixed with the
// time it was recorded.
errors []error
// configs records the Config of each backoff call, in call order.
configs []*Config
// backoffSleepMS maps a Config name to the milliseconds slept for it.
backoffSleepMS map[string]int
// backoffTimes maps a Config name to the number of sleeps performed for it.
backoffTimes map[string]int
// parent is set by Fork to the Backoffer that was forked (Clone copies the
// pointer); GetTypes walks this chain.
parent *Backoffer
}
// txnStartCtxKeyType is the unexported type of TxnStartKey. Using a dedicated
// type keeps the key distinct from context keys declared by other packages.
type txnStartCtxKeyType struct{}
// TxnStartKey is a key for transaction start_ts info in context.Context.
// BackoffWithCfgAndMaxSleep looks it up only to include the value in its
// debug log entry.
var TxnStartKey interface{} = txnStartCtxKeyType{}
// NewBackoffer (Deprecated) creates a Backoffer with maximum sleep time(in ms).
// The returned Backoffer is bound to ctx and uses kv.DefaultVars; maxSleep is
// stored exactly as given, without being scaled by the backoff weight.
func NewBackoffer(ctx context.Context, maxSleep int) *Backoffer {
	bo := new(Backoffer)
	bo.ctx = ctx
	bo.maxSleep = maxSleep
	bo.vars = kv.DefaultVars
	return bo
}
// NewBackofferWithVars creates a Backoffer with maximum sleep time(in ms) and kv.Variables.
// It builds the Backoffer with NewBackoffer and then applies vars through
// withVars, which may scale maxSleep by the variables' BackOffWeight.
func NewBackofferWithVars(ctx context.Context, maxSleep int, vars *kv.Variables) *Backoffer {
	bo := NewBackoffer(ctx, maxSleep)
	return bo.withVars(vars)
}
// NewNoopBackoff creates a Backoffer that never sleeps: every backoff call on
// it returns an error directly. The Backoffer is bound to ctx and has no
// kv.Variables attached.
func NewNoopBackoff(ctx context.Context) *Backoffer {
	bo := new(Backoffer)
	bo.noop = true
	bo.ctx = ctx
	return bo
}
// withVars sets the kv.Variables to the Backoffer and return it.
//
// A nil vars leaves the currently bound variables in place. Afterwards, if
// maxSleep is positive, it is multiplied by the variables' BackOffWeight,
// provided the product does not exceed math.MaxInt32. maxSleep is left
// unchanged when no variables are bound, when the weight is not positive, or
// when scaling would overflow that bound.
func (b *Backoffer) withVars(vars *kv.Variables) *Backoffer {
	if vars != nil {
		b.vars = vars
	}
	// Nothing to scale without a positive budget, and nothing to scale by
	// without variables (a Backoffer from NewNoopBackoff has none).
	if b.maxSleep <= 0 || b.vars == nil {
		return b
	}
	// maxSleep is the max sleep time in millisecond.
	// When it is multiplied by BackOffWeight, it should not be greater than MaxInt32.
	// The weight must be positive: a zero weight would make the division below panic.
	weight := b.vars.BackOffWeight
	if weight > 0 && math.MaxInt32/weight >= b.maxSleep {
		b.maxSleep *= weight
	}
	return b
}
// Backoff sleeps a while base on the Config and records the error message.
// It returns a retryable error if total sleep time exceeds maxSleep.
//
// When the bound context carries an opentracing span with a tracer, a child
// span named "tikv.backoff.<cfg>" is started and finished around the call.
// The child span is attached to the bound context only while the backoff
// runs; the previous context is restored before Backoff returns.
func (b *Backoffer) Backoff(cfg *Config, err error) error {
	if span := opentracing.SpanFromContext(b.ctx); span != nil && span.Tracer() != nil {
		span1 := span.Tracer().StartSpan(fmt.Sprintf("tikv.backoff.%s", cfg), opentracing.ChildOf(span.Context()))
		defer span1.Finish()
		// ContextWithSpan returns a derived context instead of modifying its
		// argument, so the result has to be stored to take effect. It is
		// restored afterwards so the finished span does not stay on b.ctx.
		parentCtx := b.ctx
		b.ctx = opentracing.ContextWithSpan(parentCtx, span1)
		defer func() { b.ctx = parentCtx }()
	}
	return b.BackoffWithCfgAndMaxSleep(cfg, -1, err)
}
// BackoffWithMaxSleepTxnLockFast sleeps a while based on the BoTxnLockFast
// config and records the error message, never sleeping more than maxSleepMs
// for each sleep. It is a shorthand for BackoffWithCfgAndMaxSleep with
// BoTxnLockFast as the config.
func (b *Backoffer) BackoffWithMaxSleepTxnLockFast(maxSleepMs int, err error) error {
	return b.BackoffWithCfgAndMaxSleep(BoTxnLockFast, maxSleepMs, err)
}
// BackoffWithCfgAndMaxSleep sleeps a while base on the Config and records the error message
// and never sleep more than maxSleepMs for each sleep.
//
// The call proceeds as follows:
//   - an error whose message contains tikverr.MismatchClusterID is logged at
//     Fatal level;
//   - if the bound context is already done, err is returned (traced) without
//     being recorded and without sleeping;
//   - err (with a timestamp) and cfg are recorded;
//   - for a no-op Backoffer, or when the total sleep has reached a positive
//     maxSleep, a warning listing the recorded errors is logged and the error
//     of the first recorded config is returned;
//   - otherwise the backoff function for cfg is run, the sleep statistics are
//     updated, and nil is returned, unless the Killed flag of the bound
//     variables is 1, in which case tikverr.ErrQueryInterrupted is returned.
//
// maxSleepMs is handed to the backoff function unchanged (Backoff passes -1);
// its exact interpretation is defined by the function that
// cfg.createBackoffFn returns.
func (b *Backoffer) BackoffWithCfgAndMaxSleep(cfg *Config, maxSleepMs int, err error) error {
	if strings.Contains(err.Error(), tikverr.MismatchClusterID) {
		logutil.BgLogger().Fatal("critical error", zap.Error(err))
	}
	// Non-blocking check: do not sleep for a context that is already done.
	select {
	case <-b.ctx.Done():
		return errors.Trace(err)
	default:
	}

	b.errors = append(b.errors, errors.Errorf("%s at %s", err.Error(), time.Now().Format(time.RFC3339Nano)))
	b.configs = append(b.configs, cfg)
	if b.noop || (b.maxSleep > 0 && b.totalSleep >= b.maxSleep) {
		// Print only last 3 errors for non-DEBUG log levels.
		shown := b.errors
		if log.GetLevel() != zapcore.DebugLevel && len(shown) > 3 {
			shown = shown[len(shown)-3:]
		}
		var errMsg strings.Builder
		fmt.Fprintf(&errMsg, "%s backoffer.maxSleep %dms is exceeded, errors:", cfg.String(), b.maxSleep)
		for _, recorded := range shown {
			errMsg.WriteByte('\n')
			errMsg.WriteString(recorded.Error())
		}
		logutil.BgLogger().Warn(errMsg.String())
		// Use the first backoff type to generate a MySQL error.
		return b.configs[0].err
	}

	// Lazy initialize.
	if b.fn == nil {
		b.fn = make(map[string]backoffFn)
	}
	// One backoff function is kept per config name so that repeated backoffs
	// of the same kind reuse the same function value.
	f, ok := b.fn[cfg.name]
	if !ok {
		f = cfg.createBackoffFn(b.vars)
		b.fn[cfg.name] = f
	}
	realSleep := f(b.ctx, maxSleepMs)
	if cfg.metric != nil {
		// realSleep is in milliseconds; the metric is fed realSleep/1000.
		(*cfg.metric).Observe(float64(realSleep) / 1000)
	}

	b.totalSleep += realSleep
	if b.backoffSleepMS == nil {
		b.backoffSleepMS = make(map[string]int)
	}
	b.backoffSleepMS[cfg.name] += realSleep
	if b.backoffTimes == nil {
		b.backoffTimes = make(map[string]int)
	}
	b.backoffTimes[cfg.name]++

	// Mirror the sleep into the statement's execution details when the
	// context carries them.
	stmtExec := b.ctx.Value(util.ExecDetailsKey)
	if stmtExec != nil {
		detail := stmtExec.(*util.ExecDetails)
		atomic.AddInt64(&detail.BackoffDuration, int64(realSleep)*int64(time.Millisecond))
		atomic.AddInt64(&detail.BackoffCount, 1)
	}

	if b.vars != nil && b.vars.Killed != nil {
		if atomic.LoadUint32(b.vars.Killed) == 1 {
			return tikverr.ErrQueryInterrupted
		}
	}

	// startTs stays nil when the context carries no TxnStartKey value.
	startTs := b.ctx.Value(TxnStartKey)
	logutil.Logger(b.ctx).Debug("retry later",
		zap.Error(err),
		zap.Int("totalSleep", b.totalSleep),
		zap.Int("maxSleep", b.maxSleep),
		zap.Stringer("type", cfg),
		zap.Reflect("txnStartTS", startTs))
	return nil
}
// String describes the backoff performed so far as
// " backoff(<totalSleep>ms <configs>)". It returns the empty string when
// nothing has been slept.
func (b *Backoffer) String() string {
	if b.totalSleep != 0 {
		return fmt.Sprintf(" backoff(%dms %v)", b.totalSleep, b.configs)
	}
	return ""
}
// Clone creates a new Backoffer which keeps current Backoffer's sleep time and errors, and shares
// current Backoffer's context.
//
// The recorded errors are copied, so errors appended later to either
// Backoffer are not visible to, and cannot overwrite entries of, the other.
// The variables and the parent pointer are shared with b. The recorded
// configs, the per-type statistics and the cached backoff functions are not
// carried over.
func (b *Backoffer) Clone() *Backoffer {
	return &Backoffer{
		ctx:        b.ctx,
		maxSleep:   b.maxSleep,
		totalSleep: b.totalSleep,
		// Copy rather than share: two slices over one backing array with
		// spare capacity would clobber each other on append.
		errors: append([]error(nil), b.errors...),
		vars:   b.vars,
		parent: b.parent,
	}
}
// Fork creates a new Backoffer which keeps current Backoffer's sleep time and errors, and holds
// a child context of current Backoffer's context.
//
// The returned CancelFunc cancels the child context only. The recorded errors
// are copied, so the forked Backoffer and b can each record further errors
// without overwriting the other's entries. The variables are shared, and b
// becomes the parent of the returned Backoffer.
func (b *Backoffer) Fork() (*Backoffer, context.CancelFunc) {
	ctx, cancel := context.WithCancel(b.ctx)
	return &Backoffer{
		ctx:        ctx,
		maxSleep:   b.maxSleep,
		totalSleep: b.totalSleep,
		// Copy rather than share: two slices over one backing array with
		// spare capacity would clobber each other on append.
		errors: append([]error(nil), b.errors...),
		vars:   b.vars,
		parent: b,
	}, cancel
}
// GetVars returns the kv.Variables bound to this Backoffer.
// The stored pointer is returned as is, not a copy. It can be nil, for
// example on a Backoffer created by NewNoopBackoff.
func (b *Backoffer) GetVars() *kv.Variables {
return b.vars
}
// GetTotalSleep returns the total sleep time in milliseconds.
// The value accumulates across backoff calls, is zeroed by Reset, and starts
// from the source's value on a Backoffer produced by Clone or Fork.
func (b *Backoffer) GetTotalSleep() int {
return b.totalSleep
}
// GetTypes returns type list of this backoff and all its ancestors.
// The receiver's own config names come first, in the order they were
// recorded, followed by those of its parent, its grandparent, and so on.
func (b *Backoffer) GetTypes() []string {
	names := make([]string, 0, len(b.configs))
	for cur := b; cur != nil; cur = cur.parent {
		for i := range cur.configs {
			names = append(names, cur.configs[i].String())
		}
	}
	return names
}
// GetCtx returns the context bound to this Backoffer.
func (b *Backoffer) GetCtx() context.Context {
return b.ctx
}
// SetCtx replaces the context bound to this Backoffer with ctx.
// Later backoff calls use ctx for their cancellation check and for the
// values they look up in the context.
func (b *Backoffer) SetCtx(ctx context.Context) {
b.ctx = ctx
}
// GetBackoffTimes returns a map from backoff type name to the number of
// sleeps performed for that type.
// The Backoffer's own map is returned, not a copy; it is nil until the first
// sleep has been performed.
func (b *Backoffer) GetBackoffTimes() map[string]int {
return b.backoffTimes
}
// GetTotalBackoffTimes returns the total backoff times of the backoffer,
// i.e. the sum of the per-type counts reported by GetBackoffTimes.
func (b *Backoffer) GetTotalBackoffTimes() int {
	var sum int
	for _, count := range b.backoffTimes {
		sum += count
	}
	return sum
}
// GetBackoffSleepMS returns a map from backoff type name to the milliseconds
// slept for that type.
// The Backoffer's own map is returned, not a copy; it is nil until the first
// sleep has been performed.
func (b *Backoffer) GetBackoffSleepMS() map[string]int {
return b.backoffSleepMS
}
// ErrorsNum returns the number of errors recorded by this Backoffer,
// including those it started with when created through Clone or Fork.
func (b *Backoffer) ErrorsNum() int {
return len(b.errors)
}
// Reset resets the sleep state of the backoffer, so that following backoff
// can sleep shorter. The reason why we don't create a new backoffer is that
// backoffer is similar to context and it records some metrics that we
// want to record for an entire process which is composed of several stages.
//
// Only the accumulated sleep time and the cached backoff functions are
// cleared; recorded errors, configs and per-type statistics are kept.
func (b *Backoffer) Reset() {
	b.totalSleep = 0
	b.fn = nil
}
// ResetMaxSleep resets the sleep state and max sleep limit of the backoffer.
// It's used when switches to the next stage of the process.
//
// maxSleep is given in milliseconds. After the sleep state is reset, the new
// limit goes through withVars with the currently bound variables, so it is
// scaled by BackOffWeight in the same way as in NewBackofferWithVars.
func (b *Backoffer) ResetMaxSleep(maxSleep int) {
b.Reset()
b.maxSleep = maxSleep
// withVars modifies b in place; the returned pointer is b itself, so it is
// not needed here.
b.withVars(b.vars)
}