Merge branch 'master' into bot/update-owners-1723456948376

cfzjywxk 2024-08-16 11:38:01 +08:00 committed by GitHub
commit eb941f71c3
10 changed files with 431 additions and 83 deletions

View File

@ -48,6 +48,18 @@ const (
DefGrpcInitialWindowSize = 1 << 27 // 128MiB
DefGrpcInitialConnWindowSize = 1 << 27 // 128MiB
DefMaxConcurrencyRequestLimit = math.MaxInt64
DefBatchPolicy = BatchPolicyStandard
)
const (
// BatchPolicyBasic is the basic batch policy whose behavior is consistent with versions before v8.3.0.
BatchPolicyBasic = "basic"
// BatchPolicyStandard dynamically batches requests based on the arrival time intervals of recent requests.
BatchPolicyStandard = "standard"
// BatchPolicyPositive always performs additional batching.
BatchPolicyPositive = "positive"
// BatchPolicyCustom allows users to customize the internal batch options.
BatchPolicyCustom = "custom"
)
// TiKVClient is the config for tikv client.
@ -72,6 +84,9 @@ type TiKVClient struct {
// CommitTimeout is the max time that the 'commit' command will wait.
CommitTimeout string `toml:"commit-timeout" json:"commit-timeout"`
AsyncCommit AsyncCommit `toml:"async-commit" json:"async-commit"`
// BatchPolicy is the policy for batching requests.
BatchPolicy string `toml:"batch-policy" json:"batch-policy"`
// MaxBatchSize is the max batch size when calling batch commands API.
MaxBatchSize uint `toml:"max-batch-size" json:"max-batch-size"`
// If TiKV load is greater than this, TiDB will wait for a while to avoid sending small batches.
@ -153,6 +168,7 @@ func DefaultTiKVClient() TiKVClient {
AllowedClockDrift: 500 * time.Millisecond,
},
BatchPolicy: DefBatchPolicy,
MaxBatchSize: 128,
OverloadThreshold: 200,
MaxBatchWaitTime: 0,
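
For reference, a minimal sketch of selecting a policy programmatically, assuming client-go's config.UpdateGlobal helper (TOML-based setups would set the `batch-policy` key from the struct tag above instead):

import "github.com/tikv/client-go/v2/config"

// Pick one of the preset policies, or pass "custom" followed by a JSON object
// of turbo batch options (see turboBatchOptions later in this diff).
config.UpdateGlobal(func(conf *config.Config) {
    conf.TiKVClient.BatchPolicy = config.BatchPolicyStandard
    // conf.TiKVClient.BatchPolicy = config.BatchPolicyCustom + ` {"v":1,"t":0.0001,"n":5,"w":0.2,"p":0.8,"q":0.8}`
})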

View File

@ -57,7 +57,7 @@ func TestBackoffErrorType(t *testing.T) {
assert.Nil(t, err)
// 6ms sleep at most in total
for i := 0; i < 2; i++ {
err = b.Backoff(BoMaxDataNotReady, errors.New("data not ready"))
err = b.Backoff(BoMaxRegionNotInitialized, errors.New("region not initialized"))
assert.Nil(t, err)
}
// 100ms sleep at most in total
@ -88,7 +88,7 @@ func TestBackoffDeepCopy(t *testing.T) {
b := NewBackofferWithVars(context.TODO(), 4, nil)
// 700 ms sleep in total and the backoffer will return an error next time.
for i := 0; i < 3; i++ {
err = b.Backoff(BoMaxDataNotReady, errors.New("data not ready"))
err = b.Backoff(BoMaxRegionNotInitialized, errors.New("region not initialized"))
assert.Nil(t, err)
}
bForked, cancel := b.Fork()
@ -96,7 +96,7 @@ func TestBackoffDeepCopy(t *testing.T) {
bCloned := b.Clone()
for _, b := range []*Backoffer{bForked, bCloned} {
err = b.Backoff(BoTiKVRPC, errors.New("tikv rpc"))
assert.ErrorIs(t, err, BoMaxDataNotReady.err)
assert.ErrorIs(t, err, BoMaxRegionNotInitialized.err)
}
}

View File

@ -133,7 +133,6 @@ var (
BoTxnNotFound = NewConfig("txnNotFound", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 500, NoJitter), tikverr.ErrResolveLockTimeout)
BoStaleCmd = NewConfig("staleCommand", &metrics.BackoffHistogramStaleCmd, NewBackoffFnCfg(2, 1000, NoJitter), tikverr.ErrTiKVStaleCommand)
BoMaxTsNotSynced = NewConfig("maxTsNotSynced", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 500, NoJitter), tikverr.ErrTiKVMaxTimestampNotSynced)
BoMaxDataNotReady = NewConfig("dataNotReady", &metrics.BackoffHistogramDataNotReady, NewBackoffFnCfg(2, 2000, NoJitter), tikverr.ErrRegionDataNotReady)
BoMaxRegionNotInitialized = NewConfig("regionNotInitialized", &metrics.BackoffHistogramEmpty, NewBackoffFnCfg(2, 1000, NoJitter), tikverr.ErrRegionNotInitialized)
BoIsWitness = NewConfig("isWitness", &metrics.BackoffHistogramIsWitness, NewBackoffFnCfg(1000, 10000, EqualJitter), tikverr.ErrIsWitness)
// TxnLockFast's `base` load from vars.BackoffLockFast when create BackoffFn.

View File

@ -300,8 +300,7 @@ func (a *connArray) Init(addr string, security config.Security, idleNotify *uint
allowBatch := (cfg.TiKVClient.MaxBatchSize > 0) && enableBatch
if allowBatch {
a.batchConn = newBatchConn(uint(len(a.v)), cfg.TiKVClient.MaxBatchSize, idleNotify)
a.pendingRequests = metrics.TiKVBatchPendingRequests.WithLabelValues(a.target)
a.batchSize = metrics.TiKVBatchRequests.WithLabelValues(a.target)
a.batchConn.initMetrics(a.target)
}
keepAlive := cfg.TiKVClient.GrpcKeepAliveTime
keepAliveTimeout := cfg.TiKVClient.GrpcKeepAliveTimeout
@ -365,6 +364,7 @@ func (a *connArray) Init(addr string, security config.Security, idleNotify *uint
dialTimeout: a.dialTimeout,
tryLock: tryLock{sync.NewCond(new(sync.Mutex)), false},
eventListener: eventListener,
metrics: &a.metrics,
}
batchClient.maxConcurrencyRequestLimit.Store(cfg.TiKVClient.MaxConcurrencyRequestLimit)
a.batchCommandsClients = append(a.batchCommandsClients, batchClient)

View File

@ -37,9 +37,12 @@ package client
import (
"context"
"encoding/json"
"fmt"
"math"
"runtime"
"runtime/trace"
"strings"
"sync"
"sync/atomic"
"time"
@ -72,6 +75,11 @@ type batchCommandsEntry struct {
canceled int32
err error
pri uint64
// start indicates when the batch commands entry is generated and sent to the batch conn channel.
start time.Time
sendLat int64
recvLat int64
}
func (b *batchCommandsEntry) isCanceled() bool {
@ -98,6 +106,8 @@ type batchCommandsBuilder struct {
requestIDs []uint64
// In most cases, there isn't any forwardingReq.
forwardingReqs map[string]*tikvpb.BatchCommandsRequest
latestReqStartTime time.Time
}
func (b *batchCommandsBuilder) len() int {
@ -106,6 +116,9 @@ func (b *batchCommandsBuilder) len() int {
func (b *batchCommandsBuilder) push(entry *batchCommandsEntry) {
b.entries.Push(entry)
if entry.start.After(b.latestReqStartTime) {
b.latestReqStartTime = entry.start
}
}
const highTaskPriority = 10
@ -208,6 +221,23 @@ func newBatchCommandsBuilder(maxBatchSize uint) *batchCommandsBuilder {
}
}
type batchConnMetrics struct {
pendingRequests prometheus.Observer
batchSize prometheus.Observer
sendLoopWaitHeadDur prometheus.Observer
sendLoopWaitMoreDur prometheus.Observer
sendLoopSendDur prometheus.Observer
recvLoopRecvDur prometheus.Observer
recvLoopProcessDur prometheus.Observer
headArrivalInterval prometheus.Observer
batchMoreRequests prometheus.Observer
bestBatchSize prometheus.Observer
}
type batchConn struct {
// An atomic flag indicates whether the batch is idle or not.
// 0 for busy, others for idle.
@ -225,10 +255,11 @@ type batchConn struct {
idleNotify *uint32
idleDetect *time.Timer
pendingRequests prometheus.Observer
batchSize prometheus.Observer
fetchMoreTimer *time.Timer
index uint32
metrics batchConnMetrics
}
func newBatchConn(connCount, maxBatchSize uint, idleNotify *uint32) *batchConn {
@ -243,15 +274,27 @@ func newBatchConn(connCount, maxBatchSize uint, idleNotify *uint32) *batchConn {
}
}
func (a *batchConn) initMetrics(target string) {
a.metrics.pendingRequests = metrics.TiKVBatchPendingRequests.WithLabelValues(target)
a.metrics.batchSize = metrics.TiKVBatchRequests.WithLabelValues(target)
a.metrics.sendLoopWaitHeadDur = metrics.TiKVBatchSendLoopDuration.WithLabelValues(target, "wait-head")
a.metrics.sendLoopWaitMoreDur = metrics.TiKVBatchSendLoopDuration.WithLabelValues(target, "wait-more")
a.metrics.sendLoopSendDur = metrics.TiKVBatchSendLoopDuration.WithLabelValues(target, "send")
a.metrics.recvLoopRecvDur = metrics.TiKVBatchRecvLoopDuration.WithLabelValues(target, "recv")
a.metrics.recvLoopProcessDur = metrics.TiKVBatchRecvLoopDuration.WithLabelValues(target, "process")
a.metrics.headArrivalInterval = metrics.TiKVBatchHeadArrivalInterval.WithLabelValues(target)
a.metrics.batchMoreRequests = metrics.TiKVBatchMoreRequests.WithLabelValues(target)
a.metrics.bestBatchSize = metrics.TiKVBatchBestSize.WithLabelValues(target)
}
func (a *batchConn) isIdle() bool {
return atomic.LoadUint32(&a.idle) != 0
}
// fetchAllPendingRequests fetches all pending requests from the channel.
func (a *batchConn) fetchAllPendingRequests(
maxBatchSize int,
) time.Time {
func (a *batchConn) fetchAllPendingRequests(maxBatchSize int) (headRecvTime time.Time, headArrivalInterval time.Duration) {
// Block on the first element.
latestReqStartTime := a.reqBuilder.latestReqStartTime
var headEntry *batchCommandsEntry
select {
case headEntry = <-a.batchCommandsCh:
@ -264,14 +307,17 @@ func (a *batchConn) fetchAllPendingRequests(
atomic.AddUint32(&a.idle, 1)
atomic.CompareAndSwapUint32(a.idleNotify, 0, 1)
// This batchConn is to be recycled
return time.Now()
return time.Now(), 0
case <-a.closed:
return time.Now()
return time.Now(), 0
}
if headEntry == nil {
return time.Now()
return time.Now(), 0
}
headRecvTime = time.Now()
if headEntry.start.After(latestReqStartTime) && !latestReqStartTime.IsZero() {
headArrivalInterval = headEntry.start.Sub(latestReqStartTime)
}
ts := time.Now()
a.reqBuilder.push(headEntry)
// This loop tries its best to collect more requests.
@ -279,14 +325,14 @@ func (a *batchConn) fetchAllPendingRequests(
select {
case entry := <-a.batchCommandsCh:
if entry == nil {
return ts
return
}
a.reqBuilder.push(entry)
default:
return ts
return
}
}
return ts
return
}
// fetchMorePendingRequests fetches more pending requests from the channel.
@ -296,23 +342,33 @@ func (a *batchConn) fetchMorePendingRequests(
maxWaitTime time.Duration,
) {
// Try to collect `batchWaitSize` requests, or wait `maxWaitTime`.
after := time.NewTimer(maxWaitTime)
if a.fetchMoreTimer == nil {
a.fetchMoreTimer = time.NewTimer(maxWaitTime)
} else {
a.fetchMoreTimer.Reset(maxWaitTime)
}
for a.reqBuilder.len() < batchWaitSize {
select {
case entry := <-a.batchCommandsCh:
if entry == nil {
if !a.fetchMoreTimer.Stop() {
<-a.fetchMoreTimer.C
}
return
}
a.reqBuilder.push(entry)
case <-after.C:
case <-a.fetchMoreTimer.C:
return
}
}
after.Stop()
if !a.fetchMoreTimer.Stop() {
<-a.fetchMoreTimer.C
}
// Do an additional non-blocking try. Here we test the length against `maxBatchSize` instead
// of `batchWaitSize` because fetching as many requests as possible is necessary so that
// we can adjust the `batchWaitSize` dynamically.
yielded := false
for a.reqBuilder.len() < maxBatchSize {
select {
case entry := <-a.batchCommandsCh:
@ -321,16 +377,140 @@ func (a *batchConn) fetchMorePendingRequests(
}
a.reqBuilder.push(entry)
default:
if yielded {
return
}
// yield once to batch more requests.
runtime.Gosched()
yielded = true
}
}
}
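
The reuse of fetchMoreTimer above follows the standard stop-and-drain idiom for time.Timer. A self-contained sketch of the pattern (the names and driving code here are illustrative, not part of the client):

package main

import (
    "fmt"
    "time"
)

// waitWithTimeout reuses a single timer across calls. Before returning on the
// non-timeout path it stops the timer and, if the timer already fired, drains
// its channel so that the next Reset starts from a clean state.
func waitWithTimeout(ch <-chan int, timer *time.Timer, d time.Duration) (int, bool) {
    timer.Reset(d)
    select {
    case v := <-ch:
        if !timer.Stop() {
            <-timer.C
        }
        return v, true
    case <-timer.C:
        // The fired value has been consumed, so no draining is needed.
        return 0, false
    }
}

func main() {
    ch := make(chan int, 1)
    timer := time.NewTimer(time.Hour)
    if !timer.Stop() {
        <-timer.C
    }
    ch <- 42
    fmt.Println(waitWithTimeout(ch, timer, 10*time.Millisecond)) // 42 true
    fmt.Println(waitWithTimeout(ch, timer, 10*time.Millisecond)) // 0 false
}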
const idleTimeout = 3 * time.Minute
var (
// presetBatchPolicies defines a set of [turboBatchOptions] as batch policies.
presetBatchPolicies = map[string]turboBatchOptions{
config.BatchPolicyBasic: {},
config.BatchPolicyStandard: {V: turboBatchTimeBased, T: 0.0001, N: 5, W: 0.2, P: 0.8, Q: 0.8},
config.BatchPolicyPositive: {V: turboBatchAlways, T: 0.0001},
}
)
const (
turboBatchAlways = iota
turboBatchTimeBased
turboBatchProbBased
)
// turboBatchOptions defines internal options for the [turboBatchTrigger].
type turboBatchOptions struct {
// V determines the batch strategy: always(v=0), time-based(v=1), prob-based(v=2).
V int `json:"v"`
// N is currently used to determine the max arrival interval (n * t).
N int `json:"n,omitempty"`
// T is the max wait time for the batch.
T float64 `json:"t,omitempty"`
// W is used to adjust the `estArrivalInterval` or `estFetchMoreProb` dynamically.
// - time-based(v=1): estArrivalInterval = w*reqArrivalInterval + (1-w)*estArrivalInterval
// - prob-based(v=2): estFetchMoreProb = w*thisProb + (1-w)*estFetchMoreProb
W float64 `json:"w,omitempty"`
// P is used to determine whether to fetch more requests:
// - time-based(v=1): estArrivalInterval < p * t
// - prob-based(v=2): estFetchMoreProb > p
P float64 `json:"p,omitempty"`
// Q is used to adjust the `batchWaitSize` dynamically.
Q float64 `json:"q,omitempty"`
}
// turboBatchTrigger is used to trigger the `fetchMorePendingRequests` dynamically according to the request arrival
// intervals. The option `v` indicates the strategy of triggering:
//
// - turboBatchAlways: always fetch more requests.
//
// - turboBatchTimeBased: fetch more requests if estArrivalInterval < p * t
// where estArrivalInterval = w*reqArrivalInterval + (1-w)*estArrivalInterval
// and reqArrivalInterval = min(reqArrivalInterval, n * t)
//
// - turboBatchProbBased: fetch more requests if estFetchMoreProb > p
// where estFetchMoreProb = w*thisProb + (1-w)*estFetchMoreProb
// and thisProb = reqArrivalInterval < t ? 1 : 0
//
// The option `q` is used to adjust the `batchWaitSize` dynamically. If the fractional part of the `avgBatchWaitSize` is
// greater than or equal to `q`, the `batchWaitSize` will be increased by 1.
type turboBatchTrigger struct {
opts turboBatchOptions
estFetchMoreProb float64
estArrivalInterval float64
maxArrivalInterval float64
}
func newTurboBatchTriggerFromPolicy(policy string) (trigger turboBatchTrigger, ok bool) {
if opts, found := presetBatchPolicies[policy]; found {
return turboBatchTrigger{opts: opts}, true
}
rawOpts, _ := strings.CutPrefix(policy, config.BatchPolicyCustom)
if err := json.Unmarshal([]byte(strings.TrimSpace(rawOpts)), &trigger.opts); err != nil {
return turboBatchTrigger{opts: presetBatchPolicies[config.DefBatchPolicy]}, false
}
ok = true
return
}
func (t *turboBatchTrigger) turboWaitSeconds() float64 {
return t.opts.T
}
func (t *turboBatchTrigger) turboWaitTime() time.Duration {
return time.Duration(t.opts.T * float64(time.Second))
}
func (t *turboBatchTrigger) needFetchMore(reqArrivalInterval time.Duration) bool {
if t.opts.V == turboBatchTimeBased {
thisArrivalInterval := reqArrivalInterval.Seconds()
if t.maxArrivalInterval == 0 {
t.maxArrivalInterval = t.turboWaitSeconds() * float64(t.opts.N)
}
if thisArrivalInterval > t.maxArrivalInterval {
thisArrivalInterval = t.maxArrivalInterval
}
if t.estArrivalInterval == 0 {
t.estArrivalInterval = thisArrivalInterval
} else {
t.estArrivalInterval = t.opts.W*thisArrivalInterval + (1-t.opts.W)*t.estArrivalInterval
}
return t.estArrivalInterval < t.turboWaitSeconds()*t.opts.P
} else if t.opts.V == turboBatchProbBased {
thisProb := .0
if reqArrivalInterval.Seconds() < t.turboWaitSeconds() {
thisProb = 1
}
t.estFetchMoreProb = t.opts.W*thisProb + (1-t.opts.W)*t.estFetchMoreProb
return t.estFetchMoreProb > t.opts.P
} else {
return true
}
}
func (t *turboBatchTrigger) preferredBatchWaitSize(avgBatchWaitSize float64, defBatchWaitSize int) int {
if t.opts.V == turboBatchAlways {
return defBatchWaitSize
}
n, m := math.Modf(avgBatchWaitSize)
batchWaitSize := int(n)
if m >= t.opts.Q {
batchWaitSize++
}
return batchWaitSize
}
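
A test-style sketch of how the trigger behaves under the standard preset (t = 100µs, n = 5, w = 0.2, p = 0.8, q = 0.8); it assumes it runs inside this package, like the unit tests further below:

trigger, _ := newTurboBatchTriggerFromPolicy(config.BatchPolicyStandard)

// Requests arriving every 10µs keep the estimated arrival interval well below
// p*t = 80µs, so the trigger keeps asking the send loop to wait for more.
for i := 0; i < 3; i++ {
    fmt.Println(trigger.needFetchMore(10 * time.Microsecond)) // true
}

// With q = 0.8, an average batch wait size of 1.8 rounds up to 2, while 1.2
// stays at 1 (the second argument only matters for the "positive" policy).
fmt.Println(trigger.preferredBatchWaitSize(1.8, 8)) // 2
fmt.Println(trigger.preferredBatchWaitSize(1.2, 8)) // 1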
// BatchSendLoopPanicCounter is only used for testing.
var BatchSendLoopPanicCounter int64 = 0
var initBatchPolicyWarn sync.Once
func (a *batchConn) batchSendLoop(cfg config.TiKVClient) {
defer func() {
if r := recover(); r != nil {
@ -344,11 +524,20 @@ func (a *batchConn) batchSendLoop(cfg config.TiKVClient) {
}
}()
bestBatchWaitSize := cfg.BatchWaitSize
trigger, ok := newTurboBatchTriggerFromPolicy(cfg.BatchPolicy)
if !ok {
initBatchPolicyWarn.Do(func() {
logutil.BgLogger().Warn("fallback to default batch policy due to invalid value", zap.String("value", cfg.BatchPolicy))
})
}
turboBatchWaitTime := trigger.turboWaitTime()
avgBatchWaitSize := float64(cfg.BatchWaitSize)
for {
sendLoopStartTime := time.Now()
a.reqBuilder.reset()
start := a.fetchAllPendingRequests(int(cfg.MaxBatchSize))
headRecvTime, headArrivalInterval := a.fetchAllPendingRequests(int(cfg.MaxBatchSize))
// curl -X PUT -d 'return(true)' http://0.0.0.0:10080/fail/tikvclient/mockBlockOnBatchClient
if val, err := util.EvalFailpoint("mockBlockOnBatchClient"); err == nil {
@ -357,27 +546,37 @@ func (a *batchConn) batchSendLoop(cfg config.TiKVClient) {
}
}
if a.reqBuilder.len() < int(cfg.MaxBatchSize) && cfg.MaxBatchWaitTime > 0 {
if batchSize := a.reqBuilder.len(); batchSize < int(cfg.MaxBatchSize) {
if cfg.MaxBatchWaitTime > 0 && atomic.LoadUint64(&a.tikvTransportLayerLoad) > uint64(cfg.OverloadThreshold) {
// If the target TiKV is overloaded, wait a while to collect more requests.
if atomic.LoadUint64(&a.tikvTransportLayerLoad) >= uint64(cfg.OverloadThreshold) {
metrics.TiKVBatchWaitOverLoad.Inc()
a.fetchMorePendingRequests(int(cfg.MaxBatchSize), int(bestBatchWaitSize), cfg.MaxBatchWaitTime)
a.fetchMorePendingRequests(int(cfg.MaxBatchSize), int(cfg.BatchWaitSize), cfg.MaxBatchWaitTime)
} else if turboBatchWaitTime > 0 && headArrivalInterval > 0 && trigger.needFetchMore(headArrivalInterval) {
batchWaitSize := trigger.preferredBatchWaitSize(avgBatchWaitSize, int(cfg.BatchWaitSize))
a.fetchMorePendingRequests(int(cfg.MaxBatchSize), batchWaitSize, turboBatchWaitTime)
a.metrics.batchMoreRequests.Observe(float64(a.reqBuilder.len() - batchSize))
}
}
a.pendingRequests.Observe(float64(len(a.batchCommandsCh) + a.reqBuilder.len()))
length := a.reqBuilder.len()
a.metrics.pendingRequests.Observe(float64(len(a.batchCommandsCh) + length))
if uint(length) == 0 {
// The batch command channel is closed.
return
} else if uint(length) < bestBatchWaitSize && bestBatchWaitSize > 1 {
// Waits too long to collect requests, reduce the target batch size.
bestBatchWaitSize--
} else if uint(length) > bestBatchWaitSize+4 && bestBatchWaitSize < cfg.MaxBatchSize {
bestBatchWaitSize++
} else {
avgBatchWaitSize = 0.2*float64(length) + 0.8*avgBatchWaitSize
}
a.metrics.bestBatchSize.Observe(avgBatchWaitSize)
a.metrics.headArrivalInterval.Observe(headArrivalInterval.Seconds())
a.metrics.sendLoopWaitHeadDur.Observe(headRecvTime.Sub(sendLoopStartTime).Seconds())
a.metrics.sendLoopWaitMoreDur.Observe(time.Since(sendLoopStartTime).Seconds())
a.getClientAndSend()
metrics.TiKVBatchSendLatency.Observe(float64(time.Since(start)))
sendLoopEndTime := time.Now()
a.metrics.sendLoopSendDur.Observe(sendLoopEndTime.Sub(sendLoopStartTime).Seconds())
if dur := sendLoopEndTime.Sub(headRecvTime); dur > 5*time.Millisecond {
metrics.TiKVBatchSendTailLatency.Observe(dur.Seconds())
}
}
}
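
The avgBatchWaitSize update above is a plain exponential moving average (new = 0.2*observed + 0.8*previous), which is what later feeds trigger.preferredBatchWaitSize. A tiny standalone sketch of how it converges (the 0.2/0.8 weights are copied from the loop; the observed sizes are made up):

package main

import "fmt"

func main() {
    observed := []int{8, 8, 16, 16, 4, 4, 4, 4} // batch sizes seen by successive iterations
    avg := 8.0                                  // the real loop starts from cfg.BatchWaitSize
    for _, n := range observed {
        avg = 0.2*float64(n) + 0.8*avg
        fmt.Printf("observed=%d smoothed=%.2f\n", n, avg)
    }
    // A burst of large (or small) batches only gradually moves the smoothed
    // value, so the preferred wait size adapts without oscillating.
}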
@ -429,10 +628,12 @@ func (a *batchConn) getClientAndSend() {
}
defer cli.unlockForSend()
available := cli.available()
reqSendTime := time.Now()
batch := 0
req, forwardingReqs := a.reqBuilder.buildWithLimit(available, func(id uint64, e *batchCommandsEntry) {
cli.batched.Store(id, e)
cli.sent.Add(1)
atomic.StoreInt64(&e.sendLat, int64(reqSendTime.Sub(e.start)))
if trace.IsEnabled() {
trace.Log(e.ctx, "rpc", "send")
}
@ -446,7 +647,7 @@ func (a *batchConn) getClientAndSend() {
cli.send(forwardedHost, req)
}
if batch > 0 {
a.batchSize.Observe(float64(batch))
a.metrics.batchSize.Observe(float64(batch))
}
}
@ -490,7 +691,6 @@ type batchCommandsStream struct {
}
func (s *batchCommandsStream) recv() (resp *tikvpb.BatchCommandsResponse, err error) {
now := time.Now()
defer func() {
if r := recover(); r != nil {
metrics.TiKVPanicCounter.WithLabelValues(metrics.LabelBatchRecvLoop).Inc()
@ -499,11 +699,6 @@ func (s *batchCommandsStream) recv() (resp *tikvpb.BatchCommandsResponse, err er
zap.Stack("stack"))
err = errors.New("batch conn recv paniced")
}
if err == nil {
metrics.BatchRecvHistogramOK.Observe(float64(time.Since(now)))
} else {
metrics.BatchRecvHistogramError.Observe(float64(time.Since(now)))
}
}()
if _, err := util.EvalFailpoint("gotErrorInRecvLoop"); err == nil {
return nil, errors.New("injected error in batchRecvLoop")
@ -567,6 +762,8 @@ type batchCommandsClient struct {
// eventListener is the listener set by external code to observe some events in the client. It's stored in an atomic
// pointer to make setting thread-safe.
eventListener *atomic.Pointer[ClientEventListener]
metrics *batchConnMetrics
}
func (c *batchCommandsClient) isStopped() bool {
@ -708,7 +905,7 @@ func (c *batchCommandsClient) recreateStreamingClientOnce(streamClient *batchCom
return err
}
func (c *batchCommandsClient) batchRecvLoop(cfg config.TiKVClient, tikvTransportLayerLoad *uint64, streamClient *batchCommandsStream) {
func (c *batchCommandsClient) batchRecvLoop(cfg config.TiKVClient, tikvTransportLayerLoad *uint64, connMetrics *batchConnMetrics, streamClient *batchCommandsStream) {
defer func() {
if r := recover(); r != nil {
metrics.TiKVPanicCounter.WithLabelValues(metrics.LabelBatchRecvLoop).Inc()
@ -716,13 +913,16 @@ func (c *batchCommandsClient) batchRecvLoop(cfg config.TiKVClient, tikvTransport
zap.Any("r", r),
zap.Stack("stack"))
logutil.BgLogger().Info("restart batchRecvLoop")
go c.batchRecvLoop(cfg, tikvTransportLayerLoad, streamClient)
go c.batchRecvLoop(cfg, tikvTransportLayerLoad, connMetrics, streamClient)
}
}()
epoch := atomic.LoadUint64(&c.epoch)
for {
recvLoopStartTime := time.Now()
resp, err := streamClient.recv()
respRecvTime := time.Now()
connMetrics.recvLoopRecvDur.Observe(respRecvTime.Sub(recvLoopStartTime).Seconds())
if err != nil {
if c.isStopped() {
return
@ -764,6 +964,7 @@ func (c *batchCommandsClient) batchRecvLoop(cfg config.TiKVClient, tikvTransport
}
entry := value.(*batchCommandsEntry)
atomic.StoreInt64(&entry.recvLat, int64(respRecvTime.Sub(entry.start)))
if trace.IsEnabled() {
trace.Log(entry.ctx, "rpc", "received")
}
@ -781,6 +982,7 @@ func (c *batchCommandsClient) batchRecvLoop(cfg config.TiKVClient, tikvTransport
// We need to consider TiKV load only if batch-wait strategy is enabled.
atomic.StoreUint64(tikvTransportLayerLoad, transportLayerLoad)
}
connMetrics.recvLoopProcessDur.Observe(time.Since(recvLoopStartTime).Seconds())
}
}
@ -875,7 +1077,7 @@ func (c *batchCommandsClient) initBatchClient(forwardedHost string) error {
} else {
c.forwardedClients[forwardedHost] = streamClient
}
go c.batchRecvLoop(c.tikvClientCfg, c.tikvLoad, streamClient)
go c.batchRecvLoop(c.tikvClientCfg, c.tikvLoad, c.metrics, streamClient)
return nil
}
@ -908,11 +1110,20 @@ func sendBatchRequest(
canceled: 0,
err: nil,
pri: priority,
start: time.Now(),
}
timer := time.NewTimer(timeout)
defer timer.Stop()
defer func() {
timer.Stop()
if sendLat := atomic.LoadInt64(&entry.sendLat); sendLat > 0 {
metrics.BatchRequestDurationSend.Observe(time.Duration(sendLat).Seconds())
}
if recvLat := atomic.LoadInt64(&entry.recvLat); recvLat > 0 {
metrics.BatchRequestDurationRecv.Observe(time.Duration(recvLat).Seconds())
}
metrics.BatchRequestDurationDone.Observe(time.Since(entry.start).Seconds())
}()
start := time.Now()
select {
case batchConn.batchCommandsCh <- entry:
case <-ctx.Done():
@ -925,8 +1136,6 @@ func sendBatchRequest(
case <-timer.C:
return nil, errors.WithMessage(context.DeadlineExceeded, "wait sendLoop")
}
waitSendDuration := time.Since(start)
metrics.TiKVBatchWaitDuration.Observe(float64(waitSendDuration))
select {
case res, ok := <-entry.res:
@ -945,8 +1154,13 @@ func sendBatchRequest(
return nil, errors.New("batchConn closed")
case <-timer.C:
atomic.StoreInt32(&entry.canceled, 1)
reason := fmt.Sprintf("wait recvLoop timeout, timeout:%s, wait_send_duration:%s, wait_recv_duration:%s",
timeout, util.FormatDuration(waitSendDuration), util.FormatDuration(time.Since(start)-waitSendDuration))
reason := fmt.Sprintf("wait recvLoop timeout, timeout:%s", timeout)
if sendLat := atomic.LoadInt64(&entry.sendLat); sendLat > 0 {
reason += fmt.Sprintf(", send:%s", util.FormatDuration(time.Duration(sendLat)))
if recvLat := atomic.LoadInt64(&entry.recvLat); recvLat > 0 {
reason += fmt.Sprintf(", recv:%s", util.FormatDuration(time.Duration(recvLat-sendLat)))
}
}
return nil, errors.WithMessage(context.DeadlineExceeded, reason)
}
}

View File

@ -148,7 +148,7 @@ func TestSendWhenReconnect(t *testing.T) {
req := tikvrpc.NewRequest(tikvrpc.CmdEmpty, &tikvpb.BatchCommandsEmptyRequest{})
_, err = rpcClient.SendRequest(context.Background(), addr, req, 5*time.Second)
require.Regexp(t, "wait recvLoop timeout, timeout:5s, wait_send_duration:.*, wait_recv_duration:.*: context deadline exceeded", err.Error())
require.Regexp(t, "wait recvLoop timeout, timeout:5s: context deadline exceeded", err.Error())
server.Stop()
}
@ -1067,3 +1067,76 @@ func TestConcurrentCloseConnPanic(t *testing.T) {
}()
wg.Wait()
}
func TestBatchPolicy(t *testing.T) {
t.Run(config.BatchPolicyBasic, func(t *testing.T) {
trigger, ok := newTurboBatchTriggerFromPolicy(config.BatchPolicyBasic)
require.True(t, ok)
require.False(t, trigger.turboWaitTime() > 0)
})
t.Run(config.BatchPolicyPositive, func(t *testing.T) {
trigger, ok := newTurboBatchTriggerFromPolicy(config.BatchPolicyPositive)
require.True(t, ok)
require.Equal(t, trigger.turboWaitTime(), 100*time.Microsecond)
require.True(t, trigger.needFetchMore(time.Hour))
require.True(t, trigger.needFetchMore(time.Millisecond))
require.Equal(t, 8, trigger.preferredBatchWaitSize(1, 8))
require.Equal(t, 8, trigger.preferredBatchWaitSize(1.2, 8))
require.Equal(t, 8, trigger.preferredBatchWaitSize(1.8, 8))
})
t.Run(config.BatchPolicyStandard, func(t *testing.T) {
trigger, ok := newTurboBatchTriggerFromPolicy(config.BatchPolicyStandard)
require.True(t, ok)
require.Equal(t, 1, trigger.preferredBatchWaitSize(1, 8))
require.Equal(t, 1, trigger.preferredBatchWaitSize(1.2, 8))
require.Equal(t, 2, trigger.preferredBatchWaitSize(1.8, 8))
require.Equal(t, trigger.turboWaitTime(), 100*time.Microsecond)
require.False(t, trigger.needFetchMore(100*time.Microsecond))
require.False(t, trigger.needFetchMore(80*time.Microsecond))
require.True(t, trigger.needFetchMore(10*time.Microsecond))
require.True(t, trigger.needFetchMore(80*time.Microsecond))
require.False(t, trigger.needFetchMore(90*time.Microsecond))
for i := 0; i < 50; i++ {
trigger.needFetchMore(time.Hour)
}
require.Less(t, trigger.estArrivalInterval, trigger.maxArrivalInterval)
for i := 0; i < 8; i++ {
require.False(t, trigger.needFetchMore(10*time.Microsecond))
}
require.True(t, trigger.needFetchMore(10*time.Microsecond))
})
t.Run(config.BatchPolicyCustom, func(t *testing.T) {
trigger, ok := newTurboBatchTriggerFromPolicy(config.BatchPolicyCustom + " {} ")
require.True(t, ok)
require.Equal(t, trigger.opts, presetBatchPolicies[config.BatchPolicyBasic])
trigger, ok = newTurboBatchTriggerFromPolicy(`{"t":0.0001}`)
require.True(t, ok)
require.Equal(t, trigger.opts, presetBatchPolicies[config.BatchPolicyPositive])
trigger, ok = newTurboBatchTriggerFromPolicy(`{"v":1,"t":0.0001,"n":5,"w":0.2,"p":0.8,"q":0.8}`)
require.True(t, ok)
require.Equal(t, trigger.opts, presetBatchPolicies[config.BatchPolicyStandard])
trigger, ok = newTurboBatchTriggerFromPolicy(`{"v":2,"t":0.001,"w":0.2,"p":0.5}`)
require.True(t, ok)
require.Equal(t, 2, trigger.preferredBatchWaitSize(1, 8))
require.Equal(t, 2, trigger.preferredBatchWaitSize(1.2, 8))
require.Equal(t, trigger.turboWaitTime(), time.Millisecond)
require.False(t, trigger.needFetchMore(time.Millisecond-time.Microsecond))
require.False(t, trigger.needFetchMore(time.Millisecond-time.Microsecond))
require.False(t, trigger.needFetchMore(time.Millisecond-time.Microsecond))
require.True(t, trigger.needFetchMore(time.Millisecond-time.Microsecond))
require.False(t, trigger.needFetchMore(time.Millisecond))
})
t.Run("invalid", func(t *testing.T) {
for _, val := range []string{
"", "invalid", "custom", "custom {x:1}",
} {
trigger, ok := newTurboBatchTriggerFromPolicy(val)
require.False(t, ok)
require.Equal(t, trigger.opts, presetBatchPolicies[config.DefBatchPolicy])
}
})
}

View File

@ -1715,13 +1715,7 @@ func (s *RegionRequestSender) onRegionError(
if s.replicaSelector != nil {
s.replicaSelector.onDataIsNotReady()
}
if !req.IsGlobalStaleRead() {
// only backoff local stale reads as global should retry immediately against the leader as a normal read
err = bo.Backoff(retry.BoMaxDataNotReady, errors.New("data is not ready"))
if err != nil {
return false, err
}
}
// do not back off on data-is-not-ready errors, as we always retry with a normal snapshot read.
return true, nil
}

View File

@ -64,15 +64,19 @@ var (
TiKVLocalLatchWaitTimeHistogram prometheus.Histogram
TiKVStatusDuration *prometheus.HistogramVec
TiKVStatusCounter *prometheus.CounterVec
TiKVBatchWaitDuration prometheus.Histogram
TiKVBatchSendLatency prometheus.Histogram
TiKVBatchSendTailLatency prometheus.Histogram
TiKVBatchSendLoopDuration *prometheus.SummaryVec
TiKVBatchRecvLoopDuration *prometheus.SummaryVec
TiKVBatchHeadArrivalInterval *prometheus.SummaryVec
TiKVBatchBestSize *prometheus.SummaryVec
TiKVBatchMoreRequests *prometheus.SummaryVec
TiKVBatchWaitOverLoad prometheus.Counter
TiKVBatchPendingRequests *prometheus.HistogramVec
TiKVBatchRequests *prometheus.HistogramVec
TiKVBatchRequestDuration *prometheus.SummaryVec
TiKVBatchClientUnavailable prometheus.Histogram
TiKVBatchClientWaitEstablish prometheus.Histogram
TiKVBatchClientRecycle prometheus.Histogram
TiKVBatchRecvLatency *prometheus.HistogramVec
TiKVRangeTaskStats *prometheus.GaugeVec
TiKVRangeTaskPushDuration *prometheus.HistogramVec
TiKVTokenWaitDuration prometheus.Histogram
@ -358,35 +362,60 @@ func initMetrics(namespace, subsystem string, constLabels prometheus.Labels) {
ConstLabels: constLabels,
}, []string{LblResult})
TiKVBatchWaitDuration = prometheus.NewHistogram(
TiKVBatchSendTailLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "batch_wait_duration",
Buckets: prometheus.ExponentialBuckets(1, 2, 34), // 1ns ~ 8s
Help: "batch wait duration",
Name: "batch_send_tail_latency_seconds",
Buckets: prometheus.ExponentialBuckets(0.005, 2, 10), // 5ms ~ 2.56s
Help: "batch send tail latency",
ConstLabels: constLabels,
})
TiKVBatchSendLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
TiKVBatchSendLoopDuration = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "batch_send_latency",
Buckets: prometheus.ExponentialBuckets(1, 2, 34), // 1ns ~ 8s
Help: "batch send latency",
Name: "batch_send_loop_duration_seconds",
Help: "batch send loop duration breakdown by steps",
ConstLabels: constLabels,
})
}, []string{"store", "step"})
TiKVBatchRecvLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
TiKVBatchRecvLoopDuration = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "batch_recv_latency",
Buckets: prometheus.ExponentialBuckets(1000, 2, 34), // 1us ~ 8000s
Help: "batch recv latency",
Name: "batch_recv_loop_duration_seconds",
Help: "batch recv loop duration breakdown by steps",
ConstLabels: constLabels,
}, []string{LblResult})
}, []string{"store", "step"})
TiKVBatchHeadArrivalInterval = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "batch_head_arrival_interval_seconds",
Help: "arrival interval of the head request in batch",
ConstLabels: constLabels,
}, []string{"store"})
TiKVBatchBestSize = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "batch_best_size",
Help: "best batch size estimated by the batch client",
ConstLabels: constLabels,
}, []string{"store"})
TiKVBatchMoreRequests = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "batch_more_requests_total",
Help: "number of requests batched by extra fetch",
ConstLabels: constLabels,
}, []string{"store"})
TiKVBatchWaitOverLoad = prometheus.NewCounter(
prometheus.CounterOpts{
@ -417,6 +446,15 @@ func initMetrics(namespace, subsystem string, constLabels prometheus.Labels) {
ConstLabels: constLabels,
}, []string{"store"})
TiKVBatchRequestDuration = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "batch_request_duration_seconds",
Help: "batch request duration breakdown by steps",
ConstLabels: constLabels,
}, []string{"step"})
TiKVBatchClientUnavailable = prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: namespace,
@ -839,12 +877,16 @@ func RegisterMetrics() {
prometheus.MustRegister(TiKVLocalLatchWaitTimeHistogram)
prometheus.MustRegister(TiKVStatusDuration)
prometheus.MustRegister(TiKVStatusCounter)
prometheus.MustRegister(TiKVBatchWaitDuration)
prometheus.MustRegister(TiKVBatchSendLatency)
prometheus.MustRegister(TiKVBatchRecvLatency)
prometheus.MustRegister(TiKVBatchSendTailLatency)
prometheus.MustRegister(TiKVBatchSendLoopDuration)
prometheus.MustRegister(TiKVBatchRecvLoopDuration)
prometheus.MustRegister(TiKVBatchHeadArrivalInterval)
prometheus.MustRegister(TiKVBatchBestSize)
prometheus.MustRegister(TiKVBatchMoreRequests)
prometheus.MustRegister(TiKVBatchWaitOverLoad)
prometheus.MustRegister(TiKVBatchPendingRequests)
prometheus.MustRegister(TiKVBatchRequests)
prometheus.MustRegister(TiKVBatchRequestDuration)
prometheus.MustRegister(TiKVBatchClientUnavailable)
prometheus.MustRegister(TiKVBatchClientWaitEstablish)
prometheus.MustRegister(TiKVBatchClientRecycle)

View File

@ -173,6 +173,10 @@ var (
StaleReadLocalOutBytes prometheus.Counter
StaleReadRemoteInBytes prometheus.Counter
StaleReadRemoteOutBytes prometheus.Counter
BatchRequestDurationSend prometheus.Observer
BatchRequestDurationRecv prometheus.Observer
BatchRequestDurationDone prometheus.Observer
)
func initShortcuts() {
@ -287,8 +291,9 @@ func initShortcuts() {
OnePCTxnCounterError = TiKVOnePCTxnCounter.WithLabelValues("err")
OnePCTxnCounterFallback = TiKVOnePCTxnCounter.WithLabelValues("fallback")
BatchRecvHistogramOK = TiKVBatchRecvLatency.WithLabelValues("ok")
BatchRecvHistogramError = TiKVBatchRecvLatency.WithLabelValues("err")
BatchRequestDurationSend = TiKVBatchRequestDuration.WithLabelValues("send")
BatchRequestDurationRecv = TiKVBatchRequestDuration.WithLabelValues("recv")
BatchRequestDurationDone = TiKVBatchRequestDuration.WithLabelValues("done")
PrewriteAssertionUsageCounterNone = TiKVPrewriteAssertionUsageCounter.WithLabelValues("none")
PrewriteAssertionUsageCounterExist = TiKVPrewriteAssertionUsageCounter.WithLabelValues("exist")

View File

@ -1216,6 +1216,11 @@ func (rs *SnapshotRuntimeStats) String() string {
return buf.String()
}
// GetTimeDetail returns the timeDetail
func (rs *SnapshotRuntimeStats) GetTimeDetail() *util.TimeDetail {
return rs.timeDetail
}
// GetCmdRPCCount returns the count of the corresponding kind of rpc requests
func (rs *SnapshotRuntimeStats) GetCmdRPCCount(cmd tikvrpc.CmdType) int64 {
if rs.rpcStats == nil || len(rs.rpcStats.RPCStats) == 0 {