// Copyright 2024 TiKV Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package transaction

import (
	"bytes"
	"context"
	"fmt"
	"strconv"
	"sync/atomic"
	"time"

	"github.com/docker/go-units"
	"github.com/golang/protobuf/proto" //nolint:staticcheck
	"github.com/opentracing/opentracing-go"
	"github.com/pingcap/errors"
	"github.com/pingcap/kvproto/pkg/kvrpcpb"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/tikv/client-go/v2/config/retry"
	tikverr "github.com/tikv/client-go/v2/error"
	"github.com/tikv/client-go/v2/internal/client"
	"github.com/tikv/client-go/v2/internal/locate"
	"github.com/tikv/client-go/v2/internal/logutil"
	"github.com/tikv/client-go/v2/kv"
	"github.com/tikv/client-go/v2/tikvrpc"
	"github.com/tikv/client-go/v2/txnkv/rangetask"
	"github.com/tikv/client-go/v2/txnkv/txnlock"
	"github.com/tikv/client-go/v2/util"
	"go.uber.org/zap"
)

// PipelinedRequestSource is the source of the Flush & ResolveLock requests in a txn with pipelined memdb.
// txn.GetRequestSource may cause a data race because the upper layer may edit the source while the flush
// requests are built in the background, so we use a fixed source here to avoid the data race.
// This also distinguishes the resource usage of p-DML (pipelined DML) from that of other small DMLs.
const PipelinedRequestSource = "external_pdml"

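// actionPipelinedFlush is the twoPhaseCommitAction that flushes one generation of mutations
// from the pipelined memdb to TiKV.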
type actionPipelinedFlush struct {
	generation uint64
}

var _ twoPhaseCommitAction = actionPipelinedFlush{}

func (action actionPipelinedFlush) String() string {
	return "pipelined_flush"
}

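// tiKVTxnRegionsNumHistogram returns nil: no regions-per-transaction histogram is reported
// for pipelined flush.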
func (action actionPipelinedFlush) tiKVTxnRegionsNumHistogram() prometheus.Observer {
	return nil
}

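// buildPipelinedFlushRequest builds a Flush request for one batch of mutations of the given
// generation, carrying the transaction's priority, resource group and other request-level context.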
func (c *twoPhaseCommitter) buildPipelinedFlushRequest(batch batchMutations, generation uint64) *tikvrpc.Request {
	m := batch.mutations
	mutations := make([]*kvrpcpb.Mutation, m.Len())

	for i := 0; i < m.Len(); i++ {
		assertion := kvrpcpb.Assertion_None
		if m.IsAssertExists(i) {
			assertion = kvrpcpb.Assertion_Exist
		}
		if m.IsAssertNotExist(i) {
			assertion = kvrpcpb.Assertion_NotExist
		}
		mutations[i] = &kvrpcpb.Mutation{
			Op:        m.GetOp(i),
			Key:       m.GetKey(i),
			Value:     m.GetValue(i),
			Assertion: assertion,
		}
	}

	minCommitTS := c.startTS + 1

	req := &kvrpcpb.FlushRequest{
		Mutations:      mutations,
		PrimaryKey:     c.primary(),
		StartTs:        c.startTS,
		MinCommitTs:    minCommitTS,
		Generation:     generation,
		LockTtl:        max(defaultLockTTL, ManagedLockTTL),
		AssertionLevel: c.txn.assertionLevel,
	}

	r := tikvrpc.NewRequest(
		tikvrpc.CmdFlush, req, kvrpcpb.Context{
			Priority:               c.priority,
			SyncLog:                c.syncLog,
			ResourceGroupTag:       c.resourceGroupTag,
			DiskFullOpt:            c.txn.diskFullOpt,
			TxnSource:              c.txn.txnSource,
			MaxExecutionDurationMs: uint64(client.MaxWriteExecutionTime.Milliseconds()),
			RequestSource:          PipelinedRequestSource,
			ResourceControlContext: &kvrpcpb.ResourceControlContext{
				ResourceGroupName: c.resourceGroupName,
			},
		},
	)
	if c.resourceGroupTag == nil && c.resourceGroupTagger != nil {
		c.resourceGroupTagger(r)
	}
	return r
}

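// handleSingleBatch sends a Flush request for a single batch of mutations. Region errors cause
// the batch to be relocated and retried; key errors are turned into already-exists or
// write-conflict errors, or collected as locks that are resolved (with backoff) before retrying.
// The keepalive routine is started once the batch containing the primary key is flushed.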
func (action actionPipelinedFlush) handleSingleBatch(
	c *twoPhaseCommitter, bo *retry.Backoffer, batch batchMutations,
) (err error) {
	if len(c.primaryKey) == 0 {
		logutil.Logger(bo.GetCtx()).Error(
			"[pipelined dml] primary key should be set before pipelined flush",
			zap.Uint64("startTS", c.startTS),
			zap.Uint64("generation", action.generation),
		)
		return errors.New("[pipelined dml] primary key should be set before pipelined flush")
	}

	tBegin := time.Now()
	attempts := 0

	req := c.buildPipelinedFlushRequest(batch, action.generation)
	sender := locate.NewRegionRequestSender(c.store.GetRegionCache(), c.store.GetTiKVClient(), c.store.GetOracle())
	var resolvingRecordToken *int

	for {
		attempts++
		reqBegin := time.Now()
		if reqBegin.Sub(tBegin) > slowRequestThreshold {
			logutil.Logger(bo.GetCtx()).Warn(
				"[pipelined dml] slow pipelined flush request",
				zap.Uint64("startTS", c.startTS),
				zap.Uint64("generation", action.generation),
				zap.Stringer("region", &batch.region),
				zap.Int("attempts", attempts),
			)
			tBegin = time.Now()
		}
		resp, _, err := sender.SendReq(bo, req, batch.region, client.ReadTimeoutShort)
		// Unexpected error occurs, return it
		if err != nil {
			return err
		}
		regionErr, err := resp.GetRegionError()
		if err != nil {
			return err
		}
		if regionErr != nil {
			if err = retry.MayBackoffForRegionError(regionErr, bo); err != nil {
				return err
			}
			if regionErr.GetDiskFull() != nil {
				storeIds := regionErr.GetDiskFull().GetStoreId()
				desc := " "
				for _, i := range storeIds {
					desc += strconv.FormatUint(i, 10) + " "
				}

				logutil.Logger(bo.GetCtx()).Error(
					"Request failed cause of TiKV disk full",
					zap.String("store_id", desc),
					zap.String("reason", regionErr.GetDiskFull().GetReason()),
				)

				return errors.New(regionErr.String())
			}
			same, err := batch.relocate(bo, c.store.GetRegionCache())
			if err != nil {
				return err
			}
			if same {
				continue
			}
			err = c.doActionOnMutations(bo, actionPipelinedFlush{generation: action.generation}, batch.mutations)
			return err
		}
		if resp.Resp == nil {
			return errors.WithStack(tikverr.ErrBodyMissing)
		}
		flushResp := resp.Resp.(*kvrpcpb.FlushResponse)
		keyErrs := flushResp.GetErrors()
		if len(keyErrs) == 0 {
			// Clear the RPC Error since the request is evaluated successfully.
			sender.SetRPCError(nil)

			// Update CommitDetails
			reqDuration := time.Since(reqBegin)
			c.getDetail().MergeFlushReqDetails(
				reqDuration,
				batch.region.GetID(),
				sender.GetStoreAddr(),
				flushResp.ExecDetailsV2,
			)

			if batch.isPrimary {
				// start keepalive after primary key is written.
				c.run(c, nil, true)
			}
			return nil
		}
		locks := make([]*txnlock.Lock, 0, len(keyErrs))

		logged := make(map[uint64]struct{}, 1)
		for _, keyErr := range keyErrs {
			// Check already exists error
			if alreadyExist := keyErr.GetAlreadyExist(); alreadyExist != nil {
				e := &tikverr.ErrKeyExist{AlreadyExist: alreadyExist}
				return c.extractKeyExistsErr(e)
			}

			// Extract lock from key error
			lock, err1 := txnlock.ExtractLockFromKeyErr(keyErr)
			if err1 != nil {
				return err1
			}
			if _, ok := logged[lock.TxnID]; !ok {
				logutil.Logger(bo.GetCtx()).Info(
					"[pipelined dml] flush encounters lock. "+
						"More locks belonging to the same transaction may be omitted",
					zap.Uint64("txnID", c.startTS),
					zap.Uint64("generation", action.generation),
					zap.Stringer("lock", lock),
				)
				logged[lock.TxnID] = struct{}{}
			}
			// If an optimistic transaction encounters a lock with larger TS, this transaction will certainly
			// fail due to a WriteConflict error. So we can construct and return an error here early.
			// Pessimistic transactions don't need such an optimization. If this key needs a pessimistic lock,
			// TiKV will return a PessimisticLockNotFound error directly if it encounters a different lock. Otherwise,
			// TiKV returns lock.TTL = 0, and we still need to resolve the lock.
			if lock.TxnID > c.startTS && !c.isPessimistic {
				return tikverr.NewErrWriteConflictWithArgs(
					c.startTS,
					lock.TxnID,
					0,
					lock.Key,
					kvrpcpb.WriteConflict_Optimistic,
				)
			}
			locks = append(locks, lock)
		}
		if resolvingRecordToken == nil {
			token := c.store.GetLockResolver().RecordResolvingLocks(locks, c.startTS)
			resolvingRecordToken = &token
			defer c.store.GetLockResolver().ResolveLocksDone(c.startTS, *resolvingRecordToken)
		} else {
			c.store.GetLockResolver().UpdateResolvingLocks(locks, c.startTS, *resolvingRecordToken)
		}
		resolveLockOpts := txnlock.ResolveLocksOptions{
			CallerStartTS: c.startTS,
			Locks:         locks,
			Detail:        &c.getDetail().ResolveLock,
		}
		resolveLockRes, err := c.store.GetLockResolver().ResolveLocksWithOpts(bo, resolveLockOpts)
		if err != nil {
			return err
		}
		msBeforeExpired := resolveLockRes.TTL
		if msBeforeExpired > 0 {
			err = bo.BackoffWithCfgAndMaxSleep(
				retry.BoTxnLock,
				int(msBeforeExpired),
				errors.Errorf("[pipelined dml] flush lockedKeys: %d", len(locks)),
			)
			if err != nil {
				logutil.Logger(bo.GetCtx()).Warn(
					"[pipelined dml] backoff failed during flush",
					zap.Error(err),
					zap.Uint64("startTS", c.startTS),
					zap.Uint64("generation", action.generation),
				)
				return err
			}
		}
	}
}

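// isInterruptible reports whether the flush action can be interrupted; it always returns
// true for pipelined flush.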
func (actionPipelinedFlush) isInterruptible() bool {
	return true
}

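// pipelinedFlushMutations flushes the given mutations with the specified generation by running
// actionPipelinedFlush on them via doActionOnMutations.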
func (c *twoPhaseCommitter) pipelinedFlushMutations(bo *retry.Backoffer, mutations CommitterMutations, generation uint64) error {
	if span := opentracing.SpanFromContext(bo.GetCtx()); span != nil && span.Tracer() != nil {
		span1 := span.Tracer().StartSpan("twoPhaseCommitter.pipelinedFlushMutations", opentracing.ChildOf(span.Context()))
		defer span1.Finish()
		bo.SetCtx(opentracing.ContextWithSpan(bo.GetCtx(), span1))
	}

	return c.doActionOnMutations(bo, actionPipelinedFlush{generation}, mutations)
}

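// commitFlushedMutations commits a transaction whose mutations have already been flushed to
// TiKV: it fetches a commitTS, commits the primary key, broadcasts the transaction status to
// all stores, and then resolves the remaining flushed locks asynchronously.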
func (c *twoPhaseCommitter) commitFlushedMutations(bo *retry.Backoffer) error {
	logutil.Logger(bo.GetCtx()).Info(
		"[pipelined dml] start to commit transaction",
		zap.Int("keys", c.txn.GetMemBuffer().Len()),
		zap.Duration("flush_wait_duration", c.txn.GetMemBuffer().GetMetrics().WaitDuration),
		zap.Duration("total_duration", c.txn.GetMemBuffer().GetMetrics().TotalDuration),
		zap.Uint64("memdb traversal cache hit", c.txn.GetMemBuffer().GetMetrics().MemDBHitCount),
		zap.Uint64("memdb traversal cache miss", c.txn.GetMemBuffer().GetMetrics().MemDBMissCount),
		zap.String("size", units.HumanSize(float64(c.txn.GetMemBuffer().Size()))),
		zap.Uint64("startTS", c.startTS),
	)
	commitTS, err := c.store.GetTimestampWithRetry(bo, c.txn.GetScope())
	if err != nil {
		logutil.Logger(bo.GetCtx()).Warn("[pipelined dml] commit transaction get commitTS failed",
			zap.Error(err),
			zap.Uint64("txnStartTS", c.startTS),
		)
		return err
	}
	atomic.StoreUint64(&c.commitTS, commitTS)

	if _, err := util.EvalFailpoint("pipelinedCommitFail"); err == nil {
		return errors.New("pipelined DML commit failed")
	}

	primaryMutation := NewPlainMutations(1)
	primaryMutation.Push(c.pipelinedCommitInfo.primaryOp, c.primaryKey, nil, false, false, false, false)
	if err = c.commitMutations(bo, &primaryMutation); err != nil {
		return errors.Trace(err)
	}
	c.mu.Lock()
	c.mu.committed = true
	c.mu.Unlock()
	logutil.Logger(bo.GetCtx()).Info(
		"[pipelined dml] transaction is committed",
		zap.Uint64("startTS", c.startTS),
		zap.Uint64("commitTS", commitTS),
	)
	broadcastToAllStores(
		c.txn,
		c.store,
		retry.NewBackofferWithVars(
			bo.GetCtx(),
			broadcastMaxBackoff,
			c.txn.vars,
		),
		&kvrpcpb.TxnStatus{
			StartTs:     c.startTS,
			MinCommitTs: c.minCommitTSMgr.get(),
			CommitTs:    commitTS,
			RolledBack:  false,
			IsCompleted: false,
		},
		c.resourceGroupName,
		c.resourceGroupTag,
	)

	if _, err := util.EvalFailpoint("pipelinedSkipResolveLock"); err == nil {
		return nil
	}

	// Asynchronously resolve the remaining locks.
	commitBo := retry.NewBackofferWithVars(c.store.Ctx(), CommitSecondaryMaxBackoff, c.txn.vars)
	c.resolveFlushedLocks(commitBo, c.pipelinedCommitInfo.pipelinedStart, c.pipelinedCommitInfo.pipelinedEnd, true)
	return nil
}

// buildPipelinedResolveHandler returns a function that resolves all locks of the transaction within the given range.
// If the region cache is stale, it reloads the region info and resolves the remaining ranges.
// The function also counts the resolved regions.
func (c *twoPhaseCommitter) buildPipelinedResolveHandler(commit bool, resolved *atomic.Uint64) (rangetask.TaskHandler, error) {
	commitVersion := uint64(0)
	if commit {
		commitVersion = atomic.LoadUint64(&c.commitTS)
		if commitVersion == 0 {
			return nil, errors.New("commitTS is 0")
		}
	}
	maxBackOff := cleanupMaxBackoff
	if commit {
		maxBackOff = CommitSecondaryMaxBackoff
	}
	regionCache := c.store.GetRegionCache()
	// The handler function runs in a different goroutine; copy the required values beforehand to avoid data races.
	kvContext := &kvrpcpb.Context{
		Priority:         c.priority,
		SyncLog:          c.syncLog,
		ResourceGroupTag: c.resourceGroupTag,
		DiskFullOpt:      c.diskFullOpt,
		TxnSource:        c.txnSource,
		RequestSource:    PipelinedRequestSource,
		ResourceControlContext: &kvrpcpb.ResourceControlContext{
			ResourceGroupName: c.resourceGroupName,
		},
	}
	return func(ctx context.Context, r kv.KeyRange) (rangetask.TaskStat, error) {
		start := r.StartKey
		res := rangetask.TaskStat{}
		for {
			lreq := &kvrpcpb.ResolveLockRequest{
				StartVersion:  c.startTS,
				CommitVersion: commitVersion,
			}
			req := tikvrpc.NewRequest(tikvrpc.CmdResolveLock, lreq, *proto.Clone(kvContext).(*kvrpcpb.Context))
			bo := retry.NewBackoffer(ctx, maxBackOff)
			loc, err := regionCache.LocateKey(bo, start)
			if err != nil {
				return res, err
			}
			resp, err := c.store.SendReq(bo, req, loc.Region, client.MaxWriteExecutionTime)
			if err != nil {
				err = bo.Backoff(retry.BoRegionMiss, err)
				if err != nil {
					logutil.Logger(bo.GetCtx()).Error("send resolve lock request error", zap.Error(err))
					return res, err
				}
				continue
			}
			regionErr, err := resp.GetRegionError()
			if err != nil {
				logutil.Logger(bo.GetCtx()).Error("get region error failed", zap.Error(err))
				return res, err
			}
			if regionErr != nil {
				err = bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String()))
				if err != nil {
					logutil.Logger(bo.GetCtx()).Error("send resolve lock get region error", zap.Error(err))
					return res, err
				}
				continue
			}
			if resp.Resp == nil {
				logutil.Logger(bo.GetCtx()).Error("send resolve lock response body missing", zap.Error(errors.WithStack(tikverr.ErrBodyMissing)))
				return res, err
			}
			cmdResp := resp.Resp.(*kvrpcpb.ResolveLockResponse)
			if keyErr := cmdResp.GetError(); keyErr != nil {
				err = errors.Errorf("unexpected resolve err: %s", keyErr)
				logutil.BgLogger().Error(
					"resolveLock error",
					zap.Error(err),
					zap.Uint64("startVer", lreq.StartVersion),
					zap.Uint64("commitVer", lreq.CommitVersion),
					zap.String("debugInfo", tikverr.ExtractDebugInfoStrFromKeyErr(keyErr)),
				)
				return res, err
			}
			resolved.Add(1)
			res.CompletedRegions++
			if loc.EndKey == nil || bytes.Compare(loc.EndKey, r.EndKey) >= 0 {
				return res, nil
			}
			start = loc.EndKey
		}
	}, nil
}

// resolveFlushedLocks resolves all locks in the given range [start, end) with the given status.
// The resolve process runs in another goroutine, so this function does not block.
func (c *twoPhaseCommitter) resolveFlushedLocks(bo *retry.Backoffer, start, end []byte, commit bool) {
	var resolved atomic.Uint64
	handler, err := c.buildPipelinedResolveHandler(commit, &resolved)
	commitTs := uint64(0)
	if commit {
		commitTs = atomic.LoadUint64(&c.commitTS)
	}
	if err != nil {
		logutil.Logger(bo.GetCtx()).Error(
			"[pipelined dml] build buildPipelinedResolveHandler error",
			zap.Error(err),
			zap.Uint64("resolved regions", resolved.Load()),
			zap.Uint64("startTS", c.startTS),
			zap.Uint64("commitTS", commitTs),
		)
		return
	}

	status := "rollback"
	if commit {
		status = "commit"
	}

	runner := rangetask.NewRangeTaskRunnerWithID(
		fmt.Sprintf("pipelined-dml-%s", status),
		fmt.Sprintf("pipelined-dml-%s-%d", status, c.startTS),
		c.store,
		c.txn.pipelinedResolveLockConcurrency,
		handler,
	)
	runner.SetStatLogInterval(30 * time.Second)
	runner.SetRegionsPerTask(1)

	c.txn.spawnWithStorePool(func() {
		if err = runner.RunOnRange(bo.GetCtx(), start, end); err != nil {
			logutil.Logger(bo.GetCtx()).Error("[pipelined dml] resolve flushed locks failed",
				zap.String("txn-status", status),
				zap.Uint64("resolved regions", resolved.Load()),
				zap.Uint64("startTS", c.startTS),
				zap.Uint64("commitTS", commitTs),
				zap.Uint64("session", c.sessionID),
				zap.Error(err),
			)
		} else {
			logutil.Logger(bo.GetCtx()).Info("[pipelined dml] resolve flushed locks done",
				zap.String("txn-status", status),
				zap.Uint64("resolved regions", resolved.Load()),
				zap.Uint64("startTS", c.startTS),
				zap.Uint64("commitTS", commitTs),
				zap.Uint64("session", c.sessionID),
			)

			// Wait a while before notifying txn_status_cache to evict the txn. This tolerates
			// slow followers and avoids evicting the txn before the followers catch up.
			time.Sleep(broadcastGracePeriod)

			broadcastToAllStores(
				c.txn,
				c.store,
				retry.NewBackofferWithVars(
					bo.GetCtx(),
					broadcastMaxBackoff,
					c.txn.vars,
				),
				&kvrpcpb.TxnStatus{
					StartTs:     c.startTS,
					MinCommitTs: 0,
					CommitTs:    commitTs,
					RolledBack:  !commit,
					IsCompleted: true,
				},
				c.resourceGroupName,
				c.resourceGroupTag,
			)
		}
	})
}