txn: provide more information in commit RPC / log mvcc debug info when commit failed for `TxnLockNotFound` (#1640)

ref tikv/client-go#1631

Signed-off-by: Chao Wang <cclcwangchao@hotmail.com>
This commit is contained in:
王超 2025-05-12 19:19:20 +08:00 committed by GitHub
parent 2058fbc062
commit 3150e385e3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 276 additions and 19 deletions

View File

@ -35,9 +35,11 @@
package error package error
import ( import (
"encoding/json"
"fmt" "fmt"
"time" "time"
"github.com/gogo/protobuf/proto"
"github.com/pingcap/kvproto/pkg/kvrpcpb" "github.com/pingcap/kvproto/pkg/kvrpcpb"
"github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/kvproto/pkg/pdpb"
"github.com/pingcap/log" "github.com/pingcap/log"
@ -355,3 +357,47 @@ func Log(err error) {
log.Error("encountered error", zap.Error(err), zap.Stack("stack")) log.Error("encountered error", zap.Error(err), zap.Stack("stack"))
} }
} }
// ExtractDebugInfoStrFromKeyErr extracts the debug info from key error
func ExtractDebugInfoStrFromKeyErr(keyErr *kvrpcpb.KeyError) string {
if keyErr.DebugInfo == nil {
return ""
}
debugInfoToMarshal := keyErr.DebugInfo
if redact.NeedRedact() {
redactMarker := []byte{'?'}
debugInfoToMarshal = proto.Clone(debugInfoToMarshal).(*kvrpcpb.DebugInfo)
for _, mvccInfo := range debugInfoToMarshal.MvccInfo {
mvccInfo.Key = redactMarker
if mvcc := mvccInfo.Mvcc; mvcc != nil {
if lock := mvcc.Lock; lock != nil {
lock.Primary = redactMarker
lock.ShortValue = redactMarker
for i := range lock.Secondaries {
lock.Secondaries[i] = redactMarker
}
}
for _, write := range mvcc.Writes {
if write != nil {
write.ShortValue = redactMarker
}
}
for _, value := range mvcc.Values {
if value != nil {
value.Value = redactMarker
}
}
}
}
}
debugStr, err := json.Marshal(debugInfoToMarshal)
if err != nil {
log.Error("encountered error when extracting debug info for keyError", zap.Error(err), zap.Stack("stack"))
return ""
}
return string(debugStr)
}

68
error/error_test.go Normal file
View File

@ -0,0 +1,68 @@
package error
import (
"testing"
"github.com/pingcap/errors"
"github.com/pingcap/kvproto/pkg/kvrpcpb"
"github.com/stretchr/testify/assert"
)
func TestExtractDebugInfoStrFromKeyErr(t *testing.T) {
origRedact := errors.RedactLogEnabled.Load()
defer errors.RedactLogEnabled.Store(origRedact)
errors.RedactLogEnabled.Store(errors.RedactLogDisable)
// empty debug info
assert.Equal(t, "", ExtractDebugInfoStrFromKeyErr(&kvrpcpb.KeyError{
TxnLockNotFound: &kvrpcpb.TxnLockNotFound{Key: []byte("byte")},
}))
// non-empty debug info
debugInfo := &kvrpcpb.DebugInfo{
MvccInfo: []*kvrpcpb.MvccDebugInfo{
{
Key: []byte("byte"),
Mvcc: &kvrpcpb.MvccInfo{
Lock: &kvrpcpb.MvccLock{
Type: kvrpcpb.Op_Del,
StartTs: 128,
Primary: []byte("k1"),
Secondaries: [][]byte{
[]byte("k1"),
[]byte("k2"),
},
ShortValue: []byte("v1"),
},
Writes: []*kvrpcpb.MvccWrite{
{
Type: kvrpcpb.Op_Insert,
StartTs: 64,
CommitTs: 86,
ShortValue: []byte{0x1, 0x2, 0x3, 0x4, 0x5, 0x6},
},
},
Values: []*kvrpcpb.MvccValue{
{
StartTs: 64,
Value: []byte{0x11, 0x12},
},
},
},
},
},
}
expectedStr := `{"mvcc_info":[{"key":"Ynl0ZQ==","mvcc":{"lock":{"type":1,"start_ts":128,"primary":"azE=","short_value":"djE=","secondaries":["azE=","azI="]},"writes":[{"type":4,"start_ts":64,"commit_ts":86,"short_value":"AQIDBAUG"}],"values":[{"start_ts":64,"value":"ERI="}]}}]}`
assert.Equal(t, expectedStr, ExtractDebugInfoStrFromKeyErr(&kvrpcpb.KeyError{
TxnLockNotFound: &kvrpcpb.TxnLockNotFound{Key: []byte("byte")},
DebugInfo: debugInfo,
}))
// redact log enabled
errors.RedactLogEnabled.Store(errors.RedactLogEnable)
expectedStr = `{"mvcc_info":[{"key":"Pw==","mvcc":{"lock":{"type":1,"start_ts":128,"primary":"Pw==","short_value":"Pw==","secondaries":["Pw==","Pw=="]},"writes":[{"type":4,"start_ts":64,"commit_ts":86,"short_value":"Pw=="}],"values":[{"start_ts":64,"value":"Pw=="}]}}]}`
assert.Equal(t, expectedStr, ExtractDebugInfoStrFromKeyErr(&kvrpcpb.KeyError{
TxnLockNotFound: &kvrpcpb.TxnLockNotFound{Key: []byte("byte")},
DebugInfo: debugInfo,
}))
}

View File

@ -149,6 +149,11 @@ func (c *codecV2) EncodeRequest(req *tikvrpc.Request) (*tikvrpc.Request, error)
case tikvrpc.CmdCommit: case tikvrpc.CmdCommit:
r := *req.Commit() r := *req.Commit()
r.Keys = c.encodeKeys(r.Keys) r.Keys = c.encodeKeys(r.Keys)
if len(r.PrimaryKey) > 0 {
// Only encode the primary key if it is not empty.
// Otherwise, it means `PrimaryKey` is not set, left it as empty to indicate it is not set in RPC.
r.PrimaryKey = c.EncodeKey(r.PrimaryKey)
}
req.Req = &r req.Req = &r
case tikvrpc.CmdCleanup: case tikvrpc.CmdCleanup:
r := *req.Cleanup() r := *req.Cleanup()
@ -938,6 +943,19 @@ func (c *codecV2) decodeKeyError(keyError *kvrpcpb.KeyError) (*kvrpcpb.KeyError,
return nil, err return nil, err
} }
} }
if keyError.TxnLockNotFound != nil {
keyError.TxnLockNotFound.Key, err = c.DecodeKey(keyError.TxnLockNotFound.Key)
if err != nil {
return nil, err
}
}
if debugInfo := keyError.DebugInfo; debugInfo != nil {
for _, mvccInfo := range debugInfo.MvccInfo {
if mvccInfo.Key, err = c.DecodeKey(mvccInfo.Key); err != nil {
return nil, err
}
}
}
return keyError, nil return keyError, nil
} }

View File

@ -50,21 +50,65 @@ func (suite *testCodecV2Suite) SetupSuite() {
func (suite *testCodecV2Suite) TestEncodeRequest() { func (suite *testCodecV2Suite) TestEncodeRequest() {
re := suite.Require() re := suite.Require()
req := &tikvrpc.Request{ requests := []struct {
Type: tikvrpc.CmdRawGet, name string
Req: &kvrpcpb.RawGetRequest{ req *tikvrpc.Request
Key: []byte("key"), validate func(*tikvrpc.Request)
}{
{
name: "CmdRawGet",
req: &tikvrpc.Request{
Type: tikvrpc.CmdRawGet,
Req: &kvrpcpb.RawGetRequest{
Key: []byte("key"),
},
},
validate: func(encoded *tikvrpc.Request) {
re.Equal(append(keyspacePrefix, []byte("key")...), encoded.RawGet().Key)
},
},
{
name: "CmdCommitWithOutPrimaryKey",
req: &tikvrpc.Request{
Type: tikvrpc.CmdCommit,
Req: &kvrpcpb.CommitRequest{
Keys: [][]byte{[]byte("key1"), []byte("key2")},
},
},
validate: func(encoded *tikvrpc.Request) {
re.Equal([][]byte{
append(keyspacePrefix, []byte("key1")...),
append(keyspacePrefix, []byte("key2")...),
}, encoded.Commit().Keys)
re.Empty(encoded.Commit().PrimaryKey)
},
},
{
name: "CmdCommitWithPrimaryKey",
req: &tikvrpc.Request{
Type: tikvrpc.CmdCommit,
Req: &kvrpcpb.CommitRequest{
Keys: [][]byte{[]byte("key1"), []byte("key2")},
PrimaryKey: []byte("key1"),
},
},
validate: func(encoded *tikvrpc.Request) {
re.Equal([][]byte{
append(keyspacePrefix, []byte("key1")...),
append(keyspacePrefix, []byte("key2")...),
}, encoded.Commit().Keys)
re.Equal(append(keyspacePrefix, []byte("key1")...), encoded.Commit().PrimaryKey)
},
}, },
} }
req.ApiVersion = kvrpcpb.APIVersion_V2
r, err := suite.codec.EncodeRequest(req) for _, req := range requests {
re.NoError(err) suite.Run(req.name, func() {
re.Equal(append(keyspacePrefix, []byte("key")...), r.RawGet().Key) encoded, err := suite.codec.EncodeRequest(req.req)
re.NoError(err)
r, err = suite.codec.EncodeRequest(req) req.validate(encoded)
re.NoError(err) })
re.Equal(append(keyspacePrefix, []byte("key")...), r.RawGet().Key) }
} }
func (suite *testCodecV2Suite) TestEncodeV2KeyRanges() { func (suite *testCodecV2Suite) TestEncodeV2KeyRanges() {
@ -278,6 +322,55 @@ func (suite *testCodecV2Suite) TestDecodeEpochNotMatch() {
} }
} }
func (suite *testCodecV2Suite) TestDecodeKeyError() {
re := suite.Require()
errors := []struct {
name string
err *kvrpcpb.KeyError
validate func(*kvrpcpb.KeyError)
}{
{
name: "TxnLockNotFound",
err: &kvrpcpb.KeyError{
TxnLockNotFound: &kvrpcpb.TxnLockNotFound{
Key: append(keyspacePrefix, []byte("key1")...),
},
},
validate: func(decoded *kvrpcpb.KeyError) {
re.Equal([]byte("key1"), decoded.TxnLockNotFound.Key)
},
},
{
name: "MvccDebugInfo",
err: &kvrpcpb.KeyError{
TxnLockNotFound: &kvrpcpb.TxnLockNotFound{
Key: append(keyspacePrefix, []byte("key1")...),
},
DebugInfo: &kvrpcpb.DebugInfo{
MvccInfo: []*kvrpcpb.MvccDebugInfo{
{
Key: append(keyspacePrefix, []byte("key1")...),
Mvcc: &kvrpcpb.MvccInfo{},
},
},
},
},
validate: func(decoded *kvrpcpb.KeyError) {
re.Equal([]byte("key1"), decoded.TxnLockNotFound.Key)
re.Equal(1, len(decoded.DebugInfo.MvccInfo))
re.Equal([]byte("key1"), decoded.DebugInfo.MvccInfo[0].Key)
},
},
}
codec := suite.codec
for _, keyErr := range errors {
decoded, err := codec.decodeKeyError(keyErr.err)
re.NoError(err)
keyErr.validate(decoded)
}
}
func (suite *testCodecV2Suite) TestGetKeyspaceID() { func (suite *testCodecV2Suite) TestGetKeyspaceID() {
suite.Equal(KeyspaceID(testKeyspaceID), suite.codec.GetKeyspaceID()) suite.Equal(KeyspaceID(testKeyspaceID), suite.codec.GetKeyspaceID())
} }

View File

@ -73,10 +73,18 @@ func (action actionCommit) tiKVTxnRegionsNumHistogram() prometheus.Observer {
func (action actionCommit) handleSingleBatch(c *twoPhaseCommitter, bo *retry.Backoffer, batch batchMutations) error { func (action actionCommit) handleSingleBatch(c *twoPhaseCommitter, bo *retry.Backoffer, batch batchMutations) error {
keys := batch.mutations.GetKeys() keys := batch.mutations.GetKeys()
var commitRole kvrpcpb.CommitRole
if batch.isPrimary {
commitRole = kvrpcpb.CommitRole_Primary
} else {
commitRole = kvrpcpb.CommitRole_Secondary
}
req := tikvrpc.NewRequest(tikvrpc.CmdCommit, &kvrpcpb.CommitRequest{ req := tikvrpc.NewRequest(tikvrpc.CmdCommit, &kvrpcpb.CommitRequest{
StartVersion: c.startTS, StartVersion: c.startTS,
Keys: keys, Keys: keys,
PrimaryKey: c.primary(),
CommitVersion: c.commitTS, CommitVersion: c.commitTS,
CommitRole: commitRole,
}, kvrpcpb.Context{ }, kvrpcpb.Context{
Priority: c.priority, Priority: c.priority,
SyncLog: c.syncLog, SyncLog: c.syncLog,
@ -212,13 +220,18 @@ func (action actionCommit) handleSingleBatch(c *twoPhaseCommitter, bo *retry.Bac
zap.Error(err), zap.Error(err),
zap.Uint64("txnStartTS", c.startTS), zap.Uint64("txnStartTS", c.startTS),
zap.Uint64("commitTS", c.commitTS), zap.Uint64("commitTS", c.commitTS),
zap.Strings("keys", hexBatchKeys(keys))) zap.Strings("keys", hexBatchKeys(keys)),
zap.Uint64("sessionID", c.sessionID),
zap.String("debugInfo", tikverr.ExtractDebugInfoStrFromKeyErr(keyErr)))
return err return err
} }
// The transaction maybe rolled back by concurrent transactions. // The transaction maybe rolled back by concurrent transactions.
logutil.Logger(bo.GetCtx()).Debug("2PC failed commit primary key", logutil.Logger(bo.GetCtx()).Debug("2PC failed commit primary key",
zap.Error(err), zap.Error(err),
zap.Uint64("txnStartTS", c.startTS)) zap.Uint64("txnStartTS", c.startTS),
zap.Uint64("commitTS", c.commitTS),
zap.Uint64("sessionID", c.sessionID),
)
return err return err
} }
break break

View File

@ -438,7 +438,13 @@ func (c *twoPhaseCommitter) buildPipelinedResolveHandler(commit bool, resolved *
cmdResp := resp.Resp.(*kvrpcpb.ResolveLockResponse) cmdResp := resp.Resp.(*kvrpcpb.ResolveLockResponse)
if keyErr := cmdResp.GetError(); keyErr != nil { if keyErr := cmdResp.GetError(); keyErr != nil {
err = errors.Errorf("unexpected resolve err: %s", keyErr) err = errors.Errorf("unexpected resolve err: %s", keyErr)
logutil.BgLogger().Error("resolveLock error", zap.Error(err)) logutil.BgLogger().Error(
"resolveLock error",
zap.Error(err),
zap.Uint64("startVer", lreq.StartVersion),
zap.Uint64("commitVer", lreq.CommitVersion),
zap.String("debugInfo", tikverr.ExtractDebugInfoStrFromKeyErr(keyErr)),
)
return res, err return res, err
} }
resolved.Add(1) resolved.Add(1)

View File

@ -344,7 +344,13 @@ func (lr *LockResolver) BatchResolveLocks(bo *retry.Backoffer, locks []*Lock, lo
} }
cmdResp := resp.Resp.(*kvrpcpb.ResolveLockResponse) cmdResp := resp.Resp.(*kvrpcpb.ResolveLockResponse)
if keyErr := cmdResp.GetError(); keyErr != nil { if keyErr := cmdResp.GetError(); keyErr != nil {
return false, errors.Errorf("unexpected resolve err: %s", keyErr) err = errors.Errorf("unexpected resolve err: %s", keyErr)
logutil.BgLogger().Error(
"resolveLock error",
zap.Error(err),
zap.String("debugInfo", tikverr.ExtractDebugInfoStrFromKeyErr(keyErr)),
)
return false, err
} }
logutil.BgLogger().Info("BatchResolveLocks: resolve locks in a batch", logutil.BgLogger().Info("BatchResolveLocks: resolve locks in a batch",
@ -1124,10 +1130,13 @@ func (lr *LockResolver) resolveRegionLocks(bo *retry.Backoffer, l *Lock, region
cmdResp := resp.Resp.(*kvrpcpb.ResolveLockResponse) cmdResp := resp.Resp.(*kvrpcpb.ResolveLockResponse)
if keyErr := cmdResp.GetError(); keyErr != nil { if keyErr := cmdResp.GetError(); keyErr != nil {
err = errors.Errorf("unexpected resolve err: %s, lock: %v", keyErr, l) err = errors.Errorf("unexpected resolve err: %s, lock: %v", keyErr, l)
logutil.BgLogger().Error("resolveLock error", zap.Error(err)) logutil.BgLogger().Error("resolveLock error",
zap.Error(err),
zap.String("debugInfo", tikverr.ExtractDebugInfoStrFromKeyErr(keyErr)),
)
} }
return nil return err
} }
func (lr *LockResolver) resolveLock(bo *retry.Backoffer, l *Lock, status TxnStatus, lite bool, cleanRegions map[locate.RegionVerID]struct{}) error { func (lr *LockResolver) resolveLock(bo *retry.Backoffer, l *Lock, status TxnStatus, lite bool, cleanRegions map[locate.RegionVerID]struct{}) error {
@ -1190,7 +1199,11 @@ func (lr *LockResolver) resolveLock(bo *retry.Backoffer, l *Lock, status TxnStat
cmdResp := resp.Resp.(*kvrpcpb.ResolveLockResponse) cmdResp := resp.Resp.(*kvrpcpb.ResolveLockResponse)
if keyErr := cmdResp.GetError(); keyErr != nil { if keyErr := cmdResp.GetError(); keyErr != nil {
err = errors.Errorf("unexpected resolve err: %s, lock: %v", keyErr, l) err = errors.Errorf("unexpected resolve err: %s, lock: %v", keyErr, l)
logutil.BgLogger().Error("resolveLock error", zap.Error(err)) logutil.BgLogger().Error(
"resolveLock error",
zap.Error(err),
zap.String("debugInfo", tikverr.ExtractDebugInfoStrFromKeyErr(keyErr)),
)
return err return err
} }
if !resolveLite { if !resolveLite {