mirror of https://github.com/tikv/client-go.git
// Copyright 2016 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package store

import (
	"container/list"
	"context"
	"sync"
	"time"

	"github.com/pingcap/kvproto/pkg/kvrpcpb"
	"github.com/pkg/errors"
	log "github.com/sirupsen/logrus"
	"github.com/tikv/client-go/config"
	"github.com/tikv/client-go/locate"
	"github.com/tikv/client-go/metrics"
	"github.com/tikv/client-go/retry"
	"github.com/tikv/client-go/rpc"
)

// LockResolver resolves locks and also caches resolved txn status.
type LockResolver struct {
	store *TiKVStore
	conf  *config.Config
	mu    struct {
		sync.RWMutex
		// resolved caches resolved txns (FIFO, txn id -> txnStatus).
		resolved       map[uint64]TxnStatus
		recentResolved *list.List
	}
}

func newLockResolver(store *TiKVStore) *LockResolver {
	r := &LockResolver{
		store: store,
		conf:  store.GetConfig(),
	}
	r.mu.resolved = make(map[uint64]TxnStatus)
	r.mu.recentResolved = list.New()
	return r
}

// NewLockResolver is exported for other packages to use; referencing it here suppresses the unused warning.
var _ = NewLockResolver

// NewLockResolver creates a LockResolver.
// It is exported for other packages to use. For instance, the binlog service
// needs it to determine a transaction's commit state.
func NewLockResolver(etcdAddrs []string, conf config.Config) (*LockResolver, error) {
	s, err := NewStore(etcdAddrs, conf)
	if err != nil {
		return nil, err
	}

	return s.GetLockResolver(), nil
}

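// The sketch below is illustrative and not part of the original file: it shows
// how an external component (for example a binlog service) might use
// NewLockResolver to check whether a transaction committed. The PD address,
// startTS and primary key are placeholders, and config.Config{} stands in for
// whatever configuration the caller actually uses.
func exampleCheckCommitState(startTS uint64, primaryKey []byte) {
	resolver, err := NewLockResolver([]string{"127.0.0.1:2379"}, config.Config{})
	if err != nil {
		log.Errorf("create lock resolver: %v", err)
		return
	}
	status, err := resolver.GetTxnStatus(startTS, primaryKey)
	if err != nil {
		log.Errorf("query txn status: %v", err)
		return
	}
	if status.IsCommitted() {
		log.Infof("txn %d committed at %d", startTS, status.CommitTS())
	} else {
		log.Infof("txn %d was rolled back", startTS)
	}
}

// Referenced to avoid an unused warning, mirroring `var _ = NewLockResolver` above.
var _ = exampleCheckCommitState
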
// TxnStatus represents a txn's final status. It should be Commit or Rollback.
type TxnStatus uint64

// IsCommitted returns true if the txn's final status is Commit.
func (s TxnStatus) IsCommitted() bool { return s > 0 }

// CommitTS returns the txn's commitTS. It is valid iff `IsCommitted` is true.
func (s TxnStatus) CommitTS() uint64 { return uint64(s) }

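// Illustrative sketch (not part of the original file): TxnStatus packs both
// outcomes into one uint64. Zero means the transaction was rolled back; any
// non-zero value is the commit timestamp itself. The timestamp below is a
// placeholder.
func exampleTxnStatusEncoding() {
	rolledBack := TxnStatus(0)
	committed := TxnStatus(400000000000000001) // placeholder commitTS
	if !rolledBack.IsCommitted() && committed.IsCommitted() {
		log.Debugf("commitTS = %d", committed.CommitTS())
	}
}

var _ = exampleTxnStatusEncoding
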
// Lock represents a lock from the TiKV server.
type Lock struct {
	Key     []byte
	Primary []byte
	TxnID   uint64
	TTL     uint64
}

// NewLock creates a new *Lock.
func NewLock(l *kvrpcpb.LockInfo, defaultTTL uint64) *Lock {
	ttl := l.GetLockTtl()
	if ttl == 0 {
		ttl = defaultTTL
	}
	return &Lock{
		Key:     l.GetKey(),
		Primary: l.GetPrimaryLock(),
		TxnID:   l.GetLockVersion(),
		TTL:     ttl,
	}
}

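// Illustrative sketch (not part of the original file): when the server does
// not report a TTL, NewLock falls back to the caller-supplied default so the
// expiry check in ResolveLocks still has something to work with. All field
// values below are placeholders.
func exampleNewLockWithDefaultTTL() *Lock {
	info := &kvrpcpb.LockInfo{
		Key:         []byte("k"),
		PrimaryLock: []byte("pk"),
		LockVersion: 400000000000000001, // startTS of the txn holding the lock
		LockTtl:     0,                  // no TTL reported by the server
	}
	return NewLock(info, 3000) // the lock gets the 3000ms default TTL
}

var _ = exampleNewLockWithDefaultTTL
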
func (lr *LockResolver) saveResolved(txnID uint64, status TxnStatus) {
	lr.mu.Lock()
	defer lr.mu.Unlock()

	if _, ok := lr.mu.resolved[txnID]; ok {
		return
	}
	lr.mu.resolved[txnID] = status
	lr.mu.recentResolved.PushBack(txnID)
	if len(lr.mu.resolved) > lr.conf.Txn.ResolveCacheSize {
		front := lr.mu.recentResolved.Front()
		delete(lr.mu.resolved, front.Value.(uint64))
		lr.mu.recentResolved.Remove(front)
	}
}

func (lr *LockResolver) getResolved(txnID uint64) (TxnStatus, bool) {
	lr.mu.RLock()
	defer lr.mu.RUnlock()

	s, ok := lr.mu.resolved[txnID]
	return s, ok
}

// BatchResolveLocks resolves locks in a batch.
func (lr *LockResolver) BatchResolveLocks(bo *retry.Backoffer, locks []*Lock, loc locate.RegionVerID) (bool, error) {
	if len(locks) == 0 {
		return true, nil
	}

	metrics.LockResolverCounter.WithLabelValues("batch_resolve").Inc()

	var expiredLocks []*Lock
	for _, l := range locks {
		if lr.store.GetOracle().IsExpired(l.TxnID, l.TTL) {
			metrics.LockResolverCounter.WithLabelValues("expired").Inc()
			expiredLocks = append(expiredLocks, l)
		} else {
			metrics.LockResolverCounter.WithLabelValues("not_expired").Inc()
		}
	}
	if len(expiredLocks) != len(locks) {
		log.Errorf("BatchResolveLocks: get %d Locks, but only %d are expired, maybe safe point is wrong!", len(locks), len(expiredLocks))
		return false, nil
	}

	startTime := time.Now()
	txnInfos := make(map[uint64]uint64)
	for _, l := range expiredLocks {
		if _, ok := txnInfos[l.TxnID]; ok {
			continue
		}

		status, err := lr.getTxnStatus(bo, l.TxnID, l.Primary)
		if err != nil {
			return false, err
		}
		txnInfos[l.TxnID] = uint64(status)
	}
	log.Infof("BatchResolveLocks: it took %v to lookup %v txn status", time.Since(startTime), len(txnInfos))

	var listTxnInfos []*kvrpcpb.TxnInfo
	for txnID, status := range txnInfos {
		listTxnInfos = append(listTxnInfos, &kvrpcpb.TxnInfo{
			Txn:    txnID,
			Status: status,
		})
	}

	req := &rpc.Request{
		Type: rpc.CmdResolveLock,
		ResolveLock: &kvrpcpb.ResolveLockRequest{
			TxnInfos: listTxnInfos,
		},
	}
	startTime = time.Now()
	resp, err := lr.store.SendReq(bo, req, loc, lr.conf.RPC.ReadTimeoutShort)
	if err != nil {
		return false, err
	}

	regionErr, err := resp.GetRegionError()
	if err != nil {
		return false, err
	}

	if regionErr != nil {
		err = bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String()))
		if err != nil {
			return false, err
		}
		return false, nil
	}

	cmdResp := resp.ResolveLock
	if cmdResp == nil {
		return false, errors.WithStack(rpc.ErrBodyMissing)
	}
	if keyErr := cmdResp.GetError(); keyErr != nil {
		return false, errors.Errorf("unexpected resolve err: %s", keyErr)
	}

	log.Infof("BatchResolveLocks: it took %v to resolve %v locks in a batch.", time.Since(startTime), len(expiredLocks))
	return true, nil
}

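// Illustrative sketch (not part of the original file): a GC-style caller that
// has scanned all locks in one region can hand them to BatchResolveLocks in a
// single round trip. The locks and region are assumed to come from a prior
// scan, and retry.CleanupMaxBackoff is reused here only as a placeholder
// backoff budget.
func exampleBatchResolve(lr *LockResolver, locks []*Lock, region locate.RegionVerID) error {
	bo := retry.NewBackoffer(context.Background(), retry.CleanupMaxBackoff)
	ok, err := lr.BatchResolveLocks(bo, locks, region)
	if err != nil {
		return err
	}
	if !ok {
		// Either some locks were not expired yet or the region has changed;
		// the caller is expected to rescan the region and retry.
		log.Info("BatchResolveLocks did not finish, rescan and retry")
	}
	return nil
}

var _ = exampleBatchResolve
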
// ResolveLocks tries to resolve Locks. The resolving process is in 3 steps:
// 1) Use the `lockTTL` to pick up all expired locks. Only locks that are too
//    old are considered orphan locks and will be handled later. If all locks
//    are expired then all of them will be resolved and the returned `ok` will
//    be true; otherwise the caller should sleep a while before retrying.
// 2) For each lock, query the primary key to get the commit status of the
//    transaction which left the lock.
// 3) Send a `ResolveLock` cmd to the lock's region to resolve all locks
//    belonging to the same transaction.
func (lr *LockResolver) ResolveLocks(bo *retry.Backoffer, locks []*Lock) (ok bool, err error) {
	if len(locks) == 0 {
		return true, nil
	}

	metrics.LockResolverCounter.WithLabelValues("resolve").Inc()

	var expiredLocks []*Lock
	for _, l := range locks {
		if lr.store.GetOracle().IsExpired(l.TxnID, l.TTL) {
			metrics.LockResolverCounter.WithLabelValues("expired").Inc()
			expiredLocks = append(expiredLocks, l)
		} else {
			metrics.LockResolverCounter.WithLabelValues("not_expired").Inc()
		}
	}
	if len(expiredLocks) == 0 {
		return false, nil
	}

	// TxnID -> []Region, record resolved Regions.
	// TODO: Maybe put it in LockResolver and share by all txns.
	cleanTxns := make(map[uint64]map[locate.RegionVerID]struct{})
	for _, l := range expiredLocks {
		status, err := lr.getTxnStatus(bo, l.TxnID, l.Primary)
		if err != nil {
			return false, err
		}

		cleanRegions := cleanTxns[l.TxnID]
		if cleanRegions == nil {
			cleanRegions = make(map[locate.RegionVerID]struct{})
			cleanTxns[l.TxnID] = cleanRegions
		}

		err = lr.resolveLock(bo, l, status, cleanRegions)
		if err != nil {
			return false, err
		}
	}
	return len(expiredLocks) == len(locks), nil
}

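// Illustrative sketch (not part of the original file): the common read path
// turns locks reported by the server into *Lock values and hands them to
// ResolveLocks. If not every lock could be resolved, the caller should back
// off for a while and retry the read. defaultTTL is a placeholder for the
// configured default lock TTL.
func exampleResolveLocksOnRead(lr *LockResolver, bo *retry.Backoffer, infos []*kvrpcpb.LockInfo, defaultTTL uint64) error {
	locks := make([]*Lock, 0, len(infos))
	for _, info := range infos {
		locks = append(locks, NewLock(info, defaultTTL))
	}
	ok, err := lr.ResolveLocks(bo, locks)
	if err != nil {
		return err
	}
	if !ok {
		// Some locks are still alive; sleep or back off before retrying the
		// read (e.g. with a txn-lock backoff type from the retry package).
		log.Debug("not all locks resolved, retry the read after a backoff")
	}
	return nil
}

var _ = exampleResolveLocksOnRead
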
// GetTxnStatus queries tikv-server for a txn's status (commit/rollback).
// If the primary key is still locked, it will launch a Rollback to abort it.
// To avoid unnecessarily aborting too many txns, it is wiser to wait a few
// seconds before calling it after Prewrite.
func (lr *LockResolver) GetTxnStatus(txnID uint64, primary []byte) (TxnStatus, error) {
	bo := retry.NewBackoffer(context.Background(), retry.CleanupMaxBackoff)
	return lr.getTxnStatus(bo, txnID, primary)
}

func (lr *LockResolver) getTxnStatus(bo *retry.Backoffer, txnID uint64, primary []byte) (TxnStatus, error) {
	if s, ok := lr.getResolved(txnID); ok {
		return s, nil
	}

	metrics.LockResolverCounter.WithLabelValues("query_txn_status").Inc()

	var status TxnStatus
	req := &rpc.Request{
		Type: rpc.CmdCleanup,
		Cleanup: &kvrpcpb.CleanupRequest{
			Key:          primary,
			StartVersion: txnID,
		},
	}
	for {
		loc, err := lr.store.GetRegionCache().LocateKey(bo, primary)
		if err != nil {
			return status, err
		}
		resp, err := lr.store.SendReq(bo, req, loc.Region, lr.conf.RPC.ReadTimeoutShort)
		if err != nil {
			return status, err
		}
		regionErr, err := resp.GetRegionError()
		if err != nil {
			return status, err
		}
		if regionErr != nil {
			err = bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String()))
			if err != nil {
				return status, err
			}
			continue
		}
		cmdResp := resp.Cleanup
		if cmdResp == nil {
			return status, errors.WithStack(rpc.ErrBodyMissing)
		}
		if keyErr := cmdResp.GetError(); keyErr != nil {
			err = errors.Errorf("unexpected cleanup err: %s, tid: %v", keyErr, txnID)
			log.Error(err)
			return status, err
		}
		if cmdResp.CommitVersion != 0 {
			status = TxnStatus(cmdResp.GetCommitVersion())
			metrics.LockResolverCounter.WithLabelValues("query_txn_status_committed").Inc()
		} else {
			metrics.LockResolverCounter.WithLabelValues("query_txn_status_rolled_back").Inc()
		}
		lr.saveResolved(txnID, status)
		return status, nil
	}
}

func (lr *LockResolver) resolveLock(bo *retry.Backoffer, l *Lock, status TxnStatus, cleanRegions map[locate.RegionVerID]struct{}) error {
	metrics.LockResolverCounter.WithLabelValues("query_resolve_locks").Inc()
	for {
		loc, err := lr.store.GetRegionCache().LocateKey(bo, l.Key)
		if err != nil {
			return err
		}
		if _, ok := cleanRegions[loc.Region]; ok {
			return nil
		}
		req := &rpc.Request{
			Type: rpc.CmdResolveLock,
			ResolveLock: &kvrpcpb.ResolveLockRequest{
				StartVersion: l.TxnID,
			},
		}
		if status.IsCommitted() {
			req.ResolveLock.CommitVersion = status.CommitTS()
		}
		resp, err := lr.store.SendReq(bo, req, loc.Region, lr.conf.RPC.ReadTimeoutShort)
		if err != nil {
			return err
		}
		regionErr, err := resp.GetRegionError()
		if err != nil {
			return err
		}
		if regionErr != nil {
			err = bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String()))
			if err != nil {
				return err
			}
			continue
		}
		cmdResp := resp.ResolveLock
		if cmdResp == nil {
			return errors.WithStack(rpc.ErrBodyMissing)
		}
		if keyErr := cmdResp.GetError(); keyErr != nil {
			err = errors.Errorf("unexpected resolve err: %s, lock: %v", keyErr, l)
			log.Error(err)
			return err
		}
		cleanRegions[loc.Region] = struct{}{}
		return nil
	}
}