// Copyright 2021 TiKV Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE: The code in this file is based on code from the
// TiDB project, licensed under the Apache License v2.0
//
// https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/pessimistic.go
//
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package transaction

import (
	"encoding/hex"
	"math/rand"
	"strings"
	"sync/atomic"
	"time"

	"github.com/pingcap/kvproto/pkg/errorpb"
	"github.com/pingcap/kvproto/pkg/kvrpcpb"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/tikv/client-go/v2/config/retry"
	tikverr "github.com/tikv/client-go/v2/error"
	"github.com/tikv/client-go/v2/internal/client"
	"github.com/tikv/client-go/v2/internal/locate"
	"github.com/tikv/client-go/v2/internal/logutil"
	"github.com/tikv/client-go/v2/kv"
	"github.com/tikv/client-go/v2/metrics"
	"github.com/tikv/client-go/v2/tikvrpc"
	"github.com/tikv/client-go/v2/txnkv/txnlock"
	"github.com/tikv/client-go/v2/util"
	"go.uber.org/zap"
)

type actionPessimisticLock struct {
	*kv.LockCtx
	wakeUpMode kvrpcpb.PessimisticLockWakeUpMode
	isInternal bool
}

type actionPessimisticRollback struct {
	isInternal bool
}

var (
	_ twoPhaseCommitAction = actionPessimisticLock{}
	_ twoPhaseCommitAction = actionPessimisticRollback{}
)

func (action actionPessimisticLock) String() string {
	return "pessimistic_lock"
}

func (action actionPessimisticLock) tiKVTxnRegionsNumHistogram() prometheus.Observer {
	if action.isInternal {
		return metrics.TxnRegionsNumHistogramPessimisticLockInternal
	}
	return metrics.TxnRegionsNumHistogramPessimisticLock
}

func (action actionPessimisticRollback) String() string {
	return "pessimistic_rollback"
}

func (action actionPessimisticRollback) tiKVTxnRegionsNumHistogram() prometheus.Observer {
	if action.isInternal {
		return metrics.TxnRegionsNumHistogramPessimisticRollbackInternal
	}
	return metrics.TxnRegionsNumHistogramPessimisticRollback
}

type diagnosticContext struct {
	resolvingRecordToken *int
	sender               *locate.RegionRequestSender
	reqDuration          time.Duration
}

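// handleSingleBatch sends a PessimisticLock request for one batch of mutations and keeps
// retrying until the batch is either fully locked or fails with an unrecoverable error.
// Each iteration refreshes the lock TTL and the remaining wait timeout, then dispatches the
// response to the normal-mode or force-lock-mode handler according to action.wakeUpMode.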
func (action actionPessimisticLock) handleSingleBatch(
	c *twoPhaseCommitter, bo *retry.Backoffer, batch batchMutations,
) error {
	convertMutationsToPb := func(committerMutations CommitterMutations) []*kvrpcpb.Mutation {
		mutations := make([]*kvrpcpb.Mutation, committerMutations.Len())
		c.txn.GetMemBuffer().RLock()
		for i := 0; i < committerMutations.Len(); i++ {
			mut := &kvrpcpb.Mutation{
				Op:  kvrpcpb.Op_PessimisticLock,
				Key: committerMutations.GetKey(i),
			}
			if c.txn.us.HasPresumeKeyNotExists(committerMutations.GetKey(i)) {
				mut.Assertion = kvrpcpb.Assertion_NotExist
			}
			mutations[i] = mut
		}
		c.txn.GetMemBuffer().RUnlock()
		return mutations
	}

	m := batch.mutations
	mutations := convertMutationsToPb(m)
	req := tikvrpc.NewRequest(
		tikvrpc.CmdPessimisticLock,
		&kvrpcpb.PessimisticLockRequest{
			Mutations:        mutations,
			PrimaryLock:      c.primary(),
			StartVersion:     c.startTS,
			ForUpdateTs:      c.forUpdateTS,
			IsFirstLock:      c.isFirstLock,
			WaitTimeout:      action.LockWaitTime(),
			ReturnValues:     action.ReturnValues,
			CheckExistence:   action.CheckExistence,
			MinCommitTs:      c.forUpdateTS + 1,
			WakeUpMode:       action.wakeUpMode,
			LockOnlyIfExists: action.LockOnlyIfExists,
		},
		kvrpcpb.Context{
			Priority:               c.priority,
			SyncLog:                c.syncLog,
			ResourceGroupTag:       action.LockCtx.ResourceGroupTag,
			MaxExecutionDurationMs: uint64(client.MaxWriteExecutionTime.Milliseconds()),
			RequestSource:          c.txn.GetRequestSource(),
			ResourceControlContext: &kvrpcpb.ResourceControlContext{
				ResourceGroupName: c.resourceGroupName,
			},
		},
	)
	if action.LockCtx.ResourceGroupTag == nil && action.LockCtx.ResourceGroupTagger != nil {
		req.ResourceGroupTag = action.LockCtx.ResourceGroupTagger(req.Req.(*kvrpcpb.PessimisticLockRequest))
	}
	lockWaitStartTime := action.WaitStartTime
	diagCtx := diagnosticContext{}
	defer func() {
		if diagCtx.resolvingRecordToken != nil {
			c.store.GetLockResolver().ResolveLocksDone(c.startTS, *diagCtx.resolvingRecordToken)
		}
	}()
	for {
		// If lockWaitTime is set, refine the request's `WaitTimeout` field based on the remaining time budget.
		if action.LockWaitTime() > 0 && action.LockWaitTime() != kv.LockAlwaysWait {
			timeLeft := action.LockWaitTime() - (time.Since(lockWaitStartTime)).Milliseconds()
			if timeLeft <= 0 {
				req.PessimisticLock().WaitTimeout = kv.LockNoWait
			} else {
				req.PessimisticLock().WaitTimeout = timeLeft
			}
		}
		elapsed := uint64(time.Since(c.txn.startTime) / time.Millisecond)
		ttl := elapsed + atomic.LoadUint64(&ManagedLockTTL)
		if _, err := util.EvalFailpoint("shortPessimisticLockTTL"); err == nil {
			ttl = 1
			keys := make([]string, 0, len(mutations))
			for _, m := range mutations {
				keys = append(keys, hex.EncodeToString(m.Key))
			}
			logutil.BgLogger().Info(
				"[failpoint] injected lock ttl = 1 on pessimistic lock",
				zap.Uint64("txnStartTS", c.startTS),
				zap.Strings("keys", keys),
			)
		}
		req.PessimisticLock().LockTtl = ttl
		if _, err := util.EvalFailpoint("PessimisticLockErrWriteConflict"); err == nil {
			time.Sleep(300 * time.Millisecond)
			return errors.WithStack(&tikverr.ErrWriteConflict{WriteConflict: nil})
		}
		sender := locate.NewRegionRequestSender(c.store.GetRegionCache(), c.store.GetTiKVClient())
		startTime := time.Now()
		resp, _, err := sender.SendReq(bo, req, batch.region, client.ReadTimeoutShort)
		diagCtx.reqDuration = time.Since(startTime)
		diagCtx.sender = sender
		if action.LockCtx.Stats != nil {
			atomic.AddInt64(&action.LockCtx.Stats.LockRPCTime, int64(diagCtx.reqDuration))
			atomic.AddInt64(&action.LockCtx.Stats.LockRPCCount, 1)
		}
		if err != nil {
			return err
		}

		if action.wakeUpMode == kvrpcpb.PessimisticLockWakeUpMode_WakeUpModeNormal {
			finished, err := action.handlePessimisticLockResponseNormalMode(c, bo, &batch, mutations, resp, &diagCtx)
			if err != nil {
				return err
			}
			if finished {
				return nil
			}
		} else if action.wakeUpMode == kvrpcpb.PessimisticLockWakeUpMode_WakeUpModeForceLock {
			finished, err := action.handlePessimisticLockResponseForceLockMode(c, bo, &batch, mutations, resp, &diagCtx)
			if err != nil {
				return err
			}
			if finished {
				return nil
			}
		}
	}
}

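// handleRegionError backs off (except for a real EpochNotMatch error), re-locates the batch,
// and, if the keys no longer fall into the same region, re-dispatches the lock requests through
// pessimisticLockMutations. A returned finished=true tells the caller's retry loop that this
// batch needs no further retries.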
func (action actionPessimisticLock) handleRegionError(
	c *twoPhaseCommitter, bo *retry.Backoffer, batch *batchMutations, regionErr *errorpb.Error,
) (finished bool, err error) {
	// For other region errors and the fake region error, backoff because there's
	// something wrong. For a real EpochNotMatch error, don't backoff.
	if regionErr.GetEpochNotMatch() == nil || locate.IsFakeRegionError(regionErr) {
		err = bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String()))
		if err != nil {
			return true, err
		}
	}
	same, err := batch.relocate(bo, c.store.GetRegionCache())
	if err != nil {
		return true, err
	}
	if same {
		return false, nil
	}
	err = c.pessimisticLockMutations(bo, action.LockCtx, action.wakeUpMode, batch.mutations)
	return true, err
}

// When handling lock wait timeout, if the current lock was updated within this threshold (in
// milliseconds), do not try to resolve the lock. The default wait timeout in TiKV is 1 second;
// 300ms should be appropriate for common hot-update workloads.
const skipResolveThresholdMs = 300

// handleKeyErrorForResolve extracts resolvable locks from the key errors of a pessimistic lock
// response. It returns finished=true together with an error for unretryable errors such as
// AlreadyExist and Deadlock.
func (action actionPessimisticLock) handleKeyErrorForResolve(
	c *twoPhaseCommitter, keyErrs []*kvrpcpb.KeyError,
) (locks []*txnlock.Lock, finished bool, err error) {
	for _, keyErr := range keyErrs {
		// Check already exists error
		if alreadyExist := keyErr.GetAlreadyExist(); alreadyExist != nil {
			e := &tikverr.ErrKeyExist{AlreadyExist: alreadyExist}
			return nil, true, c.extractKeyExistsErr(e)
		}
		if deadlock := keyErr.Deadlock; deadlock != nil {
			return nil, true, errors.WithStack(&tikverr.ErrDeadlock{Deadlock: deadlock})
		}

		// Do not resolve the lock if it was updated recently, which indicates the transaction
		// holding the lock is most likely still alive. This should only happen on wait timeout.
		if lockInfo := keyErr.GetLocked(); lockInfo != nil && lockInfo.DurationToLastUpdateMs > 0 &&
			lockInfo.DurationToLastUpdateMs < skipResolveThresholdMs {
			continue
		}

		// Extract lock from key error
		lock, err1 := txnlock.ExtractLockFromKeyErr(keyErr)
		if err1 != nil {
			return nil, true, err1
		}
		locks = append(locks, lock)
	}
	if len(locks) == 0 {
		return nil, false, nil
	}
	return locks, false, nil
}

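// handlePessimisticLockResponseNormalMode handles a response received in WakeUpModeNormal.
// On success it records the returned values / existence information; on KeyIsLocked errors it
// tries to resolve the blocking locks. It returns finished=false when the caller should retry
// the same batch.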
func (action actionPessimisticLock) handlePessimisticLockResponseNormalMode(
	c *twoPhaseCommitter,
	bo *retry.Backoffer,
	batch *batchMutations,
	mutationsPb []*kvrpcpb.Mutation,
	resp *tikvrpc.Response,
	diagCtx *diagnosticContext,
) (finished bool, err error) {
	regionErr, err := resp.GetRegionError()
	if err != nil {
		return true, err
	}
	if regionErr != nil {
		return action.handleRegionError(c, bo, batch, regionErr)
	}
	if resp.Resp == nil {
		return true, errors.WithStack(tikverr.ErrBodyMissing)
	}
	lockResp := resp.Resp.(*kvrpcpb.PessimisticLockResponse)
	if len(lockResp.Results) != 0 {
		// We use the old protocol in this mode. The `Results` field should not be used.
		return true, errors.New("Pessimistic lock response corrupted")
	}
	keyErrs := lockResp.GetErrors()
	if len(keyErrs) == 0 {
		if action.LockCtx.Stats != nil {
			action.LockCtx.Stats.MergeReqDetails(
				diagCtx.reqDuration,
				batch.region.GetID(),
				diagCtx.sender.GetStoreAddr(),
				lockResp.ExecDetailsV2,
			)
		}

		if batch.isPrimary {
			// After locking the primary key, we should protect the primary lock from expiring
			// now in case locking the remaining keys takes a long time.
			c.run(c, action.LockCtx)
		}

		// Handle the case that the TiKV version is too old and doesn't support `CheckExistence`.
		// If `CheckExistence` is set, `ReturnValues` is not set, and `CheckExistence` is not supported, skip
		// retrieving values entirely (indicated by `skipRetrievingValue`) to avoid panicking.
		skipRetrievingValue := !action.ReturnValues && action.CheckExistence && len(lockResp.NotFounds) == 0
		if (action.ReturnValues || action.CheckExistence) && !skipRetrievingValue {
			action.ValuesLock.Lock()
			for i, mutation := range mutationsPb {
				var value []byte
				if action.ReturnValues {
					value = lockResp.Values[i]
				}
				var exists = !lockResp.NotFounds[i]
				action.Values[string(mutation.Key)] = kv.ReturnedValue{
					Value:  value,
					Exists: exists,
				}
			}
			action.ValuesLock.Unlock()
		}
		return true, nil
	}

	locks, finished, err := action.handleKeyErrorForResolve(c, keyErrs)
	if err != nil {
		return finished, err
	}
	if len(locks) == 0 {
		return false, nil
	}

	// Because we already waited on TiKV, there is no need to backoff here.
	// By default TiKV waits at most 3s (also the maximum wait value) when a lock error occurs.
	if diagCtx.resolvingRecordToken == nil {
		token := c.store.GetLockResolver().RecordResolvingLocks(locks, c.startTS)
		diagCtx.resolvingRecordToken = &token
	} else {
		c.store.GetLockResolver().UpdateResolvingLocks(locks, c.startTS, *diagCtx.resolvingRecordToken)
	}
	resolveLockOpts := txnlock.ResolveLocksOptions{
		CallerStartTS: 0,
		Locks:         locks,
	}
	if action.LockCtx.Stats != nil {
		resolveLockOpts.Detail = &action.LockCtx.Stats.ResolveLock
	}
	resolveLockRes, err := c.store.GetLockResolver().ResolveLocksWithOpts(bo, resolveLockOpts)
	if err != nil {
		return true, err
	}
	// If the returned TTL is not zero, there are still locks blocking us from acquiring the
	// pessimistic lock. Return an acquire-failed error when no-wait is set, or a timeout error if necessary.
	if resolveLockRes.TTL > 0 {
		if action.LockWaitTime() == kv.LockNoWait {
			return true, errors.WithStack(tikverr.ErrLockAcquireFailAndNoWaitSet)
		} else if action.LockWaitTime() == kv.LockAlwaysWait {
			// do nothing but keep waiting
		} else {
			// lockWaitTime is set, so return a wait timeout error if we are still blocked by a lock.
			if time.Since(action.WaitStartTime).Milliseconds() >= action.LockWaitTime() {
				return true, errors.WithStack(tikverr.ErrLockWaitTimeout)
			}
		}
		if action.LockCtx.PessimisticLockWaited != nil {
			atomic.StoreInt32(action.LockCtx.PessimisticLockWaited, 1)
		}
	}
	return false, nil
}

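// handlePessimisticLockResponseForceLockMode handles a response received in WakeUpModeForceLock,
// where a key may be locked even if a newer version exists (LockedWithConflict). Only a single
// key per batch is supported in this mode.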
func (action actionPessimisticLock) handlePessimisticLockResponseForceLockMode(
	c *twoPhaseCommitter,
	bo *retry.Backoffer,
	batch *batchMutations,
	mutationsPb []*kvrpcpb.Mutation,
	resp *tikvrpc.Response,
	diagCtx *diagnosticContext,
) (finished bool, err error) {
	regionErr, err := resp.GetRegionError()
	if err != nil {
		return true, err
	}
	if resp.Resp == nil {
		return true, errors.WithStack(tikverr.ErrBodyMissing)
	}
	lockResp := resp.Resp.(*kvrpcpb.PessimisticLockResponse)
	isMutationFailed := false
	keyErrs := lockResp.GetErrors()

	// We currently only allow a single key in ForceLock mode.
	if len(mutationsPb) > 1 || len(lockResp.Results) > 1 {
		panic("unreachable")
	}
	if batch.isPrimary &&
		len(lockResp.Results) > 0 &&
		lockResp.Results[0].Type != kvrpcpb.PessimisticLockKeyResultType_LockResultFailed {
		// After locking the primary key, we should protect the primary lock from expiring.
		c.run(c, action.LockCtx)
	}

	if len(lockResp.Results) > 0 {
		res := lockResp.Results[0]
		switch res.Type {
		case kvrpcpb.PessimisticLockKeyResultType_LockResultNormal:
			if action.ReturnValues {
				action.ValuesLock.Lock()
				action.Values[string(mutationsPb[0].Key)] = kv.ReturnedValue{
					Value:  res.Value,
					Exists: res.Existence,
				}
				action.ValuesLock.Unlock()
			} else if action.CheckExistence {
				action.ValuesLock.Lock()
				action.Values[string(mutationsPb[0].Key)] = kv.ReturnedValue{
					Exists: res.Existence,
				}
				action.ValuesLock.Unlock()
			}
		case kvrpcpb.PessimisticLockKeyResultType_LockResultLockedWithConflict:
			action.ValuesLock.Lock()
			if action.Values == nil {
				action.Values = make(map[string]kv.ReturnedValue, 1)
			}
			action.Values[string(mutationsPb[0].Key)] = kv.ReturnedValue{
				Value:                res.Value,
				Exists:               res.Existence,
				LockedWithConflictTS: res.LockedWithConflictTs,
			}
			if res.LockedWithConflictTs > action.MaxLockedWithConflictTS {
				action.MaxLockedWithConflictTS = res.LockedWithConflictTs
			}
			action.ValuesLock.Unlock()
		case kvrpcpb.PessimisticLockKeyResultType_LockResultFailed:
			isMutationFailed = true
		default:
			panic("unreachable")
		}
	}

	if len(lockResp.Results) > 0 && !isMutationFailed {
		if action.LockCtx.Stats != nil {
			action.LockCtx.Stats.MergeReqDetails(
				diagCtx.reqDuration,
				batch.region.GetID(),
				diagCtx.sender.GetStoreAddr(),
				lockResp.ExecDetailsV2,
			)
		}
	}

	locks, finished, err := action.handleKeyErrorForResolve(c, keyErrs)
	if err != nil {
		return finished, err
	}

	if regionErr != nil {
		return action.handleRegionError(c, bo, batch, regionErr)
	}

	if isMutationFailed {
		if len(locks) > 0 {
			// Because we already waited on TiKV, there is no need to backoff here.
			// By default TiKV waits at most 3s (also the maximum wait value) when a lock error occurs.
			if diagCtx.resolvingRecordToken == nil {
				token := c.store.GetLockResolver().RecordResolvingLocks(locks, c.startTS)
				diagCtx.resolvingRecordToken = &token
			} else {
				c.store.GetLockResolver().UpdateResolvingLocks(locks, c.startTS, *diagCtx.resolvingRecordToken)
			}
			resolveLockOpts := txnlock.ResolveLocksOptions{
				CallerStartTS: 0,
				Locks:         locks,
			}
			if action.LockCtx.Stats != nil {
				resolveLockOpts.Detail = &action.LockCtx.Stats.ResolveLock
			}
			resolveLockRes, err := c.store.GetLockResolver().ResolveLocksWithOpts(bo, resolveLockOpts)
			if err != nil {
				return true, err
			}
			// If the returned TTL is not zero, there are still locks blocking us from acquiring the
			// pessimistic lock. Return an acquire-failed error when no-wait is set, or a timeout error if necessary.
			if resolveLockRes.TTL > 0 {
				if action.LockWaitTime() == kv.LockNoWait {
					return true, errors.WithStack(tikverr.ErrLockAcquireFailAndNoWaitSet)
				} else if action.LockWaitTime() == kv.LockAlwaysWait {
					// do nothing but keep waiting
				} else {
					// lockWaitTime is set, so return a wait timeout error if we are still blocked by a lock.
					if time.Since(action.WaitStartTime).Milliseconds() >= action.LockWaitTime() {
						return true, errors.WithStack(tikverr.ErrLockWaitTimeout)
					}
				}
				if action.LockCtx.PessimisticLockWaited != nil {
					atomic.StoreInt32(action.LockCtx.PessimisticLockWaited, 1)
				}
			}
			return false, nil
		}

		// This can be the situation where KeyIsLocked errors were generated by wait timeout
		// and we decided not to resolve them. Instead, just retry.
		return false, nil
	}

	if len(locks) != 0 {
		// If the key error is KeyIsLocked, we assume the server must have set resp.Results,
		// so reaching here indicates a corrupted response.
		return true, errors.New("Pessimistic lock response corrupted")
	}

	if len(lockResp.Results) == 0 {
		// If the `Results` field is missing from the response, there must be either some unretryable
		// error in keyErrs or some region error, so the function must have returned in the logic above.
		// This is supposed to be an unreachable path if TiKV is implemented correctly.
		return true, errors.New("Pessimistic lock response corrupted")
	}

	return true, nil
}

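// handleSingleBatch rolls back the pessimistic locks of one batch of keys. If a region error is
// encountered, it backs off and re-dispatches the rollback through pessimisticRollbackMutations.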
func (actionPessimisticRollback) handleSingleBatch(
	c *twoPhaseCommitter, bo *retry.Backoffer, batch batchMutations,
) error {
	forUpdateTS := c.forUpdateTS
	if c.maxLockedWithConflictTS > forUpdateTS {
		forUpdateTS = c.maxLockedWithConflictTS
	}
	req := tikvrpc.NewRequest(
		tikvrpc.CmdPessimisticRollback,
		&kvrpcpb.PessimisticRollbackRequest{
			StartVersion: c.startTS,
			ForUpdateTs:  forUpdateTS,
			Keys:         batch.mutations.GetKeys(),
		},
	)
	req.RequestSource = util.RequestSourceFromCtx(bo.GetCtx())
	req.MaxExecutionDurationMs = uint64(client.MaxWriteExecutionTime.Milliseconds())
	resp, err := c.store.SendReq(bo, req, batch.region, client.ReadTimeoutShort)
	if err != nil {
		return err
	}
	regionErr, err := resp.GetRegionError()
	if err != nil {
		return err
	}
	if regionErr != nil {
		err = bo.Backoff(retry.BoRegionMiss, errors.New(regionErr.String()))
		if err != nil {
			return err
		}
		return c.pessimisticRollbackMutations(bo, batch.mutations)
	}
	return nil
}

// pessimisticLockMutations acquires pessimistic locks for the given mutations, batching them by
// region via doActionOnMutations.
func (c *twoPhaseCommitter) pessimisticLockMutations(
	bo *retry.Backoffer,
	lockCtx *kv.LockCtx,
	lockWaitMode kvrpcpb.PessimisticLockWakeUpMode,
	mutations CommitterMutations,
) error {
	if c.sessionID > 0 {
		if val, err := util.EvalFailpoint("beforePessimisticLock"); err == nil {
			// Pass multiple instructions in one string, delimited by commas, to trigger multiple behaviors, like
			// `return("delay,fail")`. Then they will be executed sequentially at once.
			if v, ok := val.(string); ok {
				for _, action := range strings.Split(v, ",") {
					if action == "delay" {
						duration := time.Duration(rand.Int63n(int64(time.Second) * 5))
						logutil.Logger(bo.GetCtx()).Info(
							"[failpoint] injected delay at pessimistic lock",
							zap.Uint64("txnStartTS", c.startTS),
							zap.Duration("duration", duration),
						)
						time.Sleep(duration)
					} else if action == "fail" {
						logutil.Logger(bo.GetCtx()).Info(
							"[failpoint] injected failure at pessimistic lock",
							zap.Uint64("txnStartTS", c.startTS),
						)
						return errors.New("injected failure at pessimistic lock")
					}
				}
			}
		}
	}
	return c.doActionOnMutations(
		bo,
		actionPessimisticLock{LockCtx: lockCtx, wakeUpMode: lockWaitMode, isInternal: c.txn.isInternal()},
		mutations,
	)
}

// pessimisticRollbackMutations rolls back the pessimistic locks that were acquired for the
// given mutations.
func (c *twoPhaseCommitter) pessimisticRollbackMutations(bo *retry.Backoffer, mutations CommitterMutations) error {
	isInternal := false
	if c.txn != nil {
		isInternal = c.txn.isInternal()
	} else {
		isInternal = c.isInternal
	}
	return c.doActionOnMutations(bo, actionPessimisticRollback{isInternal: isInternal}, mutations)
}