mirror of https://github.com/tikv/client-go.git
Make slow store filtering the highest priority in replica selector v2 (#1267)
* Add some logs
* Make slow store filtering the highest priority in replica selector v2
* Add non stale read case to TestMultiReplicaInOneAZ
* Enrich the multi replica in one AZ case but it failed...
* update test to adapt the fix on master branch
* Remove TestMultiReplicaInOneAZ

Signed-off-by: MyonKeminta <MyonKeminta@users.noreply.github.com>
Co-authored-by: MyonKeminta <MyonKeminta@users.noreply.github.com>
This commit is contained in:
parent 8fc819c1ca
commit 642a09bef1

go.sum
@@ -111,8 +111,6 @@ github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ
 github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
 github.com/tiancaiamao/gp v0.0.0-20221230034425-4025bc8a4d4a h1:J/YdBZ46WKpXsxsW93SG+q0F8KI+yFrcIDT4c/RNoc4=
 github.com/tiancaiamao/gp v0.0.0-20221230034425-4025bc8a4d4a/go.mod h1:h4xBhSNtOeEosLJ4P7JyKXX7Cabg7AVkWCK5gV2vOrM=
-github.com/tikv/pd/client v0.0.0-20240319071242-d3b94c97c12b h1:LUeYme5++BRU4DSEi2BmdIki0dRki4dFt2/8IhmIXy4=
-github.com/tikv/pd/client v0.0.0-20240319071242-d3b94c97c12b/go.mod h1:Z/QAgOt29zvwBTd0H6pdx45VO6KRNc/O/DzGkVmSyZg=
 github.com/tikv/pd/client v0.0.0-20240320081713-c00c42e77b31 h1:qiIt9AyEUW5yabTbCIgwxSMKi3p8ZE/YAk1Z6+fJq8M=
 github.com/tikv/pd/client v0.0.0-20240320081713-c00c42e77b31/go.mod h1:Z/QAgOt29zvwBTd0H6pdx45VO6KRNc/O/DzGkVmSyZg=
 github.com/twmb/murmur3 v1.1.3 h1:D83U0XYKcHRYwYIpBKf3Pks91Z0Byda/9SJ8B6EMRcA=
@@ -655,8 +655,23 @@ type RegionCache struct {
     clusterID uint64
 }
 
+type regionCacheOptions struct {
+    noHealthTick bool
+}
+
+type RegionCacheOpt func(*regionCacheOptions)
+
+func RegionCacheNoHealthTick(o *regionCacheOptions) {
+    o.noHealthTick = true
+}
+
 // NewRegionCache creates a RegionCache.
-func NewRegionCache(pdClient pd.Client) *RegionCache {
+func NewRegionCache(pdClient pd.Client, opt ...RegionCacheOpt) *RegionCache {
+    var options regionCacheOptions
+    for _, o := range opt {
+        o(&options)
+    }
+
     c := &RegionCache{
         pdClient: pdClient,
     }
@@ -705,7 +720,9 @@ func NewRegionCache(pdClient pd.Client) *RegionCache {
         needCheckStores = c.checkAndResolve(needCheckStores[:0], func(s *Store) bool { return filter(s.getResolveState()) })
         return false
     }, time.Duration(refreshStoreInterval/4)*time.Second, c.getCheckStoreEvents())
-    c.bg.schedule(repeat(c.checkAndUpdateStoreHealthStatus), time.Duration(refreshStoreInterval/4)*time.Second)
+    if !options.noHealthTick {
+        c.bg.schedule(repeat(c.checkAndUpdateStoreHealthStatus), time.Duration(refreshStoreInterval/4)*time.Second)
+    }
     c.bg.schedule(repeat(c.reportStoreReplicaFlows), time.Duration(refreshStoreInterval/2)*time.Second)
     if refreshCacheInterval := config.GetGlobalConfig().RegionsRefreshInterval; refreshCacheInterval > 0 {
         c.bg.schedule(func(ctx context.Context, _ time.Time) bool {
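
Note: the change above is the standard Go functional-options pattern — a variadic list of functions, each mutating a private options struct before construction. Below is a minimal, self-contained sketch of the same shape; the Cache type and option names are illustrative stand-ins, not the real RegionCache API. The test setup later in this diff uses the real API in exactly this way: NewRegionCache(pdCli, RegionCacheNoHealthTick).

package main

import "fmt"

// Mirrors the shape of the diff's regionCacheOptions / RegionCacheOpt pair.
type cacheOptions struct {
    noHealthTick bool
}

type CacheOpt func(*cacheOptions)

func CacheNoHealthTick(o *cacheOptions) {
    o.noHealthTick = true
}

// Cache is a stand-in for RegionCache.
type Cache struct {
    healthTickEnabled bool
}

func NewCache(opt ...CacheOpt) *Cache {
    var options cacheOptions
    for _, o := range opt {
        o(&options)
    }
    // The background health tick is enabled only when not opted out,
    // mirroring the `if !options.noHealthTick` guard in the diff above.
    return &Cache{healthTickEnabled: !options.noHealthTick}
}

func main() {
    fmt.Println(NewCache().healthTickEnabled)                  // true: default behavior
    fmt.Println(NewCache(CacheNoHealthTick).healthTickEnabled) // false: tick disabled, as in tests
}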
@@ -2088,7 +2088,7 @@ func (s *testRegionCacheSuite) TestHealthCheckWithStoreReplace() {
 }
 
 func (s *testRegionCacheSuite) TestTiKVSideSlowScore() {
-    stats := newStoreHealthStatus()
+    stats := newStoreHealthStatus(1)
     s.LessOrEqual(stats.GetHealthStatusDetail().TiKVSideSlowScore, int64(1))
     now := time.Now()
     stats.tick(now)
@@ -2124,7 +2124,7 @@ func (s *testRegionCacheSuite) TestTiKVSideSlowScore() {
 }
 
 func (s *testRegionCacheSuite) TestStoreHealthStatus() {
-    stats := newStoreHealthStatus()
+    stats := newStoreHealthStatus(1)
     now := time.Now()
     s.False(stats.IsSlow())
 
@@ -236,7 +236,7 @@ type ReplicaSelectMixedStrategy struct {
 func (s *ReplicaSelectMixedStrategy) next(selector *replicaSelectorV2, region *Region) *replica {
     replicas := selector.replicas
     maxScoreIdxes := make([]int, 0, len(replicas))
-    maxScore := -1
+    var maxScore storeSelectionScore = -1
     reloadRegion := false
     for i, r := range replicas {
         epochStale := r.isEpochStale()
@@ -289,7 +289,7 @@ func (s *ReplicaSelectMixedStrategy) isCandidate(r *replica, isLeader bool, epoc
     if r.dataIsNotReady && !isLeader {
         // If the replica is failed by data not ready with stale read, we can retry it with replica-read.
         // after https://github.com/tikv/tikv/pull/15726, the leader will not return DataIsNotReady error,
-        // then no need to retry leader again, if you try it again, you may got a NotLeader error.
+        // then no need to retry leader again. If you try it again, you may get a NotLeader error.
         maxAttempt = 2
     }
     if r.isExhausted(maxAttempt, 0) {
@@ -310,20 +310,51 @@ func (s *ReplicaSelectMixedStrategy) isCandidate(r *replica, isLeader bool, epoc
     return true
 }
 
+type storeSelectionScore int64
+
 const (
     // The definition of the score is:
     // MSB                                                                                             LSB
-    // [unused bits][1 bit: LabelMatches][1 bit: PreferLeader][2 bits: NormalPeer + NotSlow]
-    flagLabelMatches = 1 << 4
-    flagPreferLeader = 1 << 3
-    flagNormalPeer   = 1 << 2
-    flagNotSlow      = 1 << 1
-    flagNotAttempt   = 1
+    // [unused bits][1 bit: NotSlow][1 bit: LabelMatches][1 bit: PreferLeader][1 bit: NormalPeer][1 bit: NotAttempted]
+    flagNotAttempted storeSelectionScore = 1 << iota
+    flagNormalPeer
+    flagPreferLeader
+    flagLabelMatches
+    flagNotSlow
 )
 
+func (s storeSelectionScore) String() string {
+    if s == 0 {
+        return "0"
+    }
+    res := ""
+    appendFactor := func(name string) {
+        if len(res) != 0 {
+            res += "|"
+        }
+        res += name
+    }
+    if (s & flagNotSlow) != 0 {
+        appendFactor("NotSlow")
+    }
+    if (s & flagLabelMatches) != 0 {
+        appendFactor("LabelMatches")
+    }
+    if (s & flagPreferLeader) != 0 {
+        appendFactor("PreferLeader")
+    }
+    if (s & flagNormalPeer) != 0 {
+        appendFactor("NormalPeer")
+    }
+    if (s & flagNotAttempted) != 0 {
+        appendFactor("NotAttempted")
+    }
+    return res
+}
+
 // calculateScore calculates the score of the replica.
-func (s *ReplicaSelectMixedStrategy) calculateScore(r *replica, isLeader bool) int {
-    score := 0
+func (s *ReplicaSelectMixedStrategy) calculateScore(r *replica, isLeader bool) storeSelectionScore {
+    var score storeSelectionScore = 0
     if r.store.IsStoreMatch(s.stores) && r.store.IsLabelsMatch(s.labels) {
         score |= flagLabelMatches
     }
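
Note: this flag reordering is the heart of the commit. flagNotSlow moves from the second-lowest bit to the most significant used bit, so a healthy replica now outranks a slow one regardless of which lower-order factors the slow one wins on. A small self-contained check of that property; the new bit layout is copied from the diff, while the old layout is reproduced only for comparison:

package main

import "fmt"

const (
    // New layout: NotSlow is the highest used bit.
    flagNotAttempted int64 = 1 << iota
    flagNormalPeer
    flagPreferLeader
    flagLabelMatches
    flagNotSlow
)

const (
    // Old layout, for comparison: LabelMatches dominated NotSlow.
    oldFlagLabelMatches int64 = 1 << 4
    oldFlagNormalPeer   int64 = 1 << 2
    oldFlagNotSlow      int64 = 1 << 1
    oldFlagNotAttempt   int64 = 1
)

func main() {
    // A slow replica that matches labels vs. a healthy replica that doesn't.
    slowMatching := flagLabelMatches | flagNormalPeer | flagNotAttempted // 11
    healthyOther := flagNotSlow | flagNormalPeer | flagNotAttempted      // 19
    fmt.Println(healthyOther > slowMatching) // true: slowness now loses first

    oldSlowMatching := oldFlagLabelMatches | oldFlagNormalPeer | oldFlagNotAttempt // 21
    oldHealthyOther := oldFlagNotSlow | oldFlagNormalPeer | oldFlagNotAttempt      // 7
    fmt.Println(oldHealthyOther > oldSlowMatching) // false: labels used to dominate
}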
@@ -338,7 +369,8 @@ func (s *ReplicaSelectMixedStrategy) calculateScore(r *replica, isLeader bool) i
         }
     } else if s.tryLeader {
         if len(s.labels) > 0 {
-            // When the leader has matching labels, prefer leader than other mismatching peers.
+            // When label matching is enabled, prefer selecting the leader among replicas that have the same
+            // label-matching results.
             score |= flagPreferLeader
         } else {
             score |= flagNormalPeer
@@ -357,7 +389,7 @@ func (s *ReplicaSelectMixedStrategy) calculateScore(r *replica, isLeader bool) i
         score |= flagNotSlow
     }
     if r.attempts == 0 {
-        score |= flagNotAttempt
+        score |= flagNotAttempted
     }
     return score
 }
@@ -52,7 +52,8 @@ func (s *testReplicaSelectorSuite) SetupTest(t *testing.T) {
     s.cluster = mocktikv.NewCluster(s.mvccStore)
     s.storeIDs, s.peerIDs, s.regionID, s.leaderPeer = mocktikv.BootstrapWithMultiStores(s.cluster, 3)
     pdCli := &CodecPDClient{mocktikv.NewPDClient(s.cluster), apicodec.NewCodecV1(apicodec.ModeTxn)}
-    s.cache = NewRegionCache(pdCli)
+    // Disable the tick on health status.
+    s.cache = NewRegionCache(pdCli, RegionCacheNoHealthTick)
     s.bo = retry.NewNoopBackoff(context.Background())
     s.SetT(t)
     s.SetS(s)
@@ -166,29 +167,29 @@ func TestReplicaSelectorCalculateScore(t *testing.T) {
         score := strategy.calculateScore(r, isLeader)
         s.Equal(r.store.healthStatus.IsSlow(), false)
         if isLeader {
-            s.Equal(score, flagLabelMatches+flagNotSlow+flagNotAttempt)
+            s.Equal(score, flagLabelMatches+flagNotSlow+flagNotAttempted)
         } else {
-            s.Equal(score, flagLabelMatches+flagNormalPeer+flagNotSlow+flagNotAttempt)
+            s.Equal(score, flagLabelMatches+flagNormalPeer+flagNotSlow+flagNotAttempted)
         }
         r.store.healthStatus.markAlreadySlow()
         s.Equal(r.store.healthStatus.IsSlow(), true)
         score = strategy.calculateScore(r, isLeader)
         if isLeader {
-            s.Equal(score, flagLabelMatches+flagNotAttempt)
+            s.Equal(score, flagLabelMatches+flagNotAttempted)
         } else {
-            s.Equal(score, flagLabelMatches+flagNormalPeer+flagNotAttempt)
+            s.Equal(score, flagLabelMatches+flagNormalPeer+flagNotAttempted)
         }
         strategy.tryLeader = true
         score = strategy.calculateScore(r, isLeader)
-        s.Equal(score, flagLabelMatches+flagNormalPeer+flagNotAttempt)
+        s.Equal(score, flagLabelMatches+flagNormalPeer+flagNotAttempted)
         strategy.preferLeader = true
         score = strategy.calculateScore(r, isLeader)
-        s.Equal(score, flagLabelMatches+flagNormalPeer+flagNotAttempt)
+        s.Equal(score, flagLabelMatches+flagNormalPeer+flagNotAttempted)
         strategy.learnerOnly = true
         strategy.tryLeader = false
         strategy.preferLeader = false
         score = strategy.calculateScore(r, isLeader)
-        s.Equal(score, flagLabelMatches+flagNotAttempt)
+        s.Equal(score, flagLabelMatches+flagNotAttempted)
         labels := []*metapb.StoreLabel{
             {
                 Key: "zone",
@@ -197,7 +198,7 @@ func TestReplicaSelectorCalculateScore(t *testing.T) {
         }
         strategy.labels = labels
         score = strategy.calculateScore(r, isLeader)
-        s.Equal(score, flagNotAttempt)
+        s.Equal(score, flagNotAttempted)
 
         strategy = ReplicaSelectMixedStrategy{
             leaderIdx: rc.getStore().workTiKVIdx,
@@ -206,9 +207,9 @@ func TestReplicaSelectorCalculateScore(t *testing.T) {
         }
         score = strategy.calculateScore(r, isLeader)
         if isLeader {
-            s.Equal(score, flagPreferLeader+flagNotAttempt)
+            s.Equal(score, flagPreferLeader+flagNotAttempted)
         } else {
-            s.Equal(score, flagNormalPeer+flagNotAttempt)
+            s.Equal(score, flagNormalPeer+flagNotAttempted)
         }
 
         strategy = ReplicaSelectMixedStrategy{
@@ -217,10 +218,10 @@ func TestReplicaSelectorCalculateScore(t *testing.T) {
             labels: labels,
         }
         score = strategy.calculateScore(r, isLeader)
-        s.Equal(score, flagNormalPeer+flagNotAttempt)
+        s.Equal(score, flagNormalPeer+flagNotAttempted)
         r.store.labels = labels
         score = strategy.calculateScore(r, isLeader)
-        s.Equal(score, flagLabelMatches+flagNormalPeer+flagNotAttempt)
+        s.Equal(score, flagLabelMatches+flagNormalPeer+flagNotAttempted)
         r.store.labels = nil
     }
 }
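
Note on the assertions above: the tests combine flags with + while calculateScore composes them with |. The two agree because every flag is a distinct single bit, so no carry can occur. A quick sanity check, with the flag values copied from the diff:

package main

import "fmt"

const (
    flagNotAttempted int64 = 1 << iota
    flagNormalPeer
    flagPreferLeader
    flagLabelMatches
    flagNotSlow
)

func main() {
    sum := flagLabelMatches + flagNormalPeer + flagNotAttempted
    or := flagLabelMatches | flagNormalPeer | flagNotAttempted
    fmt.Println(sum == or) // true: disjoint bits make + and | interchangeable
}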
@@ -2570,6 +2571,159 @@ func TestReplicaReadAccessPathByLearnerCase(t *testing.T) {
     s.True(s.runCaseAndCompare(ca))
 }
 
+func TestReplicaReadAvoidSlowStore(t *testing.T) {
+    s := new(testReplicaSelectorSuite)
+    s.SetupTest(t)
+    defer s.TearDownTest()
+
+    s.changeRegionLeader(3)
+    store, exists := s.cache.getStore(1)
+    s.True(exists)
+
+    for _, staleRead := range []bool{false, true} {
+        for _, withLabel := range []bool{false, true} {
+            var label *metapb.StoreLabel
+            if withLabel {
+                label = &metapb.StoreLabel{Key: "id", Value: "1"}
+            }
+
+            s.T().Logf("test case: stale read: %v, with label: %v, slow: false", staleRead, withLabel)
+
+            ca := replicaSelectorAccessPathCase{
+                reqType:         tikvrpc.CmdGet,
+                readType:        kv.ReplicaReadMixed,
+                staleRead:       staleRead,
+                timeout:         0,
+                busyThresholdMs: 0,
+                label:           label,
+                accessErr:       []RegionErrorType{},
+                expect: &accessPathResult{
+                    accessPath: []string{
+                        fmt.Sprintf("{addr: store1, replica-read: %v, stale-read: %v}", !staleRead, staleRead),
+                    },
+                    respErr:         "",
+                    respRegionError: nil,
+                    backoffCnt:      0,
+                    backoffDetail:   []string{},
+                    regionIsValid:   true,
+                },
+            }
+            s.True(s.runCaseAndCompare(ca))
+
+            s.T().Logf("test case: stale read: %v, with label: %v, slow: true", staleRead, withLabel)
+            expectedFirstStore := 2
+            if withLabel {
+                // Leader is preferred in this case
+                expectedFirstStore = 3
+            }
+            ca = replicaSelectorAccessPathCase{
+                reqType:         tikvrpc.CmdGet,
+                readType:        kv.ReplicaReadMixed,
+                staleRead:       staleRead,
+                timeout:         0,
+                busyThresholdMs: 0,
+                label:           label,
+                accessErr:       []RegionErrorType{},
+                expect: &accessPathResult{
+                    accessPath: []string{
+                        fmt.Sprintf("{addr: store%v, replica-read: %v, stale-read: %v}", expectedFirstStore, !staleRead, staleRead),
+                    },
+                    respErr:         "",
+                    respRegionError: nil,
+                    backoffCnt:      0,
+                    backoffDetail:   []string{},
+                    regionIsValid:   true,
+                },
+                beforeRun: func() {
+                    s.resetStoreState()
+                    store.healthStatus.updateTiKVServerSideSlowScore(100, time.Now())
+                },
+            }
+            // v1 doesn't support avoiding slow stores. We only test this on v2.
+            s.True(s.runCase(ca, true))
+
+            s.T().Logf("test case: stale read: %v, with label: %v, slow: false, encounter err: true", staleRead, withLabel)
+            var expectedSecondPath string
+            if staleRead {
+                // Retry leader, and fall back to leader-read mode.
+                expectedSecondPath = "{addr: store3, replica-read: false, stale-read: false}"
+            } else {
+                if withLabel {
+                    // Prefer retrying the leader.
+                    expectedSecondPath = "{addr: store3, replica-read: true, stale-read: false}"
+                } else {
+                    // Retry any other replica.
+                    expectedSecondPath = "{addr: store2, replica-read: true, stale-read: false}"
+                }
+            }
+            ca = replicaSelectorAccessPathCase{
+                reqType:         tikvrpc.CmdGet,
+                readType:        kv.ReplicaReadMixed,
+                staleRead:       staleRead,
+                timeout:         0,
+                busyThresholdMs: 0,
+                label:           label,
+                accessErr:       []RegionErrorType{ServerIsBusyErr},
+                expect: &accessPathResult{
+                    accessPath: []string{
+                        fmt.Sprintf("{addr: store1, replica-read: %v, stale-read: %v}", !staleRead, staleRead),
+                        // Retry leader.
+                        // For stale read, it falls back to leader read. However, replica-read doesn't do so.
+                        expectedSecondPath,
+                    },
+                    respErr:         "",
+                    respRegionError: nil,
+                    backoffCnt:      0,
+                    backoffDetail:   []string{},
+                    regionIsValid:   true,
+                },
+            }
+            s.True(s.runCaseAndCompare(ca))
+
+            s.T().Logf("test case: stale read: %v, with label: %v, slow: true, encounter err: true", staleRead, withLabel)
+            if expectedFirstStore == 3 {
+                // Retry on store 2, which is a follower.
+                // Stale-read mode falls back to replica-read mode.
+                expectedSecondPath = "{addr: store2, replica-read: true, stale-read: false}"
+            } else {
+                if staleRead {
+                    // Retry in leader read mode
+                    expectedSecondPath = "{addr: store3, replica-read: false, stale-read: false}"
+                } else {
+                    // Retry with the same mode, which is replica-read mode.
+                    expectedSecondPath = "{addr: store3, replica-read: true, stale-read: false}"
+                }
+            }
+
+            ca = replicaSelectorAccessPathCase{
+                reqType:         tikvrpc.CmdGet,
+                readType:        kv.ReplicaReadMixed,
+                staleRead:       staleRead,
+                timeout:         0,
+                busyThresholdMs: 0,
+                label:           label,
+                accessErr:       []RegionErrorType{ServerIsBusyErr},
+                expect: &accessPathResult{
+                    accessPath: []string{
+                        fmt.Sprintf("{addr: store%v, replica-read: %v, stale-read: %v}", expectedFirstStore, !staleRead, staleRead),
+                        expectedSecondPath,
+                    },
+                    respErr:         "",
+                    respRegionError: nil,
+                    backoffCnt:      0,
+                    backoffDetail:   []string{},
+                    regionIsValid:   true,
+                },
+                beforeRun: func() {
+                    s.resetStoreState()
+                    store.healthStatus.updateTiKVServerSideSlowScore(100, time.Now())
+                },
+            }
+            s.True(s.runCase(ca, true))
+        }
+    }
+}
+
 func TestReplicaReadAccessPathByGenError(t *testing.T) {
     s := new(testReplicaSelectorSuite)
     s.SetupTest(t)
@@ -2934,7 +3088,7 @@ func (s *testReplicaSelectorSuite) resetStoreState() {
     for _, store := range rc.getStore().stores {
         store.loadStats.Store(nil)
         store.healthStatus.clientSideSlowScore.resetSlowScore()
-        store.healthStatus.updateTiKVServerSideSlowScore(0, time.Now())
+        store.healthStatus.resetTiKVServerSideSlowScoreForTest()
         store.healthStatus.updateSlowFlag()
         atomic.StoreUint32(&store.livenessState, uint32(reachable))
         store.setResolveState(resolved)
@@ -124,7 +124,7 @@ func (ss *SlowScoreStat) updateSlowScore() {
     }
     atomic.CompareAndSwapUint64(&ss.avgTimecost, avgTimecost, ss.tsCntSlidingWindow.Avg())
 
-    // Resets the counter of inteval timecost
+    // Resets the counter of interval timecost
     atomic.StoreUint64(&ss.intervalTimecost, 0)
     atomic.StoreUint64(&ss.intervalUpdCount, 0)
 }
@@ -155,11 +155,17 @@ func (ss *SlowScoreStat) markAlreadySlow() {
 
 // resetSlowScore resets the slow score to 0. It's used for test.
 func (ss *SlowScoreStat) resetSlowScore() {
-    atomic.StoreUint64(&ss.avgScore, 0)
+    *ss = SlowScoreStat{
+        avgScore: 1,
+    }
 }
 
 func (ss *SlowScoreStat) isSlow() bool {
-    return ss.getSlowScore() >= slowScoreThreshold
+    return clientSideSlowScoreIsSlow(ss.getSlowScore())
+}
+
+func clientSideSlowScoreIsSlow(value uint64) bool {
+    return value >= slowScoreThreshold
 }
 
 // replicaFlowsType indicates the type of the destination replica of flows.
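
Note: extracting clientSideSlowScoreIsSlow lets HealthStatusDetail.IsSlow (added further down) share the exact threshold comparison with SlowScoreStat.isSlow instead of duplicating it; resetSlowScore also now reinitializes the whole struct with avgScore set to 1 rather than zeroing a single field. A standalone sketch of the helper follows; the threshold value 80 is an assumption for illustration, standing in for the package's real slowScoreThreshold constant:

package main

import "fmt"

const slowScoreThreshold = 80 // assumed value, stands in for the package constant

func clientSideSlowScoreIsSlow(value uint64) bool {
    return value >= slowScoreThreshold
}

func main() {
    fmt.Println(clientSideSlowScoreIsSlow(1))   // false: a freshly reset score
    fmt.Println(clientSideSlowScoreIsSlow(100)) // true: a saturated score
}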
@@ -170,7 +176,7 @@ const (
     toLeader replicaFlowsType = iota
     // toFollower indicates that flows are sent to followers' replica
     toFollower
-    // numflowsDestType reserved to keep max replicaFlowsType value.
+    // numReplicaFlowsType is reserved to keep max replicaFlowsType value.
     numReplicaFlowsType
 )
@@ -214,7 +214,7 @@ func newStore(
         peerAddr: peerAddr,
         saddr:    statusAddr,
         // Make sure healthStatus field is never null.
-        healthStatus: newStoreHealthStatus(),
+        healthStatus: newStoreHealthStatus(id),
     }
 }
@@ -223,7 +223,7 @@ func newUninitializedStore(id uint64) *Store {
     return &Store{
         storeID: id,
         // Make sure healthStatus field is never null.
-        healthStatus: newStoreHealthStatus(),
+        healthStatus: newStoreHealthStatus(id),
     }
 }
@@ -794,6 +794,9 @@ const (
 )
 
 type StoreHealthStatus struct {
+    // Used for logging.
+    storeID uint64
+
     isSlow atomic.Bool
 
     // A statistic for counting the request latency to this store
@@ -816,8 +819,18 @@ type HealthStatusDetail struct {
     TiKVSideSlowScore int64
 }
 
-func newStoreHealthStatus() *StoreHealthStatus {
-    return &StoreHealthStatus{}
+func (d HealthStatusDetail) IsSlow() bool {
+    return clientSideSlowScoreIsSlow(uint64(d.ClientSideSlowScore)) || d.TiKVSideSlowScore >= tikvSlowScoreSlowThreshold
+}
+
+func (d HealthStatusDetail) String() string {
+    return fmt.Sprintf("{ ClientSideSlowScore: %d, TiKVSideSlowScore: %d }", d.ClientSideSlowScore, d.TiKVSideSlowScore)
+}
+
+func newStoreHealthStatus(storeID uint64) *StoreHealthStatus {
+    return &StoreHealthStatus{
+        storeID: storeID,
+    }
 }
 
 // IsSlow returns whether current Store is slow.
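
Note: HealthStatusDetail now carries the slow decision itself — IsSlow ORs the two slow signals together, and String gives the log line added in updateSlowFlag (next hunk) a readable payload. A self-contained sketch of the two methods; both threshold constants here are assumed stand-ins, not the package's real values:

package main

import "fmt"

const (
    slowScoreThreshold         = 80 // assumed client-side threshold
    tikvSlowScoreSlowThreshold = 80 // assumed TiKV-side threshold
)

type HealthStatusDetail struct {
    ClientSideSlowScore int64
    TiKVSideSlowScore   int64
}

func (d HealthStatusDetail) IsSlow() bool {
    return uint64(d.ClientSideSlowScore) >= slowScoreThreshold ||
        d.TiKVSideSlowScore >= tikvSlowScoreSlowThreshold
}

func (d HealthStatusDetail) String() string {
    return fmt.Sprintf("{ ClientSideSlowScore: %d, TiKVSideSlowScore: %d }", d.ClientSideSlowScore, d.TiKVSideSlowScore)
}

func main() {
    d := HealthStatusDetail{ClientSideSlowScore: 1, TiKVSideSlowScore: 100}
    fmt.Println(d.IsSlow(), d) // true { ClientSideSlowScore: 1, TiKVSideSlowScore: 100 }
}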
@@ -935,9 +948,18 @@ func (s *StoreHealthStatus) updateTiKVServerSideSlowScore(score int64, currTime
     s.tikvSideSlowScore.lastUpdateTime.Store(newUpdateTime)
 }
 
+func (s *StoreHealthStatus) resetTiKVServerSideSlowScoreForTest() {
+    s.setTiKVSlowScoreLastUpdateTimeForTest(time.Now().Add(-time.Hour * 2))
+    s.updateTiKVServerSideSlowScore(1, time.Now().Add(-time.Hour))
+}
+
 func (s *StoreHealthStatus) updateSlowFlag() {
-    isSlow := s.clientSideSlowScore.isSlow() || s.tikvSideSlowScore.score.Load() >= tikvSlowScoreSlowThreshold
-    s.isSlow.Store(isSlow)
+    healthDetail := s.GetHealthStatusDetail()
+    isSlow := healthDetail.IsSlow()
+    old := s.isSlow.Swap(isSlow)
+    if old != isSlow {
+        logutil.BgLogger().Info("store health status changed", zap.Uint64("storeID", s.storeID), zap.Bool("isSlow", isSlow), zap.Stringer("healthDetail", healthDetail))
+    }
 }
 
 // setTiKVSlowScoreLastUpdateTimeForTest force sets last update time of TiKV server side slow score to specified value.
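
Note: the rewritten updateSlowFlag uses atomic.Bool.Swap, which returns the previous value, so the "store health status changed" line is logged only on an actual transition rather than on every tick. A minimal sketch of the idiom, with fmt standing in for the zap-based logutil logger:

package main

import (
    "fmt"
    "sync/atomic"
)

type status struct {
    isSlow atomic.Bool
}

func (s *status) updateSlowFlag(isSlow bool) {
    old := s.isSlow.Swap(isSlow) // atomically sets the flag and returns the prior value
    if old != isSlow {
        fmt.Printf("store health status changed: isSlow=%v\n", isSlow)
    }
}

func main() {
    var s status
    s.updateSlowFlag(true)  // logs: false -> true
    s.updateSlowFlag(true)  // silent: no transition
    s.updateSlowFlag(false) // logs: true -> false
}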