chore: update pex replica clean logic (#3272)

Signed-off-by: Jim Ma <majinjing3@gmail.com>
This commit is contained in:
Jim Ma 2024-05-16 15:04:01 +08:00 committed by GitHub
parent 756b6b49ea
commit 3ddf37acfb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 51 additions and 26 deletions

View File

@ -983,4 +983,6 @@ type PeerExchangeOption struct {
ReSyncInterval time.Duration `mapstructure:"reSyncInterval" yaml:"reSyncInterval"`
// ReplicaThreshold is used to keep the number of replicas across all peers from exceeding the threshold, to save storage
ReplicaThreshold int `mapstructure:"replicaThreshold" yaml:"replicaThreshold"`
// ReplicaCleanPercentage is the percentage probability of cleaning the local replica when the threshold is reached, available values: [0, 100]
ReplicaCleanPercentage int32 `mapstructure:"replicaCleanPercentage" yaml:"replicaCleanPercentage"`
}

View File

@ -196,6 +196,7 @@ var peerHostConfig = func() *DaemonOption {
InitialBroadcastDelay: 3 * time.Minute,
ReSyncInterval: 10 * time.Minute,
ReplicaThreshold: 2,
ReplicaCleanPercentage: 1,
},
}
}

View File

@ -196,6 +196,7 @@ var peerHostConfig = func() *DaemonOption {
InitialBroadcastDelay: 3 * time.Minute,
ReSyncInterval: 10 * time.Minute,
ReplicaThreshold: 2,
ReplicaCleanPercentage: 1,
},
}
}

View File

@ -257,7 +257,8 @@ func New(opt *config.DaemonOption, d dfpath.Dfpath) (Daemon, error) {
},
pex.WithInitialRetryInterval(opt.PeerExchange.InitialInterval),
pex.WithReSyncInterval(opt.PeerExchange.ReSyncInterval),
pex.WithReplicaThreshold(opt.PeerExchange.ReplicaThreshold))
pex.WithReplicaThreshold(opt.PeerExchange.ReplicaThreshold),
pex.WithReplicaCleanPercentage(opt.PeerExchange.ReplicaCleanPercentage))
if err != nil {
return nil, err
}

View File

@ -46,6 +46,7 @@ type peerExchangeConfig struct {
initialRetryInterval time.Duration
reSyncInterval time.Duration
replicaThreshold int
replicaCleanPercentage int32
}
func WithName(name string) func(*memberlist.Config, *peerExchangeConfig) {
@ -102,6 +103,14 @@ func WithReplicaThreshold(threshold int) func(*memberlist.Config, *peerExchangeC
}
}
// WithReplicaCleanPercentage returns an option that stores percentage as the
// replica clean percentage of the peer exchange config.
//
// Only positive values are applied; zero or negative values leave the config
// untouched. The memberlist config is not modified by this option.
func WithReplicaCleanPercentage(percentage int32) func(*memberlist.Config, *peerExchangeConfig) {
	return func(_ *memberlist.Config, cfg *peerExchangeConfig) {
		// Non-positive input is ignored so that whatever value is already set stays in place.
		if percentage <= 0 {
			return
		}
		cfg.replicaCleanPercentage = percentage
	}
}
func NewPeerExchange(
reclaim ReclaimFunc,
lister InitialMemberLister,
@ -130,6 +139,7 @@ func NewPeerExchange(
logger.Infof("peer exchange initial retry interval: %s", pexConfig.initialRetryInterval)
logger.Infof("peer exchange re-sync interval: %s", pexConfig.reSyncInterval)
logger.Infof("peer exchange replica threshold: %d", pexConfig.replicaThreshold)
logger.Infof("peer exchange replica clean percentage: %d", pexConfig.replicaCleanPercentage)
pex := &peerExchange{
config: pexConfig,
@ -168,7 +178,11 @@ func (p *peerExchange) SearchPeer(task string) SearchPeerResult {
case SearchPeerResultTypeLocal:
// check replica threshold and reclaim local cache
if len(searchPeerResult.Peers) > p.config.replicaThreshold {
p.tryReclaim(task, searchPeerResult)
if p.tryReclaim(task, searchPeerResult) {
// change result type to remote and drop local peer
searchPeerResult.Type = SearchPeerResultTypeRemote
searchPeerResult.Peers = searchPeerResult.Peers[1:]
}
}
case SearchPeerResultTypeRemote:
if len(searchPeerResult.Peers) < p.config.replicaThreshold {
@ -179,18 +193,24 @@ func (p *peerExchange) SearchPeer(task string) SearchPeerResult {
return searchPeerResult
}
// tryReclaim randomly decides whether to reclaim the local peer's cache for task.
//
// NOTE(review): this span comes from a rendered diff, so lines of the previous
// version (the signature without a result, the fixed `r.Int31n(100) == 0` check,
// the Type assignment and one closing brace) appear to be interleaved with the
// new ones — confirm against the real file. The description below covers the
// version that returns bool.
//
// It returns false without reclaiming when replicaCleanPercentage is 0, or when
// a random draw in [1, 100] is greater than replicaCleanPercentage. Otherwise it
// calls p.reclaim for searchPeerResult.Peers[0] and returns true; an error from
// p.reclaim is only logged and does not change the result.
func (p *peerExchange) tryReclaim(task string, searchPeerResult SearchPeerResult) {
func (p *peerExchange) tryReclaim(task string, searchPeerResult SearchPeerResult) bool {
// a percentage of 0 means never reclaim
if p.config.replicaCleanPercentage == 0 {
return false
}
r := rand.New(rand.NewSource(time.Now().UnixNano()))
// reclaim with 1% probability for shrink double reclaim with other members
if r.Int31n(100) == 0 {
// reclaim only with the configured probability, to reduce double reclaim with other members
// Int31n returns a value in [0, n), so +1 maps the draw to a percentage in [1, 100]
if r.Int31n(100)+1 > p.config.replicaCleanPercentage {
return false
}
// when Type is SearchPeerResultTypeLocal, peer 0 is always the local peer
peer := searchPeerResult.Peers[0].PeerID
searchPeerResult.Type = SearchPeerResultTypeRemote
p.memberManager.logger.Debugf("task %s replica threshold reached, try to reclaim local peer cache %s", task, peer)
err := p.reclaim(task, peer)
if err != nil {
p.memberManager.logger.Warnf("task %s peer %s reclaim local cache error: %s", task, peer, err)
}
}
// true is returned whether or not p.reclaim reported an error
return true
}
func (p *peerExchange) BroadcastPeer(data *dfdaemonv1.PeerMetadata) {