chore: update pex replica clean logic (#3272)

Signed-off-by: Jim Ma <majinjing3@gmail.com>
This commit is contained in:
Jim Ma 2024-05-16 15:04:01 +08:00 committed by GitHub
parent 756b6b49ea
commit 3ddf37acfb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 51 additions and 26 deletions

View File

@ -983,4 +983,6 @@ type PeerExchangeOption struct {
ReSyncInterval time.Duration `mapstructure:"reSyncInterval" yaml:"reSyncInterval"` ReSyncInterval time.Duration `mapstructure:"reSyncInterval" yaml:"reSyncInterval"`
// ReplicaThreshold is used for keeping replicas in all peers is not bigger than threshold to save storage // ReplicaThreshold is used for keeping replicas in all peers is not bigger than threshold to save storage
ReplicaThreshold int `mapstructure:"replicaThreshold" yaml:"replicaThreshold"` ReplicaThreshold int `mapstructure:"replicaThreshold" yaml:"replicaThreshold"`
// ReplicaCleanPercentage is the percentage probability of cleaning the local replica when the replica threshold is reached; valid values: [0, 100]
ReplicaCleanPercentage int32 `mapstructure:"replicaCleanPercentage" yaml:"replicaCleanPercentage"`
} }

View File

@ -191,11 +191,12 @@ var peerHostConfig = func() *DaemonOption {
LogMaxAge: DefaultLogRotateMaxAge, LogMaxAge: DefaultLogRotateMaxAge,
LogMaxBackups: DefaultLogRotateMaxBackups, LogMaxBackups: DefaultLogRotateMaxBackups,
PeerExchange: PeerExchangeOption{ PeerExchange: PeerExchangeOption{
Enable: false, Enable: false,
InitialInterval: time.Minute, InitialInterval: time.Minute,
InitialBroadcastDelay: 3 * time.Minute, InitialBroadcastDelay: 3 * time.Minute,
ReSyncInterval: 10 * time.Minute, ReSyncInterval: 10 * time.Minute,
ReplicaThreshold: 2, ReplicaThreshold: 2,
ReplicaCleanPercentage: 1,
}, },
} }
} }

View File

@ -191,11 +191,12 @@ var peerHostConfig = func() *DaemonOption {
LogMaxAge: DefaultLogRotateMaxAge, LogMaxAge: DefaultLogRotateMaxAge,
LogMaxBackups: DefaultLogRotateMaxBackups, LogMaxBackups: DefaultLogRotateMaxBackups,
PeerExchange: PeerExchangeOption{ PeerExchange: PeerExchangeOption{
Enable: false, Enable: false,
InitialInterval: time.Minute, InitialInterval: time.Minute,
InitialBroadcastDelay: 3 * time.Minute, InitialBroadcastDelay: 3 * time.Minute,
ReSyncInterval: 10 * time.Minute, ReSyncInterval: 10 * time.Minute,
ReplicaThreshold: 2, ReplicaThreshold: 2,
ReplicaCleanPercentage: 1,
}, },
} }
} }

View File

@ -257,7 +257,8 @@ func New(opt *config.DaemonOption, d dfpath.Dfpath) (Daemon, error) {
}, },
pex.WithInitialRetryInterval(opt.PeerExchange.InitialInterval), pex.WithInitialRetryInterval(opt.PeerExchange.InitialInterval),
pex.WithReSyncInterval(opt.PeerExchange.ReSyncInterval), pex.WithReSyncInterval(opt.PeerExchange.ReSyncInterval),
pex.WithReplicaThreshold(opt.PeerExchange.ReplicaThreshold)) pex.WithReplicaThreshold(opt.PeerExchange.ReplicaThreshold),
pex.WithReplicaCleanPercentage(opt.PeerExchange.ReplicaCleanPercentage))
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -43,9 +43,10 @@ type peerExchange struct {
} }
type peerExchangeConfig struct { type peerExchangeConfig struct {
initialRetryInterval time.Duration initialRetryInterval time.Duration
reSyncInterval time.Duration reSyncInterval time.Duration
replicaThreshold int replicaThreshold int
replicaCleanPercentage int32
} }
func WithName(name string) func(*memberlist.Config, *peerExchangeConfig) { func WithName(name string) func(*memberlist.Config, *peerExchangeConfig) {
@ -102,6 +103,14 @@ func WithReplicaThreshold(threshold int) func(*memberlist.Config, *peerExchangeC
} }
} }
// WithReplicaCleanPercentage returns an option that stores percentage as the
// replica clean percentage of the peer exchange config.
//
// Only positive values are applied; a zero or negative percentage leaves the
// value already held by the peer exchange config untouched. The memberlist
// config is not modified by this option.
func WithReplicaCleanPercentage(percentage int32) func(*memberlist.Config, *peerExchangeConfig) {
	return func(_ *memberlist.Config, cfg *peerExchangeConfig) {
		if percentage <= 0 {
			// Non-positive input: keep whatever is currently configured.
			return
		}
		cfg.replicaCleanPercentage = percentage
	}
}
func NewPeerExchange( func NewPeerExchange(
reclaim ReclaimFunc, reclaim ReclaimFunc,
lister InitialMemberLister, lister InitialMemberLister,
@ -130,6 +139,7 @@ func NewPeerExchange(
logger.Infof("peer exchange initial retry interval: %s", pexConfig.initialRetryInterval) logger.Infof("peer exchange initial retry interval: %s", pexConfig.initialRetryInterval)
logger.Infof("peer exchange re-sync interval: %s", pexConfig.reSyncInterval) logger.Infof("peer exchange re-sync interval: %s", pexConfig.reSyncInterval)
logger.Infof("peer exchange replica threshold: %d", pexConfig.replicaThreshold) logger.Infof("peer exchange replica threshold: %d", pexConfig.replicaThreshold)
logger.Infof("peer exchange replica clean percentage: %d", pexConfig.replicaCleanPercentage)
pex := &peerExchange{ pex := &peerExchange{
config: pexConfig, config: pexConfig,
@ -168,7 +178,11 @@ func (p *peerExchange) SearchPeer(task string) SearchPeerResult {
case SearchPeerResultTypeLocal: case SearchPeerResultTypeLocal:
// check replica threshold and reclaim local cache // check replica threshold and reclaim local cache
if len(searchPeerResult.Peers) > p.config.replicaThreshold { if len(searchPeerResult.Peers) > p.config.replicaThreshold {
p.tryReclaim(task, searchPeerResult) if p.tryReclaim(task, searchPeerResult) {
// change result type to remote and drop local peer
searchPeerResult.Type = SearchPeerResultTypeRemote
searchPeerResult.Peers = searchPeerResult.Peers[1:]
}
} }
case SearchPeerResultTypeRemote: case SearchPeerResultTypeRemote:
if len(searchPeerResult.Peers) < p.config.replicaThreshold { if len(searchPeerResult.Peers) < p.config.replicaThreshold {
@ -179,18 +193,24 @@ func (p *peerExchange) SearchPeer(task string) SearchPeerResult {
return searchPeerResult return searchPeerResult
} }
func (p *peerExchange) tryReclaim(task string, searchPeerResult SearchPeerResult) { func (p *peerExchange) tryReclaim(task string, searchPeerResult SearchPeerResult) bool {
r := rand.New(rand.NewSource(time.Now().UnixNano())) if p.config.replicaCleanPercentage == 0 {
// reclaim with 1% probability for shrink double reclaim with other members return false
if r.Int31n(100) == 0 {
peer := searchPeerResult.Peers[0].PeerID
searchPeerResult.Type = SearchPeerResultTypeRemote
p.memberManager.logger.Debugf("task %s replica threshold reached, try to reclaim local peer cache %s", task, peer)
err := p.reclaim(task, peer)
if err != nil {
p.memberManager.logger.Warnf("task %s peer %s reclaim local cache error: %s", task, peer, err)
}
} }
r := rand.New(rand.NewSource(time.Now().UnixNano()))
// reclaim only with the configured probability, to reduce the chance of duplicate reclaims across members
// Int31n is [0, n), +1 for percentage [1, 100]
if r.Int31n(100)+1 > p.config.replicaCleanPercentage {
return false
}
// when Type is SearchPeerResultTypeLocal, peer 0 is always local peer
peer := searchPeerResult.Peers[0].PeerID
p.memberManager.logger.Debugf("task %s replica threshold reached, try to reclaim local peer cache %s", task, peer)
err := p.reclaim(task, peer)
if err != nil {
p.memberManager.logger.Warnf("task %s peer %s reclaim local cache error: %s", task, peer, err)
}
return true
} }
func (p *peerExchange) BroadcastPeer(data *dfdaemonv1.PeerMetadata) { func (p *peerExchange) BroadcastPeer(data *dfdaemonv1.PeerMetadata) {