fix: parent peertask conductor race condition (#3154)
Signed-off-by: Jim Ma <majinjing3@gmail.com>
This commit is contained in:
parent
afc54df6b3
commit
d18625b3e2
|
|
@ -85,6 +85,10 @@ type peerTaskConductor struct {
|
||||||
needBackSource *atomic.Bool
|
needBackSource *atomic.Bool
|
||||||
seed bool
|
seed bool
|
||||||
|
|
||||||
|
// sub peer task need ensure parent storage registered, success or failed
|
||||||
|
storageRegistered chan struct{}
|
||||||
|
storageRegisterSuccess bool
|
||||||
|
|
||||||
peerTaskManager *peerTaskManager
|
peerTaskManager *peerTaskManager
|
||||||
|
|
||||||
storage storage.TaskStorageDriver
|
storage storage.TaskStorageDriver
|
||||||
|
|
@ -238,6 +242,7 @@ func (ptm *peerTaskManager) newPeerTaskConductor(
|
||||||
seed: seed,
|
seed: seed,
|
||||||
parent: parent,
|
parent: parent,
|
||||||
rg: rg,
|
rg: rg,
|
||||||
|
storageRegistered: make(chan struct{}),
|
||||||
}
|
}
|
||||||
|
|
||||||
ptc.pieceDownloadCtx, ptc.pieceDownloadCancel = context.WithCancel(ptc.ctx)
|
ptc.pieceDownloadCtx, ptc.pieceDownloadCancel = context.WithCancel(ptc.ctx)
|
||||||
|
|
@ -1272,7 +1277,7 @@ func (pt *peerTaskConductor) reportFailResult(request *DownloadPieceRequest, res
|
||||||
span.End()
|
span.End()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pt *peerTaskConductor) initStorage(desiredLocation string) (err error) {
|
func (pt *peerTaskConductor) registerStorage(desiredLocation string) (err error) {
|
||||||
// prepare storage
|
// prepare storage
|
||||||
if pt.parent == nil {
|
if pt.parent == nil {
|
||||||
pt.storage, err = pt.StorageManager.RegisterTask(pt.ctx,
|
pt.storage, err = pt.StorageManager.RegisterTask(pt.ctx,
|
||||||
|
|
@ -1300,10 +1305,13 @@ func (pt *peerTaskConductor) initStorage(desiredLocation string) (err error) {
|
||||||
Range: pt.rg,
|
Range: pt.rg,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
defer close(pt.storageRegistered)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
pt.Log().Errorf("register task to storage manager failed: %s", err)
|
pt.Log().Errorf("register task to storage manager failed: %s", err)
|
||||||
}
|
|
||||||
return err
|
return err
|
||||||
|
}
|
||||||
|
pt.storageRegisterSuccess = true
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pt *peerTaskConductor) UpdateStorage() error {
|
func (pt *peerTaskConductor) UpdateStorage() error {
|
||||||
|
|
|
||||||
|
|
@ -203,6 +203,7 @@ func (ptm *peerTaskManager) getOrCreatePeerTaskConductor(
|
||||||
rg *nethttp.Range,
|
rg *nethttp.Range,
|
||||||
desiredLocation string,
|
desiredLocation string,
|
||||||
seed bool) (*peerTaskConductor, bool, error) {
|
seed bool) (*peerTaskConductor, bool, error) {
|
||||||
|
retry:
|
||||||
if ptc, ok := ptm.findPeerTaskConductor(taskID); ok {
|
if ptc, ok := ptm.findPeerTaskConductor(taskID); ok {
|
||||||
logger.Debugf("peer task found: %s/%s", ptc.taskID, ptc.peerID)
|
logger.Debugf("peer task found: %s/%s", ptc.taskID, ptc.peerID)
|
||||||
return ptc, false, nil
|
return ptc, false, nil
|
||||||
|
|
@ -226,7 +227,17 @@ func (ptm *peerTaskManager) getOrCreatePeerTaskConductor(
|
||||||
metrics.PeerTaskCount.Add(1)
|
metrics.PeerTaskCount.Add(1)
|
||||||
logger.Debugf("peer task created: %s/%s", ptc.taskID, ptc.peerID)
|
logger.Debugf("peer task created: %s/%s", ptc.taskID, ptc.peerID)
|
||||||
|
|
||||||
err := ptc.initStorage(desiredLocation)
|
// wait parent RegisterTask done
|
||||||
|
if parent != nil {
|
||||||
|
<-parent.storageRegistered
|
||||||
|
if !parent.storageRegisterSuccess {
|
||||||
|
parent = nil
|
||||||
|
logger.Warnf("parent peer task %s/%s register failed, fallback to non-sub peer task", parent.taskID, parent.peerID)
|
||||||
|
goto retry
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
err := ptc.registerStorage(desiredLocation)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
ptc.Errorf("init storage error: %s", err)
|
ptc.Errorf("init storage error: %s", err)
|
||||||
ptc.cancelNotRegisterred(commonv1.Code_ClientError, err.Error())
|
ptc.cancelNotRegisterred(commonv1.Code_ClientError, err.Error())
|
||||||
|
|
@ -250,7 +261,7 @@ func (ptm *peerTaskManager) createSplitedPeerTaskConductor(
|
||||||
metrics.PeerTaskCount.Add(1)
|
metrics.PeerTaskCount.Add(1)
|
||||||
logger.Debugf("standalone peer task created: %s/%s", ptc.taskID, ptc.peerID)
|
logger.Debugf("standalone peer task created: %s/%s", ptc.taskID, ptc.peerID)
|
||||||
|
|
||||||
err := ptc.initStorage(desiredLocation)
|
err := ptc.registerStorage(desiredLocation)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
ptc.Errorf("init storage error: %s", err)
|
ptc.Errorf("init storage error: %s", err)
|
||||||
ptc.cancelNotRegisterred(commonv1.Code_ClientError, err.Error())
|
ptc.cancelNotRegisterred(commonv1.Code_ClientError, err.Error())
|
||||||
|
|
@ -290,7 +301,7 @@ func (ptm *peerTaskManager) prefetchParentTask(request *schedulerv1.PeerTaskRequ
|
||||||
limit = ptm.PerPeerRateLimit
|
limit = ptm.PerPeerRateLimit
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.Infof("prefetch peer task %s/%s", taskID, req.PeerId)
|
logger.Infof("prefetch peer task %s/%s, sub peer task %s/%s", taskID, req.PeerId, request.TaskId, request.PeerId)
|
||||||
prefetch, err := ptm.getPeerTaskConductor(context.Background(), taskID, req, limit, nil, nil, desiredLocation, false)
|
prefetch, err := ptm.getPeerTaskConductor(context.Background(), taskID, req, limit, nil, nil, desiredLocation, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Errorf("prefetch peer task %s error: %s", taskID, err)
|
logger.Errorf("prefetch peer task %s error: %s", taskID, err)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue