dragonfly/client/daemon/peer/peertask_manager.go

/*
* Copyright 2020 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

//go:generate mockgen -destination peertask_manager_mock.go -source peertask_manager.go -package peer
package peer

import (
"context"
"errors"
"fmt"
"io"
"sync"
"github.com/go-http-utils/headers"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/trace"
"golang.org/x/time/rate"
"google.golang.org/grpc/status"
commonv1 "d7y.io/api/pkg/apis/common/v1"
schedulerv1 "d7y.io/api/pkg/apis/scheduler/v1"
"d7y.io/dragonfly/v2/client/daemon/metrics"
"d7y.io/dragonfly/v2/client/daemon/storage"
logger "d7y.io/dragonfly/v2/internal/dflog"
"d7y.io/dragonfly/v2/internal/util"
"d7y.io/dragonfly/v2/pkg/idgen"
nethttp "d7y.io/dragonfly/v2/pkg/net/http"
schedulerclient "d7y.io/dragonfly/v2/pkg/rpc/scheduler/client"
)

// TaskManager processes all peer task requests.
type TaskManager interface {
	// StartFileTask starts a peer task to download a file and
	// returns a progress channel for tracking the download progress.
	// tiny means the task file is tiny and the task is already done.
StartFileTask(ctx context.Context, req *FileTaskRequest) (
progress chan *FileTaskProgress, err error)
// StartStreamTask starts a peer task with stream io
StartStreamTask(ctx context.Context, req *StreamTaskRequest) (
readCloser io.ReadCloser, attribute map[string]string, err error)
// StartSeedTask starts a seed peer task
StartSeedTask(ctx context.Context, req *SeedTaskRequest) (
seedTaskResult *SeedTaskResponse, reuse bool, err error)
	// Subscribe subscribes to the piece progress of a running peer task
	Subscribe(request *commonv1.PieceTaskRequest) (*SubscribeResponse, bool)
	// IsPeerTaskRunning reports whether the given peer task is running
	IsPeerTaskRunning(taskID string, peerID string) (Task, bool)
// StatTask checks whether the given task exists in P2P network
StatTask(ctx context.Context, taskID string) (*schedulerv1.Task, error)
// AnnouncePeerTask announces peer task info to P2P network
AnnouncePeerTask(ctx context.Context, meta storage.PeerTaskMetadata, url string, taskType commonv1.TaskType, urlMeta *commonv1.UrlMeta) error
GetPieceManager() PieceManager
// Stop stops the PeerTaskManager
Stop(ctx context.Context) error
}
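
// Illustrative usage sketch (not part of this file): driving StartStreamTask
// from a caller, assuming a constructed TaskManager named tm and a destination
// writer dst; the request values are placeholders.
//
//	rc, attr, err := tm.StartStreamTask(ctx, &StreamTaskRequest{
//		URL:     "http://example.com/blob",
//		URLMeta: &commonv1.UrlMeta{},
//		PeerID:  "example-peer-id",
//	})
//	if err != nil {
//		return err
//	}
//	defer rc.Close()
//	_ = attr // response attributes, e.g. content length headers
//	_, err = io.Copy(dst, rc)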

// Task represents the common interface to operate a peer task.
type Task interface {
Logger
Context() context.Context
Log() *logger.SugaredLoggerOnWith
GetStorage() storage.TaskStorageDriver
GetPeerID() string
GetTaskID() string
GetTotalPieces() int32
SetTotalPieces(int32)
GetContentLength() int64
SetContentLength(int64)
AddTraffic(uint64)
GetTraffic() uint64
SetPieceMd5Sign(string)
GetPieceMd5Sign() string
PublishPieceInfo(pieceNum int32, size uint32)
ReportPieceResult(request *DownloadPieceRequest, result *DownloadPieceResult, err error)
UpdateSourceErrorStatus(st *status.Status)
}

// Logger exposes a task's structured logger.
type Logger interface {
Log() *logger.SugaredLoggerOnWith
}

// TinyData carries the full content of a tiny task in memory.
type TinyData struct {
TaskID string
PeerID string
Content []byte
}

var tracer trace.Tracer

func init() {
tracer = otel.Tracer("dfget-daemon")
}

type peerTaskManager struct {
TaskManagerOption
conductorLock sync.Locker
runningPeerTasks sync.Map
trafficShaper TrafficShaper
}

// TaskManagerOption holds the options used to construct a peerTaskManager.
type TaskManagerOption struct {
TaskOption
SchedulerClient schedulerclient.V1
PerPeerRateLimit rate.Limit
TotalRateLimit rate.Limit
TrafficShaperType string
	// Multiplex indicates whether to reuse the data of completed peer tasks
	Multiplex bool
	// Prefetch indicates whether to prefetch the whole file for ranged requests
	Prefetch bool
GetPiecesMaxRetry int
SplitRunningTasks bool
}

// NewPeerTaskManager constructs a TaskManager and starts its traffic shaper.
func NewPeerTaskManager(opt *TaskManagerOption) (TaskManager, error) {
ptm := &peerTaskManager{
TaskManagerOption: *opt,
runningPeerTasks: sync.Map{},
conductorLock: &sync.Mutex{},
trafficShaper: NewTrafficShaper(opt.TrafficShaperType, opt.TotalRateLimit, util.ComputePieceSize),
}
ptm.trafficShaper.Start()
return ptm, nil
}
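
// Minimal construction sketch, assuming the embedded TaskOption (scheduler
// client, storage manager, etc.) is wired up elsewhere; the limits below are
// illustrative only.
//
//	tm, err := NewPeerTaskManager(&TaskManagerOption{
//		SchedulerClient:  schedClient,                  // a schedulerclient.V1
//		PerPeerRateLimit: rate.Limit(16 * 1024 * 1024), // 16 MiB/s per peer
//		TotalRateLimit:   rate.Limit(64 * 1024 * 1024), // 64 MiB/s across peers
//		Multiplex:        true,                         // reuse completed task data
//	})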

func (ptm *peerTaskManager) findPeerTaskConductor(key string) (*peerTaskConductor, bool) {
pt, ok := ptm.runningPeerTasks.Load(key)
if !ok {
return nil, false
}
return pt.(*peerTaskConductor), true
}
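
// getPeerTaskConductor returns the conductor responsible for the given task,
// creating and starting a new one when none is running. With SplitRunningTasks
// each request gets its own conductor; otherwise concurrent requests for the
// same taskID share one.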
func (ptm *peerTaskManager) getPeerTaskConductor(ctx context.Context,
taskID string,
request *schedulerv1.PeerTaskRequest,
limit rate.Limit,
parent *peerTaskConductor,
rg *nethttp.Range,
desiredLocation string,
seed bool) (*peerTaskConductor, error) {
var (
ptc *peerTaskConductor
created bool
err error
)
if ptm.SplitRunningTasks {
ptc, created, err = ptm.createSplitedPeerTaskConductor(
ctx, taskID, request, limit, parent, rg, desiredLocation, seed)
} else {
ptc, created, err = ptm.getOrCreatePeerTaskConductor(
ctx, taskID, request, limit, parent, rg, desiredLocation, seed)
}
if err != nil {
return nil, err
}
if created {
if err = ptc.start(); err != nil {
return nil, err
}
}
return ptc, err
}

// getOrCreatePeerTaskConductor will get or create a peerTaskConductor.
// If one was created, it returns (ptc, true, nil); otherwise it returns (ptc, false, nil).
func (ptm *peerTaskManager) getOrCreatePeerTaskConductor(
ctx context.Context,
taskID string,
request *schedulerv1.PeerTaskRequest,
limit rate.Limit,
parent *peerTaskConductor,
rg *nethttp.Range,
desiredLocation string,
seed bool) (*peerTaskConductor, bool, error) {
if ptc, ok := ptm.findPeerTaskConductor(taskID); ok {
logger.Debugf("peer task found: %s/%s", ptc.taskID, ptc.peerID)
return ptc, false, nil
}
ptc := ptm.newPeerTaskConductor(ctx, request, limit, parent, rg, seed)
ptm.conductorLock.Lock()
// double check
if p, ok := ptm.findPeerTaskConductor(taskID); ok {
ptm.conductorLock.Unlock()
logger.Debugf("peer task found: %s/%s", p.taskID, p.peerID)
if seed && !p.seed && !p.needBackSource.Load() {
p.Warnf("new seed request received, switch to back source, may be produced by multiple schedulers")
p.markBackSource()
}
metrics.PeerTaskCacheHitCount.Add(1)
return p, false, nil
}
ptm.runningPeerTasks.Store(taskID, ptc)
ptm.conductorLock.Unlock()
metrics.PeerTaskCount.Add(1)
logger.Debugf("peer task created: %s/%s", ptc.taskID, ptc.peerID)
err := ptc.initStorage(desiredLocation)
if err != nil {
ptc.Errorf("init storage error: %s", err)
ptc.cancelNotRegisterred(commonv1.Code_ClientError, err.Error())
return nil, false, err
}
return ptc, true, nil
}

// createSplitedPeerTaskConductor always creates a standalone conductor, keyed by
// both taskID and peerID, so concurrent requests for the same task never share one.
func (ptm *peerTaskManager) createSplitedPeerTaskConductor(
ctx context.Context,
taskID string,
request *schedulerv1.PeerTaskRequest,
limit rate.Limit,
parent *peerTaskConductor,
rg *nethttp.Range,
desiredLocation string,
seed bool) (*peerTaskConductor, bool, error) {
ptc := ptm.newPeerTaskConductor(ctx, request, limit, parent, rg, seed)
ptm.runningPeerTasks.Store(taskID+"/"+ptc.peerID, ptc)
metrics.PeerTaskCount.Add(1)
logger.Debugf("standalone peer task created: %s/%s", ptc.taskID, ptc.peerID)
err := ptc.initStorage(desiredLocation)
if err != nil {
ptc.Errorf("init storage error: %s", err)
ptc.cancelNotRegisterred(commonv1.Code_ClientError, err.Error())
return nil, false, err
}
return ptc, true, nil
}

// enabledPrefetch reports whether the whole file should be prefetched for a ranged request.
func (ptm *peerTaskManager) enabledPrefetch(rg *nethttp.Range) bool {
return ptm.Prefetch && rg != nil
}

// prefetchParentTask starts a peer task that downloads the complete file behind a ranged request.
func (ptm *peerTaskManager) prefetchParentTask(request *schedulerv1.PeerTaskRequest, desiredLocation string) *peerTaskConductor {
req := &schedulerv1.PeerTaskRequest{
Url: request.Url,
PeerId: request.PeerId,
PeerHost: ptm.PeerHost,
IsMigrating: request.IsMigrating,
UrlMeta: &commonv1.UrlMeta{
Digest: request.UrlMeta.Digest,
Tag: request.UrlMeta.Tag,
Filter: request.UrlMeta.Filter,
Header: map[string]string{},
},
}
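	// copy all headers except Range: the parent task prefetches the complete file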
for k, v := range request.UrlMeta.Header {
if k == headers.Range {
continue
}
req.UrlMeta.Header[k] = v
}
taskID := idgen.TaskIDV1(req.Url, req.UrlMeta)
req.PeerId = idgen.PeerIDV1(req.PeerHost.Ip)
var limit = rate.Inf
if ptm.PerPeerRateLimit > 0 {
limit = ptm.PerPeerRateLimit
}
logger.Infof("prefetch peer task %s/%s", taskID, req.PeerId)
prefetch, err := ptm.getPeerTaskConductor(context.Background(), taskID, req, limit, nil, nil, desiredLocation, false)
if err != nil {
logger.Errorf("prefetch peer task %s/%s error: %s", prefetch.taskID, prefetch.peerID, err)
return nil
}
if prefetch != nil && prefetch.peerID == req.PeerId {
metrics.PrefetchTaskCount.Add(1)
}
return prefetch
}

// StartFileTask starts a peer task to download a file into local storage.
func (ptm *peerTaskManager) StartFileTask(ctx context.Context, req *FileTaskRequest) (chan *FileTaskProgress, error) {
if req.KeepOriginalOffset && !ptm.Prefetch {
		return nil, fmt.Errorf("please enable prefetch when using the original offset feature")
}
if ptm.Multiplex {
progress, ok := ptm.tryReuseFilePeerTask(ctx, req)
if ok {
metrics.PeerTaskCacheHitCount.Add(1)
return progress, nil
}
}
// TODO ensure scheduler is ok first
var limit = rate.Inf
if ptm.PerPeerRateLimit > 0 {
limit = ptm.PerPeerRateLimit
}
if req.Limit > 0 {
limit = rate.Limit(req.Limit)
}
ctx, pt, err := ptm.newFileTask(ctx, req, limit)
if err != nil {
return nil, err
}
// FIXME when failed due to SchedulerClient error, relocate SchedulerClient and retry
progress, err := pt.Start(ctx)
return progress, err
}
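
// Illustrative usage sketch (not part of this file): starting a file task and
// draining its progress channel; iterating to completion is an assumption about
// how callers consume FileTaskProgress.
//
//	progress, err := tm.StartFileTask(ctx, req)
//	if err != nil {
//		return err
//	}
//	for p := range progress {
//		_ = p // inspect progress, completion and failure states here
//	}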

// StartStreamTask starts a peer task and exposes the downloaded data as a stream.
func (ptm *peerTaskManager) StartStreamTask(ctx context.Context, req *StreamTaskRequest) (io.ReadCloser, map[string]string, error) {
peerTaskRequest := &schedulerv1.PeerTaskRequest{
Url: req.URL,
UrlMeta: req.URLMeta,
PeerId: req.PeerID,
PeerHost: ptm.PeerHost,
IsMigrating: false,
}
if ptm.Multiplex {
		// try breakpoint resume for tasks with a range header
if req.Range != nil && !ptm.SplitRunningTasks {
// find running parent task
parentTaskID := idgen.ParentTaskIDV1(req.URL, req.URLMeta)
parentTask, ok := ptm.findPeerTaskConductor(parentTaskID)
if ok && parentTask.GetContentLength() > 0 {
// only allow resume for range from breakpoint to end
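				// e.g. with content length 100, a request for bytes 40-99 (Start=40,
				// Length=60) satisfies 40+60 == 100 and may resume from the running
				// parent task, while bytes 40-89 (Length=50) may not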
if req.Range.Start+req.Range.Length == parentTask.GetContentLength() {
pt := ptm.newResumeStreamTask(ctx, parentTask, req.Range)
return pt.Start(ctx)
}
}
}
// reuse by completed task
r, attr, ok := ptm.tryReuseStreamPeerTask(ctx, req)
if ok {
metrics.PeerTaskCacheHitCount.Add(1)
return r, attr, nil
}
}
pt, err := ptm.newStreamTask(ctx, peerTaskRequest, req.Range)
if err != nil {
return nil, nil, err
}
// FIXME when failed due to SchedulerClient error, relocate SchedulerClient and retry
readCloser, attribute, err := pt.Start(ctx)
return readCloser, attribute, err
}

// StartSeedTask starts a seed peer task, reusing a completed task when possible.
func (ptm *peerTaskManager) StartSeedTask(ctx context.Context, req *SeedTaskRequest) (response *SeedTaskResponse, reuse bool, err error) {
response, ok := ptm.tryReuseSeedPeerTask(ctx, req)
if ok {
metrics.PeerTaskCacheHitCount.Add(1)
return response, true, nil
}
var limit = rate.Inf
if ptm.PerPeerRateLimit > 0 {
limit = ptm.PerPeerRateLimit
}
if req.Limit > 0 {
limit = rate.Limit(req.Limit)
}
response, err = ptm.newSeedTask(ctx, req, limit)
if err != nil {
return nil, false, err
}
return response, false, nil
}

// SubscribeResponse exposes a running task's storage and progress channels to subscribers.
type SubscribeResponse struct {
Storage storage.TaskStorageDriver
PieceInfoChannel chan *PieceInfo
Success chan struct{}
Fail chan struct{}
FailReason func() error
}
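
// getRunningTaskKey builds the key for runningPeerTasks: the bare taskID when
// running tasks are shared, or "taskID/peerID" when SplitRunningTasks is set.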
func (ptm *peerTaskManager) getRunningTaskKey(taskID, peerID string) string {
if ptm.SplitRunningTasks {
return taskID + "/" + peerID
}
return taskID
}

// Subscribe subscribes to the piece progress of a running peer task.
func (ptm *peerTaskManager) Subscribe(request *commonv1.PieceTaskRequest) (*SubscribeResponse, bool) {
ptc, ok := ptm.findPeerTaskConductor(ptm.getRunningTaskKey(request.TaskId, request.DstPid))
if !ok {
return nil, false
}
result := &SubscribeResponse{
Storage: ptc.storage,
PieceInfoChannel: ptc.broker.Subscribe(),
Success: ptc.successCh,
Fail: ptc.failCh,
FailReason: ptc.getFailedError,
}
return result, true
}
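
// Illustrative usage sketch (not part of this file): consuming a
// SubscribeResponse, with handlePiece as a hypothetical callback. A subscriber
// typically selects over the piece channel and the terminal channels.
//
//	resp, ok := tm.Subscribe(&commonv1.PieceTaskRequest{TaskId: taskID, DstPid: peerID})
//	if !ok {
//		return errors.New("peer task not running")
//	}
//	for {
//		select {
//		case info := <-resp.PieceInfoChannel:
//			handlePiece(info)
//		case <-resp.Success:
//			return nil
//		case <-resp.Fail:
//			return resp.FailReason()
//		}
//	}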

func (ptm *peerTaskManager) Stop(ctx context.Context) error {
// TODO
if ptm.trafficShaper != nil {
ptm.trafficShaper.Stop()
}
return nil
}

// PeerTaskDone removes a finished peer task from the running set and the traffic shaper.
func (ptm *peerTaskManager) PeerTaskDone(taskID, peerID string) {
key := ptm.getRunningTaskKey(taskID, peerID)
logger.Debugf("delete done task %s in running tasks", key)
ptm.runningPeerTasks.Delete(key)
if ptm.trafficShaper != nil {
ptm.trafficShaper.RemoveTask(key)
}
}

func (ptm *peerTaskManager) IsPeerTaskRunning(taskID, peerID string) (Task, bool) {
ptc, ok := ptm.runningPeerTasks.Load(ptm.getRunningTaskKey(taskID, peerID))
if ok {
return ptc.(*peerTaskConductor), ok
}
return nil, ok
}

func (ptm *peerTaskManager) StatTask(ctx context.Context, taskID string) (*schedulerv1.Task, error) {
req := &schedulerv1.StatTaskRequest{
TaskId: taskID,
}
return ptm.SchedulerClient.StatTask(ctx, req)
}

func (ptm *peerTaskManager) GetPieceManager() PieceManager {
return ptm.PieceManager
}

func (ptm *peerTaskManager) AnnouncePeerTask(ctx context.Context, meta storage.PeerTaskMetadata, url string, taskType commonv1.TaskType, urlMeta *commonv1.UrlMeta) error {
// Check if the given task is completed in local StorageManager.
if ptm.StorageManager.FindCompletedTask(meta.TaskID) == nil {
return errors.New("task not found in local storage")
}
// Prepare AnnounceTaskRequest.
totalPieces, err := ptm.StorageManager.GetTotalPieces(ctx, &meta)
if err != nil {
return err
}
piecePacket, err := ptm.StorageManager.GetPieces(ctx, &commonv1.PieceTaskRequest{
TaskId: meta.TaskID,
DstPid: meta.PeerID,
StartNum: 0,
Limit: uint32(totalPieces),
})
if err != nil {
return err
}
piecePacket.DstAddr = fmt.Sprintf("%s:%d", ptm.PeerHost.Ip, ptm.PeerHost.DownPort)
// Announce peer task to scheduler
if err := ptm.SchedulerClient.AnnounceTask(ctx, &schedulerv1.AnnounceTaskRequest{
TaskId: meta.TaskID,
TaskType: taskType,
Url: url,
UrlMeta: urlMeta,
PeerHost: ptm.PeerHost,
PiecePacket: piecePacket,
}); err != nil {
return err
}
return nil
}
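
// Illustrative usage sketch (not part of this file): announcing a task that is
// already completed in local storage, e.g. after a cache import. The task type
// and metadata values are placeholders.
//
//	err := tm.AnnouncePeerTask(ctx,
//		storage.PeerTaskMetadata{TaskID: taskID, PeerID: peerID},
//		"http://example.com/blob",
//		commonv1.TaskType_DfCache,
//		&commonv1.UrlMeta{})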