/* * Copyright 2020 The Dragonfly Authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package peer import ( "context" "sync" "github.com/pkg/errors" "go.opentelemetry.io/otel/semconv" "go.opentelemetry.io/otel/trace" "go.uber.org/atomic" "golang.org/x/time/rate" "d7y.io/dragonfly/v2/client/config" logger "d7y.io/dragonfly/v2/internal/dflog" "d7y.io/dragonfly/v2/internal/idgen" "d7y.io/dragonfly/v2/pkg/rpc/base" "d7y.io/dragonfly/v2/pkg/rpc/scheduler" schedulerclient "d7y.io/dragonfly/v2/pkg/rpc/scheduler/client" ) type FilePeerTaskRequest struct { scheduler.PeerTaskRequest Output string Limit float64 DisableBackSource bool Pattern string Callsystem string } // FilePeerTask represents a peer task to download a file type FilePeerTask interface { Task // Start start the special peer task, return a *FilePeerTaskProgress channel for updating download progress Start(ctx context.Context) (chan *FilePeerTaskProgress, error) } type filePeerTask struct { peerTask // progressCh holds progress status progressCh chan *FilePeerTaskProgress progressStopCh chan bool // disableBackSource indicates not back source when failed disableBackSource bool pattern string callsystem string } var _ FilePeerTask = (*filePeerTask)(nil) type ProgressState struct { Success bool Code base.Code Msg string } type FilePeerTaskProgress struct { State *ProgressState TaskID string PeerID string ContentLength int64 CompletedLength int64 PeerTaskDone bool DoneCallback func() } func newFilePeerTask(ctx context.Context, host *scheduler.PeerHost, pieceManager PieceManager, request *FilePeerTaskRequest, schedulerClient schedulerclient.SchedulerClient, schedulerOption config.SchedulerOption, perPeerRateLimit rate.Limit) (context.Context, *filePeerTask, *TinyData, error) { ctx, span := tracer.Start(ctx, config.SpanFilePeerTask, trace.WithSpanKind(trace.SpanKindClient)) span.SetAttributes(config.AttributePeerHost.String(host.Uuid)) span.SetAttributes(semconv.NetHostIPKey.String(host.Ip)) span.SetAttributes(config.AttributePeerID.String(request.PeerId)) span.SetAttributes(semconv.HTTPURLKey.String(request.Url)) logger.Infof("request overview, url: %s, filter: %s, meta: %s, biz: %s, peer: %s", request.Url, request.UrlMeta.Filter, request.UrlMeta, request.UrlMeta.Tag, request.PeerId) // trace register regCtx, regSpan := tracer.Start(ctx, config.SpanRegisterTask) logger.Infof("step 1: peer %s start to register", request.PeerId) result, err := schedulerClient.RegisterPeerTask(regCtx, &request.PeerTaskRequest) regSpan.RecordError(err) regSpan.End() var needBackSource bool if err != nil { logger.Errorf("step 1: peer %s register failed: %v", request.PeerId, err) if schedulerOption.DisableAutoBackSource { logger.Errorf("register peer task failed: %s, peer id: %s, auto back source disabled", err, request.PeerId) span.RecordError(err) span.End() return ctx, nil, nil, err } needBackSource = true // can not detect source or scheduler error, create a new dummy scheduler client schedulerClient = &dummySchedulerClient{} result = &scheduler.RegisterResult{TaskId: idgen.TaskID(request.Url, request.UrlMeta)} logger.Warnf("register peer task failed: %s, peer id: %s, try to back source", err, request.PeerId) } if result == nil { defer span.End() span.RecordError(err) err = errors.Errorf("step 1: peer register result is nil") return ctx, nil, nil, err } span.SetAttributes(config.AttributeTaskID.String(result.TaskId)) logger.Infof("step 1: register task success, task id: %s, peer id: %s, SizeScope: %s", result.TaskId, request.PeerId, base.SizeScope_name[int32(result.SizeScope)]) var singlePiece *scheduler.SinglePiece if !needBackSource { switch result.SizeScope { case base.SizeScope_SMALL: span.SetAttributes(config.AttributePeerTaskSizeScope.String("small")) logger.Infof("%s/%s size scope: small", result.TaskId, request.PeerId) if piece, ok := result.DirectPiece.(*scheduler.RegisterResult_SinglePiece); ok { singlePiece = piece.SinglePiece } case base.SizeScope_TINY: defer span.End() span.SetAttributes(config.AttributePeerTaskSizeScope.String("tiny")) logger.Infof("%s/%s size scope: tiny", result.TaskId, request.PeerId) if piece, ok := result.DirectPiece.(*scheduler.RegisterResult_PieceContent); ok { return ctx, nil, &TinyData{ span: span, TaskID: result.TaskId, PeerID: request.PeerId, Content: piece.PieceContent, }, nil } err = errors.Errorf("scheduler return tiny piece but can not parse piece content") span.RecordError(err) return ctx, nil, nil, err case base.SizeScope_NORMAL: span.SetAttributes(config.AttributePeerTaskSizeScope.String("normal")) logger.Infof("%s/%s size scope: normal", result.TaskId, request.PeerId) } } logger.Infof("step 2: start report peer %s piece result", request.PeerId) peerPacketStream, err := schedulerClient.ReportPieceResult(ctx, result.TaskId, &request.PeerTaskRequest) if err != nil { logger.Errorf("step 2: peer %s report piece failed: err", request.PeerId, err) defer span.End() span.RecordError(err) return ctx, nil, nil, err } var limiter *rate.Limiter if perPeerRateLimit > 0 { limiter = rate.NewLimiter(perPeerRateLimit, int(perPeerRateLimit)) } pt := &filePeerTask{ progressCh: make(chan *FilePeerTaskProgress), progressStopCh: make(chan bool), disableBackSource: request.DisableBackSource, pattern: request.Pattern, callsystem: request.Callsystem, peerTask: peerTask{ host: host, needBackSource: needBackSource, request: &request.PeerTaskRequest, peerPacketStream: peerPacketStream, pieceManager: pieceManager, peerPacketReady: make(chan bool, 1), peerID: request.PeerId, taskID: result.TaskId, singlePiece: singlePiece, done: make(chan struct{}), span: span, once: sync.Once{}, readyPieces: NewBitmap(), requestedPieces: NewBitmap(), failedPieceCh: make(chan int32, config.DefaultPieceChanSize), failedReason: failedReasonNotSet, failedCode: base.Code_UnknownError, contentLength: atomic.NewInt64(-1), pieceParallelCount: atomic.NewInt32(0), totalPiece: -1, schedulerOption: schedulerOption, schedulerClient: schedulerClient, limiter: limiter, completedLength: atomic.NewInt64(0), usedTraffic: atomic.NewInt64(0), SugaredLoggerOnWith: logger.With("peer", request.PeerId, "task", result.TaskId, "component", "filePeerTask"), }, } // bind func that base peer task did not implement pt.backSourceFunc = pt.backSource pt.setContentLengthFunc = pt.SetContentLength pt.setTotalPiecesFunc = pt.SetTotalPieces pt.reportPieceResultFunc = pt.ReportPieceResult return ctx, pt, nil, nil } func (pt *filePeerTask) Start(ctx context.Context) (chan *FilePeerTaskProgress, error) { pt.ctx, pt.cancel = context.WithCancel(ctx) if pt.needBackSource { pt.contentLength.Store(-1) _ = pt.callback.Init(pt) go pt.backSource() return pt.progressCh, nil } pt.pullPieces(pt.cleanUnfinished) // return a progress channel for request download progress return pt.progressCh, nil } func (pt *filePeerTask) ReportPieceResult(result *pieceTaskResult) error { // goroutine safe for channel and send on closed channel defer pt.recoverFromPanic() pt.Debugf("report piece %d result, success: %t", result.piece.PieceNum, result.pieceResult.Success) // retry failed piece if !result.pieceResult.Success { result.pieceResult.FinishedCount = pt.readyPieces.Settled() _ = pt.peerPacketStream.Send(result.pieceResult) if result.notRetry { pt.Warnf("piece %d download failed, no retry", result.piece.PieceNum) return nil } select { case <-pt.done: pt.Infof("peer task done, stop to send failed piece") case <-pt.ctx.Done(): pt.Debugf("context done due to %s, stop to send failed piece", pt.ctx.Err()) case pt.failedPieceCh <- result.pieceResult.PieceInfo.PieceNum: pt.Warnf("piece %d download failed, retry later", result.piece.PieceNum) } return nil } pt.lock.Lock() if pt.readyPieces.IsSet(result.pieceResult.PieceInfo.PieceNum) { pt.lock.Unlock() pt.Warnf("piece %d is already reported, skipped", result.pieceResult.PieceInfo.PieceNum) return nil } // mark piece processed pt.readyPieces.Set(result.pieceResult.PieceInfo.PieceNum) pt.completedLength.Add(int64(result.piece.RangeSize)) pt.lock.Unlock() result.pieceResult.FinishedCount = pt.readyPieces.Settled() _ = pt.peerPacketStream.Send(result.pieceResult) // send progress first to avoid close channel panic p := &FilePeerTaskProgress{ State: &ProgressState{ Success: result.pieceResult.Success, Code: result.pieceResult.Code, Msg: "downloading", }, TaskID: pt.taskID, PeerID: pt.peerID, ContentLength: pt.contentLength.Load(), CompletedLength: pt.completedLength.Load(), PeerTaskDone: false, } select { case <-pt.progressStopCh: case pt.progressCh <- p: pt.Debugf("progress sent, %d/%d", p.CompletedLength, p.ContentLength) case <-pt.ctx.Done(): pt.Warnf("send progress failed, peer task context done due to %s", pt.ctx.Err()) return pt.ctx.Err() } if !pt.isCompleted() { return nil } return pt.finish() } func (pt *filePeerTask) finish() error { var err error if err = pt.callback.ValidateDigest(pt); err != nil { pt.Errorf("validate digest error: %s", err) pt.span.RecordError(err) pt.cleanUnfinished() return err } // send last progress pt.once.Do(func() { defer pt.recoverFromPanic() // send EOF piece result to scheduler _ = pt.peerPacketStream.Send( scheduler.NewEndPieceResult(pt.taskID, pt.peerID, pt.readyPieces.Settled())) pt.Debugf("finish end piece result sent") var ( success = true code = base.Code_Success message = "Success" progressDone bool ) // callback to store data to output if err = pt.callback.Done(pt); err != nil { pt.Errorf("peer task done callback failed: %s", err) pt.span.RecordError(err) success = false code = base.Code_ClientError message = err.Error() } pg := &FilePeerTaskProgress{ State: &ProgressState{ Success: success, Code: code, Msg: message, }, TaskID: pt.taskID, PeerID: pt.peerID, ContentLength: pt.contentLength.Load(), CompletedLength: pt.completedLength.Load(), PeerTaskDone: true, DoneCallback: func() { progressDone = true close(pt.progressStopCh) }, } // wait client received progress pt.Infof("try to send finish progress, completed length: %d, state: (%t, %d, %s)", pg.CompletedLength, pg.State.Success, pg.State.Code, pg.State.Msg) select { case pt.progressCh <- pg: pt.Infof("finish progress sent") case <-pt.ctx.Done(): pt.Warnf("finish progress sent failed, context done") } // wait progress stopped select { case <-pt.progressStopCh: pt.Infof("progress stopped") case <-pt.ctx.Done(): if progressDone { pt.Debugf("progress stopped and context done") } else { pt.Warnf("wait progress stopped failed, context done, but progress not stopped") } } pt.Debugf("finished: close channel") pt.success = true close(pt.done) pt.span.SetAttributes(config.AttributePeerTaskSuccess.Bool(true)) pt.span.End() }) return err } func (pt *filePeerTask) cleanUnfinished() { defer pt.cancel() // send last progress pt.once.Do(func() { defer pt.recoverFromPanic() // send EOF piece result to scheduler _ = pt.peerPacketStream.Send( scheduler.NewEndPieceResult(pt.taskID, pt.peerID, pt.readyPieces.Settled())) pt.Debugf("clean up end piece result sent") if err := pt.callback.Fail(pt, pt.failedCode, pt.failedReason); err != nil { pt.span.RecordError(err) pt.Errorf("peer task fail callback failed: %s", err) } var progressDone bool pg := &FilePeerTaskProgress{ State: &ProgressState{ Success: false, Code: pt.failedCode, Msg: pt.failedReason, }, TaskID: pt.taskID, PeerID: pt.peerID, ContentLength: pt.contentLength.Load(), CompletedLength: pt.completedLength.Load(), PeerTaskDone: true, DoneCallback: func() { progressDone = true close(pt.progressStopCh) }, } // wait client received progress pt.Infof("try to send unfinished progress, completed length: %d, state: (%t, %d, %s)", pg.CompletedLength, pg.State.Success, pg.State.Code, pg.State.Msg) select { case pt.progressCh <- pg: pt.Debugf("unfinished progress sent") case <-pt.ctx.Done(): pt.Debugf("send unfinished progress failed, context done: %v", pt.ctx.Err()) } // wait progress stopped select { case <-pt.progressStopCh: pt.Infof("progress stopped") case <-pt.ctx.Done(): if progressDone { pt.Debugf("progress stopped and context done") } else { pt.Warnf("wait progress stopped failed, context done, but progress not stopped") } } pt.Debugf("clean unfinished: close channel") close(pt.done) pt.span.SetAttributes(config.AttributePeerTaskSuccess.Bool(false)) pt.span.SetAttributes(config.AttributePeerTaskCode.Int(int(pt.failedCode))) pt.span.SetAttributes(config.AttributePeerTaskMessage.String(pt.failedReason)) pt.span.End() }) } // TODO SetContentLength 需要和pt.finish解绑,以便在下载进度处可以看到文件长度 func (pt *filePeerTask) SetContentLength(i int64) error { pt.contentLength.Store(i) if !pt.isCompleted() { return errors.New("SetContentLength should call after task completed") } return pt.finish() } func (pt *filePeerTask) SetTotalPieces(i int32) { pt.totalPiece = i } func (pt *filePeerTask) backSource() { backSourceCtx, backSourceSpan := tracer.Start(pt.ctx, config.SpanBackSource) defer backSourceSpan.End() defer pt.cleanUnfinished() if pt.disableBackSource { pt.Errorf(reasonBackSourceDisabled) pt.failedReason = reasonBackSourceDisabled return } _ = pt.callback.Init(pt) reportPieceCtx, reportPieceSpan := tracer.Start(backSourceCtx, config.SpanReportPieceResult) defer reportPieceSpan.End() if peerPacketStream, err := pt.schedulerClient.ReportPieceResult(reportPieceCtx, pt.taskID, pt.request); err != nil { logger.Errorf("step 2: peer %s report piece failed: err", pt.request.PeerId, err) } else { pt.peerPacketStream = peerPacketStream } logger.Infof("step 2: start report peer %s back source piece result", pt.request.PeerId) err := pt.pieceManager.DownloadSource(pt.ctx, pt, pt.request) if err != nil { pt.Errorf("download from source error: %s", err) pt.failedReason = err.Error() backSourceSpan.SetAttributes(config.AttributePeerTaskSuccess.Bool(false)) backSourceSpan.RecordError(err) return } pt.Infof("download from source ok") backSourceSpan.SetAttributes(config.AttributePeerTaskSuccess.Bool(true)) _ = pt.finish() }