dragonfly/client/daemon/peer/peertask_conductor.go


/*
* Copyright 2022 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package peer

import (
"bytes"
"context"
"errors"
"fmt"
"io"
"runtime/debug"
"sync"
"time"
semconv "go.opentelemetry.io/otel/semconv/v1.7.0"
"go.opentelemetry.io/otel/trace"
"go.uber.org/atomic"
"golang.org/x/time/rate"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/status"
commonv1 "d7y.io/api/pkg/apis/common/v1"
errordetailsv1 "d7y.io/api/pkg/apis/errordetails/v1"
schedulerv1 "d7y.io/api/pkg/apis/scheduler/v1"
"d7y.io/dragonfly/v2/client/config"
"d7y.io/dragonfly/v2/client/daemon/metrics"
"d7y.io/dragonfly/v2/client/daemon/storage"
"d7y.io/dragonfly/v2/internal/dferrors"
logger "d7y.io/dragonfly/v2/internal/dflog"
"d7y.io/dragonfly/v2/pkg/digest"
"d7y.io/dragonfly/v2/pkg/idgen"
nethttp "d7y.io/dragonfly/v2/pkg/net/http"
"d7y.io/dragonfly/v2/pkg/rpc/common"
schedulerclient "d7y.io/dragonfly/v2/pkg/rpc/scheduler/client"
"d7y.io/dragonfly/v2/pkg/source"
)
const (
// TODO implement peer task health check
// reasonContextCanceled = "context canceled"
// reasonReScheduleTimeout = "wait more available peers from scheduler timeout"
reasonScheduleTimeout = "wait first peer packet from scheduler timeout"
reasonPeerGoneFromScheduler = "scheduler says client should disconnect"
reasonBackSourceDisabled = "download from source disabled"
failedReasonNotSet = "unknown"
)
var _ Task = (*peerTaskConductor)(nil)
// peerTaskConductor will fetch all pieces from other peers and send piece info to the broker
type peerTaskConductor struct {
TaskOption
*logger.SugaredLoggerOnWith
// ctx is with span info for tracing
// we use successCh and failCh to mark task success or failure
ctx context.Context
ctxCancel context.CancelFunc
// piece download uses this context
pieceDownloadCtx context.Context
// when backing to source, cancel all piece download actions
pieceDownloadCancel context.CancelFunc
// request is the original PeerTaskRequest
request *schedulerv1.PeerTaskRequest
// needBackSource indicates downloading the resource from the source instead of other peers
needBackSource *atomic.Bool
seed bool
peerTaskManager *peerTaskManager
storage storage.TaskStorageDriver
schedulerClient schedulerclient.V1
// peer task meta info
peerID string
taskID string
totalPiece *atomic.Int32
digest *atomic.String
contentLength *atomic.Int64
completedLength *atomic.Int64
usedTraffic *atomic.Uint64
header atomic.Value
broker *pieceBroker
sizeScope commonv1.SizeScope
singlePiece *schedulerv1.SinglePiece
tinyData *TinyData
// peerPacketStream stands for the peer packet stream from the scheduler
peerPacketStream schedulerv1.Scheduler_ReportPieceResultClient
legacyPeerCount *atomic.Int64
// pieceTaskSyncManager syncs piece task from other peers
pieceTaskSyncManager *pieceTaskSyncManager
// the same actions must be done only once, like closing the done channel and so on
statusOnce sync.Once
// successCh will be closed when the peer task succeeds
successCh chan struct{}
// failCh will be closed after the peer task fails
failCh chan struct{}
// span stands for the OpenTelemetry trace span
span trace.Span
// failedReason will be set when the peer task fails
failedReason string
// failedCode will be set when the peer task fails
failedCode commonv1.Code
// readyPieces stands for all downloaded pieces
readyPieces *Bitmap
// lock used by piece result management; lock before updating readyPieces
readyPiecesLock sync.RWMutex
// runningPieces stands for all pieces being downloaded
runningPieces *Bitmap
// lock used by piece download worker
runningPiecesLock sync.Mutex
// requestedPieces stands for all pieces requested from peers
requestedPieces *Bitmap
// lock used by piece download worker
requestedPiecesLock sync.RWMutex
// lock used by send piece result
sendPieceResultLock sync.Mutex
// trafficShaper used to automatically allocate bandwidth for every peer task
trafficShaper TrafficShaper
// limiter will be used when the per peer task rate limit is enabled
limiter *rate.Limiter
startTime time.Time
// subtask only
parent *peerTaskConductor
rg *nethttp.Range
sourceErrorStatus *status.Status
}
type TaskOption struct {
// PeerHost info about current PeerHost
PeerHost *schedulerv1.PeerHost
// PieceManager will be used for downloading piece
PieceManager PieceManager
StorageManager storage.Manager
// schedule options
SchedulerOption config.SchedulerOption
CalculateDigest bool
GRPCCredentials credentials.TransportCredentials
GRPCDialTimeout time.Duration
// WatchdogTimeout > 0 indicates starting a watchdog for every single peer task
WatchdogTimeout time.Duration
}
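// newPeerTaskConductor builds a peerTaskConductor for the given request: it derives a traced context,
// generates the task ID from the URL and URL meta, and initializes logging, counters, bitmaps and channels.
// Registration with the scheduler happens later in register/start.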
func (ptm *peerTaskManager) newPeerTaskConductor(
ctx context.Context,
request *schedulerv1.PeerTaskRequest,
limit rate.Limit,
parent *peerTaskConductor,
rg *nethttp.Range,
seed bool) *peerTaskConductor {
// use a new context with span info
ctx = trace.ContextWithSpan(context.Background(), trace.SpanFromContext(ctx))
ctx, span := tracer.Start(ctx, config.SpanPeerTask, trace.WithSpanKind(trace.SpanKindClient))
span.SetAttributes(config.AttributePeerHost.String(ptm.PeerHost.Id))
span.SetAttributes(semconv.NetHostIPKey.String(ptm.PeerHost.Ip))
span.SetAttributes(config.AttributePeerID.String(request.PeerId))
span.SetAttributes(semconv.HTTPURLKey.String(request.Url))
taskID := idgen.TaskIDV1(request.Url, request.UrlMeta)
request.TaskId = taskID
// init log with values
var (
log *logger.SugaredLoggerOnWith
traceID = span.SpanContext().TraceID()
)
logKV := []any{
"peer", request.PeerId,
"task", taskID,
"component", "PeerTask",
}
if traceID.IsValid() {
logKV = append(logKV, "trace", traceID.String())
}
log = logger.With(logKV...)
span.SetAttributes(config.AttributeTaskID.String(taskID))
ctx, cancel := context.WithCancel(ctx)
ptc := &peerTaskConductor{
TaskOption: ptm.TaskOption,
peerTaskManager: ptm,
request: request,
startTime: time.Now(),
ctx: ctx,
ctxCancel: cancel,
broker: newPieceBroker(),
peerID: request.PeerId,
taskID: taskID,
successCh: make(chan struct{}),
failCh: make(chan struct{}),
legacyPeerCount: atomic.NewInt64(0),
span: span,
readyPieces: NewBitmap(),
runningPieces: NewBitmap(),
requestedPieces: NewBitmap(),
failedReason: failedReasonNotSet,
failedCode: commonv1.Code_UnknownError,
contentLength: atomic.NewInt64(-1),
totalPiece: atomic.NewInt32(-1),
digest: atomic.NewString(""),
trafficShaper: ptm.trafficShaper,
limiter: rate.NewLimiter(limit, int(limit)),
completedLength: atomic.NewInt64(0),
usedTraffic: atomic.NewUint64(0),
SugaredLoggerOnWith: log,
seed: seed,
parent: parent,
rg: rg,
}
ptc.pieceDownloadCtx, ptc.pieceDownloadCancel = context.WithCancel(ptc.ctx)
return ptc
}
// register registers to the scheduler; if registration fails and auto back source is disabled, return the error, otherwise return nil
func (pt *peerTaskConductor) register() error {
pt.Debugf("request overview, pid: %s, url: %s, filter: %s, tag: %s, range: %s, digest: %s, header: %#v",
pt.request.PeerId, pt.request.Url, pt.request.UrlMeta.Filter, pt.request.UrlMeta.Tag, pt.request.UrlMeta.Range, pt.request.UrlMeta.Digest, pt.request.UrlMeta.Header)
// trace register
regCtx, cancel := context.WithTimeout(pt.ctx, pt.SchedulerOption.ScheduleTimeout.Duration)
defer cancel()
regCtx, regSpan := tracer.Start(regCtx, config.SpanRegisterTask)
var (
needBackSource bool
sizeScope commonv1.SizeScope
singlePiece *schedulerv1.SinglePiece
tinyData *TinyData
)
pt.Infof("step 1: peer %s start to register", pt.request.PeerId)
pt.schedulerClient = pt.peerTaskManager.SchedulerClient
result, err := pt.schedulerClient.RegisterPeerTask(regCtx, pt.request)
regSpan.RecordError(err)
regSpan.End()
if err != nil {
if err == context.DeadlineExceeded {
pt.Errorf("scheduler did not response in %s", pt.SchedulerOption.ScheduleTimeout.Duration)
}
pt.Errorf("step 1: peer %s register failed: %s", pt.request.PeerId, err)
if pt.SchedulerOption.DisableAutoBackSource {
// when registration fails, some later actions still need a peerPacketStream, so use a dummy one
pt.peerPacketStream = &dummyPeerPacketStream{}
pt.Errorf("register peer task failed: %s, peer id: %s, auto back source disabled", err, pt.request.PeerId)
pt.span.RecordError(err)
pt.cancel(commonv1.Code_SchedError, err.Error())
return err
}
needBackSource = true
// can not tell whether it is a source or scheduler error, create a new dummy scheduler client
pt.schedulerClient = &dummySchedulerClient{}
result = &schedulerv1.RegisterResult{TaskId: pt.taskID}
pt.Warnf("register peer task failed: %s, peer id: %s, try to back source", err, pt.request.PeerId)
} else {
pt.Infof("register task success, SizeScope: %s", commonv1.SizeScope_name[int32(result.SizeScope)])
}
var header map[string]string
if !needBackSource {
sizeScope = result.SizeScope
switch result.SizeScope {
case commonv1.SizeScope_NORMAL:
pt.span.SetAttributes(config.AttributePeerTaskSizeScope.String("normal"))
case commonv1.SizeScope_SMALL:
pt.span.SetAttributes(config.AttributePeerTaskSizeScope.String("small"))
if piece, ok := result.DirectPiece.(*schedulerv1.RegisterResult_SinglePiece); ok {
singlePiece = piece.SinglePiece
}
if result.ExtendAttribute != nil {
header = result.ExtendAttribute.Header
}
case commonv1.SizeScope_TINY:
pt.span.SetAttributes(config.AttributePeerTaskSizeScope.String("tiny"))
if piece, ok := result.DirectPiece.(*schedulerv1.RegisterResult_PieceContent); ok {
tinyData = &TinyData{
TaskID: result.TaskId,
PeerID: pt.request.PeerId,
Content: piece.PieceContent,
}
} else {
err = errors.New("scheduler returned tiny piece but can not parse piece content")
// registration can not complete, some later actions still need a peerPacketStream, so use a dummy one
pt.peerPacketStream = &dummyPeerPacketStream{}
pt.span.RecordError(err)
pt.Errorf("%s", err)
pt.cancel(commonv1.Code_SchedError, err.Error())
return err
}
if result.ExtendAttribute != nil {
header = result.ExtendAttribute.Header
}
case commonv1.SizeScope_EMPTY:
tinyData = &TinyData{
TaskID: result.TaskId,
PeerID: pt.request.PeerId,
Content: []byte{},
}
pt.span.SetAttributes(config.AttributePeerTaskSizeScope.String("empty"))
if result.ExtendAttribute != nil {
header = result.ExtendAttribute.Header
}
}
}
peerPacketStream, err := pt.schedulerClient.ReportPieceResult(pt.ctx, pt.request)
pt.Infof("step 2: start report piece result")
if err != nil {
// the piece result stream is unavailable, some later actions still need a peerPacketStream, so use a dummy one
pt.peerPacketStream = &dummyPeerPacketStream{}
pt.span.RecordError(err)
pt.cancel(commonv1.Code_SchedError, err.Error())
return err
}
pt.peerPacketStream = peerPacketStream
pt.sizeScope = sizeScope
pt.singlePiece = singlePiece
pt.tinyData = tinyData
pt.needBackSource = atomic.NewBool(needBackSource)
if len(header) > 0 {
pt.SetHeader(header)
}
return nil
}
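// start launches the peer task: seed tasks go straight to back source with dummy scheduler clients,
// other tasks register with the scheduler first; then the task is added to the traffic shaper and the
// piece broker and piece pulling goroutines are started.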
func (pt *peerTaskConductor) start() error {
// when it is a seed task, set up back source
if pt.seed {
pt.peerPacketStream = &dummyPeerPacketStream{}
pt.schedulerClient = &dummySchedulerClient{}
pt.sizeScope = commonv1.SizeScope_NORMAL
pt.needBackSource = atomic.NewBool(true)
} else {
// register to scheduler
if err := pt.register(); err != nil {
return err
}
}
pt.trafficShaper.AddTask(pt.peerTaskManager.getRunningTaskKey(pt.taskID, pt.peerID), pt)
go pt.broker.Start()
go pt.pullPieces()
return nil
}
func (pt *peerTaskConductor) GetPeerID() string {
return pt.peerID
}
func (pt *peerTaskConductor) GetTaskID() string {
return pt.taskID
}
func (pt *peerTaskConductor) GetStorage() storage.TaskStorageDriver {
return pt.storage
}
func (pt *peerTaskConductor) GetContentLength() int64 {
return pt.contentLength.Load()
}
func (pt *peerTaskConductor) SetContentLength(i int64) {
pt.contentLength.Store(i)
}
func (pt *peerTaskConductor) AddTraffic(n uint64) {
pt.usedTraffic.Add(n)
}
func (pt *peerTaskConductor) GetTraffic() uint64 {
return pt.usedTraffic.Load()
}
func (pt *peerTaskConductor) GetTotalPieces() int32 {
return pt.totalPiece.Load()
}
func (pt *peerTaskConductor) SetTotalPieces(i int32) {
pt.totalPiece.Store(i)
}
func (pt *peerTaskConductor) SetPieceMd5Sign(md5 string) {
pt.digest.Store(md5)
}
func (pt *peerTaskConductor) GetPieceMd5Sign() string {
return pt.digest.Load()
}
func (pt *peerTaskConductor) SetHeader(header map[string]string) {
var hdr = &source.Header{}
for k, v := range header {
hdr.Set(k, v)
}
pt.header.Store(hdr)
}
func (pt *peerTaskConductor) GetHeader() *source.Header {
hdr := pt.header.Load()
if hdr != nil {
return hdr.(*source.Header)
}
return nil
}
func (pt *peerTaskConductor) Context() context.Context {
return pt.ctx
}
func (pt *peerTaskConductor) Log() *logger.SugaredLoggerOnWith {
return pt.SugaredLoggerOnWith
}
func (pt *peerTaskConductor) UpdateSourceErrorStatus(st *status.Status) {
pt.sourceErrorStatus = st
}
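// cancel marks the peer task failed with the given code and reason, exactly once.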
func (pt *peerTaskConductor) cancel(code commonv1.Code, reason string) {
pt.statusOnce.Do(func() {
pt.failedCode = code
pt.failedReason = reason
pt.fail()
})
}
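// cancelNotRegisterred handles failures before the peer task is fully registered: it records the failed
// code and reason, updates metrics, marks the task done in the manager and tears down the broker, span
// and piece downloading, without reporting results back to the scheduler.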
func (pt *peerTaskConductor) cancelNotRegisterred(code commonv1.Code, reason string) {
pt.statusOnce.Do(func() {
pt.failedCode = code
pt.failedReason = reason
metrics.PeerTaskFailedCount.WithLabelValues(metrics.FailTypeInit).Add(1)
pt.peerTaskManager.PeerTaskDone(pt.taskID, pt.peerID)
pt.span.SetAttributes(config.AttributePeerTaskSuccess.Bool(false))
pt.span.SetAttributes(config.AttributePeerTaskCode.Int(int(pt.failedCode)))
pt.span.SetAttributes(config.AttributePeerTaskMessage.String(pt.failedReason))
close(pt.failCh)
pt.broker.Stop()
pt.span.End()
pt.pieceDownloadCancel()
if pt.pieceTaskSyncManager != nil {
pt.pieceTaskSyncManager.cancel()
}
})
}
// only used when receiving the back source code from the scheduler
func (pt *peerTaskConductor) markBackSource() {
pt.needBackSource.Store(true)
}
// only used when the legacy get-pieces-from-peers schedule times out
func (pt *peerTaskConductor) forceBackSource() {
pt.needBackSource.Store(true)
pt.backSource()
}
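// backSource cancels all peer-to-peer piece downloading and downloads the whole task directly from the source.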
func (pt *peerTaskConductor) backSource() {
// cancel all piece download
pt.pieceDownloadCancel()
// cancel all sync pieces
if pt.pieceTaskSyncManager != nil {
pt.pieceTaskSyncManager.cancel()
}
ctx, span := tracer.Start(pt.ctx, config.SpanBackSource)
pt.SetContentLength(-1)
err := pt.PieceManager.DownloadSource(ctx, pt, pt.request, pt.rg)
if err != nil {
pt.Errorf("download from source error: %s", err)
span.SetAttributes(config.AttributePeerTaskSuccess.Bool(false))
span.RecordError(err)
if isBackSourceError(err) {
pt.cancel(commonv1.Code_ClientBackSourceError, err.Error())
} else {
pt.cancel(commonv1.Code_ClientError, err.Error())
}
span.End()
return
}
pt.Done()
pt.Infof("download from source ok")
span.SetAttributes(config.AttributePeerTaskSuccess.Bool(true))
span.End()
return
}
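// pullPieces decides how to fetch the task content: back source when needed, otherwise dispatch by size
// scope (empty, tiny, small single piece, or normal P2P download).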
func (pt *peerTaskConductor) pullPieces() {
if pt.needBackSource.Load() {
pt.backSource()
return
}
switch pt.sizeScope {
case commonv1.SizeScope_EMPTY:
pt.storeEmptyPeerTask()
case commonv1.SizeScope_TINY:
pt.storeTinyPeerTask()
case commonv1.SizeScope_SMALL:
pt.pullSinglePiece()
case commonv1.SizeScope_NORMAL:
pt.pullPiecesWithP2P()
default:
pt.cancel(commonv1.Code_ClientError, fmt.Sprintf("unknown size scope: %d", pt.sizeScope))
}
}
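// pullPiecesWithP2P sets up the piece dispatcher and the piece task sync manager, then receives peer
// packets from the scheduler to drive P2P downloading.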
func (pt *peerTaskConductor) pullPiecesWithP2P() {
var (
// keep same size with pt.failedPieceCh for avoiding deadlock
pieceRequestQueue = NewPieceDispatcher(config.DefaultPieceDispatcherRandomRatio, pt.Log())
)
ctx, cancel := context.WithCancel(pt.ctx)
pt.pieceTaskSyncManager = &pieceTaskSyncManager{
ctx: ctx,
ctxCancel: cancel,
peerTaskConductor: pt,
pieceRequestQueue: pieceRequestQueue,
workers: map[string]*pieceTaskSynchronizer{},
}
pt.receivePeerPacket(pieceRequestQueue)
}
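// storeEmptyPeerTask registers a zero-length task in storage and marks the peer task done.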
func (pt *peerTaskConductor) storeEmptyPeerTask() {
pt.SetContentLength(0)
pt.SetTotalPieces(0)
ctx := pt.ctx
var err error
storageDriver, err := pt.StorageManager.RegisterTask(ctx,
&storage.RegisterTaskRequest{
PeerTaskMetadata: storage.PeerTaskMetadata{
PeerID: pt.peerID,
TaskID: pt.taskID,
},
DesiredLocation: "",
ContentLength: 0,
TotalPieces: 0,
})
pt.storage = storageDriver
if err != nil {
pt.Errorf("register tiny data storage failed: %s", err)
pt.cancel(commonv1.Code_ClientError, err.Error())
return
}
if err = pt.UpdateStorage(); err != nil {
pt.Errorf("update tiny data storage failed: %s", err)
pt.cancel(commonv1.Code_ClientError, err.Error())
return
}
pt.Debug("store empty metadata")
pt.Done()
}
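// storeTinyPeerTask writes the tiny piece content returned by the scheduler directly into storage as a single piece.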
func (pt *peerTaskConductor) storeTinyPeerTask() {
contentLength := int64(len(pt.tinyData.Content))
pt.SetContentLength(contentLength)
pt.SetTotalPieces(1)
ctx := pt.ctx
var err error
storageDriver, err := pt.StorageManager.RegisterTask(ctx,
&storage.RegisterTaskRequest{
PeerTaskMetadata: storage.PeerTaskMetadata{
PeerID: pt.tinyData.PeerID,
TaskID: pt.tinyData.TaskID,
},
DesiredLocation: "",
ContentLength: contentLength,
TotalPieces: 1,
// TODO check digest
})
pt.storage = storageDriver
if err != nil {
pt.Errorf("register tiny data storage failed: %s", err)
pt.cancel(commonv1.Code_ClientError, err.Error())
return
}
n, err := pt.GetStorage().WritePiece(ctx,
&storage.WritePieceRequest{
PeerTaskMetadata: storage.PeerTaskMetadata{
PeerID: pt.tinyData.PeerID,
TaskID: pt.tinyData.TaskID,
},
PieceMetadata: storage.PieceMetadata{
Num: 0,
Md5: "",
Offset: 0,
Range: nethttp.Range{
Start: 0,
Length: contentLength,
},
Style: 0,
},
UnknownLength: false,
Reader: bytes.NewBuffer(pt.tinyData.Content),
GenMetadata: func(n int64) (int32, int64, bool) {
return 1, contentLength, true
},
})
if err != nil {
pt.Errorf("write tiny data storage failed: %s", err)
pt.cancel(commonv1.Code_ClientError, err.Error())
return
}
if n != contentLength {
pt.Errorf("write tiny data storage failed, want: %d, wrote: %d", contentLength, n)
// err is nil here, so build the failure reason from the length mismatch
pt.cancel(commonv1.Code_ClientError, fmt.Sprintf("write tiny data storage failed, want: %d, wrote: %d", contentLength, n))
return
}
err = pt.UpdateStorage()
if err != nil {
pt.Errorf("update tiny data storage failed: %s", err)
pt.cancel(commonv1.Code_ClientError, err.Error())
return
}
pt.Debugf("store tiny data, len: %d", contentLength)
pt.PublishPieceInfo(0, uint32(contentLength))
}
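// receivePeerPacket is the scheduler receive loop: it reads peer packets from the piece result stream,
// starts download workers on the first packet, updates piece synchronizers for new peers, and switches
// to back source or fails the task according to the packet code.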
func (pt *peerTaskConductor) receivePeerPacket(pieceRequestQueue PieceDispatcher) {
var (
lastNotReadyPiece int32 = 0
peerPacket *schedulerv1.PeerPacket
err error
firstPacketReceived bool
firstPacketDone = make(chan bool)
)
// only record first schedule result
// other schedule results will be recorded as events in the peer task span
_, firstPeerSpan := tracer.Start(pt.ctx, config.SpanFirstSchedule)
defer func() {
if !firstPacketReceived {
firstPeerSpan.End()
}
if pt.needBackSource.Load() {
return
}
select {
case <-pt.successCh:
case <-pt.failCh:
default:
pt.Errorf("receivePeerPacket exit, but peer task not success or fail")
pt.Fail()
}
}()
go pt.waitFirstPeerPacket(firstPacketDone)
loop:
for {
select {
case <-pt.successCh:
pt.Infof("peer task success, stop wait peer packet from scheduler")
break loop
case <-pt.failCh:
pt.Infof("peer task fail, stop wait peer packet from scheduler")
break loop
default:
}
peerPacket, err = pt.peerPacketStream.Recv()
if err == io.EOF {
pt.Debugf("peerPacketStream closed")
break loop
}
if err != nil {
// some errors, like commonv1.Code_SchedReregister, after reregister success,
// we can continue to receive peer packet from the new scheduler
cont := pt.confirmReceivePeerPacketError(err)
if cont {
continue
}
if !firstPacketReceived {
firstPeerSpan.RecordError(err)
}
break loop
}
pt.Debugf("receive peerPacket %v", peerPacket)
if peerPacket.Code != commonv1.Code_Success {
if peerPacket.Code == commonv1.Code_SchedNeedBackSource {
// close firstPacketDone before going back to source, otherwise waitFirstPeerPacket may time out
if !firstPacketReceived {
close(firstPacketDone)
}
pt.forceBackSource()
pt.Infof("receive back source code")
return
}
pt.Errorf("receive peer packet with error: %d", peerPacket.Code)
if pt.isExitPeerPacketCode(peerPacket) {
pt.Errorf(pt.failedReason)
pt.cancel(pt.failedCode, pt.failedReason)
if !firstPacketReceived {
firstPeerSpan.RecordError(fmt.Errorf(pt.failedReason))
}
pt.span.AddEvent("receive exit peer packet",
trace.WithAttributes(config.AttributePeerPacketCode.Int(int(peerPacket.Code))))
pt.span.RecordError(fmt.Errorf(pt.failedReason))
break
} else {
pt.span.AddEvent("receive not success peer packet",
trace.WithAttributes(config.AttributePeerPacketCode.Int(int(peerPacket.Code))))
}
continue
}
if peerPacket.MainPeer == nil && peerPacket.CandidatePeers == nil {
pt.Warnf("scheduler client send a peerPacket with empty peers")
continue
}
pt.Infof("receive new peer packet, main peer: %s, parallel count: %d",
peerPacket.MainPeer.PeerId, peerPacket.ParallelCount)
pt.span.AddEvent("receive new peer packet",
trace.WithAttributes(config.AttributeMainPeer.String(peerPacket.MainPeer.PeerId)))
if !firstPacketReceived {
pt.initDownloadPieceWorkers(peerPacket.ParallelCount, pieceRequestQueue)
firstPeerSpan.SetAttributes(config.AttributeMainPeer.String(peerPacket.MainPeer.PeerId))
firstPeerSpan.End()
}
lastNotReadyPiece = pt.updateSynchronizers(lastNotReadyPiece, peerPacket)
if !firstPacketReceived {
// trigger legacy get piece once to avoid first schedule timeout
firstPacketReceived = true
close(firstPacketDone)
}
}
// double check to avoid waitFirstPeerPacket timeout
if !firstPacketReceived {
close(firstPacketDone)
}
}
// updateSynchronizers converts peers into synchronizers; failed peers are updated back into the schedulerv1.PeerPacket
func (pt *peerTaskConductor) updateSynchronizers(lastNum int32, p *schedulerv1.PeerPacket) int32 {
desiredPiece, ok := pt.getNextNotReadyPieceNum(lastNum)
if !ok {
pt.Infof("all pieces is ready, peer task completed, skip to synchronize")
p.MainPeer = nil
p.CandidatePeers = nil
return desiredPiece
}
var peers = []*schedulerv1.PeerPacket_DestPeer{p.MainPeer}
peers = append(peers, p.CandidatePeers...)
pt.pieceTaskSyncManager.syncPeers(peers, desiredPiece)
return desiredPiece
}
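// confirmReceivePeerPacketError inspects a receive error from the peer packet stream and returns whether
// receiving can continue (e.g. after a successful reregister); back source codes trigger back source,
// other errors cancel the task.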
func (pt *peerTaskConductor) confirmReceivePeerPacketError(err error) (cont bool) {
select {
case <-pt.successCh:
return false
case <-pt.failCh:
return false
default:
}
var (
failedCode = commonv1.Code_UnknownError
failedReason string
)
// extract DfError from grpc status
err = dferrors.ConvertGRPCErrorToDfError(err)
de, ok := err.(*dferrors.DfError)
if ok {
switch de.Code {
case commonv1.Code_SchedNeedBackSource:
pt.forceBackSource()
pt.Infof("receive back source code")
return false
case commonv1.Code_SchedReregister:
pt.Infof("receive reregister code")
regErr := pt.register()
if regErr == nil {
pt.Infof("reregister ok")
return true
}
pt.Errorf("reregister to scheduler error: %s", regErr)
fallthrough
default:
failedCode = de.Code
failedReason = de.Message
pt.Errorf("receive peer packet failed: %s", pt.failedReason)
}
} else {
pt.Errorf("receive peer packet failed: %s", err)
}
pt.cancel(failedCode, failedReason)
return false
}
func (pt *peerTaskConductor) isExitPeerPacketCode(pp *schedulerv1.PeerPacket) bool {
switch pp.Code {
case commonv1.Code_ResourceLacked, commonv1.Code_BadRequest,
commonv1.Code_PeerTaskNotFound, commonv1.Code_UnknownError, commonv1.Code_RequestTimeOut:
// 1xxx
pt.failedCode = pp.Code
pt.failedReason = fmt.Sprintf("receive exit peer packet with code %d", pp.Code)
return true
case commonv1.Code_SchedError, commonv1.Code_SchedTaskStatusError, commonv1.Code_SchedPeerNotFound, commonv1.Code_SchedForbidden:
// 5xxx
pt.failedCode = pp.Code
pt.failedReason = fmt.Sprintf("receive exit peer packet with code %d", pp.Code)
return true
case commonv1.Code_SchedPeerGone:
pt.failedReason = reasonPeerGoneFromScheduler
pt.failedCode = commonv1.Code_SchedPeerGone
return true
case commonv1.Code_CDNTaskRegistryFail:
// 6xxx
pt.failedCode = pp.Code
pt.failedReason = fmt.Sprintf("receive exit peer packet with code %d", pp.Code)
return true
case commonv1.Code_BackToSourceAborted:
st := status.Newf(codes.Aborted, "source response is not valid")
st, err := st.WithDetails(pp.GetSourceError())
if err != nil {
pt.Errorf("convert source error details error: %s", err.Error())
return false
}
pt.sourceErrorStatus = st
return true
}
return false
}
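// pullSinglePiece downloads a SMALL size scope task as one piece from the peer designated by the
// scheduler, falling back to normal P2P downloading on failure.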
func (pt *peerTaskConductor) pullSinglePiece() {
pt.Infof("single piece, dest peer id: %s, piece num: %d, size: %d",
pt.singlePiece.DstPid, pt.singlePiece.PieceInfo.PieceNum, pt.singlePiece.PieceInfo.RangeSize)
ctx, span := tracer.Start(pt.ctx, fmt.Sprintf(config.SpanDownloadPiece, pt.singlePiece.PieceInfo.PieceNum))
span.SetAttributes(config.AttributePiece.Int(int(pt.singlePiece.PieceInfo.PieceNum)))
pt.SetContentLength(int64(pt.singlePiece.PieceInfo.RangeSize))
pt.SetTotalPieces(1)
pt.SetPieceMd5Sign(digest.SHA256FromStrings(pt.singlePiece.PieceInfo.PieceMd5))
request := &DownloadPieceRequest{
storage: pt.GetStorage(),
piece: pt.singlePiece.PieceInfo,
log: pt.Log(),
TaskID: pt.GetTaskID(),
PeerID: pt.GetPeerID(),
DstPid: pt.singlePiece.DstPid,
DstAddr: pt.singlePiece.DstAddr,
}
if result, err := pt.PieceManager.DownloadPiece(ctx, request); err == nil {
pt.reportSuccessResult(request, result)
pt.PublishPieceInfo(request.piece.PieceNum, request.piece.RangeSize)
span.SetAttributes(config.AttributePieceSuccess.Bool(true))
span.End()
pt.Infof("single piece download success")
} else {
// fallback to download from other peers
span.RecordError(err)
span.SetAttributes(config.AttributePieceSuccess.Bool(false))
span.End()
pt.Warnf("single piece download failed, switch to download from other peers")
pt.ReportPieceResult(request, result, err)
pt.pullPiecesWithP2P()
}
}
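// updateMetadata merges task metadata from a received piece packet (total piece count, md5 sign, content
// length and response header) and persists changes to storage; a content length mismatch cancels the task.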
func (pt *peerTaskConductor) updateMetadata(piecePacket *commonv1.PiecePacket) {
// update total piece
var metadataChanged bool
if piecePacket.TotalPiece > pt.GetTotalPieces() {
metadataChanged = true
pt.SetTotalPieces(piecePacket.TotalPiece)
pt.Debugf("update total piece count: %d, dst peer %s", piecePacket.TotalPiece, piecePacket.DstPid)
}
// update digest
if len(piecePacket.PieceMd5Sign) > 0 && len(pt.GetPieceMd5Sign()) == 0 {
metadataChanged = true
pt.SetPieceMd5Sign(piecePacket.PieceMd5Sign)
pt.Debugf("update digest: %s, dst peer %s", piecePacket.PieceMd5Sign, piecePacket.DstPid)
}
// update content length
if piecePacket.ContentLength > -1 && pt.GetContentLength() == -1 {
metadataChanged = true
pt.SetContentLength(piecePacket.ContentLength)
pt.span.SetAttributes(config.AttributeTaskContentLength.Int64(piecePacket.ContentLength))
pt.Debugf("update content length: %d, dst peer %s", piecePacket.ContentLength, piecePacket.DstPid)
} else if piecePacket.ContentLength > -1 && piecePacket.ContentLength != pt.GetContentLength() {
// corrupt data check
reason := fmt.Sprintf("corrupt data - content length did not match, current: %d, from piece packet: %d",
pt.GetContentLength(), piecePacket.ContentLength)
pt.Errorf(reason)
pt.cancel(commonv1.Code_ClientError, reason)
return
}
if piecePacket.ExtendAttribute != nil && len(piecePacket.ExtendAttribute.Header) > 0 && pt.GetHeader() == nil {
metadataChanged = true
pt.SetHeader(piecePacket.ExtendAttribute.Header)
pt.Debugf("update response header: %#v, dst peer %s", piecePacket.ExtendAttribute.Header, piecePacket.DstPid)
}
if metadataChanged {
err := pt.UpdateStorage()
if err != nil {
pt.Errorf("update storage error: %s", err)
}
}
}
func (pt *peerTaskConductor) initDownloadPieceWorkers(count int32, pieceRequestQueue PieceDispatcher) {
if count < 1 {
count = 4
}
for i := int32(0); i < count; i++ {
go pt.downloadPieceWorker(i, pieceRequestQueue)
}
}
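// waitFirstPeerPacket waits for the first peer packet from the scheduler; on timeout it either fails the
// task (auto back source disabled) or forces back source.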
func (pt *peerTaskConductor) waitFirstPeerPacket(done chan bool) {
// wait first available peer
select {
case <-pt.successCh:
pt.Infof("peer task succeed, no need to wait first peer")
return
case <-pt.failCh:
pt.Warnf("peer task failed, no need to wait first peer")
return
case <-done:
pt.Debugf("first peer packet received")
return
case <-time.After(pt.SchedulerOption.ScheduleTimeout.Duration):
if pt.SchedulerOption.DisableAutoBackSource {
pt.cancel(commonv1.Code_ClientScheduleTimeout, reasonBackSourceDisabled)
err := fmt.Errorf("%s, auto back source disabled", pt.failedReason)
pt.span.RecordError(err)
pt.Errorf(err.Error())
return
}
pt.Warnf("start download from source due to %s", reasonScheduleTimeout)
pt.span.AddEvent("back source due to schedule timeout")
pt.forceBackSource()
return
}
}
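// downloadPieceWorker is the download loop for one worker: it takes requests from the dispatcher, skips
// pieces that are already downloaded, downloads the rest and reports results, until the task finishes or
// piece downloading is cancelled.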
func (pt *peerTaskConductor) downloadPieceWorker(id int32, requests PieceDispatcher) {
for {
request, err := requests.Get()
if errors.Is(err, ErrNoValidPieceTemporarily) {
continue
}
if err != nil {
pt.Infof("piece download queue cancelled, peer download worker #%d exit, err: %v", id, err)
return
}
pt.readyPiecesLock.RLock()
if pt.readyPieces.IsSet(request.piece.PieceNum) {
pt.readyPiecesLock.RUnlock()
pt.Log().Debugf("piece %d is already downloaded, skip", request.piece.PieceNum)
continue
}
pt.readyPiecesLock.RUnlock()
result := pt.downloadPiece(id, request)
if result != nil {
requests.Report(result)
}
select {
case <-pt.pieceDownloadCtx.Done():
pt.Infof("piece download cancelled, peer download worker #%d exit", id)
return
case <-pt.successCh:
pt.Infof("peer task success, peer download worker #%d exit", id)
return
case <-pt.failCh:
pt.Errorf("peer task fail, peer download worker #%d exit", id)
return
default:
}
}
}
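// downloadPiece downloads a single piece after deduplicating against other workers and waiting for the
// rate limiter; on failure it reports the result and re-acquires the piece via the piece task sync manager.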
func (pt *peerTaskConductor) downloadPiece(workerID int32, request *DownloadPieceRequest) *DownloadPieceResult {
// only download a given piece in one worker at a time
pt.runningPiecesLock.Lock()
if pt.runningPieces.IsSet(request.piece.PieceNum) {
pt.runningPiecesLock.Unlock()
pt.Log().Debugf("piece %d is downloading, skip", request.piece.PieceNum)
// TODO save to queue for failed pieces
return nil
}
pt.runningPieces.Set(request.piece.PieceNum)
pt.runningPiecesLock.Unlock()
defer func() {
pt.runningPiecesLock.Lock()
pt.runningPieces.Clean(request.piece.PieceNum)
pt.runningPiecesLock.Unlock()
}()
ctx, span := tracer.Start(pt.pieceDownloadCtx, fmt.Sprintf(config.SpanDownloadPiece, request.piece.PieceNum))
span.SetAttributes(config.AttributePiece.Int(int(request.piece.PieceNum)))
span.SetAttributes(config.AttributePieceWorker.Int(int(workerID)))
// wait limit
if pt.limiter != nil && !pt.waitLimit(ctx, request) {
span.SetAttributes(config.AttributePieceSuccess.Bool(false))
span.End()
return nil
}
pt.Debugf("peer download worker #%d receive piece task, "+
"dest peer id: %s, piece num: %d, range start: %d, range size: %d",
workerID, request.DstPid, request.piece.PieceNum, request.piece.RangeStart, request.piece.RangeSize)
// download piece
// result is always not nil, PieceManager will report begin and end time
result, err := pt.PieceManager.DownloadPiece(ctx, request)
if err != nil {
pt.ReportPieceResult(request, result, err)
span.SetAttributes(config.AttributePieceSuccess.Bool(false))
span.End()
if pt.needBackSource.Load() {
pt.Infof("switch to back source, skip send failed piece")
return result
}
attempt, success := pt.pieceTaskSyncManager.acquire(
&commonv1.PieceTaskRequest{
Limit: 1,
TaskId: pt.taskID,
SrcPid: pt.peerID,
StartNum: uint32(request.piece.PieceNum),
})
pt.Infof("send failed piece %d to remote, attempt: %d, success: %d",
request.piece.PieceNum, attempt, success)
return result
}
// broadcast success piece
pt.reportSuccessResult(request, result)
pt.PublishPieceInfo(request.piece.PieceNum, request.piece.RangeSize)
span.SetAttributes(config.AttributePieceSuccess.Bool(true))
span.End()
return result
}
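// waitLimit records the piece size with the traffic shaper and waits on the per-task rate limiter; if
// waiting fails, it reports a Code_ClientRequestLimitFail piece result and cancels the task.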
func (pt *peerTaskConductor) waitLimit(ctx context.Context, request *DownloadPieceRequest) bool {
_, waitSpan := tracer.Start(ctx, config.SpanWaitPieceLimit)
pt.trafficShaper.Record(pt.peerTaskManager.getRunningTaskKey(request.TaskID, request.PeerID), int(request.piece.RangeSize))
err := pt.limiter.WaitN(pt.ctx, int(request.piece.RangeSize))
if err == nil {
waitSpan.End()
return true
}
pt.Errorf("request limiter error: %s", err)
waitSpan.RecordError(err)
waitSpan.End()
// send error piece result
sendError := pt.sendPieceResult(&schedulerv1.PieceResult{
TaskId: pt.GetTaskID(),
SrcPid: pt.GetPeerID(),
DstPid: request.DstPid,
PieceInfo: request.piece,
Success: false,
Code: commonv1.Code_ClientRequestLimitFail,
FinishedCount: 0, // update by peer task
})
if sendError != nil {
pt.Errorf("report piece result failed %s", err)
}
pt.cancel(commonv1.Code_ClientRequestLimitFail, err.Error())
return false
}
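// isCompleted reports whether the completed length matches the content length; it also cancels the task
// (and returns true) when all pieces are ready but the lengths disagree.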
func (pt *peerTaskConductor) isCompleted() bool {
if pt.completedLength.Load() == pt.GetContentLength() {
pt.Infof("completed content length: %d", pt.completedLength.Load())
return true
}
// corrupt data check, avoid hanging when the completed length does not match
if pt.readyPieces.Settled() == pt.totalPiece.Load() {
msg := fmt.Sprintf("corrupt data - ready piece count %d seems finished, but completed length %d does not match content length: %d",
pt.totalPiece.Load(), pt.completedLength.Load(), pt.GetContentLength())
pt.Errorf(msg)
pt.cancel(commonv1.Code_ClientError, msg)
return true
}
return false
}
// for legacy peers only
func (pt *peerTaskConductor) getNextPieceNum(cur int32) (int32, bool) {
if pt.isCompleted() {
return -1, false
}
i := cur
// try to find next not requested piece
pt.requestedPiecesLock.RLock()
defer pt.requestedPiecesLock.RUnlock()
for ; pt.requestedPieces.IsSet(i); i++ {
}
totalPiece := pt.GetTotalPieces()
if totalPiece > 0 && i >= totalPiece {
// double check, re-search from the beginning for pieces not yet requested
for i = int32(0); pt.requestedPieces.IsSet(i); i++ {
}
if totalPiece > 0 && i >= totalPiece {
return -1, false
}
}
return i, true
}
func (pt *peerTaskConductor) getNextNotReadyPieceNum(cur int32) (int32, bool) {
if pt.isCompleted() {
return 0, false
}
i := cur
// try to find next not ready piece
pt.readyPiecesLock.RLock()
defer pt.readyPiecesLock.RUnlock()
for ; pt.readyPieces.IsSet(i); i++ {
}
totalPiece := pt.GetTotalPieces()
if totalPiece > 0 && i >= totalPiece {
// double check, re-search
for i = int32(0); pt.readyPieces.IsSet(i); i++ {
}
if totalPiece > 0 && i >= totalPiece {
return 0, false
}
}
return i, true
}
func (pt *peerTaskConductor) recoverFromPanic() {
if r := recover(); r != nil {
pt.Errorf("recovered from panic %q. Call stack:\n%v", r, string(debug.Stack()))
}
}
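// ReportPieceResult reports a piece download result to the scheduler, mapping the error to a failure code
// when the download failed.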
func (pt *peerTaskConductor) ReportPieceResult(request *DownloadPieceRequest, result *DownloadPieceResult, err error) {
if err == nil {
pt.reportSuccessResult(request, result)
return
}
code := commonv1.Code_ClientPieceDownloadFail
if isConnectionError(err) {
code = commonv1.Code_ClientConnectionError
} else if isPieceNotFound(err) {
code = commonv1.Code_ClientPieceNotFound
} else if isBackSourceError(err) {
code = commonv1.Code_ClientBackSourceError
}
pt.reportFailResult(request, result, code)
}
func (pt *peerTaskConductor) reportSuccessResult(request *DownloadPieceRequest, result *DownloadPieceResult) {
metrics.PieceTaskCount.Add(1)
_, span := tracer.Start(pt.ctx, config.SpanReportPieceResult)
span.SetAttributes(config.AttributeWritePieceSuccess.Bool(true))
err := pt.sendPieceResult(
&schedulerv1.PieceResult{
TaskId: pt.GetTaskID(),
SrcPid: pt.GetPeerID(),
DstPid: request.DstPid,
PieceInfo: request.piece,
BeginTime: uint64(result.BeginTime),
EndTime: uint64(result.FinishTime),
Success: true,
Code: commonv1.Code_Success,
FinishedCount: pt.readyPieces.Settled(),
// TODO range_start, range_size, piece_md5, piece_offset, piece_style
})
if err != nil {
pt.Errorf("report piece task error: %v", err)
span.RecordError(err)
}
span.End()
}
func (pt *peerTaskConductor) reportFailResult(request *DownloadPieceRequest, result *DownloadPieceResult, code commonv1.Code) {
metrics.PieceTaskFailedCount.Add(1)
_, span := tracer.Start(pt.ctx, config.SpanReportPieceResult)
span.SetAttributes(config.AttributeWritePieceSuccess.Bool(false))
err := pt.sendPieceResult(&schedulerv1.PieceResult{
TaskId: pt.GetTaskID(),
SrcPid: pt.GetPeerID(),
DstPid: request.DstPid,
PieceInfo: request.piece,
BeginTime: uint64(result.BeginTime),
EndTime: uint64(result.FinishTime),
Success: false,
Code: code,
FinishedCount: pt.readyPieces.Settled(),
})
if err != nil {
pt.Errorf("report piece task error: %v", err)
}
span.End()
}
func (pt *peerTaskConductor) initStorage(desiredLocation string) (err error) {
// prepare storage
if pt.parent == nil {
pt.storage, err = pt.StorageManager.RegisterTask(pt.ctx,
&storage.RegisterTaskRequest{
PeerTaskMetadata: storage.PeerTaskMetadata{
PeerID: pt.GetPeerID(),
TaskID: pt.GetTaskID(),
},
DesiredLocation: desiredLocation,
ContentLength: pt.GetContentLength(),
TotalPieces: pt.GetTotalPieces(),
PieceMd5Sign: pt.GetPieceMd5Sign(),
})
} else {
pt.storage, err = pt.StorageManager.RegisterSubTask(pt.ctx,
&storage.RegisterSubTaskRequest{
Parent: storage.PeerTaskMetadata{
PeerID: pt.parent.GetPeerID(),
TaskID: pt.parent.GetTaskID(),
},
SubTask: storage.PeerTaskMetadata{
PeerID: pt.GetPeerID(),
TaskID: pt.GetTaskID(),
},
Range: pt.rg,
})
}
if err != nil {
pt.Log().Errorf("register task to storage manager failed: %s", err)
}
return err
}
func (pt *peerTaskConductor) UpdateStorage() error {
// update storage
err := pt.GetStorage().UpdateTask(pt.ctx,
&storage.UpdateTaskRequest{
PeerTaskMetadata: storage.PeerTaskMetadata{
PeerID: pt.GetPeerID(),
TaskID: pt.GetTaskID(),
},
ContentLength: pt.GetContentLength(),
TotalPieces: pt.GetTotalPieces(),
PieceMd5Sign: pt.GetPieceMd5Sign(),
Header: pt.GetHeader(),
})
if err != nil {
pt.Log().Errorf("update task to storage manager failed: %s", err)
return err
}
return nil
}
func (pt *peerTaskConductor) Done() {
pt.statusOnce.Do(pt.done)
}
func (pt *peerTaskConductor) done() {
defer func() {
pt.broker.Stop()
pt.span.End()
pt.pieceDownloadCancel()
if pt.pieceTaskSyncManager != nil {
pt.pieceTaskSyncManager.cancel()
}
pt.ctxCancel()
}()
var (
cost = time.Since(pt.startTime).Milliseconds()
success = true
code = commonv1.Code_Success
)
pt.Log().Infof("peer task done, cost: %dms", cost)
// TODO merge error handle
// update storage metadata
if err := pt.UpdateStorage(); err == nil {
// validate digest
if err = pt.Validate(); err == nil {
close(pt.successCh)
pt.span.SetAttributes(config.AttributePeerTaskSuccess.Bool(true))
} else {
close(pt.failCh)
success = false
code = commonv1.Code_ClientError
pt.failedCode = commonv1.Code_ClientError
pt.failedReason = err.Error()
pt.span.SetAttributes(config.AttributePeerTaskSuccess.Bool(false))
pt.span.SetAttributes(config.AttributePeerTaskCode.Int(int(pt.failedCode)))
pt.span.SetAttributes(config.AttributePeerTaskMessage.String(pt.failedReason))
pt.Errorf("validate digest failed: %s", err)
metrics.PeerTaskFailedCount.WithLabelValues(metrics.FailTypeP2P).Add(1)
}
} else {
close(pt.failCh)
success = false
code = commonv1.Code_ClientError
pt.failedCode = commonv1.Code_ClientError
pt.failedReason = err.Error()
pt.span.SetAttributes(config.AttributePeerTaskSuccess.Bool(false))
pt.span.SetAttributes(config.AttributePeerTaskCode.Int(int(pt.failedCode)))
pt.span.SetAttributes(config.AttributePeerTaskMessage.String(pt.failedReason))
pt.Errorf("update storage error: %v", err)
metrics.PeerTaskFailedCount.WithLabelValues(metrics.FailTypeP2P).Add(1)
}
pt.peerTaskManager.PeerTaskDone(pt.taskID, pt.peerID)
peerResultCtx, peerResultSpan := tracer.Start(pt.ctx, config.SpanReportPeerResult)
defer peerResultSpan.End()
// Send EOF piece result to scheduler.
err := pt.sendPieceResult(
&schedulerv1.PieceResult{
TaskId: pt.taskID,
SrcPid: pt.peerID,
FinishedCount: pt.readyPieces.Settled(),
PieceInfo: &commonv1.PieceInfo{
PieceNum: common.EndOfPiece,
},
})
pt.Debugf("peer task finished, end piece result sent result: %v", err)
err = pt.peerPacketStream.CloseSend()
pt.Debugf("close stream result: %v", err)
err = pt.schedulerClient.ReportPeerResult(
peerResultCtx,
&schedulerv1.PeerResult{
TaskId: pt.GetTaskID(),
PeerId: pt.GetPeerID(),
SrcIp: pt.PeerHost.Ip,
Idc: pt.PeerHost.Idc,
Url: pt.request.Url,
ContentLength: pt.GetContentLength(),
Traffic: pt.GetTraffic(),
TotalPieceCount: pt.GetTotalPieces(),
Cost: uint32(cost),
Success: success,
Code: code,
})
if err != nil {
peerResultSpan.RecordError(err)
pt.Errorf("step 3: report successful peer result, error: %v", err)
} else {
pt.Infof("step 3: report successful peer result ok")
}
}
func (pt *peerTaskConductor) Fail() {
pt.statusOnce.Do(pt.fail)
}
func (pt *peerTaskConductor) fail() {
if pt.failedCode == commonv1.Code_ClientBackSourceError {
metrics.PeerTaskFailedCount.WithLabelValues(metrics.FailTypeBackSource).Add(1)
} else {
metrics.PeerTaskFailedCount.WithLabelValues(metrics.FailTypeP2P).Add(1)
}
defer func() {
close(pt.failCh)
pt.broker.Stop()
pt.span.End()
pt.pieceDownloadCancel()
if pt.pieceTaskSyncManager != nil {
pt.pieceTaskSyncManager.cancel()
}
// mark storage to reclaim
_ = pt.StorageManager.UnregisterTask(
pt.ctx,
storage.CommonTaskRequest{
PeerID: pt.peerID,
TaskID: pt.taskID,
})
pt.ctxCancel()
}()
pt.peerTaskManager.PeerTaskDone(pt.taskID, pt.peerID)
var end = time.Now()
pt.Log().Errorf("peer task failed, code: %d, reason: %s", pt.failedCode, pt.failedReason)
// Send EOF piece result to scheduler.
err := pt.sendPieceResult(&schedulerv1.PieceResult{
TaskId: pt.taskID,
SrcPid: pt.peerID,
FinishedCount: pt.readyPieces.Settled(),
PieceInfo: &commonv1.PieceInfo{
PieceNum: common.EndOfPiece,
},
})
pt.Debugf("end piece result sent: %v, peer task finished", err)
err = pt.peerPacketStream.CloseSend()
pt.Debugf("close stream result: %v", err)
ctx := trace.ContextWithSpan(context.Background(), trace.SpanFromContext(pt.ctx))
peerResultCtx, peerResultSpan := tracer.Start(ctx, config.SpanReportPeerResult)
defer peerResultSpan.End()
var sourceError *errordetailsv1.SourceError
if pt.sourceErrorStatus != nil {
for _, detail := range pt.sourceErrorStatus.Details() {
switch d := detail.(type) {
case *errordetailsv1.SourceError:
sourceError = d
}
}
}
peerResult := &schedulerv1.PeerResult{
TaskId: pt.GetTaskID(),
PeerId: pt.GetPeerID(),
SrcIp: pt.PeerHost.Ip,
Idc: pt.PeerHost.Idc,
Url: pt.request.Url,
ContentLength: pt.GetContentLength(),
Traffic: pt.GetTraffic(),
TotalPieceCount: pt.GetTotalPieces(),
Cost: uint32(end.Sub(pt.startTime).Milliseconds()),
Success: false,
Code: pt.failedCode,
}
if sourceError != nil {
peerResult.Errordetails = &schedulerv1.PeerResult_SourceError{
SourceError: sourceError,
}
}
err = pt.schedulerClient.ReportPeerResult(peerResultCtx, peerResult)
if err != nil {
peerResultSpan.RecordError(err)
pt.Log().Errorf("step 3: report fail peer result, error: %v", err)
} else {
pt.Log().Infof("step 3: report fail peer result ok")
}
pt.span.SetAttributes(config.AttributePeerTaskSuccess.Bool(false))
pt.span.SetAttributes(config.AttributePeerTaskCode.Int(int(pt.failedCode)))
pt.span.SetAttributes(config.AttributePeerTaskMessage.String(pt.failedReason))
}
// Validate stores metadata and validates digest
func (pt *peerTaskConductor) Validate() error {
err := pt.GetStorage().Store(pt.ctx,
&storage.StoreRequest{
CommonTaskRequest: storage.CommonTaskRequest{
PeerID: pt.peerID,
TaskID: pt.taskID,
},
MetadataOnly: true,
TotalPieces: pt.GetTotalPieces(),
})
if err != nil {
pt.Errorf("store metadata error: %s", err)
return err
}
if !pt.CalculateDigest {
return nil
}
err = pt.GetStorage().ValidateDigest(
&storage.PeerTaskMetadata{
PeerID: pt.GetPeerID(),
TaskID: pt.GetTaskID(),
})
if err != nil {
pt.Errorf("validate digest error: %s", err)
return err
}
pt.Debugf("validate digest ok")
return err
}
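// PublishPieceInfo marks a piece ready, updates the completed length, finishes the task when all content
// is downloaded, and publishes the piece info to the broker.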
func (pt *peerTaskConductor) PublishPieceInfo(pieceNum int32, size uint32) {
// mark piece ready
pt.readyPiecesLock.Lock()
if pt.readyPieces.IsSet(pieceNum) {
pt.readyPiecesLock.Unlock()
pt.Warnf("piece %d is already reported, skipped", pieceNum)
return
}
// mark piece processed
pt.readyPieces.Set(pieceNum)
pt.completedLength.Add(int64(size))
pt.readyPiecesLock.Unlock()
finished := pt.isCompleted()
if finished {
pt.Done()
}
pt.broker.Publish(
&PieceInfo{
Num: pieceNum,
Finished: finished,
})
}
func (pt *peerTaskConductor) sendPieceResult(pr *schedulerv1.PieceResult) error {
pt.sendPieceResultLock.Lock()
err := pt.peerPacketStream.Send(pr)
pt.sendPieceResultLock.Unlock()
return err
}
func (pt *peerTaskConductor) getFailedError() error {
if pt.sourceErrorStatus != nil {
return pt.sourceErrorStatus.Err()
}
return fmt.Errorf("peer task failed: %d/%s", pt.failedCode, pt.failedReason)
}