dragonfly/client/daemon/peer/peertask_reuse.go

417 lines
14 KiB
Go

/*
* Copyright 2020 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package peer
import (
"context"
"fmt"
"io"
"os"
"time"
"github.com/go-http-utils/headers"
semconv "go.opentelemetry.io/otel/semconv/v1.7.0"
"go.opentelemetry.io/otel/trace"
commonv1 "d7y.io/api/v2/pkg/apis/common/v1"
"d7y.io/dragonfly/v2/client/config"
"d7y.io/dragonfly/v2/client/daemon/storage"
logger "d7y.io/dragonfly/v2/internal/dflog"
"d7y.io/dragonfly/v2/pkg/idgen"
"d7y.io/dragonfly/v2/pkg/net/http"
)
var _ *logger.SugaredLoggerOnWith // pin this package for no log code generation
// reuse task search logic:
// A. prefetch feature enabled
// for ranged request, 1, find completed subtask, 2, find partial completed parent task
// for non-ranged request, just find completed task
// B. prefetch feature disabled
// for ranged request, 1, find completed normal task, 2, find partial completed parent task
// for non-ranged request, just find completed task
func (ptm *peerTaskManager) tryReuseFilePeerTask(ctx context.Context,
request *FileTaskRequest) (chan *FileTaskProgress, bool) {
taskID := idgen.TaskIDV1(request.Url, request.UrlMeta)
var (
reuse *storage.ReusePeerTask
reuseRange *http.Range // the range of parent peer task data to read
log *logger.SugaredLoggerOnWith
length int64
err error
)
if ptm.enabledPrefetch(request.Range) {
reuse = ptm.StorageManager.FindCompletedSubTask(taskID)
} else {
reuse = ptm.StorageManager.FindCompletedTask(taskID)
}
if reuse == nil {
if request.Range == nil {
return nil, false
}
// for ranged request, check the parent task
reuseRange = request.Range
taskID = idgen.ParentTaskIDV1(request.Url, request.UrlMeta)
reuse = ptm.StorageManager.FindPartialCompletedTask(taskID, reuseRange)
if reuse == nil {
return nil, false
}
}
logKV := []any{
"peer", request.PeerId,
"task", taskID,
}
if spanContext := trace.SpanFromContext(ctx).SpanContext(); spanContext.TraceID().IsValid() {
logKV = append(logKV, "trace", spanContext.TraceID().String())
}
if reuseRange == nil {
logKV = append(logKV, "component", "reuseFilePeerTask")
log = logger.With(logKV...)
log.Infof("reuse from peer task: %s, total size: %d", reuse.PeerID, reuse.ContentLength)
length = reuse.ContentLength
} else {
logKV = append(logKV, "range", request.UrlMeta.Range, "component", "reuseRangeFilePeerTask")
log = logger.With(logKV...)
log.Infof("reuse partial data from peer task: %s, total size: %d, range: %s",
reuse.PeerID, reuse.ContentLength, request.UrlMeta.Range)
// correct range like: bytes=1024-
if reuseRange.Start+reuseRange.Length > reuse.ContentLength {
reuseRange.Length = reuse.ContentLength - reuseRange.Start
if reuseRange.Length < 0 {
return nil, false
}
}
length = reuseRange.Length
}
_, span := tracer.Start(ctx, config.SpanReusePeerTask, trace.WithSpanKind(trace.SpanKindClient))
span.SetAttributes(config.AttributePeerHost.String(ptm.PeerHost.Id))
span.SetAttributes(semconv.NetHostIPKey.String(ptm.PeerHost.Ip))
span.SetAttributes(config.AttributeTaskID.String(taskID))
span.SetAttributes(config.AttributePeerID.String(request.PeerId))
span.SetAttributes(config.AttributeReusePeerID.String(reuse.PeerID))
span.SetAttributes(semconv.HTTPURLKey.String(request.Url))
if reuseRange != nil {
span.SetAttributes(config.AttributeReuseRange.String(request.UrlMeta.Range))
}
defer span.End()
log.Infof("reuse from peer task: %s, total size: %d, target size: %d", reuse.PeerID, reuse.ContentLength, length)
span.AddEvent("reuse peer task", trace.WithAttributes(config.AttributePeerID.String(reuse.PeerID)))
start := time.Now()
if reuseRange == nil || request.KeepOriginalOffset {
storeRequest := &storage.StoreRequest{
CommonTaskRequest: storage.CommonTaskRequest{
PeerID: reuse.PeerID,
TaskID: taskID,
Destination: request.Output,
},
MetadataOnly: false,
StoreDataOnly: true,
TotalPieces: reuse.TotalPieces,
OriginalOffset: request.KeepOriginalOffset,
}
err = ptm.StorageManager.Store(ctx, storeRequest)
} else {
err = ptm.storePartialFile(ctx, request, log, reuse, reuseRange)
}
if err != nil {
log.Errorf("store error when reuse peer task: %s", err)
span.SetAttributes(config.AttributePeerTaskSuccess.Bool(false))
span.RecordError(err)
return nil, false
}
// check reuse target is valid
stat, err := os.Stat(request.Output)
if err != nil {
log.Errorf("stat error when reuse peer task: %s", err)
span.SetAttributes(config.AttributePeerTaskSuccess.Bool(false))
span.RecordError(err)
return nil, false
}
if request.KeepOriginalOffset {
// KeepOriginalOffset case
if length > 0 && stat.Size() == 0 {
err = fmt.Errorf("reuse failed, output file size is zero, but target length %d is not zero", length)
log.Errorf(err.Error())
span.SetAttributes(config.AttributePeerTaskSuccess.Bool(false))
span.RecordError(err)
return nil, false
}
} else if length != stat.Size() {
// normal case
err = fmt.Errorf("reuse failed, output file size %d is not same with target length %d", stat.Size(), length)
log.Errorf(err.Error())
span.SetAttributes(config.AttributePeerTaskSuccess.Bool(false))
span.RecordError(err)
return nil, false
}
var cost = time.Since(start).Milliseconds()
log.Infof("reuse file peer task done, cost: %dms", cost)
pg := &FileTaskProgress{
State: &ProgressState{
Success: true,
Code: commonv1.Code_Success,
Msg: "Success",
},
TaskID: taskID,
PeerID: request.PeerId,
ContentLength: length,
CompletedLength: length,
PeerTaskDone: true,
DoneCallback: func() {},
}
// make a new buffered channel, because we did not need to call newFileTask
progressCh := make(chan *FileTaskProgress, 1)
progressCh <- pg
span.SetAttributes(config.AttributePeerTaskSuccess.Bool(true))
span.SetAttributes(config.AttributePeerTaskCost.Int64(cost))
return progressCh, true
}
func (ptm *peerTaskManager) storePartialFile(ctx context.Context, request *FileTaskRequest,
log *logger.SugaredLoggerOnWith, reuse *storage.ReusePeerTask, rg *http.Range) error {
f, err := os.OpenFile(request.Output, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0644)
if err != nil {
log.Errorf("open dest file error when reuse peer task: %s", err)
return err
}
rc, err := ptm.StorageManager.ReadAllPieces(ctx,
&storage.ReadAllPiecesRequest{PeerTaskMetadata: reuse.PeerTaskMetadata, Range: rg})
if err != nil {
log.Errorf("read pieces error when reuse peer task: %s", err)
return err
}
defer rc.Close()
n, err := io.Copy(f, rc)
if err != nil {
log.Errorf("copy data error when reuse peer task: %s", err)
return err
}
if n != rg.Length {
log.Errorf("copy data length not match when reuse peer task, actual: %d, desire: %d", n, rg.Length)
return io.ErrShortBuffer
}
return nil
}
func (ptm *peerTaskManager) tryReuseStreamPeerTask(ctx context.Context,
request *StreamTaskRequest) (io.ReadCloser, map[string]string, bool) {
taskID := idgen.TaskIDV1(request.URL, request.URLMeta)
var (
reuse *storage.ReusePeerTask
reuseRange *http.Range // the range of parent peer task data to read
log *logger.SugaredLoggerOnWith
length int64
)
if ptm.enabledPrefetch(request.Range) {
reuse = ptm.StorageManager.FindCompletedSubTask(taskID)
} else {
reuse = ptm.StorageManager.FindCompletedTask(taskID)
}
if reuse == nil {
if request.Range == nil {
return nil, nil, false
}
// for ranged request, check the parent task
reuseRange = request.Range
taskID = idgen.ParentTaskIDV1(request.URL, request.URLMeta)
reuse = ptm.StorageManager.FindPartialCompletedTask(taskID, reuseRange)
if reuse == nil {
return nil, nil, false
}
}
logKV := []any{
"peer", request.PeerID,
"task", taskID,
}
if spanContext := trace.SpanFromContext(ctx).SpanContext(); spanContext.TraceID().IsValid() {
logKV = append(logKV, "trace", spanContext.TraceID().String())
}
if reuseRange == nil {
logKV = append(logKV, "component", "reuseStreamPeerTask")
log = logger.With(logKV...)
log.Infof("reuse from peer task: %s, total size: %d", reuse.PeerID, reuse.ContentLength)
length = reuse.ContentLength
} else {
logKV = append(logKV, "component", "reuseRangeStreamPeerTask")
log = logger.With(logKV...)
log.Infof("reuse partial data from peer task: %s, total size: %d, range: %s",
reuse.PeerID, reuse.ContentLength, request.URLMeta.Range)
// correct range like: bytes=1024-
if reuseRange.Length > reuse.ContentLength-reuseRange.Start {
reuseRange.Length = reuse.ContentLength - reuseRange.Start
if reuseRange.Length < 0 {
return nil, nil, false
}
}
length = reuseRange.Length
}
ctx, span := tracer.Start(ctx, config.SpanStreamTask, trace.WithSpanKind(trace.SpanKindClient))
span.SetAttributes(config.AttributePeerHost.String(ptm.PeerHost.Id))
span.SetAttributes(semconv.NetHostIPKey.String(ptm.PeerHost.Ip))
span.SetAttributes(config.AttributeTaskID.String(taskID))
span.SetAttributes(config.AttributePeerID.String(request.PeerID))
span.SetAttributes(config.AttributeReusePeerID.String(reuse.PeerID))
span.SetAttributes(semconv.HTTPURLKey.String(request.URL))
if reuseRange != nil {
span.SetAttributes(config.AttributeReuseRange.String(request.URLMeta.Range))
}
defer span.End()
rc, err := ptm.StorageManager.ReadAllPieces(ctx,
&storage.ReadAllPiecesRequest{PeerTaskMetadata: reuse.PeerTaskMetadata, Range: reuseRange})
if err != nil {
log.Errorf("read pieces error when reuse peer task: %s", err)
span.SetAttributes(config.AttributePeerTaskSuccess.Bool(false))
span.RecordError(err)
return nil, nil, false
}
exa, err := ptm.StorageManager.GetExtendAttribute(ctx, &reuse.PeerTaskMetadata)
if err != nil {
log.Errorf("get extend attribute error when reuse peer task: %s", err)
span.SetAttributes(config.AttributePeerTaskSuccess.Bool(false))
span.RecordError(err)
return nil, nil, false
}
attr := map[string]string{}
attr[config.HeaderDragonflyTask] = taskID
attr[config.HeaderDragonflyPeer] = request.PeerID
attr[headers.ContentLength] = fmt.Sprintf("%d", length)
if exa != nil {
for k, v := range exa.Header {
attr[k] = v
}
}
if reuseRange != nil {
attr[config.HeaderDragonflyRange] = request.URLMeta.Range
attr[headers.ContentRange] = fmt.Sprintf("bytes %d-%d/%d", reuseRange.Start,
reuseRange.Start+reuseRange.Length-1, reuse.ContentLength)
} else if request.Range != nil {
// the length is from reuse task, ensure it equal with request
if length != request.Range.Length {
log.Errorf("target task length %d did not match range length %d", length, request.Range.Length)
return nil, nil, false
}
attr[headers.ContentRange] = fmt.Sprintf("bytes %d-%d/*", request.Range.Start,
request.Range.Start+request.Range.Length-1)
}
// TODO record time when file closed, need add a type to implement Close and WriteTo
span.SetAttributes(config.AttributePeerTaskSuccess.Bool(true))
return rc, attr, true
}
func (ptm *peerTaskManager) tryReuseSeedPeerTask(ctx context.Context,
request *SeedTaskRequest) (*SeedTaskResponse, bool) {
taskID := idgen.TaskIDV1(request.Url, request.UrlMeta)
var (
reuse *storage.ReusePeerTask
reuseRange *http.Range // the range of parent peer task data to read
log *logger.SugaredLoggerOnWith
)
if ptm.enabledPrefetch(request.Range) {
reuse = ptm.StorageManager.FindCompletedSubTask(taskID)
} else {
reuse = ptm.StorageManager.FindCompletedTask(taskID)
}
if reuse == nil {
return nil, false
// if request.Range == nil {
// return nil, false
// }
// TODO, mock SeedTaskResponse for sub task
// for ranged request, check the parent task
//reuseRange = request.Range
//taskID = idgen.ParentTaskID(request.Url, request.UrlMeta)
//reuse = ptm.StorageManager.FindPartialCompletedTask(taskID, reuseRange)
//if reuse == nil {
// return nil, false
//}
}
if reuseRange == nil {
log = logger.With("peer", request.PeerId, "task", taskID, "component", "reuseSeedPeerTask")
log.Infof("reuse from peer task: %s, total size: %d", reuse.PeerID, reuse.ContentLength)
} else {
log = logger.With("peer", request.PeerId, "task", taskID, "range", request.UrlMeta.Range,
"component", "reuseRangeSeedPeerTask")
log.Infof("reuse partial data from peer task: %s, total size: %d, range: %s",
reuse.PeerID, reuse.ContentLength, request.UrlMeta.Range)
}
ctx, span := tracer.Start(ctx, config.SpanReusePeerTask, trace.WithSpanKind(trace.SpanKindClient))
span.SetAttributes(config.AttributePeerHost.String(ptm.PeerHost.Id))
span.SetAttributes(semconv.NetHostIPKey.String(ptm.PeerHost.Ip))
span.SetAttributes(config.AttributeTaskID.String(taskID))
span.SetAttributes(config.AttributePeerID.String(request.PeerId))
span.SetAttributes(config.AttributeReusePeerID.String(reuse.PeerID))
span.SetAttributes(semconv.HTTPURLKey.String(request.Url))
if reuseRange != nil {
span.SetAttributes(config.AttributeReuseRange.String(request.UrlMeta.Range))
}
successCh := make(chan struct{}, 1)
successCh <- struct{}{}
span.SetAttributes(config.AttributePeerTaskSuccess.Bool(true))
return &SeedTaskResponse{
Context: ctx,
Span: span,
TaskID: taskID,
PeerID: reuse.PeerID,
SubscribeResponse: SubscribeResponse{
Storage: reuse.Storage,
PieceInfoChannel: nil,
Success: successCh,
Fail: nil,
FailReason: func() error {
return nil
},
},
}, true
}