feat: merge ranged request storage into parent (#1078)

* feat: merge ranged request storage into parent

Signed-off-by: Jim Ma <majinjing3@gmail.com>
This commit is contained in:
Jim Ma 2022-02-23 18:02:33 +08:00 committed by Gaius
parent 3c45cdd6ac
commit 4597f74283
No known key found for this signature in database
GPG Key ID: 8B4E5D1290FA2FFB
33 changed files with 1356 additions and 459 deletions

View File

@ -70,6 +70,7 @@ jobs:
- name: Run E2E test
run: |
make build-e2e-sha256sum
ginkgo -v -r --race --fail-fast --cover --trace --progress test/e2e -- --feature-gates=dfget-range=true
cat coverprofile.out >> coverage.txt

View File

@ -194,7 +194,11 @@ build-deb-dfget: build-linux-dfget
# Generate dfget man page
build-dfget-man-page:
@pandoc -s -t man ./docs/en/cli-reference/dfget.1.md -o ./docs/en/cli-reference/dfget.1
.PHONY: build-man-page
.PHONY: build-dfget-man-page
build-e2e-sha256sum:
@GOOS=linux GOARCH=amd64 go build -o /tmp/sha256sum-offset test/tools/sha256sum-offset/main.go
.PHONY: build-e2e-sha256sum
# Run unittests
test:
@ -219,12 +223,12 @@ install-e2e-test:
.PHONY: install-e2e-test
# Run E2E tests
e2e-test: install-e2e-test
e2e-test: install-e2e-test build-e2e-sha256sum
@ginkgo -v -r --race --fail-fast --cover --trace --progress test/e2e
.PHONY: e2e-test
# Run E2E tests with coverage
e2e-test-coverage: install-e2e-test
e2e-test-coverage: install-e2e-test build-e2e-sha256sum
@ginkgo -v -r --race --fail-fast --cover --trace --progress test/e2e
@cat coverprofile.out >> coverage.txt
.PHONY: e2e-test-coverage
@ -311,6 +315,7 @@ help:
@echo "make build-scheduler build scheduler"
@echo "make build-manager build manager"
@echo "make build-manager-console build manager console"
@echo "make build-e2e-sha256sum build sha256sum test tool"
@echo "make install-cdn install CDN"
@echo "make install-dfget install dfget"
@echo "make install-scheduler install scheduler"

View File

@ -127,6 +127,11 @@ type ClientOption struct {
RecursiveAcceptRegex string `yaml:"acceptRegex,omitempty" mapstructure:"accept-regex,omitempty"`
RecursiveRejectRegex string `yaml:"rejectRegex,omitempty" mapstructure:"reject-regex,omitempty"`
KeepOriginalOffset bool `yaml:"keepOriginalOffset,omitempty" mapstructure:"original-offset,omitempty"`
// Range stands download range for url, like: 0-9, will download 10 bytes from 0 to 9 ([0:9])
Range string `yaml:"range,omitempty" mapstructure:"range,omitempty"`
}
func NewDfgetConfig() *ClientOption {

View File

@ -199,6 +199,8 @@ type ProxyOption struct {
Proxies []*Proxy `mapstructure:"proxies" yaml:"proxies"`
HijackHTTPS *HijackConfig `mapstructure:"hijackHTTPS" yaml:"hijackHTTPS"`
DumpHTTPContent bool `mapstructure:"dumpHTTPContent" yaml:"dumpHTTPContent"`
// ExtraRegistryMirrors add more mirror for different ports
ExtraRegistryMirrors []*RegistryMirror `mapstructure:"extraRegistryMirrors" yaml:"extraRegistryMirrors"`
}
func (p *ProxyOption) UnmarshalJSON(b []byte) error {

View File

@ -411,7 +411,6 @@ func (cd *clientDaemon) Serve() error {
// serve proxy sni service
if cd.Option.Proxy.HijackHTTPS != nil && len(cd.Option.Proxy.HijackHTTPS.SNI) > 0 {
for _, opt := range cd.Option.Proxy.HijackHTTPS.SNI {
listener, port, err := cd.prepareTCPListener(config.ListenOption{
TCPListen: opt,
}, false)
@ -475,7 +474,7 @@ func (cd *clientDaemon) Serve() error {
// dynconfig register client daemon
cd.dynconfig.Register(cd)
// servce dynconfig
// serve dynconfig
g.Go(func() error {
if err := cd.dynconfig.Serve(); err != nil {
logger.Errorf("dynconfig start failed %v", err)

View File

@ -139,9 +139,18 @@ type peerTaskConductor struct {
limiter *rate.Limiter
startTime time.Time
// subtask only
parent *peerTaskConductor
rg *clientutil.Range
}
func (ptm *peerTaskManager) newPeerTaskConductor(ctx context.Context, request *scheduler.PeerTaskRequest, limit rate.Limit) *peerTaskConductor {
func (ptm *peerTaskManager) newPeerTaskConductor(
ctx context.Context,
request *scheduler.PeerTaskRequest,
limit rate.Limit,
parent *peerTaskConductor,
rg *clientutil.Range) *peerTaskConductor {
// use a new context with span info
ctx = trace.ContextWithSpan(context.Background(), trace.SpanFromContext(ctx))
ctx, span := tracer.Start(ctx, config.SpanPeerTask, trace.WithSpanKind(trace.SpanKindClient))
@ -200,17 +209,22 @@ func (ptm *peerTaskManager) newPeerTaskConductor(ctx context.Context, request *s
completedLength: atomic.NewInt64(0),
usedTraffic: atomic.NewUint64(0),
SugaredLoggerOnWith: log,
parent: parent,
rg: rg,
}
ptc.pieceTaskPoller = &pieceTaskPoller{
getPiecesMaxRetry: ptm.getPiecesMaxRetry,
peerTaskConductor: ptc,
}
return ptc
}
// register to scheduler, if error and disable auto back source, return error, otherwise return nil
func (pt *peerTaskConductor) register() error {
logger.Debugf("request overview, pid: %s, url: %s, filter: %s, tag: %s, range: %s, digest: %s, header: %#v",
pt.Debugf("request overview, pid: %s, url: %s, filter: %s, tag: %s, range: %s, digest: %s, header: %#v",
pt.request.PeerId, pt.request.Url, pt.request.UrlMeta.Filter, pt.request.UrlMeta.Tag, pt.request.UrlMeta.Range, pt.request.UrlMeta.Digest, pt.request.UrlMeta.Header)
// trace register
regCtx, cancel := context.WithTimeout(pt.ctx, pt.peerTaskManager.schedulerOption.ScheduleTimeout.Duration)
@ -224,7 +238,7 @@ func (pt *peerTaskConductor) register() error {
tinyData *TinyData
)
logger.Infof("step 1: peer %s start to register", pt.request.PeerId)
pt.Infof("step 1: peer %s start to register", pt.request.PeerId)
schedulerClient := pt.peerTaskManager.schedulerClient
result, err := schedulerClient.RegisterPeerTask(regCtx, pt.request)
@ -233,11 +247,11 @@ func (pt *peerTaskConductor) register() error {
if err != nil {
if err == context.DeadlineExceeded {
logger.Errorf("scheduler did not response in %s", pt.peerTaskManager.schedulerOption.ScheduleTimeout.Duration)
pt.Errorf("scheduler did not response in %s", pt.peerTaskManager.schedulerOption.ScheduleTimeout.Duration)
}
logger.Errorf("step 1: peer %s register failed: %s", pt.request.PeerId, err)
pt.Errorf("step 1: peer %s register failed: %s", pt.request.PeerId, err)
if pt.peerTaskManager.schedulerOption.DisableAutoBackSource {
logger.Errorf("register peer task failed: %s, peer id: %s, auto back source disabled", err, pt.request.PeerId)
pt.Errorf("register peer task failed: %s, peer id: %s, auto back source disabled", err, pt.request.PeerId)
pt.span.RecordError(err)
pt.cancel(base.Code_SchedError, err.Error())
return err
@ -246,7 +260,7 @@ func (pt *peerTaskConductor) register() error {
// can not detect source or scheduler error, create a new dummy scheduler client
schedulerClient = &dummySchedulerClient{}
result = &scheduler.RegisterResult{TaskId: pt.taskID}
logger.Warnf("register peer task failed: %s, peer id: %s, try to back source", err, pt.request.PeerId)
pt.Warnf("register peer task failed: %s, peer id: %s, try to back source", err, pt.request.PeerId)
}
pt.Infof("register task success, SizeScope: %s", base.SizeScope_name[int32(result.SizeScope)])
@ -371,10 +385,6 @@ func (pt *peerTaskConductor) backSource() {
backSourceCtx, backSourceSpan := tracer.Start(pt.ctx, config.SpanBackSource)
defer backSourceSpan.End()
pt.contentLength.Store(-1)
if err := pt.InitStorage(); err != nil {
pt.cancel(base.Code_ClientError, err.Error())
return
}
err := pt.pieceManager.DownloadSource(backSourceCtx, pt, pt.request)
if err != nil {
pt.Errorf("download from source error: %s", err)
@ -408,25 +418,25 @@ func (pt *peerTaskConductor) pullPieces() {
}
func (pt *peerTaskConductor) storeTinyPeerTask() {
// TODO store tiny data asynchronous
l := int64(len(pt.tinyData.Content))
pt.SetContentLength(l)
pt.SetTotalPieces(1)
ctx := pt.ctx
var err error
storageDriver, err := pt.peerTaskManager.storageManager.RegisterTask(ctx,
storage.RegisterTaskRequest{
CommonTaskRequest: storage.CommonTaskRequest{
&storage.RegisterTaskRequest{
PeerTaskMetadata: storage.PeerTaskMetadata{
PeerID: pt.tinyData.PeerID,
TaskID: pt.tinyData.TaskID,
},
ContentLength: l,
TotalPieces: 1,
DesiredLocation: "",
ContentLength: l,
TotalPieces: 1,
// TODO check digest
})
pt.storage = storageDriver
if err != nil {
logger.Errorf("register tiny data storage failed: %s", err)
pt.Errorf("register tiny data storage failed: %s", err)
pt.cancel(base.Code_ClientError, err.Error())
return
}
@ -453,24 +463,24 @@ func (pt *peerTaskConductor) storeTinyPeerTask() {
},
})
if err != nil {
logger.Errorf("write tiny data storage failed: %s", err)
pt.Errorf("write tiny data storage failed: %s", err)
pt.cancel(base.Code_ClientError, err.Error())
return
}
if n != l {
logger.Errorf("write tiny data storage failed", n, l)
pt.Errorf("write tiny data storage failed, want: %d, wrote: %d", l, n)
pt.cancel(base.Code_ClientError, err.Error())
return
}
err = pt.UpdateStorage()
if err != nil {
logger.Errorf("update tiny data storage failed: %s", err)
pt.Errorf("update tiny data storage failed: %s", err)
pt.cancel(base.Code_ClientError, err.Error())
return
}
logger.Debugf("store tiny data, len: %d", l)
pt.Debugf("store tiny data, len: %d", l)
pt.PublishPieceInfo(0, uint32(l))
}
@ -645,13 +655,6 @@ func (pt *peerTaskConductor) pullSinglePiece() {
pt.contentLength.Store(int64(pt.singlePiece.PieceInfo.RangeSize))
pt.SetTotalPieces(1)
pt.SetPieceMd5Sign(digestutils.Sha256(pt.singlePiece.PieceInfo.PieceMd5))
if err := pt.InitStorage(); err != nil {
pt.cancel(base.Code_ClientError, err.Error())
span.RecordError(err)
span.SetAttributes(config.AttributePieceSuccess.Bool(false))
span.End()
return
}
request := &DownloadPieceRequest{
storage: pt.GetStorage(),
@ -793,11 +796,7 @@ func (pt *peerTaskConductor) init(piecePacket *base.PiecePacket, pieceBufferSize
if piecePacket.ContentLength > -1 {
pt.span.SetAttributes(config.AttributeTaskContentLength.Int64(piecePacket.ContentLength))
}
if err := pt.InitStorage(); err != nil {
pt.span.RecordError(err)
pt.cancel(base.Code_ClientError, err.Error())
return nil, false
}
pc := pt.peerPacket.Load().(*scheduler.PeerPacket).ParallelCount
pieceRequestCh := make(chan *DownloadPieceRequest, pieceBufferSize)
for i := int32(0); i < pc; i++ {
@ -1106,24 +1105,34 @@ func (pt *peerTaskConductor) reportFailResult(request *DownloadPieceRequest, res
span.End()
}
func (pt *peerTaskConductor) InitStorage() (err error) {
pt.lock.Lock()
defer pt.lock.Unlock()
// check storage for partial back source cases.
if pt.storage != nil {
return nil
}
func (pt *peerTaskConductor) initStorage(desiredLocation string) (err error) {
// prepare storage
pt.storage, err = pt.storageManager.RegisterTask(pt.ctx,
storage.RegisterTaskRequest{
CommonTaskRequest: storage.CommonTaskRequest{
PeerID: pt.GetPeerID(),
TaskID: pt.GetTaskID(),
},
ContentLength: pt.GetContentLength(),
TotalPieces: pt.GetTotalPieces(),
PieceMd5Sign: pt.GetPieceMd5Sign(),
})
if pt.parent == nil {
pt.storage, err = pt.storageManager.RegisterTask(pt.ctx,
&storage.RegisterTaskRequest{
PeerTaskMetadata: storage.PeerTaskMetadata{
PeerID: pt.GetPeerID(),
TaskID: pt.GetTaskID(),
},
DesiredLocation: desiredLocation,
ContentLength: pt.GetContentLength(),
TotalPieces: pt.GetTotalPieces(),
PieceMd5Sign: pt.GetPieceMd5Sign(),
})
} else {
pt.storage, err = pt.storageManager.RegisterSubTask(pt.ctx,
&storage.RegisterSubTaskRequest{
Parent: storage.PeerTaskMetadata{
PeerID: pt.parent.GetPeerID(),
TaskID: pt.parent.GetTaskID(),
},
SubTask: storage.PeerTaskMetadata{
PeerID: pt.GetPeerID(),
TaskID: pt.GetTaskID(),
},
Range: pt.rg,
})
}
if err != nil {
pt.Log().Errorf("register task to storage manager failed: %s", err)
}

View File

@ -22,6 +22,7 @@ import (
"go.opentelemetry.io/otel/trace"
"golang.org/x/time/rate"
"d7y.io/dragonfly/v2/client/clientutil"
"d7y.io/dragonfly/v2/client/config"
"d7y.io/dragonfly/v2/client/daemon/metrics"
"d7y.io/dragonfly/v2/client/daemon/storage"
@ -33,11 +34,13 @@ import (
type FileTaskRequest struct {
scheduler.PeerTaskRequest
Output string
Limit float64
DisableBackSource bool
Pattern string
Callsystem string
Output string
Limit float64
DisableBackSource bool
Pattern string
Callsystem string
Range *clientutil.Range
KeepOriginalOffset bool
}
// FileTask represents a peer task to download a file
@ -84,16 +87,20 @@ func (ptm *peerTaskManager) newFileTask(
request *FileTaskRequest,
limit rate.Limit) (context.Context, *fileTask, error) {
metrics.FileTaskCount.Add(1)
ptc, err := ptm.getPeerTaskConductor(ctx, idgen.TaskID(request.Url, request.UrlMeta), &request.PeerTaskRequest, limit)
// prefetch parent request
var parent *peerTaskConductor
if ptm.enablePrefetch && request.Range != nil {
parent = ptm.prefetchParentTask(&request.PeerTaskRequest, request.Output)
}
taskID := idgen.TaskID(request.Url, request.UrlMeta)
ptc, err := ptm.getPeerTaskConductor(ctx, taskID, &request.PeerTaskRequest, limit, parent, request.Range, request.Output)
if err != nil {
return nil, nil, err
}
// prefetch parent request
if ptm.enablePrefetch && request.UrlMeta.Range != "" {
go ptm.prefetch(&request.PeerTaskRequest)
}
ctx, span := tracer.Start(ctx, config.SpanFileTask, trace.WithSpanKind(trace.SpanKindClient))
ctx, span := tracer.Start(ctx, config.SpanFileTask, trace.WithSpanKind(trace.SpanKindClient))
pt := &fileTask{
SugaredLoggerOnWith: ptc.SugaredLoggerOnWith,
ctx: ctx,
@ -165,8 +172,9 @@ func (f *fileTask) storeToOutput() {
TaskID: f.peerTaskConductor.GetTaskID(),
Destination: f.request.Output,
},
MetadataOnly: false,
TotalPieces: f.peerTaskConductor.GetTotalPieces(),
MetadataOnly: false,
TotalPieces: f.peerTaskConductor.GetTotalPieces(),
OriginalOffset: f.request.KeepOriginalOffset,
})
if err != nil {
f.sendFailProgress(base.Code_ClientError, err.Error())

View File

@ -18,6 +18,7 @@ package peer
import (
"context"
"fmt"
"io"
"sync"
@ -26,6 +27,7 @@ import (
"go.opentelemetry.io/otel/trace"
"golang.org/x/time/rate"
"d7y.io/dragonfly/v2/client/clientutil"
"d7y.io/dragonfly/v2/client/config"
"d7y.io/dragonfly/v2/client/daemon/metrics"
"d7y.io/dragonfly/v2/client/daemon/storage"
@ -163,8 +165,11 @@ func (ptm *peerTaskManager) findPeerTaskConductor(taskID string) (*peerTaskCondu
func (ptm *peerTaskManager) getPeerTaskConductor(ctx context.Context,
taskID string,
request *scheduler.PeerTaskRequest,
limit rate.Limit) (*peerTaskConductor, error) {
ptc, created, err := ptm.getOrCreatePeerTaskConductor(ctx, taskID, request, limit)
limit rate.Limit,
parent *peerTaskConductor,
rg *clientutil.Range,
desiredLocation string) (*peerTaskConductor, error) {
ptc, created, err := ptm.getOrCreatePeerTaskConductor(ctx, taskID, request, limit, parent, rg, desiredLocation)
if err != nil {
return nil, err
}
@ -182,12 +187,15 @@ func (ptm *peerTaskManager) getOrCreatePeerTaskConductor(
ctx context.Context,
taskID string,
request *scheduler.PeerTaskRequest,
limit rate.Limit) (*peerTaskConductor, bool, error) {
limit rate.Limit,
parent *peerTaskConductor,
rg *clientutil.Range,
desiredLocation string) (*peerTaskConductor, bool, error) {
if ptc, ok := ptm.findPeerTaskConductor(taskID); ok {
logger.Debugf("peer task found: %s/%s", ptc.taskID, ptc.peerID)
return ptc, false, nil
}
ptc := ptm.newPeerTaskConductor(ctx, request, limit)
ptc := ptm.newPeerTaskConductor(ctx, request, limit, parent, rg)
ptm.conductorLock.Lock()
// double check
@ -200,10 +208,11 @@ func (ptm *peerTaskManager) getOrCreatePeerTaskConductor(
ptm.runningPeerTasks.Store(taskID, ptc)
ptm.conductorLock.Unlock()
metrics.PeerTaskCount.Add(1)
return ptc, true, nil
logger.Debugf("peer task created: %s/%s", ptc.taskID, ptc.peerID)
return ptc, true, ptc.initStorage(desiredLocation)
}
func (ptm *peerTaskManager) prefetch(request *scheduler.PeerTaskRequest) {
func (ptm *peerTaskManager) prefetchParentTask(request *scheduler.PeerTaskRequest, desiredLocation string) *peerTaskConductor {
req := &scheduler.PeerTaskRequest{
Url: request.Url,
PeerId: request.PeerId,
@ -232,16 +241,22 @@ func (ptm *peerTaskManager) prefetch(request *scheduler.PeerTaskRequest) {
}
logger.Infof("prefetch peer task %s/%s", taskID, req.PeerId)
prefetch, err := ptm.getPeerTaskConductor(context.Background(), taskID, req, limit)
prefetch, err := ptm.getPeerTaskConductor(context.Background(), taskID, req, limit, nil, nil, desiredLocation)
if err != nil {
logger.Errorf("prefetch peer task %s/%s error: %s", prefetch.taskID, prefetch.peerID, err)
return nil
}
if prefetch != nil && prefetch.peerID == req.PeerId {
metrics.PrefetchTaskCount.Add(1)
}
return prefetch
}
func (ptm *peerTaskManager) StartFileTask(ctx context.Context, req *FileTaskRequest) (chan *FileTaskProgress, *TinyData, error) {
if req.KeepOriginalOffset && !ptm.enablePrefetch {
return nil, nil, fmt.Errorf("please enable prefetch when use original offset feature")
}
if ptm.enableMultiplex {
progress, ok := ptm.tryReuseFilePeerTask(ctx, req)
if ok {
@ -285,7 +300,7 @@ func (ptm *peerTaskManager) StartStreamTask(ctx context.Context, req *StreamTask
}
}
pt, err := ptm.newStreamTask(ctx, peerTaskRequest)
pt, err := ptm.newStreamTask(ctx, peerTaskRequest, req.Range)
if err != nil {
return nil, nil, err
}

View File

@ -651,7 +651,8 @@ func (ts *testSpec) runConductorTest(assert *testifyassert.Assertions, require *
PeerHost: &scheduler.PeerHost{},
}
ptc, created, err := ptm.getOrCreatePeerTaskConductor(context.Background(), taskID, peerTaskRequest, rate.Limit(pieceSize*4))
ptc, created, err := ptm.getOrCreatePeerTaskConductor(
context.Background(), taskID, peerTaskRequest, rate.Limit(pieceSize*4), nil, nil, "")
assert.Nil(err, "load first peerTaskConductor")
assert.True(created, "should create a new peerTaskConductor")
@ -695,7 +696,8 @@ func (ts *testSpec) runConductorTest(assert *testifyassert.Assertions, require *
PeerId: fmt.Sprintf("should-not-use-peer-%d", i),
PeerHost: &scheduler.PeerHost{},
}
p, created, err := ptm.getOrCreatePeerTaskConductor(context.Background(), taskID, request, rate.Limit(pieceSize*3))
p, created, err := ptm.getOrCreatePeerTaskConductor(
context.Background(), taskID, request, rate.Limit(pieceSize*3), nil, nil, "")
assert.Nil(err, fmt.Sprintf("load peerTaskConductor %d", i))
assert.Equal(ptc.peerID, p.GetPeerID(), fmt.Sprintf("ptc %d should be same with ptc", i))
assert.False(created, "should not create a new peerTaskConductor")

View File

@ -20,7 +20,6 @@ import (
"context"
"fmt"
"io"
"math"
"os"
"time"
@ -34,7 +33,6 @@ import (
logger "d7y.io/dragonfly/v2/internal/dflog"
"d7y.io/dragonfly/v2/pkg/idgen"
"d7y.io/dragonfly/v2/pkg/rpc/base"
"d7y.io/dragonfly/v2/pkg/util/rangeutils"
)
var _ *logger.SugaredLoggerOnWith // pin this package for no log code generation
@ -42,7 +40,12 @@ var _ *logger.SugaredLoggerOnWith // pin this package for no log code generation
func (ptm *peerTaskManager) tryReuseFilePeerTask(ctx context.Context,
request *FileTaskRequest) (chan *FileTaskProgress, bool) {
taskID := idgen.TaskID(request.Url, request.UrlMeta)
reuse := ptm.storageManager.FindCompletedTask(taskID)
var reuse *storage.ReusePeerTask
if ptm.enablePrefetch && request.Range != nil {
reuse = ptm.storageManager.FindCompletedSubTask(taskID)
} else {
reuse = ptm.storageManager.FindCompletedTask(taskID)
}
var (
rg *clientutil.Range // the range of parent peer task data to read
log *logger.SugaredLoggerOnWith
@ -55,16 +58,7 @@ func (ptm *peerTaskManager) tryReuseFilePeerTask(ctx context.Context,
if reuse == nil {
return nil, false
}
var r *rangeutils.Range
r, err = rangeutils.ParseRange(request.UrlMeta.Range, math.MaxInt)
if err != nil {
logger.Warnf("parse range %s error: %s", request.UrlMeta.Range, err)
return nil, false
}
rg = &clientutil.Range{
Start: int64(r.StartIndex),
Length: int64(r.EndIndex - r.StartIndex + 1),
}
rg = request.Range
}
if rg == nil {
@ -95,18 +89,19 @@ func (ptm *peerTaskManager) tryReuseFilePeerTask(ctx context.Context,
span.AddEvent("reuse peer task", trace.WithAttributes(config.AttributePeerID.String(reuse.PeerID)))
start := time.Now()
if rg == nil {
if rg == nil || request.KeepOriginalOffset {
storeRequest := &storage.StoreRequest{
CommonTaskRequest: storage.CommonTaskRequest{
PeerID: reuse.PeerID,
TaskID: taskID,
Destination: request.Output,
},
MetadataOnly: false,
StoreOnly: true,
TotalPieces: reuse.TotalPieces,
MetadataOnly: false,
StoreDataOnly: true,
TotalPieces: reuse.TotalPieces,
OriginalOffset: request.KeepOriginalOffset,
}
err = ptm.storageManager.Store(context.Background(), storeRequest)
err = ptm.storageManager.Store(ctx, storeRequest)
} else {
err = ptm.storePartialFile(ctx, request, log, reuse, rg)
}
@ -173,7 +168,12 @@ func (ptm *peerTaskManager) storePartialFile(ctx context.Context, request *FileT
func (ptm *peerTaskManager) tryReuseStreamPeerTask(ctx context.Context,
request *StreamTaskRequest) (io.ReadCloser, map[string]string, bool) {
taskID := idgen.TaskID(request.URL, request.URLMeta)
reuse := ptm.storageManager.FindCompletedTask(taskID)
var reuse *storage.ReusePeerTask
if ptm.enablePrefetch && request.Range != nil {
reuse = ptm.storageManager.FindCompletedSubTask(taskID)
} else {
reuse = ptm.storageManager.FindCompletedTask(taskID)
}
var (
rg *clientutil.Range // the range of parent peer task data to read
log *logger.SugaredLoggerOnWith

View File

@ -64,20 +64,24 @@ type streamTask struct {
func (ptm *peerTaskManager) newStreamTask(
ctx context.Context,
request *scheduler.PeerTaskRequest) (*streamTask, error) {
request *scheduler.PeerTaskRequest,
rg *clientutil.Range) (*streamTask, error) {
metrics.StreamTaskCount.Add(1)
var limit = rate.Inf
if ptm.perPeerRateLimit > 0 {
limit = ptm.perPeerRateLimit
}
ptc, err := ptm.getPeerTaskConductor(ctx, idgen.TaskID(request.Url, request.UrlMeta), request, limit)
if err != nil {
return nil, err
}
// prefetch parent request
if ptm.enablePrefetch && request.UrlMeta.Range != "" {
go ptm.prefetch(request)
var parent *peerTaskConductor
if ptm.enablePrefetch && rg != nil {
parent = ptm.prefetchParentTask(request, "")
}
taskID := idgen.TaskID(request.Url, request.UrlMeta)
ptc, err := ptm.getPeerTaskConductor(ctx, taskID, request, limit, parent, rg, "")
if err != nil {
return nil, err
}
ctx, span := tracer.Start(ctx, config.SpanStreamTask, trace.WithSpanKind(trace.SpanKindClient))

View File

@ -265,7 +265,7 @@ func TestStreamPeerTask_BackSource_Partial_WithContentLength(t *testing.T) {
PeerHost: &scheduler.PeerHost{},
}
ctx := context.Background()
pt, err := ptm.newStreamTask(ctx, req)
pt, err := ptm.newStreamTask(ctx, req, nil)
assert.Nil(err, "new stream peer task")
rc, _, err := pt.Start(ctx)

View File

@ -174,13 +174,13 @@ func TestPieceManager_DownloadSource(t *testing.T) {
return logger.With("test case", tc.name)
})
taskStorage, err = storageManager.RegisterTask(context.Background(),
storage.RegisterTaskRequest{
CommonTaskRequest: storage.CommonTaskRequest{
PeerID: mockPeerTask.GetPeerID(),
TaskID: mockPeerTask.GetTaskID(),
Destination: output,
&storage.RegisterTaskRequest{
PeerTaskMetadata: storage.PeerTaskMetadata{
PeerID: mockPeerTask.GetPeerID(),
TaskID: mockPeerTask.GetTaskID(),
},
ContentLength: int64(len(testBytes)),
DesiredLocation: output,
ContentLength: int64(len(testBytes)),
})
assert.Nil(err)
defer storageManager.CleanUp()

View File

@ -19,6 +19,7 @@ package rpcserver
import (
"context"
"fmt"
"math"
"net"
"os"
@ -37,6 +38,7 @@ import (
dfdaemongrpc "d7y.io/dragonfly/v2/pkg/rpc/dfdaemon"
dfdaemonserver "d7y.io/dragonfly/v2/pkg/rpc/dfdaemon/server"
"d7y.io/dragonfly/v2/pkg/rpc/scheduler"
"d7y.io/dragonfly/v2/pkg/util/rangeutils"
)
type Server interface {
@ -143,11 +145,23 @@ func (m *server) Download(ctx context.Context,
PeerId: idgen.PeerID(m.peerHost.Ip),
PeerHost: m.peerHost,
},
Output: req.Output,
Limit: req.Limit,
DisableBackSource: req.DisableBackSource,
Pattern: req.Pattern,
Callsystem: req.Callsystem,
Output: req.Output,
Limit: req.Limit,
DisableBackSource: req.DisableBackSource,
Pattern: req.Pattern,
Callsystem: req.Callsystem,
KeepOriginalOffset: req.KeepOriginalOffset,
}
if len(req.UrlMeta.Range) > 0 {
r, err := rangeutils.ParseRange(req.UrlMeta.Range, math.MaxInt)
if err != nil {
err = fmt.Errorf("parse range %s error: %s", req.UrlMeta.Range, err)
return err
}
peerTask.Range = &clientutil.Range{
Start: int64(r.StartIndex),
Length: int64(r.Length()),
}
}
log := logger.With("peer", peerTask.PeerId, "component", "downloadService")

View File

@ -19,10 +19,12 @@ package storage
import (
"context"
"encoding/json"
"fmt"
"io"
"os"
"path"
"sync"
"syscall"
"time"
"go.uber.org/atomic"
@ -53,6 +55,8 @@ type localTaskStore struct {
// content stores tiny file which length less than 128 bytes
content []byte
subtasks map[PeerTaskMetadata]*localSubTaskStore
}
var _ TaskStorageDriver = (*localTaskStore)(nil)
@ -63,6 +67,30 @@ func (t *localTaskStore) touch() {
t.lastAccess.Store(access)
}
func (t *localTaskStore) SubTask(req *RegisterSubTaskRequest) *localSubTaskStore {
subtask := &localSubTaskStore{
parent: t,
Range: req.Range,
persistentMetadata: persistentMetadata{
TaskID: req.SubTask.TaskID,
TaskMeta: map[string]string{},
ContentLength: req.Range.Length,
TotalPieces: -1,
PeerID: req.SubTask.PeerID,
Pieces: map[int32]PieceMetadata{},
PieceMd5Sign: "",
DataFilePath: "",
Done: false,
},
SugaredLoggerOnWith: logger.With("task", req.SubTask.TaskID,
"parent", req.Parent.TaskID, "peer", req.SubTask.PeerID, "component", "localSubTaskStore"),
}
t.Lock()
t.subtasks[req.SubTask] = subtask
t.Unlock()
return subtask
}
func (t *localTaskStore) WritePiece(ctx context.Context, req *WritePieceRequest) (int64, error) {
t.touch()
@ -261,6 +289,7 @@ func (t *localTaskStore) ReadAllPieces(ctx context.Context, req *ReadAllPiecesRe
if err != nil {
return nil, err
}
if req.Range == nil {
return file, nil
}
@ -281,21 +310,28 @@ func (t *localTaskStore) Store(ctx context.Context, req *StoreRequest) error {
// Store is called in callback.Done, mark local task store done, for fast search
t.Done = true
t.touch()
if req.TotalPieces > 0 {
if req.TotalPieces > 0 && t.TotalPieces == -1 {
t.Lock()
t.TotalPieces = req.TotalPieces
t.Unlock()
}
if !req.StoreOnly {
if !req.StoreDataOnly {
err := t.saveMetadata()
if err != nil {
t.Warnf("save task metadata error: %s", err)
return err
}
}
if req.MetadataOnly {
return nil
}
if req.OriginalOffset {
return hardlink(t.SugaredLoggerOnWith, req.Destination, t.DataFilePath)
}
_, err := os.Stat(req.Destination)
if err == nil {
// remove exist file
@ -370,6 +406,9 @@ func (t *localTaskStore) GetPieces(ctx context.Context, req *base.PieceTaskReque
}
func (t *localTaskStore) CanReclaim() bool {
if t.invalid.Load() {
return true
}
access := time.Unix(0, t.lastAccess.Load())
reclaim := access.Add(t.expireTime).Before(time.Now())
t.Debugf("reclaim check, last access: %v, reclaim: %v", access, reclaim)
@ -388,6 +427,21 @@ func (t *localTaskStore) MarkReclaim() {
})
t.reclaimMarked.Store(true)
t.Infof("task %s/%s will be reclaimed, marked", t.TaskID, t.PeerID)
t.Lock()
var keys []PeerTaskMetadata
for key := range t.subtasks {
t.gcCallback(CommonTaskRequest{
PeerID: key.PeerID,
TaskID: key.TaskID,
})
t.Infof("sub task %s/%s will be reclaimed, marked", key.TaskID, key.PeerID)
keys = append(keys, key)
}
for _, key := range keys {
delete(t.subtasks, key)
}
t.Unlock()
}
func (t *localTaskStore) Reclaim() error {
@ -509,3 +563,49 @@ func (l *limitedReadFile) WriteTo(w io.Writer) (n int64, err error) {
}
return io.Copy(w, l.reader)
}
func hardlink(log *logger.SugaredLoggerOnWith, dst, src string) error {
dstStat, err := os.Stat(dst)
if os.IsNotExist(err) {
// hard link
err = os.Link(src, dst)
if err != nil {
log.Errorf("hardlink from %q to %q error: %s", src, dst, err)
return err
}
log.Infof("hardlink from %q to %q success", src, dst)
return nil
}
if err != nil {
log.Errorf("stat %q error: %s", src, err)
return err
}
// target already exists, check inode
srcStat, err := os.Stat(src)
if err != nil {
log.Errorf("stat %q error: %s", src, err)
return err
}
dstSysStat, ok := dstStat.Sys().(*syscall.Stat_t)
if !ok {
log.Errorf("can not get inode for %q", dst)
return err
}
srcSysStat, ok := srcStat.Sys().(*syscall.Stat_t)
if ok {
log.Errorf("can not get inode for %q", src)
return err
}
if dstSysStat.Dev == srcSysStat.Dev && dstSysStat.Ino == srcSysStat.Ino {
log.Debugf("target inode match underlay data inode, skip hard link")
return nil
}
err = fmt.Errorf("target file %q exists, with different inode with underlay data %q", dst, src)
return err
}

View File

@ -0,0 +1,379 @@
/*
* Copyright 2022 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package storage
import (
"context"
"io"
"os"
"sync"
"go.uber.org/atomic"
"d7y.io/dragonfly/v2/client/clientutil"
logger "d7y.io/dragonfly/v2/internal/dflog"
"d7y.io/dragonfly/v2/pkg/rpc/base"
"d7y.io/dragonfly/v2/pkg/util/digestutils"
)
// TODO need refactor with localTaskStore, currently, localSubTaskStore code copies from localTaskStore
type localSubTaskStore struct {
sync.RWMutex
persistentMetadata
*logger.SugaredLoggerOnWith
parent *localTaskStore
// when digest not match, invalid will be set
invalid atomic.Bool
Range *clientutil.Range
}
func (t *localSubTaskStore) WritePiece(ctx context.Context, req *WritePieceRequest) (int64, error) {
// piece already exists
t.RLock()
if piece, ok := t.Pieces[req.Num]; ok {
t.RUnlock()
// discard already downloaded data for back source
n, err := io.CopyN(io.Discard, req.Reader, piece.Range.Length)
if err != nil && err != io.EOF {
return n, err
}
if n != piece.Range.Length {
return n, ErrShortRead
}
return piece.Range.Length, nil
}
t.RUnlock()
// TODO different with localTaskStore
file, err := os.OpenFile(t.parent.DataFilePath, os.O_RDWR, defaultFileMode)
if err != nil {
return 0, err
}
defer file.Close()
// TODO different with localTaskStore
if _, err = file.Seek(t.Range.Start+req.Range.Start, io.SeekStart); err != nil {
return 0, err
}
n, err := io.Copy(file, io.LimitReader(req.Reader, req.Range.Length))
if err != nil {
return 0, err
}
// when UnknownLength and size is align to piece num
if req.UnknownLength && n == 0 {
t.Lock()
t.genDigest(n, req)
t.Unlock()
return 0, nil
}
if n != req.Range.Length {
if req.UnknownLength {
// when back source, and can not detect content length, we need update real length
req.Range.Length = n
// when n == 0, skip
if n == 0 {
t.Lock()
t.genDigest(n, req)
t.Unlock()
return 0, nil
}
} else {
return n, ErrShortRead
}
}
// when Md5 is empty, try to get md5 from reader, it's useful for back source
if req.PieceMetadata.Md5 == "" {
t.Debugf("piece md5 not found in metadata, read from reader")
if get, ok := req.Reader.(digestutils.DigestReader); ok {
req.PieceMetadata.Md5 = get.Digest()
t.Infof("read md5 from reader, value: %s", req.PieceMetadata.Md5)
} else {
t.Debugf("reader is not a DigestReader")
}
}
t.Debugf("wrote %d bytes to file %s, piece %d, start %d, length: %d",
n, t.DataFilePath, req.Num, req.Range.Start, req.Range.Length)
t.Lock()
defer t.Unlock()
// double check
if _, ok := t.Pieces[req.Num]; ok {
return n, nil
}
t.Pieces[req.Num] = req.PieceMetadata
t.genDigest(n, req)
return n, nil
}
func (t *localSubTaskStore) ReadPiece(ctx context.Context, req *ReadPieceRequest) (io.Reader, io.Closer, error) {
if t.invalid.Load() {
t.Errorf("invalid digest, refuse to get pieces")
return nil, nil, ErrInvalidDigest
}
// TODO different with localTaskStore
t.parent.touch()
file, err := os.Open(t.parent.DataFilePath)
if err != nil {
return nil, nil, err
}
// If req.Num is equal to -1, range has a fixed value.
if req.Num != -1 {
t.RLock()
if piece, ok := t.Pieces[req.Num]; ok {
t.RUnlock()
req.Range = piece.Range
} else {
t.RUnlock()
file.Close()
t.Errorf("invalid piece num: %d", req.Num)
return nil, nil, ErrPieceNotFound
}
}
// TODO different with localTaskStore
if _, err = file.Seek(t.Range.Start+req.Range.Start, io.SeekStart); err != nil {
file.Close()
t.Errorf("file seek failed: %v", err)
return nil, nil, err
}
// who call ReadPiece, who close the io.ReadCloser
return io.LimitReader(file, req.Range.Length), file, nil
}
func (t *localSubTaskStore) ReadAllPieces(ctx context.Context, req *ReadAllPiecesRequest) (io.ReadCloser, error) {
if t.invalid.Load() {
t.Errorf("invalid digest, refuse to read all pieces")
return nil, ErrInvalidDigest
}
t.parent.touch()
// who call ReadPiece, who close the io.ReadCloser
file, err := os.Open(t.parent.DataFilePath)
if err != nil {
return nil, err
}
var (
start int64
length int64
)
if req.Range == nil {
start, length = t.Range.Start, t.Range.Length
} else {
start, length = t.Range.Start+req.Range.Start, t.Range.Length
}
if _, err = file.Seek(start, io.SeekStart); err != nil {
file.Close()
t.Errorf("file seek to %d failed: %v", start, err)
return nil, err
}
return &limitedReadFile{
reader: io.LimitReader(file, length),
closer: file,
}, nil
}
func (t *localSubTaskStore) GetPieces(ctx context.Context, req *base.PieceTaskRequest) (*base.PiecePacket, error) {
if t.invalid.Load() {
t.Errorf("invalid digest, refuse to get pieces")
return nil, ErrInvalidDigest
}
t.RLock()
defer t.RUnlock()
t.parent.touch()
piecePacket := &base.PiecePacket{
TaskId: req.TaskId,
DstPid: t.PeerID,
TotalPiece: t.TotalPieces,
ContentLength: t.ContentLength,
PieceMd5Sign: t.PieceMd5Sign,
}
if t.TotalPieces > -1 && int32(req.StartNum) >= t.TotalPieces {
t.Warnf("invalid start num: %d", req.StartNum)
}
for i := int32(0); i < int32(req.Limit); i++ {
if piece, ok := t.Pieces[int32(req.StartNum)+i]; ok {
piecePacket.PieceInfos = append(piecePacket.PieceInfos, &base.PieceInfo{
PieceNum: piece.Num,
RangeStart: uint64(piece.Range.Start),
RangeSize: uint32(piece.Range.Length),
PieceMd5: piece.Md5,
PieceOffset: piece.Offset,
PieceStyle: piece.Style,
})
}
}
return piecePacket, nil
}
func (t *localSubTaskStore) UpdateTask(ctx context.Context, req *UpdateTaskRequest) error {
t.parent.touch()
t.Lock()
defer t.Unlock()
t.persistentMetadata.ContentLength = req.ContentLength
if req.TotalPieces > 0 {
t.TotalPieces = req.TotalPieces
t.Debugf("update total pieces: %d", t.TotalPieces)
}
if len(t.PieceMd5Sign) == 0 && len(req.PieceMd5Sign) > 0 {
t.PieceMd5Sign = req.PieceMd5Sign
t.Debugf("update piece md5 sign: %s", t.PieceMd5Sign)
}
return nil
}
func (t *localSubTaskStore) Store(ctx context.Context, req *StoreRequest) error {
// Store is called in callback.Done, mark local task store done, for fast search
t.Done = true
t.parent.touch()
if req.TotalPieces > 0 {
t.Lock()
t.TotalPieces = req.TotalPieces
t.Unlock()
}
if req.MetadataOnly {
return nil
}
if req.OriginalOffset {
return hardlink(t.SugaredLoggerOnWith, req.Destination, t.parent.DataFilePath)
}
_, err := os.Stat(req.Destination)
if err == nil {
// remove exist file
t.Infof("destination file %q exists, purge it first", req.Destination)
os.Remove(req.Destination)
}
file, err := os.Open(t.parent.DataFilePath)
if err != nil {
t.Debugf("open tasks data error: %s", err)
return err
}
defer file.Close()
_, err = file.Seek(t.Range.Start, io.SeekStart)
if err != nil {
t.Debugf("task seek file error: %s", err)
return err
}
dstFile, err := os.OpenFile(req.Destination, os.O_CREATE|os.O_RDWR|os.O_TRUNC, defaultFileMode)
if err != nil {
t.Errorf("open tasks destination file error: %s", err)
return err
}
defer dstFile.Close()
// copy_file_range is valid in linux
// https://go-review.googlesource.com/c/go/+/229101/
n, err := io.Copy(dstFile, io.LimitReader(file, t.ContentLength))
t.Debugf("copied tasks data %d bytes to %s", n, req.Destination)
return err
}
func (t *localSubTaskStore) ValidateDigest(req *PeerTaskMetadata) error {
t.Lock()
defer t.Unlock()
if t.persistentMetadata.PieceMd5Sign == "" {
t.invalid.Store(true)
return ErrDigestNotSet
}
if t.TotalPieces <= 0 {
t.Errorf("total piece count not set when validate digest")
t.invalid.Store(true)
return ErrPieceCountNotSet
}
var pieceDigests []string
for i := int32(0); i < t.TotalPieces; i++ {
pieceDigests = append(pieceDigests, t.Pieces[i].Md5)
}
digest := digestutils.Sha256(pieceDigests...)
if digest != t.PieceMd5Sign {
t.Errorf("invalid digest, desired: %s, actual: %s", t.PieceMd5Sign, digest)
t.invalid.Store(true)
return ErrInvalidDigest
}
return nil
}
func (t *localSubTaskStore) IsInvalid(req *PeerTaskMetadata) (bool, error) {
return t.invalid.Load(), nil
}
func (t *localSubTaskStore) genDigest(n int64, req *WritePieceRequest) {
if req.GenPieceDigest == nil || t.PieceMd5Sign != "" {
return
}
total, gen := req.GenPieceDigest(n)
if !gen {
return
}
t.TotalPieces = total
var pieceDigests []string
for i := int32(0); i < t.TotalPieces; i++ {
pieceDigests = append(pieceDigests, t.Pieces[i].Md5)
}
digest := digestutils.Sha256(pieceDigests...)
t.PieceMd5Sign = digest
t.Infof("generated digest: %s", digest)
}
func (t *localSubTaskStore) CanReclaim() bool {
if t.parent.Done || t.invalid.Load() {
return true
}
return false
}
func (t *localSubTaskStore) MarkReclaim() {
t.parent.gcCallback(CommonTaskRequest{
PeerID: t.PeerID,
TaskID: t.TaskID,
})
t.Infof("sub task %s/%s will be reclaimed, marked", t.TaskID, t.PeerID)
t.parent.Lock()
delete(t.parent.subtasks, PeerTaskMetadata{
PeerID: t.PeerID,
TaskID: t.TaskID,
})
t.parent.Unlock()
}
func (t *localSubTaskStore) Reclaim() error {
return nil
}

View File

@ -25,7 +25,6 @@ import (
"math/rand"
"os"
"path"
"path/filepath"
"testing"
"time"
@ -37,136 +36,286 @@ import (
logger "d7y.io/dragonfly/v2/internal/dflog"
"d7y.io/dragonfly/v2/pkg/rpc/base"
_ "d7y.io/dragonfly/v2/pkg/rpc/dfdaemon/server"
"d7y.io/dragonfly/v2/pkg/util/digestutils"
)
func TestMain(m *testing.M) {
os.Exit(m.Run())
}
func TestLocalTaskStore_PutAndGetPiece_Simple(t *testing.T) {
func TestLocalTaskStore_PutAndGetPiece(t *testing.T) {
assert := testifyassert.New(t)
testBytes, err := os.ReadFile(test.File)
assert.Nil(err, "load test file")
md5Test, _ := calcFileMd5(test.File, nil)
dst := path.Join(test.DataDir, taskData+".copy")
defer os.Remove(dst)
var (
taskID = "task-d4bb1c273a9889fea14abd4651994fe8"
peerID = "peer-d4bb1c273a9889fea14abd4651994fe8"
pieceSize = 512
)
sm, err := NewStorageManager(config.SimpleLocalTaskStoreStrategy,
&config.StorageOption{
DataPath: test.DataDir,
TaskExpireTime: clientutil.Duration{
Duration: time.Minute,
testCases := []struct {
name string
strategy config.StoreStrategy
create func(s *storageManager, taskID, peerID string) (TaskStorageDriver, error)
}{
{
name: "normal",
strategy: config.SimpleLocalTaskStoreStrategy,
create: func(s *storageManager, taskID, peerID string) (TaskStorageDriver, error) {
return s.CreateTask(
&RegisterTaskRequest{
PeerTaskMetadata: PeerTaskMetadata{
PeerID: peerID,
TaskID: taskID,
},
DesiredLocation: dst,
ContentLength: int64(len(testBytes)),
})
},
}, func(request CommonTaskRequest) {
})
if err != nil {
t.Fatal(err)
}
var s = sm.(*storageManager)
_, err = s.CreateTask(
RegisterTaskRequest{
CommonTaskRequest: CommonTaskRequest{
PeerID: peerID,
TaskID: taskID,
Destination: dst,
},
{
name: "normal",
strategy: config.AdvanceLocalTaskStoreStrategy,
create: func(s *storageManager, taskID, peerID string) (TaskStorageDriver, error) {
return s.CreateTask(
&RegisterTaskRequest{
PeerTaskMetadata: PeerTaskMetadata{
PeerID: peerID,
TaskID: taskID,
},
DesiredLocation: dst,
ContentLength: int64(len(testBytes)),
})
},
ContentLength: int64(len(testBytes)),
})
assert.Nil(err, "create task storage")
ts, ok := s.LoadTask(PeerTaskMetadata{
PeerID: peerID,
TaskID: taskID,
})
assert.True(ok, "")
},
{
name: "subtask",
strategy: config.AdvanceLocalTaskStoreStrategy,
create: func(s *storageManager, taskID, peerID string) (TaskStorageDriver, error) {
var (
parentPeerID = peerID + "-parent"
parentTaskID = taskID + "-parent"
)
var pieces []struct {
index int
start int
end int // not contain in data
}
for i := 0; i*pieceSize < len(testBytes); i++ {
start := i * pieceSize
end := start + pieceSize
if end > len(testBytes) {
end = len(testBytes)
}
pieces = append(pieces, struct {
index int
start int
end int
}{
index: i,
start: start,
end: end,
})
}
rand.Seed(time.Now().UnixNano())
rand.Shuffle(len(pieces), func(i, j int) { pieces[i], pieces[j] = pieces[j], pieces[i] })
_, err := s.CreateTask(
&RegisterTaskRequest{
PeerTaskMetadata: PeerTaskMetadata{
PeerID: parentPeerID,
TaskID: parentTaskID,
},
DesiredLocation: dst,
ContentLength: int64(len(testBytes)),
})
assert.Nil(err)
// random put all pieces
for _, p := range pieces {
_, err = ts.WritePiece(context.Background(), &WritePieceRequest{
PeerTaskMetadata: PeerTaskMetadata{
return s.RegisterSubTask(
context.Background(),
&RegisterSubTaskRequest{
Parent: PeerTaskMetadata{
PeerID: parentPeerID,
TaskID: parentTaskID,
},
SubTask: PeerTaskMetadata{
PeerID: peerID,
TaskID: taskID,
},
Range: &clientutil.Range{
Start: 100,
Length: int64(len(testBytes)),
},
})
},
},
}
for _, tc := range testCases {
t.Run(tc.name+"-"+string(tc.strategy), func(t *testing.T) {
var (
taskID = "task-d4bb1c273a9889fea14abd4651994fe8"
peerID = "peer-d4bb1c273a9889fea14abd4651994fe8"
pieceSize = 512
)
sm, err := NewStorageManager(config.SimpleLocalTaskStoreStrategy,
&config.StorageOption{
DataPath: test.DataDir,
TaskExpireTime: clientutil.Duration{
Duration: time.Minute,
},
}, func(request CommonTaskRequest) {
})
assert.Nil(err)
_, err = tc.create(sm.(*storageManager), taskID, peerID)
assert.Nil(err, "create task storage")
ts, ok := sm.(*storageManager).LoadTask(PeerTaskMetadata{
PeerID: peerID,
TaskID: taskID,
},
PieceMetadata: PieceMetadata{
Num: int32(p.index),
Md5: "",
Offset: uint64(p.start),
Range: clientutil.Range{
Start: int64(p.start),
Length: int64(p.end - p.start),
})
assert.True(ok, "load created task")
var pieces []struct {
index int
start int
end int // not contain in data
}
var piecesMd5 []string
for i := 0; i*pieceSize < len(testBytes); i++ {
start := i * pieceSize
end := start + pieceSize
if end > len(testBytes) {
end = len(testBytes)
}
pieces = append(pieces, struct {
index int
start int
end int
}{
index: i,
start: start,
end: end,
})
piecesMd5 = append(piecesMd5, calcPieceMd5(testBytes[start:end]))
}
rand.Seed(time.Now().UnixNano())
rand.Shuffle(len(pieces), func(i, j int) { pieces[i], pieces[j] = pieces[j], pieces[i] })
// random put all pieces
for _, p := range pieces {
_, err = ts.WritePiece(context.Background(), &WritePieceRequest{
PeerTaskMetadata: PeerTaskMetadata{
TaskID: taskID,
},
PieceMetadata: PieceMetadata{
Num: int32(p.index),
Md5: piecesMd5[p.index],
Offset: uint64(p.start),
Range: clientutil.Range{
Start: int64(p.start),
Length: int64(p.end - p.start),
},
Style: base.PieceStyle_PLAIN,
},
Reader: bytes.NewBuffer(testBytes[p.start:p.end]),
})
assert.Nil(err, "put piece")
}
if lts, ok := ts.(*localTaskStore); ok {
md5TaskData, _ := calcFileMd5(path.Join(lts.dataDir, taskData), nil)
assert.Equal(md5Test, md5TaskData, "md5 must match")
} else if lsts, ok := ts.(*localSubTaskStore); ok {
md5TaskData, _ := calcFileMd5(path.Join(lsts.parent.dataDir, taskData), lsts.Range)
assert.Equal(md5Test, md5TaskData, "md5 must match")
}
// shuffle again for get all pieces
rand.Shuffle(len(pieces), func(i, j int) { pieces[i], pieces[j] = pieces[j], pieces[i] })
for _, p := range pieces {
rd, cl, err := ts.ReadPiece(context.Background(), &ReadPieceRequest{
PeerTaskMetadata: PeerTaskMetadata{
TaskID: taskID,
},
PieceMetadata: PieceMetadata{
Num: int32(p.index),
Md5: piecesMd5[p.index],
Offset: uint64(p.start),
Range: clientutil.Range{
Start: int64(p.start),
Length: int64(p.end - p.start),
},
Style: base.PieceStyle_PLAIN,
},
})
assert.Nil(err, "get piece reader should be ok")
data, err := io.ReadAll(rd)
cl.Close()
assert.Nil(err, "read piece should be ok")
assert.Equal(p.end-p.start, len(data), "piece length should match")
assert.Equal(testBytes[p.start:p.end], data, "piece data should match")
}
rd, err := ts.ReadAllPieces(context.Background(), &ReadAllPiecesRequest{
PeerTaskMetadata: PeerTaskMetadata{
TaskID: taskID,
},
Style: base.PieceStyle_PLAIN,
},
Reader: bytes.NewBuffer(testBytes[p.start:p.end]),
Range: nil,
})
assert.Nil(err, "get all pieces reader should be ok")
data, err := io.ReadAll(rd)
assert.Nil(err, "read all pieces should be ok")
rd.Close()
assert.Equal(testBytes, data, "all pieces data should match")
if lts, ok := ts.(*localTaskStore); ok {
lts.genDigest(0, &WritePieceRequest{
GenPieceDigest: func(n int64) (total int32, gen bool) {
return int32(len(pieces)), true
},
})
assert.Equal(digestutils.Sha256(piecesMd5...), lts.PieceMd5Sign)
// clean up test data
lts.lastAccess.Store(time.Now().Add(-1 * time.Hour).UnixNano())
ok = lts.CanReclaim()
assert.True(ok, "task should gc")
err = lts.Reclaim()
assert.Nil(err, "task gc")
} else if lsts, ok := ts.(*localSubTaskStore); ok {
lsts.genDigest(0, &WritePieceRequest{
GenPieceDigest: func(n int64) (total int32, gen bool) {
return int32(len(pieces)), true
},
})
assert.Equal(digestutils.Sha256(piecesMd5...), lsts.PieceMd5Sign)
// keep original offset
err = lsts.Store(context.Background(),
&StoreRequest{
CommonTaskRequest: CommonTaskRequest{
Destination: dst,
},
MetadataOnly: false,
StoreDataOnly: false,
TotalPieces: 0,
OriginalOffset: true,
})
assert.Nil(err)
md5Store, err := calcFileMd5(dst, lsts.Range)
assert.Nil(err)
assert.Equal(md5Test, md5Store)
// just ranged data
err = lsts.Store(context.Background(),
&StoreRequest{
CommonTaskRequest: CommonTaskRequest{
Destination: dst,
},
MetadataOnly: false,
StoreDataOnly: false,
TotalPieces: 0,
OriginalOffset: false,
})
assert.Nil(err)
md5Store, err = calcFileMd5(dst, nil)
assert.Nil(err)
assert.Equal(md5Test, md5Store)
// clean up test data
lsts.parent.lastAccess.Store(time.Now().Add(-1 * time.Hour).UnixNano())
lsts.parent.Done = true
ok = lsts.CanReclaim()
assert.True(ok, "sub task should gc")
err = lsts.Reclaim()
assert.Nil(err, "sub task gc")
ok = lsts.parent.CanReclaim()
assert.True(ok, "parent task should gc")
err = lsts.parent.Reclaim()
assert.Nil(err, "parent task gc")
}
})
assert.Nil(err, "put piece")
}
md5Test, _ := calcFileMd5(test.File)
md5TaskData, _ := calcFileMd5(path.Join(ts.(*localTaskStore).dataDir, taskData))
assert.Equal(md5Test, md5TaskData, "md5 must match")
// shuffle again for get all pieces
rand.Shuffle(len(pieces), func(i, j int) { pieces[i], pieces[j] = pieces[j], pieces[i] })
for _, p := range pieces {
rd, cl, err := ts.ReadPiece(context.Background(), &ReadPieceRequest{
PeerTaskMetadata: PeerTaskMetadata{
TaskID: taskID,
},
PieceMetadata: PieceMetadata{
Num: int32(p.index),
Md5: "",
Offset: uint64(p.start),
Range: clientutil.Range{
Start: int64(p.start),
Length: int64(p.end - p.start),
},
Style: base.PieceStyle_PLAIN,
},
})
assert.Nil(err, "get piece should be ok")
data, err := io.ReadAll(rd)
cl.Close()
assert.Nil(err, "read piece should be ok")
assert.Equal(p.end-p.start, len(data), "piece length should match")
assert.Equal(testBytes[p.start:p.end], data, "piece data should match")
}
// clean up test data
ts.(*localTaskStore).lastAccess.Store(time.Now().Add(-1 * time.Hour).UnixNano())
ok = ts.(Reclaimer).CanReclaim()
assert.True(ok, "task should gc")
err = ts.(Reclaimer).Reclaim()
assert.Nil(err, "task gc")
}
func TestLocalTaskStore_StoreTaskData_Simple(t *testing.T) {
@ -211,146 +360,7 @@ func TestLocalTaskStore_StoreTaskData_Simple(t *testing.T) {
assert.Equal(testData, bs, "data must match")
}
func TestLocalTaskStore_ReloadPersistentTask_Simple(t *testing.T) {
}
func TestLocalTaskStore_PutAndGetPiece_Advance(t *testing.T) {
assert := testifyassert.New(t)
testBytes, err := os.ReadFile(test.File)
assert.Nil(err, "load test file")
dst := path.Join(test.DataDir, taskData+".copy")
dst, _ = filepath.Abs(dst)
defer os.Remove(dst)
var (
taskID = "task-d4bb1c273a9889fea14abd4651994fe8"
peerID = "peer-d4bb1c273a9889fea14abd4651994fe8"
pieceSize = 512
)
sm, err := NewStorageManager(config.AdvanceLocalTaskStoreStrategy,
&config.StorageOption{
DataPath: test.DataDir,
TaskExpireTime: clientutil.Duration{
Duration: time.Minute,
},
}, func(request CommonTaskRequest) {
})
if err != nil {
t.Fatal(err)
}
var s = sm.(*storageManager)
_, err = s.CreateTask(
RegisterTaskRequest{
CommonTaskRequest: CommonTaskRequest{
PeerID: peerID,
TaskID: taskID,
Destination: dst,
},
ContentLength: int64(len(testBytes)),
})
assert.Nil(err, "create task storage")
ts, ok := s.LoadTask(PeerTaskMetadata{
PeerID: peerID,
TaskID: taskID,
})
assert.True(ok, "")
var pieces []struct {
index int
start int
end int // not contain in data
}
for i := 0; i*pieceSize < len(testBytes); i++ {
start := i * pieceSize
end := start + pieceSize
if end > len(testBytes) {
end = len(testBytes)
}
pieces = append(pieces, struct {
index int
start int
end int
}{
index: i,
start: start,
end: end,
})
}
rand.Seed(time.Now().UnixNano())
rand.Shuffle(len(pieces), func(i, j int) { pieces[i], pieces[j] = pieces[j], pieces[i] })
// random put all pieces
for _, p := range pieces {
_, err = ts.WritePiece(context.Background(), &WritePieceRequest{
PeerTaskMetadata: PeerTaskMetadata{
TaskID: taskID,
},
PieceMetadata: PieceMetadata{
Num: int32(p.index),
Md5: "",
Offset: uint64(p.start),
Range: clientutil.Range{
Start: int64(p.start),
Length: int64(p.end - p.start),
},
Style: base.PieceStyle_PLAIN,
},
Reader: bytes.NewBuffer(testBytes[p.start:p.end]),
})
assert.Nil(err, "put piece")
}
md5Test, _ := calcFileMd5(test.File)
md5TaskData, _ := calcFileMd5(path.Join(ts.(*localTaskStore).dataDir, taskData))
assert.Equal(md5Test, md5TaskData, "md5 must match")
// shuffle again for get all pieces
rand.Shuffle(len(pieces), func(i, j int) { pieces[i], pieces[j] = pieces[j], pieces[i] })
for _, p := range pieces {
rd, cl, err := ts.ReadPiece(context.Background(), &ReadPieceRequest{
PeerTaskMetadata: PeerTaskMetadata{
TaskID: taskID,
},
PieceMetadata: PieceMetadata{
Num: int32(p.index),
Md5: "",
Offset: uint64(p.start),
Range: clientutil.Range{
Start: int64(p.start),
Length: int64(p.end - p.start),
},
Style: base.PieceStyle_PLAIN,
},
})
assert.Nil(err, "get piece should be ok")
data, err := io.ReadAll(rd)
cl.Close()
assert.Nil(err, "read piece should be ok")
assert.Equal(p.end-p.start, len(data), "piece length should match")
assert.Equal(testBytes[p.start:p.end], data, "piece data should match")
}
// clean up test data
ts.(*localTaskStore).lastAccess.Store(time.Now().Add(-1 * time.Hour).UnixNano())
ok = ts.(Reclaimer).CanReclaim()
assert.True(ok, "task should gc")
err = ts.(Reclaimer).Reclaim()
assert.Nil(err, "task gc")
}
func TestLocalTaskStore_StoreTaskData_Advance(t *testing.T) {
}
func TestLocalTaskStore_ReloadPersistentTask_Advance(t *testing.T) {
}
func calcFileMd5(filePath string) (string, error) {
func calcFileMd5(filePath string, rg *clientutil.Range) (string, error) {
var md5String string
file, err := os.Open(filePath)
if err != nil {
@ -358,11 +368,26 @@ func calcFileMd5(filePath string) (string, error) {
}
defer file.Close()
var rd io.Reader = file
if rg != nil {
rd = io.LimitReader(file, rg.Length)
_, err = file.Seek(rg.Start, io.SeekStart)
if err != nil {
return "", err
}
}
hash := md5.New()
if _, err := io.Copy(hash, file); err != nil {
if _, err := io.Copy(hash, rd); err != nil {
return md5String, err
}
hashInBytes := hash.Sum(nil)[:16]
md5String = hex.EncodeToString(hashInBytes)
return md5String, nil
}
func calcPieceMd5(data []byte) string {
hash := md5.New()
hash.Write(data)
return hex.EncodeToString(hash.Sum(nil)[:16])
}

View File

@ -56,10 +56,11 @@ type CommonTaskRequest struct {
}
type RegisterTaskRequest struct {
CommonTaskRequest
ContentLength int64
TotalPieces int32
PieceMd5Sign string
PeerTaskMetadata
DesiredLocation string
ContentLength int64
TotalPieces int32
PieceMd5Sign string
}
type WritePieceRequest struct {
@ -74,19 +75,29 @@ type WritePieceRequest struct {
type StoreRequest struct {
CommonTaskRequest
MetadataOnly bool
StoreOnly bool
TotalPieces int32
// StoreDataOnly stands save file only without save metadata, used in reuse cases
StoreDataOnly bool
TotalPieces int32
// OriginalOffset stands keep original offset in the target file, if the target file is not original file, return error
OriginalOffset bool
}
type ReadPieceRequest struct {
PeerTaskMetadata
PieceMetadata
}
type ReadAllPiecesRequest struct {
PeerTaskMetadata
Range *clientutil.Range
}
type RegisterSubTaskRequest struct {
Parent PeerTaskMetadata
SubTask PeerTaskMetadata
Range *clientutil.Range
}
type UpdateTaskRequest struct {
PeerTaskMetadata
ContentLength int64

View File

@ -84,9 +84,13 @@ type Manager interface {
// KeepAlive tests if storage is used in given time duration
clientutil.KeepAlive
// RegisterTask registers a task in storage driver
RegisterTask(ctx context.Context, req RegisterTaskRequest) (TaskStorageDriver, error)
RegisterTask(ctx context.Context, req *RegisterTaskRequest) (TaskStorageDriver, error)
// RegisterSubTask registers a subtask in storage driver
RegisterSubTask(ctx context.Context, req *RegisterSubTaskRequest) (TaskStorageDriver, error)
// FindCompletedTask try to find a completed task for fast path
FindCompletedTask(taskID string) *ReusePeerTask
// FindCompletedSubTask try to find a completed subtask for fast path
FindCompletedSubTask(taskID string) *ReusePeerTask
// CleanUp cleans all storage data
CleanUp()
}
@ -119,8 +123,12 @@ type storageManager struct {
dataPathStat *syscall.Stat_t
gcCallback func(CommonTaskRequest)
gcInterval time.Duration
indexRWMutex sync.RWMutex
indexTask2PeerTask map[string][]*localTaskStore // key: task id, value: slice of localTaskStore
subIndexRWMutex sync.RWMutex
subIndexTask2PeerTask map[string][]*localSubTaskStore // key: task id, value: slice of localSubTaskStore
}
var _ gc.GC = (*storageManager)(nil)
@ -155,13 +163,14 @@ func NewStorageManager(storeStrategy config.StoreStrategy, opt *config.StorageOp
}
s := &storageManager{
KeepAlive: clientutil.NewKeepAlive("storage manager"),
storeStrategy: storeStrategy,
storeOption: opt,
dataPathStat: stat.Sys().(*syscall.Stat_t),
gcCallback: gcCallback,
gcInterval: time.Minute,
indexTask2PeerTask: map[string][]*localTaskStore{},
KeepAlive: clientutil.NewKeepAlive("storage manager"),
storeStrategy: storeStrategy,
storeOption: opt,
dataPathStat: stat.Sys().(*syscall.Stat_t),
gcCallback: gcCallback,
gcInterval: time.Minute,
indexTask2PeerTask: map[string][]*localTaskStore{},
subIndexTask2PeerTask: map[string][]*localSubTaskStore{},
}
for _, o := range moreOpts {
@ -192,7 +201,7 @@ func WithGCInterval(gcInterval time.Duration) func(*storageManager) error {
}
}
func (s *storageManager) RegisterTask(ctx context.Context, req RegisterTaskRequest) (TaskStorageDriver, error) {
func (s *storageManager) RegisterTask(ctx context.Context, req *RegisterTaskRequest) (TaskStorageDriver, error) {
ts, ok := s.LoadTask(
PeerTaskMetadata{
PeerID: req.PeerID,
@ -216,6 +225,36 @@ func (s *storageManager) RegisterTask(ctx context.Context, req RegisterTaskReque
return s.CreateTask(req)
}
func (s *storageManager) RegisterSubTask(ctx context.Context, req *RegisterSubTaskRequest) (TaskStorageDriver, error) {
t, ok := s.LoadTask(
PeerTaskMetadata{
PeerID: req.Parent.PeerID,
TaskID: req.Parent.TaskID,
})
if !ok {
return nil, fmt.Errorf("task %s not found", req.Parent.TaskID)
}
subtask := t.(*localTaskStore).SubTask(req)
s.subIndexRWMutex.Lock()
if ts, ok := s.subIndexTask2PeerTask[req.SubTask.TaskID]; ok {
ts = append(ts, subtask)
s.subIndexTask2PeerTask[req.SubTask.TaskID] = ts
} else {
s.subIndexTask2PeerTask[req.SubTask.TaskID] = []*localSubTaskStore{subtask}
}
s.subIndexRWMutex.Unlock()
s.Lock()
s.tasks.Store(
PeerTaskMetadata{
PeerID: req.SubTask.PeerID,
TaskID: req.SubTask.TaskID,
}, subtask)
s.Unlock()
return subtask, nil
}
func (s *storageManager) WritePiece(ctx context.Context, req *WritePieceRequest) (int64, error) {
t, ok := s.LoadTask(
PeerTaskMetadata{
@ -225,7 +264,7 @@ func (s *storageManager) WritePiece(ctx context.Context, req *WritePieceRequest)
if !ok {
return 0, ErrTaskNotFound
}
return t.(TaskStorageDriver).WritePiece(ctx, req)
return t.WritePiece(ctx, req)
}
func (s *storageManager) ReadPiece(ctx context.Context, req *ReadPieceRequest) (io.Reader, io.Closer, error) {
@ -238,7 +277,7 @@ func (s *storageManager) ReadPiece(ctx context.Context, req *ReadPieceRequest) (
// TODO recover for local task persistentMetadata data
return nil, nil, ErrTaskNotFound
}
return t.(TaskStorageDriver).ReadPiece(ctx, req)
return t.ReadPiece(ctx, req)
}
func (s *storageManager) ReadAllPieces(ctx context.Context, req *ReadAllPiecesRequest) (io.ReadCloser, error) {
@ -251,7 +290,7 @@ func (s *storageManager) ReadAllPieces(ctx context.Context, req *ReadAllPiecesRe
// TODO recover for local task persistentMetadata data
return nil, ErrTaskNotFound
}
return t.(TaskStorageDriver).ReadAllPieces(ctx, req)
return t.ReadAllPieces(ctx, req)
}
func (s *storageManager) Store(ctx context.Context, req *StoreRequest) error {
@ -264,7 +303,7 @@ func (s *storageManager) Store(ctx context.Context, req *StoreRequest) error {
// TODO recover for local task persistentMetadata data
return ErrTaskNotFound
}
return t.(TaskStorageDriver).Store(ctx, req)
return t.Store(ctx, req)
}
func (s *storageManager) GetPieces(ctx context.Context, req *base.PieceTaskRequest) (*base.PiecePacket, error) {
@ -276,7 +315,7 @@ func (s *storageManager) GetPieces(ctx context.Context, req *base.PieceTaskReque
if !ok {
return nil, ErrTaskNotFound
}
return t.(TaskStorageDriver).GetPieces(ctx, req)
return t.GetPieces(ctx, req)
}
func (s *storageManager) LoadTask(meta PeerTaskMetadata) (TaskStorageDriver, bool) {
@ -297,10 +336,10 @@ func (s *storageManager) UpdateTask(ctx context.Context, req *UpdateTaskRequest)
if !ok {
return ErrTaskNotFound
}
return t.(TaskStorageDriver).UpdateTask(ctx, req)
return t.UpdateTask(ctx, req)
}
func (s *storageManager) CreateTask(req RegisterTaskRequest) (TaskStorageDriver, error) {
func (s *storageManager) CreateTask(req *RegisterTaskRequest) (TaskStorageDriver, error) {
s.Keep()
logger.Debugf("init local task storage, peer id: %s, task id: %s", req.PeerID, req.TaskID)
@ -320,6 +359,7 @@ func (s *storageManager) CreateTask(req RegisterTaskRequest) (TaskStorageDriver,
dataDir: dataDir,
metadataFilePath: path.Join(dataDir, taskMetadata),
expireTime: s.storeOption.TaskExpireTime.Duration,
subtasks: map[PeerTaskMetadata]*localSubTaskStore{},
SugaredLoggerOnWith: logger.With("task", req.TaskID, "peer", req.PeerID, "component", "localTaskStore"),
}
@ -334,7 +374,7 @@ func (s *storageManager) CreateTask(req RegisterTaskRequest) (TaskStorageDriver,
t.metadataFile = metadata
// fallback to simple strategy for proxy
if req.Destination == "" {
if req.DesiredLocation == "" {
t.StoreStrategy = string(config.SimpleLocalTaskStoreStrategy)
}
data := path.Join(dataDir, taskData)
@ -347,7 +387,7 @@ func (s *storageManager) CreateTask(req RegisterTaskRequest) (TaskStorageDriver,
}
f.Close()
case string(config.AdvanceLocalTaskStoreStrategy):
dir, file := path.Split(req.Destination)
dir, file := path.Split(req.DesiredLocation)
dirStat, err := os.Stat(dir)
if err != nil {
return nil, err
@ -431,6 +471,39 @@ func (s *storageManager) FindCompletedTask(taskID string) *ReusePeerTask {
return nil
}
func (s *storageManager) FindCompletedSubTask(taskID string) *ReusePeerTask {
s.subIndexRWMutex.RLock()
defer s.subIndexRWMutex.RUnlock()
ts, ok := s.subIndexTask2PeerTask[taskID]
if !ok {
return nil
}
for _, t := range ts {
if t.invalid.Load() {
continue
}
// touch it before marking reclaim
t.parent.touch()
// already marked, skip
if t.parent.reclaimMarked.Load() {
continue
}
if !t.Done {
continue
}
return &ReusePeerTask{
PeerTaskMetadata: PeerTaskMetadata{
PeerID: t.PeerID,
TaskID: taskID,
},
ContentLength: t.ContentLength,
TotalPieces: t.TotalPieces,
}
}
return nil
}
func (s *storageManager) cleanIndex(taskID, peerID string) {
s.indexRWMutex.Lock()
defer s.indexRWMutex.Unlock()
@ -450,6 +523,25 @@ func (s *storageManager) cleanIndex(taskID, peerID string) {
s.indexTask2PeerTask[taskID] = remain
}
func (s *storageManager) cleanSubIndex(taskID, peerID string) {
s.subIndexRWMutex.Lock()
defer s.subIndexRWMutex.Unlock()
ts, ok := s.subIndexTask2PeerTask[taskID]
if !ok {
return
}
var remain []*localSubTaskStore
// FIXME switch instead copy
for _, t := range ts {
if t.PeerID == peerID {
logger.Debugf("clean index for %s/%s", taskID, peerID)
continue
}
remain = append(remain, t)
}
s.subIndexTask2PeerTask[taskID] = remain
}
func (s *storageManager) ValidateDigest(req *PeerTaskMetadata) error {
t, ok := s.LoadTask(
PeerTaskMetadata{
@ -459,7 +551,7 @@ func (s *storageManager) ValidateDigest(req *PeerTaskMetadata) error {
if !ok {
return ErrTaskNotFound
}
return t.(TaskStorageDriver).ValidateDigest(req)
return t.ValidateDigest(req)
}
func (s *storageManager) IsInvalid(req *PeerTaskMetadata) (bool, error) {
@ -471,7 +563,7 @@ func (s *storageManager) IsInvalid(req *PeerTaskMetadata) (bool, error) {
if !ok {
return false, ErrTaskNotFound
}
return t.(TaskStorageDriver).IsInvalid(req)
return t.IsInvalid(req)
}
func (s *storageManager) ReloadPersistentTask(gcCallback GCCallback) error {
@ -602,17 +694,21 @@ func (s *storageManager) ReloadPersistentTask(gcCallback GCCallback) error {
}
func (s *storageManager) TryGC() (bool, error) {
// FIXME gc subtask
var markedTasks []PeerTaskMetadata
var totalNotMarkedSize int64
s.tasks.Range(func(key, task interface{}) bool {
if task.(*localTaskStore).CanReclaim() {
task.(*localTaskStore).MarkReclaim()
if task.(Reclaimer).CanReclaim() {
task.(Reclaimer).MarkReclaim()
markedTasks = append(markedTasks, key.(PeerTaskMetadata))
} else {
// just calculate not reclaimed task
totalNotMarkedSize += task.(*localTaskStore).ContentLength
logger.Debugf("task %s/%s not reach gc time",
key.(PeerTaskMetadata).TaskID, key.(PeerTaskMetadata).PeerID)
lts, ok := task.(*localTaskStore)
if ok {
// just calculate not reclaimed task
totalNotMarkedSize += lts.ContentLength
logger.Debugf("task %s/%s not reach gc time",
key.(PeerTaskMetadata).TaskID, key.(PeerTaskMetadata).PeerID)
}
}
return true
})
@ -632,7 +728,10 @@ func (s *storageManager) TryGC() (bool, error) {
var tasks []*localTaskStore
s.tasks.Range(func(key, val interface{}) bool {
// skip reclaimed task
task := val.(*localTaskStore)
task, ok := val.(*localTaskStore)
if !ok { // skip subtask
return true
}
if task.reclaimMarked.Load() {
return true
}
@ -669,14 +768,21 @@ func (s *storageManager) TryGC() (bool, error) {
if !ok {
continue
}
task := t.(*localTaskStore)
_, span := tracer.Start(context.Background(), config.SpanPeerGC)
span.SetAttributes(config.AttributePeerID.String(task.PeerID))
span.SetAttributes(config.AttributeTaskID.String(task.TaskID))
s.tasks.Delete(key)
s.cleanIndex(task.TaskID, task.PeerID)
if err := task.Reclaim(); err != nil {
if lts, ok := t.(*localTaskStore); ok {
span.SetAttributes(config.AttributePeerID.String(lts.PeerID))
span.SetAttributes(config.AttributeTaskID.String(lts.TaskID))
s.cleanIndex(lts.TaskID, lts.PeerID)
} else {
task := t.(*localSubTaskStore)
span.SetAttributes(config.AttributePeerID.String(task.PeerID))
span.SetAttributes(config.AttributeTaskID.String(task.TaskID))
s.cleanSubIndex(task.TaskID, task.PeerID)
}
if err := t.(Reclaimer).Reclaim(); err != nil {
// FIXME: retry later or push to queue
logger.Errorf("gc task %s/%s error: %s", key.TaskID, key.PeerID, err)
span.RecordError(err)
@ -706,9 +812,14 @@ func (s *storageManager) forceGC() (bool, error) {
s.tasks.Range(func(key, task interface{}) bool {
meta := key.(PeerTaskMetadata)
s.tasks.Delete(meta)
s.cleanIndex(meta.TaskID, meta.PeerID)
task.(*localTaskStore).MarkReclaim()
err := task.(*localTaskStore).Reclaim()
if _, ok := task.(*localTaskStore); ok {
s.cleanIndex(meta.TaskID, meta.PeerID)
} else {
s.cleanSubIndex(meta.TaskID, meta.PeerID)
}
task.(Reclaimer).MarkReclaim()
err := task.(Reclaimer).Reclaim()
if err != nil {
logger.Errorf("gc task store %s error: %s", key, err)
}

View File

@ -268,6 +268,20 @@ func (mr *MockManagerMockRecorder) CleanUp() *gomock.Call {
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CleanUp", reflect.TypeOf((*MockManager)(nil).CleanUp))
}
// FindCompletedSubTask mocks base method.
func (m *MockManager) FindCompletedSubTask(taskID string) *storage.ReusePeerTask {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "FindCompletedSubTask", taskID)
ret0, _ := ret[0].(*storage.ReusePeerTask)
return ret0
}
// FindCompletedSubTask indicates an expected call of FindCompletedSubTask.
func (mr *MockManagerMockRecorder) FindCompletedSubTask(taskID interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FindCompletedSubTask", reflect.TypeOf((*MockManager)(nil).FindCompletedSubTask), taskID)
}
// FindCompletedTask mocks base method.
func (m *MockManager) FindCompletedTask(taskID string) *storage.ReusePeerTask {
m.ctrl.T.Helper()
@ -355,8 +369,23 @@ func (mr *MockManagerMockRecorder) ReadPiece(ctx, req interface{}) *gomock.Call
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ReadPiece", reflect.TypeOf((*MockManager)(nil).ReadPiece), ctx, req)
}
// RegisterSubTask mocks base method.
func (m *MockManager) RegisterSubTask(ctx context.Context, req *storage.RegisterSubTaskRequest) (storage.TaskStorageDriver, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "RegisterSubTask", ctx, req)
ret0, _ := ret[0].(storage.TaskStorageDriver)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// RegisterSubTask indicates an expected call of RegisterSubTask.
func (mr *MockManagerMockRecorder) RegisterSubTask(ctx, req interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RegisterSubTask", reflect.TypeOf((*MockManager)(nil).RegisterSubTask), ctx, req)
}
// RegisterTask mocks base method.
func (m *MockManager) RegisterTask(ctx context.Context, req storage.RegisterTaskRequest) (storage.TaskStorageDriver, error) {
func (m *MockManager) RegisterTask(ctx context.Context, req *storage.RegisterTaskRequest) (storage.TaskStorageDriver, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "RegisterTask", ctx, req)
ret0, _ := ret[0].(storage.TaskStorageDriver)

View File

@ -125,7 +125,7 @@ func singleDownload(ctx context.Context, client daemonclient.DaemonClient, cfg *
}
}
if downError != nil {
if downError != nil && !cfg.KeepOriginalOffset {
wLog.Warnf("daemon downloads file error: %v", downError)
fmt.Printf("daemon downloads file error: %v\n", downError)
downError = downloadFromSource(ctx, cfg, hdr)
@ -213,6 +213,8 @@ func newDownRequest(cfg *config.DfgetConfig, hdr map[string]string) *dfdaemon.Do
var rg string
if r, ok := hdr[headers.Range]; ok {
rg = strings.TrimLeft(r, "bytes=")
} else {
rg = cfg.Range
}
return &dfdaemon.DownRequest{
Url: cfg.URL,
@ -227,10 +229,11 @@ func newDownRequest(cfg *config.DfgetConfig, hdr map[string]string) *dfdaemon.Do
Filter: cfg.Filter,
Header: hdr,
},
Pattern: cfg.Pattern,
Callsystem: cfg.CallSystem,
Uid: int64(basic.UserID),
Gid: int64(basic.UserGroup),
Pattern: cfg.Pattern,
Callsystem: cfg.CallSystem,
Uid: int64(basic.UserID),
Gid: int64(basic.UserGroup),
KeepOriginalOffset: cfg.KeepOriginalOffset,
}
}

View File

@ -182,6 +182,12 @@ func init() {
flagSet.String("reject-regex", dfgetConfig.RecursiveRejectRegex,
`Recursively download only. Specify a regular expression to reject the complete URL. In this case, you have to enclose the pattern into quotes to prevent your shell from expanding it`)
flagSet.Bool("original-offset", dfgetConfig.KeepOriginalOffset,
`Range request only. Download ranged data into target file with original offset. Daemon will make a hardlink to target file. Client can download many ranged data into one file for same url. When enabled, back source in client will be disabled`)
flagSet.String("range", dfgetConfig.Range,
`Download range. Like: 0-9, stands download 10 bytes from 0 -9, [0:9] in real url`)
// Bind cmd flags
if err := viper.BindPFlags(flagSet); err != nil {
panic(errors.Wrap(err, "bind dfget flags to viper"))

View File

@ -16,7 +16,7 @@
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.27.1
// protoc v3.17.3
// protoc v3.19.1
// source: pkg/rpc/dfdaemon/dfdaemon.proto
package dfdaemon
@ -64,6 +64,8 @@ type DownRequest struct {
Uid int64 `protobuf:"varint,10,opt,name=uid,proto3" json:"uid,omitempty"`
// group id
Gid int64 `protobuf:"varint,11,opt,name=gid,proto3" json:"gid,omitempty"`
// keep original offset, used for ranged request, only available for hard link, otherwise will failed
KeepOriginalOffset bool `protobuf:"varint,12,opt,name=keep_original_offset,json=keepOriginalOffset,proto3" json:"keep_original_offset,omitempty"`
}
func (x *DownRequest) Reset() {
@ -175,6 +177,13 @@ func (x *DownRequest) GetGid() int64 {
return 0
}
func (x *DownRequest) GetKeepOriginalOffset() bool {
if x != nil {
return x.KeepOriginalOffset
}
return false
}
type DownResult struct {
state protoimpl.MessageState
sizeCache protoimpl.SizeCache
@ -256,7 +265,7 @@ var file_pkg_rpc_dfdaemon_dfdaemon_proto_rawDesc = []byte{
0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x1b, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f,
0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x65, 0x6d, 0x70, 0x74, 0x79, 0x2e, 0x70, 0x72, 0x6f, 0x74,
0x6f, 0x1a, 0x17, 0x76, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x65, 0x2f, 0x76, 0x61, 0x6c, 0x69,
0x64, 0x61, 0x74, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x85, 0x03, 0x0a, 0x0b, 0x44,
0x64, 0x61, 0x74, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0xb7, 0x03, 0x0a, 0x0b, 0x44,
0x6f, 0x77, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1c, 0x0a, 0x04, 0x75, 0x75,
0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x42, 0x08, 0xfa, 0x42, 0x05, 0x72, 0x03, 0xb0,
0x01, 0x01, 0x52, 0x04, 0x75, 0x75, 0x69, 0x64, 0x12, 0x1a, 0x0a, 0x03, 0x75, 0x72, 0x6c, 0x18,
@ -281,31 +290,34 @@ var file_pkg_rpc_dfdaemon_dfdaemon_proto_rawDesc = []byte{
0x28, 0x09, 0x52, 0x0a, 0x63, 0x61, 0x6c, 0x6c, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x12, 0x10,
0x0a, 0x03, 0x75, 0x69, 0x64, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x75, 0x69, 0x64,
0x12, 0x10, 0x0a, 0x03, 0x67, 0x69, 0x64, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x67,
0x69, 0x64, 0x22, 0x98, 0x01, 0x0a, 0x0a, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x73, 0x75, 0x6c,
0x74, 0x12, 0x20, 0x0a, 0x07, 0x74, 0x61, 0x73, 0x6b, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01,
0x28, 0x09, 0x42, 0x07, 0xfa, 0x42, 0x04, 0x72, 0x02, 0x10, 0x01, 0x52, 0x06, 0x74, 0x61, 0x73,
0x6b, 0x49, 0x64, 0x12, 0x20, 0x0a, 0x07, 0x70, 0x65, 0x65, 0x72, 0x5f, 0x69, 0x64, 0x18, 0x03,
0x20, 0x01, 0x28, 0x09, 0x42, 0x07, 0xfa, 0x42, 0x04, 0x72, 0x02, 0x10, 0x01, 0x52, 0x06, 0x70,
0x65, 0x65, 0x72, 0x49, 0x64, 0x12, 0x32, 0x0a, 0x10, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74,
0x65, 0x64, 0x5f, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x42,
0x07, 0xfa, 0x42, 0x04, 0x32, 0x02, 0x28, 0x00, 0x52, 0x0f, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65,
0x74, 0x65, 0x64, 0x4c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x64, 0x6f, 0x6e,
0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x64, 0x6f, 0x6e, 0x65, 0x32, 0xbe, 0x01,
0x0a, 0x06, 0x44, 0x61, 0x65, 0x6d, 0x6f, 0x6e, 0x12, 0x39, 0x0a, 0x08, 0x44, 0x6f, 0x77, 0x6e,
0x6c, 0x6f, 0x61, 0x64, 0x12, 0x15, 0x2e, 0x64, 0x66, 0x64, 0x61, 0x65, 0x6d, 0x6f, 0x6e, 0x2e,
0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x14, 0x2e, 0x64, 0x66,
0x64, 0x61, 0x65, 0x6d, 0x6f, 0x6e, 0x2e, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x73, 0x75, 0x6c,
0x74, 0x30, 0x01, 0x12, 0x3a, 0x0a, 0x0d, 0x47, 0x65, 0x74, 0x50, 0x69, 0x65, 0x63, 0x65, 0x54,
0x61, 0x73, 0x6b, 0x73, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x73, 0x65, 0x2e, 0x50, 0x69, 0x65, 0x63,
0x65, 0x54, 0x61, 0x73, 0x6b, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x11, 0x2e, 0x62,
0x61, 0x73, 0x65, 0x2e, 0x50, 0x69, 0x65, 0x63, 0x65, 0x50, 0x61, 0x63, 0x6b, 0x65, 0x74, 0x12,
0x3d, 0x0a, 0x0b, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x12, 0x16,
0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66,
0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x1a, 0x16, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e,
0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x42, 0x26,
0x5a, 0x24, 0x64, 0x37, 0x79, 0x2e, 0x69, 0x6f, 0x2f, 0x64, 0x72, 0x61, 0x67, 0x6f, 0x6e, 0x66,
0x6c, 0x79, 0x2f, 0x76, 0x32, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x72, 0x70, 0x63, 0x2f, 0x64, 0x66,
0x64, 0x61, 0x65, 0x6d, 0x6f, 0x6e, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
0x69, 0x64, 0x12, 0x30, 0x0a, 0x14, 0x6b, 0x65, 0x65, 0x70, 0x5f, 0x6f, 0x72, 0x69, 0x67, 0x69,
0x6e, 0x61, 0x6c, 0x5f, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x08,
0x52, 0x12, 0x6b, 0x65, 0x65, 0x70, 0x4f, 0x72, 0x69, 0x67, 0x69, 0x6e, 0x61, 0x6c, 0x4f, 0x66,
0x66, 0x73, 0x65, 0x74, 0x22, 0x98, 0x01, 0x0a, 0x0a, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x73,
0x75, 0x6c, 0x74, 0x12, 0x20, 0x0a, 0x07, 0x74, 0x61, 0x73, 0x6b, 0x5f, 0x69, 0x64, 0x18, 0x02,
0x20, 0x01, 0x28, 0x09, 0x42, 0x07, 0xfa, 0x42, 0x04, 0x72, 0x02, 0x10, 0x01, 0x52, 0x06, 0x74,
0x61, 0x73, 0x6b, 0x49, 0x64, 0x12, 0x20, 0x0a, 0x07, 0x70, 0x65, 0x65, 0x72, 0x5f, 0x69, 0x64,
0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x42, 0x07, 0xfa, 0x42, 0x04, 0x72, 0x02, 0x10, 0x01, 0x52,
0x06, 0x70, 0x65, 0x65, 0x72, 0x49, 0x64, 0x12, 0x32, 0x0a, 0x10, 0x63, 0x6f, 0x6d, 0x70, 0x6c,
0x65, 0x74, 0x65, 0x64, 0x5f, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x04, 0x20, 0x01, 0x28,
0x04, 0x42, 0x07, 0xfa, 0x42, 0x04, 0x32, 0x02, 0x28, 0x00, 0x52, 0x0f, 0x63, 0x6f, 0x6d, 0x70,
0x6c, 0x65, 0x74, 0x65, 0x64, 0x4c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x64,
0x6f, 0x6e, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x64, 0x6f, 0x6e, 0x65, 0x32,
0xbe, 0x01, 0x0a, 0x06, 0x44, 0x61, 0x65, 0x6d, 0x6f, 0x6e, 0x12, 0x39, 0x0a, 0x08, 0x44, 0x6f,
0x77, 0x6e, 0x6c, 0x6f, 0x61, 0x64, 0x12, 0x15, 0x2e, 0x64, 0x66, 0x64, 0x61, 0x65, 0x6d, 0x6f,
0x6e, 0x2e, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x14, 0x2e,
0x64, 0x66, 0x64, 0x61, 0x65, 0x6d, 0x6f, 0x6e, 0x2e, 0x44, 0x6f, 0x77, 0x6e, 0x52, 0x65, 0x73,
0x75, 0x6c, 0x74, 0x30, 0x01, 0x12, 0x3a, 0x0a, 0x0d, 0x47, 0x65, 0x74, 0x50, 0x69, 0x65, 0x63,
0x65, 0x54, 0x61, 0x73, 0x6b, 0x73, 0x12, 0x16, 0x2e, 0x62, 0x61, 0x73, 0x65, 0x2e, 0x50, 0x69,
0x65, 0x63, 0x65, 0x54, 0x61, 0x73, 0x6b, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x11,
0x2e, 0x62, 0x61, 0x73, 0x65, 0x2e, 0x50, 0x69, 0x65, 0x63, 0x65, 0x50, 0x61, 0x63, 0x6b, 0x65,
0x74, 0x12, 0x3d, 0x0a, 0x0b, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68,
0x12, 0x16, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62,
0x75, 0x66, 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x1a, 0x16, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c,
0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79,
0x42, 0x26, 0x5a, 0x24, 0x64, 0x37, 0x79, 0x2e, 0x69, 0x6f, 0x2f, 0x64, 0x72, 0x61, 0x67, 0x6f,
0x6e, 0x66, 0x6c, 0x79, 0x2f, 0x76, 0x32, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x72, 0x70, 0x63, 0x2f,
0x64, 0x66, 0x64, 0x61, 0x65, 0x6d, 0x6f, 0x6e, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
}
var (

View File

@ -115,6 +115,8 @@ func (m *DownRequest) Validate() error {
// no validation rules for Gid
// no validation rules for KeepOriginalOffset
return nil
}

View File

@ -46,6 +46,8 @@ message DownRequest{
int64 uid = 10;
// group id
int64 gid = 11;
// keep original offset, used for ranged request, only available for hard link, otherwise will failed
bool keep_original_offset = 12;
}
message DownResult{

View File

@ -1,4 +1,8 @@
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.2.0
// - protoc v3.19.1
// source: pkg/rpc/dfdaemon/dfdaemon.proto
package dfdaemon

View File

@ -179,6 +179,7 @@ func (client *httpSourceClient) Download(request *source.Request) (*source.Respo
resp.Body.Close()
return nil, err
}
// FIXME check response "Content-Range" header, if not found, need to wrap resp.Body
response := source.NewResponse(
resp.Body,
source.WithExpireInfo(

View File

@ -32,20 +32,21 @@ import (
var _ = Describe("Download with dfget and proxy", func() {
Context("dfget", func() {
files := getFileSizes()
singleDfgetTest("dfget daemon download should be ok",
dragonflyNamespace, "component=dfdaemon",
"dragonfly-dfdaemon-", "dfdaemon")
"dragonfly-dfdaemon-", "dfdaemon", files)
for i := 0; i < 3; i++ {
singleDfgetTest(
fmt.Sprintf("dfget daemon proxy-%d should be ok", i),
dragonflyE2ENamespace,
fmt.Sprintf("statefulset.kubernetes.io/pod-name=proxy-%d", i),
"proxy-", "proxy")
"proxy-", "proxy", files)
}
})
})
func getFileDetails() map[string]int {
func getFileSizes() map[string]int {
var details = map[string]int{}
for _, path := range e2eutil.GetFileList() {
out, err := e2eutil.DockerCommand("stat", "--printf=%s", path).CombinedOutput()
@ -76,7 +77,7 @@ func getRandomRange(size int) *clientutil.Range {
return rg
}
func singleDfgetTest(name, ns, label, podNamePrefix, container string) {
func singleDfgetTest(name, ns, label, podNamePrefix, container string, fileDetails map[string]int) {
It(name, func() {
out, err := e2eutil.KubeCtlCommand("-n", ns, "get", "pod", "-l", label,
"-o", "jsonpath='{range .items[*]}{.metadata.name}{end}'").CombinedOutput()
@ -84,23 +85,41 @@ func singleDfgetTest(name, ns, label, podNamePrefix, container string) {
Expect(err).NotTo(HaveOccurred())
fmt.Println("test in pod: " + podName)
Expect(strings.HasPrefix(podName, podNamePrefix)).Should(BeTrue())
// copy test tools into container
if featureGates.Enabled(featureGateRange) {
out, err = e2eutil.KubeCtlCommand("-n", ns, "cp", "-c", container, "/tmp/sha256sum-offset",
fmt.Sprintf("%s:/bin/", podName)).CombinedOutput()
if err != nil {
fmt.Println(string(out))
}
Expect(err).NotTo(HaveOccurred())
}
pod := e2eutil.NewPodExec(ns, podName, container)
// install curl
_, err = pod.Command("apk", "add", "-U", "curl").CombinedOutput()
Expect(err).NotTo(HaveOccurred())
for path, size := range getFileDetails() {
for path, size := range fileDetails {
url1 := e2eutil.GetFileURL(path)
url2 := e2eutil.GetNoContentLengthFileURL(path)
// make ranged requests to invoke prefetch feature
if featureGates.Enabled(featureGateRange) {
rg := getRandomRange(size)
downloadSingleFile(ns, pod, path, url1, size, rg)
downloadSingleFile(ns, pod, path, url2, size, rg)
rg1, rg2 := getRandomRange(size), getRandomRange(size)
downloadSingleFile(ns, pod, path, url1, size, rg1)
downloadSingleFile(ns, pod, path, url1, size, rg2)
// FIXME no content length cases are always failed.
// downloadSingleFile(ns, pod, path, url2, size, rg)
}
downloadSingleFile(ns, pod, path, url1, size, nil)
downloadSingleFile(ns, pod, path, url2, size, nil)
if featureGates.Enabled(featureGateNoLength) {
downloadSingleFile(ns, pod, path, url2, size, nil)
}
}
})
}
@ -110,6 +129,9 @@ func downloadSingleFile(ns string, pod *e2eutil.PodExec, path, url string, size
sha256sum []string
dfget []string
curl []string
sha256sumOffset []string
dfgetOffset []string
)
if rg == nil {
@ -118,19 +140,25 @@ func downloadSingleFile(ns string, pod *e2eutil.PodExec, path, url string, size
curl = append(curl, "/usr/bin/curl", "-x", "http://127.0.0.1:65001", "-s", "--dump-header", "-", "-o", "/tmp/curl.out", url)
} else {
sha256sum = append(sha256sum, "sh", "-c",
fmt.Sprintf("dd if=%s ibs=1 skip=%d count=%d 2> /dev/null | /usr/bin/sha256sum", path, rg.Start, rg.Length))
fmt.Sprintf("/bin/sha256sum-offset -file %s -offset %d -length %d", path, rg.Start, rg.Length))
dfget = append(dfget, "/opt/dragonfly/bin/dfget", "-O", "/tmp/d7y.out", "-H",
fmt.Sprintf("Range: bytes=%d-%d", rg.Start, rg.Start+rg.Length-1), url)
curl = append(curl, "/usr/bin/curl", "-x", "http://127.0.0.1:65001", "-s", "--dump-header", "-", "-o", "/tmp/curl.out",
"--header", fmt.Sprintf("Range: bytes=%d-%d", rg.Start, rg.Start+rg.Length-1), url)
sha256sumOffset = append(sha256sumOffset, "sh", "-c",
fmt.Sprintf("/bin/sha256sum-offset -file %s -offset %d -length %d",
"/var/lib/dragonfly/d7y.offset.out", rg.Start, rg.Length))
dfgetOffset = append(dfgetOffset, "/opt/dragonfly/bin/dfget", "--original-offset", "-O", "/var/lib/dragonfly/d7y.offset.out", "-H",
fmt.Sprintf("Range: bytes=%d-%d", rg.Start, rg.Start+rg.Length-1), url)
}
fmt.Printf("--------------------------------------------------------------------------------\n\n")
if rg == nil {
fmt.Printf("download size %d\n", size)
fmt.Printf("download %s, size %d\n", url, size)
} else {
fmt.Printf("download range: bytes=%d-%d/%d, target length: %d\n",
rg.Start, rg.Start+rg.Length-1, size, rg.Length)
fmt.Printf("download %s, size %d, range: bytes=%d-%d/%d, target length: %d\n",
url, size, rg.Start, rg.Start+rg.Length-1, size, rg.Length)
}
// get original file digest
out, err := e2eutil.DockerCommand(sha256sum...).CombinedOutput()
@ -159,6 +187,32 @@ func downloadSingleFile(ns string, pod *e2eutil.PodExec, path, url string, size
// slow download
Expect(end.Sub(start).Seconds() < 30.0).To(Equal(true))
// download file via dfget with offset
if rg != nil {
// move output for next cases and debugging
_, _ = pod.Command("/bin/sh", "-c", `
rm -f /var/lib/dragonfly/d7y.offset.out.last
cp -l /var/lib/dragonfly/d7y.offset.out /var/lib/dragonfly/d7y.offset.out.last
rm -f /var/lib/dragonfly/d7y.offset.out
`).CombinedOutput()
start = time.Now()
out, err = pod.Command(dfgetOffset...).CombinedOutput()
end = time.Now()
fmt.Println(string(out))
Expect(err).NotTo(HaveOccurred())
// get dfget downloaded file digest
out, err = pod.Command(sha256sumOffset...).CombinedOutput()
fmt.Println("dfget with offset sha256sum: " + string(out))
Expect(err).NotTo(HaveOccurred())
sha256sumz := strings.Split(string(out), " ")[0]
Expect(sha256sum1).To(Equal(sha256sumz))
// slow download
Expect(end.Sub(start).Seconds() < 30.0).To(Equal(true))
}
// skip dfdaemon
if ns == dragonflyNamespace {
fmt.Println("skip " + dragonflyNamespace + " namespace proxy tests")

View File

@ -36,8 +36,9 @@ var (
featureGates = featuregate.NewFeatureGate()
featureGatesFlag string
featureGateRange featuregate.Feature = "dfget-range"
featureGateCommit featuregate.Feature = "dfget-commit"
featureGateRange featuregate.Feature = "dfget-range"
featureGateCommit featuregate.Feature = "dfget-commit"
featureGateNoLength featuregate.Feature = "dfget-no-length"
defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
featureGateRange: {
@ -50,6 +51,11 @@ var (
LockToDefault: false,
PreRelease: featuregate.Alpha,
},
featureGateNoLength: {
Default: true,
LockToDefault: false,
PreRelease: featuregate.Alpha,
},
}
)
@ -158,6 +164,14 @@ var _ = BeforeSuite(func() {
}
Expect(gitCommit).To(Equal(dfgetGitCommit))
if featureGates.Enabled(featureGateRange) {
out, err := e2eutil.DockerCopy("/bin/", "/tmp/sha256sum-offset").CombinedOutput()
if err != nil {
fmt.Println(string(out))
}
Expect(err).NotTo(HaveOccurred())
}
})
// TestE2E is the root of e2e test function

View File

@ -35,6 +35,13 @@ func DockerCommand(arg ...string) *exec.Cmd {
return exec.Command("docker", extArgs...)
}
func DockerCopy(dst, src string) *exec.Cmd {
container := kindDockerContainer
args := []string{"cp", src, fmt.Sprintf("%s:%s", container, dst)}
fmt.Println(fmt.Sprintf(`docker cp %s to %s:%s"`, src, container, dst))
return exec.Command("docker", args...)
}
func CriCtlCommand(arg ...string) *exec.Cmd {
extArgs := []string{"/usr/local/bin/crictl"}
extArgs = append(extArgs, arg...)
@ -42,6 +49,7 @@ func CriCtlCommand(arg ...string) *exec.Cmd {
}
func KubeCtlCommand(arg ...string) *exec.Cmd {
fmt.Println(fmt.Sprintf(`kubectl command: "kubectl" "%s"`, strings.Join(arg, `" "`)))
return exec.Command("kubectl", arg...)
}

View File

@ -56,6 +56,8 @@ dfdaemon:
verbose: true
pprofPort: 9999
metrics: 127.0.0.1:8888
download:
prefetch: true
scheduler:
disableAutoBackSource: true
proxy:

View File

@ -27,6 +27,7 @@ data:
netTopology: ""
securityDomain: ""
download:
prefetch: true
calculateDigest: true
downloadGRPC:
security:

View File

@ -0,0 +1,69 @@
/*
* Copyright 2022 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"crypto/sha256"
"encoding/hex"
"flag"
"fmt"
"io"
"os"
)
var (
offset = flag.Int64("offset", 0, "")
length = flag.Int64("length", -1, "")
file = flag.String("file", "", "")
)
func main() {
flag.Parse()
if len(*file) == 0 {
os.Exit(1)
}
f, err := os.Open(*file)
if err != nil {
panic(err)
}
defer f.Close()
if *offset > 0 {
_, err := f.Seek(*offset, io.SeekStart)
if err != nil {
panic(err)
}
}
var rd io.Reader = f
if *length > -1 {
rd = io.LimitReader(f, *length)
}
hash := sha256.New()
n, err := io.Copy(hash, rd)
if err != nil {
panic(err)
}
if *length > -1 && n != *length {
panic(io.ErrShortWrite)
}
fmt.Printf("%s %s", hex.EncodeToString(hash.Sum(nil)), *file)
}