feat: trigger task with priority (#1904)

Signed-off-by: Gaius <gaius.qi@gmail.com>
Gaius 2022-12-08 16:16:42 +08:00
parent 481df276a3
commit d64a6d8e29
No known key found for this signature in database
GPG Key ID: 8B4E5D1290FA2FFB
13 changed files with 915 additions and 612 deletions

2
go.mod
View File

@ -3,7 +3,7 @@ module d7y.io/dragonfly/v2
go 1.19 go 1.19
require ( require (
d7y.io/api v1.3.2 d7y.io/api v1.3.3
github.com/RichardKnop/machinery v1.10.6 github.com/RichardKnop/machinery v1.10.6
github.com/Showmax/go-fqdn v1.0.0 github.com/Showmax/go-fqdn v1.0.0
github.com/VividCortex/mysqlerr v1.0.0 github.com/VividCortex/mysqlerr v1.0.0

4
go.sum
View File

@ -69,8 +69,8 @@ cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RX
cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0=
cloud.google.com/go/storage v1.14.0/go.mod h1:GrKmX003DSIwi9o29oFT7YDnHYwZoctc3fOKtUw0Xmo= cloud.google.com/go/storage v1.14.0/go.mod h1:GrKmX003DSIwi9o29oFT7YDnHYwZoctc3fOKtUw0Xmo=
cloud.google.com/go/storage v1.22.1/go.mod h1:S8N1cAStu7BOeFfE8KAQzmyyLkK8p/vmRq6kuBTW58Y= cloud.google.com/go/storage v1.22.1/go.mod h1:S8N1cAStu7BOeFfE8KAQzmyyLkK8p/vmRq6kuBTW58Y=
d7y.io/api v1.3.2 h1:ClaW+I4VwtTN3yaHcrevTkcA7PbR4WNv3gJBPnMZi1Y= d7y.io/api v1.3.3 h1:KxOfhOLd4/cbgt2rJWJbSVtehdOG+lhwRP1O492PZd4=
d7y.io/api v1.3.2/go.mod h1:HERD+sbavL0vJXkd37RZxJvpu+nXZ6ipffm4EFUbF2w= d7y.io/api v1.3.3/go.mod h1:HERD+sbavL0vJXkd37RZxJvpu+nXZ6ipffm4EFUbF2w=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20201218220906-28db891af037/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= dmitri.shuralyov.com/gpu/mtl v0.0.0-20201218220906-28db891af037/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.0.0/go.mod h1:uGG2W01BaETf0Ozp+QxxKJdMBNRWPdstHG0Fmdwn1/U= github.com/Azure/azure-sdk-for-go/sdk/azcore v1.0.0/go.mod h1:uGG2W01BaETf0Ozp+QxxKJdMBNRWPdstHG0Fmdwn1/U=

View File

@ -147,6 +147,12 @@ func WithTaskID(taskID string) *SugaredLoggerOnWith {
} }
} }
func WithHostID(hostID string) *SugaredLoggerOnWith {
return &SugaredLoggerOnWith{
withArgs: []any{"hostID", hostID},
}
}
func WithKeepAlive(hostname, ip, sourceType string, clusterID uint64) *SugaredLoggerOnWith { func WithKeepAlive(hostname, ip, sourceType string, clusterID uint64) *SugaredLoggerOnWith {
return &SugaredLoggerOnWith{ return &SugaredLoggerOnWith{
withArgs: []any{"hostname", hostname, "ip", ip, "sourceType", sourceType, "clusterID", clusterID}, withArgs: []any{"hostname", hostname, "ip", ip, "sourceType", sourceType, "clusterID", clusterID},

View File

@ -105,11 +105,11 @@ func TestDynconfig_GetManagerSourceType(t *testing.T) {
Url: "example.com", Url: "example.com",
Bio: "bar", Bio: "bar",
Priority: &managerv1.ApplicationPriority{ Priority: &managerv1.ApplicationPriority{
Value: managerv1.Priority_Level0, Value: managerv1.Priority_LEVEL0,
Urls: []*managerv1.URLPriority{ Urls: []*managerv1.URLPriority{
{ {
Regex: "blobs*", Regex: "blobs*",
Value: managerv1.Priority_Level1, Value: managerv1.Priority_LEVEL0,
}, },
}, },
}, },
@ -161,11 +161,11 @@ func TestDynconfig_GetManagerSourceType(t *testing.T) {
Url: "example.com", Url: "example.com",
Bio: "bar", Bio: "bar",
Priority: &managerv1.ApplicationPriority{ Priority: &managerv1.ApplicationPriority{
Value: managerv1.Priority_Level0, Value: managerv1.Priority_LEVEL0,
Urls: []*managerv1.URLPriority{ Urls: []*managerv1.URLPriority{
{ {
Regex: "blobs*", Regex: "blobs*",
Value: managerv1.Priority_Level1, Value: managerv1.Priority_LEVEL0,
}, },
}, },
}, },
@ -229,11 +229,11 @@ func TestDynconfig_GetManagerSourceType(t *testing.T) {
Url: "example.com", Url: "example.com",
Bio: "bar", Bio: "bar",
Priority: &managerv1.ApplicationPriority{ Priority: &managerv1.ApplicationPriority{
Value: managerv1.Priority_Level0, Value: managerv1.Priority_LEVEL0,
Urls: []*managerv1.URLPriority{ Urls: []*managerv1.URLPriority{
{ {
Regex: "blobs*", Regex: "blobs*",
Value: managerv1.Priority_Level1, Value: managerv1.Priority_LEVEL0,
}, },
}, },
}, },
@ -287,11 +287,11 @@ func TestDynconfig_GetManagerSourceType(t *testing.T) {
Url: "example.com", Url: "example.com",
Bio: "bar", Bio: "bar",
Priority: &managerv1.ApplicationPriority{ Priority: &managerv1.ApplicationPriority{
Value: managerv1.Priority_Level0, Value: managerv1.Priority_LEVEL0,
Urls: []*managerv1.URLPriority{ Urls: []*managerv1.URLPriority{
{ {
Regex: "blobs*", Regex: "blobs*",
Value: managerv1.Priority_Level1, Value: managerv1.Priority_LEVEL0,
}, },
}, },
}, },
@ -355,11 +355,11 @@ func TestDynconfig_GetManagerSourceType(t *testing.T) {
Url: "example.com", Url: "example.com",
Bio: "bar", Bio: "bar",
Priority: &managerv1.ApplicationPriority{ Priority: &managerv1.ApplicationPriority{
Value: managerv1.Priority_Level0, Value: managerv1.Priority_LEVEL0,
Urls: []*managerv1.URLPriority{ Urls: []*managerv1.URLPriority{
{ {
Regex: "blobs*", Regex: "blobs*",
Value: managerv1.Priority_Level1, Value: managerv1.Priority_LEVEL0,
}, },
}, },
}, },
@ -447,11 +447,11 @@ func TestDynconfig_GetManagerSourceType(t *testing.T) {
Url: "example.com", Url: "example.com",
Bio: "bar", Bio: "bar",
Priority: &managerv1.ApplicationPriority{ Priority: &managerv1.ApplicationPriority{
Value: managerv1.Priority_Level0, Value: managerv1.Priority_LEVEL0,
Urls: []*managerv1.URLPriority{ Urls: []*managerv1.URLPriority{
{ {
Regex: "blobs*", Regex: "blobs*",
Value: managerv1.Priority_Level1, Value: managerv1.Priority_LEVEL0,
}, },
}, },
}, },

View File

@ -22,6 +22,7 @@ import (
"io" "io"
"net/http" "net/http"
"net/url" "net/url"
"regexp"
"time" "time"
"github.com/bits-and-blooms/bitset" "github.com/bits-and-blooms/bitset"
@ -29,10 +30,12 @@ import (
"github.com/looplab/fsm" "github.com/looplab/fsm"
"go.uber.org/atomic" "go.uber.org/atomic"
managerv1 "d7y.io/api/pkg/apis/manager/v1"
schedulerv1 "d7y.io/api/pkg/apis/scheduler/v1" schedulerv1 "d7y.io/api/pkg/apis/scheduler/v1"
logger "d7y.io/dragonfly/v2/internal/dflog" logger "d7y.io/dragonfly/v2/internal/dflog"
"d7y.io/dragonfly/v2/pkg/container/set" "d7y.io/dragonfly/v2/pkg/container/set"
"d7y.io/dragonfly/v2/scheduler/config"
) )
const ( const (
@ -415,3 +418,57 @@ func (p *Peer) DownloadTinyFile() ([]byte, error) {
return io.ReadAll(resp.Body) return io.ReadAll(resp.Body)
} }
// GetPriority returns priority of peer.
func (p *Peer) GetPriority(dynconfig config.DynconfigInterface) managerv1.Priority {
pbApplications, err := dynconfig.GetApplications()
if err != nil {
p.Log.Warn(err)
return managerv1.Priority_LEVEL5
}
// If manager has no applications,
// then return Priority_LEVEL5.
if len(pbApplications) == 0 {
p.Log.Info("can not found applications")
return managerv1.Priority_LEVEL5
}
// Find peer application.
var application *managerv1.Application
for _, pbApplication := range pbApplications {
if p.Application == pbApplication.Name {
application = pbApplication
break
}
}
// If no application matches peer application,
// then return Priority_LEVEL5.
if application == nil {
p.Log.Info("can not found matching application")
return managerv1.Priority_LEVEL5
}
// If application has no priority,
// then return Priority_LEVEL5.
if application.Priority == nil {
p.Log.Info("can not found priority")
return managerv1.Priority_LEVEL5
}
// Match url priority first.
for _, url := range application.Priority.Urls {
matched, err := regexp.MatchString(url.Regex, p.Task.URL)
if err != nil {
p.Log.Warn(err)
continue
}
if matched {
return url.Value
}
}
return application.Priority.Value
}
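GetPriority resolves a peer's priority in a fixed order: a matching URL-level regex wins first, then the application-level value, and Priority_LEVEL5 is the fallback whenever dynconfig returns nothing usable. A self-contained sketch of that resolution order, using plain stand-in structs instead of the managerv1 types (the regex and URLs below are illustrative only):

package main

import (
	"fmt"
	"regexp"
)

// Stand-ins for managerv1.URLPriority / ApplicationPriority, used here only
// to show the resolution order of GetPriority.
type urlPriority struct {
	Regex string
	Value int
}

type appPriority struct {
	Value int
	URLs  []urlPriority
}

const levelDefault = 5 // mirrors the Priority_LEVEL5 fallback

// resolve returns the URL-level priority when a regex matches the task URL,
// otherwise the application-level value, otherwise the default.
func resolve(p *appPriority, taskURL string) int {
	if p == nil {
		return levelDefault
	}
	for _, u := range p.URLs {
		if matched, err := regexp.MatchString(u.Regex, taskURL); err == nil && matched {
			return u.Value
		}
	}
	return p.Value
}

func main() {
	p := &appPriority{Value: 0, URLs: []urlPriority{{Regex: "blobs*", Value: 1}}}
	fmt.Println(resolve(p, "https://example.com/v2/foo/blobs/sha256:abc")) // 1: URL regex matched
	fmt.Println(resolve(p, "https://example.com/v2/foo/manifests/latest")) // 0: application default
	fmt.Println(resolve(nil, "https://example.com"))                       // 5: fallback
}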

View File

@ -17,6 +17,7 @@
package resource package resource
import ( import (
"errors"
"fmt" "fmt"
"net" "net"
"net/http" "net/http"
@ -30,11 +31,13 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
commonv1 "d7y.io/api/pkg/apis/common/v1" commonv1 "d7y.io/api/pkg/apis/common/v1"
managerv1 "d7y.io/api/pkg/apis/manager/v1"
schedulerv1 "d7y.io/api/pkg/apis/scheduler/v1" schedulerv1 "d7y.io/api/pkg/apis/scheduler/v1"
"d7y.io/api/pkg/apis/scheduler/v1/mocks" "d7y.io/api/pkg/apis/scheduler/v1/mocks"
"d7y.io/dragonfly/v2/client/util" "d7y.io/dragonfly/v2/client/util"
"d7y.io/dragonfly/v2/pkg/idgen" "d7y.io/dragonfly/v2/pkg/idgen"
configmocks "d7y.io/dragonfly/v2/scheduler/config/mocks"
) )
var ( var (
@ -455,3 +458,118 @@ func TestPeer_DownloadTinyFile(t *testing.T) {
}) })
} }
} }
func TestPeer_GetPriority(t *testing.T) {
tests := []struct {
name string
mock func(peer *Peer, md *configmocks.MockDynconfigInterfaceMockRecorder)
expect func(t *testing.T, priority managerv1.Priority)
}{
{
name: "get applications failed",
mock: func(peer *Peer, md *configmocks.MockDynconfigInterfaceMockRecorder) {
md.GetApplications().Return(nil, errors.New("bas")).Times(1)
},
expect: func(t *testing.T, priority managerv1.Priority) {
assert := assert.New(t)
assert.Equal(priority, managerv1.Priority_LEVEL5)
},
},
{
name: "can not found applications",
mock: func(peer *Peer, md *configmocks.MockDynconfigInterfaceMockRecorder) {
md.GetApplications().Return([]*managerv1.Application{}, nil).Times(1)
},
expect: func(t *testing.T, priority managerv1.Priority) {
assert := assert.New(t)
assert.Equal(priority, managerv1.Priority_LEVEL5)
},
},
{
name: "can not found matching application",
mock: func(peer *Peer, md *configmocks.MockDynconfigInterfaceMockRecorder) {
md.GetApplications().Return([]*managerv1.Application{
{
Name: "baw",
},
}, nil).Times(1)
},
expect: func(t *testing.T, priority managerv1.Priority) {
assert := assert.New(t)
assert.Equal(priority, managerv1.Priority_LEVEL5)
},
},
{
name: "can not found priority",
mock: func(peer *Peer, md *configmocks.MockDynconfigInterfaceMockRecorder) {
peer.Application = "bae"
md.GetApplications().Return([]*managerv1.Application{
{
Name: "bae",
},
}, nil).Times(1)
},
expect: func(t *testing.T, priority managerv1.Priority) {
assert := assert.New(t)
assert.Equal(priority, managerv1.Priority_LEVEL5)
},
},
{
name: "match the priority of application",
mock: func(peer *Peer, md *configmocks.MockDynconfigInterfaceMockRecorder) {
peer.Application = "baz"
md.GetApplications().Return([]*managerv1.Application{
{
Name: "baz",
Priority: &managerv1.ApplicationPriority{
Value: managerv1.Priority_LEVEL0,
},
},
}, nil).Times(1)
},
expect: func(t *testing.T, priority managerv1.Priority) {
assert := assert.New(t)
assert.Equal(priority, managerv1.Priority_LEVEL0)
},
},
{
name: "match the priority of url",
mock: func(peer *Peer, md *configmocks.MockDynconfigInterfaceMockRecorder) {
peer.Application = "bak"
peer.Task.URL = "example.com"
md.GetApplications().Return([]*managerv1.Application{
{
Name: "bak",
Priority: &managerv1.ApplicationPriority{
Value: managerv1.Priority_LEVEL0,
Urls: []*managerv1.URLPriority{
{
Regex: "am",
Value: managerv1.Priority_LEVEL1,
},
},
},
},
}, nil).Times(1)
},
expect: func(t *testing.T, priority managerv1.Priority) {
assert := assert.New(t)
assert.Equal(priority, managerv1.Priority_LEVEL1)
},
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
ctl := gomock.NewController(t)
defer ctl.Finish()
dynconfig := configmocks.NewMockDynconfigInterface(ctl)
mockHost := NewHost(mockRawHost)
mockTask := NewTask(mockTaskID, mockTaskURL, commonv1.TaskType_Normal, mockTaskURLMeta, WithBackToSourceLimit(mockTaskBackToSourceLimit))
peer := NewPeer(mockPeerID, mockTask, mockHost)
tc.mock(peer, dynconfig.EXPECT())
tc.expect(t, peer.GetPriority(dynconfig))
})
}
}

View File

@ -36,16 +36,16 @@ func TestResource_New(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
config *config.Config config *config.Config
mock func(gc *gc.MockGCMockRecorder, dynconfig *configmocks.MockDynconfigInterfaceMockRecorder) mock func(mg *gc.MockGCMockRecorder, md *configmocks.MockDynconfigInterfaceMockRecorder)
expect func(t *testing.T, resource Resource, err error) expect func(t *testing.T, resource Resource, err error)
}{ }{
{ {
name: "new resource", name: "new resource",
config: config.New(), config: config.New(),
mock: func(gc *gc.MockGCMockRecorder, dynconfig *configmocks.MockDynconfigInterfaceMockRecorder) { mock: func(mg *gc.MockGCMockRecorder, md *configmocks.MockDynconfigInterfaceMockRecorder) {
gomock.InOrder( gomock.InOrder(
gc.Add(gomock.Any()).Return(nil).Times(3), mg.Add(gomock.Any()).Return(nil).Times(3),
dynconfig.Get().Return(&config.DynconfigData{ md.Get().Return(&config.DynconfigData{
Scheduler: &managerv1.Scheduler{ Scheduler: &managerv1.Scheduler{
SeedPeers: []*managerv1.SeedPeer{ SeedPeers: []*managerv1.SeedPeer{
{ {
@ -54,9 +54,9 @@ func TestResource_New(t *testing.T) {
}, },
}, },
}, nil).Times(1), }, nil).Times(1),
dynconfig.Register(gomock.Any()).Return().Times(1), md.Register(gomock.Any()).Return().Times(1),
dynconfig.GetResolveSeedPeerAddrs().Return([]resolver.Address{}, nil).Times(1), md.GetResolveSeedPeerAddrs().Return([]resolver.Address{}, nil).Times(1),
dynconfig.Register(gomock.Any()).Return().Times(1), md.Register(gomock.Any()).Return().Times(1),
) )
}, },
expect: func(t *testing.T, resource Resource, err error) { expect: func(t *testing.T, resource Resource, err error) {
@ -68,10 +68,8 @@ func TestResource_New(t *testing.T) {
{ {
name: "new resource failed because of host manager error", name: "new resource failed because of host manager error",
config: config.New(), config: config.New(),
mock: func(gc *gc.MockGCMockRecorder, dynconfig *configmocks.MockDynconfigInterfaceMockRecorder) { mock: func(mg *gc.MockGCMockRecorder, md *configmocks.MockDynconfigInterfaceMockRecorder) {
gomock.InOrder( mg.Add(gomock.Any()).Return(errors.New("foo")).Times(1)
gc.Add(gomock.Any()).Return(errors.New("foo")).Times(1),
)
}, },
expect: func(t *testing.T, resource Resource, err error) { expect: func(t *testing.T, resource Resource, err error) {
assert := assert.New(t) assert := assert.New(t)
@ -81,10 +79,10 @@ func TestResource_New(t *testing.T) {
{ {
name: "new resource failed because of task manager error", name: "new resource failed because of task manager error",
config: config.New(), config: config.New(),
mock: func(gc *gc.MockGCMockRecorder, dynconfig *configmocks.MockDynconfigInterfaceMockRecorder) { mock: func(mg *gc.MockGCMockRecorder, md *configmocks.MockDynconfigInterfaceMockRecorder) {
gomock.InOrder( gomock.InOrder(
gc.Add(gomock.Any()).Return(nil).Times(1), mg.Add(gomock.Any()).Return(nil).Times(1),
gc.Add(gomock.Any()).Return(errors.New("foo")).Times(1), mg.Add(gomock.Any()).Return(errors.New("foo")).Times(1),
) )
}, },
expect: func(t *testing.T, resource Resource, err error) { expect: func(t *testing.T, resource Resource, err error) {
@ -95,10 +93,10 @@ func TestResource_New(t *testing.T) {
{ {
name: "new resource failed because of peer manager error", name: "new resource failed because of peer manager error",
config: config.New(), config: config.New(),
mock: func(gc *gc.MockGCMockRecorder, dynconfig *configmocks.MockDynconfigInterfaceMockRecorder) { mock: func(mg *gc.MockGCMockRecorder, md *configmocks.MockDynconfigInterfaceMockRecorder) {
gomock.InOrder( gomock.InOrder(
gc.Add(gomock.Any()).Return(nil).Times(2), mg.Add(gomock.Any()).Return(nil).Times(2),
gc.Add(gomock.Any()).Return(errors.New("foo")).Times(1), mg.Add(gomock.Any()).Return(errors.New("foo")).Times(1),
) )
}, },
expect: func(t *testing.T, resource Resource, err error) { expect: func(t *testing.T, resource Resource, err error) {
@ -109,10 +107,10 @@ func TestResource_New(t *testing.T) {
{ {
name: "new resource faild because of dynconfig get error", name: "new resource faild because of dynconfig get error",
config: config.New(), config: config.New(),
mock: func(gc *gc.MockGCMockRecorder, dynconfig *configmocks.MockDynconfigInterfaceMockRecorder) { mock: func(mg *gc.MockGCMockRecorder, md *configmocks.MockDynconfigInterfaceMockRecorder) {
gomock.InOrder( gomock.InOrder(
gc.Add(gomock.Any()).Return(nil).Times(3), mg.Add(gomock.Any()).Return(nil).Times(3),
dynconfig.Get().Return(nil, errors.New("foo")).Times(1), md.Get().Return(nil, errors.New("foo")).Times(1),
) )
}, },
expect: func(t *testing.T, resource Resource, err error) { expect: func(t *testing.T, resource Resource, err error) {
@ -123,17 +121,17 @@ func TestResource_New(t *testing.T) {
{ {
name: "new resource faild because of seed peer list is empty", name: "new resource faild because of seed peer list is empty",
config: config.New(), config: config.New(),
mock: func(gc *gc.MockGCMockRecorder, dynconfig *configmocks.MockDynconfigInterfaceMockRecorder) { mock: func(mg *gc.MockGCMockRecorder, md *configmocks.MockDynconfigInterfaceMockRecorder) {
gomock.InOrder( gomock.InOrder(
gc.Add(gomock.Any()).Return(nil).Times(3), mg.Add(gomock.Any()).Return(nil).Times(3),
dynconfig.Get().Return(&config.DynconfigData{ md.Get().Return(&config.DynconfigData{
Scheduler: &managerv1.Scheduler{ Scheduler: &managerv1.Scheduler{
SeedPeers: []*managerv1.SeedPeer{}, SeedPeers: []*managerv1.SeedPeer{},
}, },
}, nil).Times(1), }, nil).Times(1),
dynconfig.Register(gomock.Any()).Return().Times(1), md.Register(gomock.Any()).Return().Times(1),
dynconfig.GetResolveSeedPeerAddrs().Return([]resolver.Address{}, nil).Times(1), md.GetResolveSeedPeerAddrs().Return([]resolver.Address{}, nil).Times(1),
dynconfig.Register(gomock.Any()).Return().Times(1), md.Register(gomock.Any()).Return().Times(1),
) )
}, },
expect: func(t *testing.T, resource Resource, err error) { expect: func(t *testing.T, resource Resource, err error) {
@ -156,10 +154,8 @@ func TestResource_New(t *testing.T) {
Enable: false, Enable: false,
}, },
}, },
mock: func(gc *gc.MockGCMockRecorder, dynconfig *configmocks.MockDynconfigInterfaceMockRecorder) { mock: func(mg *gc.MockGCMockRecorder, md *configmocks.MockDynconfigInterfaceMockRecorder) {
gomock.InOrder( mg.Add(gomock.Any()).Return(nil).Times(3)
gc.Add(gomock.Any()).Return(nil).Times(3),
)
}, },
expect: func(t *testing.T, resource Resource, err error) { expect: func(t *testing.T, resource Resource, err error) {
assert := assert.New(t) assert := assert.New(t)

View File

@ -183,11 +183,8 @@ func (s *seedPeer) initSeedPeer(task *Task, ps *cdnsystemv1.PieceSeed) (*Peer, e
return nil, fmt.Errorf("can not find host id: %s", ps.HostId) return nil, fmt.Errorf("can not find host id: %s", ps.HostId)
} }
// New seed peer. // New and store seed peer.
peer = NewPeer(ps.PeerId, task, host, WithTag(SeedTag), WithApplication(SeedApplication)) peer = NewPeer(ps.PeerId, task, host, WithTag(SeedTag), WithApplication(SeedApplication))
peer.Log.Info("new seed peer successfully")
// Store seed peer.
s.peerManager.Store(peer) s.peerManager.Store(peer)
peer.Log.Info("seed peer has been stored") peer.Log.Info("seed peer has been stored")

View File

@ -325,7 +325,7 @@ func (t *Task) PeerOutDegree(key string) (int, error) {
} }
// HasAvailablePeer returns whether there is an available peer. // HasAvailablePeer returns whether there is an available peer.
func (t *Task) HasAvailablePeer() bool { func (t *Task) HasAvailablePeer(blocklist set.SafeSet[string]) bool {
var hasAvailablePeer bool var hasAvailablePeer bool
for _, vertex := range t.DAG.GetVertices() { for _, vertex := range t.DAG.GetVertices() {
peer := vertex.Value peer := vertex.Value
@ -333,6 +333,10 @@ func (t *Task) HasAvailablePeer() bool {
continue continue
} }
if blocklist.Contains(peer.ID) {
continue
}
if peer.FSM.Is(PeerStatePending) || if peer.FSM.Is(PeerStatePending) ||
peer.FSM.Is(PeerStateRunning) || peer.FSM.Is(PeerStateRunning) ||
peer.FSM.Is(PeerStateSucceeded) || peer.FSM.Is(PeerStateSucceeded) ||
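HasAvailablePeer now takes a blocklist, so callers can exclude specific peers from the availability check; an empty set keeps the previous behaviour, and triggerTask later in this diff passes a set containing the registering peer's own ID. A small sketch of the new call shape, with a toy peer list standing in for the DAG walk that the real method performs:

package main

import (
	"fmt"

	"d7y.io/dragonfly/v2/pkg/container/set"
)

// hasAvailablePeer is a toy stand-in for Task.HasAvailablePeer: the real
// method walks t.DAG.GetVertices() and also checks peer FSM states, but the
// blocklist handling added by this commit works the same way.
func hasAvailablePeer(peerIDs []string, blocklist set.SafeSet[string]) bool {
	for _, id := range peerIDs {
		if blocklist.Contains(id) {
			continue // blocklisted peers no longer count as available
		}
		return true
	}
	return false
}

func main() {
	blocklist := set.NewSafeSet[string]()
	blocklist.Add("peer-1")

	fmt.Println(hasAvailablePeer([]string{"peer-1"}, blocklist))                // false: the only peer is blocklisted
	fmt.Println(hasAvailablePeer([]string{"peer-1"}, set.NewSafeSet[string]())) // true: empty blocklist keeps the old behaviour
}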
@ -375,8 +379,8 @@ func (t *Task) LoadSeedPeer() (*Peer, bool) {
// IsSeedPeerFailed returns whether the seed peer in the task failed. // IsSeedPeerFailed returns whether the seed peer in the task failed.
func (t *Task) IsSeedPeerFailed() bool { func (t *Task) IsSeedPeerFailed() bool {
seedPeer, ok := t.LoadSeedPeer() seedPeer, loaded := t.LoadSeedPeer()
return ok && seedPeer.FSM.Is(PeerStateFailed) && time.Since(seedPeer.CreatedAt.Load()) < SeedPeerFailedTimeout return loaded && seedPeer.FSM.Is(PeerStateFailed) && time.Since(seedPeer.CreatedAt.Load()) < SeedPeerFailedTimeout
} }
// LoadPiece return piece for a key. // LoadPiece return piece for a key.

View File

@ -28,6 +28,7 @@ import (
schedulerv1 "d7y.io/api/pkg/apis/scheduler/v1" schedulerv1 "d7y.io/api/pkg/apis/scheduler/v1"
"d7y.io/api/pkg/apis/scheduler/v1/mocks" "d7y.io/api/pkg/apis/scheduler/v1/mocks"
"d7y.io/dragonfly/v2/pkg/container/set"
"d7y.io/dragonfly/v2/pkg/idgen" "d7y.io/dragonfly/v2/pkg/idgen"
"d7y.io/dragonfly/v2/pkg/types" "d7y.io/dragonfly/v2/pkg/types"
) )
@ -934,6 +935,22 @@ func TestTask_HasAvailablePeer(t *testing.T) {
backToSourceLimit int32 backToSourceLimit int32
expect func(t *testing.T, task *Task, mockPeer *Peer) expect func(t *testing.T, task *Task, mockPeer *Peer)
}{ }{
{
name: "blocklist includes peer",
id: mockTaskID,
urlMeta: mockTaskURLMeta,
url: mockTaskURL,
backToSourceLimit: mockTaskBackToSourceLimit,
expect: func(t *testing.T, task *Task, mockPeer *Peer) {
assert := assert.New(t)
mockPeer.FSM.SetState(PeerStatePending)
task.StorePeer(mockPeer)
blocklist := set.NewSafeSet[string]()
blocklist.Add(mockPeer.ID)
assert.Equal(task.HasAvailablePeer(blocklist), false)
},
},
{ {
name: "peer state is PeerStatePending", name: "peer state is PeerStatePending",
id: mockTaskID, id: mockTaskID,
@ -946,7 +963,7 @@ func TestTask_HasAvailablePeer(t *testing.T) {
mockPeer.ID = idgen.PeerID("0.0.0.0") mockPeer.ID = idgen.PeerID("0.0.0.0")
mockPeer.FSM.SetState(PeerStatePending) mockPeer.FSM.SetState(PeerStatePending)
task.StorePeer(mockPeer) task.StorePeer(mockPeer)
assert.Equal(task.HasAvailablePeer(), true) assert.Equal(task.HasAvailablePeer(set.NewSafeSet[string]()), true)
}, },
}, },
{ {
@ -961,7 +978,7 @@ func TestTask_HasAvailablePeer(t *testing.T) {
mockPeer.ID = idgen.PeerID("0.0.0.0") mockPeer.ID = idgen.PeerID("0.0.0.0")
mockPeer.FSM.SetState(PeerStateSucceeded) mockPeer.FSM.SetState(PeerStateSucceeded)
task.StorePeer(mockPeer) task.StorePeer(mockPeer)
assert.Equal(task.HasAvailablePeer(), true) assert.Equal(task.HasAvailablePeer(set.NewSafeSet[string]()), true)
}, },
}, },
{ {
@ -976,7 +993,7 @@ func TestTask_HasAvailablePeer(t *testing.T) {
mockPeer.ID = idgen.PeerID("0.0.0.0") mockPeer.ID = idgen.PeerID("0.0.0.0")
mockPeer.FSM.SetState(PeerStateRunning) mockPeer.FSM.SetState(PeerStateRunning)
task.StorePeer(mockPeer) task.StorePeer(mockPeer)
assert.Equal(task.HasAvailablePeer(), true) assert.Equal(task.HasAvailablePeer(set.NewSafeSet[string]()), true)
}, },
}, },
{ {
@ -991,7 +1008,7 @@ func TestTask_HasAvailablePeer(t *testing.T) {
mockPeer.ID = idgen.PeerID("0.0.0.0") mockPeer.ID = idgen.PeerID("0.0.0.0")
mockPeer.FSM.SetState(PeerStateBackToSource) mockPeer.FSM.SetState(PeerStateBackToSource)
task.StorePeer(mockPeer) task.StorePeer(mockPeer)
assert.Equal(task.HasAvailablePeer(), true) assert.Equal(task.HasAvailablePeer(set.NewSafeSet[string]()), true)
}, },
}, },
{ {
@ -1002,7 +1019,7 @@ func TestTask_HasAvailablePeer(t *testing.T) {
backToSourceLimit: mockTaskBackToSourceLimit, backToSourceLimit: mockTaskBackToSourceLimit,
expect: func(t *testing.T, task *Task, mockPeer *Peer) { expect: func(t *testing.T, task *Task, mockPeer *Peer) {
assert := assert.New(t) assert := assert.New(t)
assert.Equal(task.HasAvailablePeer(), false) assert.Equal(task.HasAvailablePeer(set.NewSafeSet[string]()), false)
}, },
}, },
} }

View File

@ -77,6 +77,7 @@ func (s *scheduler) ScheduleParent(ctx context.Context, peer *resource.Peer, blo
// If the scheduling exceeds the RetryBackToSourceLimit or peer needs back-to-source, // If the scheduling exceeds the RetryBackToSourceLimit or peer needs back-to-source,
// peer will download the task back-to-source. // peer will download the task back-to-source.
needBackToSource := peer.NeedBackToSource.Load() needBackToSource := peer.NeedBackToSource.Load()
peer.Log.Infof("peer needs to back-to-source: %t", needBackToSource)
if (n >= s.config.RetryBackToSourceLimit || needBackToSource) && if (n >= s.config.RetryBackToSourceLimit || needBackToSource) &&
peer.Task.CanBackToSource() { peer.Task.CanBackToSource() {
stream, ok := peer.LoadStream() stream, ok := peer.LoadStream()
@ -84,13 +85,11 @@ func (s *scheduler) ScheduleParent(ctx context.Context, peer *resource.Peer, blo
peer.Log.Error("load stream failed") peer.Log.Error("load stream failed")
return return
} }
peer.Log.Infof("schedule peer back-to-source in %d times", n)
peer.Log.Infof("peer downloads back-to-source, scheduling %d times, peer need back-to-source %t",
n, needBackToSource)
// Notify peer back-to-source. // Notify peer back-to-source.
if err := stream.Send(&schedulerv1.PeerPacket{Code: commonv1.Code_SchedNeedBackSource}); err != nil { if err := stream.Send(&schedulerv1.PeerPacket{Code: commonv1.Code_SchedNeedBackSource}); err != nil {
peer.Log.Errorf("send packet failed: %s", err.Error()) peer.Log.Error(err)
return return
} }
@ -121,23 +120,23 @@ func (s *scheduler) ScheduleParent(ctx context.Context, peer *resource.Peer, blo
// Notify peer schedule failed. // Notify peer schedule failed.
if err := stream.Send(&schedulerv1.PeerPacket{Code: commonv1.Code_SchedTaskStatusError}); err != nil { if err := stream.Send(&schedulerv1.PeerPacket{Code: commonv1.Code_SchedTaskStatusError}); err != nil {
peer.Log.Errorf("send packet failed: %s", err.Error()) peer.Log.Error(err)
return return
} }
peer.Log.Errorf("peer scheduling exceeds the limit %d times and return code %d", s.config.RetryLimit, commonv1.Code_SchedTaskStatusError) peer.Log.Errorf("peer scheduling exceeds the limit %d times", s.config.RetryLimit)
return return
} }
if _, ok := s.NotifyAndFindParent(ctx, peer, blocklist); !ok { if _, ok := s.NotifyAndFindParent(ctx, peer, blocklist); !ok {
n++ n++
peer.Log.Infof("schedule parent %d times failed", n) peer.Log.Infof("schedule parent failed in %d times ", n)
// Sleep to avoid hot looping. // Sleep to avoid hot looping.
time.Sleep(s.config.RetryInterval) time.Sleep(s.config.RetryInterval)
continue continue
} }
peer.Log.Infof("schedule parent %d times successfully", n+1) peer.Log.Infof("schedule parent successfully in %d times", n+1)
return return
} }
} }
@ -205,8 +204,7 @@ func (s *scheduler) NotifyAndFindParent(ctx context.Context, peer *resource.Peer
return []*resource.Peer{}, false return []*resource.Peer{}, false
} }
peer.Log.Infof("schedule parent successful, replace parent to %s and candidate parents is %v", peer.Log.Infof("schedule candidate parents is %#v", parentIDs)
parentIDs[0], parentIDs[1:])
return candidateParents, true return candidateParents, true
} }
@ -228,7 +226,7 @@ func (s *scheduler) FindParent(ctx context.Context, peer *resource.Peer, blockli
}, },
) )
peer.Log.Infof("find parent %s successful", candidateParents[0].ID) peer.Log.Infof("schedule candidate parent is %s", candidateParents[0].ID)
return candidateParents[0], true return candidateParents[0], true
} }
@ -258,13 +256,13 @@ func (s *scheduler) filterCandidateParents(peer *resource.Peer, blocklist set.Sa
// Candidate parent is in blocklist. // Candidate parent is in blocklist.
if blocklist.Contains(candidateParent.ID) { if blocklist.Contains(candidateParent.ID) {
peer.Log.Debugf("candidate parent %s is not selected because it is in blocklist", candidateParent.ID) peer.Log.Debugf("parent %s is not selected because it is in blocklist", candidateParent.ID)
continue continue
} }
// Candidate parent can add edge with peer. // Candidate parent can add edge with peer.
if !peer.Task.CanAddPeerEdge(candidateParent.ID, peer.ID) { if !peer.Task.CanAddPeerEdge(candidateParent.ID, peer.ID) {
peer.Log.Debugf("can not add edge with candidate parent %s", candidateParent.ID) peer.Log.Debugf("can not add edge with parent %s", candidateParent.ID)
continue continue
} }
@ -272,20 +270,20 @@ func (s *scheduler) filterCandidateParents(peer *resource.Peer, blocklist set.Sa
// because dfdaemon cannot handle the situation // because dfdaemon cannot handle the situation
// where two tasks are downloading and downloading each other. // where two tasks are downloading and downloading each other.
if peer.Host.ID == candidateParent.Host.ID { if peer.Host.ID == candidateParent.Host.ID {
peer.Log.Debugf("candidate parent %s host %s is the same as peer host", candidateParent.ID, candidateParent.Host.ID) peer.Log.Debugf("parent %s host %s is the same as peer host", candidateParent.ID, candidateParent.Host.ID)
continue continue
} }
// Candidate parent is bad node. // Candidate parent is bad node.
if s.evaluator.IsBadNode(candidateParent) { if s.evaluator.IsBadNode(candidateParent) {
peer.Log.Debugf("candidate parent %s is not selected because it is bad node", candidateParent.ID) peer.Log.Debugf("parent %s is not selected because it is bad node", candidateParent.ID)
continue continue
} }
// Candidate parent can not find in dag. // Candidate parent can not find in dag.
inDegree, err := peer.Task.PeerInDegree(candidateParent.ID) inDegree, err := peer.Task.PeerInDegree(candidateParent.ID)
if err != nil { if err != nil {
peer.Log.Debugf("can not find candidate parent %s vertex in dag", candidateParent.ID) peer.Log.Debugf("can not find parent %s vertex in dag", candidateParent.ID)
continue continue
} }
@ -297,14 +295,14 @@ func (s *scheduler) filterCandidateParents(peer *resource.Peer, blocklist set.Sa
isBackToSource := candidateParent.IsBackToSource.Load() isBackToSource := candidateParent.IsBackToSource.Load()
if candidateParent.Host.Type == types.HostTypeNormal && inDegree == 0 && !isBackToSource && if candidateParent.Host.Type == types.HostTypeNormal && inDegree == 0 && !isBackToSource &&
!candidateParent.FSM.Is(resource.PeerStateSucceeded) { !candidateParent.FSM.Is(resource.PeerStateSucceeded) {
peer.Log.Debugf("candidate parent %s is not selected, because its download state is %d %d %t %s", peer.Log.Debugf("parent %s is not selected, because its download state is %d %d %t %s",
candidateParent.ID, inDegree, int(candidateParent.Host.Type), isBackToSource, candidateParent.FSM.Current()) candidateParent.ID, inDegree, int(candidateParent.Host.Type), isBackToSource, candidateParent.FSM.Current())
continue continue
} }
// Candidate parent's free upload is empty. // Candidate parent's free upload is empty.
if candidateParent.Host.FreeUploadCount() <= 0 { if candidateParent.Host.FreeUploadCount() <= 0 {
peer.Log.Debugf("candidate parent %s is not selected because its free upload is empty, upload limit is %d, upload count is %d", peer.Log.Debugf("parent %s is not selected because its free upload is empty, upload limit is %d, upload count is %d",
candidateParent.ID, candidateParent.Host.ConcurrentUploadLimit.Load(), candidateParent.Host.ConcurrentUploadCount.Load()) candidateParent.ID, candidateParent.Host.ConcurrentUploadLimit.Load(), candidateParent.Host.ConcurrentUploadCount.Load())
continue continue
} }
@ -313,7 +311,7 @@ func (s *scheduler) filterCandidateParents(peer *resource.Peer, blocklist set.Sa
candidateParentIDs = append(candidateParentIDs, candidateParent.ID) candidateParentIDs = append(candidateParentIDs, candidateParent.ID)
} }
peer.Log.Infof("candidate parents include %#v", candidateParentIDs) peer.Log.Infof("filter candidate parents is %#v", candidateParentIDs)
return candidateParents return candidateParents
} }

View File

@ -29,6 +29,7 @@ import (
commonv1 "d7y.io/api/pkg/apis/common/v1" commonv1 "d7y.io/api/pkg/apis/common/v1"
errordetailsv1 "d7y.io/api/pkg/apis/errordetails/v1" errordetailsv1 "d7y.io/api/pkg/apis/errordetails/v1"
managerv1 "d7y.io/api/pkg/apis/manager/v1"
schedulerv1 "d7y.io/api/pkg/apis/scheduler/v1" schedulerv1 "d7y.io/api/pkg/apis/scheduler/v1"
"d7y.io/dragonfly/v2/internal/dferrors" "d7y.io/dragonfly/v2/internal/dferrors"
@ -83,22 +84,28 @@ func New(
func (s *Service) RegisterPeerTask(ctx context.Context, req *schedulerv1.PeerTaskRequest) (*schedulerv1.RegisterResult, error) { func (s *Service) RegisterPeerTask(ctx context.Context, req *schedulerv1.PeerTaskRequest) (*schedulerv1.RegisterResult, error) {
logger.WithPeer(req.PeerHost.Id, req.TaskId, req.PeerId).Infof("register peer task request: %#v %#v %#v", logger.WithPeer(req.PeerHost.Id, req.TaskId, req.PeerId).Infof("register peer task request: %#v %#v %#v",
req, req.UrlMeta, req.HostLoad) req, req.UrlMeta, req.HostLoad)
// Register task and trigger seed peer download task.
task, needBackToSource := s.registerTask(ctx, req)
host := s.registerHost(ctx, req.PeerHost)
peer := s.registerPeer(ctx, req.PeerId, task, host, req.UrlMeta.Tag, req.UrlMeta.Application)
// When the peer registers for the first time and // Store resource.
// does not have a seed peer, it will back-to-source. task := s.storeTask(ctx, req, commonv1.TaskType_Normal)
peer.NeedBackToSource.Store(needBackToSource) host := s.storeHost(ctx, req.PeerHost)
peer := s.storePeer(ctx, req.PeerId, task, host, req.UrlMeta.Tag, req.UrlMeta.Application)
// Trigger the first download of the task.
if err := s.triggerTask(ctx, task, host, peer, s.dynconfig); err != nil {
peer.Log.Error(err)
s.handleRegisterFailure(ctx, peer)
return nil, dferrors.New(commonv1.Code_SchedForbidden, err.Error())
}
// If the task does not succeed, it is scheduled as a normal task.
if !task.FSM.Is(resource.TaskStateSucceeded) { if !task.FSM.Is(resource.TaskStateSucceeded) {
peer.Log.Infof("task can not be reused directly, because of task state is %s", peer.Log.Infof("register as normal task, because of task state is %s",
task.FSM.Current()) task.FSM.Current())
result, err := s.registerNormalTask(ctx, peer) result, err := s.registerNormalTask(ctx, peer)
if err != nil { if err != nil {
peer.Log.Error(err) peer.Log.Error(err)
s.handleRegisterFailure(ctx, peer)
return nil, dferrors.New(commonv1.Code_SchedError, err.Error()) return nil, dferrors.New(commonv1.Code_SchedError, err.Error())
} }
@ -110,23 +117,20 @@ func (s *Service) RegisterPeerTask(ctx context.Context, req *schedulerv1.PeerTas
if err != nil { if err != nil {
peer.Log.Warnf("scope size is invalid: %s", err.Error()) peer.Log.Warnf("scope size is invalid: %s", err.Error())
} }
peer.Log.Infof("task size scope is %s", sizeScope)
// The task state is TaskStateSucceeded and SizeScope is not invalid. // The task state is TaskStateSucceeded and SizeScope is not invalid.
peer.Log.Info("task can be reused directly")
switch sizeScope { switch sizeScope {
case commonv1.SizeScope_EMPTY: case commonv1.SizeScope_EMPTY:
peer.Log.Info("task size scope is EMPTY")
result, err := s.registerEmptyTask(ctx, peer) result, err := s.registerEmptyTask(ctx, peer)
if err != nil { if err != nil {
peer.Log.Error(err) peer.Log.Error(err)
s.handleRegisterFailure(ctx, peer)
return nil, dferrors.New(commonv1.Code_SchedError, err.Error()) return nil, dferrors.New(commonv1.Code_SchedError, err.Error())
} }
peer.Log.Info("return empty content")
return result, nil return result, nil
case commonv1.SizeScope_TINY: case commonv1.SizeScope_TINY:
peer.Log.Info("task size scope is TINY")
// Validate data of direct piece. // Validate data of direct piece.
if !peer.Task.CanReuseDirectPiece() { if !peer.Task.CanReuseDirectPiece() {
peer.Log.Warnf("register as normal task, because of length of direct piece is %d, content length is %d", peer.Log.Warnf("register as normal task, because of length of direct piece is %d, content length is %d",
@ -140,28 +144,25 @@ func (s *Service) RegisterPeerTask(ctx context.Context, req *schedulerv1.PeerTas
break break
} }
peer.Log.Info("return direct piece")
return result, nil return result, nil
case commonv1.SizeScope_SMALL: case commonv1.SizeScope_SMALL:
peer.Log.Info("task size scope is SMALL")
result, err := s.registerSmallTask(ctx, peer) result, err := s.registerSmallTask(ctx, peer)
if err != nil { if err != nil {
peer.Log.Warnf("register as normal task, because of %s", err.Error()) peer.Log.Warnf("register as normal task, because of %s", err.Error())
break break
} }
peer.Log.Info("return the single piece")
return result, nil return result, nil
} }
peer.Log.Infof("task size scope is %s", sizeScope)
result, err := s.registerNormalTask(ctx, peer) result, err := s.registerNormalTask(ctx, peer)
if err != nil { if err != nil {
peer.Log.Error(err) peer.Log.Error(err)
s.handleRegisterFailure(ctx, peer)
return nil, dferrors.New(commonv1.Code_SchedError, err.Error()) return nil, dferrors.New(commonv1.Code_SchedError, err.Error())
} }
peer.Log.Info("return the normal task") peer.Log.Info("register as normal task, because of invalid size scope")
return result, nil return result, nil
} }
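The reworked RegisterPeerTask stores the task, host and peer first, triggers the first download according to priority (rejecting registration with Code_SchedForbidden when the trigger fails), and only then picks a size-scope fast path. A compilable outline of that control flow, with toy stubs standing in for the resource types and scheduler handlers:

package main

import "fmt"

// Toy stand-ins so the outline compiles on its own; the real flow operates on
// resource.Task / resource.Host / resource.Peer and the register* helpers.
type task struct {
	succeeded bool
	sizeScope string
}

func storeTask() *task          { return &task{succeeded: true, sizeScope: "SMALL"} }
func triggerTask(t *task) error { return nil } // the priority-based trigger shown further down

// register mirrors the reworked RegisterPeerTask control flow.
func register() (string, error) {
	t := storeTask() // storeTask/storeHost/storePeer replace the old registerTask/registerHost/registerPeer

	// A trigger failure now aborts registration.
	if err := triggerTask(t); err != nil {
		return "", err
	}

	// Tasks that have not succeeded yet are registered as normal tasks.
	if !t.succeeded {
		return "normal task", nil
	}

	// Otherwise the size scope picks the fast path, falling back to normal.
	switch t.sizeScope {
	case "EMPTY":
		return "empty content", nil
	case "TINY":
		return "direct piece", nil
	case "SMALL":
		return "single piece", nil
	}
	return "normal task", nil
}

func main() {
	fmt.Println(register())
}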
@ -187,7 +188,8 @@ func (s *Service) ReportPieceResult(stream schedulerv1.Scheduler_ReportPieceResu
if err == io.EOF { if err == io.EOF {
return nil return nil
} }
logger.Errorf("receive piece %#v error: %s", piece, err.Error())
logger.Errorf("receive piece failed: %s", err.Error())
return err return err
} }
@ -227,7 +229,7 @@ func (s *Service) ReportPieceResult(stream schedulerv1.Scheduler_ReportPieceResu
// Handle piece download successfully. // Handle piece download successfully.
if piece.Success { if piece.Success {
peer.Log.Infof("receive piece: %#v %#v", piece, piece.PieceInfo) peer.Log.Infof("receive success piece: %#v %#v", piece, piece.PieceInfo)
s.handlePieceSuccess(ctx, peer, piece) s.handlePieceSuccess(ctx, peer, piece)
// Collect peer host traffic metrics. // Collect peer host traffic metrics.
@ -236,7 +238,7 @@ func (s *Service) ReportPieceResult(stream schedulerv1.Scheduler_ReportPieceResu
if parent, loaded := s.resource.PeerManager().Load(piece.DstPid); loaded { if parent, loaded := s.resource.PeerManager().Load(piece.DstPid); loaded {
metrics.PeerHostTraffic.WithLabelValues(peer.Tag, peer.Application, metrics.PeerHostTrafficUploadType, parent.Host.ID, parent.Host.IP).Add(float64(piece.PieceInfo.RangeSize)) metrics.PeerHostTraffic.WithLabelValues(peer.Tag, peer.Application, metrics.PeerHostTrafficUploadType, parent.Host.ID, parent.Host.IP).Add(float64(piece.PieceInfo.RangeSize))
} else if !resource.IsPieceBackToSource(piece) { } else if !resource.IsPieceBackToSource(piece) {
peer.Log.Warnf("dst peer %s not found for piece %#v %#v", piece.DstPid, piece, piece.PieceInfo) peer.Log.Warnf("dst peer %s not found", piece.DstPid)
} }
} }
@ -252,13 +254,13 @@ func (s *Service) ReportPieceResult(stream schedulerv1.Scheduler_ReportPieceResu
// Handle piece download code. // Handle piece download code.
if piece.Code != commonv1.Code_Success { if piece.Code != commonv1.Code_Success {
if piece.Code == commonv1.Code_ClientWaitPieceReady { if piece.Code == commonv1.Code_ClientWaitPieceReady {
peer.Log.Debugf("receive piece code %d and wait for dfdaemon piece ready", piece.Code) peer.Log.Debug("receive wait piece")
continue continue
} }
// Handle piece download failed. // Handle piece download failed.
peer.Log.Errorf("receive failed piece: %#v", piece) peer.Log.Errorf("receive failed piece: %#v", piece)
s.handlePieceFail(ctx, peer, piece) s.handlePieceFailure(ctx, peer, piece)
continue continue
} }
@ -268,9 +270,11 @@ func (s *Service) ReportPieceResult(stream schedulerv1.Scheduler_ReportPieceResu
// ReportPeerResult handles peer result reported by dfdaemon. // ReportPeerResult handles peer result reported by dfdaemon.
func (s *Service) ReportPeerResult(ctx context.Context, req *schedulerv1.PeerResult) error { func (s *Service) ReportPeerResult(ctx context.Context, req *schedulerv1.PeerResult) error {
logger.WithTaskAndPeerID(req.TaskId, req.PeerId).Infof("report peer result request: %#v", req)
peer, loaded := s.resource.PeerManager().Load(req.PeerId) peer, loaded := s.resource.PeerManager().Load(req.PeerId)
if !loaded { if !loaded {
msg := fmt.Sprintf("report peer result and peer %s is not exists", req.PeerId) msg := fmt.Sprintf("peer %s not found", req.PeerId)
logger.Error(msg) logger.Error(msg)
return dferrors.New(commonv1.Code_SchedPeerNotFound, msg) return dferrors.New(commonv1.Code_SchedPeerNotFound, msg)
} }
@ -278,23 +282,23 @@ func (s *Service) ReportPeerResult(ctx context.Context, req *schedulerv1.PeerRes
parents := peer.Parents() parents := peer.Parents()
if !req.Success { if !req.Success {
peer.Log.Errorf("report peer failed result: %s %#v", req.Code, req) peer.Log.Error("report failed peer")
if peer.FSM.Is(resource.PeerStateBackToSource) { if peer.FSM.Is(resource.PeerStateBackToSource) {
metrics.DownloadFailureCount.WithLabelValues(peer.Tag, peer.Application, metrics.DownloadFailureBackToSourceType).Inc() metrics.DownloadFailureCount.WithLabelValues(peer.Tag, peer.Application, metrics.DownloadFailureBackToSourceType).Inc()
go s.createRecord(peer, parents, req) go s.createRecord(peer, parents, req)
s.handleTaskFail(ctx, peer.Task, req.GetSourceError(), nil) s.handleTaskFailure(ctx, peer.Task, req.GetSourceError(), nil)
s.handlePeerFail(ctx, peer) s.handlePeerFailure(ctx, peer)
return nil return nil
} }
metrics.DownloadFailureCount.WithLabelValues(peer.Tag, peer.Application, metrics.DownloadFailureP2PType).Inc() metrics.DownloadFailureCount.WithLabelValues(peer.Tag, peer.Application, metrics.DownloadFailureP2PType).Inc()
go s.createRecord(peer, parents, req) go s.createRecord(peer, parents, req)
s.handlePeerFail(ctx, peer) s.handlePeerFailure(ctx, peer)
return nil return nil
} }
metrics.PeerTaskDownloadDuration.WithLabelValues(peer.Tag, peer.Application).Observe(float64(req.Cost)) metrics.PeerTaskDownloadDuration.WithLabelValues(peer.Tag, peer.Application).Observe(float64(req.Cost))
peer.Log.Infof("report peer result: %#v", req) peer.Log.Info("report success peer")
if peer.FSM.Is(resource.PeerStateBackToSource) { if peer.FSM.Is(resource.PeerStateBackToSource) {
go s.createRecord(peer, parents, req) go s.createRecord(peer, parents, req)
s.handleTaskSuccess(ctx, peer.Task, req) s.handleTaskSuccess(ctx, peer.Task, req)
@ -309,14 +313,16 @@ func (s *Service) ReportPeerResult(ctx context.Context, req *schedulerv1.PeerRes
// AnnounceTask informs scheduler a peer has completed task. // AnnounceTask informs scheduler a peer has completed task.
func (s *Service) AnnounceTask(ctx context.Context, req *schedulerv1.AnnounceTaskRequest) error { func (s *Service) AnnounceTask(ctx context.Context, req *schedulerv1.AnnounceTaskRequest) error {
logger.WithPeer(req.PeerHost.Id, req.TaskId, req.PiecePacket.DstPid).Infof("announce task request: %#v %#v %#v %#v",
req, req.UrlMeta, req.PeerHost, req.PiecePacket,
)
taskID := req.TaskId taskID := req.TaskId
peerID := req.PiecePacket.DstPid peerID := req.PiecePacket.DstPid
task := resource.NewTask(taskID, req.Url, req.TaskType, req.UrlMeta) task := resource.NewTask(taskID, req.Url, req.TaskType, req.UrlMeta)
task, _ = s.resource.TaskManager().LoadOrStore(task) task, _ = s.resource.TaskManager().LoadOrStore(task)
host := s.registerHost(ctx, req.PeerHost) host := s.storeHost(ctx, req.PeerHost)
peer := s.registerPeer(ctx, peerID, task, host, req.UrlMeta.Tag, req.UrlMeta.Application) peer := s.storePeer(ctx, peerID, task, host, req.UrlMeta.Tag, req.UrlMeta.Application)
peer.Log.Infof("announce peer task request: %#v %#v %#v %#v", req, req.UrlMeta, req.PeerHost, req.PiecePacket)
// If the task state is not TaskStateSucceeded, // If the task state is not TaskStateSucceeded,
// advance the task state to TaskStateSucceeded. // advance the task state to TaskStateSucceeded.
@ -379,6 +385,8 @@ func (s *Service) AnnounceTask(ctx context.Context, req *schedulerv1.AnnounceTas
// StatTask checks the current state of the task. // StatTask checks the current state of the task.
func (s *Service) StatTask(ctx context.Context, req *schedulerv1.StatTaskRequest) (*schedulerv1.Task, error) { func (s *Service) StatTask(ctx context.Context, req *schedulerv1.StatTaskRequest) (*schedulerv1.Task, error) {
logger.WithTaskID(req.TaskId).Infof("stat task request: %#v", req)
task, loaded := s.resource.TaskManager().Load(req.TaskId) task, loaded := s.resource.TaskManager().Load(req.TaskId)
if !loaded { if !loaded {
msg := fmt.Sprintf("task %s not found", req.TaskId) msg := fmt.Sprintf("task %s not found", req.TaskId)
@ -386,7 +394,6 @@ func (s *Service) StatTask(ctx context.Context, req *schedulerv1.StatTaskRequest
return nil, dferrors.New(commonv1.Code_PeerTaskNotFound, msg) return nil, dferrors.New(commonv1.Code_PeerTaskNotFound, msg)
} }
task.Log.Debug("task has been found")
return &schedulerv1.Task{ return &schedulerv1.Task{
Id: task.ID, Id: task.ID,
Type: task.Type, Type: task.Type,
@ -394,12 +401,14 @@ func (s *Service) StatTask(ctx context.Context, req *schedulerv1.StatTaskRequest
TotalPieceCount: task.TotalPieceCount.Load(), TotalPieceCount: task.TotalPieceCount.Load(),
State: task.FSM.Current(), State: task.FSM.Current(),
PeerCount: int32(task.PeerCount()), PeerCount: int32(task.PeerCount()),
HasAvailablePeer: task.HasAvailablePeer(), HasAvailablePeer: task.HasAvailablePeer(set.NewSafeSet[string]()),
}, nil }, nil
} }
// LeaveTask releases peer in scheduler. // LeaveTask releases peer in scheduler.
func (s *Service) LeaveTask(ctx context.Context, req *schedulerv1.PeerTarget) error { func (s *Service) LeaveTask(ctx context.Context, req *schedulerv1.PeerTarget) error {
logger.WithTaskAndPeerID(req.TaskId, req.PeerId).Infof("leave task request: %#v", req)
peer, loaded := s.resource.PeerManager().Load(req.PeerId) peer, loaded := s.resource.PeerManager().Load(req.PeerId)
if !loaded { if !loaded {
msg := fmt.Sprintf("peer %s not found", req.PeerId) msg := fmt.Sprintf("peer %s not found", req.PeerId)
@ -408,10 +417,8 @@ func (s *Service) LeaveTask(ctx context.Context, req *schedulerv1.PeerTarget) er
} }
metrics.LeaveTaskCount.WithLabelValues(peer.Tag, peer.Application).Inc() metrics.LeaveTaskCount.WithLabelValues(peer.Tag, peer.Application).Inc()
peer.Log.Infof("client releases peer, causing the peer to leave: %#v", req)
if err := peer.FSM.Event(resource.PeerEventLeave); err != nil { if err := peer.FSM.Event(resource.PeerEventLeave); err != nil {
metrics.LeaveTaskFailureCount.WithLabelValues(peer.Tag, peer.Application).Inc() metrics.LeaveTaskFailureCount.WithLabelValues(peer.Tag, peer.Application).Inc()
msg := fmt.Sprintf("peer fsm event failed: %s", err.Error()) msg := fmt.Sprintf("peer fsm event failed: %s", err.Error())
peer.Log.Error(msg) peer.Log.Error(msg)
return dferrors.New(commonv1.Code_SchedTaskStatusError, msg) return dferrors.New(commonv1.Code_SchedTaskStatusError, msg)
@ -466,7 +473,8 @@ func (s *Service) AnnounceHost(ctx context.Context, req *schedulerv1.AnnounceHos
// LeaveHost releases host in scheduler. // LeaveHost releases host in scheduler.
func (s *Service) LeaveHost(ctx context.Context, req *schedulerv1.LeaveHostRequest) error { func (s *Service) LeaveHost(ctx context.Context, req *schedulerv1.LeaveHostRequest) error {
logger.Infof("leave host: %#v", req) logger.WithHostID(req.Id).Infof("leave host request: %#v", req)
host, loaded := s.resource.HostManager().Load(req.Id) host, loaded := s.resource.HostManager().Load(req.Id)
if !loaded { if !loaded {
msg := fmt.Sprintf("host %s not found", req.Id) msg := fmt.Sprintf("host %s not found", req.Id)
@ -478,59 +486,102 @@ func (s *Service) LeaveHost(ctx context.Context, req *schedulerv1.LeaveHostReque
return nil return nil
} }
// registerTask creates a new task or reuses a previous task. // triggerTask triggers the first download of the task.
func (s *Service) registerTask(ctx context.Context, req *schedulerv1.PeerTaskRequest) (*resource.Task, bool) { func (s *Service) triggerTask(ctx context.Context, task *resource.Task, host *resource.Host, peer *resource.Peer, dynconfig config.DynconfigInterface) error {
task, loaded := s.resource.TaskManager().Load(req.TaskId) // If task has available peer, peer does not need to be triggered.
if loaded { blocklist := set.NewSafeSet[string]()
// Task is the pointer, if the task already exists, the next request will blocklist.Add(peer.ID)
// update the task's Url and UrlMeta in task manager. if (task.FSM.Is(resource.TaskStateRunning) ||
task.URL = req.Url task.FSM.Is(resource.TaskStateSucceeded)) &&
task.URLMeta = req.UrlMeta task.HasAvailablePeer(blocklist) {
peer.Log.Info("peer does not need to trigger")
if (task.FSM.Is(resource.TaskStatePending) || task.FSM.Is(resource.TaskStateRunning) || task.FSM.Is(resource.TaskStateSucceeded)) && task.HasAvailablePeer() { return nil
task.Log.Infof("task dose not need to back-to-source, because of task has available peer and state is %s", task.FSM.Current())
return task, false
}
} else {
// Create a task for the first time.
task = resource.NewTask(req.TaskId, req.Url, commonv1.TaskType_Normal, req.UrlMeta, resource.WithBackToSourceLimit(int32(s.config.Scheduler.BackToSourceCount)))
s.resource.TaskManager().Store(task)
} }
// If the task triggers the TaskEventDownload failed and it has no available peer, // If the task triggers the TaskEventDownload failed and it has no available peer,
// let the peer do the scheduling. // let the peer do the scheduling.
if err := task.FSM.Event(resource.TaskEventDownload); err != nil { if !task.FSM.Is(resource.TaskStateRunning) {
task.Log.Warnf("task dose not need to back-to-source, because of %s", err.Error()) if err := task.FSM.Event(resource.TaskEventDownload); err != nil {
return task, false peer.Log.Errorf("task fsm event failed: %s", err.Error())
} return err
// Seed peer registers the task, then it needs to back-to-source.
host, loaded := s.resource.HostManager().Load(req.PeerHost.Id)
if loaded && host.Type != types.HostTypeNormal {
task.Log.Infof("task needs to back-to-source, because of host can be loaded and type is %d", host.Type)
return task, true
}
// FIXME Need to add the condition that the seed peer grpc client is
// available and can be triggered back-to-source.
if s.config.SeedPeer.Enable {
if task.IsSeedPeerFailed() {
task.Log.Info("task needs to back-to-source, because of seed peer is failed")
return task, true
} }
go s.triggerSeedPeerTask(ctx, task)
task.Log.Info("task dose not need to back-to-source, because of seed peer has been triggered")
return task, false
} }
// Task need to back-to-source. // If host type is not HostTypeNormal, then it needs to back-to-source.
task.Log.Info("task needs to back-to-source, because of seed peer disabled") if host.Type != types.HostTypeNormal {
return task, true peer.Log.Infof("peer back-to-source, because of host type is %d", host.Type)
peer.NeedBackToSource.Store(true)
return nil
}
// The first download is triggered according to
// the different priorities of the peer.
priority := peer.GetPriority(dynconfig)
peer.Log.Infof("peer priority is %d", priority)
switch priority {
case managerv1.Priority_LEVEL5:
if s.config.SeedPeer.Enable && !task.IsSeedPeerFailed() {
go s.triggerSeedPeerTask(ctx, task)
return nil
}
fallthrough
case managerv1.Priority_LEVEL4:
fallthrough
case managerv1.Priority_LEVEL3:
fallthrough
case managerv1.Priority_LEVEL2:
peer.Log.Infof("peer back-to-source, because of hitting priority %d", managerv1.Priority_LEVEL2)
peer.NeedBackToSource.Store(true)
return nil
case managerv1.Priority_LEVEL1:
return fmt.Errorf("priority is %d and no available peers", managerv1.Priority_LEVEL1)
case managerv1.Priority_LEVEL0:
return fmt.Errorf("priority is %d", managerv1.Priority_LEVEL0)
}
peer.Log.Infof("peer back-to-source, because of peer has invalid priority %d", priority)
peer.NeedBackToSource.Store(true)
return nil
} }
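The priority switch above boils down to a small decision table: LEVEL5 prefers the seed peer when it is enabled and has not already failed, otherwise it falls through to back-to-source like LEVEL4 through LEVEL2, while LEVEL1 and LEVEL0 reject the registration. A standalone sketch of that mapping, using local constants rather than the real managerv1.Priority values:

package main

import "fmt"

// Local stand-ins for the managerv1.Priority levels referenced by triggerTask.
type priority int

const (
	level0 priority = iota // registration rejected outright
	level1                 // rejected when there is no available peer
	level2                 // back-to-source
	level3                 // back-to-source
	level4                 // back-to-source
	level5                 // seed peer first, then back-to-source
)

// triggerAction mirrors the switch in triggerTask above.
func triggerAction(p priority, seedPeerEnabled, seedPeerFailed bool) (string, error) {
	switch p {
	case level5:
		if seedPeerEnabled && !seedPeerFailed {
			return "trigger seed peer", nil
		}
		fallthrough
	case level4, level3, level2:
		return "back-to-source", nil
	case level1:
		return "", fmt.Errorf("priority is %d and no available peers", p)
	case level0:
		return "", fmt.Errorf("priority is %d", p)
	}
	// Unknown levels fall back to back-to-source, as in the original.
	return "back-to-source", nil
}

func main() {
	fmt.Println(triggerAction(level5, true, false))  // trigger seed peer
	fmt.Println(triggerAction(level5, false, false)) // back-to-source: seed peer disabled
	fmt.Println(triggerAction(level0, true, false))  // error: registration rejected
}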
// registerHost creates a new host or reuses a previous host. // triggerSeedPeerTask starts to trigger seed peer task.
func (s *Service) registerHost(ctx context.Context, peerHost *schedulerv1.PeerHost) *resource.Host { func (s *Service) triggerSeedPeerTask(ctx context.Context, task *resource.Task) {
task.Log.Info("trigger seed peer")
peer, endOfPiece, err := s.resource.SeedPeer().TriggerTask(
trace.ContextWithSpan(context.Background(), trace.SpanFromContext(ctx)), task)
if err != nil {
task.Log.Errorf("trigger seed peer failed: %s", err.Error())
s.handleTaskFailure(ctx, task, nil, err)
return
}
// Update the task status first to help peer scheduling evaluation and scoring.
peer.Log.Info("trigger seed peer successfully")
s.handleTaskSuccess(ctx, task, endOfPiece)
s.handlePeerSuccess(ctx, peer)
}
// storeTask stores a new task or reuses a previous task.
func (s *Service) storeTask(ctx context.Context, req *schedulerv1.PeerTaskRequest, taskType commonv1.TaskType) *resource.Task {
task, loaded := s.resource.TaskManager().Load(req.TaskId)
if !loaded {
// Create a task for the first time.
task = resource.NewTask(req.TaskId, req.Url, taskType, req.UrlMeta, resource.WithBackToSourceLimit(int32(s.config.Scheduler.BackToSourceCount)))
s.resource.TaskManager().Store(task)
task.Log.Info("create new task")
return task
}
// Task is the pointer, if the task already exists, the next request will
// update the task's Url and UrlMeta in task manager.
task.URL = req.Url
task.URLMeta = req.UrlMeta
task.Log.Info("task already exists")
return task
}
// storeHost stores a new host or reuses a previous host.
func (s *Service) storeHost(ctx context.Context, peerHost *schedulerv1.PeerHost) *resource.Host {
host, loaded := s.resource.HostManager().Load(peerHost.Id) host, loaded := s.resource.HostManager().Load(peerHost.Id)
if !loaded { if !loaded {
// Get scheduler cluster client config by manager. // Get scheduler cluster client config by manager.
@ -562,8 +613,8 @@ func (s *Service) registerHost(ctx context.Context, peerHost *schedulerv1.PeerHo
return host return host
} }
// registerPeer creates a new peer or reuses a previous peer. // storePeer stores a new peer or reuses a previous peer.
func (s *Service) registerPeer(ctx context.Context, peerID string, task *resource.Task, host *resource.Host, tag, application string) *resource.Peer { func (s *Service) storePeer(ctx context.Context, peerID string, task *resource.Task, host *resource.Host, tag, application string) *resource.Peer {
var options []resource.PeerOption var options []resource.PeerOption
if tag != "" { if tag != "" {
options = append(options, resource.WithTag(tag)) options = append(options, resource.WithTag(tag))
@ -578,27 +629,10 @@ func (s *Service) registerPeer(ctx context.Context, peerID string, task *resourc
return peer return peer
} }
peer.Log.Infof("peer already exists, state %s", peer.FSM.Current()) peer.Log.Info("peer already exists")
return peer return peer
} }
// triggerSeedPeerTask starts to trigger seed peer task.
func (s *Service) triggerSeedPeerTask(ctx context.Context, task *resource.Task) {
task.Log.Infof("trigger seed peer download task and task status is %s", task.FSM.Current())
peer, endOfPiece, err := s.resource.SeedPeer().TriggerTask(
trace.ContextWithSpan(context.Background(), trace.SpanFromContext(ctx)), task)
if err != nil {
task.Log.Errorf("trigger seed peer download task failed: %s", err.Error())
s.handleTaskFail(ctx, task, nil, err)
return
}
// Update the task status first to help peer scheduling evaluation and scoring.
peer.Log.Info("trigger seed peer download task successfully")
s.handleTaskSuccess(ctx, task, endOfPiece)
s.handlePeerSuccess(ctx, peer)
}
// registerEmptyTask registers the empty task. // registerEmptyTask registers the empty task.
func (s *Service) registerEmptyTask(ctx context.Context, peer *resource.Peer) (*schedulerv1.RegisterResult, error) { func (s *Service) registerEmptyTask(ctx context.Context, peer *resource.Peer) (*schedulerv1.RegisterResult, error) {
if err := peer.FSM.Event(resource.PeerEventRegisterEmpty); err != nil { if err := peer.FSM.Event(resource.PeerEventRegisterEmpty); err != nil {
@ -635,18 +669,18 @@ func (s *Service) registerTinyTask(ctx context.Context, peer *resource.Peer) (*s
func (s *Service) registerSmallTask(ctx context.Context, peer *resource.Peer) (*schedulerv1.RegisterResult, error) { func (s *Service) registerSmallTask(ctx context.Context, peer *resource.Peer) (*schedulerv1.RegisterResult, error) {
parent, ok := s.scheduler.FindParent(ctx, peer, set.NewSafeSet[string]()) parent, ok := s.scheduler.FindParent(ctx, peer, set.NewSafeSet[string]())
if !ok { if !ok {
return nil, errors.New("can not found parent") return nil, errors.New("parent not found")
} }
// When task size scope is small, parent must be downloaded successfully // When task size scope is small, parent must be downloaded successfully
// before returning to the parent directly. // before returning to the parent directly.
if !parent.FSM.Is(resource.PeerStateSucceeded) { if !parent.FSM.Is(resource.PeerStateSucceeded) {
return nil, fmt.Errorf("parent state %s is not PeerStateSucceede", parent.FSM.Current()) return nil, fmt.Errorf("parent state is %s", parent.FSM.Current())
} }
firstPiece, loaded := peer.Task.LoadPiece(0) firstPiece, loaded := peer.Task.LoadPiece(0)
if !loaded { if !loaded {
return nil, fmt.Errorf("can not found first piece") return nil, fmt.Errorf("first piece not found")
} }
// Delete inedges of peer. // Delete inedges of peer.
@ -697,17 +731,28 @@ func (s *Service) registerNormalTask(ctx context.Context, peer *resource.Peer) (
}, nil }, nil
} }
// handleRegisterFailure handles failure of register.
func (s *Service) handleRegisterFailure(ctx context.Context, peer *resource.Peer) {
if err := peer.FSM.Event(resource.PeerEventLeave); err != nil {
peer.Log.Error(err)
}
s.resource.PeerManager().Delete(peer.ID)
return
}
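The new handleRegisterFailure centralizes the cleanup needed when any register* step fails: move the peer to a terminal state and drop it from the peer manager. A toy, self-contained sketch of how a caller might use such a helper (the names and map-based registry are assumptions for illustration only):

package main

import (
    "errors"
    "fmt"
)

// peer is a toy stand-in with just enough state for the example.
type peer struct {
    id    string
    state string
}

var peers = map[string]*peer{}

// handleRegisterFailure marks the peer as left and removes it from the registry.
func handleRegisterFailure(p *peer) {
    p.state = "Leave"
    delete(peers, p.id)
}

// register simulates a registration step that can fail.
func register(p *peer, ok bool) error {
    if !ok {
        handleRegisterFailure(p) // clean up before surfacing the error
        return errors.New("register failed")
    }
    p.state = "Registered"
    return nil
}

func main() {
    p := &peer{id: "peer-1", state: "Pending"}
    peers[p.id] = p
    fmt.Println(register(p, false), len(peers)) // register failed 0
}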
// handleBeginOfPiece handles begin of piece. // handleBeginOfPiece handles begin of piece.
func (s *Service) handleBeginOfPiece(ctx context.Context, peer *resource.Peer) { func (s *Service) handleBeginOfPiece(ctx context.Context, peer *resource.Peer) {
switch peer.FSM.Current() { state := peer.FSM.Current()
peer.Log.Infof("peer state is %s", state)
switch state {
case resource.PeerStateBackToSource: case resource.PeerStateBackToSource:
// In the back-to-source download process, the peer returns directly. // In the back-to-source download process, the peer returns directly.
peer.Log.Info("peer downloads back-to-source when receive the begin of piece")
return return
case resource.PeerStateReceivedTiny: case resource.PeerStateReceivedTiny:
// When the task is tiny, // When the task is tiny,
// the peer has already returned the piece data when registering. // the peer has already returned the piece data when registering.
peer.Log.Info("file type is tiny, peer has already returned to piece data when registering")
if err := peer.FSM.Event(resource.PeerEventDownload); err != nil { if err := peer.FSM.Event(resource.PeerEventDownload); err != nil {
peer.Log.Errorf("peer fsm event failed: %s", err.Error()) peer.Log.Errorf("peer fsm event failed: %s", err.Error())
return return
@ -715,7 +760,6 @@ func (s *Service) handleBeginOfPiece(ctx context.Context, peer *resource.Peer) {
case resource.PeerStateReceivedSmall: case resource.PeerStateReceivedSmall:
// When the task is small, // When the task is small,
// the peer has already returned to the parent when registering. // the peer has already returned to the parent when registering.
peer.Log.Info("file type is small, peer has already returned to the parent when registering")
if err := peer.FSM.Event(resource.PeerEventDownload); err != nil { if err := peer.FSM.Event(resource.PeerEventDownload); err != nil {
peer.Log.Errorf("peer fsm event failed: %s", err.Error()) peer.Log.Errorf("peer fsm event failed: %s", err.Error())
return return
@ -726,10 +770,8 @@ func (s *Service) handleBeginOfPiece(ctx context.Context, peer *resource.Peer) {
return return
} }
peer.Log.Infof("schedule parent because of peer receive begin of piece")
s.scheduler.ScheduleParent(ctx, peer, set.NewSafeSet[string]()) s.scheduler.ScheduleParent(ctx, peer, set.NewSafeSet[string]())
default: default:
peer.Log.Warnf("peer state is %s when receive the begin of piece", peer.FSM.Current())
} }
} }
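handleBeginOfPiece is essentially a dispatch on the peer's FSM state: back-to-source peers return immediately, tiny and small peers only need a download event, and the remaining registered peers get a parent scheduled. A stand-alone sketch of that dispatch shape with plain strings in place of the real FSM; the ReceivedNormal branch is inferred rather than shown in the hunk above:

package main

import "fmt"

const (
    PeerStateBackToSource   = "BackToSource"
    PeerStateReceivedTiny   = "ReceivedTiny"
    PeerStateReceivedSmall  = "ReceivedSmall"
    PeerStateReceivedNormal = "ReceivedNormal"
)

// handleBeginOfPiece mirrors the switch shape in the diff: each state maps
// to one action, and unexpected states are only logged.
func handleBeginOfPiece(state string) string {
    switch state {
    case PeerStateBackToSource:
        return "noop: peer downloads back-to-source"
    case PeerStateReceivedTiny, PeerStateReceivedSmall:
        return "fire download event: data or parent already returned at register time"
    case PeerStateReceivedNormal:
        return "fire download event, then schedule a parent"
    default:
        return "unexpected state: " + state
    }
}

func main() {
    fmt.Println(handleBeginOfPiece(PeerStateReceivedSmall))
}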
@ -765,8 +807,8 @@ func (s *Service) handlePieceSuccess(ctx context.Context, peer *resource.Peer, p
} }
} }
// handlePieceFail handles failed piece. // handlePieceFailure handles failed piece.
func (s *Service) handlePieceFail(ctx context.Context, peer *resource.Peer, piece *schedulerv1.PieceResult) { func (s *Service) handlePieceFailure(ctx context.Context, peer *resource.Peer, piece *schedulerv1.PieceResult) {
// Failed to download piece back-to-source. // Failed to download piece back-to-source.
if peer.FSM.Is(resource.PeerStateBackToSource) { if peer.FSM.Is(resource.PeerStateBackToSource) {
return return
@ -775,7 +817,7 @@ func (s *Service) handlePieceFail(ctx context.Context, peer *resource.Peer, piec
// If the parent can not be found, reschedule the parent. // If the parent can not be found, reschedule the parent.
parent, loaded := s.resource.PeerManager().Load(piece.DstPid) parent, loaded := s.resource.PeerManager().Load(piece.DstPid)
if !loaded { if !loaded {
peer.Log.Errorf("reschedule parent because of peer can not found parent %s", piece.DstPid) peer.Log.Errorf("parent %s not found", piece.DstPid)
peer.BlockParents.Add(piece.DstPid) peer.BlockParents.Add(piece.DstPid)
s.scheduler.ScheduleParent(ctx, peer, peer.BlockParents) s.scheduler.ScheduleParent(ctx, peer, peer.BlockParents)
return return
@ -786,7 +828,10 @@ func (s *Service) handlePieceFail(ctx context.Context, peer *resource.Peer, piec
// It's not a case of back-to-source download failure, // It's not a case of back-to-source download failure,
// so help the peer reschedule the parent node. // so help the peer reschedule the parent node.
switch piece.Code { code := piece.Code
peer.Log.Infof("piece error code is %s", code)
switch code {
case commonv1.Code_PeerTaskNotFound: case commonv1.Code_PeerTaskNotFound:
if err := parent.FSM.Event(resource.PeerEventDownloadFailed); err != nil { if err := parent.FSM.Event(resource.PeerEventDownloadFailed); err != nil {
peer.Log.Errorf("peer fsm event failed: %s", err.Error()) peer.Log.Errorf("peer fsm event failed: %s", err.Error())
@ -813,7 +858,7 @@ func (s *Service) handlePieceFail(ctx context.Context, peer *resource.Peer, piec
// Only a peer whose state is PeerStateRunning will be rescheduled. // Only a peer whose state is PeerStateRunning will be rescheduled.
if !peer.FSM.Is(resource.PeerStateRunning) { if !peer.FSM.Is(resource.PeerStateRunning) {
peer.Log.Infof("peer can not be rescheduled because peer state is %s", peer.FSM.Current()) peer.Log.Infof("peer state is %s and can not be rescheduled", peer.FSM.Current())
// Returns a scheduling error if the peer // Returns a scheduling error if the peer
// state is not PeerStateRunning. // state is not PeerStateRunning.
@ -824,14 +869,14 @@ func (s *Service) handlePieceFail(ctx context.Context, peer *resource.Peer, piec
} }
if err := stream.Send(&schedulerv1.PeerPacket{Code: commonv1.Code_SchedError}); err != nil { if err := stream.Send(&schedulerv1.PeerPacket{Code: commonv1.Code_SchedError}); err != nil {
peer.Log.Errorf("send packet failed: %s", err.Error()) peer.Log.Error(err)
return return
} }
return return
} }
peer.Log.Infof("reschedule parent because of peer receive failed piece") peer.Log.Infof("reschedule parent because of failed piece")
peer.BlockParents.Add(parent.ID) peer.BlockParents.Add(parent.ID)
s.scheduler.ScheduleParent(ctx, peer, peer.BlockParents) s.scheduler.ScheduleParent(ctx, peer, peer.BlockParents)
} }
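The failure path in handlePieceFailure boils down to: identify the failing parent, add it to the peer's block list, and reschedule among the remaining candidates. A self-contained sketch of that block-and-reschedule idea using a plain map as the block set (the real scheduler scores candidates; this only filters them):

package main

import "fmt"

// rescheduleParent picks the first candidate that is not blocked.
func rescheduleParent(candidates []string, blocked map[string]bool) (string, bool) {
    for _, c := range candidates {
        if !blocked[c] {
            return c, true
        }
    }
    return "", false
}

func main() {
    blocked := map[string]bool{}
    candidates := []string{"parent-a", "parent-b", "parent-c"}

    // A piece from parent-a failed: block it and reschedule.
    blocked["parent-a"] = true
    if parent, ok := rescheduleParent(candidates, blocked); ok {
        fmt.Println("rescheduled to", parent) // rescheduled to parent-b
    }
}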
@ -845,7 +890,7 @@ func (s *Service) handlePeerSuccess(ctx context.Context, peer *resource.Peer) {
sizeScope, err := peer.Task.SizeScope() sizeScope, err := peer.Task.SizeScope()
if err != nil { if err != nil {
peer.Log.Errorf("get task size scope failed: %s", err.Error()) peer.Log.Error(err)
return return
} }
@ -868,8 +913,8 @@ func (s *Service) handlePeerSuccess(ctx context.Context, peer *resource.Peer) {
} }
} }
// handlePeerFail handles failed peer. // handlePeerFailure handles failed peer.
func (s *Service) handlePeerFail(ctx context.Context, peer *resource.Peer) { func (s *Service) handlePeerFailure(ctx context.Context, peer *resource.Peer) {
if err := peer.FSM.Event(resource.PeerEventDownloadFailed); err != nil { if err := peer.FSM.Event(resource.PeerEventDownloadFailed); err != nil {
peer.Log.Errorf("peer fsm event failed: %s", err.Error()) peer.Log.Errorf("peer fsm event failed: %s", err.Error())
return return
@ -885,7 +930,6 @@ func (s *Service) handlePeerFail(ctx context.Context, peer *resource.Peer) {
// handleLegacySeedPeer handles the case where the seed server's task has left, // handleLegacySeedPeer handles the case where the seed server's task has left,
// but the scheduler was not notified to leave the task. // but the scheduler was not notified to leave the task.
func (s *Service) handleLegacySeedPeer(ctx context.Context, peer *resource.Peer) { func (s *Service) handleLegacySeedPeer(ctx context.Context, peer *resource.Peer) {
peer.Log.Info("peer is legacy seed peer, causing the peer to leave")
if err := peer.FSM.Event(resource.PeerEventLeave); err != nil { if err := peer.FSM.Event(resource.PeerEventLeave); err != nil {
peer.Log.Errorf("peer fsm event failed: %s", err.Error()) peer.Log.Errorf("peer fsm event failed: %s", err.Error())
return return
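Both handlePeerFailure and handleLegacySeedPeer start the same way: fire an FSM event, then log and bail out if the transition is invalid. A toy event-driven state holder that mimics that event-guarded transition (the transitions and state names are illustrative, not the scheduler's actual FSM definition):

package main

import "fmt"

// toyFSM is a minimal event-driven state holder standing in for the peer FSM.
type toyFSM struct {
    state       string
    transitions map[string]map[string]string // event -> from -> to
}

// Event applies a named event and returns an error if the transition
// is not allowed from the current state.
func (f *toyFSM) Event(event string) error {
    to, ok := f.transitions[event][f.state]
    if !ok {
        return fmt.Errorf("event %s inappropriate in state %s", event, f.state)
    }
    f.state = to
    return nil
}

func main() {
    fsm := &toyFSM{
        state: "Running",
        transitions: map[string]map[string]string{
            "DownloadFailed": {"Running": "Failed"},
            "Leave":          {"Running": "Leave", "Failed": "Leave"},
        },
    }

    // handlePeerFailure shape: mark the peer failed.
    if err := fsm.Event("DownloadFailed"); err != nil {
        fmt.Println(err)
    }
    // handleLegacySeedPeer shape: the peer simply leaves.
    if err := fsm.Event("Leave"); err != nil {
        fmt.Println(err)
    }
    fmt.Println(fsm.state) // Leave
}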
@ -920,7 +964,7 @@ func (s *Service) handleTaskSuccess(ctx context.Context, task *resource.Task, re
// Conditions for the task to switch to the TaskStateSucceeded are: // Conditions for the task to switch to the TaskStateSucceeded are:
// 1. The seed peer failed to download the resource. // 1. The seed peer failed to download the resource.
// 2. The dfdaemon failed to download back-to-source. // 2. The dfdaemon failed to download back-to-source.
func (s *Service) handleTaskFail(ctx context.Context, task *resource.Task, backToSourceErr *errordetailsv1.SourceError, seedPeerErr error) { func (s *Service) handleTaskFailure(ctx context.Context, task *resource.Task, backToSourceErr *errordetailsv1.SourceError, seedPeerErr error) {
// If peer back-to-source fails due to an unrecoverable error, // If peer back-to-source fails due to an unrecoverable error,
// notify other peers of the failure, // notify other peers of the failure,
// and return the source metadata to peer. // and return the source metadata to peer.
@ -946,18 +990,17 @@ func (s *Service) handleTaskFail(ctx context.Context, task *resource.Task, backT
if u, err := url.Parse(task.URL); err == nil { if u, err := url.Parse(task.URL); err == nil {
proto = u.Scheme proto = u.Scheme
} }
task.Log.Infof("source error: %#v", d)
// TODO currently, metrics.PeerTaskSourceErrorCounter is only updated for seed peer source error, need update for normal peer // TODO currently, metrics.PeerTaskSourceErrorCounter is only updated for seed peer source error, need update for normal peer
if d.Metadata != nil { if d.Metadata != nil {
task.Log.Infof("source error: %d/%s", d.Metadata.StatusCode, d.Metadata.Status)
metrics.PeerTaskSourceErrorCounter.WithLabelValues( metrics.PeerTaskSourceErrorCounter.WithLabelValues(
task.URLMeta.Tag, task.URLMeta.Application, proto, fmt.Sprintf("%d", d.Metadata.StatusCode)).Inc() task.URLMeta.Tag, task.URLMeta.Application, proto, fmt.Sprintf("%d", d.Metadata.StatusCode)).Inc()
} else { } else {
task.Log.Warn("source error, but no metadata found")
metrics.PeerTaskSourceErrorCounter.WithLabelValues( metrics.PeerTaskSourceErrorCounter.WithLabelValues(
task.URLMeta.Tag, task.URLMeta.Application, proto, "0").Inc() task.URLMeta.Tag, task.URLMeta.Application, proto, "0").Inc()
} }
if !d.Temporary { if !d.Temporary {
task.Log.Infof("source error is not temporary, notify other peers task aborted")
task.NotifyPeers(&schedulerv1.PeerPacket{ task.NotifyPeers(&schedulerv1.PeerPacket{
Code: commonv1.Code_BackToSourceAborted, Code: commonv1.Code_BackToSourceAborted,
Errordetails: &schedulerv1.PeerPacket_SourceError{ Errordetails: &schedulerv1.PeerPacket_SourceError{
File diff suppressed because it is too large
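The tail of handleTaskFailure above labels the source-error counter by URL scheme and HTTP status code, falling back to "0" when the error carries no metadata. A stdlib-only sketch of that label derivation; the type and function names are invented for the example, and the real code reads the errordetails proto and feeds a prometheus CounterVec:

package main

import (
    "fmt"
    "net/url"
)

// sourceMetadata is a stand-in for the metadata carried by a source error.
type sourceMetadata struct {
    StatusCode int
    Status     string
}

// errorLabels derives the metric labels used in the diff: the URL scheme as
// the protocol, and the HTTP status code with "0" as the fallback when the
// error has no metadata.
func errorLabels(taskURL string, md *sourceMetadata) (proto, code string) {
    if u, err := url.Parse(taskURL); err == nil {
        proto = u.Scheme
    }
    code = "0"
    if md != nil {
        code = fmt.Sprintf("%d", md.StatusCode)
    }
    return proto, code
}

func main() {
    proto, code := errorLabels("https://example.com/blobs/sha256:abc", &sourceMetadata{StatusCode: 404, Status: "Not Found"})
    fmt.Println(proto, code) // https 404
}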