/*
 * Copyright 2022 The Dragonfly Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package peer

import (
	"context"
	"errors"
	"fmt"
	"io"
	"sync"
	"time"

	"go.opentelemetry.io/otel/trace"
	"go.uber.org/atomic"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"

	commonv1 "d7y.io/api/pkg/apis/common/v1"
	dfdaemonv1 "d7y.io/api/pkg/apis/dfdaemon/v1"
	schedulerv1 "d7y.io/api/pkg/apis/scheduler/v1"

	"d7y.io/dragonfly/v2/client/config"
	logger "d7y.io/dragonfly/v2/internal/dflog"
	"d7y.io/dragonfly/v2/pkg/dfnet"
	"d7y.io/dragonfly/v2/pkg/net/ip"
	dfdaemonclient "d7y.io/dragonfly/v2/pkg/rpc/dfdaemon/client"
)

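// pieceTaskSyncManager keeps one pieceTaskSynchronizer per destination peer
// and feeds the piece infos they receive into the shared PieceDispatcher.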
type pieceTaskSyncManager struct {
	sync.RWMutex
	ctx               context.Context
	ctxCancel         context.CancelFunc
	peerTaskConductor *peerTaskConductor
	pieceRequestQueue PieceDispatcher
	workers           map[string]*pieceTaskSynchronizer
	watchdog          *synchronizerWatchdog
}

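// pieceTaskSynchronizer maintains a SyncPieceTasks gRPC stream to a single
// destination peer and dispatches the received piece infos as download requests.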
type pieceTaskSynchronizer struct {
	*logger.SugaredLoggerOnWith
	ctx               context.Context
	ctxCancel         context.CancelFunc
	span              trace.Span
	syncPiecesStream  dfdaemonv1.Daemon_SyncPieceTasksClient
	grpcClient        dfdaemonclient.V1
	dstPeer           *schedulerv1.PeerPacket_DestPeer
	error             atomic.Value
	grpcInitialized   *atomic.Bool
	grpcInitError     atomic.Value
	peerTaskConductor *peerTaskConductor
	pieceRequestQueue PieceDispatcher
}

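// synchronizerWatchdog checks whether any piece has been received within the
// configured timeout and reports the main peer to the scheduler if not.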
type synchronizerWatchdog struct {
	done              chan struct{}
	mainPeer          atomic.Value // save *schedulerv1.PeerPacket_DestPeer
	syncSuccess       *atomic.Bool
	peerTaskConductor *peerTaskConductor
}

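// pieceTaskSynchronizerError wraps the last error observed by a synchronizer
// so it can be stored in an atomic.Value.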
type pieceTaskSynchronizerError struct {
	err error
}

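// syncPeers reconciles the running synchronizers with the destination peers sent
// by the scheduler: new peers are connected, healthy workers are kept, and failed
// or stale workers are closed and re-created.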
// FIXME for compatibility, sync will be called after dfdaemonclient.GetPieceTasks is deprecated and the pieceTaskPoller is removed
func (s *pieceTaskSyncManager) syncPeers(destPeers []*schedulerv1.PeerPacket_DestPeer, desiredPiece int32) {
	s.Lock()
	defer func() {
		if s.peerTaskConductor.WatchdogTimeout > 0 {
			s.resetWatchdog(destPeers[0])
		}
		s.Unlock()
	}()

	peersToKeep, peersToAdd, peersToClose := s.diffPeers(destPeers)

	for _, peer := range peersToAdd {
		s.newPieceTaskSynchronizer(s.ctx, peer, desiredPiece)
	}

	for _, peer := range peersToKeep {
		worker := s.workers[peer.PeerId]
		// the worker is still healthy, keep it running
		if worker.error.Load() == nil {
			s.peerTaskConductor.Infof("reuse working PieceTaskSynchronizer %s", peer.PeerId)
		} else {
			s.peerTaskConductor.Infof("close stale PieceTaskSynchronizer %s and re-initialize it", peer.PeerId)
			// clean up the broken worker
			worker.close()
			delete(s.workers, peer.PeerId)
			// reconnect and retry
			s.newPieceTaskSynchronizer(s.ctx, peer, desiredPiece)
		}
	}

	// close stale workers
	for _, p := range peersToClose {
		s.workers[p].close()
		delete(s.workers, p)
	}
}

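// diffPeers splits the scheduler's peer list into peers to keep, peers to add
// and stale worker ids to close.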
func (s *pieceTaskSyncManager) diffPeers(peers []*schedulerv1.PeerPacket_DestPeer) (
	peersToKeep []*schedulerv1.PeerPacket_DestPeer, peersToAdd []*schedulerv1.PeerPacket_DestPeer, peersToClose []string) {
	if len(s.workers) == 0 {
		return nil, peers, nil
	}

	cache := make(map[string]bool)
	for _, p := range peers {
		cache[p.PeerId] = true
		if _, ok := s.workers[p.PeerId]; ok {
			peersToKeep = append(peersToKeep, p)
		} else {
			peersToAdd = append(peersToAdd, p)
		}
	}

	for p := range s.workers {
		if !cache[p] {
			peersToClose = append(peersToClose, p)
		}
	}
	return
}

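// newPieceTaskSynchronizer registers a synchronizer for dstPeer and starts it
// in a new goroutine.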
func (s *pieceTaskSyncManager) newPieceTaskSynchronizer(
	ctx context.Context,
	dstPeer *schedulerv1.PeerPacket_DestPeer,
	desiredPiece int32) {
	_, span := tracer.Start(s.ctx, config.SpanSyncPieceTasks)
	span.SetAttributes(config.AttributeTargetPeerID.String(dstPeer.PeerId))
	request := &commonv1.PieceTaskRequest{
		TaskId:   s.peerTaskConductor.taskID,
		SrcPid:   s.peerTaskConductor.peerID,
		DstPid:   dstPeer.PeerId,
		StartNum: uint32(desiredPiece),
		Limit:    16,
	}
	ctx, cancel := context.WithCancel(ctx)
	synchronizer := &pieceTaskSynchronizer{
		ctx:                 ctx,
		ctxCancel:           cancel,
		span:                span,
		peerTaskConductor:   s.peerTaskConductor,
		pieceRequestQueue:   s.pieceRequestQueue,
		dstPeer:             dstPeer,
		error:               atomic.Value{},
		grpcInitialized:     atomic.NewBool(false),
		grpcInitError:       atomic.Value{},
		SugaredLoggerOnWith: s.peerTaskConductor.With("targetPeerID", request.DstPid),
	}
	s.workers[dstPeer.PeerId] = synchronizer
	go synchronizer.start(request, dstPeer)
}

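// resetWatchdog stops the old watchdog, if any, and starts a new one watching mainPeer.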
func (s *pieceTaskSyncManager) resetWatchdog(mainPeer *schedulerv1.PeerPacket_DestPeer) {
	if s.watchdog != nil {
		close(s.watchdog.done)
		s.peerTaskConductor.Debugf("close old watchdog")
	}
	s.watchdog = &synchronizerWatchdog{
		done:              make(chan struct{}),
		mainPeer:          atomic.Value{},
		syncSuccess:       atomic.NewBool(false),
		peerTaskConductor: s.peerTaskConductor,
	}
	s.watchdog.mainPeer.Store(mainPeer)
	s.peerTaskConductor.Infof("start new watchdog")
	go s.watchdog.watch(s.peerTaskConductor.WatchdogTimeout)
}

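// compositePieceResult builds a failed PieceResult for reporting the given code to the scheduler.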
func compositePieceResult(peerTaskConductor *peerTaskConductor, destPeer *schedulerv1.PeerPacket_DestPeer, code commonv1.Code) *schedulerv1.PieceResult {
	return &schedulerv1.PieceResult{
		TaskId:        peerTaskConductor.taskID,
		SrcPid:        peerTaskConductor.peerID,
		DstPid:        destPeer.PeerId,
		PieceInfo:     &commonv1.PieceInfo{},
		Success:       false,
		Code:          code,
		FinishedCount: peerTaskConductor.readyPieces.Settled(),
	}
}

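// reportInvalidPeer reports a misbehaving destination peer to the scheduler so
// that more available peers can be scheduled.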
func (s *pieceTaskSyncManager) reportInvalidPeer(destPeer *schedulerv1.PeerPacket_DestPeer, code commonv1.Code) {
	sendError := s.peerTaskConductor.sendPieceResult(compositePieceResult(s.peerTaskConductor, destPeer, code))
	if sendError != nil {
		s.peerTaskConductor.Errorf("connect peer %s failed and send piece result with error: %s", destPeer.PeerId, sendError)
		go s.peerTaskConductor.cancel(commonv1.Code_SchedError, sendError.Error())
	} else {
		s.peerTaskConductor.Debugf("report invalid peer %s/%d to scheduler", destPeer.PeerId, code)
	}
}

// acquire sends the target piece request to all connected peers
func (s *pieceTaskSyncManager) acquire(request *commonv1.PieceTaskRequest) (attempt int, success int) {
	s.RLock()
	for _, p := range s.workers {
		attempt++
		if p.grpcInitialized.Load() && p.acquire(request) == nil {
			success++
		}
	}
	s.RUnlock()
	return
}

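// cancel stops all synchronizers, closes the piece request queue and clears the worker map.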
func (s *pieceTaskSyncManager) cancel() {
	s.ctxCancel()
	s.pieceRequestQueue.Close()
	s.Lock()
	for _, p := range s.workers {
		p.close()
	}
	s.workers = map[string]*pieceTaskSynchronizer{}
	s.Unlock()
}

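// start dials the destination peer, opens the SyncPieceTasks stream and then enters
// the receive loop; any setup error is reported to the scheduler by the deferred handler.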
func (s *pieceTaskSynchronizer) start(request *commonv1.PieceTaskRequest, dstPeer *schedulerv1.PeerPacket_DestPeer) {
	var startError error
	defer func() {
		if startError != nil {
			s.grpcInitError.Store(&pieceTaskSynchronizerError{startError})
			s.peerTaskConductor.Errorf("connect peer %s error: %s", dstPeer.PeerId, startError)
			if errors.Is(startError, context.DeadlineExceeded) {
				// connect timeout error, report to the scheduler to get more available peers
				s.peerTaskConductor.pieceTaskSyncManager.reportInvalidPeer(dstPeer, commonv1.Code_ClientConnectionError)
			} else {
				// other errors, report to the scheduler to get more available peers
				s.peerTaskConductor.pieceTaskSyncManager.reportInvalidPeer(dstPeer, commonv1.Code_ClientPieceRequestFail)
			}
		}
	}()

	formatIP, ok := ip.FormatIP(dstPeer.Ip)
	if !ok {
		startError = errors.New("format ip failed")
		return
	}

	netAddr := &dfnet.NetAddr{
		Type: dfnet.TCP,
		Addr: fmt.Sprintf("%s:%d", formatIP, dstPeer.RpcPort),
	}

	credentialOpt := grpc.WithTransportCredentials(s.peerTaskConductor.GRPCCredentials)

	dialCtx, cancel := context.WithTimeout(s.ctx, s.peerTaskConductor.GRPCDialTimeout)
	grpcClient, err := dfdaemonclient.GetV1(dialCtx, netAddr.String(), credentialOpt, grpc.WithBlock())
	cancel()

	if err != nil {
		startError = err
		return
	}

	stream, err := grpcClient.SyncPieceTasks(s.ctx, request)
	// Refer: https://github.com/grpc/grpc-go/blob/v1.44.0/stream.go#L104
	// When io.EOF is received, the real error should be discovered via RecvMsg, here stream.Recv()
	if err == io.EOF && stream != nil {
		_, err = stream.Recv()
	}
	if err != nil {
		// the grpc client must be closed, Refer: https://github.com/grpc/grpc-go/issues/5321
		_ = grpcClient.Close()
		if stream != nil {
			_ = stream.CloseSend()
		}
		s.peerTaskConductor.Errorf("call SyncPieceTasks error: %s, dest peer: %s", err, dstPeer.PeerId)
		startError = err
		return
	}

	s.syncPiecesStream = stream
	s.grpcClient = grpcClient

	s.grpcInitialized.Store(true)
	s.receive()
}

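// close cancels the synchronizer context and closes the gRPC resources, waiting
// in the background if initialization is still in progress.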
func (s *pieceTaskSynchronizer) close() {
	s.ctxCancel()
	if s.grpcInitialized.Load() {
		s.closeGRPC()
		s.Infof("pieceTaskSynchronizer grpc closed")
	} else {
		go s.waitAndClose()
	}
}

// Either grpcInitialized is true or grpcInitError is set; otherwise the pieceTaskSynchronizer is still initializing, so wait for it.
func (s *pieceTaskSynchronizer) waitAndClose() {
	for {
		// grpc is ready, just close it
		if s.grpcInitialized.Load() {
			s.closeGRPC()
			s.Infof("pieceTaskSynchronizer grpc closed and exit in background")
			return
		}
		// grpc init error
		if s.grpcInitError.Load() != nil {
			s.Infof("pieceTaskSynchronizer grpc init error and exit in background")
			return
		}
		s.Infof("pieceTaskSynchronizer grpc is initializing, wait for it to complete in background")
		time.Sleep(time.Minute)
	}
}

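// closeGRPC closes the stream and the gRPC client, recording any error on the span before ending it.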
func (s *pieceTaskSynchronizer) closeGRPC() {
	if err := s.syncPiecesStream.CloseSend(); err != nil {
		s.error.Store(&pieceTaskSynchronizerError{err})
		s.Debugf("close send error: %s, dest peer: %s", err, s.dstPeer.PeerId)
		s.span.RecordError(err)
	}
	if err := s.grpcClient.Close(); err != nil {
		s.error.Store(&pieceTaskSynchronizerError{err})
		s.Debugf("close grpc client error: %s, dest peer: %s", err, s.dstPeer.PeerId)
		s.span.RecordError(err)
	}
	s.span.End()
}

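// dispatchPieceRequest updates the task metadata from the piece packet and enqueues
// a DownloadPieceRequest for every received piece info.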
func (s *pieceTaskSynchronizer) dispatchPieceRequest(piecePacket *commonv1.PiecePacket) {
	s.peerTaskConductor.updateMetadata(piecePacket)

	pieceCount := len(piecePacket.PieceInfos)
	s.Debugf("dispatch piece request, piece count: %d, dest peer: %s", pieceCount, s.dstPeer.PeerId)
	// peers may send zero piece infos, but with the total piece count and content length
	if pieceCount == 0 {
		finished := s.peerTaskConductor.isCompleted()
		if finished {
			s.peerTaskConductor.Done()
		}
		return
	}
	for _, piece := range piecePacket.PieceInfos {
		s.Infof("got piece %d from %s/%s, digest: %s, start: %d, size: %d",
			piece.PieceNum, piecePacket.DstAddr, piecePacket.DstPid, piece.PieceMd5, piece.RangeStart, piece.RangeSize)
		// FIXME when the total piece count is set but the total digest is not, fetch again
		s.peerTaskConductor.requestedPiecesLock.Lock()
		if !s.peerTaskConductor.requestedPieces.IsSet(piece.PieceNum) {
			s.peerTaskConductor.requestedPieces.Set(piece.PieceNum)
		}
		s.peerTaskConductor.requestedPiecesLock.Unlock()
		req := &DownloadPieceRequest{
			storage: s.peerTaskConductor.GetStorage(),
			piece:   piece,
			log:     s.peerTaskConductor.Log(),
			TaskID:  s.peerTaskConductor.GetTaskID(),
			PeerID:  s.peerTaskConductor.GetPeerID(),
			DstPid:  piecePacket.DstPid,
			DstAddr: piecePacket.DstAddr,
		}

		s.pieceRequestQueue.Put(req)
		s.span.AddEvent(fmt.Sprintf("send piece #%d request to piece download queue", piece.PieceNum))

		select {
		case <-s.peerTaskConductor.successCh:
			s.Infof("peer task success, stop dispatching piece requests, dest peer: %s", s.dstPeer.PeerId)
		case <-s.peerTaskConductor.failCh:
			s.Warnf("peer task fail, stop dispatching piece requests, dest peer: %s", s.dstPeer.PeerId)
		default:
		}
	}
}

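// receive loops on the SyncPieceTasks stream, dispatching every piece packet until
// the stream ends or fails.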
func (s *pieceTaskSynchronizer) receive() {
	var (
		piecePacket *commonv1.PiecePacket
		err         error
	)
	for {
		piecePacket, err = s.syncPiecesStream.Recv()
		if err != nil {
			break
		}
		s.dispatchPieceRequest(piecePacket)
	}

	if err == io.EOF {
		s.Debugf("synchronizer receives io.EOF")
	} else if s.canceled(err) {
		s.Debugf("synchronizer receives canceled")
		s.error.Store(&pieceTaskSynchronizerError{err})
	} else {
		s.Errorf("synchronizer receives with error: %s", err)
		s.error.Store(&pieceTaskSynchronizerError{err})
		s.reportError(err)
	}
}

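// acquire sends the piece task request on the stream unless the synchronizer has
// already recorded an error.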
func (s *pieceTaskSynchronizer) acquire(request *commonv1.PieceTaskRequest) error {
	if s.error.Load() != nil {
		err := s.error.Load().(*pieceTaskSynchronizerError).err
		s.Debugf("synchronizer already in error state: %s, skip acquiring more pieces", err)
		return err
	}
	request.DstPid = s.dstPeer.PeerId
	err := s.syncPiecesStream.Send(request)
	s.span.AddEvent(fmt.Sprintf("send piece #%d request", request.StartNum))
	if err != nil {
		// send should always succeed
		s.error.Store(&pieceTaskSynchronizerError{err})
		s.Errorf("synchronizer sends with error: %s", err)
		s.reportError(err)
	}
	return err
}

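// reportError records the error on the span and reports the failed destination peer to the scheduler.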
func (s *pieceTaskSynchronizer) reportError(err error) {
	s.span.RecordError(err)
	sendError := s.peerTaskConductor.sendPieceResult(compositePieceResult(s.peerTaskConductor, s.dstPeer, commonv1.Code_ClientPieceRequestFail))
	if sendError != nil {
		s.Errorf("sync piece info failed and send piece result with error: %s", sendError)
		go s.peerTaskConductor.cancel(commonv1.Code_SchedError, sendError.Error())
	} else {
		s.Debugf("report sync piece error to scheduler")
	}
}

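// canceled reports whether err is a context or gRPC cancellation.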
func (s *pieceTaskSynchronizer) canceled(err error) bool {
	if err == context.Canceled {
		s.Debugf("context canceled, dst peer: %s", s.dstPeer.PeerId)
		return true
	}
	if stat, ok := err.(interface{ GRPCStatus() *status.Status }); ok {
		if stat.GRPCStatus().Code() == codes.Canceled {
			s.Debugf("grpc canceled, dst peer: %s", s.dstPeer.PeerId)
			return true
		}
	}
	return false
}

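// watch waits for the timeout and, if no piece has been received by then, reports
// the main peer to the scheduler; it exits early on task success, task failure or done.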
func (s *synchronizerWatchdog) watch(timeout time.Duration) {
	select {
	case <-time.After(timeout):
		if s.peerTaskConductor.readyPieces.Settled() == 0 {
			s.peerTaskConductor.Warnf("watch sync pieces timeout, may be a bug, " +
				"please file an issue at https://github.com/dragonflyoss/Dragonfly2/issues")
			s.syncSuccess.Store(false)
			s.reportWatchFailed()
		} else {
			s.peerTaskConductor.Infof("watch sync pieces ok")
		}
	case <-s.peerTaskConductor.successCh:
		s.peerTaskConductor.Debugf("peer task success, watchdog exit")
	case <-s.peerTaskConductor.failCh:
		s.peerTaskConductor.Debugf("peer task fail, watchdog exit")
	case <-s.done:
		s.peerTaskConductor.Debugf("watchdog done, exit")
	}
}

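// reportWatchFailed reports the main peer to the scheduler after the watchdog
// timed out without receiving any piece.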
func (s *synchronizerWatchdog) reportWatchFailed() {
	sendError := s.peerTaskConductor.sendPieceResult(compositePieceResult(
		s.peerTaskConductor, s.mainPeer.Load().(*schedulerv1.PeerPacket_DestPeer), commonv1.Code_ClientPieceRequestFail))
	if sendError != nil {
		s.peerTaskConductor.Errorf("watchdog sync piece info failed and send piece result with error: %s", sendError)
		go s.peerTaskConductor.cancel(commonv1.Code_SchedError, sendError.Error())
	} else {
		s.peerTaskConductor.Debugf("report watchdog sync piece error to scheduler")
	}
}