Compare commits

No commits in common. "main" and "v0.2.27" have entirely different histories.

66 changed files with 1479 additions and 3091 deletions

View File

@ -1,2 +0,0 @@
[build]
rustflags = ["--cfg", "tokio_unstable"]

View File

@ -8,4 +8,4 @@ jobs:
add-assignee: add-assignee:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: kentaro-m/auto-assign-action@9f6dbe84a80c6e7639d1b9698048b201052a2a94 - uses: kentaro-m/auto-assign-action@7ae38e468e64dec0af17820972bc4915aa511ec2

View File

@ -26,8 +26,6 @@ jobs:
- name: Install Protoc - name: Install Protoc
uses: arduino/setup-protoc@v2 uses: arduino/setup-protoc@v2
with:
repo-token: ${{ secrets.GH_TOKEN }}
- name: Install Rust toolchain - name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable uses: dtolnay/rust-toolchain@stable
@ -57,8 +55,6 @@ jobs:
- name: Install Protoc - name: Install Protoc
uses: arduino/setup-protoc@v2 uses: arduino/setup-protoc@v2
with:
repo-token: ${{ secrets.GH_TOKEN }}
- name: Install Rust toolchain - name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable uses: dtolnay/rust-toolchain@stable

View File

@ -86,7 +86,7 @@ jobs:
cache-to: type=local,dest=/tmp/.buildx-cache-new cache-to: type=local,dest=/tmp/.buildx-cache-new
- name: Run Trivy vulnerability scanner in tarball mode - name: Run Trivy vulnerability scanner in tarball mode
uses: aquasecurity/trivy-action@dc5a429b52fcf669ce959baa2c2dd26090d2a6c4 uses: aquasecurity/trivy-action@6c175e9c4083a92bbca2f9724c8a5e33bc2d97a5
with: with:
image-ref: dragonflyoss/client:${{ steps.get_version.outputs.VERSION }} image-ref: dragonflyoss/client:${{ steps.get_version.outputs.VERSION }}
severity: 'CRITICAL,HIGH' severity: 'CRITICAL,HIGH'
@ -94,7 +94,7 @@ jobs:
output: 'trivy-results.sarif' output: 'trivy-results.sarif'
- name: Upload Trivy scan results to GitHub Security tab - name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@76621b61decf072c1cee8dd1ce2d2a82d33c17ed uses: github/codeql-action/upload-sarif@60168efe1c415ce0f5521ea06d5c2062adbeed1b
with: with:
sarif_file: 'trivy-results.sarif' sarif_file: 'trivy-results.sarif'
@ -181,7 +181,7 @@ jobs:
cache-to: type=local,dest=/tmp/.buildx-cache-new cache-to: type=local,dest=/tmp/.buildx-cache-new
- name: Run Trivy vulnerability scanner in tarball mode - name: Run Trivy vulnerability scanner in tarball mode
uses: aquasecurity/trivy-action@dc5a429b52fcf669ce959baa2c2dd26090d2a6c4 uses: aquasecurity/trivy-action@6c175e9c4083a92bbca2f9724c8a5e33bc2d97a5
with: with:
image-ref: dragonflyoss/client:${{ steps.get_version.outputs.VERSION }}-debug image-ref: dragonflyoss/client:${{ steps.get_version.outputs.VERSION }}-debug
severity: 'CRITICAL,HIGH' severity: 'CRITICAL,HIGH'
@ -189,7 +189,7 @@ jobs:
output: 'trivy-results.sarif' output: 'trivy-results.sarif'
- name: Upload Trivy scan results to GitHub Security tab - name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@76621b61decf072c1cee8dd1ce2d2a82d33c17ed uses: github/codeql-action/upload-sarif@60168efe1c415ce0f5521ea06d5c2062adbeed1b
with: with:
sarif_file: 'trivy-results.sarif' sarif_file: 'trivy-results.sarif'
@ -276,7 +276,7 @@ jobs:
cache-to: type=local,dest=/tmp/.buildx-cache-new cache-to: type=local,dest=/tmp/.buildx-cache-new
- name: Run Trivy vulnerability scanner in tarball mode - name: Run Trivy vulnerability scanner in tarball mode
uses: aquasecurity/trivy-action@dc5a429b52fcf669ce959baa2c2dd26090d2a6c4 uses: aquasecurity/trivy-action@6c175e9c4083a92bbca2f9724c8a5e33bc2d97a5
with: with:
image-ref: dragonflyoss/dfinit:${{ steps.get_version.outputs.VERSION }} image-ref: dragonflyoss/dfinit:${{ steps.get_version.outputs.VERSION }}
severity: 'CRITICAL,HIGH' severity: 'CRITICAL,HIGH'
@ -284,7 +284,7 @@ jobs:
output: 'trivy-results.sarif' output: 'trivy-results.sarif'
- name: Upload Trivy scan results to GitHub Security tab - name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@76621b61decf072c1cee8dd1ce2d2a82d33c17ed uses: github/codeql-action/upload-sarif@60168efe1c415ce0f5521ea06d5c2062adbeed1b
with: with:
sarif_file: 'trivy-results.sarif' sarif_file: 'trivy-results.sarif'

View File

@ -22,14 +22,11 @@ jobs:
- name: Install Protoc - name: Install Protoc
uses: arduino/setup-protoc@v2 uses: arduino/setup-protoc@v2
with:
repo-token: ${{ secrets.GH_TOKEN }}
- name: Install Rust toolchain - name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable uses: dtolnay/rust-toolchain@stable
with: with:
components: rustfmt, clippy components: rustfmt, clippy
toolchain: 1.85.0
- name: Set up Clang - name: Set up Clang
uses: egor-tensin/setup-clang@v1 uses: egor-tensin/setup-clang@v1

View File

@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: PR impact specified - name: PR impact specified
uses: mheap/github-action-required-labels@8afbe8ae6ab7647d0c9f0cfa7c2f939650d22509 # v5.5 uses: mheap/github-action-required-labels@388fd6af37b34cdfe5a23b37060e763217e58b03 # v5.5
with: with:
mode: exactly mode: exactly
count: 1 count: 1

View File

@ -52,7 +52,7 @@ jobs:
target: ${{ matrix.target }} target: ${{ matrix.target }}
- name: Install cargo-deb - name: Install cargo-deb
uses: taiki-e/cache-cargo-install-action@b33c63d3b3c85540f4eba8a4f71a5cc0ce030855 uses: taiki-e/cache-cargo-install-action@4d586f211d9b0bca9e7b59e57e2a0febf36c0929
with: with:
# Don't upgrade cargo-deb, refer to https://github.com/kornelski/cargo-deb/issues/169. # Don't upgrade cargo-deb, refer to https://github.com/kornelski/cargo-deb/issues/169.
tool: cargo-deb@2.10.0 tool: cargo-deb@2.10.0
@ -119,7 +119,7 @@ jobs:
contents: write contents: write
steps: steps:
- name: Download Release Artifacts - name: Download Release Artifacts
uses: actions/download-artifact@v5 uses: actions/download-artifact@v4
with: with:
path: releases path: releases
pattern: release-* pattern: release-*

Cargo.lock (generated), 829 changes: file diff suppressed because it is too large.

View File

@ -12,7 +12,7 @@ members = [
] ]
[workspace.package] [workspace.package]
version = "1.0.10" version = "0.2.27"
authors = ["The Dragonfly Developers"] authors = ["The Dragonfly Developers"]
homepage = "https://d7y.io/" homepage = "https://d7y.io/"
repository = "https://github.com/dragonflyoss/client.git" repository = "https://github.com/dragonflyoss/client.git"
@ -22,14 +22,14 @@ readme = "README.md"
edition = "2021" edition = "2021"
[workspace.dependencies] [workspace.dependencies]
dragonfly-client = { path = "dragonfly-client", version = "1.0.10" } dragonfly-client = { path = "dragonfly-client", version = "0.2.27" }
dragonfly-client-core = { path = "dragonfly-client-core", version = "1.0.10" } dragonfly-client-core = { path = "dragonfly-client-core", version = "0.2.27" }
dragonfly-client-config = { path = "dragonfly-client-config", version = "1.0.10" } dragonfly-client-config = { path = "dragonfly-client-config", version = "0.2.27" }
dragonfly-client-storage = { path = "dragonfly-client-storage", version = "1.0.10" } dragonfly-client-storage = { path = "dragonfly-client-storage", version = "0.2.27" }
dragonfly-client-backend = { path = "dragonfly-client-backend", version = "1.0.10" } dragonfly-client-backend = { path = "dragonfly-client-backend", version = "0.2.27" }
dragonfly-client-util = { path = "dragonfly-client-util", version = "1.0.10" } dragonfly-client-util = { path = "dragonfly-client-util", version = "0.2.27" }
dragonfly-client-init = { path = "dragonfly-client-init", version = "1.0.10" } dragonfly-client-init = { path = "dragonfly-client-init", version = "0.2.27" }
dragonfly-api = "2.1.57" dragonfly-api = "=2.1.39"
thiserror = "2.0" thiserror = "2.0"
futures = "0.3.31" futures = "0.3.31"
reqwest = { version = "0.12.4", features = [ reqwest = { version = "0.12.4", features = [
@ -41,12 +41,11 @@ reqwest = { version = "0.12.4", features = [
"brotli", "brotli",
"zstd", "zstd",
"deflate", "deflate",
"blocking",
] } ] }
reqwest-middleware = "0.4" reqwest-middleware = "0.4"
rcgen = { version = "0.12.1", features = ["x509-parser"] } rcgen = { version = "0.12.1", features = ["x509-parser"] }
hyper = { version = "1.6", features = ["full"] } hyper = { version = "1.6", features = ["full"] }
hyper-util = { version = "0.1.16", features = [ hyper-util = { version = "0.1.10", features = [
"client", "client",
"client-legacy", "client-legacy",
"tokio", "tokio",
@ -59,10 +58,10 @@ http-range-header = "0.4.2"
tracing = "0.1" tracing = "0.1"
url = "2.5.4" url = "2.5.4"
rustls = { version = "0.22.4", features = ["tls12"] } rustls = { version = "0.22.4", features = ["tls12"] }
rustls-pki-types = "1.12.0" rustls-pki-types = "1.11.0"
rustls-pemfile = "2.2.0" rustls-pemfile = "2.2.0"
sha2 = "0.10" sha2 = "0.10"
crc32fast = "1.5.0" crc32fast = "1.4.2"
uuid = { version = "1.16", features = ["v4"] } uuid = { version = "1.16", features = ["v4"] }
hex = "0.4" hex = "0.4"
rocksdb = "0.22.0" rocksdb = "0.22.0"
@ -71,12 +70,12 @@ serde_yaml = "0.9"
http = "1" http = "1"
tonic = { version = "0.12.2", features = ["tls"] } tonic = { version = "0.12.2", features = ["tls"] }
tonic-reflection = "0.12.3" tonic-reflection = "0.12.3"
tokio = { version = "1.47.1", features = ["full", "tracing"] } tokio = { version = "1.44.2", features = ["full"] }
tokio-util = { version = "0.7.16", features = ["full"] } tokio-util = { version = "0.7.15", features = ["full"] }
tokio-stream = "0.1.17" tokio-stream = "0.1.17"
validator = { version = "0.16", features = ["derive"] } validator = { version = "0.16", features = ["derive"] }
warp = "0.3.5" warp = "0.3.5"
headers = "0.4.1" headers = "0.4.0"
regex = "1.11.1" regex = "1.11.1"
humantime = "2.1.0" humantime = "2.1.0"
prost-wkt-types = "0.6" prost-wkt-types = "0.6"
@ -91,22 +90,20 @@ opendal = { version = "0.48.0", features = [
"services-cos", "services-cos",
"services-webhdfs", "services-webhdfs",
] } ] }
clap = { version = "4.5.45", features = ["derive"] } clap = { version = "4.5.38", features = ["derive"] }
anyhow = "1.0.98" anyhow = "1.0.98"
toml_edit = "0.22.26" toml_edit = "0.22.26"
toml = "0.8.23" toml = "0.8.22"
bytesize = { version = "1.3.3", features = ["serde"] } bytesize = { version = "1.3.3", features = ["serde"] }
bytesize-serde = "0.2.1" bytesize-serde = "0.2.1"
percent-encoding = "2.3.1" percent-encoding = "2.3.1"
tempfile = "3.20.0" tempfile = "3.19.1"
tokio-rustls = "0.25.0-alpha.4" tokio-rustls = "0.25.0-alpha.4"
serde_json = "1.0.142" serde_json = "1.0.140"
lru = "0.12.5" lru = "0.12.5"
fs2 = "0.4.3" fs2 = "0.4.3"
lazy_static = "1.5" lazy_static = "1.5"
bytes = "1.10" bytes = "1.10"
local-ip-address = "0.6.5"
sysinfo = { version = "0.32.1", default-features = false, features = ["component", "disk", "network", "system", "user"] }
[profile.release] [profile.release]
opt-level = 3 opt-level = 3

View File

@ -20,9 +20,9 @@ You can find the full documentation on the [d7y.io](https://d7y.io).
Join the conversation and help the community. Join the conversation and help the community.
- **Slack Channel**: [#dragonfly](https://cloud-native.slack.com/messages/dragonfly/) on [CNCF Slack](https://slack.cncf.io/) - **Slack Channel**: [#dragonfly](https://cloud-native.slack.com/messages/dragonfly/) on [CNCF Slack](https://slack.cncf.io/)
- **Github Discussions**: [Dragonfly Discussion Forum](https://github.com/dragonflyoss/dragonfly/discussions) - **Discussion Group**: <dragonfly-discuss@googlegroups.com>
- **Developer Group**: <dragonfly-developers@googlegroups.com> - **Developer Group**: <dragonfly-developers@googlegroups.com>
- **Maintainer Group**: <dragonfly-maintainers@googlegroups.com> - **Github Discussions**: [Dragonfly Discussion Forum](https://github.com/dragonflyoss/dragonfly/discussions)
- **Twitter**: [@dragonfly_oss](https://twitter.com/dragonfly_oss) - **Twitter**: [@dragonfly_oss](https://twitter.com/dragonfly_oss)
- **DingTalk**: [22880028764](https://qr.dingtalk.com/action/joingroup?code=v1,k1,pkV9IbsSyDusFQdByPSK3HfCG61ZCLeb8b/lpQ3uUqI=&_dt_no_comment=1&origin=11) - **DingTalk**: [22880028764](https://qr.dingtalk.com/action/joingroup?code=v1,k1,pkV9IbsSyDusFQdByPSK3HfCG61ZCLeb8b/lpQ3uUqI=&_dt_no_comment=1&origin=11)
@ -30,3 +30,7 @@ Join the conversation and help the community.
You should check out our You should check out our
[CONTRIBUTING](./CONTRIBUTING.md) and develop the project together. [CONTRIBUTING](./CONTRIBUTING.md) and develop the project together.
## License
[![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2Fdragonflyoss%2Fclient.svg?type=large)](https://app.fossa.com/projects/git%2Bgithub.com%2Fdragonflyoss%2Fclient?ref=badge_large)

View File

@ -1,4 +1,4 @@
FROM public.ecr.aws/docker/library/rust:1.85.0 AS builder FROM public.ecr.aws/docker/library/rust:1.82.0 AS builder
WORKDIR /app/client WORKDIR /app/client
@ -7,7 +7,6 @@ RUN apt-get update && apt-get install -y \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
COPY Cargo.toml Cargo.lock ./ COPY Cargo.toml Cargo.lock ./
COPY .cargo ./cargo
COPY dragonfly-client/Cargo.toml ./dragonfly-client/Cargo.toml COPY dragonfly-client/Cargo.toml ./dragonfly-client/Cargo.toml
COPY dragonfly-client/src ./dragonfly-client/src COPY dragonfly-client/src ./dragonfly-client/src
@ -35,13 +34,7 @@ COPY dragonfly-client-util/src ./dragonfly-client-util/src
COPY dragonfly-client-init/Cargo.toml ./dragonfly-client-init/Cargo.toml COPY dragonfly-client-init/Cargo.toml ./dragonfly-client-init/Cargo.toml
COPY dragonfly-client-init/src ./dragonfly-client-init/src COPY dragonfly-client-init/src ./dragonfly-client-init/src
ARG TARGETPLATFORM RUN cargo build --release --verbose --bin dfget --bin dfdaemon --bin dfcache
RUN case "${TARGETPLATFORM}" in \
"linux/arm64") export JEMALLOC_SYS_WITH_LG_PAGE=16;; \
esac && \
cargo build --release --verbose --bin dfget --bin dfdaemon --bin dfcache
RUN cargo install tokio-console --locked --root /usr/local
FROM public.ecr.aws/docker/library/alpine:3.20 AS health FROM public.ecr.aws/docker/library/alpine:3.20 AS health
@ -59,21 +52,17 @@ RUN if [ "$(uname -m)" = "ppc64le" ]; then \
FROM public.ecr.aws/docker/library/golang:1.23.0-alpine3.20 AS pprof FROM public.ecr.aws/docker/library/golang:1.23.0-alpine3.20 AS pprof
RUN go install github.com/google/pprof@latest RUN go install github.com/google/pprof@latest
RUN go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest
FROM public.ecr.aws/debian/debian:bookworm-slim FROM public.ecr.aws/debian/debian:bookworm-slim
RUN apt-get update && apt-get install -y --no-install-recommends iperf3 fio curl \ RUN apt-get update && apt-get install -y --no-install-recommends curl bash-completion procps infiniband-diags ibverbs-utils \
iotop sysstat bash-completion procps apache2-utils ca-certificates binutils \ apache2-utils ca-certificates binutils dnsutils iputils-ping llvm dstat sysstat net-tools \
dnsutils iputils-ping llvm graphviz lsof strace dstat net-tools \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
COPY --from=builder /app/client/target/release/dfget /usr/local/bin/dfget COPY --from=builder /app/client/target/release/dfget /usr/local/bin/dfget
COPY --from=builder /app/client/target/release/dfdaemon /usr/local/bin/dfdaemon COPY --from=builder /app/client/target/release/dfdaemon /usr/local/bin/dfdaemon
COPY --from=builder /app/client/target/release/dfcache /usr/local/bin/dfcache COPY --from=builder /app/client/target/release/dfcache /usr/local/bin/dfcache
COPY --from=builder /usr/local/bin/tokio-console /usr/local/bin/
COPY --from=pprof /go/bin/pprof /bin/pprof COPY --from=pprof /go/bin/pprof /bin/pprof
COPY --from=pprof /go/bin/grpcurl /bin/grpcurl
COPY --from=health /bin/grpc_health_probe /bin/grpc_health_probe COPY --from=health /bin/grpc_health_probe /bin/grpc_health_probe
ENTRYPOINT ["/usr/local/bin/dfdaemon"] ENTRYPOINT ["/usr/local/bin/dfdaemon"]

View File

@ -1,4 +1,4 @@
FROM public.ecr.aws/docker/library/rust:1.85.0 AS builder FROM public.ecr.aws/docker/library/rust:1.82.0 AS builder
WORKDIR /app/client WORKDIR /app/client
@ -7,7 +7,6 @@ RUN apt-get update && apt-get install -y \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
COPY Cargo.toml Cargo.lock ./ COPY Cargo.toml Cargo.lock ./
COPY .cargo ./cargo
COPY dragonfly-client/Cargo.toml ./dragonfly-client/Cargo.toml COPY dragonfly-client/Cargo.toml ./dragonfly-client/Cargo.toml
COPY dragonfly-client/src ./dragonfly-client/src COPY dragonfly-client/src ./dragonfly-client/src
@ -35,15 +34,10 @@ COPY dragonfly-client-util/src ./dragonfly-client-util/src
COPY dragonfly-client-init/Cargo.toml ./dragonfly-client-init/Cargo.toml COPY dragonfly-client-init/Cargo.toml ./dragonfly-client-init/Cargo.toml
COPY dragonfly-client-init/src ./dragonfly-client-init/src COPY dragonfly-client-init/src ./dragonfly-client-init/src
ARG TARGETPLATFORM RUN cargo build --verbose --bin dfget --bin dfdaemon --bin dfcache
RUN case "${TARGETPLATFORM}" in \
"linux/arm64") export JEMALLOC_SYS_WITH_LG_PAGE=16;; \
esac && \
cargo build --verbose --bin dfget --bin dfdaemon --bin dfcache
RUN cargo install flamegraph --root /usr/local RUN cargo install flamegraph --root /usr/local
RUN cargo install bottom --locked --root /usr/local RUN cargo install bottom --locked --root /usr/local
RUN cargo install tokio-console --locked --root /usr/local
FROM public.ecr.aws/docker/library/alpine:3.20 AS health FROM public.ecr.aws/docker/library/alpine:3.20 AS health
@ -61,13 +55,12 @@ RUN if [ "$(uname -m)" = "ppc64le" ]; then \
FROM public.ecr.aws/docker/library/golang:1.23.0-alpine3.20 AS pprof FROM public.ecr.aws/docker/library/golang:1.23.0-alpine3.20 AS pprof
RUN go install github.com/google/pprof@latest RUN go install github.com/google/pprof@latest
RUN go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest
FROM public.ecr.aws/debian/debian:bookworm-slim FROM public.ecr.aws/debian/debian:bookworm-slim
RUN apt-get update && apt-get install -y --no-install-recommends iperf3 fio curl infiniband-diags ibverbs-utils \ RUN apt-get update && apt-get install -y --no-install-recommends iperf3 fio wget curl infiniband-diags ibverbs-utils \
iotop sysstat bash-completion procps apache2-utils ca-certificates binutils bpfcc-tools \ iotop sysstat bash-completion procps apache2-utils ca-certificates binutils bpfcc-tools \
dnsutils iputils-ping vim linux-perf llvm lsof socat strace dstat net-tools \ dnsutils iputils-ping vim linux-perf llvm graphviz lsof socat strace dstat net-tools \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
COPY --from=builder /app/client/target/debug/dfget /usr/local/bin/dfget COPY --from=builder /app/client/target/debug/dfget /usr/local/bin/dfget
@ -75,9 +68,7 @@ COPY --from=builder /app/client/target/debug/dfdaemon /usr/local/bin/dfdaemon
COPY --from=builder /app/client/target/debug/dfcache /usr/local/bin/dfcache COPY --from=builder /app/client/target/debug/dfcache /usr/local/bin/dfcache
COPY --from=builder /usr/local/bin/flamegraph /usr/local/bin/ COPY --from=builder /usr/local/bin/flamegraph /usr/local/bin/
COPY --from=builder /usr/local/bin/btm /usr/local/bin/ COPY --from=builder /usr/local/bin/btm /usr/local/bin/
COPY --from=builder /usr/local/bin/tokio-console /usr/local/bin/
COPY --from=pprof /go/bin/pprof /bin/pprof COPY --from=pprof /go/bin/pprof /bin/pprof
COPY --from=pprof /go/bin/grpcurl /bin/grpcurl
COPY --from=health /bin/grpc_health_probe /bin/grpc_health_probe COPY --from=health /bin/grpc_health_probe /bin/grpc_health_probe
ENTRYPOINT ["/usr/local/bin/dfdaemon"] ENTRYPOINT ["/usr/local/bin/dfdaemon"]

View File

@ -1,4 +1,4 @@
FROM public.ecr.aws/docker/library/rust:1.85.0 AS builder FROM public.ecr.aws/docker/library/rust:1.82.0 AS builder
RUN apt-get update && apt-get install -y \ RUN apt-get update && apt-get install -y \
openssl libclang-dev pkg-config protobuf-compiler \ openssl libclang-dev pkg-config protobuf-compiler \
@ -7,7 +7,6 @@ RUN apt-get update && apt-get install -y \
WORKDIR /app/client WORKDIR /app/client
COPY Cargo.toml Cargo.lock ./ COPY Cargo.toml Cargo.lock ./
COPY .cargo ./cargo
COPY dragonfly-client/Cargo.toml ./dragonfly-client/Cargo.toml COPY dragonfly-client/Cargo.toml ./dragonfly-client/Cargo.toml
COPY dragonfly-client/src ./dragonfly-client/src COPY dragonfly-client/src ./dragonfly-client/src
@ -35,11 +34,7 @@ COPY dragonfly-client-util/src ./dragonfly-client-util/src
COPY dragonfly-client-init/Cargo.toml ./dragonfly-client-init/Cargo.toml COPY dragonfly-client-init/Cargo.toml ./dragonfly-client-init/Cargo.toml
COPY dragonfly-client-init/src ./dragonfly-client-init/src COPY dragonfly-client-init/src ./dragonfly-client-init/src
ARG TARGETPLATFORM RUN cargo build --release --verbose --bin dfinit
RUN case "${TARGETPLATFORM}" in \
"linux/arm64") export JEMALLOC_SYS_WITH_LG_PAGE=16;; \
esac && \
cargo build --release --verbose --bin dfinit
FROM public.ecr.aws/debian/debian:bookworm-slim FROM public.ecr.aws/debian/debian:bookworm-slim

View File

@ -5,7 +5,7 @@ After=network-online.target
After=network.target After=network.target
[Service] [Service]
ExecStart=/usr/bin/dfdaemon --config /etc/dragonfly/dfdaemon.yaml --console ExecStart=/usr/bin/dfdaemon --config /etc/dragonfly/dfdaemon.yaml --verbose
Type=simple Type=simple
Environment=HOME=/root Environment=HOME=/root

View File

@ -69,7 +69,7 @@ cargo build --release --bin dfdaemon
```bash ```bash
# prepare client.yaml by yourself. # prepare client.yaml by yourself.
./target/release/dfdaemon --config client.yaml -l info --console ./target/release/dfdaemon --config client.yaml -l info --verbose
``` ```
## FlameGraph ## FlameGraph

View File

@ -27,11 +27,11 @@ percent-encoding.workspace = true
futures.workspace = true futures.workspace = true
reqwest-retry = "0.7" reqwest-retry = "0.7"
reqwest-tracing = "0.5" reqwest-tracing = "0.5"
libloading = "0.8.8" libloading = "0.8.7"
[dev-dependencies] [dev-dependencies]
tempfile.workspace = true tempfile.workspace = true
wiremock = "0.6.4" wiremock = "0.6.3"
rustls-pki-types.workspace = true rustls-pki-types.workspace = true
rustls-pemfile.workspace = true rustls-pemfile.workspace = true
hyper.workspace = true hyper.workspace = true

View File

@ -14,7 +14,7 @@ cargo build --all && mv target/debug/libhdfs.so {plugin_dir}/backend/libhdfs.so
## Run Client with Plugin ## Run Client with Plugin
```shell ```shell
$ cargo run --bin dfdaemon -- --config {config_dir}/config.yaml -l info --console $ cargo run --bin dfdaemon -- --config {config_dir}/config.yaml -l info --verbose
INFO load [http] builtin backend INFO load [http] builtin backend
INFO load [https] builtin backend INFO load [https] builtin backend
INFO load [hdfs] plugin backend INFO load [hdfs] plugin backend

View File

@ -31,7 +31,6 @@ pub const HDFS_SCHEME: &str = "hdfs";
const DEFAULT_NAMENODE_PORT: u16 = 9870; const DEFAULT_NAMENODE_PORT: u16 = 9870;
/// Hdfs is a struct that implements the Backend trait. /// Hdfs is a struct that implements the Backend trait.
#[derive(Default)]
pub struct Hdfs { pub struct Hdfs {
/// scheme is the scheme of the HDFS. /// scheme is the scheme of the HDFS.
scheme: String, scheme: String,
@ -40,6 +39,7 @@ pub struct Hdfs {
/// Hdfs implements the Backend trait. /// Hdfs implements the Backend trait.
impl Hdfs { impl Hdfs {
/// new returns a new HDFS backend. /// new returns a new HDFS backend.
#[instrument(skip_all)]
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
scheme: HDFS_SCHEME.to_string(), scheme: HDFS_SCHEME.to_string(),
@ -47,6 +47,7 @@ impl Hdfs {
} }
/// operator initializes the operator with the parsed URL and HDFS config. /// operator initializes the operator with the parsed URL and HDFS config.
#[instrument(skip_all)]
pub fn operator( pub fn operator(
&self, &self,
url: Url, url: Url,
@ -83,6 +84,7 @@ impl Hdfs {
#[tonic::async_trait] #[tonic::async_trait]
impl super::Backend for Hdfs { impl super::Backend for Hdfs {
/// scheme returns the scheme of the HDFS backend. /// scheme returns the scheme of the HDFS backend.
#[instrument(skip_all)]
fn scheme(&self) -> String { fn scheme(&self) -> String {
self.scheme.clone() self.scheme.clone()
} }
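
A recurring difference across the backend files in this compare is the `#[instrument(skip_all)]` attribute: v0.2.27 carries it on small methods such as `new`, `operator`, and `scheme`, while main removes it and additionally derives `Default` for `Hdfs`. As a reminder of what the attribute does, here is a minimal sketch using the `tracing` crate; `scheme_of` and the example URL are illustrative stand-ins, not code from either branch:

```rust
use tracing::instrument;

/// `#[instrument(skip_all)]` opens a tracing span named after the function on
/// every call and records none of the arguments as span fields.
/// `scheme_of` is a hypothetical helper used only for this illustration.
#[instrument(skip_all)]
fn scheme_of(url: &url::Url) -> String {
    tracing::info!("computing scheme");
    url.scheme().to_string()
}

fn main() {
    // Install a basic subscriber so spans created by `#[instrument]` and the
    // event inside `scheme_of` are printed to stdout.
    tracing_subscriber::fmt().init();

    let url = url::Url::parse("hdfs://namenode:9870/path").expect("valid URL");
    println!("scheme: {}", scheme_of(&url));
}
```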

View File

@ -43,6 +43,7 @@ pub struct HTTP {
/// HTTP implements the http interface. /// HTTP implements the http interface.
impl HTTP { impl HTTP {
/// new returns a new HTTP. /// new returns a new HTTP.
#[instrument(skip_all)]
pub fn new(scheme: &str) -> Result<HTTP> { pub fn new(scheme: &str) -> Result<HTTP> {
// Default TLS client config with no validation. // Default TLS client config with no validation.
let client_config_builder = rustls::ClientConfig::builder() let client_config_builder = rustls::ClientConfig::builder()
@ -50,22 +51,11 @@ impl HTTP {
.with_custom_certificate_verifier(NoVerifier::new()) .with_custom_certificate_verifier(NoVerifier::new())
.with_no_client_auth(); .with_no_client_auth();
// Disable automatic compression to prevent double-decompression issues.
//
// Problem scenario:
// 1. Origin server supports gzip and returns "content-encoding: gzip" header.
// 2. Backend decompresses the response and stores uncompressed content to disk.
// 3. When user's client downloads via dfdaemon proxy, the original "content-encoding: gzip".
// header is forwarded to it.
// 4. User's client attempts to decompress the already-decompressed content, causing errors.
//
// Solution: Disable all compression formats (gzip, brotli, zstd, deflate) to ensure
// we receive and store uncompressed content, eliminating the double-decompression issue.
let client = reqwest::Client::builder() let client = reqwest::Client::builder()
.no_gzip() .gzip(true)
.no_brotli() .brotli(true)
.no_zstd() .zstd(true)
.no_deflate() .deflate(true)
.use_preconfigured_tls(client_config_builder) .use_preconfigured_tls(client_config_builder)
.pool_max_idle_per_host(super::POOL_MAX_IDLE_PER_HOST) .pool_max_idle_per_host(super::POOL_MAX_IDLE_PER_HOST)
.tcp_keepalive(super::KEEP_ALIVE_INTERVAL) .tcp_keepalive(super::KEEP_ALIVE_INTERVAL)
@ -85,6 +75,7 @@ impl HTTP {
} }
/// client returns a new reqwest client. /// client returns a new reqwest client.
#[instrument(skip_all)]
fn client( fn client(
&self, &self,
client_cert: Option<Vec<CertificateDer<'static>>>, client_cert: Option<Vec<CertificateDer<'static>>>,
@ -99,22 +90,11 @@ impl HTTP {
.with_root_certificates(root_cert_store) .with_root_certificates(root_cert_store)
.with_no_client_auth(); .with_no_client_auth();
// Disable automatic compression to prevent double-decompression issues.
//
// Problem scenario:
// 1. Origin server supports gzip and returns "content-encoding: gzip" header.
// 2. Backend decompresses the response and stores uncompressed content to disk.
// 3. When user's client downloads via dfdaemon proxy, the original "content-encoding: gzip".
// header is forwarded to it.
// 4. User's client attempts to decompress the already-decompressed content, causing errors.
//
// Solution: Disable all compression formats (gzip, brotli, zstd, deflate) to ensure
// we receive and store uncompressed content, eliminating the double-decompression issue.
let client = reqwest::Client::builder() let client = reqwest::Client::builder()
.no_gzip() .gzip(true)
.no_brotli() .brotli(true)
.no_zstd() .zstd(true)
.no_deflate() .deflate(true)
.use_preconfigured_tls(client_config_builder) .use_preconfigured_tls(client_config_builder)
.build()?; .build()?;
@ -137,6 +117,7 @@ impl HTTP {
#[tonic::async_trait] #[tonic::async_trait]
impl super::Backend for HTTP { impl super::Backend for HTTP {
/// scheme returns the scheme of the HTTP backend. /// scheme returns the scheme of the HTTP backend.
#[instrument(skip_all)]
fn scheme(&self) -> String { fn scheme(&self) -> String {
self.scheme.clone() self.scheme.clone()
} }
@ -160,13 +141,6 @@ impl super::Backend for HTTP {
.client(request.client_cert)? .client(request.client_cert)?
.get(&request.url) .get(&request.url)
.headers(header) .headers(header)
// Add Range header to ensure Content-Length is returned in response headers.
// Some servers (especially when using Transfer-Encoding: chunked,
// refer to https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Transfer-Encoding.) may not
// include Content-Length in HEAD requests. Using "bytes=0-" requests the
// entire file starting from byte 0, forcing the server to include file size
// information in the response headers.
.header(reqwest::header::RANGE, "bytes=0-")
.timeout(request.timeout) .timeout(request.timeout)
.send() .send()
.await .await
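
For context on the `reqwest` builder change above: main disables reqwest's automatic compression handling (`no_gzip`, `no_brotli`, `no_zstd`, `no_deflate`) to avoid the double-decompression problem described in the comment shown on the main side, where the backend would store decompressed content while the original `content-encoding` header is still forwarded to the downstream client; v0.2.27 enables all four codecs instead. A minimal sketch of the main-style builder follows; the TLS, keep-alive, and connection-pool settings from the real code are left out:

```rust
/// Build a reqwest client that never auto-decompresses response bodies,
/// mirroring only the compression-related calls from the main-branch diff above.
fn build_passthrough_client() -> Result<reqwest::Client, reqwest::Error> {
    reqwest::Client::builder()
        .no_gzip()
        .no_brotli()
        .no_zstd()
        .no_deflate()
        .build()
}

fn main() {
    let client = build_passthrough_client().expect("failed to build reqwest client");
    // The client is only constructed here; issuing requests is out of scope for this sketch.
    let _ = client;
}
```

The permissive v0.2.27-style client is the same chain with `.gzip(true)`, `.brotli(true)`, `.zstd(true)`, and `.deflate(true)`, which requires the corresponding `reqwest` compression features (the workspace manifest above shows `brotli`, `zstd`, and `deflate` enabled).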

View File

@ -23,10 +23,11 @@ use libloading::Library;
use reqwest::header::HeaderMap; use reqwest::header::HeaderMap;
use rustls_pki_types::CertificateDer; use rustls_pki_types::CertificateDer;
use std::path::Path; use std::path::Path;
use std::str::FromStr;
use std::{collections::HashMap, pin::Pin, time::Duration}; use std::{collections::HashMap, pin::Pin, time::Duration};
use std::{fmt::Debug, fs}; use std::{fmt::Debug, fs};
use tokio::io::{AsyncRead, AsyncReadExt}; use tokio::io::{AsyncRead, AsyncReadExt};
use tracing::{error, info, warn}; use tracing::{error, info, instrument, warn};
use url::Url; use url::Url;
pub mod hdfs; pub mod hdfs;
@ -166,7 +167,7 @@ where
} }
/// The File Entry of a directory, including some relevant file metadata. /// The File Entry of a directory, including some relevant file metadata.
#[derive(Debug, PartialEq, Eq, Hash, Clone)] #[derive(Debug, PartialEq, Eq)]
pub struct DirEntry { pub struct DirEntry {
/// url is the url of the entry. /// url is the url of the entry.
pub url: String, pub url: String,
@ -226,6 +227,7 @@ pub struct BackendFactory {
/// https://github.com/dragonflyoss/client/tree/main/dragonfly-client-backend/examples/plugin/. /// https://github.com/dragonflyoss/client/tree/main/dragonfly-client-backend/examples/plugin/.
impl BackendFactory { impl BackendFactory {
/// new returns a new BackendFactory. /// new returns a new BackendFactory.
#[instrument(skip_all)]
pub fn new(plugin_dir: Option<&Path>) -> Result<Self> { pub fn new(plugin_dir: Option<&Path>) -> Result<Self> {
let mut backend_factory = Self::default(); let mut backend_factory = Self::default();
backend_factory.load_builtin_backends()?; backend_factory.load_builtin_backends()?;
@ -240,12 +242,14 @@ impl BackendFactory {
Ok(backend_factory) Ok(backend_factory)
} }
/// unsupported_download_directory returns whether the scheme does not support directory download. /// supported_download_directory returns whether the scheme supports directory download.
pub fn unsupported_download_directory(scheme: &str) -> bool { #[instrument(skip_all)]
scheme == http::HTTP_SCHEME || scheme == http::HTTPS_SCHEME pub fn supported_download_directory(scheme: &str) -> bool {
object_storage::Scheme::from_str(scheme).is_ok() || scheme == hdfs::HDFS_SCHEME
} }
/// build returns the backend by the scheme of the url. /// build returns the backend by the scheme of the url.
#[instrument(skip_all)]
pub fn build(&self, url: &str) -> Result<&(dyn Backend + Send + Sync)> { pub fn build(&self, url: &str) -> Result<&(dyn Backend + Send + Sync)> {
let url = Url::parse(url).or_err(ErrorType::ParseError)?; let url = Url::parse(url).or_err(ErrorType::ParseError)?;
let scheme = url.scheme(); let scheme = url.scheme();
@ -256,6 +260,7 @@ impl BackendFactory {
} }
/// load_builtin_backends loads the builtin backends. /// load_builtin_backends loads the builtin backends.
#[instrument(skip_all)]
fn load_builtin_backends(&mut self) -> Result<()> { fn load_builtin_backends(&mut self) -> Result<()> {
self.backends.insert( self.backends.insert(
"http".to_string(), "http".to_string(),
@ -325,12 +330,13 @@ impl BackendFactory {
} }
/// load_plugin_backends loads the plugin backends. /// load_plugin_backends loads the plugin backends.
#[instrument(skip_all)]
fn load_plugin_backends(&mut self, plugin_dir: &Path) -> Result<()> { fn load_plugin_backends(&mut self, plugin_dir: &Path) -> Result<()> {
let backend_plugin_dir = plugin_dir.join(NAME); let backend_plugin_dir = plugin_dir.join(NAME);
if !backend_plugin_dir.exists() { if !backend_plugin_dir.exists() {
warn!( warn!(
"skip loading plugin backends, because the plugin directory {} does not exist", "skip loading plugin backends, because the plugin directory {} does not exist",
backend_plugin_dir.display() plugin_dir.display()
); );
return Ok(()); return Ok(());
} }

View File

@ -177,6 +177,7 @@ pub struct ObjectStorage {
/// ObjectStorage implements the ObjectStorage trait. /// ObjectStorage implements the ObjectStorage trait.
impl ObjectStorage { impl ObjectStorage {
/// Returns ObjectStorage that implements the Backend trait. /// Returns ObjectStorage that implements the Backend trait.
#[instrument(skip_all)]
pub fn new(scheme: Scheme) -> ClientResult<ObjectStorage> { pub fn new(scheme: Scheme) -> ClientResult<ObjectStorage> {
// Initialize the reqwest client. // Initialize the reqwest client.
let client = reqwest::Client::builder() let client = reqwest::Client::builder()
@ -195,6 +196,7 @@ impl ObjectStorage {
} }
/// operator initializes the operator with the parsed URL and object storage. /// operator initializes the operator with the parsed URL and object storage.
#[instrument(skip_all)]
pub fn operator( pub fn operator(
&self, &self,
parsed_url: &super::object_storage::ParsedURL, parsed_url: &super::object_storage::ParsedURL,
@ -221,6 +223,7 @@ impl ObjectStorage {
} }
/// s3_operator initializes the S3 operator with the parsed URL and object storage. /// s3_operator initializes the S3 operator with the parsed URL and object storage.
#[instrument(skip_all)]
pub fn s3_operator( pub fn s3_operator(
&self, &self,
parsed_url: &super::object_storage::ParsedURL, parsed_url: &super::object_storage::ParsedURL,
@ -273,6 +276,7 @@ impl ObjectStorage {
} }
/// gcs_operator initializes the GCS operator with the parsed URL and object storage. /// gcs_operator initializes the GCS operator with the parsed URL and object storage.
#[instrument(skip_all)]
pub fn gcs_operator( pub fn gcs_operator(
&self, &self,
parsed_url: &super::object_storage::ParsedURL, parsed_url: &super::object_storage::ParsedURL,
@ -307,6 +311,7 @@ impl ObjectStorage {
} }
/// abs_operator initializes the ABS operator with the parsed URL and object storage. /// abs_operator initializes the ABS operator with the parsed URL and object storage.
#[instrument(skip_all)]
pub fn abs_operator( pub fn abs_operator(
&self, &self,
parsed_url: &super::object_storage::ParsedURL, parsed_url: &super::object_storage::ParsedURL,
@ -349,6 +354,7 @@ impl ObjectStorage {
} }
/// oss_operator initializes the OSS operator with the parsed URL and object storage. /// oss_operator initializes the OSS operator with the parsed URL and object storage.
#[instrument(skip_all)]
pub fn oss_operator( pub fn oss_operator(
&self, &self,
parsed_url: &super::object_storage::ParsedURL, parsed_url: &super::object_storage::ParsedURL,
@ -392,6 +398,7 @@ impl ObjectStorage {
} }
/// obs_operator initializes the OBS operator with the parsed URL and object storage. /// obs_operator initializes the OBS operator with the parsed URL and object storage.
#[instrument(skip_all)]
pub fn obs_operator( pub fn obs_operator(
&self, &self,
parsed_url: &super::object_storage::ParsedURL, parsed_url: &super::object_storage::ParsedURL,
@ -480,6 +487,7 @@ impl ObjectStorage {
#[tonic::async_trait] #[tonic::async_trait]
impl crate::Backend for ObjectStorage { impl crate::Backend for ObjectStorage {
/// scheme returns the scheme of the object storage. /// scheme returns the scheme of the object storage.
#[instrument(skip_all)]
fn scheme(&self) -> String { fn scheme(&self) -> String {
self.scheme.to_string() self.scheme.to_string()
} }

View File

@ -13,7 +13,6 @@ build = "build.rs"
[dependencies] [dependencies]
dragonfly-client-core.workspace = true dragonfly-client-core.workspace = true
dragonfly-client-util.workspace = true dragonfly-client-util.workspace = true
local-ip-address.workspace = true
clap.workspace = true clap.workspace = true
regex.workspace = true regex.workspace = true
serde.workspace = true serde.workspace = true
@ -29,9 +28,8 @@ bytesize-serde.workspace = true
tonic.workspace = true tonic.workspace = true
rustls-pki-types.workspace = true rustls-pki-types.workspace = true
rcgen.workspace = true rcgen.workspace = true
reqwest.workspace = true
home = "0.5.11" home = "0.5.11"
local-ip-address = "0.6.5"
hostname = "^0.4" hostname = "^0.4"
humantime-serde = "1.1.1" humantime-serde = "1.1.1"
serde_regex = "1.1.0" serde_regex = "1.1.0"
http-serde = "2.1.1"

View File

@ -146,12 +146,6 @@ fn default_download_piece_timeout() -> Duration {
Duration::from_secs(120) Duration::from_secs(120)
} }
/// default_collected_download_piece_timeout is the default timeout for collecting one piece from the parent in the stream.
#[inline]
fn default_collected_download_piece_timeout() -> Duration {
Duration::from_secs(10)
}
/// default_download_concurrent_piece_count is the default number of concurrent pieces to download. /// default_download_concurrent_piece_count is the default number of concurrent pieces to download.
#[inline] #[inline]
fn default_download_concurrent_piece_count() -> u32 { fn default_download_concurrent_piece_count() -> u32 {
@ -164,12 +158,6 @@ fn default_download_max_schedule_count() -> u32 {
5 5
} }
/// default_tracing_path is the default tracing path for dfdaemon.
#[inline]
fn default_tracing_path() -> Option<PathBuf> {
Some(PathBuf::from("/v1/traces"))
}
/// default_scheduler_announce_interval is the default interval to announce peer to the scheduler. /// default_scheduler_announce_interval is the default interval to announce peer to the scheduler.
#[inline] #[inline]
fn default_scheduler_announce_interval() -> Duration { fn default_scheduler_announce_interval() -> Duration {
@ -226,6 +214,18 @@ fn default_storage_cache_capacity() -> ByteSize {
ByteSize::mib(64) ByteSize::mib(64)
} }
/// default_seed_peer_cluster_id is the default cluster id of seed peer.
#[inline]
fn default_seed_peer_cluster_id() -> u64 {
1
}
/// default_seed_peer_keepalive_interval is the default interval to keepalive with manager.
#[inline]
fn default_seed_peer_keepalive_interval() -> Duration {
Duration::from_secs(15)
}
/// default_gc_interval is the default interval to do gc. /// default_gc_interval is the default interval to do gc.
#[inline] #[inline]
fn default_gc_interval() -> Duration { fn default_gc_interval() -> Duration {
@ -238,12 +238,6 @@ fn default_gc_policy_task_ttl() -> Duration {
Duration::from_secs(21_600) Duration::from_secs(21_600)
} }
/// default_gc_policy_dist_threshold is the default threshold of the disk usage to do gc.
#[inline]
fn default_gc_policy_dist_threshold() -> ByteSize {
ByteSize::default()
}
/// default_gc_policy_dist_high_threshold_percent is the default high threshold percent of the disk usage. /// default_gc_policy_dist_high_threshold_percent is the default high threshold percent of the disk usage.
#[inline] #[inline]
fn default_gc_policy_dist_high_threshold_percent() -> u8 { fn default_gc_policy_dist_high_threshold_percent() -> u8 {
@ -401,12 +395,6 @@ pub struct Host {
/// ip is the advertise ip of the host. /// ip is the advertise ip of the host.
pub ip: Option<IpAddr>, pub ip: Option<IpAddr>,
/// scheduler_cluster_id is the ID of the cluster to which the scheduler belongs.
/// NOTE: This field is used to identify the cluster to which the scheduler belongs.
/// If this flag is set, the idc, location, hostname and ip will be ignored when listing schedulers.
#[serde(rename = "schedulerClusterID")]
pub scheduler_cluster_id: Option<u64>,
} }
/// Host implements Default. /// Host implements Default.
@ -417,7 +405,6 @@ impl Default for Host {
location: None, location: None,
hostname: default_host_hostname(), hostname: default_host_hostname(),
ip: None, ip: None,
scheduler_cluster_id: None,
} }
} }
} }
@ -487,14 +474,6 @@ pub struct Download {
#[serde(default = "default_download_piece_timeout", with = "humantime_serde")] #[serde(default = "default_download_piece_timeout", with = "humantime_serde")]
pub piece_timeout: Duration, pub piece_timeout: Duration,
/// collected_piece_timeout is the timeout for collecting one piece from the parent in the
/// stream.
#[serde(
default = "default_collected_download_piece_timeout",
with = "humantime_serde"
)]
pub collected_piece_timeout: Duration,
/// concurrent_piece_count is the number of concurrent pieces to download. /// concurrent_piece_count is the number of concurrent pieces to download.
#[serde(default = "default_download_concurrent_piece_count")] #[serde(default = "default_download_concurrent_piece_count")]
#[validate(range(min = 1))] #[validate(range(min = 1))]
@ -509,7 +488,6 @@ impl Default for Download {
parent_selector: ParentSelector::default(), parent_selector: ParentSelector::default(),
rate_limit: default_download_rate_limit(), rate_limit: default_download_rate_limit(),
piece_timeout: default_download_piece_timeout(), piece_timeout: default_download_piece_timeout(),
collected_piece_timeout: default_collected_download_piece_timeout(),
concurrent_piece_count: default_download_concurrent_piece_count(), concurrent_piece_count: default_download_concurrent_piece_count(),
} }
} }
@ -912,6 +890,18 @@ pub struct SeedPeer {
/// kind is the type of seed peer. /// kind is the type of seed peer.
#[serde(default, rename = "type")] #[serde(default, rename = "type")]
pub kind: HostType, pub kind: HostType,
/// cluster_id is the cluster id of the seed peer cluster.
#[serde(default = "default_seed_peer_cluster_id", rename = "clusterID")]
#[validate(range(min = 1))]
pub cluster_id: u64,
/// keepalive_interval is the interval to keep alive with manager.
#[serde(
default = "default_seed_peer_keepalive_interval",
with = "humantime_serde"
)]
pub keepalive_interval: Duration,
} }
/// SeedPeer implements Default. /// SeedPeer implements Default.
@ -920,6 +910,8 @@ impl Default for SeedPeer {
SeedPeer { SeedPeer {
enable: false, enable: false,
kind: HostType::Normal, kind: HostType::Normal,
cluster_id: default_seed_peer_cluster_id(),
keepalive_interval: default_seed_peer_keepalive_interval(),
} }
} }
} }
@ -1001,32 +993,30 @@ pub struct Storage {
/// cache_capacity is the cache capacity for downloading, default is 100. /// cache_capacity is the cache capacity for downloading, default is 100.
/// ///
/// Cache storage: /// Cache storage:
/// 1. Users can preheat task by caching to memory (via CacheTask) or to disk (via Task). /// 1. Users can create preheating jobs and preheat tasks to memory and disk by setting `load_to_cache` to `true`.
/// For more details, refer to https://github.com/dragonflyoss/api/blob/main/proto/dfdaemon.proto#L174. /// For more details, refer to https://github.com/dragonflyoss/api/blob/main/proto/common.proto#L443.
/// 2. If the download hits the memory cache, it will be faster than reading from the disk, because there is no /// 2. If the download hits the memory cache, it will be faster than reading from the disk, because there is no
/// page cache for the first read. /// page cache for the first read.
/// ///
/// ```text /// ```text
/// +--------+ /// 1.Preheat
/// │ Source │
/// +--------+
/// ^ ^ Preheat
/// │ │ |
/// +-----------------+ │ │ +----------------------------+
/// │ Other Peers │ │ │ │ Peer | │
/// │ │ │ │ │ v │
/// │ +----------+ │ │ │ │ +----------+ │
/// │ │ Cache |<--|----------|<-Miss--| Cache |--Hit-->|<----Download CacheTask
/// │ +----------+ │ │ │ +----------+ │
/// │ │ │ │ │
/// │ +----------+ │ │ │ +----------+ │
/// │ │ Disk |<--|----------|<-Miss--| Disk |--Hit-->|<----Download Task
/// │ +----------+ │ │ +----------+ │
/// │ │ │ ^ │
/// │ │ │ | │
/// +-----------------+ +----------------------------+
/// | /// |
/// Preheat /// |
/// +--------------------------------------------------+
/// | | Peer |
/// | | +-----------+ |
/// | | -- Partial -->| Cache | |
/// | | | +-----------+ |
/// | v | | | |
/// | Download | Miss | |
/// | Task -->| | --- Hit ------>|<-- 2.Download
/// | | | ^ |
/// | | v | |
/// | | +-----------+ | |
/// | -- Full -->| Disk |---------- |
/// | +-----------+ |
/// | |
/// +--------------------------------------------------+
/// ``` /// ```
#[serde(with = "bytesize_serde", default = "default_storage_cache_capacity")] #[serde(with = "bytesize_serde", default = "default_storage_cache_capacity")]
pub cache_capacity: ByteSize, pub cache_capacity: ByteSize,
@ -1059,19 +1049,6 @@ pub struct Policy {
)] )]
pub task_ttl: Duration, pub task_ttl: Duration,
/// dist_threshold optionally defines a specific disk capacity to be used as the base for
/// calculating GC trigger points with `dist_high_threshold_percent` and `dist_low_threshold_percent`.
///
/// - If a value is provided (e.g., "500GB"), the percentage-based thresholds (`dist_high_threshold_percent`,
/// `dist_low_threshold_percent`) are applied relative to this specified capacity.
/// - If not provided or set to 0 (the default behavior), these percentage-based thresholds are applied
/// relative to the total actual disk space.
///
/// This allows dfdaemon to effectively manage a logical portion of the disk for its cache,
/// rather than always considering the entire disk volume.
#[serde(with = "bytesize_serde", default = "default_gc_policy_dist_threshold")]
pub dist_threshold: ByteSize,
/// dist_high_threshold_percent is the high threshold percent of the disk usage. /// dist_high_threshold_percent is the high threshold percent of the disk usage.
/// If the disk usage is greater than the threshold, dfdaemon will do gc. /// If the disk usage is greater than the threshold, dfdaemon will do gc.
#[serde(default = "default_gc_policy_dist_high_threshold_percent")] #[serde(default = "default_gc_policy_dist_high_threshold_percent")]
@ -1089,7 +1066,6 @@ pub struct Policy {
impl Default for Policy { impl Default for Policy {
fn default() -> Self { fn default() -> Self {
Policy { Policy {
dist_threshold: default_gc_policy_dist_threshold(),
task_ttl: default_gc_policy_task_ttl(), task_ttl: default_gc_policy_task_ttl(),
dist_high_threshold_percent: default_gc_policy_dist_high_threshold_percent(), dist_high_threshold_percent: default_gc_policy_dist_high_threshold_percent(),
dist_low_threshold_percent: default_gc_policy_dist_low_threshold_percent(), dist_low_threshold_percent: default_gc_policy_dist_low_threshold_percent(),
@ -1449,37 +1425,11 @@ pub struct Stats {
} }
/// Tracing is the tracing configuration for dfdaemon. /// Tracing is the tracing configuration for dfdaemon.
#[derive(Debug, Clone, Validate, Deserialize)] #[derive(Debug, Clone, Default, Validate, Deserialize)]
#[serde(default, rename_all = "camelCase")] #[serde(default, rename_all = "camelCase")]
pub struct Tracing { pub struct Tracing {
/// Protocol specifies the communication protocol for the tracing server. /// addr is the address to report tracing log.
/// Supported values: "http", "https", "grpc" (default: None). pub addr: Option<String>,
/// This determines how tracing logs are transmitted to the server.
pub protocol: Option<String>,
/// endpoint is the endpoint to report tracing log, example: "localhost:4317".
pub endpoint: Option<String>,
/// path is the path to report tracing log, example: "/v1/traces" if the protocol is "http" or
/// "https".
#[serde(default = "default_tracing_path")]
pub path: Option<PathBuf>,
/// headers is the headers to report tracing log.
#[serde(with = "http_serde::header_map")]
pub headers: reqwest::header::HeaderMap,
}
/// Tracing implements Default.
impl Default for Tracing {
fn default() -> Self {
Self {
protocol: None,
endpoint: None,
path: default_tracing_path(),
headers: reqwest::header::HeaderMap::new(),
}
}
} }
/// Config is the configuration for dfdaemon. /// Config is the configuration for dfdaemon.
@ -1989,6 +1939,11 @@ key: /etc/ssl/private/client.pem
let default_seed_peer = SeedPeer::default(); let default_seed_peer = SeedPeer::default();
assert!(!default_seed_peer.enable); assert!(!default_seed_peer.enable);
assert_eq!(default_seed_peer.kind, HostType::Normal); assert_eq!(default_seed_peer.kind, HostType::Normal);
assert_eq!(default_seed_peer.cluster_id, 1);
assert_eq!(
default_seed_peer.keepalive_interval,
default_seed_peer_keepalive_interval()
);
} }
#[test] #[test]
@ -1996,9 +1951,20 @@ key: /etc/ssl/private/client.pem
let valid_seed_peer = SeedPeer { let valid_seed_peer = SeedPeer {
enable: true, enable: true,
kind: HostType::Weak, kind: HostType::Weak,
cluster_id: 5,
keepalive_interval: Duration::from_secs(90),
}; };
assert!(valid_seed_peer.validate().is_ok()); assert!(valid_seed_peer.validate().is_ok());
let invalid_seed_peer = SeedPeer {
enable: true,
kind: HostType::Weak,
cluster_id: 0,
keepalive_interval: Duration::from_secs(90),
};
assert!(invalid_seed_peer.validate().is_err());
} }
#[test] #[test]
@ -2015,6 +1981,8 @@ key: /etc/ssl/private/client.pem
assert!(seed_peer.enable); assert!(seed_peer.enable);
assert_eq!(seed_peer.kind, HostType::Super); assert_eq!(seed_peer.kind, HostType::Super);
assert_eq!(seed_peer.cluster_id, 2);
assert_eq!(seed_peer.keepalive_interval, Duration::from_secs(60));
} }
#[test] #[test]
@ -2065,18 +2033,18 @@ key: /etc/ssl/private/client.pem
fn validate_policy() { fn validate_policy() {
let valid_policy = Policy { let valid_policy = Policy {
task_ttl: Duration::from_secs(12 * 3600), task_ttl: Duration::from_secs(12 * 3600),
dist_threshold: ByteSize::mb(100),
dist_high_threshold_percent: 90, dist_high_threshold_percent: 90,
dist_low_threshold_percent: 70, dist_low_threshold_percent: 70,
}; };
assert!(valid_policy.validate().is_ok()); assert!(valid_policy.validate().is_ok());
let invalid_policy = Policy { let invalid_policy = Policy {
task_ttl: Duration::from_secs(12 * 3600), task_ttl: Duration::from_secs(12 * 3600),
dist_threshold: ByteSize::mb(100),
dist_high_threshold_percent: 100, dist_high_threshold_percent: 100,
dist_low_threshold_percent: 70, dist_low_threshold_percent: 70,
}; };
assert!(invalid_policy.validate().is_err()); assert!(invalid_policy.validate().is_err());
} }
@ -2176,19 +2144,12 @@ key: /etc/ssl/private/client.pem
fn deserialize_tracing_correctly() { fn deserialize_tracing_correctly() {
let json_data = r#" let json_data = r#"
{ {
"protocol": "http", "addr": "http://tracing.example.com"
"endpoint": "tracing.example.com",
"path": "/v1/traces",
"headers": {
"X-Custom-Header": "value"
}
}"#; }"#;
let tracing: Tracing = serde_json::from_str(json_data).unwrap(); let tracing: Tracing = serde_json::from_str(json_data).unwrap();
assert_eq!(tracing.protocol, Some("http".to_string()));
assert_eq!(tracing.endpoint, Some("tracing.example.com".to_string())); assert_eq!(tracing.addr, Some("http://tracing.example.com".to_string()));
assert_eq!(tracing.path, Some(PathBuf::from("/v1/traces")));
assert!(tracing.headers.contains_key("X-Custom-Header"));
} }
#[test] #[test]
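
For context on the `Tracing` change in this file: main configures the exporter with separate `protocol`, `endpoint`, and `path` fields plus a header map, while v0.2.27 collapses the section to a single optional `addr` string. Below is a minimal, hedged sketch of deserializing a main-style section, using a simplified mirror of the struct rather than the real one (the real code stores `headers` as a `reqwest::header::HeaderMap` via `http_serde::header_map`; a plain `HashMap` keeps this sketch dependency-light):

```rust
use std::collections::HashMap;
use std::path::PathBuf;

use serde::Deserialize;

/// Simplified stand-in for the main-branch `Tracing` section, for illustration only.
#[derive(Debug, Default, Deserialize)]
#[serde(default, rename_all = "camelCase")]
struct Tracing {
    protocol: Option<String>,
    endpoint: Option<String>,
    path: Option<PathBuf>,
    headers: HashMap<String, String>,
}

fn main() -> Result<(), serde_json::Error> {
    // Mirrors the input used by the `deserialize_tracing_correctly` test shown above.
    let json_data = r#"
    {
        "protocol": "http",
        "endpoint": "tracing.example.com",
        "path": "/v1/traces",
        "headers": { "X-Custom-Header": "value" }
    }"#;

    let tracing: Tracing = serde_json::from_str(json_data)?;
    assert_eq!(tracing.protocol.as_deref(), Some("http"));
    assert_eq!(tracing.endpoint.as_deref(), Some("tracing.example.com"));
    assert_eq!(tracing.path, Some(PathBuf::from("/v1/traces")));
    assert_eq!(
        tracing.headers.get("X-Custom-Header").map(String::as_str),
        Some("value")
    );
    Ok(())
}
```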

View File

@ -104,7 +104,7 @@ pub fn default_lock_dir() -> PathBuf {
/// default_plugin_dir is the default plugin directory for client. /// default_plugin_dir is the default plugin directory for client.
pub fn default_plugin_dir() -> PathBuf { pub fn default_plugin_dir() -> PathBuf {
#[cfg(target_os = "linux")] #[cfg(target_os = "linux")]
return PathBuf::from("/usr/local/lib/dragonfly/plugins/"); return PathBuf::from("/var/lib/dragonfly/plugins/");
#[cfg(target_os = "macos")] #[cfg(target_os = "macos")]
return home::home_dir().unwrap().join(".dragonfly").join("plugins"); return home::home_dir().unwrap().join(".dragonfly").join("plugins");

View File

@ -23,6 +23,7 @@ tokio.workspace = true
anyhow.workspace = true anyhow.workspace = true
tracing.workspace = true tracing.workspace = true
toml_edit.workspace = true toml_edit.workspace = true
toml.workspace = true
url.workspace = true url.workspace = true
tempfile.workspace = true tempfile.workspace = true
serde_json.workspace = true serde_json.workspace = true

View File

@ -64,8 +64,12 @@ struct Args {
)] )]
log_max_files: usize, log_max_files: usize,
#[arg(long, default_value_t = false, help = "Specify whether to print log")] #[arg(
console: bool, long = "verbose",
default_value_t = false,
help = "Specify whether to print log"
)]
verbose: bool,
#[arg( #[arg(
short = 'V', short = 'V',
@ -90,12 +94,7 @@ async fn main() -> Result<(), anyhow::Error> {
args.log_level, args.log_level,
args.log_max_files, args.log_max_files,
None, None,
None, args.verbose,
None,
None,
None,
false,
args.console,
); );
// Load config. // Load config.

View File

@ -22,16 +22,17 @@ tracing.workspace = true
prost-wkt-types.workspace = true prost-wkt-types.workspace = true
tokio.workspace = true tokio.workspace = true
tokio-util.workspace = true tokio-util.workspace = true
sha2.workspace = true
crc32fast.workspace = true crc32fast.workspace = true
fs2.workspace = true fs2.workspace = true
lru.workspace = true
bytes.workspace = true bytes.workspace = true
bytesize.workspace = true bytesize.workspace = true
num_cpus = "1.17" num_cpus = "1.0"
bincode = "1.3.3" bincode = "1.3.3"
walkdir = "2.5.0"
[dev-dependencies] [dev-dependencies]
tempfile.workspace = true tempdir = "0.3"
criterion = "0.5" criterion = "0.5"
[[bench]] [[bench]]

View File

@ -76,32 +76,30 @@ impl Task {
/// Cache is the cache for storing piece content by LRU algorithm. /// Cache is the cache for storing piece content by LRU algorithm.
/// ///
/// Cache storage: /// Cache storage:
/// 1. Users can preheat task by caching to memory (via CacheTask) or to disk (via Task). /// 1. Users can create preheating jobs and preheat tasks to memory and disk by setting `load_to_cache` to `true`.
/// For more details, refer to https://github.com/dragonflyoss/api/blob/main/proto/dfdaemon.proto#L174. /// For more details, refer to https://github.com/dragonflyoss/api/blob/main/proto/common.proto#L443.
/// 2. If the download hits the memory cache, it will be faster than reading from the disk, because there is no /// 2. If the download hits the memory cache, it will be faster than reading from the disk, because there is no
/// page cache for the first read. /// page cache for the first read.
/// ///
/// ```text /// ```text
/// +--------+ /// 1.Preheat
/// │ Source │
/// +--------+
/// ^ ^ Preheat
/// │ │ |
/// +-----------------+ │ │ +----------------------------+
/// │ Other Peers │ │ │ │ Peer | │
/// │ │ │ │ │ v │
/// │ +----------+ │ │ │ │ +----------+ │
/// │ │ Cache |<--|----------|<-Miss--| Cache |--Hit-->|<----Download CacheTask
/// │ +----------+ │ │ │ +----------+ │
/// │ │ │ │ │
/// │ +----------+ │ │ │ +----------+ │
/// │ │ Disk |<--|----------|<-Miss--| Disk |--Hit-->|<----Download Task
/// │ +----------+ │ │ +----------+ │
/// │ │ │ ^ │
/// │ │ │ | │
/// +-----------------+ +----------------------------+
/// | /// |
/// Preheat /// |
/// +--------------------------------------------------+
/// | | Peer |
/// | | +-----------+ |
/// | | -- Partial -->| Cache | |
/// | | | +-----------+ |
/// | v | | | |
/// | Download | Miss | |
/// | Task -->| | --- Hit ------>|<-- 2.Download
/// | | | ^ |
/// | | v | |
/// | | +-----------+ | |
/// | -- Full -->| Disk |---------- |
/// | +-----------+ |
/// | |
/// +--------------------------------------------------+
/// ``` /// ```
/// Task is the metadata of the task. /// Task is the metadata of the task.
#[derive(Clone)] #[derive(Clone)]

View File

@ -14,7 +14,6 @@
* limitations under the License. * limitations under the License.
*/ */
use bytesize::ByteSize;
use dragonfly_api::common::v2::Range; use dragonfly_api::common::v2::Range;
use dragonfly_client_config::dfdaemon::Config; use dragonfly_client_config::dfdaemon::Config;
use dragonfly_client_core::{Error, Result}; use dragonfly_client_core::{Error, Result};
@ -28,7 +27,6 @@ use tokio::io::{
}; };
use tokio_util::io::InspectReader; use tokio_util::io::InspectReader;
use tracing::{error, info, instrument, warn}; use tracing::{error, info, instrument, warn};
use walkdir::WalkDir;
/// DEFAULT_CONTENT_DIR is the default directory for store content. /// DEFAULT_CONTENT_DIR is the default directory for store content.
pub const DEFAULT_CONTENT_DIR: &str = "content"; pub const DEFAULT_CONTENT_DIR: &str = "content";
@ -69,6 +67,7 @@ pub struct WritePersistentCacheTaskResponse {
/// Content implements the content storage. /// Content implements the content storage.
impl Content { impl Content {
/// new returns a new content. /// new returns a new content.
#[instrument(skip_all)]
pub async fn new(config: Arc<Config>, dir: &Path) -> Result<Content> { pub async fn new(config: Arc<Config>, dir: &Path) -> Result<Content> {
let dir = dir.join(DEFAULT_CONTENT_DIR); let dir = dir.join(DEFAULT_CONTENT_DIR);
@ -86,45 +85,21 @@ impl Content {
} }
/// available_space returns the available space of the disk. /// available_space returns the available space of the disk.
#[instrument(skip_all)]
pub fn available_space(&self) -> Result<u64> { pub fn available_space(&self) -> Result<u64> {
let dist_threshold = self.config.gc.policy.dist_threshold;
if dist_threshold != ByteSize::default() {
let usage_space = WalkDir::new(&self.dir)
.into_iter()
.filter_map(|entry| entry.ok())
.filter_map(|entry| entry.metadata().ok())
.filter(|metadata| metadata.is_file())
.fold(0, |acc, m| acc + m.len());
if usage_space >= dist_threshold.as_u64() {
warn!(
"usage space {} is greater than dist threshold {}, no need to calculate available space",
usage_space, dist_threshold
);
return Ok(0);
}
return Ok(dist_threshold.as_u64() - usage_space);
}
let stat = fs2::statvfs(&self.dir)?; let stat = fs2::statvfs(&self.dir)?;
Ok(stat.available_space()) Ok(stat.available_space())
} }
/// total_space returns the total space of the disk. /// total_space returns the total space of the disk.
#[instrument(skip_all)]
pub fn total_space(&self) -> Result<u64> { pub fn total_space(&self) -> Result<u64> {
// If the dist_threshold is set, return it directly.
let dist_threshold = self.config.gc.policy.dist_threshold;
if dist_threshold != ByteSize::default() {
return Ok(dist_threshold.as_u64());
}
let stat = fs2::statvfs(&self.dir)?; let stat = fs2::statvfs(&self.dir)?;
Ok(stat.total_space()) Ok(stat.total_space())
} }
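
As a standalone illustration of the threshold accounting added on the left (main) side: when `gc.policy.dist_threshold` is non-zero, the usable space is the threshold minus whatever is already stored under the content directory, clamped at zero. The function name below is illustrative; the body mirrors the hunk above:

```rust
use std::path::Path;
use walkdir::WalkDir;

/// Available space under a fixed distribution threshold (both values in bytes).
fn available_under_threshold(content_dir: &Path, dist_threshold: u64) -> u64 {
    // Sum the sizes of all regular files below the content directory.
    let usage: u64 = WalkDir::new(content_dir)
        .into_iter()
        .filter_map(|entry| entry.ok())
        .filter_map(|entry| entry.metadata().ok())
        .filter(|metadata| metadata.is_file())
        .map(|metadata| metadata.len())
        .sum();

    // Once usage reaches the threshold there is no space left to hand out.
    dist_threshold.saturating_sub(usage)
}
```

With `dist_threshold` left at its default, both sides fall back to `fs2::statvfs` and report the real free space of the filesystem.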
/// has_enough_space checks if the storage has enough space to store the content. /// has_enough_space checks if the storage has enough space to store the content.
#[instrument(skip_all)]
pub fn has_enough_space(&self, content_length: u64) -> Result<bool> { pub fn has_enough_space(&self, content_length: u64) -> Result<bool> {
let available_space = self.available_space()?; let available_space = self.available_space()?;
if available_space < content_length { if available_space < content_length {
@ -140,6 +115,7 @@ impl Content {
} }
/// is_same_dev_inode checks if the source and target are the same device and inode. /// is_same_dev_inode checks if the source and target are the same device and inode.
#[instrument(skip_all)]
async fn is_same_dev_inode<P: AsRef<Path>, Q: AsRef<Path>>( async fn is_same_dev_inode<P: AsRef<Path>, Q: AsRef<Path>>(
&self, &self,
source: P, source: P,
@ -165,6 +141,7 @@ impl Content {
} }
/// is_same_dev_inode_as_task checks if the task and target are the same device and inode. /// is_same_dev_inode_as_task checks if the task and target are the same device and inode.
#[instrument(skip_all)]
pub async fn is_same_dev_inode_as_task(&self, task_id: &str, to: &Path) -> Result<bool> { pub async fn is_same_dev_inode_as_task(&self, task_id: &str, to: &Path) -> Result<bool> {
let task_path = self.get_task_path(task_id); let task_path = self.get_task_path(task_id);
self.is_same_dev_inode(&task_path, to).await self.is_same_dev_inode(&task_path, to).await
@ -175,7 +152,6 @@ impl Content {
/// Behavior of `create_task`: /// Behavior of `create_task`:
/// 1. If the task already exists, return the task path. /// 1. If the task already exists, return the task path.
/// 2. If the task does not exist, create the task directory and file. /// 2. If the task does not exist, create the task directory and file.
#[instrument(skip_all)]
pub async fn create_task(&self, task_id: &str, length: u64) -> Result<PathBuf> { pub async fn create_task(&self, task_id: &str, length: u64) -> Result<PathBuf> {
let task_path = self.get_task_path(task_id); let task_path = self.get_task_path(task_id);
if task_path.exists() { if task_path.exists() {
@ -268,6 +244,7 @@ impl Content {
} }
/// delete_task deletes the task content. /// delete_task deletes the task content.
#[instrument(skip_all)]
pub async fn delete_task(&self, task_id: &str) -> Result<()> { pub async fn delete_task(&self, task_id: &str) -> Result<()> {
info!("delete task content: {}", task_id); info!("delete task content: {}", task_id);
let task_path = self.get_task_path(task_id); let task_path = self.get_task_path(task_id);
@ -359,7 +336,6 @@ impl Content {
&self, &self,
task_id: &str, task_id: &str,
offset: u64, offset: u64,
expected_length: u64,
reader: &mut R, reader: &mut R,
) -> Result<WritePieceResponse> { ) -> Result<WritePieceResponse> {
// Open the file and seek to the offset. // Open the file and seek to the offset.
@ -394,13 +370,6 @@ impl Content {
error!("flush {:?} failed: {}", task_path, err); error!("flush {:?} failed: {}", task_path, err);
})?; })?;
if length != expected_length {
return Err(Error::Unknown(format!(
"expected length {} but got {}",
expected_length, length
)));
}
// Calculate the hash of the piece. // Calculate the hash of the piece.
Ok(WritePieceResponse { Ok(WritePieceResponse {
length, length,
@ -409,6 +378,7 @@ impl Content {
} }
/// get_task_path returns the task path by task id. /// get_task_path returns the task path by task id.
#[instrument(skip_all)]
fn get_task_path(&self, task_id: &str) -> PathBuf { fn get_task_path(&self, task_id: &str) -> PathBuf {
// The task path is split by the first 3 characters of the task id (sha256) to // The task path is split by the first 3 characters of the task id (sha256) to
// avoid too many files in one directory. // avoid too many files in one directory.
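
Concretely, the sharding rule in the comment above amounts to one extra path component; a sketch with an assumed `tasks_dir` root:

```rust
use std::path::{Path, PathBuf};

/// The first three characters of the sha256 task id become an intermediate directory,
/// so no single directory has to hold every task file.
fn sharded_task_path(tasks_dir: &Path, task_id: &str) -> PathBuf {
    tasks_dir.join(&task_id[..3]).join(task_id)
}

// sharded_task_path(Path::new("content/tasks"),
//     "d3c4e940ad06c47fc36ac67801e6f8e36cb400e2391708620bc7e865b102062c")
// => content/tasks/d3c/d3c4e940...062c
```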
@ -418,6 +388,7 @@ impl Content {
/// is_same_dev_inode_as_persistent_cache_task checks if the persistent cache task and target /// is_same_dev_inode_as_persistent_cache_task checks if the persistent cache task and target
/// are the same device and inode. /// are the same device and inode.
#[instrument(skip_all)]
pub async fn is_same_dev_inode_as_persistent_cache_task( pub async fn is_same_dev_inode_as_persistent_cache_task(
&self, &self,
task_id: &str, task_id: &str,
@ -432,7 +403,6 @@ impl Content {
/// Behavior of `create_persistent_cache_task`: /// Behavior of `create_persistent_cache_task`:
/// 1. If the persistent cache task already exists, return the persistent cache task path. /// 1. If the persistent cache task already exists, return the persistent cache task path.
/// 2. If the persistent cache task does not exist, create the persistent cache task directory and file. /// 2. If the persistent cache task does not exist, create the persistent cache task directory and file.
#[instrument(skip_all)]
pub async fn create_persistent_cache_task( pub async fn create_persistent_cache_task(
&self, &self,
task_id: &str, task_id: &str,
@ -581,7 +551,6 @@ impl Content {
&self, &self,
task_id: &str, task_id: &str,
offset: u64, offset: u64,
expected_length: u64,
reader: &mut R, reader: &mut R,
) -> Result<WritePieceResponse> { ) -> Result<WritePieceResponse> {
// Open the file and seek to the offset. // Open the file and seek to the offset.
@ -616,13 +585,6 @@ impl Content {
error!("flush {:?} failed: {}", task_path, err); error!("flush {:?} failed: {}", task_path, err);
})?; })?;
if length != expected_length {
return Err(Error::Unknown(format!(
"expected length {} but got {}",
expected_length, length
)));
}
// Calculate the hash of the piece. // Calculate the hash of the piece.
Ok(WritePieceResponse { Ok(WritePieceResponse {
length, length,
@ -631,6 +593,7 @@ impl Content {
} }
/// delete_task deletes the persistent cache task content. /// delete_task deletes the persistent cache task content.
#[instrument(skip_all)]
pub async fn delete_persistent_cache_task(&self, task_id: &str) -> Result<()> { pub async fn delete_persistent_cache_task(&self, task_id: &str) -> Result<()> {
info!("delete persistent cache task content: {}", task_id); info!("delete persistent cache task content: {}", task_id);
let persistent_cache_task_path = self.get_persistent_cache_task_path(task_id); let persistent_cache_task_path = self.get_persistent_cache_task_path(task_id);
@ -643,6 +606,7 @@ impl Content {
} }
/// get_persistent_cache_task_path returns the persistent cache task path by task id. /// get_persistent_cache_task_path returns the persistent cache task path by task id.
#[instrument(skip_all)]
fn get_persistent_cache_task_path(&self, task_id: &str) -> PathBuf { fn get_persistent_cache_task_path(&self, task_id: &str) -> PathBuf {
// The persistent cache task path is split by the first 3 characters of the task id (sha256) to // The persistent cache task path is split by the first 3 characters of the task id (sha256) to
// avoid too many files in one directory. // avoid too many files in one directory.
@ -670,11 +634,11 @@ pub fn calculate_piece_range(offset: u64, length: u64, range: Option<Range>) ->
mod tests { mod tests {
use super::*; use super::*;
use std::io::Cursor; use std::io::Cursor;
use tempfile::tempdir; use tempdir::TempDir;
#[tokio::test] #[tokio::test]
async fn test_create_task() { async fn test_create_task() {
let temp_dir = tempdir().unwrap(); let temp_dir = TempDir::new("content").unwrap();
let config = Arc::new(Config::default()); let config = Arc::new(Config::default());
let content = Content::new(config, temp_dir.path()).await.unwrap(); let content = Content::new(config, temp_dir.path()).await.unwrap();
@ -689,7 +653,7 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn test_hard_link_task() { async fn test_hard_link_task() {
let temp_dir = tempdir().unwrap(); let temp_dir = TempDir::new("content").unwrap();
let config = Arc::new(Config::default()); let config = Arc::new(Config::default());
let content = Content::new(config, temp_dir.path()).await.unwrap(); let content = Content::new(config, temp_dir.path()).await.unwrap();
@ -707,7 +671,7 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn test_copy_task() { async fn test_copy_task() {
let temp_dir = tempdir().unwrap(); let temp_dir = TempDir::new("content").unwrap();
let config = Arc::new(Config::default()); let config = Arc::new(Config::default());
let content = Content::new(config, temp_dir.path()).await.unwrap(); let content = Content::new(config, temp_dir.path()).await.unwrap();
@ -723,7 +687,7 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn test_delete_task() { async fn test_delete_task() {
let temp_dir = tempdir().unwrap(); let temp_dir = TempDir::new("content").unwrap();
let config = Arc::new(Config::default()); let config = Arc::new(Config::default());
let content = Content::new(config, temp_dir.path()).await.unwrap(); let content = Content::new(config, temp_dir.path()).await.unwrap();
@ -737,7 +701,7 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn test_read_piece() { async fn test_read_piece() {
let temp_dir = tempdir().unwrap(); let temp_dir = TempDir::new("content").unwrap();
let config = Arc::new(Config::default()); let config = Arc::new(Config::default());
let content = Content::new(config, temp_dir.path()).await.unwrap(); let content = Content::new(config, temp_dir.path()).await.unwrap();
@ -746,10 +710,7 @@ mod tests {
let data = b"hello, world!"; let data = b"hello, world!";
let mut reader = Cursor::new(data); let mut reader = Cursor::new(data);
content content.write_piece(task_id, 0, &mut reader).await.unwrap();
.write_piece(task_id, 0, 13, &mut reader)
.await
.unwrap();
let mut reader = content.read_piece(task_id, 0, 13, None).await.unwrap(); let mut reader = content.read_piece(task_id, 0, 13, None).await.unwrap();
let mut buffer = Vec::new(); let mut buffer = Vec::new();
@ -775,7 +736,7 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn test_write_piece() { async fn test_write_piece() {
let temp_dir = tempdir().unwrap(); let temp_dir = TempDir::new("content").unwrap();
let config = Arc::new(Config::default()); let config = Arc::new(Config::default());
let content = Content::new(config, temp_dir.path()).await.unwrap(); let content = Content::new(config, temp_dir.path()).await.unwrap();
@ -784,17 +745,14 @@ mod tests {
let data = b"test"; let data = b"test";
let mut reader = Cursor::new(data); let mut reader = Cursor::new(data);
let response = content let response = content.write_piece(task_id, 0, &mut reader).await.unwrap();
.write_piece(task_id, 0, 4, &mut reader)
.await
.unwrap();
assert_eq!(response.length, 4); assert_eq!(response.length, 4);
assert!(!response.hash.is_empty()); assert!(!response.hash.is_empty());
} }
#[tokio::test] #[tokio::test]
async fn test_create_persistent_task() { async fn test_create_persistent_task() {
let temp_dir = tempdir().unwrap(); let temp_dir = TempDir::new("content").unwrap();
let config = Arc::new(Config::default()); let config = Arc::new(Config::default());
let content = Content::new(config, temp_dir.path()).await.unwrap(); let content = Content::new(config, temp_dir.path()).await.unwrap();
@ -815,7 +773,7 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn test_hard_link_persistent_cache_task() { async fn test_hard_link_persistent_cache_task() {
let temp_dir = tempdir().unwrap(); let temp_dir = TempDir::new("content").unwrap();
let config = Arc::new(Config::default()); let config = Arc::new(Config::default());
let content = Content::new(config, temp_dir.path()).await.unwrap(); let content = Content::new(config, temp_dir.path()).await.unwrap();
@ -842,7 +800,7 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn test_copy_persistent_cache_task() { async fn test_copy_persistent_cache_task() {
let temp_dir = tempdir().unwrap(); let temp_dir = TempDir::new("content").unwrap();
let config = Arc::new(Config::default()); let config = Arc::new(Config::default());
let content = Content::new(config, temp_dir.path()).await.unwrap(); let content = Content::new(config, temp_dir.path()).await.unwrap();
@ -864,7 +822,7 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn test_delete_persistent_cache_task() { async fn test_delete_persistent_cache_task() {
let temp_dir = tempdir().unwrap(); let temp_dir = TempDir::new("content").unwrap();
let config = Arc::new(Config::default()); let config = Arc::new(Config::default());
let content = Content::new(config, temp_dir.path()).await.unwrap(); let content = Content::new(config, temp_dir.path()).await.unwrap();
@ -881,7 +839,7 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn test_read_persistent_cache_piece() { async fn test_read_persistent_cache_piece() {
let temp_dir = tempdir().unwrap(); let temp_dir = TempDir::new("content").unwrap();
let config = Arc::new(Config::default()); let config = Arc::new(Config::default());
let content = Content::new(config, temp_dir.path()).await.unwrap(); let content = Content::new(config, temp_dir.path()).await.unwrap();
@ -894,7 +852,7 @@ mod tests {
let data = b"hello, world!"; let data = b"hello, world!";
let mut reader = Cursor::new(data); let mut reader = Cursor::new(data);
content content
.write_persistent_cache_piece(task_id, 0, 13, &mut reader) .write_persistent_cache_piece(task_id, 0, &mut reader)
.await .await
.unwrap(); .unwrap();
@ -925,7 +883,7 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn test_write_persistent_cache_piece() { async fn test_write_persistent_cache_piece() {
let temp_dir = tempdir().unwrap(); let temp_dir = TempDir::new("content").unwrap();
let config = Arc::new(Config::default()); let config = Arc::new(Config::default());
let content = Content::new(config, temp_dir.path()).await.unwrap(); let content = Content::new(config, temp_dir.path()).await.unwrap();
@ -938,7 +896,7 @@ mod tests {
let data = b"test"; let data = b"test";
let mut reader = Cursor::new(data); let mut reader = Cursor::new(data);
let response = content let response = content
.write_persistent_cache_piece(task_id, 0, 4, &mut reader) .write_persistent_cache_piece(task_id, 0, &mut reader)
.await .await
.unwrap(); .unwrap();
assert_eq!(response.length, 4); assert_eq!(response.length, 4);
@ -948,36 +906,14 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn test_has_enough_space() { async fn test_has_enough_space() {
let config = Arc::new(Config::default()); let config = Arc::new(Config::default());
let temp_dir = tempdir().unwrap(); let dir = PathBuf::from("/tmp/dragonfly_test");
let content = Content::new(config, temp_dir.path()).await.unwrap(); let content = Content::new(config, &dir).await.unwrap();
let has_space = content.has_enough_space(1).unwrap(); let has_space = content.has_enough_space(1).unwrap();
assert!(has_space); assert!(has_space);
let has_space = content.has_enough_space(u64::MAX).unwrap(); let has_space = content.has_enough_space(u64::MAX).unwrap();
assert!(!has_space); assert!(!has_space);
let mut config = Config::default();
config.gc.policy.dist_threshold = ByteSize::mib(10);
let config = Arc::new(config);
let content = Content::new(config, temp_dir.path()).await.unwrap();
let file_path = Path::new(temp_dir.path())
.join(DEFAULT_CONTENT_DIR)
.join(DEFAULT_TASK_DIR)
.join("1mib");
let mut file = File::create(&file_path).await.unwrap();
let buffer = vec![0u8; ByteSize::mib(1).as_u64() as usize];
file.write_all(&buffer).await.unwrap();
file.flush().await.unwrap();
let has_space = content
.has_enough_space(ByteSize::mib(9).as_u64() + 1)
.unwrap();
assert!(!has_space);
let has_space = content.has_enough_space(ByteSize::mib(9).as_u64()).unwrap();
assert!(has_space);
} }
#[tokio::test] #[tokio::test]

View File

@ -27,6 +27,7 @@ use std::time::Duration;
use tokio::io::AsyncRead; use tokio::io::AsyncRead;
use tokio::time::sleep; use tokio::time::sleep;
use tokio_util::either::Either; use tokio_util::either::Either;
use tokio_util::io::InspectReader;
use tracing::{debug, error, info, instrument, warn}; use tracing::{debug, error, info, instrument, warn};
pub mod cache; pub mod cache;
@ -55,6 +56,7 @@ pub struct Storage {
/// Storage implements the storage. /// Storage implements the storage.
impl Storage { impl Storage {
/// new returns a new storage. /// new returns a new storage.
#[instrument(skip_all)]
pub async fn new(config: Arc<Config>, dir: &Path, log_dir: PathBuf) -> Result<Self> { pub async fn new(config: Arc<Config>, dir: &Path, log_dir: PathBuf) -> Result<Self> {
let metadata = metadata::Metadata::new(config.clone(), dir, &log_dir)?; let metadata = metadata::Metadata::new(config.clone(), dir, &log_dir)?;
let content = content::Content::new(config.clone(), dir).await?; let content = content::Content::new(config.clone(), dir).await?;
@ -69,16 +71,19 @@ impl Storage {
} }
/// total_space returns the total space of the disk. /// total_space returns the total space of the disk.
#[instrument(skip_all)]
pub fn total_space(&self) -> Result<u64> { pub fn total_space(&self) -> Result<u64> {
self.content.total_space() self.content.total_space()
} }
/// available_space returns the available space of the disk. /// available_space returns the available space of the disk.
#[instrument(skip_all)]
pub fn available_space(&self) -> Result<u64> { pub fn available_space(&self) -> Result<u64> {
self.content.available_space() self.content.available_space()
} }
/// has_enough_space checks if the storage has enough space to store the content. /// has_enough_space checks if the storage has enough space to store the content.
#[instrument(skip_all)]
pub fn has_enough_space(&self, content_length: u64) -> Result<bool> { pub fn has_enough_space(&self, content_length: u64) -> Result<bool> {
self.content.has_enough_space(content_length) self.content.has_enough_space(content_length)
} }
@ -97,12 +102,14 @@ impl Storage {
/// is_same_dev_inode_as_task checks if the task content is on the same device inode as the /// is_same_dev_inode_as_task checks if the task content is on the same device inode as the
/// destination. /// destination.
#[instrument(skip_all)]
pub async fn is_same_dev_inode_as_task(&self, id: &str, to: &Path) -> Result<bool> { pub async fn is_same_dev_inode_as_task(&self, id: &str, to: &Path) -> Result<bool> {
self.content.is_same_dev_inode_as_task(id, to).await self.content.is_same_dev_inode_as_task(id, to).await
} }
/// prepare_download_task_started prepares the metadata of the task when the task downloads /// prepare_download_task_started prepares the metadata of the task when the task downloads
/// started. /// started.
#[instrument(skip_all)]
pub async fn prepare_download_task_started(&self, id: &str) -> Result<metadata::Task> { pub async fn prepare_download_task_started(&self, id: &str) -> Result<metadata::Task> {
self.metadata.download_task_started(id, None, None, None) self.metadata.download_task_started(id, None, None, None)
} }
@ -116,15 +123,23 @@ impl Storage {
piece_length: u64, piece_length: u64,
content_length: u64, content_length: u64,
response_header: Option<HeaderMap>, response_header: Option<HeaderMap>,
load_to_cache: bool,
) -> Result<metadata::Task> { ) -> Result<metadata::Task> {
self.content.create_task(id, content_length).await?; let metadata = self.metadata.download_task_started(
self.metadata.download_task_started(
id, id,
Some(piece_length), Some(piece_length),
Some(content_length), Some(content_length),
response_header, response_header,
) )?;
self.content.create_task(id, content_length).await?;
if load_to_cache {
let mut cache = self.cache.clone();
cache.put_task(id, content_length).await;
debug!("put task to cache: {}", id);
}
Ok(metadata)
} }
/// download_task_finished updates the metadata of the task when the task downloads finished. /// download_task_finished updates the metadata of the task when the task downloads finished.
@ -212,6 +227,7 @@ impl Storage {
/// is_same_dev_inode_as_persistent_cache_task checks if the persistent cache task content is on the same device inode as the /// is_same_dev_inode_as_persistent_cache_task checks if the persistent cache task content is on the same device inode as the
/// destination. /// destination.
#[instrument(skip_all)]
pub async fn is_same_dev_inode_as_persistent_cache_task( pub async fn is_same_dev_inode_as_persistent_cache_task(
&self, &self,
id: &str, id: &str,
@ -376,7 +392,7 @@ impl Storage {
) -> Result<metadata::Piece> { ) -> Result<metadata::Piece> {
let response = self let response = self
.content .content
.write_persistent_cache_piece(task_id, offset, length, reader) .write_persistent_cache_piece(task_id, offset, reader)
.await?; .await?;
let digest = Digest::new(Algorithm::Crc32, response.hash); let digest = Digest::new(Algorithm::Crc32, response.hash);
@ -415,10 +431,11 @@ impl Storage {
offset: u64, offset: u64,
length: u64, length: u64,
reader: &mut R, reader: &mut R,
load_to_cache: bool,
timeout: Duration, timeout: Duration,
) -> Result<metadata::Piece> { ) -> Result<metadata::Piece> {
tokio::select! { tokio::select! {
piece = self.handle_downloaded_from_source_finished(piece_id, task_id, offset, length, reader) => { piece = self.handle_downloaded_from_source_finished(piece_id, task_id, offset, length, reader, load_to_cache) => {
piece piece
} }
_ = sleep(timeout) => { _ = sleep(timeout) => {
@ -436,11 +453,25 @@ impl Storage {
offset: u64, offset: u64,
length: u64, length: u64,
reader: &mut R, reader: &mut R,
load_to_cache: bool,
) -> Result<metadata::Piece> { ) -> Result<metadata::Piece> {
let response = self let response = if load_to_cache {
.content let mut buffer = Vec::with_capacity(length as usize);
.write_piece(task_id, offset, length, reader) let mut tee = InspectReader::new(reader, |bytes| {
buffer.extend_from_slice(bytes);
});
let response = self.content.write_piece(task_id, offset, &mut tee).await?;
self.cache
.write_piece(task_id, piece_id, bytes::Bytes::from(buffer))
.await?; .await?;
debug!("put piece to cache: {}", piece_id);
response
} else {
self.content.write_piece(task_id, offset, reader).await?
};
let digest = Digest::new(Algorithm::Crc32, response.hash); let digest = Digest::new(Algorithm::Crc32, response.hash);
self.metadata.download_piece_finished( self.metadata.download_piece_finished(
@ -464,10 +495,11 @@ impl Storage {
expected_digest: &str, expected_digest: &str,
parent_id: &str, parent_id: &str,
reader: &mut R, reader: &mut R,
load_to_cache: bool,
timeout: Duration, timeout: Duration,
) -> Result<metadata::Piece> { ) -> Result<metadata::Piece> {
tokio::select! { tokio::select! {
piece = self.handle_downloaded_piece_from_parent_finished(piece_id, task_id, offset, length, expected_digest, parent_id, reader) => { piece = self.handle_downloaded_piece_from_parent_finished(piece_id, task_id, offset, length, expected_digest, parent_id, reader, load_to_cache) => {
piece piece
} }
_ = sleep(timeout) => { _ = sleep(timeout) => {
@ -488,11 +520,25 @@ impl Storage {
expected_digest: &str, expected_digest: &str,
parent_id: &str, parent_id: &str,
reader: &mut R, reader: &mut R,
load_to_cache: bool,
) -> Result<metadata::Piece> { ) -> Result<metadata::Piece> {
let response = self let response = if load_to_cache {
.content let mut buffer = Vec::with_capacity(length as usize);
.write_piece(task_id, offset, length, reader) let mut tee = InspectReader::new(reader, |bytes| {
buffer.extend_from_slice(bytes);
});
let response = self.content.write_piece(task_id, offset, &mut tee).await?;
self.cache
.write_piece(task_id, piece_id, bytes::Bytes::from(buffer))
.await?; .await?;
debug!("put piece to cache: {}", piece_id);
response
} else {
self.content.write_piece(task_id, offset, reader).await?
};
let length = response.length; let length = response.length;
let digest = Digest::new(Algorithm::Crc32, response.hash); let digest = Digest::new(Algorithm::Crc32, response.hash);
@ -587,6 +633,7 @@ impl Storage {
} }
/// get_piece returns the piece metadata. /// get_piece returns the piece metadata.
#[instrument(skip_all)]
pub fn get_piece(&self, piece_id: &str) -> Result<Option<metadata::Piece>> { pub fn get_piece(&self, piece_id: &str) -> Result<Option<metadata::Piece>> {
self.metadata.get_piece(piece_id) self.metadata.get_piece(piece_id)
} }
@ -598,13 +645,13 @@ impl Storage {
} }
/// get_pieces returns the piece metadatas. /// get_pieces returns the piece metadatas.
#[instrument(skip_all)]
pub fn get_pieces(&self, task_id: &str) -> Result<Vec<metadata::Piece>> { pub fn get_pieces(&self, task_id: &str) -> Result<Vec<metadata::Piece>> {
self.metadata.get_pieces(task_id) self.metadata.get_pieces(task_id)
} }
/// piece_id returns the piece id. /// piece_id returns the piece id.
#[inline] #[inline]
#[instrument(skip_all)]
pub fn piece_id(&self, task_id: &str, number: u32) -> String { pub fn piece_id(&self, task_id: &str, number: u32) -> String {
self.metadata.piece_id(task_id, number) self.metadata.piece_id(task_id, number)
} }
@ -629,7 +676,6 @@ impl Storage {
} }
/// download_persistent_cache_piece_from_parent_finished is used for downloading persistent cache piece from parent. /// download_persistent_cache_piece_from_parent_finished is used for downloading persistent cache piece from parent.
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all)] #[instrument(skip_all)]
pub async fn download_persistent_cache_piece_from_parent_finished< pub async fn download_persistent_cache_piece_from_parent_finished<
R: AsyncRead + Unpin + ?Sized, R: AsyncRead + Unpin + ?Sized,
@ -638,14 +684,13 @@ impl Storage {
piece_id: &str, piece_id: &str,
task_id: &str, task_id: &str,
offset: u64, offset: u64,
length: u64,
expected_digest: &str, expected_digest: &str,
parent_id: &str, parent_id: &str,
reader: &mut R, reader: &mut R,
) -> Result<metadata::Piece> { ) -> Result<metadata::Piece> {
let response = self let response = self
.content .content
.write_persistent_cache_piece(task_id, offset, length, reader) .write_persistent_cache_piece(task_id, offset, reader)
.await?; .await?;
let length = response.length; let length = response.length;
@ -744,6 +789,7 @@ impl Storage {
/// persistent_cache_piece_id returns the persistent cache piece id. /// persistent_cache_piece_id returns the persistent cache piece id.
#[inline] #[inline]
#[instrument(skip_all)]
pub fn persistent_cache_piece_id(&self, task_id: &str, number: u32) -> String { pub fn persistent_cache_piece_id(&self, task_id: &str, number: u32) -> String {
self.metadata.piece_id(task_id, number) self.metadata.piece_id(task_id, number)
} }
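
The `load_to_cache` branches added on the right (v0.2.27) side all use the same tee trick: `InspectReader` copies every chunk into an in-memory buffer while the piece streams to disk, so the cache write needs no second read. A self-contained sketch of just that trick, with the surrounding storage calls stubbed out:

```rust
use tokio::io::{AsyncRead, AsyncReadExt};
use tokio_util::io::InspectReader;

/// Drains `reader` once and returns two copies of its bytes: what would have gone
/// to disk and what would be handed to the in-memory piece cache.
async fn tee_into_buffer<R: AsyncRead + Unpin>(reader: R) -> std::io::Result<(Vec<u8>, Vec<u8>)> {
    let mut cached = Vec::new();
    let mut tee = InspectReader::new(reader, |bytes: &[u8]| cached.extend_from_slice(bytes));

    // Stand-in for content.write_piece(): consume the tee as the disk write would.
    let mut written = Vec::new();
    tee.read_to_end(&mut written).await?;
    drop(tee); // release the closure's borrow of `cached`

    Ok((written, cached))
}
```

In the real code the first copy flows through `content.write_piece` and the second is wrapped in `bytes::Bytes` and handed to `cache.write_piece`.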

View File

@ -840,6 +840,7 @@ impl<E: StorageEngineOwned> Metadata<E> {
} }
/// get_piece gets the piece metadata. /// get_piece gets the piece metadata.
#[instrument(skip_all)]
pub fn get_piece(&self, piece_id: &str) -> Result<Option<Piece>> { pub fn get_piece(&self, piece_id: &str) -> Result<Option<Piece>> {
self.db.get(piece_id.as_bytes()) self.db.get(piece_id.as_bytes())
} }
@ -851,7 +852,6 @@ impl<E: StorageEngineOwned> Metadata<E> {
} }
/// get_pieces gets the piece metadatas. /// get_pieces gets the piece metadatas.
#[instrument(skip_all)]
pub fn get_pieces(&self, task_id: &str) -> Result<Vec<Piece>> { pub fn get_pieces(&self, task_id: &str) -> Result<Vec<Piece>> {
let pieces = self let pieces = self
.db .db
@ -906,6 +906,7 @@ impl<E: StorageEngineOwned> Metadata<E> {
/// piece_id returns the piece id. /// piece_id returns the piece id.
#[inline] #[inline]
#[instrument(skip_all)]
pub fn piece_id(&self, task_id: &str, number: u32) -> String { pub fn piece_id(&self, task_id: &str, number: u32) -> String {
format!("{}-{}", task_id, number) format!("{}-{}", task_id, number)
} }
@ -938,7 +939,7 @@ impl Metadata<RocksdbStorageEngine> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use tempfile::tempdir; use tempdir::TempDir;
#[test] #[test]
fn test_calculate_digest() { fn test_calculate_digest() {
@ -956,7 +957,7 @@ mod tests {
#[test] #[test]
fn should_create_metadata() { fn should_create_metadata() {
let dir = tempdir().unwrap(); let dir = TempDir::new("metadata").unwrap();
let log_dir = dir.path().join("log"); let log_dir = dir.path().join("log");
let metadata = Metadata::new(Arc::new(Config::default()), dir.path(), &log_dir).unwrap(); let metadata = Metadata::new(Arc::new(Config::default()), dir.path(), &log_dir).unwrap();
assert!(metadata.get_tasks().unwrap().is_empty()); assert!(metadata.get_tasks().unwrap().is_empty());
@ -968,7 +969,7 @@ mod tests {
#[test] #[test]
fn test_task_lifecycle() { fn test_task_lifecycle() {
let dir = tempdir().unwrap(); let dir = TempDir::new("metadata").unwrap();
let log_dir = dir.path().join("log"); let log_dir = dir.path().join("log");
let metadata = Metadata::new(Arc::new(Config::default()), dir.path(), &log_dir).unwrap(); let metadata = Metadata::new(Arc::new(Config::default()), dir.path(), &log_dir).unwrap();
let task_id = "d3c4e940ad06c47fc36ac67801e6f8e36cb400e2391708620bc7e865b102062c"; let task_id = "d3c4e940ad06c47fc36ac67801e6f8e36cb400e2391708620bc7e865b102062c";
@ -1028,7 +1029,7 @@ mod tests {
#[test] #[test]
fn test_piece_lifecycle() { fn test_piece_lifecycle() {
let dir = tempdir().unwrap(); let dir = TempDir::new("metadata").unwrap();
let log_dir = dir.path().join("log"); let log_dir = dir.path().join("log");
let metadata = Metadata::new(Arc::new(Config::default()), dir.path(), &log_dir).unwrap(); let metadata = Metadata::new(Arc::new(Config::default()), dir.path(), &log_dir).unwrap();
let task_id = "d3c4e940ad06c47fc36ac67801e6f8e36cb400e2391708620bc7e865b102062c"; let task_id = "d3c4e940ad06c47fc36ac67801e6f8e36cb400e2391708620bc7e865b102062c";

View File

@ -24,7 +24,7 @@ use std::{
ops::Deref, ops::Deref,
path::{Path, PathBuf}, path::{Path, PathBuf},
}; };
use tracing::{info, warn}; use tracing::{info, instrument, warn};
/// RocksdbStorageEngine is a storage engine based on rocksdb. /// RocksdbStorageEngine is a storage engine based on rocksdb.
pub struct RocksdbStorageEngine { pub struct RocksdbStorageEngine {
@ -67,6 +67,7 @@ impl RocksdbStorageEngine {
const DEFAULT_LOG_MAX_FILES: usize = 10; const DEFAULT_LOG_MAX_FILES: usize = 10;
/// open opens a rocksdb storage engine with the given directory and column families. /// open opens a rocksdb storage engine with the given directory and column families.
#[instrument(skip_all)]
pub fn open(dir: &Path, log_dir: &PathBuf, cf_names: &[&str], keep: bool) -> Result<Self> { pub fn open(dir: &Path, log_dir: &PathBuf, cf_names: &[&str], keep: bool) -> Result<Self> {
info!("initializing metadata directory: {:?} {:?}", dir, cf_names); info!("initializing metadata directory: {:?} {:?}", dir, cf_names);
// Initialize rocksdb options. // Initialize rocksdb options.
@ -134,6 +135,7 @@ impl RocksdbStorageEngine {
/// RocksdbStorageEngine implements the storage engine operations. /// RocksdbStorageEngine implements the storage engine operations.
impl Operations for RocksdbStorageEngine { impl Operations for RocksdbStorageEngine {
/// get gets the object by key. /// get gets the object by key.
#[instrument(skip_all)]
fn get<O: DatabaseObject>(&self, key: &[u8]) -> Result<Option<O>> { fn get<O: DatabaseObject>(&self, key: &[u8]) -> Result<Option<O>> {
let cf = cf_handle::<O>(self)?; let cf = cf_handle::<O>(self)?;
let value = self.get_cf(cf, key).or_err(ErrorType::StorageError)?; let value = self.get_cf(cf, key).or_err(ErrorType::StorageError)?;
@ -144,6 +146,7 @@ impl Operations for RocksdbStorageEngine {
} }
/// is_exist checks if the object exists by key. /// is_exist checks if the object exists by key.
#[instrument(skip_all)]
fn is_exist<O: DatabaseObject>(&self, key: &[u8]) -> Result<bool> { fn is_exist<O: DatabaseObject>(&self, key: &[u8]) -> Result<bool> {
let cf = cf_handle::<O>(self)?; let cf = cf_handle::<O>(self)?;
Ok(self Ok(self
@ -153,6 +156,7 @@ impl Operations for RocksdbStorageEngine {
} }
/// put puts the object by key. /// put puts the object by key.
#[instrument(skip_all)]
fn put<O: DatabaseObject>(&self, key: &[u8], value: &O) -> Result<()> { fn put<O: DatabaseObject>(&self, key: &[u8], value: &O) -> Result<()> {
let cf = cf_handle::<O>(self)?; let cf = cf_handle::<O>(self)?;
self.put_cf(cf, key, value.serialized()?) self.put_cf(cf, key, value.serialized()?)
@ -161,6 +165,7 @@ impl Operations for RocksdbStorageEngine {
} }
/// delete deletes the object by key. /// delete deletes the object by key.
#[instrument(skip_all)]
fn delete<O: DatabaseObject>(&self, key: &[u8]) -> Result<()> { fn delete<O: DatabaseObject>(&self, key: &[u8]) -> Result<()> {
let cf = cf_handle::<O>(self)?; let cf = cf_handle::<O>(self)?;
let mut options = WriteOptions::default(); let mut options = WriteOptions::default();
@ -172,6 +177,7 @@ impl Operations for RocksdbStorageEngine {
} }
/// iter iterates all objects. /// iter iterates all objects.
#[instrument(skip_all)]
fn iter<O: DatabaseObject>(&self) -> Result<impl Iterator<Item = Result<(Box<[u8]>, O)>>> { fn iter<O: DatabaseObject>(&self) -> Result<impl Iterator<Item = Result<(Box<[u8]>, O)>>> {
let cf = cf_handle::<O>(self)?; let cf = cf_handle::<O>(self)?;
let iter = self.iterator_cf(cf, rocksdb::IteratorMode::Start); let iter = self.iterator_cf(cf, rocksdb::IteratorMode::Start);
@ -182,6 +188,7 @@ impl Operations for RocksdbStorageEngine {
} }
/// iter_raw iterates all objects without serialization. /// iter_raw iterates all objects without serialization.
#[instrument(skip_all)]
fn iter_raw<O: DatabaseObject>( fn iter_raw<O: DatabaseObject>(
&self, &self,
) -> Result<impl Iterator<Item = Result<(Box<[u8]>, Box<[u8]>)>>> { ) -> Result<impl Iterator<Item = Result<(Box<[u8]>, Box<[u8]>)>>> {
@ -195,6 +202,7 @@ impl Operations for RocksdbStorageEngine {
} }
/// prefix_iter iterates all objects with prefix. /// prefix_iter iterates all objects with prefix.
#[instrument(skip_all)]
fn prefix_iter<O: DatabaseObject>( fn prefix_iter<O: DatabaseObject>(
&self, &self,
prefix: &[u8], prefix: &[u8],
@ -208,6 +216,7 @@ impl Operations for RocksdbStorageEngine {
} }
/// prefix_iter_raw iterates all objects with prefix without serialization. /// prefix_iter_raw iterates all objects with prefix without serialization.
#[instrument(skip_all)]
fn prefix_iter_raw<O: DatabaseObject>( fn prefix_iter_raw<O: DatabaseObject>(
&self, &self,
prefix: &[u8], prefix: &[u8],
@ -220,6 +229,7 @@ impl Operations for RocksdbStorageEngine {
} }
/// batch_delete deletes objects by keys. /// batch_delete deletes objects by keys.
#[instrument(skip_all)]
fn batch_delete<O: DatabaseObject>(&self, keys: Vec<&[u8]>) -> Result<()> { fn batch_delete<O: DatabaseObject>(&self, keys: Vec<&[u8]>) -> Result<()> {
let cf = cf_handle::<O>(self)?; let cf = cf_handle::<O>(self)?;
let mut batch = rocksdb::WriteBatch::default(); let mut batch = rocksdb::WriteBatch::default();
@ -252,7 +262,7 @@ where
mod tests { mod tests {
use super::*; use super::*;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use tempfile::tempdir; use tempdir::TempDir;
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
struct Object { struct Object {
@ -265,7 +275,7 @@ mod tests {
} }
fn create_test_engine() -> RocksdbStorageEngine { fn create_test_engine() -> RocksdbStorageEngine {
let temp_dir = tempdir().unwrap(); let temp_dir = TempDir::new("rocksdb_test").unwrap();
let log_dir = temp_dir.path().to_path_buf(); let log_dir = temp_dir.path().to_path_buf();
RocksdbStorageEngine::open(temp_dir.path(), &log_dir, &[Object::NAMESPACE], false).unwrap() RocksdbStorageEngine::open(temp_dir.path(), &log_dir, &[Object::NAMESPACE], false).unwrap()
} }

View File

@ -13,6 +13,7 @@ edition.workspace = true
dragonfly-client-core.workspace = true dragonfly-client-core.workspace = true
dragonfly-api.workspace = true dragonfly-api.workspace = true
reqwest.workspace = true reqwest.workspace = true
hyper.workspace = true
http-range-header.workspace = true http-range-header.workspace = true
http.workspace = true http.workspace = true
tracing.workspace = true tracing.workspace = true
@ -23,15 +24,14 @@ rustls-pki-types.workspace = true
rustls-pemfile.workspace = true rustls-pemfile.workspace = true
sha2.workspace = true sha2.workspace = true
uuid.workspace = true uuid.workspace = true
sysinfo.workspace = true
hex.workspace = true hex.workspace = true
crc32fast.workspace = true
openssl.workspace = true openssl.workspace = true
crc32fast.workspace = true
lazy_static.workspace = true lazy_static.workspace = true
bytesize.workspace = true bytesize.workspace = true
lru.workspace = true lru.workspace = true
tokio.workspace = true tokio.workspace = true
rustix = { version = "1.0.8", features = ["fs"] } rustix = { version = "1.0.5", features = ["fs"] }
base64 = "0.22.1" base64 = "0.22.1"
pnet = "0.35.0" pnet = "0.35.0"

View File

@ -14,7 +14,7 @@
* limitations under the License. * limitations under the License.
*/ */
use dragonfly_client_core::{Error as ClientError, Result as ClientResult}; use dragonfly_client_core::Result as ClientResult;
use sha2::Digest as Sha2Digest; use sha2::Digest as Sha2Digest;
use std::fmt; use std::fmt;
use std::io::{self, Read}; use std::io::{self, Read};
@ -112,36 +112,9 @@ impl FromStr for Digest {
} }
let algorithm = match parts[0] { let algorithm = match parts[0] {
"crc32" => { "crc32" => Algorithm::Crc32,
if parts[1].len() != 10 { "sha256" => Algorithm::Sha256,
return Err(format!( "sha512" => Algorithm::Sha512,
"invalid crc32 digest length: {}, expected 10",
parts[1].len()
));
}
Algorithm::Crc32
}
"sha256" => {
if parts[1].len() != 64 {
return Err(format!(
"invalid sha256 digest length: {}, expected 64",
parts[1].len()
));
}
Algorithm::Sha256
}
"sha512" => {
if parts[1].len() != 128 {
return Err(format!(
"invalid sha512 digest length: {}, expected 128",
parts[1].len()
));
}
Algorithm::Sha512
}
_ => return Err(format!("invalid digest algorithm: {}", parts[0])), _ => return Err(format!("invalid digest algorithm: {}", parts[0])),
}; };
@ -182,25 +155,6 @@ pub fn calculate_file_digest(algorithm: Algorithm, path: &Path) -> ClientResult<
} }
} }
/// verify_file_digest verifies the digest of a file against an expected digest.
pub fn verify_file_digest(expected_digest: Digest, file_path: &Path) -> ClientResult<()> {
let digest = match calculate_file_digest(expected_digest.algorithm(), file_path) {
Ok(digest) => digest,
Err(err) => {
return Err(err);
}
};
if digest.to_string() != expected_digest.to_string() {
return Err(ClientError::DigestMismatch(
expected_digest.to_string(),
digest.to_string(),
));
}
Ok(())
}
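
For orientation, the main-side digest helpers compose roughly as below. The import path and the "<algorithm>:<encoded>" textual form are assumptions inferred from the length checks and tests above:

```rust
use std::path::Path;
use std::str::FromStr;

// Assumed import path for the items shown in this file.
use dragonfly_client_util::digest::{verify_file_digest, Digest};

fn check_download(path: &Path) {
    // Parse the textual form; on the main side FromStr also rejects encodings of the
    // wrong length (10 chars for crc32, 64 for sha256, 128 for sha512).
    let expected = Digest::from_str("crc32:1475635037").expect("well-formed digest");

    // Recompute the file's digest with the same algorithm and compare.
    verify_file_digest(expected, path).expect("digest should match");
}
```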
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@ -251,28 +205,4 @@ mod tests {
calculate_file_digest(Algorithm::Crc32, path).expect("failed to calculate Crc32 hash"); calculate_file_digest(Algorithm::Crc32, path).expect("failed to calculate Crc32 hash");
assert_eq!(digest.encoded(), expected_crc32); assert_eq!(digest.encoded(), expected_crc32);
} }
#[test]
fn test_verify_file_digest() {
let content = b"test content";
let temp_file = tempfile::NamedTempFile::new().expect("failed to create temp file");
let path = temp_file.path();
let mut file = File::create(path).expect("failed to create file");
file.write_all(content).expect("failed to write to file");
let expected_sha256_digest = Digest::new(
Algorithm::Sha256,
"6ae8a75555209fd6c44157c0aed8016e763ff435a19cf186f76863140143ff72".to_string(),
);
assert!(verify_file_digest(expected_sha256_digest, path).is_ok());
let expected_sha512_digest = Digest::new(
Algorithm::Sha512,
"0cbf4caef38047bba9a24e621a961484e5d2a92176a859e7eb27df343dd34eb98d538a6c5f4da1ce302ec250b821cc001e46cc97a704988297185a4df7e99602".to_string(),
);
assert!(verify_file_digest(expected_sha512_digest, path).is_ok());
let expected_crc32_digest = Digest::new(Algorithm::Crc32, "1475635037".to_string());
assert!(verify_file_digest(expected_crc32_digest, path).is_ok());
}
} }

View File

@ -20,6 +20,7 @@ use dragonfly_client_core::{
Error, Result, Error, Result,
}; };
use http::header::{self, HeaderMap}; use http::header::{self, HeaderMap};
use tracing::instrument;
/// Credentials is the credentials for the basic auth. /// Credentials is the credentials for the basic auth.
pub struct Credentials { pub struct Credentials {
@ -33,6 +34,7 @@ pub struct Credentials {
/// Credentials is the basic auth. /// Credentials is the basic auth.
impl Credentials { impl Credentials {
/// new returns a new Credentials. /// new returns a new Credentials.
#[instrument(skip_all)]
pub fn new(username: &str, password: &str) -> Credentials { pub fn new(username: &str, password: &str) -> Credentials {
Self { Self {
username: username.to_string(), username: username.to_string(),

View File

@ -21,10 +21,12 @@ use dragonfly_client_core::{
}; };
use reqwest::header::{HeaderMap, HeaderName, HeaderValue}; use reqwest::header::{HeaderMap, HeaderName, HeaderValue};
use std::collections::HashMap; use std::collections::HashMap;
use tracing::instrument;
pub mod basic_auth; pub mod basic_auth;
/// headermap_to_hashmap converts a headermap to a hashmap. /// headermap_to_hashmap converts a headermap to a hashmap.
#[instrument(skip_all)]
pub fn headermap_to_hashmap(header: &HeaderMap<HeaderValue>) -> HashMap<String, String> { pub fn headermap_to_hashmap(header: &HeaderMap<HeaderValue>) -> HashMap<String, String> {
let mut hashmap: HashMap<String, String> = HashMap::with_capacity(header.len()); let mut hashmap: HashMap<String, String> = HashMap::with_capacity(header.len());
for (k, v) in header { for (k, v) in header {
@ -37,6 +39,7 @@ pub fn headermap_to_hashmap(header: &HeaderMap<HeaderValue>) -> HashMap<String,
} }
/// hashmap_to_headermap converts a hashmap to a headermap. /// hashmap_to_headermap converts a hashmap to a headermap.
#[instrument(skip_all)]
pub fn hashmap_to_headermap(header: &HashMap<String, String>) -> Result<HeaderMap<HeaderValue>> { pub fn hashmap_to_headermap(header: &HashMap<String, String>) -> Result<HeaderMap<HeaderValue>> {
let mut headermap = HeaderMap::with_capacity(header.len()); let mut headermap = HeaderMap::with_capacity(header.len());
for (k, v) in header { for (k, v) in header {
@ -49,6 +52,7 @@ pub fn hashmap_to_headermap(header: &HashMap<String, String>) -> Result<HeaderMa
} }
/// header_vec_to_hashmap converts a vector of header strings to a hashmap. /// header_vec_to_hashmap converts a vector of header strings to a hashmap.
#[instrument(skip_all)]
pub fn header_vec_to_hashmap(raw_header: Vec<String>) -> Result<HashMap<String, String>> { pub fn header_vec_to_hashmap(raw_header: Vec<String>) -> Result<HashMap<String, String>> {
let mut header = HashMap::with_capacity(raw_header.len()); let mut header = HashMap::with_capacity(raw_header.len());
for h in raw_header { for h in raw_header {
@ -61,11 +65,13 @@ pub fn header_vec_to_hashmap(raw_header: Vec<String>) -> Result<HashMap<String,
} }
/// header_vec_to_headermap converts a vector of header strings to a reqwest headermap. /// header_vec_to_headermap converts a vector of header strings to a reqwest headermap.
#[instrument(skip_all)]
pub fn header_vec_to_headermap(raw_header: Vec<String>) -> Result<HeaderMap> { pub fn header_vec_to_headermap(raw_header: Vec<String>) -> Result<HeaderMap> {
hashmap_to_headermap(&header_vec_to_hashmap(raw_header)?) hashmap_to_headermap(&header_vec_to_hashmap(raw_header)?)
} }
/// get_range gets the range from http header. /// get_range gets the range from http header.
#[instrument(skip_all)]
pub fn get_range(header: &HeaderMap, content_length: u64) -> Result<Option<Range>> { pub fn get_range(header: &HeaderMap, content_length: u64) -> Result<Option<Range>> {
match header.get(reqwest::header::RANGE) { match header.get(reqwest::header::RANGE) {
Some(range) => { Some(range) => {
@ -79,6 +85,7 @@ pub fn get_range(header: &HeaderMap, content_length: u64) -> Result<Option<Range
/// parse_range_header parses a Range header string as per RFC 7233, /// parse_range_header parses a Range header string as per RFC 7233,
/// supported Range Header: "Range": "bytes=100-200", "Range": "bytes=-50", /// supported Range Header: "Range": "bytes=100-200", "Range": "bytes=-50",
/// "Range": "bytes=150-", "Range": "bytes=0-0,-1". /// "Range": "bytes=150-", "Range": "bytes=0-0,-1".
#[instrument(skip_all)]
pub fn parse_range_header(range_header_value: &str, content_length: u64) -> Result<Range> { pub fn parse_range_header(range_header_value: &str, content_length: u64) -> Result<Range> {
let parsed_ranges = let parsed_ranges =
http_range_header::parse_range_header(range_header_value).or_err(ErrorType::ParseError)?; http_range_header::parse_range_header(range_header_value).or_err(ErrorType::ParseError)?;
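
A small usage sketch of the helpers in this file; the `dragonfly_client_util::http` import path is an assumption, and the expected result follows the RFC 7233 semantics named above (bytes=100-200 is the inclusive 101-byte span starting at offset 100):

```rust
use dragonfly_client_core::Result;
use dragonfly_client_util::http::{get_range, header_vec_to_headermap};

fn range_from_cli_headers() -> Result<()> {
    // Raw header strings, e.g. as passed on a dfget command line.
    let headers = header_vec_to_headermap(vec![
        "Range: bytes=100-200".to_string(),
        "User-Agent: dfget".to_string(),
    ])?;

    // For a 1 MiB object this should resolve to offset 100 with length 101.
    let range = get_range(&headers, 1024 * 1024)?;
    println!("{:?}", range);
    Ok(())
}
```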

View File

@ -22,6 +22,7 @@ use dragonfly_client_core::{
use sha2::{Digest, Sha256}; use sha2::{Digest, Sha256};
use std::io::{self, Read}; use std::io::{self, Read};
use std::path::PathBuf; use std::path::PathBuf;
use tracing::instrument;
use url::Url; use url::Url;
use uuid::Uuid; use uuid::Uuid;
@ -75,6 +76,7 @@ pub struct IDGenerator {
/// IDGenerator implements the IDGenerator. /// IDGenerator implements the IDGenerator.
impl IDGenerator { impl IDGenerator {
/// new creates a new IDGenerator. /// new creates a new IDGenerator.
#[instrument(skip_all)]
pub fn new(ip: String, hostname: String, is_seed_peer: bool) -> Self { pub fn new(ip: String, hostname: String, is_seed_peer: bool) -> Self {
IDGenerator { IDGenerator {
ip, ip,
@ -85,6 +87,7 @@ impl IDGenerator {
/// host_id generates the host id. /// host_id generates the host id.
#[inline] #[inline]
#[instrument(skip_all)]
pub fn host_id(&self) -> String { pub fn host_id(&self) -> String {
if self.is_seed_peer { if self.is_seed_peer {
return format!("{}-{}-{}", self.ip, self.hostname, "seed"); return format!("{}-{}-{}", self.ip, self.hostname, "seed");
@ -95,6 +98,7 @@ impl IDGenerator {
/// task_id generates the task id. /// task_id generates the task id.
#[inline] #[inline]
#[instrument(skip_all)]
pub fn task_id(&self, parameter: TaskIDParameter) -> Result<String> { pub fn task_id(&self, parameter: TaskIDParameter) -> Result<String> {
match parameter { match parameter {
TaskIDParameter::Content(content) => { TaskIDParameter::Content(content) => {
@ -148,8 +152,6 @@ impl IDGenerator {
hasher.update(piece_length.to_string()); hasher.update(piece_length.to_string());
} }
hasher.update(TaskType::Standard.as_str_name().as_bytes());
// Generate the task id. // Generate the task id.
Ok(hex::encode(hasher.finalize())) Ok(hex::encode(hasher.finalize()))
} }
@ -158,6 +160,7 @@ impl IDGenerator {
/// persistent_cache_task_id generates the persistent cache task id. /// persistent_cache_task_id generates the persistent cache task id.
#[inline] #[inline]
#[instrument(skip_all)]
pub fn persistent_cache_task_id( pub fn persistent_cache_task_id(
&self, &self,
parameter: PersistentCacheTaskIDParameter, parameter: PersistentCacheTaskIDParameter,
@ -203,8 +206,6 @@ impl IDGenerator {
hasher.update(piece_length.to_string().as_bytes()); hasher.update(piece_length.to_string().as_bytes());
} }
hasher.update(TaskType::PersistentCache.as_str_name().as_bytes());
// Generate the task id by crc32. // Generate the task id by crc32.
Ok(hasher.finalize().to_string()) Ok(hasher.finalize().to_string())
} }
@ -213,6 +214,7 @@ impl IDGenerator {
/// peer_id generates the peer id. /// peer_id generates the peer id.
#[inline] #[inline]
#[instrument(skip_all)]
pub fn peer_id(&self) -> String { pub fn peer_id(&self) -> String {
if self.is_seed_peer { if self.is_seed_peer {
return format!( return format!(
@ -228,6 +230,7 @@ impl IDGenerator {
} }
/// task_type generates the task type by the task id. /// task_type generates the task type by the task id.
#[instrument(skip_all)]
pub fn task_type(&self, id: &str) -> TaskType { pub fn task_type(&self, id: &str) -> TaskType {
if id.ends_with(PERSISTENT_CACHE_TASK_SUFFIX) { if id.ends_with(PERSISTENT_CACHE_TASK_SUFFIX) {
return TaskType::PersistentCache; return TaskType::PersistentCache;
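
A brief usage sketch of the generator shown in this file; the module path is assumed, and only the seed-peer `host_id` format appears verbatim in the hunks above:

```rust
use dragonfly_client_util::id_generator::IDGenerator;

fn ids_for_a_seed_peer() {
    let generator = IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), true);

    // Seed peers carry a "-seed" suffix, as in the host_id() branch above.
    assert_eq!(generator.host_id(), "127.0.0.1-localhost-seed");

    // peer_id() additionally mixes in per-instance state (note the `uuid` import above),
    // so it differs across daemon restarts.
    println!("{}", generator.peer_id());
}
```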
@ -274,7 +277,7 @@ mod tests {
application: Some("bar".to_string()), application: Some("bar".to_string()),
filtered_query_params: vec![], filtered_query_params: vec![],
}, },
"27554d06dfc788c2c2c60e01960152ffbd4b145fc103fcb80b432b4dc238a6fe", "99a47b38e9d3321aebebd715bea0483c1400cef2f767f84d97458f9dcedff221",
), ),
( (
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false), IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
@ -285,7 +288,7 @@ mod tests {
application: Some("bar".to_string()), application: Some("bar".to_string()),
filtered_query_params: vec![], filtered_query_params: vec![],
}, },
"06408fbf247ddaca478f8cb9565fe5591c28efd0994b8fea80a6a87d3203c5ca", "160fa7f001d9d2e893130894fbb60a5fb006e1d61bff82955f2946582bc9de1d",
), ),
( (
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false), IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
@ -296,7 +299,7 @@ mod tests {
application: None, application: None,
filtered_query_params: vec![], filtered_query_params: vec![],
}, },
"3c3f230ef9f191dd2821510346a7bc138e4894bee9aee184ba250a3040701d2a", "2773851c628744fb7933003195db436ce397c1722920696c4274ff804d86920b",
), ),
( (
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false), IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
@ -307,7 +310,7 @@ mod tests {
application: Some("bar".to_string()), application: Some("bar".to_string()),
filtered_query_params: vec![], filtered_query_params: vec![],
}, },
"c9f9261b7305c24371244f9f149f5d4589ed601348fdf22d7f6f4b10658fdba2", "63dee2822037636b0109876b58e95692233840753a882afa69b9b5ee82a6c57d",
), ),
( (
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false), IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
@ -318,7 +321,7 @@ mod tests {
application: None, application: None,
filtered_query_params: vec![], filtered_query_params: vec![],
}, },
"9f7c9aafbc6f30f8f41a96ca77eeae80c5b60964b3034b0ee43ccf7b2f9e52b8", "40c21de3ad2f1470ca1a19a2ad2577803a1829851f6cf862ffa2d4577ae51d38",
), ),
( (
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false), IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
@ -329,7 +332,7 @@ mod tests {
application: None, application: None,
filtered_query_params: vec!["foo".to_string(), "bar".to_string()], filtered_query_params: vec!["foo".to_string(), "bar".to_string()],
}, },
"457b4328cde278e422c9e243f7bfd1e97f511fec43a80f535cf6b0ef6b086776", "100680ad546ce6a577f42f52df33b4cfdca756859e664b8d7de329b150d09ce9",
), ),
( (
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false), IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
@ -360,7 +363,7 @@ mod tests {
tag: Some("tag1".to_string()), tag: Some("tag1".to_string()),
application: Some("app1".to_string()), application: Some("app1".to_string()),
}, },
"3490958009", "223755482",
), ),
( (
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false), IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
@ -370,7 +373,7 @@ mod tests {
tag: None, tag: None,
application: Some("app1".to_string()), application: Some("app1".to_string()),
}, },
"735741469", "1152081721",
), ),
( (
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false), IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
@ -380,7 +383,7 @@ mod tests {
tag: Some("tag1".to_string()), tag: Some("tag1".to_string()),
application: None, application: None,
}, },
"3954905097", "990623045",
), ),
( (
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false), IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
@ -390,7 +393,7 @@ mod tests {
tag: None, tag: None,
application: None, application: None,
}, },
"4162557545", "1293485139",
), ),
( (
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false), IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),

View File

@ -14,140 +14,34 @@
* limitations under the License. * limitations under the License.
*/ */
use bytesize::ByteSize; use bytesize::{ByteSize, MB};
use pnet::datalink::{self, NetworkInterface}; use pnet::datalink::{self, NetworkInterface};
use std::cmp::min; use std::cmp::min;
use std::net::IpAddr; use std::net::IpAddr;
use std::sync::Arc;
use std::time::Duration;
use sysinfo::Networks;
use tokio::sync::Mutex;
use tracing::{info, warn};
/// Interface represents a network interface with its information. #[cfg(not(target_os = "linux"))]
#[derive(Debug, Clone, Default)] use tracing::warn;
pub struct Interface {
/// name is the name of the network interface.
pub name: String,
/// bandwidth is the bandwidth of the network interface in bps. /// get_interface_by_ip returns the name of the network interface that has the specified IP
pub bandwidth: u64, /// address.
pub fn get_interface_by_ip(ip: IpAddr) -> Option<NetworkInterface> {
// network_data_mutex is a mutex to protect access to network data. for interface in datalink::interfaces() {
network_data_mutex: Arc<Mutex<()>>, for ip_network in interface.ips.iter() {
} if ip_network.ip() == ip {
return Some(interface);
/// NetworkData represents the network data for a specific interface,
#[derive(Debug, Clone, Default)]
pub struct NetworkData {
/// max_rx_bandwidth is the maximum receive bandwidth of the interface in bps.
pub max_rx_bandwidth: u64,
/// rx_bandwidth is the current receive bandwidth of the interface in bps.
pub rx_bandwidth: Option<u64>,
/// max_tx_bandwidth is the maximum transmit bandwidth of the interface in bps.
pub max_tx_bandwidth: u64,
/// tx_bandwidth is the current transmit bandwidth of the interface in bps.
pub tx_bandwidth: Option<u64>,
}
/// Interface methods provide functionality to get network interface information.
impl Interface {
/// DEFAULT_NETWORKS_REFRESH_INTERVAL is the default interval for refreshing network data.
const DEFAULT_NETWORKS_REFRESH_INTERVAL: Duration = Duration::from_secs(2);
/// new creates a new Interface instance based on the provided IP address and rate limit.
pub fn new(ip: IpAddr, rate_limit: ByteSize) -> Interface {
let rate_limit = Self::byte_size_to_bits(rate_limit); // convert to bps
let Some(interface) = Self::get_network_interface_by_ip(ip) else {
warn!(
"can not find interface for IP address {}, network interface unknown with bandwidth {} bps",
ip, rate_limit
);
return Interface {
name: "unknown".to_string(),
bandwidth: rate_limit,
network_data_mutex: Arc::new(Mutex::new(())),
};
};
match Self::get_speed(&interface.name) {
Some(speed) => {
let bandwidth = min(Self::megabits_to_bits(speed), rate_limit);
info!(
"network interface {} with bandwidth {} bps",
interface.name, bandwidth
);
Interface {
name: interface.name,
bandwidth,
network_data_mutex: Arc::new(Mutex::new(())),
}
}
None => {
warn!(
"can not get speed, network interface {} with bandwidth {} bps",
interface.name, rate_limit
);
Interface {
name: interface.name,
bandwidth: rate_limit,
network_data_mutex: Arc::new(Mutex::new(())),
} }
} }
} }
None
} }
/// get_network_data retrieves the network data for the interface. /// get_interface_speed_by_ip returns the speed of the network interface that has the specified IP
pub async fn get_network_data(&self) -> NetworkData { /// address in Mbps.
// Lock the mutex to ensure exclusive access to network data. pub fn get_interface_speed(interface_name: &str) -> Option<u64> {
let _guard = self.network_data_mutex.lock().await;
// Initialize sysinfo network.
let mut networks = Networks::new_with_refreshed_list();
// Sleep to calculate the network traffic difference over
// the DEFAULT_NETWORKS_REFRESH_INTERVAL.
tokio::time::sleep(Self::DEFAULT_NETWORKS_REFRESH_INTERVAL).await;
// Refresh network information.
networks.refresh();
let Some(network_data) = networks.get(self.name.as_str()) else {
warn!("can not find network data for interface {}", self.name);
return NetworkData {
max_rx_bandwidth: self.bandwidth,
max_tx_bandwidth: self.bandwidth,
..Default::default()
};
};
// Calculate the receive and transmit bandwidth in bits per second.
let rx_bandwidth = (Self::bytes_to_bits(network_data.received()) as f64
/ Self::DEFAULT_NETWORKS_REFRESH_INTERVAL.as_secs_f64())
.round() as u64;
// Calculate the transmit bandwidth in bits per second.
let tx_bandwidth = (Self::bytes_to_bits(network_data.transmitted()) as f64
/ Self::DEFAULT_NETWORKS_REFRESH_INTERVAL.as_secs_f64())
.round() as u64;
NetworkData {
max_rx_bandwidth: self.bandwidth,
rx_bandwidth: Some(rx_bandwidth),
max_tx_bandwidth: self.bandwidth,
tx_bandwidth: Some(tx_bandwidth),
}
}
/// get_speed returns the speed of the network interface in Mbps.
pub fn get_speed(name: &str) -> Option<u64> {
#[cfg(target_os = "linux")]
{
let speed_path = format!("/sys/class/net/{}/speed", name);
std::fs::read_to_string(&speed_path)
.ok()
.and_then(|speed_str| speed_str.trim().parse::<u64>().ok())
@ -155,76 +49,37 @@ impl Interface {
#[cfg(not(target_os = "linux"))]
{
warn!("can not get interface {} speed on non-linux platform", name);
None
}
}

/// get_interface_speed_by_ip returns the speed of the network interface that has the specified IP
/// address in Mbps.
pub fn get_interface_speed(interface_name: &str) -> Option<u64> {
#[cfg(target_os = "linux")]
{
let speed_path = format!("/sys/class/net/{}/speed", interface_name);
std::fs::read_to_string(&speed_path)
.ok()
.and_then(|speed_str| speed_str.trim().parse::<u64>().ok())
}

#[cfg(not(target_os = "linux"))]
{
warn!(
"can not get interface {} speed on non-linux platform",
interface_name
);
None
}
}
/// get_network_interface_by_ip returns the network interface that has the specified
/// IP address.
pub fn get_network_interface_by_ip(ip: IpAddr) -> Option<NetworkInterface> {
datalink::interfaces()
.into_iter()
.find(|interface| interface.ips.iter().any(|ip_net| ip_net.ip() == ip))
}

/// byte_size_to_bits converts a ByteSize to bits.
pub fn byte_size_to_bits(size: ByteSize) -> u64 {
size.as_u64() * 8
}

/// megabits_to_bits converts megabits to bits.
pub fn megabits_to_bits(size: u64) -> u64 {
size * 1_000_000 // 1 Mbit = 1,000,000 bits
}

/// bytes_to_bits converts bytes to bits.
pub fn bytes_to_bits(size: u64) -> u64 {
size * 8 // 1 byte = 8 bits
}
}

/// Interface represents a network interface with its information.
#[derive(Debug, Clone, Default)]
pub struct Interface {
/// name is the name of the network interface.
pub name: String,

// bandwidth is the bandwidth of the network interface in Mbps.
pub bandwidth: u64,
}

/// get_interface_info returns the network interface information for the specified IP address.
pub fn get_interface_info(ip: IpAddr, rate_limit: ByteSize) -> Option<Interface> {
let rate_limit = rate_limit.as_u64() / MB * 8; // convert to Mbps

let interface = get_interface_by_ip(ip)?;
match get_interface_speed(&interface.name) {
Some(speed) => Some(Interface {
name: interface.name,
bandwidth: min(speed, rate_limit),
}),
None => Some(Interface {
name: interface.name,
bandwidth: rate_limit,
}),
}
}
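As a quick sanity check on the unit handling above (an illustrative snippet, not part of the diff): Interface::new normalizes both the sysfs NIC speed (Mbps) and the configured rate limit (a ByteSize) to bits per second before taking the minimum. Assuming a 10,000 Mbps NIC and a 100 MB/s limit:

use std::cmp::min;
use bytesize::ByteSize;
use dragonfly_client_util::net::Interface;

fn main() {
    // 100 MB/s => 800,000,000 bps; 10,000 Mbps => 10,000,000,000 bps.
    let rate_limit = Interface::byte_size_to_bits(ByteSize::mb(100));
    let nic_speed = Interface::megabits_to_bits(10_000);

    // The advertised bandwidth is the smaller of the two, here the configured limit.
    assert_eq!(min(nic_speed, rate_limit), 800_000_000);
}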
#[cfg(test)]
mod tests {
use super::*;
use bytesize::ByteSize;
#[test]
fn test_byte_size_to_bits() {
let test_cases = vec![
(ByteSize::kb(1), 8_000u64),
(ByteSize::mb(1), 8_000_000u64),
(ByteSize::gb(1), 8_000_000_000u64),
(ByteSize::b(0), 0u64),
];
for (input, expected) in test_cases {
let result = Interface::byte_size_to_bits(input);
assert_eq!(result, expected);
}
}
#[test]
fn test_megabits_to_bits() {
let test_cases = vec![
(1u64, 1_000_000u64),
(1000u64, 1_000_000_000u64),
(0u64, 0u64),
];
for (input, expected) in test_cases {
let result = Interface::megabits_to_bits(input);
assert_eq!(result, expected);
}
}
#[test]
fn test_bytes_to_bits() {
let test_cases = vec![(1u64, 8u64), (1000u64, 8_000u64), (0u64, 0u64)];
for (input, expected) in test_cases {
let result = Interface::bytes_to_bits(input);
assert_eq!(result, expected);
}
}
}
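For orientation, a minimal usage sketch of the main-branch Interface (assuming a tokio runtime; the IP address and rate limit below are placeholders for values dfdaemon normally takes from its config):

use std::net::IpAddr;
use bytesize::ByteSize;
use dragonfly_client_util::net::Interface;

#[tokio::main]
async fn main() {
    let ip: IpAddr = "192.168.1.10".parse().unwrap();

    // bandwidth is min(NIC speed, rate limit), stored in bits per second.
    let interface = Interface::new(ip, ByteSize::gb(10));

    // Samples the interface counters over DEFAULT_NETWORKS_REFRESH_INTERVAL (2s)
    // and reports the observed rx/tx bandwidth in bps (None if the interface is unknown).
    let data = interface.get_network_data().await;
    println!(
        "rx {:?}/{} bps, tx {:?}/{} bps",
        data.rx_bandwidth, data.max_rx_bandwidth, data.tx_bandwidth, data.max_tx_bandwidth
    );
}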

View File

@ -34,6 +34,8 @@ hyper.workspace = true
hyper-util.workspace = true
hyper-rustls.workspace = true
tracing.workspace = true
validator.workspace = true
humantime.workspace = true
serde.workspace = true
chrono.workspace = true
prost-wkt-types.workspace = true
@ -53,40 +55,34 @@ clap.workspace = true
anyhow.workspace = true
bytes.workspace = true
bytesize.workspace = true
humantime.workspace = true
uuid.workspace = true
percent-encoding.workspace = true
tokio-rustls.workspace = true
serde_json.workspace = true
lru.workspace = true
fs2.workspace = true
lazy_static.workspace = true
futures.workspace = true
local-ip-address.workspace = true
tracing-log = "0.2"
sysinfo.workspace = true
tracing-appender = "0.2.3"
tracing-subscriber = { version = "0.3", features = ["env-filter", "time", "chrono"] }
tracing-panic = "0.1.2"
tracing-appender = "0.2.3"
tracing-opentelemetry = "0.30.0"
opentelemetry = { version = "0.29.1", default-features = false, features = ["trace"] }
opentelemetry-otlp = { version = "0.29.0", default-features = false, features = ["trace", "grpc-tonic", "http-proto", "reqwest-blocking-client"] }
opentelemetry_sdk = { version = "0.29.0", default-features = false, features = ["trace", "rt-tokio"] }
opentelemetry-semantic-conventions = { version = "0.30.0", features = ["semconv_experimental"] }
rolling-file = "0.2.0"
pprof = { version = "0.15", features = ["flamegraph", "protobuf-codec"] }
tracing-opentelemetry = "0.18.0"
opentelemetry = { version = "0.18.0", default-features = false, features = ["trace", "rt-tokio"] }
opentelemetry-jaeger = { version = "0.17.0", features = ["rt-tokio"] }
pprof = { version = "0.14", features = ["flamegraph", "protobuf-codec"] }
prometheus = { version = "0.13", features = ["process"] }
tonic-health = "0.12.3"
sysinfo = { version = "0.32.1", default-features = false, features = ["component", "disk", "network", "system", "user"] }
tower = { version = "0.4.13", features = ["limit", "load-shed", "buffer"] }
indicatif = "0.18.0"
indicatif = "0.17.11"
hashring = "0.3.6"
fslock = "0.2.1"
leaky-bucket = "1.1.2"
http-body-util = "0.1.3"
termion = "4.0.5"
tabled = "0.20.0"
tabled = "0.19.0"
path-absolutize = "3.1.1"
dashmap = "6.1.0"
fastrand = "2.3.0"
glob = "0.3.3"
console-subscriber = "0.4.1"

[dev-dependencies]
tempfile.workspace = true

View File

@ -14,9 +14,10 @@
* limitations under the License. * limitations under the License.
*/ */
use crate::grpc::scheduler::SchedulerClient;
use crate::grpc::{manager::ManagerClient, scheduler::SchedulerClient};
use crate::shutdown;
use dragonfly_api::common::v2::{Build, Cpu, Disk, Host, Memory, Network};
use dragonfly_api::manager::v2::{DeleteSeedPeerRequest, SourceType, UpdateSeedPeerRequest};
use dragonfly_api::scheduler::v2::{AnnounceHostRequest, DeleteHostRequest};
use dragonfly_client_config::{
dfdaemon::{Config, HostType},
@ -24,13 +25,91 @@ use dragonfly_client_config::{
};
use dragonfly_client_core::error::{ErrorType, OrErr};
use dragonfly_client_core::Result;
use dragonfly_client_util::net::Interface;
use std::env;
use std::sync::Arc;
use std::time::Duration;
use sysinfo::System;
use tokio::sync::mpsc;
use tracing::{debug, error, info, instrument};
use tracing::{error, info, instrument};
/// ManagerAnnouncer is used to announce the dfdaemon information to the manager.
pub struct ManagerAnnouncer {
/// config is the configuration of the dfdaemon.
config: Arc<Config>,
/// manager_client is the grpc client of the manager.
manager_client: Arc<ManagerClient>,
/// shutdown is used to shutdown the announcer.
shutdown: shutdown::Shutdown,
/// _shutdown_complete is used to notify the announcer is shutdown.
_shutdown_complete: mpsc::UnboundedSender<()>,
}
/// ManagerAnnouncer implements the manager announcer of the dfdaemon.
impl ManagerAnnouncer {
/// new creates a new manager announcer.
#[instrument(skip_all)]
pub fn new(
config: Arc<Config>,
manager_client: Arc<ManagerClient>,
shutdown: shutdown::Shutdown,
shutdown_complete_tx: mpsc::UnboundedSender<()>,
) -> Self {
Self {
config,
manager_client,
shutdown,
_shutdown_complete: shutdown_complete_tx,
}
}
/// run announces the dfdaemon information to the manager.
#[instrument(skip_all)]
pub async fn run(&self) -> Result<()> {
// Clone the shutdown channel.
let mut shutdown = self.shutdown.clone();
// If the seed peer is enabled, we should announce the seed peer to the manager.
if self.config.seed_peer.enable {
// Register the seed peer to the manager.
self.manager_client
.update_seed_peer(UpdateSeedPeerRequest {
source_type: SourceType::SeedPeerSource.into(),
hostname: self.config.host.hostname.clone(),
r#type: self.config.seed_peer.kind.to_string(),
idc: self.config.host.idc.clone(),
location: self.config.host.location.clone(),
ip: self.config.host.ip.unwrap().to_string(),
port: self.config.upload.server.port as i32,
download_port: self.config.upload.server.port as i32,
seed_peer_cluster_id: self.config.seed_peer.cluster_id,
})
.await?;
// Announce to scheduler shutting down with signals.
shutdown.recv().await;
// Delete the seed peer from the manager.
self.manager_client
.delete_seed_peer(DeleteSeedPeerRequest {
source_type: SourceType::SeedPeerSource.into(),
hostname: self.config.host.hostname.clone(),
ip: self.config.host.ip.unwrap().to_string(),
seed_peer_cluster_id: self.config.seed_peer.cluster_id,
})
.await?;
info!("announce to manager shutting down");
} else {
shutdown.recv().await;
info!("announce to manager shutting down");
}
Ok(())
}
}
/// Announcer is used to announce the dfdaemon information to the manager and scheduler. /// Announcer is used to announce the dfdaemon information to the manager and scheduler.
pub struct SchedulerAnnouncer { pub struct SchedulerAnnouncer {
@ -43,9 +122,6 @@ pub struct SchedulerAnnouncer {
/// scheduler_client is the grpc client of the scheduler. /// scheduler_client is the grpc client of the scheduler.
scheduler_client: Arc<SchedulerClient>, scheduler_client: Arc<SchedulerClient>,
/// interface is the network interface.
interface: Arc<Interface>,
/// shutdown is used to shutdown the announcer. /// shutdown is used to shutdown the announcer.
shutdown: shutdown::Shutdown, shutdown: shutdown::Shutdown,
@ -56,11 +132,11 @@ pub struct SchedulerAnnouncer {
/// SchedulerAnnouncer implements the scheduler announcer of the dfdaemon. /// SchedulerAnnouncer implements the scheduler announcer of the dfdaemon.
impl SchedulerAnnouncer { impl SchedulerAnnouncer {
/// new creates a new scheduler announcer. /// new creates a new scheduler announcer.
#[instrument(skip_all)]
pub async fn new( pub async fn new(
config: Arc<Config>, config: Arc<Config>,
host_id: String, host_id: String,
scheduler_client: Arc<SchedulerClient>, scheduler_client: Arc<SchedulerClient>,
interface: Arc<Interface>,
shutdown: shutdown::Shutdown, shutdown: shutdown::Shutdown,
shutdown_complete_tx: mpsc::UnboundedSender<()>, shutdown_complete_tx: mpsc::UnboundedSender<()>,
) -> Result<Self> { ) -> Result<Self> {
@ -68,7 +144,6 @@ impl SchedulerAnnouncer {
config, config,
host_id, host_id,
scheduler_client, scheduler_client,
interface,
shutdown, shutdown,
_shutdown_complete: shutdown_complete_tx, _shutdown_complete: shutdown_complete_tx,
}; };
@ -76,12 +151,13 @@ impl SchedulerAnnouncer {
// Initialize the scheduler announcer. // Initialize the scheduler announcer.
announcer announcer
.scheduler_client .scheduler_client
.init_announce_host(announcer.make_announce_host_request(Duration::ZERO).await?)
.init_announce_host(announcer.make_announce_host_request(Duration::ZERO)?)
.await?; .await?;
Ok(announcer) Ok(announcer)
} }
/// run announces the dfdaemon information to the scheduler. /// run announces the dfdaemon information to the scheduler.
#[instrument(skip_all)]
pub async fn run(&self) { pub async fn run(&self) {
// Clone the shutdown channel. // Clone the shutdown channel.
let mut shutdown = self.shutdown.clone(); let mut shutdown = self.shutdown.clone();
@ -91,7 +167,7 @@ impl SchedulerAnnouncer {
loop { loop {
tokio::select! { tokio::select! {
_ = interval.tick() => { _ = interval.tick() => {
let request = match self.make_announce_host_request(interval.period()).await {
let request = match self.make_announce_host_request(interval.period()) {
Ok(request) => request, Ok(request) => request,
Err(err) => { Err(err) => {
error!("make announce host request failed: {}", err); error!("make announce host request failed: {}", err);
@ -120,7 +196,7 @@ impl SchedulerAnnouncer {
/// make_announce_host_request makes the announce host request. /// make_announce_host_request makes the announce host request.
#[instrument(skip_all)] #[instrument(skip_all)]
async fn make_announce_host_request(&self, interval: Duration) -> Result<AnnounceHostRequest> {
fn make_announce_host_request(&self, interval: Duration) -> Result<AnnounceHostRequest> {
// If the seed peer is enabled, we should announce the seed peer to the scheduler. // If the seed peer is enabled, we should announce the seed peer to the scheduler.
let host_type = if self.config.seed_peer.enable { let host_type = if self.config.seed_peer.enable {
self.config.seed_peer.kind self.config.seed_peer.kind
@ -156,25 +232,25 @@ impl SchedulerAnnouncer {
free: sys.free_memory(), free: sys.free_memory(),
}; };
// Wait for getting the network data.
let network_data = self.interface.get_network_data().await;
debug!(
"network data: rx bandwidth {}/{} bps, tx bandwidth {}/{} bps",
network_data.rx_bandwidth.unwrap_or(0),
network_data.max_rx_bandwidth,
network_data.tx_bandwidth.unwrap_or(0),
network_data.max_tx_bandwidth
);
// Get the network information.
let network = Network {
idc: self.config.host.idc.clone(),
location: self.config.host.location.clone(),
max_rx_bandwidth: network_data.max_rx_bandwidth,
rx_bandwidth: network_data.rx_bandwidth,
max_tx_bandwidth: network_data.max_tx_bandwidth,
tx_bandwidth: network_data.tx_bandwidth,
..Default::default()
};

// Get the network information.
let network = Network {
// TODO: Get the count of the tcp connection.
tcp_connection_count: 0,

// TODO: Get the count of the upload tcp connection.
upload_tcp_connection_count: 0,

idc: self.config.host.idc.clone(),
location: self.config.host.location.clone(),

// TODO: Get the network download rate, refer to
// https://docs.rs/sysinfo/latest/sysinfo/struct.NetworkData.html#method.received.
download_rate: 0,
download_rate_limit: self.config.download.rate_limit.as_u64(),

// TODO: Get the network download rate, refer to
// https://docs.rs/sysinfo/latest/sysinfo/struct.NetworkData.html#method.transmitted
upload_rate: 0,
upload_rate_limit: self.config.upload.rate_limit.as_u64(),
};
// Get the disk information. // Get the disk information.

View File

@ -25,7 +25,6 @@ use dragonfly_client_core::{
}; };
use dragonfly_client_util::fs::fallocate; use dragonfly_client_util::fs::fallocate;
use indicatif::{ProgressBar, ProgressState, ProgressStyle}; use indicatif::{ProgressBar, ProgressState, ProgressStyle};
use local_ip_address::local_ip;
use path_absolutize::*; use path_absolutize::*;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::time::Duration; use std::time::Duration;
@ -86,13 +85,6 @@ pub struct ExportCommand {
)] )]
timeout: Duration, timeout: Duration,
#[arg(
long = "digest",
required = false,
help = "Verify the integrity of the downloaded file using the specified digest, support sha256, sha512, crc32. If the digest is not specified, the downloaded file will not be verified. Format: <algorithm>:<digest>, e.g. sha256:1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef, crc32:12345678"
)]
digest: Option<String>,
#[arg( #[arg(
short = 'e', short = 'e',
long = "endpoint", long = "endpoint",
@ -123,19 +115,17 @@ pub struct ExportCommand {
)] )]
log_max_files: usize, log_max_files: usize,
#[arg(long, default_value_t = false, help = "Specify whether to print log")]
console: bool,

#[arg(
long = "verbose",
default_value_t = false,
help = "Specify whether to print log"
)]
verbose: bool,
} }
/// Implement the execute for ExportCommand. /// Implement the execute for ExportCommand.
impl ExportCommand { impl ExportCommand {
/// Executes the export command with comprehensive validation and advanced error handling.
///
/// This function serves as the main entry point for the dfcache export command execution.
/// It handles the complete workflow including argument parsing, validation, logging setup,
/// dfdaemon client connection, and export operation execution. The function provides
/// sophisticated error reporting with colored terminal output, including specialized
/// handling for backend errors with HTTP status codes and headers.
/// execute executes the export command.
pub async fn execute(&self) -> Result<()> { pub async fn execute(&self) -> Result<()> {
// Parse command line arguments. // Parse command line arguments.
Args::parse(); Args::parse();
@ -147,12 +137,7 @@ impl ExportCommand {
self.log_level, self.log_level,
self.log_max_files, self.log_max_files,
None, None,
None, self.verbose,
None,
None,
None,
false,
self.console,
); );
// Validate the command line arguments. // Validate the command line arguments.
@ -442,13 +427,7 @@ impl ExportCommand {
Ok(()) Ok(())
} }
/// Executes the export operation to retrieve cached files from the persistent cache system.
///
/// This function handles the core export functionality by downloading a cached file from the
/// dfdaemon persistent cache system. It supports two transfer modes: direct file transfer
/// by dfdaemon (hardlink/copy) or streaming piece content through the client for manual
/// file assembly. The operation provides real-time progress feedback and handles file
/// creation, directory setup, and efficient piece-by-piece writing with sparse file allocation.
/// run runs the export command.
async fn run(&self, dfdaemon_download_client: DfdaemonDownloadClient) -> Result<()> { async fn run(&self, dfdaemon_download_client: DfdaemonDownloadClient) -> Result<()> {
// Dfcache needs to notify dfdaemon to transfer the piece content of downloading file via unix domain socket // Dfcache needs to notify dfdaemon to transfer the piece content of downloading file via unix domain socket
// when the `transfer_from_dfdaemon` is true. Otherwise, dfdaemon will download the file and hardlink or // when the `transfer_from_dfdaemon` is true. Otherwise, dfdaemon will download the file and hardlink or
@ -477,8 +456,6 @@ impl ExportCommand {
), ),
need_piece_content, need_piece_content,
force_hard_link: self.force_hard_link, force_hard_link: self.force_hard_link,
digest: self.digest.clone(),
remote_ip: Some(local_ip().unwrap().to_string()),
}) })
.await .await
.inspect_err(|err| { .inspect_err(|err| {
@ -577,12 +554,7 @@ impl ExportCommand {
Ok(()) Ok(())
} }
/// Validates command line arguments for the export operation to ensure safe file output.
///
/// This function performs essential validation of the output path to prevent file conflicts
/// and ensure the target location is suitable for export operations. It checks parent
/// directory existence, prevents accidental file overwrites, and validates path accessibility
/// before allowing the export operation to proceed.
/// validate_args validates the command line arguments.
fn validate_args(&self) -> Result<()> { fn validate_args(&self) -> Result<()> {
let absolute_path = Path::new(&self.output).absolutize()?; let absolute_path = Path::new(&self.output).absolutize()?;
match absolute_path.parent() { match absolute_path.parent() {

View File

@ -24,7 +24,6 @@ use dragonfly_client_core::{
Error, Result, Error, Result,
}; };
use indicatif::{ProgressBar, ProgressStyle}; use indicatif::{ProgressBar, ProgressStyle};
use local_ip_address::local_ip;
use path_absolutize::*; use path_absolutize::*;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::time::Duration; use std::time::Duration;
@ -122,19 +121,17 @@ pub struct ImportCommand {
)] )]
log_max_files: usize, log_max_files: usize,
#[arg(long, default_value_t = false, help = "Specify whether to print log")]
console: bool,

#[arg(
long = "verbose",
default_value_t = false,
help = "Specify whether to print log"
)]
verbose: bool,
} }
/// Implement the execute for ImportCommand. /// Implement the execute for ImportCommand.
impl ImportCommand { impl ImportCommand {
/// Executes the import sub command with comprehensive validation and error handling.
///
/// This function serves as the main entry point for the dfcache import command execution.
/// It handles the complete workflow including argument parsing, validation, logging setup,
/// dfdaemon client connection, and import operation execution. The function provides
/// detailed error reporting with colored terminal output and follows a fail-fast approach
/// with immediate process termination on any critical failures.
/// execute executes the import sub command.
pub async fn execute(&self) -> Result<()> { pub async fn execute(&self) -> Result<()> {
// Parse command line arguments. // Parse command line arguments.
Args::parse(); Args::parse();
@ -146,12 +143,7 @@ impl ImportCommand {
self.log_level, self.log_level,
self.log_max_files, self.log_max_files,
None, None,
None, self.verbose,
None,
None,
None,
false,
self.console,
); );
// Validate the command line arguments. // Validate the command line arguments.
@ -332,13 +324,7 @@ impl ImportCommand {
Ok(()) Ok(())
} }
/// Executes the cache import operation by uploading a file to the persistent cache system.
///
/// This function handles the core import functionality by uploading a local file to the
/// dfdaemon persistent cache system. It provides visual feedback through a progress spinner,
/// converts the file path to absolute format, and configures the cache task with specified
/// parameters including TTL, replica count, and piece length. The operation is asynchronous
/// and provides completion feedback with the generated task ID.
/// run runs the import sub command.
async fn run(&self, dfdaemon_download_client: DfdaemonDownloadClient) -> Result<()> { async fn run(&self, dfdaemon_download_client: DfdaemonDownloadClient) -> Result<()> {
let absolute_path = Path::new(&self.path).absolutize()?; let absolute_path = Path::new(&self.path).absolutize()?;
info!("import file: {}", absolute_path.to_string_lossy()); info!("import file: {}", absolute_path.to_string_lossy());
@ -367,7 +353,6 @@ impl ImportCommand {
prost_wkt_types::Duration::try_from(self.timeout) prost_wkt_types::Duration::try_from(self.timeout)
.or_err(ErrorType::ParseError)?, .or_err(ErrorType::ParseError)?,
), ),
remote_ip: Some(local_ip().unwrap().to_string()),
}) })
.await?; .await?;
@ -375,12 +360,7 @@ impl ImportCommand {
Ok(()) Ok(())
} }
/// Validates command line arguments for the import operation to ensure safe and correct execution.
///
/// This function performs comprehensive validation of import-specific parameters to prevent
/// invalid operations and ensure the import request meets all system requirements. It validates
/// TTL boundaries, file existence and type, and piece length constraints before allowing the
/// import operation to proceed.
/// validate_args validates the command line arguments.
fn validate_args(&self) -> Result<()> { fn validate_args(&self) -> Result<()> {
if self.ttl < Duration::from_secs(5 * 60) if self.ttl < Duration::from_secs(5 * 60)
|| self.ttl > Duration::from_secs(7 * 24 * 60 * 60) || self.ttl > Duration::from_secs(7 * 24 * 60 * 60)

View File

@ -106,12 +106,7 @@ async fn main() -> anyhow::Result<()> {
Ok(()) Ok(())
} }
/// Creates and validates a dfdaemon download client with health checking.
///
/// This function establishes a connection to the dfdaemon service via Unix domain socket
/// and performs a health check to ensure the service is running and ready to handle
/// download requests. Only after successful health verification does it return the
/// download client for actual use.
/// get_and_check_dfdaemon_download_client gets a dfdaemon download client and checks its health.
pub async fn get_dfdaemon_download_client(endpoint: PathBuf) -> Result<DfdaemonDownloadClient> { pub async fn get_dfdaemon_download_client(endpoint: PathBuf) -> Result<DfdaemonDownloadClient> {
// Check dfdaemon's health. // Check dfdaemon's health.
let health_client = HealthClient::new_unix(endpoint.clone()).await?; let health_client = HealthClient::new_unix(endpoint.clone()).await?;

View File

@ -22,7 +22,6 @@ use dragonfly_client_core::{
Error, Result, Error, Result,
}; };
use humantime::format_duration; use humantime::format_duration;
use local_ip_address::local_ip;
use std::time::Duration; use std::time::Duration;
use tabled::{ use tabled::{
settings::{object::Rows, Alignment, Modify, Style}, settings::{object::Rows, Alignment, Modify, Style},
@ -68,19 +67,17 @@ pub struct StatCommand {
)] )]
log_max_files: usize, log_max_files: usize,
#[arg(long, default_value_t = false, help = "Specify whether to print log")]
console: bool,

#[arg(
long = "verbose",
default_value_t = false,
help = "Specify whether to print log"
)]
verbose: bool,
} }
/// Implement the execute for StatCommand. /// Implement the execute for StatCommand.
impl StatCommand { impl StatCommand {
/// Executes the stat command with comprehensive error handling and user feedback.
///
/// This function serves as the main entry point for the dfcache stat command execution.
/// It handles the complete lifecycle including argument parsing, logging initialization,
/// dfdaemon client setup, and command execution with detailed error reporting. The
/// function provides colored terminal output for better user experience and exits
/// with appropriate status codes on failure.
/// execute executes the stat command.
pub async fn execute(&self) -> Result<()> { pub async fn execute(&self) -> Result<()> {
// Parse command line arguments. // Parse command line arguments.
Args::parse(); Args::parse();
@ -92,12 +89,7 @@ impl StatCommand {
self.log_level, self.log_level,
self.log_max_files, self.log_max_files,
None, None,
None, self.verbose,
None,
None,
None,
false,
self.console,
); );
// Get dfdaemon download client. // Get dfdaemon download client.
@ -240,17 +232,11 @@ impl StatCommand {
Ok(()) Ok(())
} }
/// Executes the stat command to retrieve and display persistent cache task information.
///
/// This function queries the dfdaemon service for detailed information about a specific
/// persistent cache task and presents it in a formatted table for user consumption.
/// It handles data conversion from raw protocol buffer values to human-readable formats
/// including byte sizes, durations, and timestamps with proper timezone conversion.
/// run runs the stat command.
async fn run(&self, dfdaemon_download_client: DfdaemonDownloadClient) -> Result<()> { async fn run(&self, dfdaemon_download_client: DfdaemonDownloadClient) -> Result<()> {
let task = dfdaemon_download_client let task = dfdaemon_download_client
.stat_persistent_cache_task(StatPersistentCacheTaskRequest { .stat_persistent_cache_task(StatPersistentCacheTaskRequest {
task_id: self.id.clone(), task_id: self.id.clone(),
remote_ip: Some(local_ip().unwrap().to_string()),
}) })
.await?; .await?;

View File

@ -15,7 +15,7 @@
*/ */
use clap::Parser; use clap::Parser;
use dragonfly_client::announcer::SchedulerAnnouncer;
use dragonfly_client::announcer::{ManagerAnnouncer, SchedulerAnnouncer};
use dragonfly_client::dynconfig::Dynconfig; use dragonfly_client::dynconfig::Dynconfig;
use dragonfly_client::gc::GC; use dragonfly_client::gc::GC;
use dragonfly_client::grpc::{ use dragonfly_client::grpc::{
@ -30,9 +30,10 @@ use dragonfly_client::shutdown;
use dragonfly_client::stats::Stats; use dragonfly_client::stats::Stats;
use dragonfly_client::tracing::init_tracing; use dragonfly_client::tracing::init_tracing;
use dragonfly_client_backend::BackendFactory; use dragonfly_client_backend::BackendFactory;
use dragonfly_client_config::{dfdaemon, VersionValueParser};
use dragonfly_client_config::dfdaemon;
use dragonfly_client_config::VersionValueParser;
use dragonfly_client_storage::Storage;
use dragonfly_client_util::{id_generator::IDGenerator, net::Interface};
use dragonfly_client_util::id_generator::IDGenerator;
use std::net::SocketAddr; use std::net::SocketAddr;
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::Arc; use std::sync::Arc;
@ -91,8 +92,12 @@ struct Args {
)] )]
log_max_files: usize, log_max_files: usize,
#[arg(long, default_value_t = true, help = "Specify whether to print log")]
console: bool,

#[arg(
long = "verbose",
default_value_t = true,
help = "Specify whether to print log"
)]
verbose: bool,
#[arg( #[arg(
short = 'V', short = 'V',
@ -145,13 +150,8 @@ async fn main() -> Result<(), anyhow::Error> {
args.log_dir.clone(), args.log_dir.clone(),
args.log_level, args.log_level,
args.log_max_files, args.log_max_files,
config.tracing.protocol.clone(), config.tracing.addr.to_owned(),
config.tracing.endpoint.clone(), args.verbose,
config.tracing.path.clone(),
Some(config.tracing.headers.clone()),
Some(config.host.clone()),
config.seed_peer.enable,
args.console,
); );
// Initialize storage. // Initialize storage.
@ -229,9 +229,6 @@ async fn main() -> Result<(), anyhow::Error> {
)?; )?;
let persistent_cache_task = Arc::new(persistent_cache_task); let persistent_cache_task = Arc::new(persistent_cache_task);
let interface = Interface::new(config.host.ip.unwrap(), config.upload.rate_limit);
let interface = Arc::new(interface);
// Initialize health server. // Initialize health server.
let health = Health::new( let health = Health::new(
SocketAddr::new(config.health.server.ip.unwrap(), config.health.server.port), SocketAddr::new(config.health.server.ip.unwrap(), config.health.server.port),
@ -261,12 +258,19 @@ async fn main() -> Result<(), anyhow::Error> {
shutdown_complete_tx.clone(), shutdown_complete_tx.clone(),
); );
// Initialize manager announcer.
let manager_announcer = ManagerAnnouncer::new(
config.clone(),
manager_client.clone(),
shutdown.clone(),
shutdown_complete_tx.clone(),
);
// Initialize scheduler announcer. // Initialize scheduler announcer.
let scheduler_announcer = SchedulerAnnouncer::new( let scheduler_announcer = SchedulerAnnouncer::new(
config.clone(), config.clone(),
id_generator.host_id(), id_generator.host_id(),
scheduler_client.clone(), scheduler_client.clone(),
interface.clone(),
shutdown.clone(), shutdown.clone(),
shutdown_complete_tx.clone(), shutdown_complete_tx.clone(),
) )
@ -281,7 +285,6 @@ async fn main() -> Result<(), anyhow::Error> {
SocketAddr::new(config.upload.server.ip.unwrap(), config.upload.server.port), SocketAddr::new(config.upload.server.ip.unwrap(), config.upload.server.port),
task.clone(), task.clone(),
persistent_cache_task.clone(), persistent_cache_task.clone(),
interface.clone(),
shutdown.clone(), shutdown.clone(),
shutdown_complete_tx.clone(), shutdown_complete_tx.clone(),
); );
@ -330,6 +333,10 @@ async fn main() -> Result<(), anyhow::Error> {
info!("stats server exited"); info!("stats server exited");
}, },
_ = tokio::spawn(async move { manager_announcer.run().await.unwrap_or_else(|err| error!("announcer manager failed: {}", err))} ) => {
info!("announcer manager exited");
},
_ = tokio::spawn(async move { scheduler_announcer.run().await }) => { _ = tokio::spawn(async move { scheduler_announcer.run().await }) => {
info!("announcer scheduler exited"); info!("announcer scheduler exited");
}, },

View File

@ -17,30 +17,32 @@
use bytesize::ByteSize; use bytesize::ByteSize;
use clap::Parser; use clap::Parser;
use dragonfly_api::common::v2::{Download, Hdfs, ObjectStorage, TaskType}; use dragonfly_api::common::v2::{Download, Hdfs, ObjectStorage, TaskType};
use dragonfly_api::dfdaemon::v2::{
download_task_response, DownloadTaskRequest, ListTaskEntriesRequest,
};
use dragonfly_api::dfdaemon::v2::{download_task_response, DownloadTaskRequest};
use dragonfly_api::errordetails::v2::Backend;
use dragonfly_client::grpc::dfdaemon_download::DfdaemonDownloadClient;
use dragonfly_client::grpc::health::HealthClient;
use dragonfly_client::metrics::{
collect_backend_request_failure_metrics, collect_backend_request_finished_metrics,
collect_backend_request_started_metrics,
};
use dragonfly_client::resource::piece::MIN_PIECE_LENGTH;
use dragonfly_client::tracing::init_tracing;
use dragonfly_client_backend::{hdfs, object_storage, BackendFactory, DirEntry};
use dragonfly_client_backend::{hdfs, object_storage, BackendFactory, DirEntry, HeadRequest};
use dragonfly_client_config::VersionValueParser;
use dragonfly_client_config::{self, dfdaemon, dfget};
use dragonfly_client_core::error::{ErrorType, OrErr};
use dragonfly_client_core::error::{BackendError, ErrorType, OrErr};
use dragonfly_client_core::{Error, Result};
use dragonfly_client_util::{fs::fallocate, http::header_vec_to_hashmap};
use dragonfly_client_util::{
fs::fallocate,
http::{header_vec_to_hashmap, header_vec_to_headermap},
};
use glob::Pattern;
use indicatif::{MultiProgress, ProgressBar, ProgressState, ProgressStyle};
use local_ip_address::local_ip;
use path_absolutize::*;
use percent_encoding::percent_decode_str;
use std::collections::{HashMap, HashSet};
use std::path::{Component, Path, PathBuf};
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use std::time::{Duration, Instant};
use std::{cmp::min, fmt::Write};
use termion::{color, style};
use tokio::fs::{self, OpenOptions};
@ -111,7 +113,7 @@ struct Args {
#[arg( #[arg(
long = "content-for-calculating-task-id", long = "content-for-calculating-task-id",
help = "Specify the content used to calculate the task ID. If it is set, use its value to calculate the task ID, Otherwise, calculate the task ID based on URL, piece-length, tag, application, and filtered-query-params." help = "Specify the content used to calculate the task ID. If it is set, use its value to calculate the task ID, Otherwise, calculate the task ID based on url, piece-length, tag, application, and filtered-query-params."
)] )]
content_for_calculating_task_id: Option<String>, content_for_calculating_task_id: Option<String>,
@ -139,11 +141,12 @@ struct Args {
timeout: Duration, timeout: Duration,
#[arg(
long = "digest",
required = false,
help = "Verify the integrity of the downloaded file using the specified digest, support sha256, sha512, crc32. If the digest is not specified, the downloaded file will not be verified. Format: <algorithm>:<digest>. Examples: sha256:1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef, crc32:12345678"
)]
digest: Option<String>,

#[arg(
short = 'd',
long = "digest",
default_value = "",
help = "Verify the integrity of the downloaded file using the specified digest, e.g. md5:86d3f3a95c324c9479bd8986968f4327"
)]
digest: String,
#[arg( #[arg(
short = 'p', short = 'p',
@ -163,14 +166,14 @@ struct Args {
#[arg( #[arg(
long = "application", long = "application",
default_value = "", default_value = "",
help = "Different applications for the same URL will be divided into different tasks" help = "Different applications for the same url will be divided into different tasks"
)] )]
application: String, application: String,
#[arg( #[arg(
long = "tag", long = "tag",
default_value = "", default_value = "",
help = "Different tags for the same URL will be divided into different tasks" help = "Different tags for the same url will be divided into different tasks"
)] )]
tag: String, tag: String,
@ -178,24 +181,17 @@ struct Args {
short = 'H', short = 'H',
long = "header", long = "header",
required = false, required = false,
help = "Specify the header for downloading file. Examples: --header='Content-Type: application/json' --header='Accept: application/json'" help = "Specify the header for downloading file, e.g. --header='Content-Type: application/json' --header='Accept: application/json'"
)] )]
header: Option<Vec<String>>, header: Option<Vec<String>>,
#[arg( #[arg(
long = "filtered-query-param", long = "filtered-query-param",
required = false, required = false,
help = "Filter the query parameters of the downloaded URL. If the download URL is the same, it will be scheduled as the same task. Examples: --filtered-query-param='signature' --filtered-query-param='timeout'" help = "Filter the query parameters of the downloaded URL. If the download URL is the same, it will be scheduled as the same task, e.g. --filtered-query-param='signature' --filtered-query-param='timeout'"
)] )]
filtered_query_params: Option<Vec<String>>, filtered_query_params: Option<Vec<String>>,
#[arg(
long = "include-files",
required = false,
help = "Filter files to download in a directory using glob patterns relative to the root URL's path. Examples: --include-files='*.txt' --include-files='subdir/file.txt'"
)]
include_files: Option<Vec<String>>,
#[arg( #[arg(
long = "disable-back-to-source", long = "disable-back-to-source",
default_value_t = false, default_value_t = false,
@ -282,8 +278,12 @@ struct Args {
)] )]
log_max_files: usize, log_max_files: usize,
#[arg(long, default_value_t = false, help = "Specify whether to print log")]
console: bool,

#[arg(
long = "verbose",
default_value_t = false,
help = "Specify whether to print log"
)]
verbose: bool,
#[arg( #[arg(
short = 'V', short = 'V',
@ -308,12 +308,7 @@ async fn main() -> anyhow::Result<()> {
args.log_level, args.log_level,
args.log_max_files, args.log_max_files,
None, None,
None, args.verbose,
None,
None,
None,
false,
args.console,
); );
// Validate command line arguments. // Validate command line arguments.
@ -603,12 +598,7 @@ async fn main() -> anyhow::Result<()> {
Ok(()) Ok(())
} }
/// Runs the dfget command to download files or directories from a given URL.
///
/// This function serves as the main entry point for the dfget download operation.
/// It handles both single file downloads and directory downloads based on the URL format.
/// The function performs path normalization, validates the URL scheme's capabilities,
/// and delegates to the appropriate download handler.
/// run runs the dfget command.
async fn run(mut args: Args, dfdaemon_download_client: DfdaemonDownloadClient) -> Result<()> { async fn run(mut args: Args, dfdaemon_download_client: DfdaemonDownloadClient) -> Result<()> {
// Get the absolute path of the output file. // Get the absolute path of the output file.
args.output = Path::new(&args.output).absolutize()?.into(); args.output = Path::new(&args.output).absolutize()?.into();
@ -618,7 +608,7 @@ async fn run(mut args: Args, dfdaemon_download_client: DfdaemonDownloadClient) -
// then download all files in the directory. Otherwise, download the single file. // then download all files in the directory. Otherwise, download the single file.
let scheme = args.url.scheme(); let scheme = args.url.scheme();
if args.url.path().ends_with('/') { if args.url.path().ends_with('/') {
if BackendFactory::unsupported_download_directory(scheme) {
if !BackendFactory::supported_download_directory(scheme) {
return Err(Error::Unsupported(format!("{} download directory", scheme))); return Err(Error::Unsupported(format!("{} download directory", scheme)));
}; };
@ -628,13 +618,7 @@ async fn run(mut args: Args, dfdaemon_download_client: DfdaemonDownloadClient) -
download(args, ProgressBar::new(0), dfdaemon_download_client).await download(args, ProgressBar::new(0), dfdaemon_download_client).await
} }
/// Downloads all files in a directory from various storage backends (object storage, HDFS, etc.).
///
/// This function handles directory-based downloads by recursively fetching all entries
/// in the specified directory. It supports filtering files based on include patterns,
/// enforces download limits, and performs concurrent downloads with configurable
/// concurrency control. The function creates the necessary directory structure
/// locally and downloads files while preserving the remote directory hierarchy.
/// download_dir downloads all files in the directory.
async fn download_dir(args: Args, download_client: DfdaemonDownloadClient) -> Result<()> { async fn download_dir(args: Args, download_client: DfdaemonDownloadClient) -> Result<()> {
// Initialize the object storage config and the hdfs config. // Initialize the object storage config and the hdfs config.
let object_storage = Some(ObjectStorage { let object_storage = Some(ObjectStorage {
@ -651,17 +635,12 @@ async fn download_dir(args: Args, download_client: DfdaemonDownloadClient) -> Re
delegation_token: args.hdfs_delegation_token.clone(), delegation_token: args.hdfs_delegation_token.clone(),
}); });
// Get all entries in the directory.
let mut entries = get_entries(&args, object_storage, hdfs, download_client.clone()).await?;
if let Some(ref include_files) = args.include_files {
entries = filter_entries(&args.url, entries, include_files)?;
}

// If the entries is empty, then return directly.
if entries.is_empty() {
warn!("no entries found in directory {}", args.url);
return Ok(());
}

// Get all entries in the directory. If the directory is empty, then return directly.
let entries = get_entries(args.clone(), object_storage, hdfs).await?;
if entries.is_empty() {
warn!("directory {} is empty", args.url);
return Ok(());
};
// If the actual file count is greater than the max_files, then reject the downloading. // If the actual file count is greater than the max_files, then reject the downloading.
let count = entries.iter().filter(|entry| !entry.is_dir).count(); let count = entries.iter().filter(|entry| !entry.is_dir).count();
@ -732,13 +711,7 @@ async fn download_dir(args: Args, download_client: DfdaemonDownloadClient) -> Re
Ok(()) Ok(())
} }
/// Downloads a single file from various storage backends using the dfdaemon service.
///
/// This function handles single file downloads by communicating with a dfdaemon client.
/// It supports multiple storage protocols (object storage, HDFS, HTTP/HTTPS) and provides
/// two transfer modes: direct download by dfdaemon or streaming piece content through
/// the client. The function includes progress tracking, file creation, and proper error
/// handling throughout the download process.
/// download downloads the single file.
async fn download( async fn download(
args: Args, args: Args,
progress_bar: ProgressBar, progress_bar: ProgressBar,
@ -785,7 +758,7 @@ async fn download(
.download_task(DownloadTaskRequest { .download_task(DownloadTaskRequest {
download: Some(Download { download: Some(Download {
url: args.url.to_string(), url: args.url.to_string(),
digest: args.digest,
digest: Some(args.digest),
// NOTE: Dfget does not support range download. // NOTE: Dfget does not support range download.
range: None, range: None,
r#type: TaskType::Standard as i32, r#type: TaskType::Standard as i32,
@ -808,9 +781,9 @@ async fn download(
need_piece_content, need_piece_content,
object_storage, object_storage,
hdfs, hdfs,
load_to_cache: false,
force_hard_link: args.force_hard_link, force_hard_link: args.force_hard_link,
content_for_calculating_task_id: args.content_for_calculating_task_id, content_for_calculating_task_id: args.content_for_calculating_task_id,
remote_ip: Some(local_ip().unwrap().to_string()),
}), }),
}) })
.await .await
@ -910,116 +883,69 @@ async fn download(
Ok(()) Ok(())
} }
/// Retrieves all directory entries from a remote storage location.
///
/// This function communicates with the dfdaemon service to list all entries
/// (files and subdirectories) in the specified directory URL. It supports
/// various storage backends including object storage and HDFS by passing
/// the appropriate credentials and configuration. The function converts
/// the gRPC response into a local `DirEntry` format for further processing.
async fn get_entries(
args: &Args,
object_storage: Option<ObjectStorage>,
hdfs: Option<Hdfs>,
download_client: DfdaemonDownloadClient,
) -> Result<Vec<DirEntry>> {
info!("list task entries: {:?}", args.url);

// List task entries.
let response = download_client
.list_task_entries(ListTaskEntriesRequest {
task_id: Uuid::new_v4().to_string(),
url: args.url.to_string(),
request_header: header_vec_to_hashmap(args.header.clone().unwrap_or_default())?,
timeout: None,
certificate_chain: Vec::new(),
object_storage,
hdfs,
remote_ip: Some(local_ip().unwrap().to_string()),
})
.await
.inspect_err(|err| {
error!("list task entries failed: {}", err);
})?;

Ok(response
.entries
.into_iter()
.map(|entry| DirEntry {
url: entry.url,
content_length: entry.content_length as usize,
is_dir: entry.is_dir,
})
.collect())
}

/// Filters directory entries based on include patterns and validates their URLs.
///
/// This function takes a collection of directory entries and filters them based on
/// glob patterns specified in `include_files`. It performs URL validation to ensure
/// all entries have valid URLs and that their paths fall within the scope of the
/// root URL. When an entry matches a pattern, both the entry and its parent
/// directory (if it exists) are included in the result.
fn filter_entries(
url: &Url,
entries: Vec<DirEntry>,
include_files: &[String],
) -> Result<Vec<DirEntry>> {
let patterns: Vec<Pattern> = include_files
.iter()
.filter_map(|include_file| Pattern::new(include_file).ok())
.collect();

// Build a HashMap of DirEntry objects keyed by relative paths for filtering and
// validates URLs and ensures paths are within the root URL's scope.
let mut entries_by_relative_path = HashMap::with_capacity(entries.len());
for entry in entries {
let entry_url = Url::parse(&entry.url).map_err(|err| {
error!("failed to parse entry URL '{}': {}", entry.url, err);
Error::ValidationError(format!("invalid URL: {}", entry.url))
})?;

let entry_path = entry_url.path();
match entry_path.strip_prefix(url.path()) {
Some(relative_path) => entries_by_relative_path
.insert(relative_path.trim_start_matches('/').to_string(), entry),
None => {
error!(
"entry path '{}' does not belong to the root path",
entry_path
);

return Err(Error::ValidationError(format!(
"path '{}' is outside the expected scope",
entry_path
)));
}
};
}

// Filter entries by matching relative paths against patterns, including
// parent directories for matches.
let mut filtered_entries = HashSet::new();
for (relative_path, entry) in &entries_by_relative_path {
if patterns.iter().any(|pat| pat.matches(relative_path)) {
filtered_entries.insert(entry.clone());
if let Some(parent) = std::path::Path::new(relative_path).parent() {
if let Some(parent_entry) =
entries_by_relative_path.get(&parent.join("").to_string_lossy().to_string())
{
filtered_entries.insert(parent_entry.clone());
}
}
}
}

Ok(filtered_entries.into_iter().collect())
}

/// get_entries gets all entries in the directory.
async fn get_entries(
args: Args,
object_storage: Option<ObjectStorage>,
hdfs: Option<Hdfs>,
) -> Result<Vec<DirEntry>> {
// Initialize backend factory and build backend.
let backend_factory = BackendFactory::new(None)?;
let backend = backend_factory.build(args.url.as_str())?;

// Collect backend request started metrics.
collect_backend_request_started_metrics(backend.scheme().as_str(), http::Method::HEAD.as_str());

// Record the start time.
let start_time = Instant::now();

let response = backend
.head(HeadRequest {
// NOTE: Mock a task id for head request.
task_id: Uuid::new_v4().to_string(),
url: args.url.to_string(),
http_header: Some(header_vec_to_headermap(
args.header.clone().unwrap_or_default(),
)?),
timeout: args.timeout,
client_cert: None,
object_storage,
hdfs,
})
.await
.inspect_err(|_err| {
// Collect backend request failure metrics.
collect_backend_request_failure_metrics(
backend.scheme().as_str(),
http::Method::HEAD.as_str(),
);
})?;

// Return error when response is failed.
if !response.success {
// Collect backend request failure metrics.
collect_backend_request_failure_metrics(
backend.scheme().as_str(),
http::Method::HEAD.as_str(),
);

return Err(Error::BackendError(Box::new(BackendError {
message: response.error_message.unwrap_or_default(),
status_code: Some(response.http_status_code.unwrap_or_default()),
header: Some(response.http_header.unwrap_or_default()),
})));
}

// Collect backend request finished metrics.
collect_backend_request_finished_metrics(
backend.scheme().as_str(),
http::Method::HEAD.as_str(),
start_time.elapsed(),
);

Ok(response.entries)
}

/// Constructs the local output path for a directory entry based on its remote URL.
///
/// This function maps a remote directory entry to its corresponding local file system
/// path by replacing the remote root directory with the local output directory.
/// It handles URL percent-decoding to ensure proper path construction and maintains
/// the relative directory structure from the remote source.
/// make_output_by_entry makes the output path by the entry information.
fn make_output_by_entry(url: Url, output: &Path, entry: DirEntry) -> Result<PathBuf> {
// Get the root directory of the download directory and the output root directory. // Get the root directory of the download directory and the output root directory.
let root_dir = url.path().to_string(); let root_dir = url.path().to_string();
@ -1037,12 +963,7 @@ fn make_output_by_entry(url: Url, output: &Path, entry: DirEntry) -> Result<Path
.into()) .into())
} }
/// Creates and validates a dfdaemon download client with health checking.
///
/// This function establishes a connection to the dfdaemon service via Unix domain socket
/// and performs a health check to ensure the service is running and ready to handle
/// download requests. Only after successful health verification does it return the
/// download client for actual use.
/// get_and_check_dfdaemon_download_client gets a dfdaemon download client and checks its health.
async fn get_dfdaemon_download_client(endpoint: PathBuf) -> Result<DfdaemonDownloadClient> { async fn get_dfdaemon_download_client(endpoint: PathBuf) -> Result<DfdaemonDownloadClient> {
// Check dfdaemon's health. // Check dfdaemon's health.
let health_client = HealthClient::new_unix(endpoint.clone()).await?; let health_client = HealthClient::new_unix(endpoint.clone()).await?;
@ -1053,13 +974,7 @@ async fn get_dfdaemon_download_client(endpoint: PathBuf) -> Result<DfdaemonDownl
Ok(dfdaemon_download_client) Ok(dfdaemon_download_client)
} }
/// Validates command line arguments for consistency and safety requirements.
///
/// This function performs comprehensive validation of the download arguments to ensure
/// they are logically consistent and safe to execute. It checks URL-output path matching,
/// directory existence, file conflicts, piece length constraints, and glob pattern validity.
/// The validation prevents common user errors and potential security issues before
/// starting the download process.
/// validate_args validates the command line arguments.
fn validate_args(args: &Args) -> Result<()> { fn validate_args(args: &Args) -> Result<()> {
// If the URL is a directory, the output path should be a directory. // If the URL is a directory, the output path should be a directory.
if args.url.path().ends_with('/') && !args.output.is_dir() { if args.url.path().ends_with('/') && !args.output.is_dir() {
@ -1108,42 +1023,9 @@ fn validate_args(args: &Args) -> Result<()> {
} }
} }
if let Some(ref include_files) = args.include_files {
for include_file in include_files {
if Pattern::new(include_file).is_err() {
return Err(Error::ValidationError(format!(
"invalid glob pattern in include_files: '{}'",
include_file
)));
}
if !is_normal_relative_path(include_file) {
return Err(Error::ValidationError(format!(
"path is not a normal relative path in include_files: '{}'. It must not contain '..', '.', or start with '/'.",
include_file
)));
}
}
}
Ok(()) Ok(())
} }
/// Validates that a path string is a normal relative path without unsafe components.
///
/// This function ensures that a given path is both relative (doesn't start with '/')
/// and contains only normal path components. It rejects paths with parent directory
/// references ('..'), current directory references ('.'), or any other special
/// path components that could be used for directory traversal attacks or unexpected
/// file system navigation.
fn is_normal_relative_path(path: &str) -> bool {
let path = Path::new(path);
path.is_relative()
&& path
.components()
.all(|comp| matches!(comp, Component::Normal(_)))
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@ -1307,346 +1189,4 @@ mod tests {
let result = make_output_by_entry(url, output, entry); let result = make_output_by_entry(url, output, entry);
assert!(result.is_err()); assert!(result.is_err());
} }
#[test]
fn should_filter_entries() {
let test_cases = vec![
(
Url::parse("http://example.com/root/").unwrap(),
vec![
DirEntry {
url: "http://example.com/root/dir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/file.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/file2.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/subdir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/subdir/file3.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/subdir/file4.png".to_string(),
content_length: 100,
is_dir: false,
},
],
vec!["dir/file.txt".to_string()],
vec![
DirEntry {
url: "http://example.com/root/dir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/file.txt".to_string(),
content_length: 100,
is_dir: false,
},
],
),
(
Url::parse("http://example.com/root/").unwrap(),
vec![
DirEntry {
url: "http://example.com/root/dir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/file.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/file2.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/subdir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/subdir/file3.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/subdir/file4.png".to_string(),
content_length: 100,
is_dir: false,
},
],
vec![
"dir/file.txt".to_string(),
"dir/subdir/file4.png".to_string(),
],
vec![
DirEntry {
url: "http://example.com/root/dir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/file.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/subdir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/subdir/file4.png".to_string(),
content_length: 100,
is_dir: false,
},
],
),
(
Url::parse("http://example.com/root/").unwrap(),
vec![
DirEntry {
url: "http://example.com/root/dir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/file.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/file2.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/subdir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/subdir/file3.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/subdir/file4.png".to_string(),
content_length: 100,
is_dir: false,
},
],
vec!["dir/subdir/*.png".to_string()],
vec![
DirEntry {
url: "http://example.com/root/dir/subdir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/subdir/file4.png".to_string(),
content_length: 100,
is_dir: false,
},
],
),
(
Url::parse("http://example.com/root/").unwrap(),
vec![
DirEntry {
url: "http://example.com/root/dir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/file.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/file2.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/subdir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/subdir/file3.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/subdir/file4.png".to_string(),
content_length: 100,
is_dir: false,
},
],
vec!["dir/*".to_string()],
vec![
DirEntry {
url: "http://example.com/root/dir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/file.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/file2.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/subdir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/subdir/file3.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/subdir/file4.png".to_string(),
content_length: 100,
is_dir: false,
},
],
),
(
Url::parse("http://example.com/root/").unwrap(),
vec![
DirEntry {
url: "http://example.com/root/dir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/file.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/file2.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/subdir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/subdir/file3.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/subdir/file4.png".to_string(),
content_length: 100,
is_dir: false,
},
],
vec!["dir/".to_string()],
vec![DirEntry {
url: "http://example.com/root/dir/".to_string(),
content_length: 10,
is_dir: true,
}],
),
(
Url::parse("http://example.com/root/").unwrap(),
vec![
DirEntry {
url: "http://example.com/root/dir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/file.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/file2.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/subdir/".to_string(),
content_length: 10,
is_dir: true,
},
DirEntry {
url: "http://example.com/root/dir/subdir/file3.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: "http://example.com/root/dir/subdir/file4.png".to_string(),
content_length: 100,
is_dir: false,
},
],
vec!["test".to_string()],
vec![],
),
(
Url::parse("http://example.com/root/").unwrap(),
vec![
DirEntry {
url: "http://example.com/root/dir/file.txt".to_string(),
content_length: 100,
is_dir: false,
},
DirEntry {
url: " ".to_string(),
content_length: 100,
is_dir: false,
},
],
vec!["dir/file.txt".to_string()],
vec![],
),
];
for (url, entries, include_files, expected_entries) in test_cases {
let result = filter_entries(&url, entries, &include_files);
if result.is_err() {
assert!(matches!(result, Err(Error::ValidationError(_))));
} else {
let filtered_entries = result.unwrap();
assert_eq!(filtered_entries.len(), expected_entries.len());
for filtered_entry in &filtered_entries {
assert!(expected_entries
.iter()
.any(|expected_entry| { expected_entry.url == filtered_entry.url }));
}
}
}
}
} }
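
The `should_filter_entries` cases above exercise `filter_entries` with glob-style `include_files` patterns resolved against the root URL. A minimal sketch of that matching idea, assuming the external `glob` and `url` crates; `keep_entry` and the sample inputs are illustrative and not the actual `filter_entries` implementation:

```rust
use glob::Pattern;
use url::Url;

/// keep_entry is a hypothetical check: does the entry's path, taken relative
/// to the root URL, match any of the include_files patterns?
fn keep_entry(root: &Url, entry_url: &str, include_files: &[String]) -> bool {
    let Ok(entry) = Url::parse(entry_url) else {
        return false;
    };

    // Strip the root path prefix to get the relative path, e.g.
    // "dir/subdir/file4.png" for ".../root/dir/subdir/file4.png".
    let Some(relative) = entry.path().strip_prefix(root.path()) else {
        return false;
    };

    include_files.iter().any(|pattern| {
        Pattern::new(pattern)
            .map(|p| p.matches(relative))
            .unwrap_or(false)
    })
}

fn main() {
    let root = Url::parse("http://example.com/root/").unwrap();
    let include_files = vec!["dir/subdir/*.png".to_string()];

    assert!(keep_entry(
        &root,
        "http://example.com/root/dir/subdir/file4.png",
        &include_files
    ));
    assert!(!keep_entry(
        &root,
        "http://example.com/root/dir/file.txt",
        &include_files
    ));
}
```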


@ -65,6 +65,7 @@ pub struct Dynconfig {
/// Dynconfig is the implementation of Dynconfig. /// Dynconfig is the implementation of Dynconfig.
impl Dynconfig { impl Dynconfig {
/// new creates a new Dynconfig. /// new creates a new Dynconfig.
#[instrument(skip_all)]
pub async fn new( pub async fn new(
config: Arc<Config>, config: Arc<Config>,
manager_client: Arc<ManagerClient>, manager_client: Arc<ManagerClient>,
@ -87,6 +88,7 @@ impl Dynconfig {
} }
/// run starts the dynconfig server. /// run starts the dynconfig server.
#[instrument(skip_all)]
pub async fn run(&self) { pub async fn run(&self) {
// Clone the shutdown channel. // Clone the shutdown channel.
let mut shutdown = self.shutdown.clone(); let mut shutdown = self.shutdown.clone();
@ -162,7 +164,6 @@ impl Dynconfig {
location: self.config.host.location.clone(), location: self.config.host.location.clone(),
version: CARGO_PKG_VERSION.to_string(), version: CARGO_PKG_VERSION.to_string(),
commit: GIT_COMMIT_SHORT_HASH.to_string(), commit: GIT_COMMIT_SHORT_HASH.to_string(),
scheduler_cluster_id: self.config.host.scheduler_cluster_id.unwrap_or(0),
}) })
.await .await
} }
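
This diff adds `#[instrument(skip_all)]` to constructors and `run` loops here and in the GC and gRPC servers below. A minimal sketch of what that attribute does, assuming the `tracing`, `tracing-subscriber`, and `tokio` crates; `refresh` and the config path are illustrative stand-ins, not Dragonfly code:

```rust
use tracing::{info, instrument};

/// refresh is a hypothetical async job standing in for something like
/// Dynconfig::run. skip_all opens a span named "refresh" without recording
/// any of the function arguments as span fields, which is useful when the
/// arguments are large or do not implement Debug.
#[instrument(skip_all)]
async fn refresh(config_path: String) {
    info!(path = %config_path, "refreshing dynamic config");
}

#[tokio::main]
async fn main() {
    tracing_subscriber::fmt::init();
    refresh("/etc/dragonfly/dfdaemon.yaml".to_string()).await;
}
```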


@ -53,6 +53,7 @@ pub struct GC {
impl GC { impl GC {
/// new creates a new GC. /// new creates a new GC.
#[instrument(skip_all)]
pub fn new( pub fn new(
config: Arc<Config>, config: Arc<Config>,
host_id: String, host_id: String,
@ -72,6 +73,7 @@ impl GC {
} }
/// run runs the garbage collector. /// run runs the garbage collector.
#[instrument(skip_all)]
pub async fn run(&self) { pub async fn run(&self) {
// Clone the shutdown channel. // Clone the shutdown channel.
let mut shutdown = self.shutdown.clone(); let mut shutdown = self.shutdown.clone();


@ -18,40 +18,34 @@ use crate::metrics::{
collect_delete_host_failure_metrics, collect_delete_host_started_metrics, collect_delete_host_failure_metrics, collect_delete_host_started_metrics,
collect_delete_task_failure_metrics, collect_delete_task_started_metrics, collect_delete_task_failure_metrics, collect_delete_task_started_metrics,
collect_download_task_failure_metrics, collect_download_task_finished_metrics, collect_download_task_failure_metrics, collect_download_task_finished_metrics,
collect_download_task_started_metrics, collect_list_task_entries_failure_metrics, collect_download_task_started_metrics, collect_stat_task_failure_metrics,
collect_list_task_entries_started_metrics, collect_stat_task_failure_metrics,
collect_stat_task_started_metrics, collect_upload_task_failure_metrics, collect_stat_task_started_metrics, collect_upload_task_failure_metrics,
collect_upload_task_finished_metrics, collect_upload_task_started_metrics, collect_upload_task_finished_metrics, collect_upload_task_started_metrics,
}; };
use crate::resource::{persistent_cache_task, task}; use crate::resource::{persistent_cache_task, task};
use crate::shutdown; use crate::shutdown;
use dragonfly_api::common::v2::{CacheTask, PersistentCacheTask, Priority, Task, TaskType}; use dragonfly_api::common::v2::{PersistentCacheTask, Priority, Task, TaskType};
use dragonfly_api::dfdaemon::v2::{ use dragonfly_api::dfdaemon::v2::{
dfdaemon_download_client::DfdaemonDownloadClient as DfdaemonDownloadGRPCClient, dfdaemon_download_client::DfdaemonDownloadClient as DfdaemonDownloadGRPCClient,
dfdaemon_download_server::{ dfdaemon_download_server::{
DfdaemonDownload, DfdaemonDownloadServer as DfdaemonDownloadGRPCServer, DfdaemonDownload, DfdaemonDownloadServer as DfdaemonDownloadGRPCServer,
}, },
DeleteCacheTaskRequest, DeleteTaskRequest, DownloadCacheTaskRequest, DownloadCacheTaskResponse, DeleteTaskRequest, DownloadPersistentCacheTaskRequest, DownloadPersistentCacheTaskResponse,
DownloadPersistentCacheTaskRequest, DownloadPersistentCacheTaskResponse, DownloadTaskRequest, DownloadTaskRequest, DownloadTaskResponse, StatPersistentCacheTaskRequest,
DownloadTaskResponse, Entry, ListTaskEntriesRequest, ListTaskEntriesResponse,
StatCacheTaskRequest as DfdaemonStatCacheTaskRequest, StatPersistentCacheTaskRequest,
StatTaskRequest as DfdaemonStatTaskRequest, UploadPersistentCacheTaskRequest, StatTaskRequest as DfdaemonStatTaskRequest, UploadPersistentCacheTaskRequest,
}; };
use dragonfly_api::errordetails::v2::Backend; use dragonfly_api::errordetails::v2::Backend;
use dragonfly_api::scheduler::v2::DeleteHostRequest as SchedulerDeleteHostRequest; use dragonfly_api::scheduler::v2::DeleteHostRequest as SchedulerDeleteHostRequest;
use dragonfly_client_backend::HeadRequest;
use dragonfly_client_config::dfdaemon::Config; use dragonfly_client_config::dfdaemon::Config;
use dragonfly_client_core::{ use dragonfly_client_core::{
error::{ErrorType, OrErr}, error::{ErrorType, OrErr},
Error as ClientError, Result as ClientResult, Error as ClientError, Result as ClientResult,
}; };
use dragonfly_client_util::{ use dragonfly_client_util::{
digest::{verify_file_digest, Digest},
http::{get_range, hashmap_to_headermap, headermap_to_hashmap}, http::{get_range, hashmap_to_headermap, headermap_to_hashmap},
id_generator::{PersistentCacheTaskIDParameter, TaskIDParameter}, id_generator::{PersistentCacheTaskIDParameter, TaskIDParameter},
}; };
use hyper_util::rt::TokioIo; use hyper_util::rt::TokioIo;
use opentelemetry::Context;
use std::os::unix::fs::PermissionsExt; use std::os::unix::fs::PermissionsExt;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::Arc; use std::sync::Arc;
@ -69,9 +63,8 @@ use tonic::{
}; };
use tower::{service_fn, ServiceBuilder}; use tower::{service_fn, ServiceBuilder};
use tracing::{error, info, instrument, Instrument, Span}; use tracing::{error, info, instrument, Instrument, Span};
use tracing_opentelemetry::OpenTelemetrySpanExt;
use super::interceptor::{ExtractTracingInterceptor, InjectTracingInterceptor}; use super::interceptor::TracingInterceptor;
/// DfdaemonDownloadServer is the grpc unix server of the download. /// DfdaemonDownloadServer is the grpc unix server of the download.
pub struct DfdaemonDownloadServer { pub struct DfdaemonDownloadServer {
@ -81,11 +74,8 @@ pub struct DfdaemonDownloadServer {
/// socket_path is the path of the unix domain socket. /// socket_path is the path of the unix domain socket.
socket_path: PathBuf, socket_path: PathBuf,
/// task is the task manager. /// service is the grpc service of the dfdaemon.
task: Arc<task::Task>, service: DfdaemonDownloadGRPCServer<DfdaemonDownloadServerHandler>,
/// persistent_cache_task is the persistent cache task manager.
persistent_cache_task: Arc<persistent_cache_task::PersistentCacheTask>,
/// shutdown is used to shutdown the grpc server. /// shutdown is used to shutdown the grpc server.
shutdown: shutdown::Shutdown, shutdown: shutdown::Shutdown,
@ -97,6 +87,7 @@ pub struct DfdaemonDownloadServer {
/// DfdaemonDownloadServer implements the grpc server of the download. /// DfdaemonDownloadServer implements the grpc server of the download.
impl DfdaemonDownloadServer { impl DfdaemonDownloadServer {
/// new creates a new DfdaemonServer. /// new creates a new DfdaemonServer.
#[instrument(skip_all)]
pub fn new( pub fn new(
config: Arc<Config>, config: Arc<Config>,
socket_path: PathBuf, socket_path: PathBuf,
@ -105,29 +96,27 @@ impl DfdaemonDownloadServer {
shutdown: shutdown::Shutdown, shutdown: shutdown::Shutdown,
shutdown_complete_tx: mpsc::UnboundedSender<()>, shutdown_complete_tx: mpsc::UnboundedSender<()>,
) -> Self { ) -> Self {
// Initialize the grpc service.
let service = DfdaemonDownloadGRPCServer::new(DfdaemonDownloadServerHandler {
socket_path: socket_path.clone(),
task,
persistent_cache_task,
})
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX);
Self { Self {
config, config,
socket_path, socket_path,
task, service,
persistent_cache_task,
shutdown, shutdown,
_shutdown_complete: shutdown_complete_tx, _shutdown_complete: shutdown_complete_tx,
} }
} }
/// run starts the download server with unix domain socket. /// run starts the download server with unix domain socket.
#[instrument(skip_all)]
pub async fn run(&mut self, grpc_server_started_barrier: Arc<Barrier>) -> ClientResult<()> { pub async fn run(&mut self, grpc_server_started_barrier: Arc<Barrier>) -> ClientResult<()> {
// Initialize the grpc service.
let service = DfdaemonDownloadGRPCServer::with_interceptor(
DfdaemonDownloadServerHandler {
config: self.config.clone(),
socket_path: self.socket_path.clone(),
task: self.task.clone(),
persistent_cache_task: self.persistent_cache_task.clone(),
},
ExtractTracingInterceptor,
);
// Register the reflection service. // Register the reflection service.
let reflection = tonic_reflection::server::Builder::configure() let reflection = tonic_reflection::server::Builder::configure()
.register_encoded_file_descriptor_set(dragonfly_api::FILE_DESCRIPTOR_SET) .register_encoded_file_descriptor_set(dragonfly_api::FILE_DESCRIPTOR_SET)
@ -139,6 +128,11 @@ impl DfdaemonDownloadServer {
// Initialize health reporter. // Initialize health reporter.
let (mut health_reporter, health_service) = tonic_health::server::health_reporter(); let (mut health_reporter, health_service) = tonic_health::server::health_reporter();
// Set the serving status of the download grpc server.
health_reporter
.set_serving::<DfdaemonDownloadGRPCServer<DfdaemonDownloadServerHandler>>()
.await;
// Start download grpc server with unix domain socket. // Start download grpc server with unix domain socket.
fs::create_dir_all(self.socket_path.parent().unwrap()).await?; fs::create_dir_all(self.socket_path.parent().unwrap()).await?;
fs::remove_file(self.socket_path.clone()) fs::remove_file(self.socket_path.clone())
@ -149,12 +143,12 @@ impl DfdaemonDownloadServer {
// Bind the unix domain socket and set the permissions for the socket. // Bind the unix domain socket and set the permissions for the socket.
let uds = UnixListener::bind(&self.socket_path)?; let uds = UnixListener::bind(&self.socket_path)?;
let perms = std::fs::Permissions::from_mode(0o777); let perms = std::fs::Permissions::from_mode(0o660);
fs::set_permissions(&self.socket_path, perms).await?; fs::set_permissions(&self.socket_path, perms).await?;
// TODO(Gaius): RateLimitLayer is not implemented Clone, so we can't use it here. // TODO(Gaius): RateLimitLayer is not implemented Clone, so we can't use it here.
// Only use the LoadShed layer and the ConcurrencyLimit layer. // Only use the LoadShed layer and the ConcurrencyLimit layer.
let rate_limit_layer = ServiceBuilder::new() let layer = ServiceBuilder::new()
.concurrency_limit(self.config.download.server.request_rate_limit as usize) .concurrency_limit(self.config.download.server.request_rate_limit as usize)
.load_shed() .load_shed()
.into_inner(); .into_inner();
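
The TODO above notes that tower's `RateLimitLayer` does not implement `Clone`, so only the ConcurrencyLimit and LoadShed layers are stacked. A minimal sketch of that layer stack, assuming the `tower` crate with its `limit` and `load-shed` features enabled; the limit value is illustrative:

```rust
use tower::ServiceBuilder;

fn main() {
    // Illustrative limit; in the diff above the real value comes from
    // config.download.server.request_rate_limit.
    let request_rate_limit: usize = 1024;

    // Once request_rate_limit requests are in flight, the wrapped service
    // reports "not ready" and LoadShed turns that into an immediate error
    // instead of queueing, so excess callers fail fast.
    let _layer = ServiceBuilder::new()
        .concurrency_limit(request_rate_limit)
        .load_shed()
        .into_inner();

    // The resulting layer stack is then attached to the tonic server
    // builder via .layer(_layer), as shown in the diff above.
}
```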
@ -165,10 +159,10 @@ impl DfdaemonDownloadServer {
.tcp_keepalive(Some(super::TCP_KEEPALIVE)) .tcp_keepalive(Some(super::TCP_KEEPALIVE))
.http2_keepalive_interval(Some(super::HTTP2_KEEP_ALIVE_INTERVAL)) .http2_keepalive_interval(Some(super::HTTP2_KEEP_ALIVE_INTERVAL))
.http2_keepalive_timeout(Some(super::HTTP2_KEEP_ALIVE_TIMEOUT)) .http2_keepalive_timeout(Some(super::HTTP2_KEEP_ALIVE_TIMEOUT))
.layer(rate_limit_layer) .layer(layer)
.add_service(reflection) .add_service(reflection.clone())
.add_service(health_service) .add_service(health_service)
.add_service(service) .add_service(self.service.clone())
.serve_with_incoming_shutdown(uds_stream, async move { .serve_with_incoming_shutdown(uds_stream, async move {
// When the grpc server is started, notify the barrier. If the shutdown signal is received // When the grpc server is started, notify the barrier. If the shutdown signal is received
// before barrier is waited successfully, the server will shutdown immediately. // before barrier is waited successfully, the server will shutdown immediately.
@ -176,12 +170,6 @@ impl DfdaemonDownloadServer {
// Notify the download grpc server is started. // Notify the download grpc server is started.
_ = grpc_server_started_barrier.wait() => { _ = grpc_server_started_barrier.wait() => {
info!("download server is ready to start"); info!("download server is ready to start");
health_reporter
.set_serving::<DfdaemonDownloadGRPCServer<DfdaemonDownloadServerHandler>>()
.await;
info!("download server's health status set to serving");
} }
// Wait for shutdown signal. // Wait for shutdown signal.
_ = shutdown.recv() => { _ = shutdown.recv() => {
@ -211,9 +199,6 @@ impl DfdaemonDownloadServer {
/// DfdaemonDownloadServerHandler is the handler of the dfdaemon download grpc service. /// DfdaemonDownloadServerHandler is the handler of the dfdaemon download grpc service.
pub struct DfdaemonDownloadServerHandler { pub struct DfdaemonDownloadServerHandler {
/// config is the configuration of the dfdaemon.
config: Arc<Config>,
/// socket_path is the path of the unix domain socket. /// socket_path is the path of the unix domain socket.
socket_path: PathBuf, socket_path: PathBuf,
@ -231,19 +216,11 @@ impl DfdaemonDownload for DfdaemonDownloadServerHandler {
type DownloadTaskStream = ReceiverStream<Result<DownloadTaskResponse, Status>>; type DownloadTaskStream = ReceiverStream<Result<DownloadTaskResponse, Status>>;
/// download_task tells the dfdaemon to download the task. /// download_task tells the dfdaemon to download the task.
#[instrument( #[instrument(skip_all, fields(host_id, task_id, peer_id))]
skip_all,
fields(host_id, task_id, peer_id, url, remote_ip, content_length)
)]
async fn download_task( async fn download_task(
&self, &self,
request: Request<DownloadTaskRequest>, request: Request<DownloadTaskRequest>,
) -> Result<Response<Self::DownloadTaskStream>, Status> { ) -> Result<Response<Self::DownloadTaskStream>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Record the start time. // Record the start time.
let start_time = Instant::now(); let start_time = Instant::now();
@ -285,11 +262,6 @@ impl DfdaemonDownload for DfdaemonDownloadServerHandler {
Span::current().record("host_id", host_id.as_str()); Span::current().record("host_id", host_id.as_str());
Span::current().record("task_id", task_id.as_str()); Span::current().record("task_id", task_id.as_str());
Span::current().record("peer_id", peer_id.as_str()); Span::current().record("peer_id", peer_id.as_str());
Span::current().record("url", download.url.clone());
Span::current().record(
"remote_ip",
download.remote_ip.clone().unwrap_or_default().as_str(),
);
info!("download task in download server"); info!("download task in download server");
// Download task started. // Download task started.
@ -363,15 +335,12 @@ impl DfdaemonDownload for DfdaemonDownloadServerHandler {
error!("missing content length in the response"); error!("missing content length in the response");
return Err(Status::internal("missing content length in the response")); return Err(Status::internal("missing content length in the response"));
}; };
info!( info!(
"content length {}, piece length {}", "content length {}, piece length {}",
content_length, content_length,
task.piece_length().unwrap_or_default() task.piece_length().unwrap_or_default()
); );
Span::current().record("content_length", content_length);
// Download's range priority is higher than the request header's range. // Download's range priority is higher than the request header's range.
// If download protocol is http, use the range of the request header. // If download protocol is http, use the range of the request header.
// If download protocol is not http, use the range of the download. // If download protocol is not http, use the range of the download.
@ -504,48 +473,22 @@ impl DfdaemonDownload for DfdaemonDownloadServerHandler {
)), )),
) )
.await; .await;
return;
} }
Err(err) => { Err(err) => {
error!("check output path: {}", err); error!("check output path: {}", err);
handle_error(&out_stream_tx, err).await; handle_error(&out_stream_tx, err).await;
}
}
return; return;
} }
}
} else if let Err(err) = task_manager_clone if let Err(err) = task_manager_clone
.copy_task(task_clone.id.as_str(), output_path) .copy_task(task_clone.id.as_str(), output_path)
.await .await
{ {
error!("copy task: {}", err); error!("copy task: {}", err);
handle_error(&out_stream_tx, err).await; handle_error(&out_stream_tx, err).await;
return;
}
}
// Verify the file digest if it is provided.
if let Some(raw_digest) = &download_clone.digest {
let digest = match raw_digest.parse::<Digest>() {
Ok(digest) => digest,
Err(err) => {
error!("parse digest: {}", err);
handle_error(
&out_stream_tx,
Status::invalid_argument(format!(
"invalid digest({}): {}",
raw_digest, err
)),
)
.await;
return;
}
};
if let Err(err) =
verify_file_digest(digest, Path::new(output_path.as_str()))
{
error!("verify file digest: {}", err);
handle_error(&out_stream_tx, err).await;
return;
} }
} }
} }
@ -670,16 +613,11 @@ impl DfdaemonDownload for DfdaemonDownloadServerHandler {
} }
/// stat_task gets the status of the task. /// stat_task gets the status of the task.
#[instrument(skip_all, fields(host_id, task_id, remote_ip, local_only))] #[instrument(skip_all, fields(host_id, task_id))]
async fn stat_task( async fn stat_task(
&self, &self,
request: Request<DfdaemonStatTaskRequest>, request: Request<DfdaemonStatTaskRequest>,
) -> Result<Response<Task>, Status> { ) -> Result<Response<Task>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Clone the request. // Clone the request.
let request = request.into_inner(); let request = request.into_inner();
@ -689,137 +627,36 @@ impl DfdaemonDownload for DfdaemonDownloadServerHandler {
// Get the task id from the request. // Get the task id from the request.
let task_id = request.task_id; let task_id = request.task_id;
// Get the local_only flag from the request, default to false.
let local_only = request.local_only;
// Span record the host id and task id. // Span record the host id and task id.
Span::current().record("host_id", host_id.as_str()); Span::current().record("host_id", host_id.as_str());
Span::current().record("task_id", task_id.as_str()); Span::current().record("task_id", task_id.as_str());
Span::current().record(
"remote_ip",
request.remote_ip.clone().unwrap_or_default().as_str(),
);
Span::current().record("local_only", local_only.to_string().as_str());
info!("stat task in download server"); info!("stat task in download server");
// Collect the stat task metrics. // Collect the stat task metrics.
collect_stat_task_started_metrics(TaskType::Standard as i32); collect_stat_task_started_metrics(TaskType::Standard as i32);
match self // Get the task from the scheduler.
let task = self
.task .task
.stat(task_id.as_str(), host_id.as_str(), local_only) .stat(task_id.as_str(), host_id.as_str())
.await .await
{ .map_err(|err| {
Ok(task) => Ok(Response::new(task)),
Err(err) => {
// Collect the stat task failure metrics. // Collect the stat task failure metrics.
collect_stat_task_failure_metrics(TaskType::Standard as i32); collect_stat_task_failure_metrics(TaskType::Standard as i32);
// Log the error with detailed context. error!("stat task: {}", err);
error!("stat task failed: {}", err);
// Map the error to an appropriate gRPC status.
Err(match err {
ClientError::TaskNotFound(id) => {
Status::not_found(format!("task not found: {}", id))
}
_ => Status::internal(err.to_string()),
})
}
}
}
/// list_tasks lists the tasks.
#[instrument(skip_all, fields(task_id, url, remote_ip))]
async fn list_task_entries(
&self,
request: Request<ListTaskEntriesRequest>,
) -> Result<Response<ListTaskEntriesResponse>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Clone the request.
let request = request.into_inner();
// Span record the task id and url.
Span::current().record("task_id", request.task_id.as_str());
Span::current().record("url", request.url.as_str());
Span::current().record(
"remote_ip",
request.remote_ip.clone().unwrap_or_default().as_str(),
);
info!("list tasks in download server");
// Collect the list tasks started metrics.
collect_list_task_entries_started_metrics(TaskType::Standard as i32);
// Build the backend.
let backend = self
.task
.backend_factory
.build(request.url.as_str())
.map_err(|err| {
// Collect the list tasks failure metrics.
collect_list_task_entries_failure_metrics(TaskType::Standard as i32);
error!("build backend: {}", err);
Status::internal(err.to_string()) Status::internal(err.to_string())
})?; })?;
// Head the task entries. Ok(Response::new(task))
let response = backend
.head(HeadRequest {
task_id: request.task_id.clone(),
url: request.url.clone(),
http_header: Some(hashmap_to_headermap(&request.request_header).map_err(
|err| {
error!("parse request header: {}", err);
Status::internal(err.to_string())
},
)?),
timeout: self.config.download.piece_timeout,
client_cert: None,
object_storage: request.object_storage.clone(),
hdfs: request.hdfs.clone(),
})
.await
.map_err(|err| {
// Collect the list tasks failure metrics.
collect_list_task_entries_failure_metrics(TaskType::Standard as i32);
error!("list task entries: {}", err);
Status::internal(err.to_string())
})?;
Ok(Response::new(ListTaskEntriesResponse {
content_length: response.content_length.unwrap_or_default(),
response_header: headermap_to_hashmap(&response.http_header.unwrap_or_default()),
status_code: response.http_status_code.map(|code| code.as_u16().into()),
entries: response
.entries
.into_iter()
.map(|dir_entry| Entry {
url: dir_entry.url,
content_length: dir_entry.content_length as u64,
is_dir: dir_entry.is_dir,
})
.collect(),
}))
} }
/// delete_task calls the dfdaemon to delete the task. /// delete_task calls the dfdaemon to delete the task.
#[instrument(skip_all, fields(host_id, task_id, remote_ip))] #[instrument(skip_all, fields(host_id, task_id))]
async fn delete_task( async fn delete_task(
&self, &self,
request: Request<DeleteTaskRequest>, request: Request<DeleteTaskRequest>,
) -> Result<Response<()>, Status> { ) -> Result<Response<()>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Clone the request. // Clone the request.
let request = request.into_inner(); let request = request.into_inner();
@ -832,10 +669,6 @@ impl DfdaemonDownload for DfdaemonDownloadServerHandler {
// Span record the host id and task id. // Span record the host id and task id.
Span::current().record("host_id", host_id.as_str()); Span::current().record("host_id", host_id.as_str());
Span::current().record("task_id", task_id.as_str()); Span::current().record("task_id", task_id.as_str());
Span::current().record(
"remote_ip",
request.remote_ip.clone().unwrap_or_default().as_str(),
);
info!("delete task in download server"); info!("delete task in download server");
// Collect the delete task started metrics. // Collect the delete task started metrics.
@ -858,12 +691,7 @@ impl DfdaemonDownload for DfdaemonDownloadServerHandler {
/// delete_host calls the scheduler to delete the host. /// delete_host calls the scheduler to delete the host.
#[instrument(skip_all, fields(host_id))] #[instrument(skip_all, fields(host_id))]
async fn delete_host(&self, request: Request<()>) -> Result<Response<()>, Status> { async fn delete_host(&self, _: Request<()>) -> Result<Response<()>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Generate the host id. // Generate the host id.
let host_id = self.task.id_generator.host_id(); let host_id = self.task.id_generator.host_id();
@ -894,16 +722,11 @@ impl DfdaemonDownload for DfdaemonDownloadServerHandler {
ReceiverStream<Result<DownloadPersistentCacheTaskResponse, Status>>; ReceiverStream<Result<DownloadPersistentCacheTaskResponse, Status>>;
/// download_persistent_cache_task downloads the persistent cache task. /// download_persistent_cache_task downloads the persistent cache task.
#[instrument(skip_all, fields(host_id, task_id, peer_id, remote_ip, content_length))] #[instrument(skip_all, fields(host_id, task_id, peer_id))]
async fn download_persistent_cache_task( async fn download_persistent_cache_task(
&self, &self,
request: Request<DownloadPersistentCacheTaskRequest>, request: Request<DownloadPersistentCacheTaskRequest>,
) -> Result<Response<Self::DownloadPersistentCacheTaskStream>, Status> { ) -> Result<Response<Self::DownloadPersistentCacheTaskStream>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Record the start time. // Record the start time.
let start_time = Instant::now(); let start_time = Instant::now();
@ -927,10 +750,6 @@ impl DfdaemonDownload for DfdaemonDownloadServerHandler {
Span::current().record("host_id", host_id.as_str()); Span::current().record("host_id", host_id.as_str());
Span::current().record("task_id", task_id.as_str()); Span::current().record("task_id", task_id.as_str());
Span::current().record("peer_id", peer_id.as_str()); Span::current().record("peer_id", peer_id.as_str());
Span::current().record(
"remote_ip",
request.remote_ip.clone().unwrap_or_default().as_str(),
);
info!("download persistent cache task in download server"); info!("download persistent cache task in download server");
// Download task started. // Download task started.
@ -984,15 +803,12 @@ impl DfdaemonDownload for DfdaemonDownloadServerHandler {
task task
} }
}; };
info!( info!(
"content length {}, piece length {}", "content length {}, piece length {}",
task.content_length(), task.content_length(),
task.piece_length() task.piece_length()
); );
Span::current().record("content_length", task.content_length());
// Initialize stream channel. // Initialize stream channel.
let request_clone = request.clone(); let request_clone = request.clone();
let task_manager_clone = self.persistent_cache_task.clone(); let task_manager_clone = self.persistent_cache_task.clone();
@ -1074,48 +890,22 @@ impl DfdaemonDownload for DfdaemonDownloadServerHandler {
)), )),
) )
.await; .await;
return;
} }
Err(err) => { Err(err) => {
error!("check output path: {}", err); error!("check output path: {}", err);
handle_error(&out_stream_tx, err).await; handle_error(&out_stream_tx, err).await;
}
}
return; return;
} }
}
} else if let Err(err) = task_manager_clone if let Err(err) = task_manager_clone
.copy_task(task_clone.id.as_str(), output_path) .copy_task(task_clone.id.as_str(), output_path)
.await .await
{ {
error!("copy task: {}", err); error!("copy task: {}", err);
handle_error(&out_stream_tx, err).await; handle_error(&out_stream_tx, err).await;
return;
}
}
// Verify the file digest if it is provided.
if let Some(raw_digest) = &request_clone.digest {
let digest = match raw_digest.parse::<Digest>() {
Ok(digest) => digest,
Err(err) => {
error!("parse digest: {}", err);
handle_error(
&out_stream_tx,
Status::invalid_argument(format!(
"invalid digest({}): {}",
raw_digest, err
)),
)
.await;
return;
}
};
if let Err(err) =
verify_file_digest(digest, Path::new(output_path.as_str()))
{
error!("verify file digest: {}", err);
handle_error(&out_stream_tx, err).await;
return;
} }
} }
} }
@ -1152,16 +942,11 @@ impl DfdaemonDownload for DfdaemonDownloadServerHandler {
} }
/// upload_persistent_cache_task uploads the persistent cache task. /// upload_persistent_cache_task uploads the persistent cache task.
#[instrument(skip_all, fields(host_id, task_id, peer_id, remote_ip))] #[instrument(skip_all, fields(host_id, task_id, peer_id))]
async fn upload_persistent_cache_task( async fn upload_persistent_cache_task(
&self, &self,
request: Request<UploadPersistentCacheTaskRequest>, request: Request<UploadPersistentCacheTaskRequest>,
) -> Result<Response<PersistentCacheTask>, Status> { ) -> Result<Response<PersistentCacheTask>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Record the start time. // Record the start time.
let start_time = Instant::now(); let start_time = Instant::now();
@ -1199,10 +984,6 @@ impl DfdaemonDownload for DfdaemonDownloadServerHandler {
Span::current().record("host_id", host_id.as_str()); Span::current().record("host_id", host_id.as_str());
Span::current().record("task_id", task_id.as_str()); Span::current().record("task_id", task_id.as_str());
Span::current().record("peer_id", peer_id.as_str()); Span::current().record("peer_id", peer_id.as_str());
Span::current().record(
"remote_ip",
request.remote_ip.clone().unwrap_or_default().as_str(),
);
info!("upload persistent cache task in download server"); info!("upload persistent cache task in download server");
// Collect upload task started metrics. // Collect upload task started metrics.
@ -1253,16 +1034,11 @@ impl DfdaemonDownload for DfdaemonDownloadServerHandler {
} }
/// stat_persistent_cache_task stats the persistent cache task. /// stat_persistent_cache_task stats the persistent cache task.
#[instrument(skip_all, fields(host_id, task_id, remote_ip))] #[instrument(skip_all, fields(host_id, task_id))]
async fn stat_persistent_cache_task( async fn stat_persistent_cache_task(
&self, &self,
request: Request<StatPersistentCacheTaskRequest>, request: Request<StatPersistentCacheTaskRequest>,
) -> Result<Response<PersistentCacheTask>, Status> { ) -> Result<Response<PersistentCacheTask>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Clone the request. // Clone the request.
let request = request.into_inner(); let request = request.into_inner();
@ -1275,10 +1051,6 @@ impl DfdaemonDownload for DfdaemonDownloadServerHandler {
// Span record the host id and task id. // Span record the host id and task id.
Span::current().record("host_id", host_id.as_str()); Span::current().record("host_id", host_id.as_str());
Span::current().record("task_id", task_id.as_str()); Span::current().record("task_id", task_id.as_str());
Span::current().record(
"remote_ip",
request.remote_ip.clone().unwrap_or_default().as_str(),
);
info!("stat persistent cache task in download server"); info!("stat persistent cache task in download server");
// Collect the stat persistent cache task started metrics. // Collect the stat persistent cache task started metrics.
@ -1298,51 +1070,19 @@ impl DfdaemonDownload for DfdaemonDownloadServerHandler {
Ok(Response::new(task)) Ok(Response::new(task))
} }
/// DownloadCacheTaskStream is the stream of the download cache task response.
type DownloadCacheTaskStream = ReceiverStream<Result<DownloadCacheTaskResponse, Status>>;
/// download_cache_task tells the dfdaemon to download the cache task.
#[instrument(
skip_all,
fields(host_id, task_id, peer_id, url, remote_ip, content_length)
)]
async fn download_cache_task(
&self,
_request: Request<DownloadCacheTaskRequest>,
) -> Result<Response<Self::DownloadCacheTaskStream>, Status> {
todo!();
}
/// stat_cache_task gets the status of the cache task.
#[instrument(skip_all, fields(host_id, task_id, remote_pi, local_only))]
async fn stat_cache_task(
&self,
_request: Request<DfdaemonStatCacheTaskRequest>,
) -> Result<Response<CacheTask>, Status> {
todo!();
}
/// delete_cache_task calls the dfdaemon to delete the cache task.
#[instrument(skip_all, fields(host_id, task_id, remote_ip))]
async fn delete_cache_task(
&self,
_request: Request<DeleteCacheTaskRequest>,
) -> Result<Response<()>, Status> {
todo!();
}
} }
/// DfdaemonDownloadClient is a wrapper of DfdaemonDownloadGRPCClient. /// DfdaemonDownloadClient is a wrapper of DfdaemonDownloadGRPCClient.
#[derive(Clone)] #[derive(Clone)]
pub struct DfdaemonDownloadClient { pub struct DfdaemonDownloadClient {
/// client is the grpc client of the dfdaemon. /// client is the grpc client of the dfdaemon.
pub client: DfdaemonDownloadGRPCClient<InterceptedService<Channel, InjectTracingInterceptor>>, pub client: DfdaemonDownloadGRPCClient<InterceptedService<Channel, TracingInterceptor>>,
} }
/// DfdaemonDownloadClient implements the grpc client of the dfdaemon download. /// DfdaemonDownloadClient implements the grpc client of the dfdaemon download.
impl DfdaemonDownloadClient { impl DfdaemonDownloadClient {
/// new_unix creates a new DfdaemonDownloadClient with unix domain socket. /// new_unix creates a new DfdaemonDownloadClient with unix domain socket.
#[instrument(skip_all)]
pub async fn new_unix(socket_path: PathBuf) -> ClientResult<Self> { pub async fn new_unix(socket_path: PathBuf) -> ClientResult<Self> {
// Ignore the uri because it is not used. // Ignore the uri because it is not used.
let channel = Endpoint::try_from("http://[::]:50051") let channel = Endpoint::try_from("http://[::]:50051")
@ -1367,8 +1107,7 @@ impl DfdaemonDownloadClient {
}) })
.or_err(ErrorType::ConnectError)?; .or_err(ErrorType::ConnectError)?;
let client = let client = DfdaemonDownloadGRPCClient::with_interceptor(channel, TracingInterceptor)
DfdaemonDownloadGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
.max_decoding_message_size(usize::MAX) .max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX); .max_encoding_message_size(usize::MAX);
Ok(Self { client }) Ok(Self { client })
@ -1408,18 +1147,6 @@ impl DfdaemonDownloadClient {
Ok(response.into_inner()) Ok(response.into_inner())
} }
/// list_task_entries lists the task entries.
#[instrument(skip_all)]
pub async fn list_task_entries(
&self,
request: ListTaskEntriesRequest,
) -> ClientResult<ListTaskEntriesResponse> {
let request = Self::make_request(request);
info!("list task entries request: {:?}", request);
let response = self.client.clone().list_task_entries(request).await?;
Ok(response.into_inner())
}
/// delete_task tells the dfdaemon to delete the task. /// delete_task tells the dfdaemon to delete the task.
#[instrument(skip_all)] #[instrument(skip_all)]
pub async fn delete_task(&self, request: DeleteTaskRequest) -> ClientResult<()> { pub async fn delete_task(&self, request: DeleteTaskRequest) -> ClientResult<()> {
@ -1501,6 +1228,7 @@ impl DfdaemonDownloadClient {
} }
/// make_request creates a new request with timeout. /// make_request creates a new request with timeout.
#[instrument(skip_all)]
fn make_request<T>(request: T) -> tonic::Request<T> { fn make_request<T>(request: T) -> tonic::Request<T> {
let mut request = tonic::Request::new(request); let mut request = tonic::Request::new(request);
request.set_timeout(super::REQUEST_TIMEOUT); request.set_timeout(super::REQUEST_TIMEOUT);
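
`make_request` above wraps every outgoing call with a per-request timeout. A minimal sketch of that pattern, assuming the `tonic` crate; `REQUEST_TIMEOUT` and the message type are illustrative stand-ins for the constants used in the diff:

```rust
use std::time::Duration;

/// Illustrative stand-in for super::REQUEST_TIMEOUT.
const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);

/// make_request wraps a message in a tonic::Request and attaches a deadline,
/// which tonic propagates to the server as the standard grpc-timeout header.
fn make_request<T>(message: T) -> tonic::Request<T> {
    let mut request = tonic::Request::new(message);
    request.set_timeout(REQUEST_TIMEOUT);
    request
}

fn main() {
    let _request = make_request(String::from("ping"));
    // _request can now be passed to a generated tonic client method,
    // e.g. client.stat_task(_request).await, as the wrapper above does.
}
```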


@ -14,6 +14,7 @@
* limitations under the License. * limitations under the License.
*/ */
use super::interceptor::TracingInterceptor;
use crate::metrics::{ use crate::metrics::{
collect_delete_task_failure_metrics, collect_delete_task_started_metrics, collect_delete_task_failure_metrics, collect_delete_task_started_metrics,
collect_download_task_failure_metrics, collect_download_task_finished_metrics, collect_download_task_failure_metrics, collect_download_task_finished_metrics,
@ -24,22 +25,20 @@ use crate::metrics::{
}; };
use crate::resource::{persistent_cache_task, task}; use crate::resource::{persistent_cache_task, task};
use crate::shutdown; use crate::shutdown;
use bytesize::MB;
use dragonfly_api::common::v2::{ use dragonfly_api::common::v2::{
CacheTask, Host, Network, PersistentCacheTask, Piece, Priority, Task, TaskType, Host, Network, PersistentCacheTask, Piece, Priority, Task, TaskType,
}; };
use dragonfly_api::dfdaemon::v2::{ use dragonfly_api::dfdaemon::v2::{
dfdaemon_upload_client::DfdaemonUploadClient as DfdaemonUploadGRPCClient, dfdaemon_upload_client::DfdaemonUploadClient as DfdaemonUploadGRPCClient,
dfdaemon_upload_server::{DfdaemonUpload, DfdaemonUploadServer as DfdaemonUploadGRPCServer}, dfdaemon_upload_server::{DfdaemonUpload, DfdaemonUploadServer as DfdaemonUploadGRPCServer},
DeleteCacheTaskRequest, DeletePersistentCacheTaskRequest, DeleteTaskRequest, DeletePersistentCacheTaskRequest, DeleteTaskRequest, DownloadPersistentCachePieceRequest,
DownloadCachePieceRequest, DownloadCachePieceResponse, DownloadCacheTaskRequest,
DownloadCacheTaskResponse, DownloadPersistentCachePieceRequest,
DownloadPersistentCachePieceResponse, DownloadPersistentCacheTaskRequest, DownloadPersistentCachePieceResponse, DownloadPersistentCacheTaskRequest,
DownloadPersistentCacheTaskResponse, DownloadPieceRequest, DownloadPieceResponse, DownloadPersistentCacheTaskResponse, DownloadPieceRequest, DownloadPieceResponse,
DownloadTaskRequest, DownloadTaskResponse, ExchangeIbVerbsQueuePairEndpointRequest, DownloadTaskRequest, DownloadTaskResponse, ExchangeIbVerbsQueuePairEndpointRequest,
ExchangeIbVerbsQueuePairEndpointResponse, StatCacheTaskRequest, StatPersistentCacheTaskRequest, ExchangeIbVerbsQueuePairEndpointResponse, StatPersistentCacheTaskRequest, StatTaskRequest,
StatTaskRequest, SyncCachePiecesRequest, SyncCachePiecesResponse, SyncHostRequest, SyncHostRequest, SyncPersistentCachePiecesRequest, SyncPersistentCachePiecesResponse,
SyncPersistentCachePiecesRequest, SyncPersistentCachePiecesResponse, SyncPiecesRequest, SyncPiecesRequest, SyncPiecesResponse, UpdatePersistentCacheTaskRequest,
SyncPiecesResponse, UpdatePersistentCacheTaskRequest,
}; };
use dragonfly_api::errordetails::v2::Backend; use dragonfly_api::errordetails::v2::Backend;
use dragonfly_client_config::dfdaemon::Config; use dragonfly_client_config::dfdaemon::Config;
@ -50,13 +49,13 @@ use dragonfly_client_core::{
use dragonfly_client_util::{ use dragonfly_client_util::{
http::{get_range, hashmap_to_headermap, headermap_to_hashmap}, http::{get_range, hashmap_to_headermap, headermap_to_hashmap},
id_generator::TaskIDParameter, id_generator::TaskIDParameter,
net::Interface, net::{get_interface_info, Interface},
}; };
use opentelemetry::Context;
use std::net::SocketAddr; use std::net::SocketAddr;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::Arc; use std::sync::Arc;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use sysinfo::Networks;
use tokio::io::AsyncReadExt; use tokio::io::AsyncReadExt;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use tokio::sync::mpsc::Sender; use tokio::sync::mpsc::Sender;
@ -69,11 +68,8 @@ use tonic::{
}; };
use tower::ServiceBuilder; use tower::ServiceBuilder;
use tracing::{debug, error, info, instrument, Instrument, Span}; use tracing::{debug, error, info, instrument, Instrument, Span};
use tracing_opentelemetry::OpenTelemetrySpanExt;
use url::Url; use url::Url;
use super::interceptor::{ExtractTracingInterceptor, InjectTracingInterceptor};
/// DfdaemonUploadServer is the grpc server of the upload. /// DfdaemonUploadServer is the grpc server of the upload.
pub struct DfdaemonUploadServer { pub struct DfdaemonUploadServer {
/// config is the configuration of the dfdaemon. /// config is the configuration of the dfdaemon.
@ -82,14 +78,8 @@ pub struct DfdaemonUploadServer {
/// addr is the address of the grpc server. /// addr is the address of the grpc server.
addr: SocketAddr, addr: SocketAddr,
/// task is the task manager. /// service is the grpc service of the dfdaemon upload.
task: Arc<task::Task>, service: DfdaemonUploadGRPCServer<DfdaemonUploadServerHandler>,
/// persistent_cache_task is the persistent cache task manager.
persistent_cache_task: Arc<persistent_cache_task::PersistentCacheTask>,
/// interface is the network interface.
interface: Arc<Interface>,
/// shutdown is used to shutdown the grpc server. /// shutdown is used to shutdown the grpc server.
shutdown: shutdown::Shutdown, shutdown: shutdown::Shutdown,
@ -101,38 +91,40 @@ pub struct DfdaemonUploadServer {
/// DfdaemonUploadServer implements the grpc server of the upload. /// DfdaemonUploadServer implements the grpc server of the upload.
impl DfdaemonUploadServer { impl DfdaemonUploadServer {
/// new creates a new DfdaemonUploadServer. /// new creates a new DfdaemonUploadServer.
#[instrument(skip_all)]
pub fn new( pub fn new(
config: Arc<Config>, config: Arc<Config>,
addr: SocketAddr, addr: SocketAddr,
task: Arc<task::Task>, task: Arc<task::Task>,
persistent_cache_task: Arc<persistent_cache_task::PersistentCacheTask>, persistent_cache_task: Arc<persistent_cache_task::PersistentCacheTask>,
interface: Arc<Interface>,
shutdown: shutdown::Shutdown, shutdown: shutdown::Shutdown,
shutdown_complete_tx: mpsc::UnboundedSender<()>, shutdown_complete_tx: mpsc::UnboundedSender<()>,
) -> Self { ) -> Self {
// Initialize the grpc service.
let interface =
get_interface_info(config.host.ip.unwrap(), config.upload.rate_limit).unwrap();
let service = DfdaemonUploadGRPCServer::new(DfdaemonUploadServerHandler {
interface,
socket_path: config.download.server.socket_path.clone(),
task,
persistent_cache_task,
})
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX);
Self { Self {
config, config,
addr, addr,
task, service,
interface,
persistent_cache_task,
shutdown, shutdown,
_shutdown_complete: shutdown_complete_tx, _shutdown_complete: shutdown_complete_tx,
} }
} }
/// run starts the upload server. /// run starts the upload server.
#[instrument(skip_all)]
pub async fn run(&mut self, grpc_server_started_barrier: Arc<Barrier>) -> ClientResult<()> { pub async fn run(&mut self, grpc_server_started_barrier: Arc<Barrier>) -> ClientResult<()> {
let service = DfdaemonUploadGRPCServer::with_interceptor(
DfdaemonUploadServerHandler {
socket_path: self.config.download.server.socket_path.clone(),
task: self.task.clone(),
persistent_cache_task: self.persistent_cache_task.clone(),
interface: self.interface.clone(),
},
ExtractTracingInterceptor,
);
// Register the reflection service. // Register the reflection service.
let reflection = tonic_reflection::server::Builder::configure() let reflection = tonic_reflection::server::Builder::configure()
.register_encoded_file_descriptor_set(dragonfly_api::FILE_DESCRIPTOR_SET) .register_encoded_file_descriptor_set(dragonfly_api::FILE_DESCRIPTOR_SET)
@ -144,9 +136,14 @@ impl DfdaemonUploadServer {
// Initialize health reporter. // Initialize health reporter.
let (mut health_reporter, health_service) = tonic_health::server::health_reporter(); let (mut health_reporter, health_service) = tonic_health::server::health_reporter();
// Set the serving status of the upload grpc server.
health_reporter
.set_serving::<DfdaemonUploadGRPCServer<DfdaemonUploadServerHandler>>()
.await;
// TODO(Gaius): RateLimitLayer is not implemented Clone, so we can't use it here. // TODO(Gaius): RateLimitLayer is not implemented Clone, so we can't use it here.
// Only use the LoadShed layer and the ConcurrencyLimit layer. // Only use the LoadShed layer and the ConcurrencyLimit layer.
let rate_limit_layer = ServiceBuilder::new() let layer = ServiceBuilder::new()
.concurrency_limit(self.config.upload.server.request_rate_limit as usize) .concurrency_limit(self.config.upload.server.request_rate_limit as usize)
.load_shed() .load_shed()
.into_inner(); .into_inner();
@ -166,23 +163,17 @@ impl DfdaemonUploadServer {
.tcp_keepalive(Some(super::TCP_KEEPALIVE)) .tcp_keepalive(Some(super::TCP_KEEPALIVE))
.http2_keepalive_interval(Some(super::HTTP2_KEEP_ALIVE_INTERVAL)) .http2_keepalive_interval(Some(super::HTTP2_KEEP_ALIVE_INTERVAL))
.http2_keepalive_timeout(Some(super::HTTP2_KEEP_ALIVE_TIMEOUT)) .http2_keepalive_timeout(Some(super::HTTP2_KEEP_ALIVE_TIMEOUT))
.layer(rate_limit_layer) .layer(layer)
.add_service(reflection) .add_service(reflection.clone())
.add_service(health_service) .add_service(health_service)
.add_service(service) .add_service(self.service.clone())
.serve_with_shutdown(self.addr, async move { .serve_with_shutdown(self.addr, async move {
// When the grpc server is started, notify the barrier. If the shutdown signal is received // When the grpc server is started, notify the barrier. If the shutdown signal is received
// before barrier is waited successfully, the server will shutdown immediately. // before barrier is waited successfully, the server will shutdown immediately.
tokio::select! { tokio::select! {
// Notify the upload grpc server is started. // Notify the upload grpc server is started.
_ = grpc_server_started_barrier.wait() => { _ = grpc_server_started_barrier.wait() => {
info!("upload server is ready to start"); info!("upload server is ready");
health_reporter
.set_serving::<DfdaemonUploadGRPCServer<DfdaemonUploadServerHandler>>()
.await;
info!("upload server's health status set to serving");
} }
// Wait for shutdown signal. // Wait for shutdown signal.
_ = shutdown.recv() => { _ = shutdown.recv() => {
@ -204,6 +195,9 @@ impl DfdaemonUploadServer {
/// DfdaemonUploadServerHandler is the handler of the dfdaemon upload grpc service. /// DfdaemonUploadServerHandler is the handler of the dfdaemon upload grpc service.
pub struct DfdaemonUploadServerHandler { pub struct DfdaemonUploadServerHandler {
/// interface is the network interface.
interface: Interface,
/// socket_path is the path of the unix domain socket. /// socket_path is the path of the unix domain socket.
socket_path: PathBuf, socket_path: PathBuf,
@ -212,9 +206,6 @@ pub struct DfdaemonUploadServerHandler {
/// persistent_cache_task is the persistent cache task manager. /// persistent_cache_task is the persistent cache task manager.
persistent_cache_task: Arc<persistent_cache_task::PersistentCacheTask>, persistent_cache_task: Arc<persistent_cache_task::PersistentCacheTask>,
/// interface is the network interface.
interface: Arc<Interface>,
} }
/// DfdaemonUploadServerHandler implements the dfdaemon upload grpc service. /// DfdaemonUploadServerHandler implements the dfdaemon upload grpc service.
@ -224,19 +215,11 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
type DownloadTaskStream = ReceiverStream<Result<DownloadTaskResponse, Status>>; type DownloadTaskStream = ReceiverStream<Result<DownloadTaskResponse, Status>>;
/// download_task downloads the task. /// download_task downloads the task.
#[instrument( #[instrument(skip_all, fields(host_id, task_id, peer_id))]
skip_all,
fields(host_id, task_id, peer_id, url, remote_ip, content_length)
)]
async fn download_task( async fn download_task(
&self, &self,
request: Request<DownloadTaskRequest>, request: Request<DownloadTaskRequest>,
) -> Result<Response<Self::DownloadTaskStream>, Status> { ) -> Result<Response<Self::DownloadTaskStream>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Record the start time. // Record the start time.
let start_time = Instant::now(); let start_time = Instant::now();
@ -278,11 +261,6 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
Span::current().record("host_id", host_id.as_str()); Span::current().record("host_id", host_id.as_str());
Span::current().record("task_id", task_id.as_str()); Span::current().record("task_id", task_id.as_str());
Span::current().record("peer_id", peer_id.as_str()); Span::current().record("peer_id", peer_id.as_str());
Span::current().record("url", download.url.clone());
Span::current().record(
"remote_ip",
download.remote_ip.clone().unwrap_or_default().as_str(),
);
info!("download task in upload server"); info!("download task in upload server");
// Download task started. // Download task started.
@ -363,8 +341,6 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
task.piece_length().unwrap_or_default() task.piece_length().unwrap_or_default()
); );
Span::current().record("content_length", content_length);
// Download's range priority is higher than the request header's range. // Download's range priority is higher than the request header's range.
// If download protocol is http, use the range of the request header. // If download protocol is http, use the range of the request header.
// If download protocol is not http, use the range of the download. // If download protocol is not http, use the range of the download.
@ -639,13 +615,8 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
} }
/// stat_task stats the task. /// stat_task stats the task.
#[instrument(skip_all, fields(host_id, task_id, remote_ip, local_only))] #[instrument(skip_all, fields(host_id, task_id))]
async fn stat_task(&self, request: Request<StatTaskRequest>) -> Result<Response<Task>, Status> { async fn stat_task(&self, request: Request<StatTaskRequest>) -> Result<Response<Task>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Clone the request. // Clone the request.
let request = request.into_inner(); let request = request.into_inner();
@ -655,57 +626,36 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
// Get the task id from the request. // Get the task id from the request.
let task_id = request.task_id; let task_id = request.task_id;
// Get the local_only flag from the request, default to false.
let local_only = request.local_only;
// Span record the host id and task id. // Span record the host id and task id.
Span::current().record("host_id", host_id.as_str()); Span::current().record("host_id", host_id.as_str());
Span::current().record("task_id", task_id.as_str()); Span::current().record("task_id", task_id.as_str());
Span::current().record(
"remote_ip",
request.remote_ip.clone().unwrap_or_default().as_str(),
);
Span::current().record("local_only", local_only.to_string().as_str());
info!("stat task in upload server"); info!("stat task in upload server");
// Collect the stat task metrics. // Collect the stat task metrics.
collect_stat_task_started_metrics(TaskType::Standard as i32); collect_stat_task_started_metrics(TaskType::Standard as i32);
match self // Get the task from the scheduler.
let task = self
.task .task
.stat(task_id.as_str(), host_id.as_str(), local_only) .stat(task_id.as_str(), host_id.as_str())
.await .await
{ .map_err(|err| {
Ok(task) => Ok(Response::new(task)),
Err(err) => {
// Collect the stat task failure metrics. // Collect the stat task failure metrics.
collect_stat_task_failure_metrics(TaskType::Standard as i32); collect_stat_task_failure_metrics(TaskType::Standard as i32);
// Log the error with detailed context. error!("stat task: {}", err);
error!("stat task failed: {}", err); Status::internal(err.to_string())
})?;
// Map the error to an appropriate gRPC status. Ok(Response::new(task))
Err(match err {
ClientError::TaskNotFound(id) => {
Status::not_found(format!("task not found: {}", id))
}
_ => Status::internal(err.to_string()),
})
}
}
} }
/// delete_task deletes the task. /// delete_task deletes the task.
#[instrument(skip_all, fields(host_id, task_id, remote_ip))] #[instrument(skip_all, fields(host_id, task_id))]
async fn delete_task( async fn delete_task(
&self, &self,
request: Request<DeleteTaskRequest>, request: Request<DeleteTaskRequest>,
) -> Result<Response<()>, Status> { ) -> Result<Response<()>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Clone the request. // Clone the request.
let request = request.into_inner(); let request = request.into_inner();
@ -718,10 +668,6 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
// Span record the host id and task id. // Span record the host id and task id.
Span::current().record("host_id", host_id.as_str()); Span::current().record("host_id", host_id.as_str());
Span::current().record("task_id", task_id.as_str()); Span::current().record("task_id", task_id.as_str());
Span::current().record(
"remote_ip",
request.remote_ip.clone().unwrap_or_default().as_str(),
);
info!("delete task in upload server"); info!("delete task in upload server");
// Collect the delete task started metrics. // Collect the delete task started metrics.
@ -745,18 +691,12 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
/// SyncPiecesStream is the stream of the sync pieces response. /// SyncPiecesStream is the stream of the sync pieces response.
type SyncPiecesStream = ReceiverStream<Result<SyncPiecesResponse, Status>>; type SyncPiecesStream = ReceiverStream<Result<SyncPiecesResponse, Status>>;
/// sync_pieces provides the piece metadata for parent. If the per-piece collection timeout is exceeded, /// sync_pieces provides the piece metadata for parent.
/// the stream will be closed.
#[instrument(skip_all, fields(host_id, remote_host_id, task_id))] #[instrument(skip_all, fields(host_id, remote_host_id, task_id))]
async fn sync_pieces( async fn sync_pieces(
&self, &self,
request: Request<SyncPiecesRequest>, request: Request<SyncPiecesRequest>,
) -> Result<Response<Self::SyncPiecesStream>, Status> { ) -> Result<Response<Self::SyncPiecesStream>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Clone the request. // Clone the request.
let request = request.into_inner(); let request = request.into_inner();
@ -786,6 +726,7 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
tokio::spawn( tokio::spawn(
async move { async move {
loop { loop {
let mut has_started_piece = false;
let mut finished_piece_numbers = Vec::new(); let mut finished_piece_numbers = Vec::new();
for interested_piece_number in interested_piece_numbers.iter() { for interested_piece_number in interested_piece_numbers.iter() {
let piece = match task_manager.piece.get( let piece = match task_manager.piece.get(
@ -850,6 +791,11 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
finished_piece_numbers.push(piece.number); finished_piece_numbers.push(piece.number);
continue; continue;
} }
// Check whether the piece is started.
if piece.is_started() {
has_started_piece = true;
}
} }
// Remove the finished piece numbers from the interested piece numbers. // Remove the finished piece numbers from the interested piece numbers.
@ -863,6 +809,13 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
return; return;
} }
// If there is no started piece, return.
if !has_started_piece {
info!("there is no started piece");
drop(out_stream_tx);
return;
}
// Wait for the piece to be finished. // Wait for the piece to be finished.
tokio::time::sleep( tokio::time::sleep(
dragonfly_client_storage::DEFAULT_WAIT_FOR_PIECE_FINISHED_INTERVAL, dragonfly_client_storage::DEFAULT_WAIT_FOR_PIECE_FINISHED_INTERVAL,
@ -877,19 +830,11 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
} }
/// download_piece provides the piece content for parent. /// download_piece provides the piece content for parent.
#[instrument( #[instrument(skip_all, fields(host_id, remote_host_id, task_id, piece_id))]
skip_all,
fields(host_id, remote_host_id, task_id, piece_id, piece_length)
)]
async fn download_piece( async fn download_piece(
&self, &self,
request: Request<DownloadPieceRequest>, request: Request<DownloadPieceRequest>,
) -> Result<Response<DownloadPieceResponse>, Status> { ) -> Result<Response<DownloadPieceResponse>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Clone the request. // Clone the request.
let request = request.into_inner(); let request = request.into_inner();
@ -908,6 +853,7 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
// Generate the piece id. // Generate the piece id.
let piece_id = self.task.piece.id(task_id.as_str(), piece_number); let piece_id = self.task.piece.id(task_id.as_str(), piece_number);
// Span record the host id, remote host id, task id and piece id.
Span::current().record("host_id", host_id.as_str()); Span::current().record("host_id", host_id.as_str());
Span::current().record("remote_host_id", remote_host_id.as_str()); Span::current().record("remote_host_id", remote_host_id.as_str());
Span::current().record("task_id", task_id.as_str()); Span::current().record("task_id", task_id.as_str());
@ -928,8 +874,6 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
Status::not_found("piece metadata not found") Status::not_found("piece metadata not found")
})?; })?;
Span::current().record("piece_length", piece.length);
// Collect upload piece started metrics. // Collect upload piece started metrics.
collect_upload_piece_started_metrics(); collect_upload_piece_started_metrics();
info!("start upload piece content"); info!("start upload piece content");
@ -963,7 +907,6 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
error!("upload piece content failed: {}", err); error!("upload piece content failed: {}", err);
Status::internal(err.to_string()) Status::internal(err.to_string())
})?; })?;
drop(reader);
// Collect upload piece finished metrics. // Collect upload piece finished metrics.
collect_upload_piece_finished_metrics(); collect_upload_piece_finished_metrics();
@ -997,10 +940,8 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
&self, &self,
request: Request<SyncHostRequest>, request: Request<SyncHostRequest>,
) -> Result<Response<Self::SyncHostStream>, Status> { ) -> Result<Response<Self::SyncHostStream>, Status> {
// If the parent context is set, use it as the parent context for the span. // DEFAULT_HOST_INFO_REFRESH_INTERVAL is the default interval for refreshing the host info.
if let Some(parent_ctx) = request.extensions().get::<Context>() { const DEFAULT_HOST_INFO_REFRESH_INTERVAL: Duration = Duration::from_millis(500);
Span::current().set_parent(parent_ctx.clone());
};
// Clone the request. // Clone the request.
let request = request.into_inner(); let request = request.into_inner();
@ -1023,42 +964,43 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
// Get local interface. // Get local interface.
let interface = self.interface.clone(); let interface = self.interface.clone();
// DEFAULT_HOST_INFO_REFRESH_INTERVAL is the default interval for refreshing the host info.
const DEFAULT_HOST_INFO_REFRESH_INTERVAL: Duration = Duration::from_millis(500);
// Initialize stream channel. // Initialize stream channel.
let (out_stream_tx, out_stream_rx) = mpsc::channel(10 * 1024); let (out_stream_tx, out_stream_rx) = mpsc::channel(10 * 1024);
tokio::spawn( tokio::spawn(
async move { async move {
// Initialize sysinfo network.
let mut networks = Networks::new_with_refreshed_list();
// Start the host info update loop. // Start the host info update loop.
loop { loop {
// Wait for the host info refresh interval. // Sleep to calculate the network traffic difference over
// the DEFAULT_HOST_INFO_REFRESH_INTERVAL.
tokio::time::sleep(DEFAULT_HOST_INFO_REFRESH_INTERVAL).await; tokio::time::sleep(DEFAULT_HOST_INFO_REFRESH_INTERVAL).await;
// Wait to get the network data. // Refresh network information.
let network_data = interface.get_network_data().await; networks.refresh();
debug!(
"network data: rx bandwidth {}/{} bps, tx bandwidth {}/{} bps", // Init response.
network_data.rx_bandwidth.unwrap_or(0), let mut host = Host::default();
network_data.max_rx_bandwidth, if let Some(network_data) = networks.get(&interface.name) {
network_data.tx_bandwidth.unwrap_or(0), let network = Network {
network_data.max_tx_bandwidth download_rate: network_data.received()
); / DEFAULT_HOST_INFO_REFRESH_INTERVAL.as_secs(),
// Convert bandwidth to bytes per second.
download_rate_limit: interface.bandwidth / 8 * MB,
upload_rate: network_data.transmitted()
/ DEFAULT_HOST_INFO_REFRESH_INTERVAL.as_secs(),
// Convert bandwidth to bytes per second.
upload_rate_limit: interface.bandwidth / 8 * MB,
..Default::default()
};
host.network = Some(network.clone());
debug!("interface: {}, network: {:?}", interface.name, network);
};
// Send host info. // Send host info.
match out_stream_tx match out_stream_tx.send(Ok(host.clone())).await {
.send(Ok(Host {
network: Some(Network {
max_rx_bandwidth: network_data.max_rx_bandwidth,
rx_bandwidth: network_data.rx_bandwidth,
max_tx_bandwidth: network_data.max_tx_bandwidth,
tx_bandwidth: network_data.tx_bandwidth,
..Default::default()
}),
..Default::default()
}))
.await
{
Ok(_) => {} Ok(_) => {}
Err(err) => { Err(err) => {
error!( error!(
@ -1066,7 +1008,7 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
remote_host_id, err remote_host_id, err
); );
return; break;
} }
}; };
} }
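A hedged sketch of the arithmetic used by the right-hand version above. It assumes `interface.bandwidth` is expressed in Mbit/s and that `MB` is 1_000_000 bytes (neither unit is stated in this diff), and it computes the observed rate from milliseconds, since `DEFAULT_HOST_INFO_REFRESH_INTERVAL.as_secs()` truncates a 500ms interval to zero.

const MB: u64 = 1_000_000; // assumed definition of the MB constant used above

/// rate_limit_bytes_per_sec mirrors `interface.bandwidth / 8 * MB`:
/// a 10_000 Mbit/s link maps to 1_250 * MB bytes per second.
fn rate_limit_bytes_per_sec(bandwidth_mbit_per_sec: u64) -> u64 {
    bandwidth_mbit_per_sec / 8 * MB
}

/// observed_rate_bytes_per_sec converts bytes transferred during the refresh
/// interval into a bytes-per-second rate using millisecond precision.
fn observed_rate_bytes_per_sec(bytes: u64, interval: std::time::Duration) -> u64 {
    bytes * 1_000 / interval.as_millis().max(1) as u64
}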
@ -1082,16 +1024,11 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
ReceiverStream<Result<DownloadPersistentCacheTaskResponse, Status>>; ReceiverStream<Result<DownloadPersistentCacheTaskResponse, Status>>;
/// download_persistent_cache_task downloads the persistent cache task. /// download_persistent_cache_task downloads the persistent cache task.
#[instrument(skip_all, fields(host_id, task_id, peer_id, remote_ip, content_length))] #[instrument(skip_all, fields(host_id, task_id, peer_id))]
async fn download_persistent_cache_task( async fn download_persistent_cache_task(
&self, &self,
request: Request<DownloadPersistentCacheTaskRequest>, request: Request<DownloadPersistentCacheTaskRequest>,
) -> Result<Response<Self::DownloadPersistentCacheTaskStream>, Status> { ) -> Result<Response<Self::DownloadPersistentCacheTaskStream>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Record the start time. // Record the start time.
let start_time = Instant::now(); let start_time = Instant::now();
@ -1115,10 +1052,6 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
Span::current().record("host_id", host_id.as_str()); Span::current().record("host_id", host_id.as_str());
Span::current().record("task_id", task_id.as_str()); Span::current().record("task_id", task_id.as_str());
Span::current().record("peer_id", peer_id.as_str()); Span::current().record("peer_id", peer_id.as_str());
Span::current().record(
"remote_ip",
request.remote_ip.clone().unwrap_or_default().as_str(),
);
info!("download persistent cache task in download server"); info!("download persistent cache task in download server");
// Download task started. // Download task started.
@ -1172,15 +1105,12 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
task task
} }
}; };
info!( info!(
"content length {}, piece length {}", "content length {}, piece length {}",
task.content_length(), task.content_length(),
task.piece_length() task.piece_length()
); );
Span::current().record("content_length", task.content_length());
// Initialize stream channel. // Initialize stream channel.
let request_clone = request.clone(); let request_clone = request.clone();
let task_manager_clone = self.persistent_cache_task.clone(); let task_manager_clone = self.persistent_cache_task.clone();
@ -1314,16 +1244,11 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
} }
/// update_persistent_cache_task update metadata of the persistent cache task. /// update_persistent_cache_task update metadata of the persistent cache task.
#[instrument(skip_all, fields(host_id, task_id, remote_ip))] #[instrument(skip_all, fields(host_id, task_id))]
async fn update_persistent_cache_task( async fn update_persistent_cache_task(
&self, &self,
request: Request<UpdatePersistentCacheTaskRequest>, request: Request<UpdatePersistentCacheTaskRequest>,
) -> Result<Response<()>, Status> { ) -> Result<Response<()>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Clone the request. // Clone the request.
let request = request.into_inner(); let request = request.into_inner();
@ -1336,10 +1261,6 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
// Span record the host id and task id. // Span record the host id and task id.
Span::current().record("host_id", host_id.as_str()); Span::current().record("host_id", host_id.as_str());
Span::current().record("task_id", task_id.as_str()); Span::current().record("task_id", task_id.as_str());
Span::current().record(
"remote_ip",
request.remote_ip.clone().unwrap_or_default().as_str(),
);
info!("update persistent cache task in upload server"); info!("update persistent cache task in upload server");
// Collect the update task started metrics. // Collect the update task started metrics.
@ -1361,16 +1282,11 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
} }
/// stat_persistent_cache_task stats the persistent cache task. /// stat_persistent_cache_task stats the persistent cache task.
#[instrument(skip_all, fields(host_id, task_id, remote_ip))] #[instrument(skip_all, fields(host_id, task_id))]
async fn stat_persistent_cache_task( async fn stat_persistent_cache_task(
&self, &self,
request: Request<StatPersistentCacheTaskRequest>, request: Request<StatPersistentCacheTaskRequest>,
) -> Result<Response<PersistentCacheTask>, Status> { ) -> Result<Response<PersistentCacheTask>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Clone the request. // Clone the request.
let request = request.into_inner(); let request = request.into_inner();
@ -1383,10 +1299,6 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
// Span record the host id and task id. // Span record the host id and task id.
Span::current().record("host_id", host_id.as_str()); Span::current().record("host_id", host_id.as_str());
Span::current().record("task_id", task_id.as_str()); Span::current().record("task_id", task_id.as_str());
Span::current().record(
"remote_ip",
request.remote_ip.clone().unwrap_or_default().as_str(),
);
info!("stat persistent cache task in upload server"); info!("stat persistent cache task in upload server");
// Collect the stat task started metrics. // Collect the stat task started metrics.
@ -1408,16 +1320,11 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
} }
/// delete_persistent_cache_task deletes the persistent cache task. /// delete_persistent_cache_task deletes the persistent cache task.
#[instrument(skip_all, fields(host_id, task_id, remote_ip))] #[instrument(skip_all, fields(host_id, task_id))]
async fn delete_persistent_cache_task( async fn delete_persistent_cache_task(
&self, &self,
request: Request<DeletePersistentCacheTaskRequest>, request: Request<DeletePersistentCacheTaskRequest>,
) -> Result<Response<()>, Status> { ) -> Result<Response<()>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Clone the request. // Clone the request.
let request = request.into_inner(); let request = request.into_inner();
@ -1430,10 +1337,6 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
// Span record the host id and task id. // Span record the host id and task id.
Span::current().record("host_id", host_id.as_str()); Span::current().record("host_id", host_id.as_str());
Span::current().record("task_id", task_id.as_str()); Span::current().record("task_id", task_id.as_str());
Span::current().record(
"remote_ip",
request.remote_ip.clone().unwrap_or_default().as_str(),
);
info!("delete persistent cache task in upload server"); info!("delete persistent cache task in upload server");
// Collect the delete task started metrics. // Collect the delete task started metrics.
@ -1452,11 +1355,6 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
&self, &self,
request: Request<SyncPersistentCachePiecesRequest>, request: Request<SyncPersistentCachePiecesRequest>,
) -> Result<Response<Self::SyncPersistentCachePiecesStream>, Status> { ) -> Result<Response<Self::SyncPersistentCachePiecesStream>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Clone the request. // Clone the request.
let request = request.into_inner(); let request = request.into_inner();
@ -1486,6 +1384,7 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
tokio::spawn( tokio::spawn(
async move { async move {
loop { loop {
let mut has_started_piece = false;
let mut finished_piece_numbers = Vec::new(); let mut finished_piece_numbers = Vec::new();
for interested_piece_number in interested_piece_numbers.iter() { for interested_piece_number in interested_piece_numbers.iter() {
let piece = match task_manager.piece.get( let piece = match task_manager.piece.get(
@ -1544,6 +1443,11 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
finished_piece_numbers.push(piece.number); finished_piece_numbers.push(piece.number);
continue; continue;
} }
// Check whether the piece is started.
if piece.is_started() {
has_started_piece = true;
}
} }
// Remove the finished piece numbers from the interested piece numbers. // Remove the finished piece numbers from the interested piece numbers.
@ -1557,6 +1461,13 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
return; return;
} }
// If there is no started piece, return.
if !has_started_piece {
info!("there is no started persistent cache piece");
drop(out_stream_tx);
return;
}
// Wait for the piece to be finished. // Wait for the piece to be finished.
tokio::time::sleep( tokio::time::sleep(
dragonfly_client_storage::DEFAULT_WAIT_FOR_PIECE_FINISHED_INTERVAL, dragonfly_client_storage::DEFAULT_WAIT_FOR_PIECE_FINISHED_INTERVAL,
@ -1576,11 +1487,6 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
&self, &self,
request: Request<DownloadPersistentCachePieceRequest>, request: Request<DownloadPersistentCachePieceRequest>,
) -> Result<Response<DownloadPersistentCachePieceResponse>, Status> { ) -> Result<Response<DownloadPersistentCachePieceResponse>, Status> {
// If the parent context is set, use it as the parent context for the span.
if let Some(parent_ctx) = request.extensions().get::<Context>() {
Span::current().set_parent(parent_ctx.clone());
};
// Clone the request. // Clone the request.
let request = request.into_inner(); let request = request.into_inner();
@ -1658,7 +1564,6 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
error!("upload persistent cache piece content failed: {}", err); error!("upload persistent cache piece content failed: {}", err);
Status::internal(err.to_string()) Status::internal(err.to_string())
})?; })?;
drop(reader);
// Collect upload piece finished metrics. // Collect upload piece finished metrics.
collect_upload_piece_finished_metrics(); collect_upload_piece_finished_metrics();
@ -1691,75 +1596,19 @@ impl DfdaemonUpload for DfdaemonUploadServerHandler {
) -> Result<Response<ExchangeIbVerbsQueuePairEndpointResponse>, Status> { ) -> Result<Response<ExchangeIbVerbsQueuePairEndpointResponse>, Status> {
unimplemented!() unimplemented!()
} }
/// DownloadCacheTaskStream is the stream of the download cache task response.
type DownloadCacheTaskStream = ReceiverStream<Result<DownloadCacheTaskResponse, Status>>;
/// download_cache_task downloads the cache task.
#[instrument(
skip_all,
fields(host_id, task_id, peer_id, url, remote_ip, content_length)
)]
async fn download_cache_task(
&self,
_request: Request<DownloadCacheTaskRequest>,
) -> Result<Response<Self::DownloadCacheTaskStream>, Status> {
todo!();
}
/// stat_cache_task stats the cache task.
#[instrument(skip_all, fields(host_id, task_id, remote_ip, local_only))]
async fn stat_cache_task(
&self,
_request: Request<StatCacheTaskRequest>,
) -> Result<Response<CacheTask>, Status> {
todo!();
}
/// delete_cache_task deletes the cache task.
#[instrument(skip_all, fields(host_id, task_id, remote_ip))]
async fn delete_cache_task(
&self,
_request: Request<DeleteCacheTaskRequest>,
) -> Result<Response<()>, Status> {
todo!();
}
/// SyncCachePiecesStream is the stream of the sync cache pieces response.
type SyncCachePiecesStream = ReceiverStream<Result<SyncCachePiecesResponse, Status>>;
/// sync_cache_pieces provides the cache piece metadata for parent.
#[instrument(skip_all, fields(host_id, remote_host_id, task_id))]
async fn sync_cache_pieces(
&self,
_request: Request<SyncCachePiecesRequest>,
) -> Result<Response<Self::SyncCachePiecesStream>, Status> {
todo!();
}
/// download_cache_piece provides the cache piece content for parent.
#[instrument(
skip_all,
fields(host_id, remote_host_id, task_id, piece_id, piece_length)
)]
async fn download_cache_piece(
&self,
_request: Request<DownloadCachePieceRequest>,
) -> Result<Response<DownloadCachePieceResponse>, Status> {
todo!();
}
} }
/// DfdaemonUploadClient is a wrapper of DfdaemonUploadGRPCClient. /// DfdaemonUploadClient is a wrapper of DfdaemonUploadGRPCClient.
#[derive(Clone)] #[derive(Clone)]
pub struct DfdaemonUploadClient { pub struct DfdaemonUploadClient {
/// client is the grpc client of the dfdaemon upload. /// client is the grpc client of the dfdaemon upload.
pub client: DfdaemonUploadGRPCClient<InterceptedService<Channel, InjectTracingInterceptor>>, pub client: DfdaemonUploadGRPCClient<InterceptedService<Channel, TracingInterceptor>>,
} }
/// DfdaemonUploadClient implements the dfdaemon upload grpc client. /// DfdaemonUploadClient implements the dfdaemon upload grpc client.
impl DfdaemonUploadClient { impl DfdaemonUploadClient {
/// new creates a new DfdaemonUploadClient. /// new creates a new DfdaemonUploadClient.
#[instrument(skip_all)]
pub async fn new( pub async fn new(
config: Arc<Config>, config: Arc<Config>,
addr: String, addr: String,
@ -1818,7 +1667,7 @@ impl DfdaemonUploadClient {
.or_err(ErrorType::ConnectError)?, .or_err(ErrorType::ConnectError)?,
}; };
let client = DfdaemonUploadGRPCClient::with_interceptor(channel, InjectTracingInterceptor) let client = DfdaemonUploadGRPCClient::with_interceptor(channel, TracingInterceptor)
.max_decoding_message_size(usize::MAX) .max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX); .max_encoding_message_size(usize::MAX);
Ok(Self { client }) Ok(Self { client })
@ -1946,7 +1795,6 @@ impl DfdaemonUploadClient {
} }
/// sync_persistent_cache_pieces provides the persistent cache piece metadata for parent. /// sync_persistent_cache_pieces provides the persistent cache piece metadata for parent.
/// If the per-piece collection timeout is exceeded, the stream will be closed.
#[instrument(skip_all)] #[instrument(skip_all)]
pub async fn sync_persistent_cache_pieces( pub async fn sync_persistent_cache_pieces(
&self, &self,
@ -1996,6 +1844,7 @@ impl DfdaemonUploadClient {
} }
/// make_request creates a new request with timeout. /// make_request creates a new request with timeout.
#[instrument(skip_all)]
fn make_request<T>(request: T) -> tonic::Request<T> { fn make_request<T>(request: T) -> tonic::Request<T> {
let mut request = tonic::Request::new(request); let mut request = tonic::Request::new(request);
request.set_timeout(super::REQUEST_TIMEOUT); request.set_timeout(super::REQUEST_TIMEOUT);

View File

@ -21,27 +21,27 @@ use dragonfly_client_core::{
use hyper_util::rt::TokioIo; use hyper_util::rt::TokioIo;
use std::path::PathBuf; use std::path::PathBuf;
use tokio::net::UnixStream; use tokio::net::UnixStream;
use tonic::service::interceptor::InterceptedService;
use tonic::transport::ClientTlsConfig;
use tonic::transport::{Channel, Endpoint, Uri}; use tonic::transport::{Channel, Endpoint, Uri};
use tonic::{service::interceptor::InterceptedService, transport::ClientTlsConfig};
use tonic_health::pb::{ use tonic_health::pb::{
health_client::HealthClient as HealthGRPCClient, HealthCheckRequest, HealthCheckResponse, health_client::HealthClient as HealthGRPCClient, HealthCheckRequest, HealthCheckResponse,
}; };
use tower::service_fn; use tower::service_fn;
use tracing::{error, instrument}; use tracing::{error, instrument};
use super::interceptor::InjectTracingInterceptor; use super::interceptor::TracingInterceptor;
/// HealthClient is a wrapper of HealthGRPCClient. /// HealthClient is a wrapper of HealthGRPCClient.
#[derive(Clone)] #[derive(Clone)]
pub struct HealthClient { pub struct HealthClient {
/// client is the grpc client of the certificate. /// client is the grpc client of the certificate.
client: HealthGRPCClient<InterceptedService<Channel, InjectTracingInterceptor>>, client: HealthGRPCClient<InterceptedService<Channel, TracingInterceptor>>,
} }
/// HealthClient implements the grpc client of the health. /// HealthClient implements the grpc client of the health.
impl HealthClient { impl HealthClient {
/// new creates a new HealthClient. /// new creates a new HealthClient.
#[instrument(skip_all)]
pub async fn new(addr: &str, client_tls_config: Option<ClientTlsConfig>) -> Result<Self> { pub async fn new(addr: &str, client_tls_config: Option<ClientTlsConfig>) -> Result<Self> {
let channel = match client_tls_config { let channel = match client_tls_config {
Some(client_tls_config) => Channel::from_shared(addr.to_string()) Some(client_tls_config) => Channel::from_shared(addr.to_string())
@ -73,13 +73,14 @@ impl HealthClient {
.or_err(ErrorType::ConnectError)?, .or_err(ErrorType::ConnectError)?,
}; };
let client = HealthGRPCClient::with_interceptor(channel, InjectTracingInterceptor) let client = HealthGRPCClient::with_interceptor(channel, TracingInterceptor)
.max_decoding_message_size(usize::MAX) .max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX); .max_encoding_message_size(usize::MAX);
Ok(Self { client }) Ok(Self { client })
} }
/// new_unix creates a new HealthClient with unix domain socket. /// new_unix creates a new HealthClient with unix domain socket.
#[instrument(skip_all)]
pub async fn new_unix(socket_path: PathBuf) -> Result<Self> { pub async fn new_unix(socket_path: PathBuf) -> Result<Self> {
// Ignore the uri because it is not used. // Ignore the uri because it is not used.
let channel = Endpoint::try_from("http://[::]:50051") let channel = Endpoint::try_from("http://[::]:50051")
@ -97,8 +98,7 @@ impl HealthClient {
error!("connect failed: {}", err); error!("connect failed: {}", err);
}) })
.or_err(ErrorType::ConnectError)?; .or_err(ErrorType::ConnectError)?;
let client = HealthGRPCClient::with_interceptor(channel, TracingInterceptor)
let client = HealthGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
.max_decoding_message_size(usize::MAX) .max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX); .max_encoding_message_size(usize::MAX);
Ok(Self { client }) Ok(Self { client })
@ -137,6 +137,7 @@ impl HealthClient {
} }
/// make_request creates a new request with timeout. /// make_request creates a new request with timeout.
#[instrument(skip_all)]
fn make_request<T>(request: T) -> tonic::Request<T> { fn make_request<T>(request: T) -> tonic::Request<T> {
let mut request = tonic::Request::new(request); let mut request = tonic::Request::new(request);
request.set_timeout(super::REQUEST_TIMEOUT); request.set_timeout(super::REQUEST_TIMEOUT);

View File

@ -17,28 +17,9 @@
use tonic::{metadata, service::Interceptor, Request, Status}; use tonic::{metadata, service::Interceptor, Request, Status};
use tracing_opentelemetry::OpenTelemetrySpanExt; use tracing_opentelemetry::OpenTelemetrySpanExt;
/// MetadataMap is a tracing metadata map container for span context. /// MetadataMap is a tracing metadata map container.
struct MetadataMap<'a>(&'a mut metadata::MetadataMap); struct MetadataMap<'a>(&'a mut metadata::MetadataMap);
/// MetadataMap implements the otel tracing Extractor.
impl opentelemetry::propagation::Extractor for MetadataMap<'_> {
/// Get a value for a key from the `MetadataMap`. If the value can't be converted to &str, returns None
fn get(&self, key: &str) -> Option<&str> {
self.0.get(key).and_then(|metadata| metadata.to_str().ok())
}
/// Collect all the keys from the `MetadataMap`.
fn keys(&self) -> Vec<&str> {
self.0
.keys()
.map(|key| match key {
tonic::metadata::KeyRef::Ascii(v) => v.as_str(),
tonic::metadata::KeyRef::Binary(v) => v.as_str(),
})
.collect::<Vec<_>>()
}
}
/// MetadataMap implements the otel tracing Injector. /// MetadataMap implements the otel tracing Injector.
impl opentelemetry::propagation::Injector for MetadataMap<'_> { impl opentelemetry::propagation::Injector for MetadataMap<'_> {
/// set a key-value pair to the injector. /// set a key-value pair to the injector.
@ -51,12 +32,12 @@ impl opentelemetry::propagation::Injector for MetadataMap<'_> {
} }
} }
/// InjectTracingInterceptor is an auto-inject tracing gRPC interceptor. /// TracingInterceptor is an auto-inject tracing gRPC interceptor.
#[derive(Clone)] #[derive(Clone)]
pub struct InjectTracingInterceptor; pub struct TracingInterceptor;
/// InjectTracingInterceptor implements the tonic Interceptor interface. /// TracingInterceptor implements the tonic Interceptor interface.
impl Interceptor for InjectTracingInterceptor { impl Interceptor for TracingInterceptor {
/// call and inject the tracing context into the request metadata via the global propagator. /// call and inject the tracing context into the request metadata via the global propagator.
fn call(&mut self, mut request: Request<()>) -> std::result::Result<Request<()>, Status> { fn call(&mut self, mut request: Request<()>) -> std::result::Result<Request<()>, Status> {
let context = tracing::Span::current().context(); let context = tracing::Span::current().context();
@ -67,20 +48,3 @@ impl Interceptor for InjectTracingInterceptor {
Ok(request) Ok(request)
} }
} }
/// ExtractTracingInterceptor is an auto-extract tracing gRPC interceptor.
#[derive(Clone)]
pub struct ExtractTracingInterceptor;
/// ExtractTracingInterceptor implements the tonic Interceptor interface.
impl Interceptor for ExtractTracingInterceptor {
/// call and inject tracing context into lgobal propagator.
fn call(&mut self, mut request: Request<()>) -> std::result::Result<Request<()>, Status> {
let parent_cx = opentelemetry::global::get_text_map_propagator(|prop| {
prop.extract(&MetadataMap(request.metadata_mut()))
});
request.extensions_mut().insert(parent_cx);
Ok(request)
}
}
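A hedged sketch of how the two interceptors are meant to pair up, using only tonic's generic `InterceptedService`; in practice the generated `with_interceptor` constructors are used, as the clients elsewhere in this diff show. The helper names are illustrative.

use tonic::service::interceptor::InterceptedService;
use tonic::transport::Channel;

/// traced_channel wraps an outgoing channel so every request carries the
/// current span context in its metadata.
fn traced_channel(channel: Channel) -> InterceptedService<Channel, InjectTracingInterceptor> {
    InterceptedService::new(channel, InjectTracingInterceptor)
}

/// traced_service wraps an incoming service so the propagated parent context
/// is placed into request extensions, where handlers pick it up with
/// `Span::current().set_parent(parent_ctx)`.
fn traced_service<S>(service: S) -> InterceptedService<S, ExtractTracingInterceptor> {
    InterceptedService::new(service, ExtractTracingInterceptor)
}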

View File

@ -27,21 +27,22 @@ use dragonfly_client_core::{
use std::sync::Arc; use std::sync::Arc;
use tonic::{service::interceptor::InterceptedService, transport::Channel}; use tonic::{service::interceptor::InterceptedService, transport::Channel};
use tonic_health::pb::health_check_response::ServingStatus; use tonic_health::pb::health_check_response::ServingStatus;
use tracing::{error, instrument}; use tracing::{error, instrument, warn};
use url::Url; use url::Url;
use super::interceptor::InjectTracingInterceptor; use super::interceptor::TracingInterceptor;
/// ManagerClient is a wrapper of ManagerGRPCClient. /// ManagerClient is a wrapper of ManagerGRPCClient.
#[derive(Clone)] #[derive(Clone)]
pub struct ManagerClient { pub struct ManagerClient {
/// client is the grpc client of the manager. /// client is the grpc client of the manager.
pub client: ManagerGRPCClient<InterceptedService<Channel, InjectTracingInterceptor>>, pub client: ManagerGRPCClient<InterceptedService<Channel, TracingInterceptor>>,
} }
/// ManagerClient implements the grpc client of the manager. /// ManagerClient implements the grpc client of the manager.
impl ManagerClient { impl ManagerClient {
/// new creates a new ManagerClient. /// new creates a new ManagerClient.
#[instrument(skip_all)]
pub async fn new(config: Arc<Config>, addr: String) -> Result<Self> { pub async fn new(config: Arc<Config>, addr: String) -> Result<Self> {
let domain_name = Url::parse(addr.as_str())? let domain_name = Url::parse(addr.as_str())?
.host_str() .host_str()
@ -98,7 +99,7 @@ impl ManagerClient {
.or_err(ErrorType::ConnectError)?, .or_err(ErrorType::ConnectError)?,
}; };
let client = ManagerGRPCClient::with_interceptor(channel, InjectTracingInterceptor) let client = ManagerGRPCClient::with_interceptor(channel, TracingInterceptor)
.max_decoding_message_size(usize::MAX) .max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX); .max_encoding_message_size(usize::MAX);
Ok(Self { client }) Ok(Self { client })
@ -132,6 +133,7 @@ impl ManagerClient {
} }
/// make_request creates a new request with timeout. /// make_request creates a new request with timeout.
#[instrument(skip_all)]
fn make_request<T>(request: T) -> tonic::Request<T> { fn make_request<T>(request: T) -> tonic::Request<T> {
let mut request = tonic::Request::new(request); let mut request = tonic::Request::new(request);
request.set_timeout(super::REQUEST_TIMEOUT); request.set_timeout(super::REQUEST_TIMEOUT);

View File

@ -34,12 +34,8 @@ pub mod scheduler;
/// CONNECT_TIMEOUT is the timeout for GRPC connection. /// CONNECT_TIMEOUT is the timeout for GRPC connection.
pub const CONNECT_TIMEOUT: Duration = Duration::from_secs(2); pub const CONNECT_TIMEOUT: Duration = Duration::from_secs(2);
/// REQUEST_TIMEOUT is the timeout for GRPC requests, default is 15 seconds. /// REQUEST_TIMEOUT is the timeout for GRPC requests.
/// Note: This timeout applies to the whole request, including waiting for scheduler pub const REQUEST_TIMEOUT: Duration = Duration::from_secs(5);
/// scheduling; refer to https://d7y.io/docs/next/reference/configuration/scheduler/.
/// The scheduler's `scheduler.retryInterval`, `scheduler.retryBackToSourceLimit` and `scheduler.retryLimit`
/// settings control how the scheduler schedules the task.
pub const REQUEST_TIMEOUT: Duration = Duration::from_secs(15);
/// TCP_KEEPALIVE is the keepalive duration for TCP connection. /// TCP_KEEPALIVE is the keepalive duration for TCP connection.
pub const TCP_KEEPALIVE: Duration = Duration::from_secs(3600); pub const TCP_KEEPALIVE: Duration = Duration::from_secs(3600);
@ -50,11 +46,11 @@ pub const HTTP2_KEEP_ALIVE_INTERVAL: Duration = Duration::from_secs(300);
/// HTTP2_KEEP_ALIVE_TIMEOUT is the timeout for HTTP2 keep alive. /// HTTP2_KEEP_ALIVE_TIMEOUT is the timeout for HTTP2 keep alive.
pub const HTTP2_KEEP_ALIVE_TIMEOUT: Duration = Duration::from_secs(20); pub const HTTP2_KEEP_ALIVE_TIMEOUT: Duration = Duration::from_secs(20);
/// MAX_FRAME_SIZE is the max frame size for GRPC, default is 4MB. /// MAX_FRAME_SIZE is the max frame size for GRPC, default is 12MB.
pub const MAX_FRAME_SIZE: u32 = 4 * 1024 * 1024; pub const MAX_FRAME_SIZE: u32 = 12 * 1024 * 1024;
/// INITIAL_WINDOW_SIZE is the initial window size for GRPC, default is 512KB. /// INITIAL_WINDOW_SIZE is the initial window size for GRPC, default is 12MB.
pub const INITIAL_WINDOW_SIZE: u32 = 512 * 1024; pub const INITIAL_WINDOW_SIZE: u32 = 12 * 1024 * 1024;
/// BUFFER_SIZE is the buffer size for GRPC, default is 64KB. /// BUFFER_SIZE is the buffer size for GRPC, default is 64KB.
pub const BUFFER_SIZE: usize = 64 * 1024; pub const BUFFER_SIZE: usize = 64 * 1024;
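A hedged sketch of how these constants would typically be applied to a tonic `Endpoint`; the address is a placeholder, the builder methods are standard tonic, and `MAX_FRAME_SIZE` is omitted because frame size is normally configured on the server builder rather than the client endpoint. Whether every constant is wired exactly this way elsewhere in the codebase is not shown in this diff.

use tonic::transport::Endpoint;

/// build_endpoint shows one plausible way to wire the constants above into a
/// client endpoint.
fn build_endpoint() -> Endpoint {
    Endpoint::from_static("http://127.0.0.1:4000")
        .connect_timeout(CONNECT_TIMEOUT)
        .timeout(REQUEST_TIMEOUT)
        .tcp_keepalive(Some(TCP_KEEPALIVE))
        .http2_keep_alive_interval(HTTP2_KEEP_ALIVE_INTERVAL)
        .keep_alive_timeout(HTTP2_KEEP_ALIVE_TIMEOUT)
        .initial_stream_window_size(INITIAL_WINDOW_SIZE)
        .buffer_size(BUFFER_SIZE)
}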

View File

@ -40,7 +40,7 @@ use tonic::transport::Channel;
use tracing::{error, info, instrument, Instrument}; use tracing::{error, info, instrument, Instrument};
use url::Url; use url::Url;
use super::interceptor::InjectTracingInterceptor; use super::interceptor::TracingInterceptor;
/// VNode is the virtual node of the hashring. /// VNode is the virtual node of the hashring.
#[derive(Debug, Copy, Clone, Hash, PartialEq)] #[derive(Debug, Copy, Clone, Hash, PartialEq)]
@ -79,6 +79,7 @@ pub struct SchedulerClient {
/// SchedulerClient implements the grpc client of the scheduler. /// SchedulerClient implements the grpc client of the scheduler.
impl SchedulerClient { impl SchedulerClient {
/// new creates a new SchedulerClient. /// new creates a new SchedulerClient.
#[instrument(skip_all)]
pub async fn new(config: Arc<Config>, dynconfig: Arc<Dynconfig>) -> Result<Self> { pub async fn new(config: Arc<Config>, dynconfig: Arc<Dynconfig>) -> Result<Self> {
let client = Self { let client = Self {
config, config,
@ -191,8 +192,7 @@ impl SchedulerClient {
}) })
.or_err(ErrorType::ConnectError)?; .or_err(ErrorType::ConnectError)?;
let mut client = let mut client = SchedulerGRPCClient::with_interceptor(channel, TracingInterceptor)
SchedulerGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
.max_decoding_message_size(usize::MAX) .max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX); .max_encoding_message_size(usize::MAX);
client.announce_host(request).await?; client.announce_host(request).await?;
@ -245,8 +245,7 @@ impl SchedulerClient {
}) })
.or_err(ErrorType::ConnectError)?; .or_err(ErrorType::ConnectError)?;
let mut client = let mut client = SchedulerGRPCClient::with_interceptor(channel, TracingInterceptor)
SchedulerGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
.max_decoding_message_size(usize::MAX) .max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX); .max_encoding_message_size(usize::MAX);
client.announce_host(request).await?; client.announce_host(request).await?;
@ -304,8 +303,7 @@ impl SchedulerClient {
}) })
.or_err(ErrorType::ConnectError)?; .or_err(ErrorType::ConnectError)?;
let mut client = let mut client = SchedulerGRPCClient::with_interceptor(channel, TracingInterceptor)
SchedulerGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
.max_decoding_message_size(usize::MAX) .max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX); .max_encoding_message_size(usize::MAX);
client.delete_host(request).await?; client.delete_host(request).await?;
@ -459,7 +457,7 @@ impl SchedulerClient {
&self, &self,
task_id: &str, task_id: &str,
peer_id: Option<&str>, peer_id: Option<&str>,
) -> Result<SchedulerGRPCClient<InterceptedService<Channel, InjectTracingInterceptor>>> { ) -> Result<SchedulerGRPCClient<InterceptedService<Channel, TracingInterceptor>>> {
// Update scheduler addresses of the client. // Update scheduler addresses of the client.
self.update_available_scheduler_addrs().await?; self.update_available_scheduler_addrs().await?;
@ -518,7 +516,7 @@ impl SchedulerClient {
}; };
Ok( Ok(
SchedulerGRPCClient::with_interceptor(channel, InjectTracingInterceptor) SchedulerGRPCClient::with_interceptor(channel, TracingInterceptor)
.max_decoding_message_size(usize::MAX) .max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX), .max_encoding_message_size(usize::MAX),
) )
@ -621,6 +619,7 @@ impl SchedulerClient {
} }
/// make_request creates a new request with timeout. /// make_request creates a new request with timeout.
#[instrument(skip_all)]
fn make_request<T>(request: T) -> tonic::Request<T> { fn make_request<T>(request: T) -> tonic::Request<T> {
let mut request = tonic::Request::new(request); let mut request = tonic::Request::new(request);
request.set_timeout(super::REQUEST_TIMEOUT); request.set_timeout(super::REQUEST_TIMEOUT);

View File

@ -36,6 +36,7 @@ pub struct Health {
/// Health implements the health server. /// Health implements the health server.
impl Health { impl Health {
/// new creates a new Health. /// new creates a new Health.
#[instrument(skip_all)]
pub fn new( pub fn new(
addr: SocketAddr, addr: SocketAddr,
shutdown: shutdown::Shutdown, shutdown: shutdown::Shutdown,
@ -49,6 +50,7 @@ impl Health {
} }
/// run starts the health server. /// run starts the health server.
#[instrument(skip_all)]
pub async fn run(&self) { pub async fn run(&self) {
// Clone the shutdown channel. // Clone the shutdown channel.
let mut shutdown = self.shutdown.clone(); let mut shutdown = self.shutdown.clone();
@ -69,6 +71,7 @@ impl Health {
_ = shutdown.recv() => { _ = shutdown.recv() => {
// Health server shutting down with signals. // Health server shutting down with signals.
info!("health server shutting down"); info!("health server shutting down");
return
} }
} }
} }

View File

@ -26,8 +26,9 @@ use prometheus::{
}; };
use std::net::SocketAddr; use std::net::SocketAddr;
use std::path::Path; use std::path::Path;
use std::sync::Arc; use std::sync::{Arc, Mutex};
use std::time::Duration; use std::time::Duration;
use sysinfo::{ProcessRefreshKind, ProcessesToUpdate, RefreshKind, System, UpdateKind};
use tokio::sync::mpsc; use tokio::sync::mpsc;
use tracing::{error, info, instrument, warn}; use tracing::{error, info, instrument, warn};
use warp::{Filter, Rejection, Reply}; use warp::{Filter, Rejection, Reply};
@ -212,20 +213,6 @@ lazy_static! {
&["type"] &["type"]
).expect("metric can be created"); ).expect("metric can be created");
/// LIST_TASK_ENTRIES_COUNT is used to count the number of list task entries.
pub static ref LIST_TASK_ENTRIES_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("list_task_entries_total", "Counter of the number of the list task entries.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type"]
).expect("metric can be created");
/// LIST_TASK_ENTRIES_FAILURE_COUNT is used to count the number of failed list task entries.
pub static ref LIST_TASK_ENTRIES_FAILURE_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("list_task_entries_failure_total", "Counter of the number of failed list task entries.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type"]
).expect("metric can be created");
/// DELETE_TASK_COUNT is used to count the number of delete tasks. /// DELETE_TASK_COUNT is used to count the number of delete tasks.
pub static ref DELETE_TASK_COUNT: IntCounterVec = pub static ref DELETE_TASK_COUNT: IntCounterVec =
IntCounterVec::new( IntCounterVec::new(
@ -267,9 +254,24 @@ lazy_static! {
Opts::new("disk_usage_space_total", "Gauge of the disk usage space in bytes").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME), Opts::new("disk_usage_space_total", "Gauge of the disk usage space in bytes").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&[] &[]
).expect("metric can be created"); ).expect("metric can be created");
/// DISK_WRITTEN_BYTES is used to count the disk written bytes.
pub static ref DISK_WRITTEN_BYTES: IntGaugeVec =
IntGaugeVec::new(
Opts::new("disk_written_bytes", "Gauge of the disk written bytes.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&[]
).expect("metric can be created");
/// DISK_READ_BYTES is used to count the disk read bytes.
pub static ref DISK_READ_BYTES: IntGaugeVec =
IntGaugeVec::new(
Opts::new("disk_read_bytes", "Gauge of the disk read bytes.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&[]
).expect("metric can be created");
} }
/// register_custom_metrics registers all custom metrics. /// register_custom_metrics registers all custom metrics.
#[instrument(skip_all)]
fn register_custom_metrics() { fn register_custom_metrics() {
REGISTRY REGISTRY
.register(Box::new(VERSION_GAUGE.clone())) .register(Box::new(VERSION_GAUGE.clone()))
@ -351,14 +353,6 @@ fn register_custom_metrics() {
.register(Box::new(STAT_TASK_FAILURE_COUNT.clone())) .register(Box::new(STAT_TASK_FAILURE_COUNT.clone()))
.expect("metric can be registered"); .expect("metric can be registered");
REGISTRY
.register(Box::new(LIST_TASK_ENTRIES_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(LIST_TASK_ENTRIES_FAILURE_COUNT.clone()))
.expect("metric can be registered");
REGISTRY REGISTRY
.register(Box::new(DELETE_TASK_COUNT.clone())) .register(Box::new(DELETE_TASK_COUNT.clone()))
.expect("metric can be registered"); .expect("metric can be registered");
@ -382,9 +376,18 @@ fn register_custom_metrics() {
REGISTRY REGISTRY
.register(Box::new(DISK_USAGE_SPACE.clone())) .register(Box::new(DISK_USAGE_SPACE.clone()))
.expect("metric can be registered"); .expect("metric can be registered");
REGISTRY
.register(Box::new(DISK_WRITTEN_BYTES.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(DISK_READ_BYTES.clone()))
.expect("metric can be registered");
} }
/// reset_custom_metrics resets all custom metrics. /// reset_custom_metrics resets all custom metrics.
#[instrument(skip_all)]
fn reset_custom_metrics() { fn reset_custom_metrics() {
VERSION_GAUGE.reset(); VERSION_GAUGE.reset();
DOWNLOAD_TASK_COUNT.reset(); DOWNLOAD_TASK_COUNT.reset();
@ -406,14 +409,14 @@ fn reset_custom_metrics() {
UPDATE_TASK_FAILURE_COUNT.reset(); UPDATE_TASK_FAILURE_COUNT.reset();
STAT_TASK_COUNT.reset(); STAT_TASK_COUNT.reset();
STAT_TASK_FAILURE_COUNT.reset(); STAT_TASK_FAILURE_COUNT.reset();
LIST_TASK_ENTRIES_COUNT.reset();
LIST_TASK_ENTRIES_FAILURE_COUNT.reset();
DELETE_TASK_COUNT.reset(); DELETE_TASK_COUNT.reset();
DELETE_TASK_FAILURE_COUNT.reset(); DELETE_TASK_FAILURE_COUNT.reset();
DELETE_HOST_COUNT.reset(); DELETE_HOST_COUNT.reset();
DELETE_HOST_FAILURE_COUNT.reset(); DELETE_HOST_FAILURE_COUNT.reset();
DISK_SPACE.reset(); DISK_SPACE.reset();
DISK_USAGE_SPACE.reset(); DISK_USAGE_SPACE.reset();
DISK_WRITTEN_BYTES.reset();
DISK_READ_BYTES.reset();
} }
/// TaskSize represents the size of the task. /// TaskSize represents the size of the task.
@ -772,20 +775,6 @@ pub fn collect_stat_task_failure_metrics(typ: i32) {
.inc(); .inc();
} }
/// collect_list_task_entries_started_metrics collects the list task entries started metrics.
pub fn collect_list_task_entries_started_metrics(typ: i32) {
LIST_TASK_ENTRIES_COUNT
.with_label_values(&[typ.to_string().as_str()])
.inc();
}
/// collect_list_task_entries_failure_metrics collects the list task entries failure metrics.
pub fn collect_list_task_entries_failure_metrics(typ: i32) {
LIST_TASK_ENTRIES_FAILURE_COUNT
.with_label_values(&[typ.to_string().as_str()])
.inc();
}
/// collect_delete_task_started_metrics collects the delete task started metrics. /// collect_delete_task_started_metrics collects the delete task started metrics.
pub fn collect_delete_task_started_metrics(typ: i32) { pub fn collect_delete_task_started_metrics(typ: i32) {
DELETE_TASK_COUNT DELETE_TASK_COUNT
@ -811,7 +800,7 @@ pub fn collect_delete_host_failure_metrics() {
} }
/// collect_disk_metrics collects the disk metrics. /// collect_disk_metrics collects the disk metrics.
pub fn collect_disk_metrics(path: &Path) { pub fn collect_disk_metrics(path: &Path, system: &Arc<Mutex<System>>) {
// Collect disk space metrics. // Collect disk space metrics.
let stats = match fs2::statvfs(path) { let stats = match fs2::statvfs(path) {
Ok(stats) => stats, Ok(stats) => stats,
@ -828,6 +817,24 @@ pub fn collect_disk_metrics(path: &Path) {
DISK_USAGE_SPACE DISK_USAGE_SPACE
.with_label_values(&[]) .with_label_values(&[])
.set(usage_space as i64); .set(usage_space as i64);
// Collect disk bandwidth metrics.
let mut sys = system.lock().unwrap();
sys.refresh_processes_specifics(
ProcessesToUpdate::All,
true,
ProcessRefreshKind::new()
.with_disk_usage()
.with_exe(UpdateKind::Always),
);
let process = sys.process(sysinfo::get_current_pid().unwrap()).unwrap();
DISK_WRITTEN_BYTES
.with_label_values(&[])
.set(process.disk_usage().written_bytes as i64);
DISK_READ_BYTES
.with_label_values(&[])
.set(process.disk_usage().read_bytes as i64);
} }
/// Metrics is the metrics server. /// Metrics is the metrics server.
@ -836,6 +843,9 @@ pub struct Metrics {
/// config is the configuration of the dfdaemon. /// config is the configuration of the dfdaemon.
config: Arc<Config>, config: Arc<Config>,
/// system is the system information, only used for collecting disk metrics.
system: Arc<Mutex<System>>,
/// shutdown is used to shutdown the metrics server. /// shutdown is used to shutdown the metrics server.
shutdown: shutdown::Shutdown, shutdown: shutdown::Shutdown,
@ -846,6 +856,7 @@ pub struct Metrics {
/// Metrics implements the metrics server. /// Metrics implements the metrics server.
impl Metrics { impl Metrics {
/// new creates a new Metrics. /// new creates a new Metrics.
#[instrument(skip_all)]
pub fn new( pub fn new(
config: Arc<Config>, config: Arc<Config>,
shutdown: shutdown::Shutdown, shutdown: shutdown::Shutdown,
@ -853,12 +864,20 @@ impl Metrics {
) -> Self { ) -> Self {
Self { Self {
config, config,
system: Arc::new(Mutex::new(System::new_with_specifics(
RefreshKind::new().with_processes(
ProcessRefreshKind::new()
.with_disk_usage()
.with_exe(UpdateKind::Always),
),
))),
shutdown, shutdown,
_shutdown_complete: shutdown_complete_tx, _shutdown_complete: shutdown_complete_tx,
} }
} }
/// run starts the metrics server. /// run starts the metrics server.
#[instrument(skip_all)]
pub async fn run(&self) { pub async fn run(&self) {
// Clone the shutdown channel. // Clone the shutdown channel.
let mut shutdown = self.shutdown.clone(); let mut shutdown = self.shutdown.clone();
@ -879,6 +898,7 @@ impl Metrics {
// Clone the config. // Clone the config.
let config = self.config.clone(); let config = self.config.clone();
let system = self.system.clone();
// Create the metrics server address. // Create the metrics server address.
let addr = SocketAddr::new( let addr = SocketAddr::new(
@ -890,7 +910,7 @@ impl Metrics {
let get_metrics_route = warp::path!("metrics") let get_metrics_route = warp::path!("metrics")
.and(warp::get()) .and(warp::get())
.and(warp::path::end()) .and(warp::path::end())
.and_then(move || Self::get_metrics_handler(config.clone())); .and_then(move || Self::get_metrics_handler(config.clone(), system.clone()));
// Delete the metrics route. // Delete the metrics route.
let delete_metrics_route = warp::path!("metrics") let delete_metrics_route = warp::path!("metrics")
@ -909,15 +929,19 @@ impl Metrics {
_ = shutdown.recv() => { _ = shutdown.recv() => {
// Metrics server shutting down with signals. // Metrics server shutting down with signals.
info!("metrics server shutting down"); info!("metrics server shutting down");
return
} }
} }
} }
/// get_metrics_handler handles the metrics request of getting. /// get_metrics_handler handles the metrics request of getting.
#[instrument(skip_all)] #[instrument(skip_all)]
async fn get_metrics_handler(config: Arc<Config>) -> Result<impl Reply, Rejection> { async fn get_metrics_handler(
config: Arc<Config>,
system: Arc<Mutex<System>>,
) -> Result<impl Reply, Rejection> {
// Collect the disk space metrics. // Collect the disk space metrics.
collect_disk_metrics(config.storage.dir.as_path()); collect_disk_metrics(config.storage.dir.as_path(), &system);
// Encode custom metrics. // Encode custom metrics.
let encoder = TextEncoder::new(); let encoder = TextEncoder::new();

View File

@ -17,7 +17,7 @@
use bytesize::ByteSize; use bytesize::ByteSize;
use dragonfly_api::common::v2::Priority; use dragonfly_api::common::v2::Priority;
use reqwest::header::HeaderMap; use reqwest::header::HeaderMap;
use tracing::error; use tracing::{error, instrument};
/// DRAGONFLY_TAG_HEADER is the header key of tag in http request. /// DRAGONFLY_TAG_HEADER is the header key of tag in http request.
pub const DRAGONFLY_TAG_HEADER: &str = "X-Dragonfly-Tag"; pub const DRAGONFLY_TAG_HEADER: &str = "X-Dragonfly-Tag";
@ -66,29 +66,21 @@ pub const DRAGONFLY_OUTPUT_PATH_HEADER: &str = "X-Dragonfly-Output-Path";
/// For more details refer to https://github.com/dragonflyoss/design/blob/main/systems-analysis/file-download-workflow-with-hard-link/README.md. /// For more details refer to https://github.com/dragonflyoss/design/blob/main/systems-analysis/file-download-workflow-with-hard-link/README.md.
pub const DRAGONFLY_FORCE_HARD_LINK_HEADER: &str = "X-Dragonfly-Force-Hard-Link"; pub const DRAGONFLY_FORCE_HARD_LINK_HEADER: &str = "X-Dragonfly-Force-Hard-Link";
/// DRAGONFLY_PIECE_LENGTH_HEADER is the header key of piece length in http request. /// DRAGONFLY_PIECE_LENGTH is the header key of piece length in http request.
/// If the value is set, the piece length will be used to download the file. /// If the value is set, the piece length will be used to download the file.
/// Different piece lengths generate different task ids. The value needs to /// Different piece lengths generate different task ids. The value needs to
/// be set in a human-readable format and must be greater than or equal /// be set in a human-readable format and must be greater than or equal
/// to 4mib, for example: 4mib, 1gib /// to 4mib, for example: 4mib, 1gib
pub const DRAGONFLY_PIECE_LENGTH_HEADER: &str = "X-Dragonfly-Piece-Length"; pub const DRAGONFLY_PIECE_LENGTH: &str = "X-Dragonfly-Piece-Length";
/// DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID_HEADER is the header key of content for calculating task id. /// DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID is the header key of content for calculating task id.
/// If DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID_HEADER is set, use its value to calculate the task ID. /// If DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID is set, use its value to calculate the task ID.
/// Otherwise, calculate the task ID based on `url`, `piece_length`, `tag`, `application`, and `filtered_query_params`. /// Otherwise, calculate the task ID based on `url`, `piece_length`, `tag`, `application`, and `filtered_query_params`.
pub const DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID_HEADER: &str = pub const DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID: &str =
"X-Dragonfly-Content-For-Calculating-Task-ID"; "X-Dragonfly-Content-For-Calculating-Task-ID";
/// DRAGONFLY_TASK_DOWNLOAD_FINISHED_HEADER is the response header key to indicate whether the task download finished.
/// When the task download is finished, the response will include this header with the value `"true"`,
/// indicating that the download hit the local cache.
pub const DRAGONFLY_TASK_DOWNLOAD_FINISHED_HEADER: &str = "X-Dragonfly-Task-Download-Finished";
/// DRAGONFLY_TASK_ID_HEADER is the response header key of task id. Client will calculate the task ID
/// based on `url`, `piece_length`, `tag`, `application`, and `filtered_query_params`.
pub const DRAGONFLY_TASK_ID_HEADER: &str = "X-Dragonfly-Task-ID";
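For orientation, a small hedged example of building a request header map with a few of these keys using reqwest (already imported in this file); the values are placeholders, and nothing here asserts how the proxy will treat them.

use reqwest::header::{HeaderMap, HeaderValue};

/// example_headers builds a header map a Dragonfly-aware proxy could interpret;
/// the tag, output path and hard-link values are illustrative only.
fn example_headers() -> HeaderMap {
    let mut headers = HeaderMap::new();
    headers.insert(DRAGONFLY_TAG_HEADER, HeaderValue::from_static("example-tag"));
    headers.insert(
        DRAGONFLY_OUTPUT_PATH_HEADER,
        HeaderValue::from_static("/tmp/example-output"),
    );
    headers.insert(
        DRAGONFLY_FORCE_HARD_LINK_HEADER,
        HeaderValue::from_static("true"),
    );
    headers
}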
/// get_tag gets the tag from http header. /// get_tag gets the tag from http header.
#[instrument(skip_all)]
pub fn get_tag(header: &HeaderMap) -> Option<String> { pub fn get_tag(header: &HeaderMap) -> Option<String> {
header header
.get(DRAGONFLY_TAG_HEADER) .get(DRAGONFLY_TAG_HEADER)
@ -97,6 +89,7 @@ pub fn get_tag(header: &HeaderMap) -> Option<String> {
} }
/// get_application gets the application from http header. /// get_application gets the application from http header.
#[instrument(skip_all)]
pub fn get_application(header: &HeaderMap) -> Option<String> { pub fn get_application(header: &HeaderMap) -> Option<String> {
header header
.get(DRAGONFLY_APPLICATION_HEADER) .get(DRAGONFLY_APPLICATION_HEADER)
@ -105,6 +98,7 @@ pub fn get_application(header: &HeaderMap) -> Option<String> {
} }
/// get_priority gets the priority from http header. /// get_priority gets the priority from http header.
#[instrument(skip_all)]
pub fn get_priority(header: &HeaderMap) -> i32 { pub fn get_priority(header: &HeaderMap) -> i32 {
let default_priority = Priority::Level6 as i32; let default_priority = Priority::Level6 as i32;
match header.get(DRAGONFLY_PRIORITY_HEADER) { match header.get(DRAGONFLY_PRIORITY_HEADER) {
@ -126,6 +120,7 @@ pub fn get_priority(header: &HeaderMap) -> i32 {
} }
/// get_registry gets the custom address of container registry from http header. /// get_registry gets the custom address of container registry from http header.
#[instrument(skip_all)]
pub fn get_registry(header: &HeaderMap) -> Option<String> { pub fn get_registry(header: &HeaderMap) -> Option<String> {
header header
.get(DRAGONFLY_REGISTRY_HEADER) .get(DRAGONFLY_REGISTRY_HEADER)
@ -134,6 +129,7 @@ pub fn get_registry(header: &HeaderMap) -> Option<String> {
} }
/// get_filters gets the filters from http header. /// get_filters gets the filters from http header.
#[instrument(skip_all)]
pub fn get_filtered_query_params( pub fn get_filtered_query_params(
header: &HeaderMap, header: &HeaderMap,
default_filtered_query_params: Vec<String>, default_filtered_query_params: Vec<String>,
@ -151,6 +147,7 @@ pub fn get_filtered_query_params(
} }
/// get_use_p2p gets the use p2p from http header. /// get_use_p2p gets the use p2p from http header.
#[instrument(skip_all)]
pub fn get_use_p2p(header: &HeaderMap) -> bool { pub fn get_use_p2p(header: &HeaderMap) -> bool {
match header.get(DRAGONFLY_USE_P2P_HEADER) { match header.get(DRAGONFLY_USE_P2P_HEADER) {
Some(value) => match value.to_str() { Some(value) => match value.to_str() {
@ -165,6 +162,7 @@ pub fn get_use_p2p(header: &HeaderMap) -> bool {
} }
/// get_prefetch gets the prefetch from http header. /// get_prefetch gets the prefetch from http header.
#[instrument(skip_all)]
pub fn get_prefetch(header: &HeaderMap) -> Option<bool> { pub fn get_prefetch(header: &HeaderMap) -> Option<bool> {
match header.get(DRAGONFLY_PREFETCH_HEADER) { match header.get(DRAGONFLY_PREFETCH_HEADER) {
Some(value) => match value.to_str() { Some(value) => match value.to_str() {
@ -187,6 +185,7 @@ pub fn get_output_path(header: &HeaderMap) -> Option<String> {
} }
/// get_force_hard_link gets the force hard link from http header. /// get_force_hard_link gets the force hard link from http header.
#[instrument(skip_all)]
pub fn get_force_hard_link(header: &HeaderMap) -> bool { pub fn get_force_hard_link(header: &HeaderMap) -> bool {
match header.get(DRAGONFLY_FORCE_HARD_LINK_HEADER) { match header.get(DRAGONFLY_FORCE_HARD_LINK_HEADER) {
Some(value) => match value.to_str() { Some(value) => match value.to_str() {
@ -202,7 +201,7 @@ pub fn get_force_hard_link(header: &HeaderMap) -> bool {
/// get_piece_length gets the piece length from http header. /// get_piece_length gets the piece length from http header.
pub fn get_piece_length(header: &HeaderMap) -> Option<ByteSize> { pub fn get_piece_length(header: &HeaderMap) -> Option<ByteSize> {
match header.get(DRAGONFLY_PIECE_LENGTH_HEADER) { match header.get(DRAGONFLY_PIECE_LENGTH) {
Some(piece_length) => match piece_length.to_str() { Some(piece_length) => match piece_length.to_str() {
Ok(piece_length) => match piece_length.parse::<ByteSize>() { Ok(piece_length) => match piece_length.parse::<ByteSize>() {
Ok(piece_length) => Some(piece_length), Ok(piece_length) => Some(piece_length),
@ -223,7 +222,7 @@ pub fn get_piece_length(header: &HeaderMap) -> Option<ByteSize> {
/// get_content_for_calculating_task_id gets the content for calculating task id from http header. /// get_content_for_calculating_task_id gets the content for calculating task id from http header.
pub fn get_content_for_calculating_task_id(header: &HeaderMap) -> Option<String> { pub fn get_content_for_calculating_task_id(header: &HeaderMap) -> Option<String> {
header header
.get(DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID_HEADER) .get(DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID)
.and_then(|content| content.to_str().ok()) .and_then(|content| content.to_str().ok())
.map(|content| content.to_string()) .map(|content| content.to_string())
} }
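For reference, a short usage sketch of the getters in this file, written as if it were another case in the tests module shown further down in this diff (so `HeaderMap`, `HeaderValue`, `Priority`, and the header-name constants are already in scope); the default priority follows `Priority::Level6` as used in `get_priority` above.

```rust
// Usage sketch only: exercises the getters above with the constants referenced
// in this diff. Assumes it lives in the same module/tests as the getters.
fn example_read_request_headers() {
    let mut headers = HeaderMap::new();
    headers.insert(DRAGONFLY_TAG_HEADER, HeaderValue::from_static("v1"));
    headers.insert(DRAGONFLY_USE_P2P_HEADER, HeaderValue::from_static("true"));

    assert_eq!(get_tag(&headers), Some("v1".to_string()));
    assert!(get_use_p2p(&headers));

    // Absent headers fall back to defaults: Level6 priority, no registry override.
    assert_eq!(get_priority(&headers), Priority::Level6 as i32);
    assert_eq!(get_registry(&headers), None);
}
```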
@ -368,22 +367,16 @@ mod tests {
#[test] #[test]
fn test_get_piece_length() { fn test_get_piece_length() {
let mut headers = HeaderMap::new(); let mut headers = HeaderMap::new();
headers.insert( headers.insert(DRAGONFLY_PIECE_LENGTH, HeaderValue::from_static("4mib"));
DRAGONFLY_PIECE_LENGTH_HEADER,
HeaderValue::from_static("4mib"),
);
assert_eq!(get_piece_length(&headers), Some(ByteSize::mib(4))); assert_eq!(get_piece_length(&headers), Some(ByteSize::mib(4)));
let empty_headers = HeaderMap::new(); let empty_headers = HeaderMap::new();
assert_eq!(get_piece_length(&empty_headers), None); assert_eq!(get_piece_length(&empty_headers), None);
headers.insert( headers.insert(DRAGONFLY_PIECE_LENGTH, HeaderValue::from_static("invalid"));
DRAGONFLY_PIECE_LENGTH_HEADER,
HeaderValue::from_static("invalid"),
);
assert_eq!(get_piece_length(&headers), None); assert_eq!(get_piece_length(&headers), None);
headers.insert(DRAGONFLY_PIECE_LENGTH_HEADER, HeaderValue::from_static("0")); headers.insert(DRAGONFLY_PIECE_LENGTH, HeaderValue::from_static("0"));
assert_eq!(get_piece_length(&headers), Some(ByteSize::b(0))); assert_eq!(get_piece_length(&headers), Some(ByteSize::b(0)));
} }
@ -391,7 +384,7 @@ mod tests {
fn test_get_content_for_calculating_task_id() { fn test_get_content_for_calculating_task_id() {
let mut headers = HeaderMap::new(); let mut headers = HeaderMap::new();
headers.insert( headers.insert(
DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID_HEADER, DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID,
HeaderValue::from_static("test-content"), HeaderValue::from_static("test-content"),
); );
assert_eq!( assert_eq!(

View File

@ -99,6 +99,7 @@ pub struct Proxy {
/// Proxy implements the proxy server. /// Proxy implements the proxy server.
impl Proxy { impl Proxy {
/// new creates a new Proxy. /// new creates a new Proxy.
#[instrument(skip_all)]
pub fn new( pub fn new(
config: Arc<Config>, config: Arc<Config>,
task: Arc<Task>, task: Arc<Task>,
@ -143,6 +144,7 @@ impl Proxy {
} }
/// run starts the proxy server. /// run starts the proxy server.
#[instrument(skip_all)]
pub async fn run(&self, grpc_server_started_barrier: Arc<Barrier>) -> ClientResult<()> { pub async fn run(&self, grpc_server_started_barrier: Arc<Barrier>) -> ClientResult<()> {
let mut shutdown = self.shutdown.clone(); let mut shutdown = self.shutdown.clone();
let read_buffer_size = self.config.proxy.read_buffer_size; let read_buffer_size = self.config.proxy.read_buffer_size;
@ -208,7 +210,7 @@ impl Proxy {
service_fn(move |request|{ service_fn(move |request|{
let context = context.clone(); let context = context.clone();
async move { async move {
handler(context.config, context.task, request, context.dfdaemon_download_client, context.registry_cert, context.server_ca_cert, remote_address.ip()).await handler(context.config, context.task, request, context.dfdaemon_download_client, context.registry_cert, context.server_ca_cert).await
} }
} ), } ),
) )
@ -231,7 +233,7 @@ impl Proxy {
} }
/// handler handles the request from the client. /// handler handles the request from the client.
#[instrument(skip_all, fields(url, method, remote_ip))] #[instrument(skip_all, fields(uri, method))]
pub async fn handler( pub async fn handler(
config: Arc<Config>, config: Arc<Config>,
task: Arc<Task>, task: Arc<Task>,
@ -239,13 +241,7 @@ pub async fn handler(
dfdaemon_download_client: DfdaemonDownloadClient, dfdaemon_download_client: DfdaemonDownloadClient,
registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>, registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>,
server_ca_cert: Arc<Option<Certificate>>, server_ca_cert: Arc<Option<Certificate>>,
remote_ip: std::net::IpAddr,
) -> ClientResult<Response> { ) -> ClientResult<Response> {
// Span record the url and method.
Span::current().record("url", request.uri().to_string().as_str());
Span::current().record("method", request.method().as_str());
Span::current().record("remote_ip", remote_ip.to_string().as_str());
// Record the proxy request started metrics. The metrics will be recorded // Record the proxy request started metrics. The metrics will be recorded
// when the request is kept alive. // when the request is kept alive.
collect_proxy_request_started_metrics(); collect_proxy_request_started_metrics();
@ -258,7 +254,6 @@ pub async fn handler(
config, config,
task, task,
request, request,
remote_ip,
dfdaemon_download_client, dfdaemon_download_client,
registry_cert, registry_cert,
server_ca_cert, server_ca_cert,
@ -270,20 +265,22 @@ pub async fn handler(
config, config,
task, task,
request, request,
remote_ip,
dfdaemon_download_client, dfdaemon_download_client,
registry_cert, registry_cert,
) )
.await; .await;
} }
// Span record the uri and method.
Span::current().record("uri", request.uri().to_string().as_str());
Span::current().record("method", request.method().as_str());
// Handle CONNECT request. // Handle CONNECT request.
if Method::CONNECT == request.method() { if Method::CONNECT == request.method() {
return https_handler( return https_handler(
config, config,
task, task,
request, request,
remote_ip,
dfdaemon_download_client, dfdaemon_download_client,
registry_cert, registry_cert,
server_ca_cert, server_ca_cert,
@ -295,7 +292,6 @@ pub async fn handler(
config, config,
task, task,
request, request,
remote_ip,
dfdaemon_download_client, dfdaemon_download_client,
registry_cert, registry_cert,
) )
@ -308,7 +304,6 @@ pub async fn registry_mirror_http_handler(
config: Arc<Config>, config: Arc<Config>,
task: Arc<Task>, task: Arc<Task>,
request: Request<hyper::body::Incoming>, request: Request<hyper::body::Incoming>,
remote_ip: std::net::IpAddr,
dfdaemon_download_client: DfdaemonDownloadClient, dfdaemon_download_client: DfdaemonDownloadClient,
registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>, registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>,
) -> ClientResult<Response> { ) -> ClientResult<Response> {
@ -317,7 +312,6 @@ pub async fn registry_mirror_http_handler(
config, config,
task, task,
request, request,
remote_ip,
dfdaemon_download_client, dfdaemon_download_client,
registry_cert, registry_cert,
) )
@ -330,7 +324,6 @@ pub async fn registry_mirror_https_handler(
config: Arc<Config>, config: Arc<Config>,
task: Arc<Task>, task: Arc<Task>,
request: Request<hyper::body::Incoming>, request: Request<hyper::body::Incoming>,
remote_ip: std::net::IpAddr,
dfdaemon_download_client: DfdaemonDownloadClient, dfdaemon_download_client: DfdaemonDownloadClient,
registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>, registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>,
server_ca_cert: Arc<Option<Certificate>>, server_ca_cert: Arc<Option<Certificate>>,
@ -340,7 +333,6 @@ pub async fn registry_mirror_https_handler(
config, config,
task, task,
request, request,
remote_ip,
dfdaemon_download_client, dfdaemon_download_client,
registry_cert, registry_cert,
server_ca_cert, server_ca_cert,
@ -354,7 +346,6 @@ pub async fn http_handler(
config: Arc<Config>, config: Arc<Config>,
task: Arc<Task>, task: Arc<Task>,
request: Request<hyper::body::Incoming>, request: Request<hyper::body::Incoming>,
remote_ip: std::net::IpAddr,
dfdaemon_download_client: DfdaemonDownloadClient, dfdaemon_download_client: DfdaemonDownloadClient,
registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>, registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>,
) -> ClientResult<Response> { ) -> ClientResult<Response> {
@ -386,15 +377,7 @@ pub async fn http_handler(
request.method(), request.method(),
request_uri request_uri
); );
return proxy_via_dfdaemon( return proxy_via_dfdaemon(config, task, &rule, request, dfdaemon_download_client).await;
config,
task,
&rule,
request,
remote_ip,
dfdaemon_download_client,
)
.await;
} }
// If the request header contains the X-Dragonfly-Use-P2P header, proxy the request via the // If the request header contains the X-Dragonfly-Use-P2P header, proxy the request via the
@ -410,7 +393,6 @@ pub async fn http_handler(
task, task,
&Rule::default(), &Rule::default(),
request, request,
remote_ip,
dfdaemon_download_client, dfdaemon_download_client,
) )
.await; .await;
@ -439,7 +421,6 @@ pub async fn https_handler(
config: Arc<Config>, config: Arc<Config>,
task: Arc<Task>, task: Arc<Task>,
request: Request<hyper::body::Incoming>, request: Request<hyper::body::Incoming>,
remote_ip: std::net::IpAddr,
dfdaemon_download_client: DfdaemonDownloadClient, dfdaemon_download_client: DfdaemonDownloadClient,
registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>, registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>,
server_ca_cert: Arc<Option<Certificate>>, server_ca_cert: Arc<Option<Certificate>>,
@ -459,7 +440,6 @@ pub async fn https_handler(
upgraded, upgraded,
host, host,
port, port,
remote_ip,
dfdaemon_download_client, dfdaemon_download_client,
registry_cert, registry_cert,
server_ca_cert, server_ca_cert,
@ -490,7 +470,6 @@ async fn upgraded_tunnel(
upgraded: Upgraded, upgraded: Upgraded,
host: String, host: String,
port: u16, port: u16,
remote_ip: std::net::IpAddr,
dfdaemon_download_client: DfdaemonDownloadClient, dfdaemon_download_client: DfdaemonDownloadClient,
registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>, registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>,
server_ca_cert: Arc<Option<Certificate>>, server_ca_cert: Arc<Option<Certificate>>,
@ -539,7 +518,6 @@ async fn upgraded_tunnel(
host.clone(), host.clone(),
port, port,
request, request,
remote_ip,
dfdaemon_download_client.clone(), dfdaemon_download_client.clone(),
registry_cert.clone(), registry_cert.clone(),
) )
@ -555,20 +533,18 @@ async fn upgraded_tunnel(
} }
/// upgraded_handler handles the upgraded https request from the client. /// upgraded_handler handles the upgraded https request from the client.
#[allow(clippy::too_many_arguments)] #[instrument(skip_all, fields(uri, method))]
#[instrument(skip_all, fields(url, method))]
pub async fn upgraded_handler( pub async fn upgraded_handler(
config: Arc<Config>, config: Arc<Config>,
task: Arc<Task>, task: Arc<Task>,
host: String, host: String,
port: u16, port: u16,
mut request: Request<hyper::body::Incoming>, mut request: Request<hyper::body::Incoming>,
remote_ip: std::net::IpAddr,
dfdaemon_download_client: DfdaemonDownloadClient, dfdaemon_download_client: DfdaemonDownloadClient,
registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>, registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>,
) -> ClientResult<Response> { ) -> ClientResult<Response> {
// Span record the url and method. // Span record the uri and method.
Span::current().record("url", request.uri().to_string().as_str()); Span::current().record("uri", request.uri().to_string().as_str());
Span::current().record("method", request.method().as_str()); Span::current().record("method", request.method().as_str());
// Authenticate the request with the basic auth. // Authenticate the request with the basic auth.
@ -613,15 +589,7 @@ pub async fn upgraded_handler(
request.method(), request.method(),
request_uri request_uri
); );
return proxy_via_dfdaemon( return proxy_via_dfdaemon(config, task, &rule, request, dfdaemon_download_client).await;
config,
task,
&rule,
request,
remote_ip,
dfdaemon_download_client,
)
.await;
} }
// If the request header contains the X-Dragonfly-Use-P2P header, proxy the request via the // If the request header contains the X-Dragonfly-Use-P2P header, proxy the request via the
@ -637,7 +605,6 @@ pub async fn upgraded_handler(
task, task,
&Rule::default(), &Rule::default(),
request, request,
remote_ip,
dfdaemon_download_client, dfdaemon_download_client,
) )
.await; .await;
@ -667,15 +634,13 @@ async fn proxy_via_dfdaemon(
task: Arc<Task>, task: Arc<Task>,
rule: &Rule, rule: &Rule,
request: Request<hyper::body::Incoming>, request: Request<hyper::body::Incoming>,
remote_ip: std::net::IpAddr,
dfdaemon_download_client: DfdaemonDownloadClient, dfdaemon_download_client: DfdaemonDownloadClient,
) -> ClientResult<Response> { ) -> ClientResult<Response> {
// Collect the metrics for the proxy request via dfdaemon. // Collect the metrics for the proxy request via dfdaemon.
collect_proxy_request_via_dfdaemon_metrics(); collect_proxy_request_via_dfdaemon_metrics();
// Make the download task request. // Make the download task request.
let download_task_request = let download_task_request = match make_download_task_request(config.clone(), rule, request) {
match make_download_task_request(config.clone(), rule, request, remote_ip) {
Ok(download_task_request) => download_task_request, Ok(download_task_request) => download_task_request,
Err(err) => { Err(err) => {
error!("make download task request failed: {}", err); error!("make download task request failed: {}", err);
@ -768,10 +733,7 @@ async fn proxy_via_dfdaemon(
// Construct the response. // Construct the response.
let mut response = Response::new(boxed_body); let mut response = Response::new(boxed_body);
*response.headers_mut() = make_response_headers( *response.headers_mut() = make_response_headers(download_task_started_response.clone())?;
message.task_id.as_str(),
download_task_started_response.clone(),
)?;
*response.status_mut() = http::StatusCode::OK; *response.status_mut() = http::StatusCode::OK;
// Return the response if the client return the first piece. // Return the response if the client return the first piece.
@ -1019,6 +981,7 @@ async fn proxy_via_https(
} }
/// make_registry_mirror_request makes a registry mirror request by the request. /// make_registry_mirror_request makes a registry mirror request by the request.
#[instrument(skip_all)]
fn make_registry_mirror_request( fn make_registry_mirror_request(
config: Arc<Config>, config: Arc<Config>,
mut request: Request<hyper::body::Incoming>, mut request: Request<hyper::body::Incoming>,
@ -1052,11 +1015,11 @@ fn make_registry_mirror_request(
} }
/// make_download_task_request makes a download task request by the request. /// make_download_task_request makes a download task request by the request.
#[instrument(skip_all)]
fn make_download_task_request( fn make_download_task_request(
config: Arc<Config>, config: Arc<Config>,
rule: &Rule, rule: &Rule,
request: Request<hyper::body::Incoming>, request: Request<hyper::body::Incoming>,
remote_ip: std::net::IpAddr,
) -> ClientResult<DownloadTaskRequest> { ) -> ClientResult<DownloadTaskRequest> {
// Convert the Reqwest header to the Hyper header. // Convert the Reqwest header to the Hyper header.
let mut header = request.headers().clone(); let mut header = request.headers().clone();
@ -1102,15 +1065,16 @@ fn make_download_task_request(
hdfs: None, hdfs: None,
is_prefetch: false, is_prefetch: false,
need_piece_content: false, need_piece_content: false,
load_to_cache: false,
force_hard_link: header::get_force_hard_link(&header), force_hard_link: header::get_force_hard_link(&header),
content_for_calculating_task_id: header::get_content_for_calculating_task_id(&header), content_for_calculating_task_id: header::get_content_for_calculating_task_id(&header),
remote_ip: Some(remote_ip.to_string()),
}), }),
}) })
} }
/// need_prefetch returns whether prefetch is needed based on the configuration and the request /// need_prefetch returns whether prefetch is needed based on the configuration and the request
/// header. /// header.
#[instrument(skip_all)]
fn need_prefetch(config: Arc<Config>, header: &http::HeaderMap) -> bool { fn need_prefetch(config: Arc<Config>, header: &http::HeaderMap) -> bool {
// If the header does not contain the range header, the request does not need prefetch. // If the header does not contain the range header, the request does not need prefetch.
if !header.contains_key(reqwest::header::RANGE) { if !header.contains_key(reqwest::header::RANGE) {
@ -1124,10 +1088,11 @@ fn need_prefetch(config: Arc<Config>, header: &http::HeaderMap) -> bool {
} }
// Return the prefetch value from the configuration. // Return the prefetch value from the configuration.
config.proxy.prefetch return config.proxy.prefetch;
} }
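The decision order in `need_prefetch` is: no `Range` header means no prefetch, an explicit prefetch header wins, and otherwise the `proxy.prefetch` config value applies. A standalone illustration of that precedence follows; the literal header name is an assumption standing in for `DRAGONFLY_PREFETCH_HEADER`.

```rust
use http::HeaderMap;

// Standalone illustration of the precedence implemented by need_prefetch above.
// "X-Dragonfly-Prefetch" is an assumed literal for DRAGONFLY_PREFETCH_HEADER.
fn example_need_prefetch(prefetch_from_config: bool, header: &HeaderMap) -> bool {
    // Without a Range header there is nothing to prefetch around.
    if !header.contains_key(http::header::RANGE) {
        return false;
    }

    // An explicit header value overrides the configuration.
    match header
        .get("X-Dragonfly-Prefetch")
        .and_then(|value| value.to_str().ok())
        .and_then(|value| value.parse::<bool>().ok())
    {
        Some(prefetch) => prefetch,
        None => prefetch_from_config,
    }
}
```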
/// make_download_url makes a download url by the given uri. /// make_download_url makes a download url by the given uri.
#[instrument(skip_all)]
fn make_download_url( fn make_download_url(
uri: &hyper::Uri, uri: &hyper::Uri,
use_tls: bool, use_tls: bool,
@ -1152,8 +1117,8 @@ fn make_download_url(
} }
/// make_response_headers makes the response headers. /// make_response_headers makes the response headers.
#[instrument(skip_all)]
fn make_response_headers( fn make_response_headers(
task_id: &str,
mut download_task_started_response: DownloadTaskStartedResponse, mut download_task_started_response: DownloadTaskStartedResponse,
) -> ClientResult<hyper::header::HeaderMap> { ) -> ClientResult<hyper::header::HeaderMap> {
// Insert the content range header to the response header. // Insert the content range header to the response header.
@ -1174,28 +1139,18 @@ fn make_response_headers(
); );
}; };
if download_task_started_response.is_finished {
download_task_started_response.response_header.insert(
header::DRAGONFLY_TASK_DOWNLOAD_FINISHED_HEADER.to_string(),
"true".to_string(),
);
}
download_task_started_response.response_header.insert(
header::DRAGONFLY_TASK_ID_HEADER.to_string(),
task_id.to_string(),
);
hashmap_to_headermap(&download_task_started_response.response_header) hashmap_to_headermap(&download_task_started_response.response_header)
} }
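`hashmap_to_headermap` comes from dragonfly-client-util and is not shown in this diff; a rough sketch of what such a conversion helper could look like is below, with the signature and error handling as assumptions.

```rust
use std::collections::HashMap;

use hyper::header::{HeaderMap, HeaderName, HeaderValue};

// Assumed shape of a HashMap<String, String> -> HeaderMap conversion; the real
// helper in dragonfly-client-util may differ in signature and error type.
fn example_hashmap_to_headermap(
    map: &HashMap<String, String>,
) -> Result<HeaderMap, Box<dyn std::error::Error>> {
    let mut headers = HeaderMap::new();
    for (key, value) in map {
        headers.insert(
            HeaderName::from_bytes(key.as_bytes())?,
            HeaderValue::from_str(value)?,
        );
    }
    Ok(headers)
}
```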
/// find_matching_rule returns whether the dfdaemon should be used to download the task. /// find_matching_rule returns whether the dfdaemon should be used to download the task.
/// If the dfdaemon should be used, return the matched rule. /// If the dfdaemon should be used, return the matched rule.
#[instrument(skip_all)]
fn find_matching_rule(rules: Option<&[Rule]>, url: &str) -> Option<Rule> { fn find_matching_rule(rules: Option<&[Rule]>, url: &str) -> Option<Rule> {
rules?.iter().find(|rule| rule.regex.is_match(url)).cloned() rules?.iter().find(|rule| rule.regex.is_match(url)).cloned()
} }
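The matching above is first-match-wins over the configured proxy rules' regexes. A self-contained illustration with a stand-in rule type is shown below; the real `Rule` from the proxy config carries more fields than just the regex, which is all this lookup needs.

```rust
use regex::Regex;

// Stand-in for the proxy Rule; only the regex matters for matching.
struct ExampleRule {
    regex: Regex,
}

fn example_find_matching_rule<'a>(rules: &'a [ExampleRule], url: &str) -> Option<&'a ExampleRule> {
    // First rule whose regex matches the URL wins.
    rules.iter().find(|rule| rule.regex.is_match(url))
}

fn main() {
    let rules = vec![ExampleRule {
        regex: Regex::new(r"^https://example-registry\.io/.*").unwrap(),
    }];

    assert!(example_find_matching_rule(&rules, "https://example-registry.io/v2/").is_some());
    assert!(example_find_matching_rule(&rules, "https://other.io/v2/").is_none());
}
```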
/// make_error_response makes an error response with the given status and message. /// make_error_response makes an error response with the given status and message.
#[instrument(skip_all)]
fn make_error_response(status: http::StatusCode, header: Option<http::HeaderMap>) -> Response { fn make_error_response(status: http::StatusCode, header: Option<http::HeaderMap>) -> Response {
let mut response = Response::new(empty()); let mut response = Response::new(empty());
*response.status_mut() = status; *response.status_mut() = status;
@ -1209,6 +1164,7 @@ fn make_error_response(status: http::StatusCode, header: Option<http::HeaderMap>
} }
/// empty returns an empty body. /// empty returns an empty body.
#[instrument(skip_all)]
fn empty() -> BoxBody<Bytes, ClientError> { fn empty() -> BoxBody<Bytes, ClientError> {
Empty::<Bytes>::new() Empty::<Bytes>::new()
.map_err(|never| match never {}) .map_err(|never| match never {})

View File

@ -84,6 +84,7 @@ pub struct PersistentCacheTask {
/// PersistentCacheTask is the implementation of PersistentCacheTask. /// PersistentCacheTask is the implementation of PersistentCacheTask.
impl PersistentCacheTask { impl PersistentCacheTask {
/// new creates a new PersistentCacheTask. /// new creates a new PersistentCacheTask.
#[instrument(skip_all)]
pub fn new( pub fn new(
config: Arc<Config>, config: Arc<Config>,
id_generator: Arc<IDGenerator>, id_generator: Arc<IDGenerator>,
@ -502,6 +503,7 @@ impl PersistentCacheTask {
} }
/// is_same_dev_inode checks if the persistent cache task is on the same device inode as the given path. /// is_same_dev_inode checks if the persistent cache task is on the same device inode as the given path.
#[instrument(skip_all)]
pub async fn is_same_dev_inode(&self, id: &str, to: &Path) -> ClientResult<bool> { pub async fn is_same_dev_inode(&self, id: &str, to: &Path) -> ClientResult<bool> {
self.storage self.storage
.is_same_dev_inode_as_persistent_cache_task(id, to) .is_same_dev_inode_as_persistent_cache_task(id, to)
@ -1143,13 +1145,13 @@ impl PersistentCacheTask {
REQUEST_TIMEOUT, REQUEST_TIMEOUT,
) )
.await .await
.unwrap_or_else(|err| { .inspect_err(|err| {
error!( error!(
"send DownloadPieceFinishedRequest for piece {} failed: {:?}", "send DownloadPieceFinishedRequest for piece {} failed: {:?}",
piece_id, err piece_id, err
); );
interrupt.store(true, Ordering::SeqCst); interrupt.store(true, Ordering::SeqCst);
}); })?;
// Send the download progress. // Send the download progress.
download_progress_tx download_progress_tx
@ -1169,13 +1171,13 @@ impl PersistentCacheTask {
REQUEST_TIMEOUT, REQUEST_TIMEOUT,
) )
.await .await
.unwrap_or_else(|err| { .inspect_err(|err| {
error!( error!(
"send DownloadPieceFinishedResponse for piece {} failed: {:?}", "send DownloadPieceFinishedResponse for piece {} failed: {:?}",
piece_id, err piece_id, err
); );
interrupt.store(true, Ordering::SeqCst); interrupt.store(true, Ordering::SeqCst);
}); })?;
info!( info!(
"finished persistent cache piece {} from parent {:?}", "finished persistent cache piece {} from parent {:?}",
@ -1370,12 +1372,12 @@ impl PersistentCacheTask {
REQUEST_TIMEOUT, REQUEST_TIMEOUT,
) )
.await .await
.unwrap_or_else(|err| { .inspect_err(|err| {
error!( error!(
"send DownloadPieceFinishedResponse for piece {} failed: {:?}", "send DownloadPieceFinishedResponse for piece {} failed: {:?}",
piece_id, err piece_id, err
); );
}); })?;
// Store the finished piece. // Store the finished piece.
finished_pieces.push(interested_piece.clone()); finished_pieces.push(interested_piece.clone());

View File

@ -87,6 +87,7 @@ pub struct Piece {
/// Piece implements the piece manager. /// Piece implements the piece manager.
impl Piece { impl Piece {
/// new returns a new Piece. /// new returns a new Piece.
#[instrument(skip_all)]
pub fn new( pub fn new(
config: Arc<Config>, config: Arc<Config>,
id_generator: Arc<IDGenerator>, id_generator: Arc<IDGenerator>,
@ -135,20 +136,17 @@ impl Piece {
/// id generates a new piece id. /// id generates a new piece id.
#[inline] #[inline]
#[instrument(skip_all)]
pub fn id(&self, task_id: &str, number: u32) -> String { pub fn id(&self, task_id: &str, number: u32) -> String {
self.storage.piece_id(task_id, number) self.storage.piece_id(task_id, number)
} }
/// get gets a piece from the local storage. /// get gets a piece from the local storage.
#[instrument(skip_all)]
pub fn get(&self, piece_id: &str) -> Result<Option<metadata::Piece>> { pub fn get(&self, piece_id: &str) -> Result<Option<metadata::Piece>> {
self.storage.get_piece(piece_id) self.storage.get_piece(piece_id)
} }
/// get_all gets all pieces of a task from the local storage.
pub fn get_all(&self, task_id: &str) -> Result<Vec<metadata::Piece>> {
self.storage.get_pieces(task_id)
}
/// calculate_interested calculates the interested pieces by content_length and range. /// calculate_interested calculates the interested pieces by content_length and range.
pub fn calculate_interested( pub fn calculate_interested(
&self, &self,
@ -340,7 +338,6 @@ impl Piece {
) -> Result<impl AsyncRead> { ) -> Result<impl AsyncRead> {
// Span record the piece_id. // Span record the piece_id.
Span::current().record("piece_id", piece_id); Span::current().record("piece_id", piece_id);
Span::current().record("piece_length", length);
// Acquire the upload rate limiter. // Acquire the upload rate limiter.
if !disable_rate_limit { if !disable_rate_limit {
@ -372,7 +369,6 @@ impl Piece {
) -> Result<impl AsyncRead> { ) -> Result<impl AsyncRead> {
// Span record the piece_id. // Span record the piece_id.
Span::current().record("piece_id", piece_id); Span::current().record("piece_id", piece_id);
Span::current().record("piece_length", length);
// Acquire the download rate limiter. // Acquire the download rate limiter.
if !disable_rate_limit { if !disable_rate_limit {
@ -412,10 +408,10 @@ impl Piece {
length: u64, length: u64,
parent: piece_collector::CollectedParent, parent: piece_collector::CollectedParent,
is_prefetch: bool, is_prefetch: bool,
load_to_cache: bool,
) -> Result<metadata::Piece> { ) -> Result<metadata::Piece> {
// Span record the piece_id. // Span record the piece_id.
Span::current().record("piece_id", piece_id); Span::current().record("piece_id", piece_id);
Span::current().record("piece_length", length);
// Record the start of downloading piece. // Record the start of downloading piece.
let piece = self let piece = self
@ -476,6 +472,7 @@ impl Piece {
digest.as_str(), digest.as_str(),
parent.id.as_str(), parent.id.as_str(),
&mut reader, &mut reader,
load_to_cache,
self.config.storage.write_piece_timeout, self.config.storage.write_piece_timeout,
) )
.await .await
@ -513,12 +510,12 @@ impl Piece {
length: u64, length: u64,
request_header: HeaderMap, request_header: HeaderMap,
is_prefetch: bool, is_prefetch: bool,
load_to_cache: bool,
object_storage: Option<ObjectStorage>, object_storage: Option<ObjectStorage>,
hdfs: Option<Hdfs>, hdfs: Option<Hdfs>,
) -> Result<metadata::Piece> { ) -> Result<metadata::Piece> {
// Span record the piece_id. // Span record the piece_id.
Span::current().record("piece_id", piece_id); Span::current().record("piece_id", piece_id);
Span::current().record("piece_length", length);
// Record the start of downloading piece. // Record the start of downloading piece.
let piece = self let piece = self
@ -638,6 +635,7 @@ impl Piece {
offset, offset,
length, length,
&mut response.reader, &mut response.reader,
load_to_cache,
self.config.storage.write_piece_timeout, self.config.storage.write_piece_timeout,
) )
.await .await
@ -664,6 +662,7 @@ impl Piece {
/// persistent_cache_id generates a new persistent cache piece id. /// persistent_cache_id generates a new persistent cache piece id.
#[inline] #[inline]
#[instrument(skip_all)]
pub fn persistent_cache_id(&self, task_id: &str, number: u32) -> String { pub fn persistent_cache_id(&self, task_id: &str, number: u32) -> String {
self.storage.persistent_cache_piece_id(task_id, number) self.storage.persistent_cache_piece_id(task_id, number)
} }
@ -701,7 +700,6 @@ impl Piece {
) -> Result<impl AsyncRead> { ) -> Result<impl AsyncRead> {
// Span record the piece_id. // Span record the piece_id.
Span::current().record("piece_id", piece_id); Span::current().record("piece_id", piece_id);
Span::current().record("piece_length", length);
// Acquire the upload rate limiter. // Acquire the upload rate limiter.
self.upload_rate_limiter.acquire(length as usize).await; self.upload_rate_limiter.acquire(length as usize).await;
@ -731,7 +729,6 @@ impl Piece {
) -> Result<impl AsyncRead> { ) -> Result<impl AsyncRead> {
// Span record the piece_id. // Span record the piece_id.
Span::current().record("piece_id", piece_id); Span::current().record("piece_id", piece_id);
Span::current().record("piece_length", length);
// Acquire the download rate limiter. // Acquire the download rate limiter.
if !disable_rate_limit { if !disable_rate_limit {
@ -776,7 +773,6 @@ impl Piece {
) -> Result<metadata::Piece> { ) -> Result<metadata::Piece> {
// Span record the piece_id. // Span record the piece_id.
Span::current().record("piece_id", piece_id); Span::current().record("piece_id", piece_id);
Span::current().record("piece_length", length);
if is_prefetch { if is_prefetch {
// Acquire the prefetch rate limiter. // Acquire the prefetch rate limiter.
@ -841,7 +837,6 @@ impl Piece {
piece_id, piece_id,
task_id, task_id,
offset, offset,
length,
digest.as_str(), digest.as_str(),
parent.id.as_str(), parent.id.as_str(),
&mut reader, &mut reader,

View File

@ -15,21 +15,20 @@
*/ */
use crate::grpc::dfdaemon_upload::DfdaemonUploadClient; use crate::grpc::dfdaemon_upload::DfdaemonUploadClient;
use dashmap::DashMap;
use dragonfly_api::common::v2::Host; use dragonfly_api::common::v2::Host;
use dragonfly_api::dfdaemon::v2::{SyncPersistentCachePiecesRequest, SyncPiecesRequest}; use dragonfly_api::dfdaemon::v2::{SyncPersistentCachePiecesRequest, SyncPiecesRequest};
use dragonfly_client_config::dfdaemon::Config; use dragonfly_client_config::dfdaemon::Config;
use dragonfly_client_core::{Error, Result}; use dragonfly_client_core::{Error, Result};
use dragonfly_client_storage::metadata; use dragonfly_client_storage::metadata;
use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use tokio::sync::mpsc::{self, Receiver, Sender}; use tokio::sync::mpsc::{self, Receiver, Sender};
use tokio::sync::Mutex;
use tokio::task::JoinSet; use tokio::task::JoinSet;
use tokio_stream::StreamExt; use tokio_stream::StreamExt;
use tracing::{error, info, instrument, Instrument}; use tracing::{error, info, instrument, Instrument};
const DEFAULT_WAIT_FOR_PIECE_FROM_DIFFERENT_PARENTS: Duration = Duration::from_millis(5);
/// CollectedParent is the parent peer collected from the parent. /// CollectedParent is the parent peer collected from the parent.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct CollectedParent { pub struct CollectedParent {
@ -69,13 +68,14 @@ pub struct PieceCollector {
/// interested_pieces is the pieces interested by the collector. /// interested_pieces is the pieces interested by the collector.
interested_pieces: Vec<metadata::Piece>, interested_pieces: Vec<metadata::Piece>,
/// collected_pieces is a map to store the collected pieces from different parents. /// collected_pieces is the pieces collected from peers.
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>, collected_pieces: Arc<Mutex<HashMap<u32, String>>>,
} }
/// PieceCollector is used to collect pieces from peers. /// PieceCollector is used to collect pieces from peers.
impl PieceCollector { impl PieceCollector {
/// new creates a new PieceCollector. /// new creates a new PieceCollector.
#[instrument(skip_all)]
pub async fn new( pub async fn new(
config: Arc<Config>, config: Arc<Config>,
host_id: &str, host_id: &str,
@ -83,10 +83,14 @@ impl PieceCollector {
interested_pieces: Vec<metadata::Piece>, interested_pieces: Vec<metadata::Piece>,
parents: Vec<CollectedParent>, parents: Vec<CollectedParent>,
) -> Self { ) -> Self {
let collected_pieces = Arc::new(DashMap::with_capacity(interested_pieces.len())); let collected_pieces =
Arc::new(Mutex::new(HashMap::with_capacity(interested_pieces.len())));
let mut collected_pieces_guard = collected_pieces.lock().await;
for interested_piece in &interested_pieces { for interested_piece in &interested_pieces {
collected_pieces.insert(interested_piece.number, Vec::new()); collected_pieces_guard.insert(interested_piece.number, String::new());
} }
drop(collected_pieces_guard);
Self { Self {
config, config,
@ -107,7 +111,7 @@ impl PieceCollector {
let parents = self.parents.clone(); let parents = self.parents.clone();
let interested_pieces = self.interested_pieces.clone(); let interested_pieces = self.interested_pieces.clone();
let collected_pieces = self.collected_pieces.clone(); let collected_pieces = self.collected_pieces.clone();
let collected_piece_timeout = self.config.download.collected_piece_timeout; let collected_piece_timeout = self.config.download.piece_timeout;
let (collected_piece_tx, collected_piece_rx) = mpsc::channel(128 * 1024); let (collected_piece_tx, collected_piece_rx) = mpsc::channel(128 * 1024);
tokio::spawn( tokio::spawn(
async move { async move {
@ -132,25 +136,7 @@ impl PieceCollector {
collected_piece_rx collected_piece_rx
} }
/// collect_from_parents collects pieces from multiple parents with a load-balancing strategy. /// collect_from_parents collects pieces from parents.
///
/// The collection process works in two phases:
/// 1. **Synchronization Phase**: Waits for a configured duration (DEFAULT_WAIT_FOR_PIECE_FROM_DIFFERENT_PARENTS)
/// to collect the same piece information from different parents. This allows the collector
/// to gather multiple sources for each piece.
///
/// 2. **Selection Phase**: After the wait period, randomly selects one parent from the available
/// candidates for each piece and forwards it to the piece downloader.
///
/// **Load Balancing Strategy**:
/// The random parent selection is designed to distribute download load across multiple parents
/// during concurrent piece downloads. This approach ensures:
/// - Optimal utilization of bandwidth from multiple parent nodes
/// - Prevention of overwhelming any single parent with too many requests
/// - Better overall download performance through parallel connections
///
/// This strategy is particularly effective when downloading multiple pieces simultaneously,
/// as it naturally spreads the workload across the available parent pool.
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
#[instrument(skip_all)] #[instrument(skip_all)]
async fn collect_from_parents( async fn collect_from_parents(
@ -159,7 +145,7 @@ impl PieceCollector {
task_id: &str, task_id: &str,
parents: Vec<CollectedParent>, parents: Vec<CollectedParent>,
interested_pieces: Vec<metadata::Piece>, interested_pieces: Vec<metadata::Piece>,
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>, collected_pieces: Arc<Mutex<HashMap<u32, String>>>,
collected_piece_tx: Sender<CollectedPiece>, collected_piece_tx: Sender<CollectedPiece>,
collected_piece_timeout: Duration, collected_piece_timeout: Duration,
) -> Result<()> { ) -> Result<()> {
@ -173,7 +159,7 @@ impl PieceCollector {
task_id: String, task_id: String,
parent: CollectedParent, parent: CollectedParent,
interested_pieces: Vec<metadata::Piece>, interested_pieces: Vec<metadata::Piece>,
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>, collected_pieces: Arc<Mutex<HashMap<u32, String>>>,
collected_piece_tx: Sender<CollectedPiece>, collected_piece_tx: Sender<CollectedPiece>,
collected_piece_timeout: Duration, collected_piece_timeout: Duration,
) -> Result<CollectedParent> { ) -> Result<CollectedParent> {
@ -221,33 +207,18 @@ impl PieceCollector {
error!("sync pieces from parent {} failed: {}", parent.id, err); error!("sync pieces from parent {} failed: {}", parent.id, err);
})? { })? {
let message = message?; let message = message?;
if let Some(mut parents) = collected_pieces.get_mut(&message.number) {
parents.push(parent.clone()); // Remove the piece from collected_pieces to avoid collecting the same piece from
} else { // different parents.
{
let mut collected_pieces_guard = collected_pieces.lock().await;
if collected_pieces_guard.remove(&message.number).is_none() {
continue; continue;
} }
// Wait for collecting the piece from different parents when the first
// piece is collected.
tokio::time::sleep(DEFAULT_WAIT_FOR_PIECE_FROM_DIFFERENT_PARENTS).await;
let parents = match collected_pieces.remove(&message.number) {
Some((_, parents)) => parents,
None => continue,
};
let parent = match parents.get(fastrand::usize(..parents.len())) {
Some(parent) => parent,
None => {
error!(
"collected_pieces does not contain parent for piece {}",
message.number
);
continue;
} }
};
info!( info!(
"picked up piece {}-{} metadata from parent {}", "received piece {}-{} metadata from parent {}",
task_id, message.number, parent.id task_id, message.number, parent.id
); );
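The removed doc comment and the main-side code above describe a wait-then-pick strategy: collect the same piece number from several parents for a few milliseconds (DEFAULT_WAIT_FOR_PIECE_FROM_DIFFERENT_PARENTS), then select one parent at random so concurrent piece downloads spread across the parent pool. A reduced sketch of that selection step, with an assumed parent type, is:

```rust
use std::sync::Arc;
use std::time::Duration;

use dashmap::DashMap;

// Assumed stand-in for CollectedParent; only the id matters for this sketch.
#[derive(Clone, Debug)]
struct ExampleParent {
    id: String,
}

// Sketch of the main-branch selection phase: wait briefly so other parents can
// report the same piece, then take the candidates and pick one at random.
async fn pick_parent_for_piece(
    collected_pieces: Arc<DashMap<u32, Vec<ExampleParent>>>,
    piece_number: u32,
    wait: Duration,
) -> Option<ExampleParent> {
    // Synchronization phase: give other parents a chance to report the piece.
    tokio::time::sleep(wait).await;

    // Selection phase: remove the entry so the piece is only handed out once.
    let (_, parents) = collected_pieces.remove(&piece_number)?;
    if parents.is_empty() {
        return None;
    }

    parents.get(fastrand::usize(..parents.len())).cloned()
}
```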
@ -288,7 +259,11 @@ impl PieceCollector {
info!("peer {} sync pieces finished", peer.id); info!("peer {} sync pieces finished", peer.id);
// If all pieces are collected, abort all tasks. // If all pieces are collected, abort all tasks.
if collected_pieces.is_empty() { let collected_pieces_guard = collected_pieces.lock().await;
let is_empty = collected_pieces_guard.is_empty();
drop(collected_pieces_guard);
if is_empty {
info!("all pieces are collected, abort all tasks"); info!("all pieces are collected, abort all tasks");
join_set.abort_all(); join_set.abort_all();
} }
@ -323,13 +298,14 @@ pub struct PersistentCachePieceCollector {
/// interested_pieces is the pieces interested by the collector. /// interested_pieces is the pieces interested by the collector.
interested_pieces: Vec<metadata::Piece>, interested_pieces: Vec<metadata::Piece>,
/// collected_pieces is a map to store the collected pieces from different parents. /// collected_pieces is the pieces collected from peers.
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>, collected_pieces: Arc<Mutex<HashMap<u32, String>>>,
} }
/// PersistentCachePieceCollector is used to collect persistent cache pieces from peers. /// PersistentCachePieceCollector is used to collect persistent cache pieces from peers.
impl PersistentCachePieceCollector { impl PersistentCachePieceCollector {
/// new creates a new PieceCollector. /// new creates a new PieceCollector.
#[instrument(skip_all)]
pub async fn new( pub async fn new(
config: Arc<Config>, config: Arc<Config>,
host_id: &str, host_id: &str,
@ -337,10 +313,14 @@ impl PersistentCachePieceCollector {
interested_pieces: Vec<metadata::Piece>, interested_pieces: Vec<metadata::Piece>,
parents: Vec<CollectedParent>, parents: Vec<CollectedParent>,
) -> Self { ) -> Self {
let collected_pieces = Arc::new(DashMap::with_capacity(interested_pieces.len())); let collected_pieces =
Arc::new(Mutex::new(HashMap::with_capacity(interested_pieces.len())));
let mut collected_pieces_guard = collected_pieces.lock().await;
for interested_piece in &interested_pieces { for interested_piece in &interested_pieces {
collected_pieces.insert(interested_piece.number, Vec::new()); collected_pieces_guard.insert(interested_piece.number, String::new());
} }
drop(collected_pieces_guard);
Self { Self {
config, config,
@ -386,25 +366,7 @@ impl PersistentCachePieceCollector {
collected_piece_rx collected_piece_rx
} }
/// collect_from_parents collects pieces from multiple parents with a load-balancing strategy. /// collect_from_parents collects pieces from parents.
///
/// The collection process works in two phases:
/// 1. **Synchronization Phase**: Waits for a configured duration (DEFAULT_WAIT_FOR_PIECE_FROM_DIFFERENT_PARENTS)
/// to collect the same piece information from different parents. This allows the collector
/// to gather multiple sources for each piece.
///
/// 2. **Selection Phase**: After the wait period, randomly selects one parent from the available
/// candidates for each piece and forwards it to the piece downloader.
///
/// **Load Balancing Strategy**:
/// The random parent selection is designed to distribute download load across multiple parents
/// during concurrent piece downloads. This approach ensures:
/// - Optimal utilization of bandwidth from multiple parent nodes
/// - Prevention of overwhelming any single parent with too many requests
/// - Better overall download performance through parallel connections
///
/// This strategy is particularly effective when downloading multiple pieces simultaneously,
/// as it naturally spreads the workload across the available parent pool.
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
#[instrument(skip_all)] #[instrument(skip_all)]
async fn collect_from_parents( async fn collect_from_parents(
@ -413,7 +375,7 @@ impl PersistentCachePieceCollector {
task_id: &str, task_id: &str,
parents: Vec<CollectedParent>, parents: Vec<CollectedParent>,
interested_pieces: Vec<metadata::Piece>, interested_pieces: Vec<metadata::Piece>,
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>, collected_pieces: Arc<Mutex<HashMap<u32, String>>>,
collected_piece_tx: Sender<CollectedPiece>, collected_piece_tx: Sender<CollectedPiece>,
collected_piece_timeout: Duration, collected_piece_timeout: Duration,
) -> Result<()> { ) -> Result<()> {
@ -427,7 +389,7 @@ impl PersistentCachePieceCollector {
task_id: String, task_id: String,
parent: CollectedParent, parent: CollectedParent,
interested_pieces: Vec<metadata::Piece>, interested_pieces: Vec<metadata::Piece>,
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>, collected_pieces: Arc<Mutex<HashMap<u32, String>>>,
collected_piece_tx: Sender<CollectedPiece>, collected_piece_tx: Sender<CollectedPiece>,
collected_piece_timeout: Duration, collected_piece_timeout: Duration,
) -> Result<CollectedParent> { ) -> Result<CollectedParent> {
@ -481,33 +443,18 @@ impl PersistentCachePieceCollector {
); );
})? { })? {
let message = message?; let message = message?;
if let Some(mut parents) = collected_pieces.get_mut(&message.number) {
parents.push(parent.clone()); // Remove the piece from collected_pieces to avoid collecting the same piece from
} else { // different parents.
{
let mut collected_pieces_guard = collected_pieces.lock().await;
if collected_pieces_guard.remove(&message.number).is_none() {
continue; continue;
} }
// Wait for collecting the piece from different parents when the first
// piece is collected.
tokio::time::sleep(DEFAULT_WAIT_FOR_PIECE_FROM_DIFFERENT_PARENTS).await;
let parents = match collected_pieces.remove(&message.number) {
Some((_, parents)) => parents,
None => continue,
};
let parent = match parents.get(fastrand::usize(..parents.len())) {
Some(parent) => parent,
None => {
error!(
"collected_pieces does not contain parent for piece {}",
message.number
);
continue;
} }
};
info!( info!(
"picked up piece {}-{} metadata from parent {}", "received persistent cache piece {}-{} metadata from parent {}",
task_id, message.number, parent.id task_id, message.number, parent.id
); );
@ -548,7 +495,11 @@ impl PersistentCachePieceCollector {
info!("peer {} sync persistent cache pieces finished", peer.id); info!("peer {} sync persistent cache pieces finished", peer.id);
// If all pieces are collected, abort all tasks. // If all pieces are collected, abort all tasks.
if collected_pieces.is_empty() { let collected_pieces_guard = collected_pieces.lock().await;
let is_empty = collected_pieces_guard.is_empty();
drop(collected_pieces_guard);
if is_empty {
info!("all persistent cache pieces are collected, abort all tasks"); info!("all persistent cache pieces are collected, abort all tasks");
join_set.abort_all(); join_set.abort_all();
} }

View File

@ -66,6 +66,7 @@ pub struct DownloaderFactory {
/// DownloadFactory implements the DownloadFactory trait. /// DownloadFactory implements the DownloadFactory trait.
impl DownloaderFactory { impl DownloaderFactory {
/// new returns a new DownloadFactory. /// new returns a new DownloadFactory.
#[instrument(skip_all)]
pub fn new(protocol: &str, config: Arc<Config>) -> Result<Self> { pub fn new(protocol: &str, config: Arc<Config>) -> Result<Self> {
let downloader = match protocol { let downloader = match protocol {
"grpc" => Arc::new(GRPCDownloader::new( "grpc" => Arc::new(GRPCDownloader::new(
@ -83,6 +84,7 @@ impl DownloaderFactory {
} }
/// build returns the downloader. /// build returns the downloader.
#[instrument(skip_all)]
pub fn build(&self) -> Arc<dyn Downloader> { pub fn build(&self) -> Arc<dyn Downloader> {
self.downloader.clone() self.downloader.clone()
} }
@ -149,6 +151,7 @@ pub struct GRPCDownloader {
/// GRPCDownloader implements the downloader with the gRPC protocol. /// GRPCDownloader implements the downloader with the gRPC protocol.
impl GRPCDownloader { impl GRPCDownloader {
/// new returns a new GRPCDownloader. /// new returns a new GRPCDownloader.
#[instrument(skip_all)]
pub fn new(config: Arc<Config>, capacity: usize, idle_timeout: Duration) -> Self { pub fn new(config: Arc<Config>, capacity: usize, idle_timeout: Duration) -> Self {
Self { Self {
config, config,

View File

@ -20,8 +20,7 @@ use crate::metrics::{
collect_backend_request_started_metrics, collect_backend_request_started_metrics,
}; };
use dragonfly_api::common::v2::{ use dragonfly_api::common::v2::{
Download, Hdfs, ObjectStorage, Peer, Piece, SizeScope, Task as CommonTask, TaskType, Download, Hdfs, ObjectStorage, Peer, Piece, Task as CommonTask, TrafficType,
TrafficType,
}; };
use dragonfly_api::dfdaemon::{ use dragonfly_api::dfdaemon::{
self, self,
@ -49,7 +48,6 @@ use dragonfly_client_util::{
id_generator::IDGenerator, id_generator::IDGenerator,
}; };
use reqwest::header::HeaderMap; use reqwest::header::HeaderMap;
use std::collections::HashMap;
use std::path::Path; use std::path::Path;
use std::sync::{ use std::sync::{
atomic::{AtomicBool, Ordering}, atomic::{AtomicBool, Ordering},
@ -92,6 +90,7 @@ pub struct Task {
/// Task implements the task manager. /// Task implements the task manager.
impl Task { impl Task {
/// new returns a new Task. /// new returns a new Task.
#[instrument(skip_all)]
pub fn new( pub fn new(
config: Arc<Config>, config: Arc<Config>,
id_generator: Arc<IDGenerator>, id_generator: Arc<IDGenerator>,
@ -118,7 +117,6 @@ impl Task {
} }
/// get gets the metadata of the task. /// get gets the metadata of the task.
#[instrument(skip_all)]
pub fn get(&self, id: &str) -> ClientResult<Option<metadata::Task>> { pub fn get(&self, id: &str) -> ClientResult<Option<metadata::Task>> {
self.storage.get_task(id) self.storage.get_task(id)
} }
@ -132,7 +130,6 @@ impl Task {
) -> ClientResult<metadata::Task> { ) -> ClientResult<metadata::Task> {
let task = self.storage.prepare_download_task_started(id).await?; let task = self.storage.prepare_download_task_started(id).await?;
if task.content_length.is_some() && task.piece_length.is_some() {
// Attempt to create a hard link from the task file to the output path. // Attempt to create a hard link from the task file to the output path.
// //
// Behavior based on force_hard_link setting: // Behavior based on force_hard_link setting:
@ -154,6 +151,7 @@ impl Task {
} }
} }
if task.content_length.is_some() && task.piece_length.is_some() {
return Ok(task); return Ok(task);
} }
@ -241,38 +239,20 @@ impl Task {
// store the task. // store the task.
if !task.is_finished() && !self.storage.has_enough_space(content_length)? { if !task.is_finished() && !self.storage.has_enough_space(content_length)? {
return Err(Error::NoSpace(format!( return Err(Error::NoSpace(format!(
"not enough space to store the task: content_length={}", "not enough space to store the persistent cache task: content_length={}",
content_length content_length
))); )));
} }
let task = self self.storage
.storage .download_task_started(
.download_task_started(id, piece_length, content_length, response.http_header) id,
.await; piece_length,
content_length,
// Attempt to create a hard link from the task file to the output path. response.http_header,
// request.load_to_cache,
// Behavior based on force_hard_link setting: )
// 1. force_hard_link is true:
// - Success: Continue processing
// - Failure: Return error immediately
// 2. force_hard_link is false:
// - Success: Continue processing
// - Failure: Fall back to copying the file instead
if let Some(output_path) = &request.output_path {
if let Err(err) = self
.storage
.hard_link_task(id, Path::new(output_path.as_str()))
.await .await
{
if request.force_hard_link {
return Err(err);
}
}
}
task
} }
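The comment block above (main side) spells out the hard-link fallback: when `force_hard_link` is set, a failed hard link is returned as an error; otherwise the client falls back to copying the file. A simplified, standalone sketch of that behavior using tokio's filesystem helpers follows; the real code goes through `storage.hard_link_task` rather than touching paths directly.

```rust
use std::io;
use std::path::Path;

// Simplified version of the fallback described above; the real implementation
// links and copies through the storage layer instead of raw paths.
async fn link_or_copy(task_path: &Path, output_path: &Path, force_hard_link: bool) -> io::Result<()> {
    match tokio::fs::hard_link(task_path, output_path).await {
        Ok(()) => Ok(()),
        // force_hard_link: surface the failure to the caller.
        Err(err) if force_hard_link => Err(err),
        // Otherwise degrade gracefully to a full copy.
        Err(_) => {
            tokio::fs::copy(task_path, output_path).await?;
            Ok(())
        }
    }
}
```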
/// download_finished updates the metadata of the task when the task downloads finished. /// download_finished updates the metadata of the task when the task downloads finished.
@ -300,6 +280,7 @@ impl Task {
} }
/// is_same_dev_inode checks if the task is on the same device inode as the given path. /// is_same_dev_inode checks if the task is on the same device inode as the given path.
#[instrument(skip_all)]
pub async fn is_same_dev_inode(&self, id: &str, to: &Path) -> ClientResult<bool> { pub async fn is_same_dev_inode(&self, id: &str, to: &Path) -> ClientResult<bool> {
self.storage.is_same_dev_inode_as_task(id, to).await self.storage.is_same_dev_inode_as_task(id, to).await
} }
@ -388,7 +369,6 @@ impl Task {
range: request.range, range: request.range,
response_header: task.response_header.clone(), response_header: task.response_header.clone(),
pieces, pieces,
is_finished: task.is_finished(),
}, },
), ),
), ),
@ -734,6 +714,7 @@ impl Task {
remaining_interested_pieces.clone(), remaining_interested_pieces.clone(),
request.is_prefetch, request.is_prefetch,
request.need_piece_content, request.need_piece_content,
request.load_to_cache,
download_progress_tx.clone(), download_progress_tx.clone(),
in_stream_tx.clone(), in_stream_tx.clone(),
) )
@ -977,6 +958,7 @@ impl Task {
interested_pieces: Vec<metadata::Piece>, interested_pieces: Vec<metadata::Piece>,
is_prefetch: bool, is_prefetch: bool,
need_piece_content: bool, need_piece_content: bool,
load_to_cache: bool,
download_progress_tx: Sender<Result<DownloadTaskResponse, Status>>, download_progress_tx: Sender<Result<DownloadTaskResponse, Status>>,
in_stream_tx: Sender<AnnouncePeerRequest>, in_stream_tx: Sender<AnnouncePeerRequest>,
) -> ClientResult<Vec<metadata::Piece>> { ) -> ClientResult<Vec<metadata::Piece>> {
@ -1037,6 +1019,7 @@ impl Task {
finished_pieces: Arc<Mutex<Vec<metadata::Piece>>>, finished_pieces: Arc<Mutex<Vec<metadata::Piece>>>,
is_prefetch: bool, is_prefetch: bool,
need_piece_content: bool, need_piece_content: bool,
load_to_cache: bool,
) -> ClientResult<metadata::Piece> { ) -> ClientResult<metadata::Piece> {
// Limit the concurrent piece count. // Limit the concurrent piece count.
let _permit = semaphore.acquire().await.unwrap(); let _permit = semaphore.acquire().await.unwrap();
@ -1057,6 +1040,7 @@ impl Task {
length, length,
parent.clone(), parent.clone(),
is_prefetch, is_prefetch,
load_to_cache,
) )
.await .await
.map_err(|err| { .map_err(|err| {
@ -1129,13 +1113,13 @@ impl Task {
REQUEST_TIMEOUT, REQUEST_TIMEOUT,
) )
.await .await
.unwrap_or_else(|err| { .inspect_err(|err| {
error!( error!(
"send DownloadPieceFinishedRequest for piece {} failed: {:?}", "send DownloadPieceFinishedRequest for piece {} failed: {:?}",
piece_id, err piece_id, err
); );
interrupt.store(true, Ordering::SeqCst); interrupt.store(true, Ordering::SeqCst);
}); })?;
// Send the download progress. // Send the download progress.
download_progress_tx download_progress_tx
@ -1155,13 +1139,13 @@ impl Task {
REQUEST_TIMEOUT, REQUEST_TIMEOUT,
) )
.await .await
.unwrap_or_else(|err| { .inspect_err(|err| {
error!( error!(
"send DownloadPieceFinishedResponse for piece {} failed: {:?}", "send DownloadPieceFinishedResponse for piece {} failed: {:?}",
piece_id, err piece_id, err
); );
interrupt.store(true, Ordering::SeqCst); interrupt.store(true, Ordering::SeqCst);
}); })?;
info!( info!(
"finished piece {} from parent {:?}", "finished piece {} from parent {:?}",
@ -1190,6 +1174,7 @@ impl Task {
finished_pieces.clone(), finished_pieces.clone(),
is_prefetch, is_prefetch,
need_piece_content, need_piece_content,
load_to_cache,
) )
.in_current_span(), .in_current_span(),
); );
@ -1303,6 +1288,7 @@ impl Task {
request_header: HeaderMap, request_header: HeaderMap,
is_prefetch: bool, is_prefetch: bool,
need_piece_content: bool, need_piece_content: bool,
load_to_cache: bool,
piece_manager: Arc<piece::Piece>, piece_manager: Arc<piece::Piece>,
semaphore: Arc<Semaphore>, semaphore: Arc<Semaphore>,
download_progress_tx: Sender<Result<DownloadTaskResponse, Status>>, download_progress_tx: Sender<Result<DownloadTaskResponse, Status>>,
@ -1326,6 +1312,7 @@ impl Task {
length, length,
request_header, request_header,
is_prefetch, is_prefetch,
load_to_cache,
object_storage, object_storage,
hdfs, hdfs,
) )
@ -1385,9 +1372,9 @@ impl Task {
}, },
REQUEST_TIMEOUT, REQUEST_TIMEOUT,
) )
.await.unwrap_or_else(|err| { .await.inspect_err(|err| {
error!("send DownloadPieceBackToSourceFinishedRequest for piece {} failed: {:?}", piece_id, err); error!("send DownloadPieceBackToSourceFinishedRequest for piece {} failed: {:?}", piece_id, err);
}); })?;
// Send the download progress. // Send the download progress.
download_progress_tx download_progress_tx
@ -1407,12 +1394,12 @@ impl Task {
REQUEST_TIMEOUT, REQUEST_TIMEOUT,
) )
.await .await
.unwrap_or_else(|err| { .inspect_err(|err| {
error!( error!(
"send DownloadPieceFinishedResponse for piece {} failed: {:?}", "send DownloadPieceFinishedResponse for piece {} failed: {:?}",
piece_id, err piece_id, err
); );
}); })?;
info!("finished piece {} from source", piece_id); info!("finished piece {} from source", piece_id);
Ok(metadata) Ok(metadata)
@ -1430,6 +1417,7 @@ impl Task {
request_header.clone(), request_header.clone(),
request.is_prefetch, request.is_prefetch,
request.need_piece_content, request.need_piece_content,
request.load_to_cache,
self.piece.clone(), self.piece.clone(),
semaphore.clone(), semaphore.clone(),
download_progress_tx.clone(), download_progress_tx.clone(),
@ -1577,11 +1565,6 @@ impl Task {
} }
}; };
if !piece.is_finished() {
debug!("piece {} is not finished, skip it", piece_id);
continue;
}
// Fake the download from the local storage. // Fake the download from the local storage.
self.piece.download_from_local(task_id, piece.length); self.piece.download_from_local(task_id, piece.length);
info!("finished piece {} from local", piece_id,); info!("finished piece {} from local", piece_id,);
@ -1642,12 +1625,12 @@ impl Task {
REQUEST_TIMEOUT, REQUEST_TIMEOUT,
) )
.await .await
.unwrap_or_else(|err| { .inspect_err(|err| {
error!( error!(
"send DownloadPieceFinishedResponse for piece {} failed: {:?}", "send DownloadPieceFinishedResponse for piece {} failed: {:?}",
piece_id, err piece_id, err
); );
}); })?;
// Store the finished piece. // Store the finished piece.
finished_pieces.push(interested_piece.clone()); finished_pieces.push(interested_piece.clone());
@ -1696,6 +1679,7 @@ impl Task {
length: u64, length: u64,
request_header: HeaderMap, request_header: HeaderMap,
is_prefetch: bool, is_prefetch: bool,
load_to_cache: bool,
piece_manager: Arc<piece::Piece>, piece_manager: Arc<piece::Piece>,
semaphore: Arc<Semaphore>, semaphore: Arc<Semaphore>,
download_progress_tx: Sender<Result<DownloadTaskResponse, Status>>, download_progress_tx: Sender<Result<DownloadTaskResponse, Status>>,
@ -1718,6 +1702,7 @@ impl Task {
length, length,
request_header, request_header,
is_prefetch, is_prefetch,
load_to_cache,
object_storage, object_storage,
hdfs, hdfs,
) )
@ -1754,12 +1739,12 @@ impl Task {
REQUEST_TIMEOUT, REQUEST_TIMEOUT,
) )
.await .await
.unwrap_or_else(|err| { .inspect_err(|err| {
error!( error!(
"send DownloadPieceFinishedResponse for piece {} failed: {:?}", "send DownloadPieceFinishedResponse for piece {} failed: {:?}",
piece_id, err piece_id, err
); );
}); })?;
info!("finished piece {} from source", piece_id); info!("finished piece {} from source", piece_id);
Ok(metadata) Ok(metadata)
@ -1776,6 +1761,7 @@ impl Task {
interested_piece.length, interested_piece.length,
request_header.clone(), request_header.clone(),
request.is_prefetch, request.is_prefetch,
request.load_to_cache,
self.piece.clone(), self.piece.clone(),
semaphore.clone(), semaphore.clone(),
download_progress_tx.clone(), download_progress_tx.clone(),
@ -1821,74 +1807,7 @@ impl Task {
/// stat_task returns the task metadata. /// stat_task returns the task metadata.
#[instrument(skip_all)] #[instrument(skip_all)]
pub async fn stat( pub async fn stat(&self, task_id: &str, host_id: &str) -> ClientResult<CommonTask> {
&self,
task_id: &str,
host_id: &str,
local_only: bool,
) -> ClientResult<CommonTask> {
if local_only {
let Some(task_metadata) = self.storage.get_task(task_id).inspect_err(|err| {
error!("get task {} from local storage error: {:?}", task_id, err);
})?
else {
return Err(Error::TaskNotFound(task_id.to_owned()));
};
let piece_metadatas = self.piece.get_all(task_id).inspect_err(|err| {
error!(
"get pieces for task {} from local storage error: {:?}",
task_id, err
);
})?;
let pieces = piece_metadatas
.into_iter()
.filter(|piece| piece.is_finished())
.map(|piece| {
// The traffic_type indicates whether the first download was from the source or hit the remote peer cache.
// If the parent_id exists, the piece was downloaded from a remote peer. Otherwise, it was
// downloaded from the source.
let traffic_type = match piece.parent_id {
None => TrafficType::BackToSource,
Some(_) => TrafficType::RemotePeer,
};
Piece {
number: piece.number,
parent_id: piece.parent_id.clone(),
offset: piece.offset,
length: piece.length,
digest: piece.digest.clone(),
content: None,
traffic_type: Some(traffic_type as i32),
cost: piece.prost_cost(),
created_at: Some(prost_wkt_types::Timestamp::from(piece.created_at)),
}
})
.collect::<Vec<Piece>>();
return Ok(CommonTask {
id: task_metadata.id,
r#type: TaskType::Standard as i32,
url: String::new(),
digest: None,
tag: None,
application: None,
filtered_query_params: Vec::new(),
request_header: HashMap::new(),
content_length: task_metadata.content_length.unwrap_or(0),
piece_count: pieces.len() as u32,
size_scope: SizeScope::Normal as i32,
pieces,
state: String::new(),
peer_count: 0,
has_available_peer: false,
created_at: Some(prost_wkt_types::Timestamp::from(task_metadata.created_at)),
updated_at: Some(prost_wkt_types::Timestamp::from(task_metadata.updated_at)),
});
}
let task = self let task = self
.scheduler_client .scheduler_client
.stat_task(StatTaskRequest { .stat_task(StatTaskRequest {
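The block removed above implemented a local-only `stat` path: read the task and piece metadata from local storage, keep only finished pieces, classify each piece's traffic type by whether a `parent_id` was recorded, and assemble a `CommonTask` without a scheduler round trip. A condensed sketch of that piece mapping, with simplified stand-in types for the storage and proto structs:

```rust
/// Simplified stand-ins for the stored piece metadata and the proto piece info.
struct PieceMetadata {
    number: u32,
    parent_id: Option<String>,
    length: u64,
    finished: bool,
}

enum TrafficType {
    RemotePeer,
    BackToSource,
}

struct PieceInfo {
    number: u32,
    length: u64,
    traffic_type: TrafficType,
}

/// Finished pieces only; a recorded parent_id means the piece came from a remote
/// peer, otherwise it was downloaded back to source.
fn to_piece_infos(pieces: Vec<PieceMetadata>) -> Vec<PieceInfo> {
    pieces
        .into_iter()
        .filter(|piece| piece.finished)
        .map(|piece| PieceInfo {
            number: piece.number,
            length: piece.length,
            traffic_type: match piece.parent_id {
                Some(_) => TrafficType::RemotePeer,
                None => TrafficType::BackToSource,
            },
        })
        .collect()
}
```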
@@ -1969,7 +1888,7 @@ mod tests {
// Create a task and save it to storage. // Create a task and save it to storage.
let task_id = "test-task-id"; let task_id = "test-task-id";
storage storage
.download_task_started(task_id, 1024, 4096, None) .download_task_started(task_id, 1024, 4096, None, false)
.await .await
.unwrap(); .unwrap();

View File

@@ -67,6 +67,7 @@ pub struct Stats {
/// Stats implements the stats server. /// Stats implements the stats server.
impl Stats { impl Stats {
/// new creates a new Stats. /// new creates a new Stats.
#[instrument(skip_all)]
pub fn new( pub fn new(
addr: SocketAddr, addr: SocketAddr,
shutdown: shutdown::Shutdown, shutdown: shutdown::Shutdown,
@@ -80,6 +81,7 @@ impl Stats {
} }
/// run starts the stats server. /// run starts the stats server.
#[instrument(skip_all)]
pub async fn run(&self) { pub async fn run(&self) {
// Clone the shutdown channel. // Clone the shutdown channel.
let mut shutdown = self.shutdown.clone(); let mut shutdown = self.shutdown.clone();
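The two hunks above add `#[instrument(skip_all)]` to the Stats constructor and run loop, which opens a tracing span per call without recording any of the arguments as span fields. A minimal illustration of the attribute, with a hypothetical function:

```rust
use tracing::instrument;

/// A span named `run` is entered on every call; `skip_all` keeps the arguments
/// (which may be large or lack a Debug impl) out of the span's fields.
#[instrument(skip_all)]
async fn run(addr: std::net::SocketAddr) {
    tracing::info!("serving stats on {}", addr);
}
```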
@@ -108,6 +110,7 @@ impl Stats {
_ = shutdown.recv() => { _ = shutdown.recv() => {
// Stats server shutting down with signals. // Stats server shutting down with signals.
info!("stats server shutting down"); info!("stats server shutting down");
return
} }
} }
} }
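The `return` added in the shutdown branch above makes the serving loop exit once a shutdown signal arrives instead of polling the select again. A minimal sketch of the pattern, using a `tokio::sync::broadcast` channel as a stand-in for the crate's shutdown type:

```rust
use std::time::Duration;
use tokio::sync::broadcast;
use tracing::info;

/// Placeholder for the stats/metrics serving future in the real code.
async fn serve_stats() {
    tokio::time::sleep(Duration::from_secs(3600)).await;
}

/// Serve until a shutdown signal arrives; restart the server future if it ends early.
async fn run(mut shutdown: broadcast::Receiver<()>) {
    loop {
        tokio::select! {
            _ = serve_stats() => {
                // Server future ended unexpectedly; loop and restart it.
                info!("stats server exited, restarting");
            }
            _ = shutdown.recv() => {
                info!("stats server shutting down");
                // Without this return, the loop would spin and restart the server.
                return;
            }
        }
    }
}
```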

View File

@@ -14,19 +14,13 @@
* limitations under the License. * limitations under the License.
*/ */
use dragonfly_client_config::dfdaemon::Host; use opentelemetry::sdk::propagation::TraceContextPropagator;
use opentelemetry::{global, trace::TracerProvider, KeyValue};
use opentelemetry_otlp::{WithExportConfig, WithTonicConfig};
use opentelemetry_sdk::{propagation::TraceContextPropagator, Resource};
use rolling_file::*; use rolling_file::*;
use std::fs; use std::fs;
use std::path::PathBuf; use std::path::PathBuf;
use std::str::FromStr;
use std::time::Duration;
use tonic::metadata::{MetadataKey, MetadataMap, MetadataValue};
use tracing::{info, Level}; use tracing::{info, Level};
use tracing_appender::non_blocking::WorkerGuard; use tracing_appender::non_blocking::WorkerGuard;
use tracing_opentelemetry::OpenTelemetryLayer; use tracing_log::LogTracer;
use tracing_subscriber::{ use tracing_subscriber::{
filter::LevelFilter, filter::LevelFilter,
fmt::{time::ChronoLocal, Layer}, fmt::{time::ChronoLocal, Layer},
@@ -34,9 +28,6 @@ use tracing_subscriber::{
EnvFilter, Registry, EnvFilter, Registry,
}; };
/// SPAN_EXPORTER_TIMEOUT is the timeout for the span exporter.
const SPAN_EXPORTER_TIMEOUT: Duration = Duration::from_secs(10);
/// init_tracing initializes the tracing system. /// init_tracing initializes the tracing system.
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
pub fn init_tracing( pub fn init_tracing(
@@ -44,13 +35,8 @@ pub fn init_tracing(
log_dir: PathBuf, log_dir: PathBuf,
log_level: Level, log_level: Level,
log_max_files: usize, log_max_files: usize,
otel_protocol: Option<String>, jaeger_addr: Option<String>,
otel_endpoint: Option<String>, verbose: bool,
otel_path: Option<PathBuf>,
otel_headers: Option<reqwest::header::HeaderMap>,
host: Option<Host>,
is_seed_peer: bool,
console: bool,
) -> Vec<WorkerGuard> { ) -> Vec<WorkerGuard> {
let mut guards = vec![]; let mut guards = vec![];
@@ -59,7 +45,7 @@ pub fn init_tracing(
guards.push(stdout_guard); guards.push(stdout_guard);
// Initialize stdout layer. // Initialize stdout layer.
let stdout_filter = if console { let stdout_filter = if verbose {
LevelFilter::DEBUG LevelFilter::DEBUG
} else { } else {
LevelFilter::OFF LevelFilter::OFF
@@ -102,116 +88,31 @@ pub fn init_tracing(
let env_filter = EnvFilter::try_from_default_env() let env_filter = EnvFilter::try_from_default_env()
.unwrap_or_else(|_| EnvFilter::default().add_directive(log_level.into())); .unwrap_or_else(|_| EnvFilter::default().add_directive(log_level.into()));
// Enable console subscriber layer for tracing spawn tasks on `127.0.0.1:6669` when log level is TRACE.
let console_subscriber_layer = if log_level == Level::TRACE {
Some(console_subscriber::spawn())
} else {
None
};
let subscriber = Registry::default() let subscriber = Registry::default()
.with(env_filter) .with(env_filter)
.with(console_subscriber_layer)
.with(file_logging_layer) .with(file_logging_layer)
.with(stdout_logging_layer); .with(stdout_logging_layer);
// If OTLP protocol and endpoint are provided, set up OpenTelemetry tracing. // Setup jaeger layer.
if let (Some(protocol), Some(endpoint)) = (otel_protocol, otel_endpoint) { if let Some(jaeger_addr) = jaeger_addr {
let otlp_exporter = match protocol.as_str() { opentelemetry::global::set_text_map_propagator(TraceContextPropagator::new());
"grpc" => { let tracer = opentelemetry_jaeger::new_agent_pipeline()
let mut metadata = MetadataMap::new(); .with_service_name(name)
if let Some(headers) = otel_headers { .with_endpoint(jaeger_addr)
for (key, value) in headers.iter() { .install_batch(opentelemetry::runtime::Tokio)
metadata.insert( .expect("install");
MetadataKey::from_str(key.as_str()) let jaeger_layer = tracing_opentelemetry::layer().with_tracer(tracer);
.expect("failed to create metadata key"), let subscriber = subscriber.with(jaeger_layer);
MetadataValue::from_str(value.to_str().unwrap())
.expect("failed to create metadata value"),
);
}
}
let endpoint_url = url::Url::parse(&format!("http://{}", endpoint)) tracing::subscriber::set_global_default(subscriber)
.expect("failed to parse OTLP endpoint URL"); .expect("failed to set global subscriber");
opentelemetry_otlp::SpanExporter::builder()
.with_tonic()
.with_endpoint(endpoint_url)
.with_timeout(SPAN_EXPORTER_TIMEOUT)
.with_metadata(metadata)
.build()
.expect("failed to create OTLP exporter")
}
"http" | "https" => {
let mut endpoint_url = url::Url::parse(&format!("{}://{}", protocol, endpoint))
.expect("failed to parse OTLP endpoint URL");
if let Some(path) = otel_path {
endpoint_url = endpoint_url
.join(path.to_str().unwrap())
.expect("failed to join OTLP endpoint path");
}
opentelemetry_otlp::SpanExporter::builder()
.with_http()
.with_endpoint(endpoint_url.as_str())
.with_protocol(opentelemetry_otlp::Protocol::HttpJson)
.with_timeout(SPAN_EXPORTER_TIMEOUT)
.build()
.expect("failed to create OTLP exporter")
}
_ => {
panic!("unsupported OTLP protocol: {}", protocol);
}
};
let host = host.unwrap();
let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder()
.with_batch_exporter(otlp_exporter)
.with_resource(
Resource::builder()
.with_service_name(format!("{}-{}", name, host.ip.unwrap()))
.with_schema_url(
[
KeyValue::new(
opentelemetry_semantic_conventions::attribute::SERVICE_NAMESPACE,
"dragonfly",
),
KeyValue::new(
opentelemetry_semantic_conventions::attribute::HOST_NAME,
host.hostname,
),
KeyValue::new(
opentelemetry_semantic_conventions::attribute::HOST_IP,
host.ip.unwrap().to_string(),
),
],
opentelemetry_semantic_conventions::SCHEMA_URL,
)
.with_attribute(opentelemetry::KeyValue::new(
"host.idc",
host.idc.unwrap_or_default(),
))
.with_attribute(opentelemetry::KeyValue::new(
"host.location",
host.location.unwrap_or_default(),
))
.with_attribute(opentelemetry::KeyValue::new("host.seed_peer", is_seed_peer))
.build(),
)
.build();
let tracer = provider.tracer(name.to_string());
global::set_tracer_provider(provider.clone());
global::set_text_map_propagator(TraceContextPropagator::new());
let jaeger_layer = OpenTelemetryLayer::new(tracer);
subscriber.with(jaeger_layer).init();
} else { } else {
subscriber.init(); tracing::subscriber::set_global_default(subscriber)
.expect("failed to set global subscriber");
} }
std::panic::set_hook(Box::new(tracing_panic::panic_hook)); LogTracer::init().expect("failed to init LogTracer");
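Reassembled from the right-hand column of the hunk above (which replaces the OTLP exporter setup shown on the left), the Jaeger path amounts to: set the W3C trace-context propagator, build an agent pipeline with a service name and endpoint, install it on the Tokio runtime, and attach the resulting tracer as a `tracing_opentelemetry` layer. A minimal sketch against the older `opentelemetry`/`opentelemetry-jaeger` APIs that appear in the diff (exact crate versions are an assumption; the real code also layers the env filter and file/stdout loggers):

```rust
use opentelemetry::sdk::propagation::TraceContextPropagator;
use tracing_subscriber::{layer::SubscriberExt, Registry};

/// Must be called from within a Tokio runtime, since spans are exported in batches on it.
fn init_jaeger_tracing(name: &str, jaeger_addr: &str) {
    // Propagate trace context in the W3C traceparent format.
    opentelemetry::global::set_text_map_propagator(TraceContextPropagator::new());

    // Ship spans to the Jaeger agent in batches.
    let tracer = opentelemetry_jaeger::new_agent_pipeline()
        .with_service_name(name)
        .with_endpoint(jaeger_addr)
        .install_batch(opentelemetry::runtime::Tokio)
        .expect("install jaeger pipeline");

    // Bridge OpenTelemetry spans into the tracing subscriber stack.
    let subscriber =
        Registry::default().with(tracing_opentelemetry::layer().with_tracer(tracer));
    tracing::subscriber::set_global_default(subscriber)
        .expect("failed to set global subscriber");
}
```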
info!( info!(
"tracing initialized directory: {}, level: {}", "tracing initialized directory: {}, level: {}",
log_dir.as_path().display(), log_dir.as_path().display(),