Compare commits

..

No commits in common. "main" and "v0.2.10" have entirely different histories.

84 changed files with 2909 additions and 9954 deletions

View File

@ -1,2 +0,0 @@
# Build-wide compiler flags: pass `--cfg tokio_unstable` to rustc, the cfg flag
# Tokio documents for opting in to its unstable APIs.
[build]
rustflags = ["--cfg", "tokio_unstable"]

View File

@ -1,16 +0,0 @@
# Set to true to add reviewers to pull requests
addReviewers: true
# Set to true to add assignees to pull requests. The value `author` used here
# presumably assigns the pull request to its own author, as documented by the
# auto-assign action - TODO confirm against the action version in use.
addAssignees: author
# A list of reviewers to be added to pull requests (GitHub user name)
reviewers:
- gaius-qi
- yxxhero
- chlins
- CormickKneey
- xujihui1985
# Number of reviewers from the list above added to each pull request
# (3 of the 5 listed)
numberOfReviewers: 3

View File

@ -1,11 +0,0 @@
# Runs the auto-assign action when a pull request is opened, reopened, or
# marked ready for review.
name: "Auto Assign"
on:
# pull_request_target runs in the context of the base repository rather than
# the pull request's head, so it also works for pull requests from forks.
pull_request_target:
types: [opened, reopened, ready_for_review]
jobs:
add-assignee:
runs-on: ubuntu-latest
steps:
# Pinned to a full commit SHA instead of a mutable tag.
- uses: kentaro-m/auto-assign-action@9f6dbe84a80c6e7639d1b9698048b201052a2a94

View File

@ -26,8 +26,6 @@ jobs:
- name: Install Protoc
uses: arduino/setup-protoc@v2
with:
repo-token: ${{ secrets.GH_TOKEN }}
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable
@ -57,8 +55,6 @@ jobs:
- name: Install Protoc
uses: arduino/setup-protoc@v2
with:
repo-token: ${{ secrets.GH_TOKEN }}
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable

View File

@ -85,114 +85,6 @@ jobs:
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache-new
- name: Run Trivy vulnerability scanner in tarball mode
uses: aquasecurity/trivy-action@dc5a429b52fcf669ce959baa2c2dd26090d2a6c4
with:
image-ref: dragonflyoss/client:${{ steps.get_version.outputs.VERSION }}
severity: 'CRITICAL,HIGH'
format: 'sarif'
output: 'trivy-results.sarif'
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@76621b61decf072c1cee8dd1ce2d2a82d33c17ed
with:
sarif_file: 'trivy-results.sarif'
- name: Move cache
run: |
rm -rf /tmp/.buildx-cache
mv /tmp/.buildx-cache-new /tmp/.buildx-cache
push_client_debug_image_to_registry:
name: Push Client Debug Image
runs-on: [self-hosted, Linux, X64]
timeout-minutes: 600
steps:
- name: Check out code
uses: actions/checkout@v4
with:
submodules: recursive
- name: Get Version
id: get_version
run: |
VERSION=${GITHUB_REF#refs/tags/}
if [[ ${GITHUB_REF} == "refs/heads/main" || ${GITHUB_REF} =~ refs/pull/([0-9]+)/merge ]]; then
VERSION=latest
fi
echo "VERSION=${VERSION}" >> $GITHUB_OUTPUT
- name: Get Git Revision
id: vars
shell: bash
run: |
echo "git_revision=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
- name: PrepareReg Names
run: |
echo IMAGE_REPOSITORY=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
- name: Setup QEMU
uses: docker/setup-qemu-action@v3
- name: Setup Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Cache Docker layers
uses: actions/cache@v4
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-debug-${{ github.sha }}
restore-keys: |
${{ runner.os }}-buildx-debug-
- name: Login Docker Hub
uses: docker/login-action@v3
with:
registry: docker.io
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Push to Registry
uses: docker/build-push-action@v6
with:
context: .
file: ci/Dockerfile.debug
platforms: linux/amd64,linux/arm64
labels: |-
org.opencontainers.image.source=https://github.com/${{ github.repository }}
org.opencontainers.image.revision=${{ github.sha }}
build-args: |
GITVERSION=git-${{ steps.vars.outputs.git_revision }}
VERSION=${{ steps.get_version.outputs.VERSION }}-debug
tags: |
dragonflyoss/client:${{ steps.get_version.outputs.VERSION }}-debug
ghcr.io/${{ env.IMAGE_REPOSITORY }}:${{ steps.get_version.outputs.VERSION }}-debug
push: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }}
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache-new
- name: Run Trivy vulnerability scanner in tarball mode
uses: aquasecurity/trivy-action@dc5a429b52fcf669ce959baa2c2dd26090d2a6c4
with:
image-ref: dragonflyoss/client:${{ steps.get_version.outputs.VERSION }}-debug
severity: 'CRITICAL,HIGH'
format: 'sarif'
output: 'trivy-results.sarif'
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@76621b61decf072c1cee8dd1ce2d2a82d33c17ed
with:
sarif_file: 'trivy-results.sarif'
- name: Move cache
run: |
rm -rf /tmp/.buildx-cache
@ -275,19 +167,6 @@ jobs:
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache-new
- name: Run Trivy vulnerability scanner in tarball mode
uses: aquasecurity/trivy-action@dc5a429b52fcf669ce959baa2c2dd26090d2a6c4
with:
image-ref: dragonflyoss/dfinit:${{ steps.get_version.outputs.VERSION }}
severity: 'CRITICAL,HIGH'
format: 'sarif'
output: 'trivy-results.sarif'
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@76621b61decf072c1cee8dd1ce2d2a82d33c17ed
with:
sarif_file: 'trivy-results.sarif'
- name: Move cache
run: |
rm -rf /tmp/.buildx-cache

View File

@ -15,21 +15,18 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4
- name: Rust cache
- name: Rust cache
uses: Swatinem/rust-cache@v2
with:
cache-on-failure: true
- name: Install Protoc
uses: arduino/setup-protoc@v2
with:
repo-token: ${{ secrets.GH_TOKEN }}
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable
with:
components: rustfmt, clippy
toolchain: 1.85.0
- name: Set up Clang
uses: egor-tensin/setup-clang@v1

View File

@ -1,20 +0,0 @@
# Checks that every pull request carries exactly one of the impact labels
# listed at the bottom of this workflow.
name: PR Label
on:
pull_request:
# Re-run when the PR is opened, when its labels change, or when new commits
# are pushed to it.
types: [opened, labeled, unlabeled, synchronize]
# The job only needs read access to the repository contents.
permissions:
contents: read
jobs:
classify:
name: Classify PR
runs-on: ubuntu-latest
steps:
- name: PR impact specified
uses: mheap/github-action-required-labels@8afbe8ae6ab7647d0c9f0cfa7c2f939650d22509 # v5.5
with:
# `mode: exactly` together with `count: 1` requires one, and only one, of the
# labels below to be present.
mode: exactly
count: 1
labels: 'bug, enhancement, documentation, dependencies'

View File

@ -52,13 +52,12 @@ jobs:
target: ${{ matrix.target }}
- name: Install cargo-deb
uses: taiki-e/cache-cargo-install-action@b33c63d3b3c85540f4eba8a4f71a5cc0ce030855
uses: taiki-e/cache-cargo-install-action@v2
with:
# Don't upgrade cargo-deb, refer to https://github.com/kornelski/cargo-deb/issues/169.
tool: cargo-deb@2.10.0
tool: cargo-deb
- name: Install cargo-generate-rpm
uses: taiki-e/install-action@daa3c1f1f9a9d46f686d9fc2f65773d0c293688b
uses: taiki-e/install-action@v2
with:
tool: cargo-generate-rpm
@ -96,6 +95,7 @@ jobs:
mkdir -p "$dirname"
mv "target/${{ matrix.target }}/release/dfget" "$dirname"
mv "target/${{ matrix.target }}/release/dfdaemon" "$dirname"
mv "target/${{ matrix.target }}/release/dfstore" "$dirname"
mv "target/${{ matrix.target }}/release/dfcache" "$dirname"
mv "target/${{ matrix.target }}/release/dfinit" "$dirname"
mv CONTRIBUTING.md LICENSE README.md "$dirname"
@ -119,7 +119,7 @@ jobs:
contents: write
steps:
- name: Download Release Artifacts
uses: actions/download-artifact@v5
uses: actions/download-artifact@v4
with:
path: releases
pattern: release-*
@ -153,8 +153,6 @@ jobs:
- name: Install Rust
uses: dtolnay/rust-toolchain@stable
with:
toolchain: 1.85.0
- name: Install dependencies
run: |
@ -165,4 +163,3 @@ jobs:
with:
registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }}
ignore-unpublished-changes: true
args: --locked

View File

@ -1,31 +0,0 @@
# Marks inactive issues and pull requests as stale and closes them if they
# stay inactive afterwards.
name: Close stale issues and PRs
on:
# Can be triggered manually, and runs on a schedule once a day at 00:00
# (GitHub evaluates cron schedules in UTC).
workflow_dispatch:
schedule:
- cron: "0 0 * * *"
# The action needs to label, comment on and close issues and pull requests.
permissions:
issues: write
pull-requests: write
jobs:
stale:
runs-on: ubuntu-latest
steps:
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
id: stale
with:
# Delete the head branch of a pull request once it is closed as stale.
delete-branch: true
# Issues: stale after 90 days without activity, closed 7 days later.
days-before-close: 7
days-before-stale: 90
# Pull requests: stale after 120 days without activity, closed 7 days later.
days-before-pr-close: 7
days-before-pr-stale: 120
stale-issue-label: "stale"
# Anything labelled bug, wip or on-hold, or attached to a milestone, is never
# marked stale.
exempt-issue-labels: bug,wip,on-hold
exempt-pr-labels: bug,wip,on-hold
exempt-all-milestones: true
# Comments posted when an item is marked stale or closed.
stale-issue-message: 'This issue is stale because it has been open 90 days with no activity.'
close-issue-message: 'This issue was closed because it has been stalled for 7 days with no activity.'
stale-pr-message: 'This PR is stale because it has been open 120 days with no activity.'
close-pr-message: 'This PR was closed because it has been stalled for 7 days with no activity.'

1480
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -12,7 +12,7 @@ members = [
]
[workspace.package]
version = "1.0.10"
version = "0.2.10"
authors = ["The Dragonfly Developers"]
homepage = "https://d7y.io/"
repository = "https://github.com/dragonflyoss/client.git"
@ -22,16 +22,15 @@ readme = "README.md"
edition = "2021"
[workspace.dependencies]
dragonfly-client = { path = "dragonfly-client", version = "1.0.10" }
dragonfly-client-core = { path = "dragonfly-client-core", version = "1.0.10" }
dragonfly-client-config = { path = "dragonfly-client-config", version = "1.0.10" }
dragonfly-client-storage = { path = "dragonfly-client-storage", version = "1.0.10" }
dragonfly-client-backend = { path = "dragonfly-client-backend", version = "1.0.10" }
dragonfly-client-util = { path = "dragonfly-client-util", version = "1.0.10" }
dragonfly-client-init = { path = "dragonfly-client-init", version = "1.0.10" }
dragonfly-api = "2.1.57"
thiserror = "2.0"
futures = "0.3.31"
dragonfly-client = { path = "dragonfly-client", version = "0.2.10" }
dragonfly-client-core = { path = "dragonfly-client-core", version = "0.2.10" }
dragonfly-client-config = { path = "dragonfly-client-config", version = "0.2.10" }
dragonfly-client-storage = { path = "dragonfly-client-storage", version = "0.2.10" }
dragonfly-client-backend = { path = "dragonfly-client-backend", version = "0.2.10" }
dragonfly-client-util = { path = "dragonfly-client-util", version = "0.2.10" }
dragonfly-client-init = { path = "dragonfly-client-init", version = "0.2.10" }
thiserror = "1.0"
dragonfly-api = "=2.1.23"
reqwest = { version = "0.12.4", features = [
"stream",
"native-tls",
@ -41,12 +40,11 @@ reqwest = { version = "0.12.4", features = [
"brotli",
"zstd",
"deflate",
"blocking",
] }
reqwest-middleware = "0.4"
rcgen = { version = "0.12.1", features = ["x509-parser"] }
hyper = { version = "1.6", features = ["full"] }
hyper-util = { version = "0.1.16", features = [
hyper = { version = "1.5", features = ["full"] }
hyper-util = { version = "0.1.10", features = [
"client",
"client-legacy",
"tokio",
@ -59,11 +57,12 @@ http-range-header = "0.4.2"
tracing = "0.1"
url = "2.5.4"
rustls = { version = "0.22.4", features = ["tls12"] }
rustls-pki-types = "1.12.0"
rustls-pki-types = "1.11.0"
rustls-pemfile = "2.2.0"
sha2 = "0.10"
crc32fast = "1.5.0"
uuid = { version = "1.16", features = ["v4"] }
blake3 = "1.5.5"
crc = "3.2.1"
uuid = { version = "1.13", features = ["v4"] }
hex = "0.4"
rocksdb = "0.22.0"
serde = { version = "1.0", features = ["derive"] }
@ -71,16 +70,16 @@ serde_yaml = "0.9"
http = "1"
tonic = { version = "0.12.2", features = ["tls"] }
tonic-reflection = "0.12.3"
tokio = { version = "1.47.1", features = ["full", "tracing"] }
tokio-util = { version = "0.7.16", features = ["full"] }
tokio = { version = "1.43.0", features = ["full"] }
tokio-util = { version = "0.7.13", features = ["full"] }
tokio-stream = "0.1.17"
validator = { version = "0.16", features = ["derive"] }
warp = "0.3.5"
headers = "0.4.1"
headers = "0.4.0"
regex = "1.11.1"
humantime = "2.1.0"
prost-wkt-types = "0.6"
chrono = { version = "0.4.41", features = ["serde", "clock"] }
chrono = { version = "0.4.39", features = ["serde", "clock"] }
openssl = { version = "0.10", features = ["vendored"] }
opendal = { version = "0.48.0", features = [
"services-s3",
@ -91,35 +90,25 @@ opendal = { version = "0.48.0", features = [
"services-cos",
"services-webhdfs",
] }
clap = { version = "4.5.45", features = ["derive"] }
anyhow = "1.0.98"
toml_edit = "0.22.26"
toml = "0.8.23"
bytesize = { version = "1.3.3", features = ["serde"] }
clap = { version = "4.5.28", features = ["derive"] }
anyhow = "1.0.95"
toml_edit = "0.22.23"
toml = "0.8.20"
base16ct = { version = "0.2", features = ["alloc"] }
bytesize = { version = "1.2.0", features = ["serde"] }
bytesize-serde = "0.2.1"
percent-encoding = "2.3.1"
tempfile = "3.20.0"
tempfile = "3.16.0"
tokio-rustls = "0.25.0-alpha.4"
serde_json = "1.0.142"
serde_json = "1.0.138"
lru = "0.12.5"
fs2 = "0.4.3"
lazy_static = "1.5"
bytes = "1.10"
local-ip-address = "0.6.5"
sysinfo = { version = "0.32.1", default-features = false, features = ["component", "disk", "network", "system", "user"] }
[profile.release]
opt-level = 3
lto = "thin"
opt-level = "z"
lto = true
codegen-units = 1
panic = "abort"
strip = "symbols"
[profile.dev]
opt-level = 0
debug = true
incremental = true
strip = false
[profile.bench]
debug = true

View File

@ -20,9 +20,9 @@ You can find the full documentation on the [d7y.io](https://d7y.io).
Join the conversation and help the community.
- **Slack Channel**: [#dragonfly](https://cloud-native.slack.com/messages/dragonfly/) on [CNCF Slack](https://slack.cncf.io/)
- **GitHub Discussions**: [Dragonfly Discussion Forum](https://github.com/dragonflyoss/dragonfly/discussions)
- **Discussion Group**: <dragonfly-discuss@googlegroups.com>
- **Developer Group**: <dragonfly-developers@googlegroups.com>
- **Maintainer Group**: <dragonfly-maintainers@googlegroups.com>
- **GitHub Discussions**: [Dragonfly Discussion Forum](https://github.com/dragonflyoss/dragonfly/discussions)
- **Twitter**: [@dragonfly_oss](https://twitter.com/dragonfly_oss)
- **DingTalk**: [22880028764](https://qr.dingtalk.com/action/joingroup?code=v1,k1,pkV9IbsSyDusFQdByPSK3HfCG61ZCLeb8b/lpQ3uUqI=&_dt_no_comment=1&origin=11)
@ -30,3 +30,7 @@ Join the conversation and help the community.
You should check out our
[CONTRIBUTING](./CONTRIBUTING.md) and develop the project together.
## License
[![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2Fdragonflyoss%2Fclient.svg?type=large)](https://app.fossa.com/projects/git%2Bgithub.com%2Fdragonflyoss%2Fclient?ref=badge_large)

View File

@ -1,4 +1,4 @@
FROM public.ecr.aws/docker/library/rust:1.85.0 AS builder
FROM rust:1.82.0 AS builder
WORKDIR /app/client
@ -7,7 +7,6 @@ RUN apt-get update && apt-get install -y \
&& rm -rf /var/lib/apt/lists/*
COPY Cargo.toml Cargo.lock ./
COPY .cargo ./cargo
COPY dragonfly-client/Cargo.toml ./dragonfly-client/Cargo.toml
COPY dragonfly-client/src ./dragonfly-client/src
@ -21,7 +20,6 @@ COPY dragonfly-client-config/build.rs ./dragonfly-client-config/build.rs
COPY dragonfly-client-storage/Cargo.toml ./dragonfly-client-storage/Cargo.toml
COPY dragonfly-client-storage/src ./dragonfly-client-storage/src
COPY dragonfly-client-storage/benches ./dragonfly-client-storage/benches
COPY dragonfly-client-backend/Cargo.toml ./dragonfly-client-backend/Cargo.toml
COPY dragonfly-client-backend/src ./dragonfly-client-backend/src
@ -35,15 +33,9 @@ COPY dragonfly-client-util/src ./dragonfly-client-util/src
COPY dragonfly-client-init/Cargo.toml ./dragonfly-client-init/Cargo.toml
COPY dragonfly-client-init/src ./dragonfly-client-init/src
ARG TARGETPLATFORM
RUN case "${TARGETPLATFORM}" in \
"linux/arm64") export JEMALLOC_SYS_WITH_LG_PAGE=16;; \
esac && \
cargo build --release --verbose --bin dfget --bin dfdaemon --bin dfcache
RUN cargo build --release --verbose --bin dfget --bin dfdaemon --bin dfstore --bin dfcache
RUN cargo install tokio-console --locked --root /usr/local
FROM public.ecr.aws/docker/library/alpine:3.20 AS health
FROM alpine:3.20 AS health
ENV GRPC_HEALTH_PROBE_VERSION=v0.4.24
@ -56,24 +48,22 @@ RUN if [ "$(uname -m)" = "ppc64le" ]; then \
fi && \
chmod +x /bin/grpc_health_probe
FROM public.ecr.aws/docker/library/golang:1.23.0-alpine3.20 AS pprof
FROM golang:1.23.0-alpine3.20 AS pprof
RUN go install github.com/google/pprof@latest
RUN go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest
FROM public.ecr.aws/debian/debian:bookworm-slim
FROM debian:bookworm-slim
RUN apt-get update && apt-get install -y --no-install-recommends iperf3 fio curl \
iotop sysstat bash-completion procps apache2-utils ca-certificates binutils \
dnsutils iputils-ping llvm graphviz lsof strace dstat net-tools \
RUN apt-get update && apt-get install -y --no-install-recommends iperf3 fio wget curl \
bash-completion procps apache2-utils ca-certificates binutils bpfcc-tools \
dnsutils iputils-ping vim linux-perf llvm graphviz \
&& rm -rf /var/lib/apt/lists/*
COPY --from=builder /app/client/target/release/dfget /usr/local/bin/dfget
COPY --from=builder /app/client/target/release/dfdaemon /usr/local/bin/dfdaemon
COPY --from=builder /app/client/target/release/dfstore /usr/local/bin/dfstore
COPY --from=builder /app/client/target/release/dfcache /usr/local/bin/dfcache
COPY --from=builder /usr/local/bin/tokio-console /usr/local/bin/
COPY --from=pprof /go/bin/pprof /bin/pprof
COPY --from=pprof /go/bin/grpcurl /bin/grpcurl
COPY --from=health /bin/grpc_health_probe /bin/grpc_health_probe
ENTRYPOINT ["/usr/local/bin/dfdaemon"]

View File

@ -1,83 +0,0 @@
# Debug image for the client: the binaries are built without --release (debug
# profile) and shipped together with profiling and diagnostic tooling.

# Stage 1: compile the client binaries and the Rust-based diagnostic tools.
FROM public.ecr.aws/docker/library/rust:1.85.0 AS builder
WORKDIR /app/client
RUN apt-get update && apt-get install -y \
    openssl libclang-dev pkg-config protobuf-compiler git \
    && rm -rf /var/lib/apt/lists/*
COPY Cargo.toml Cargo.lock ./
# Cargo only discovers configuration inside a directory named `.cargo`; the
# destination must keep the leading dot, otherwise the repository's Cargo
# configuration is silently ignored by the build below.
COPY .cargo ./.cargo
COPY dragonfly-client/Cargo.toml ./dragonfly-client/Cargo.toml
COPY dragonfly-client/src ./dragonfly-client/src
COPY dragonfly-client-core/Cargo.toml ./dragonfly-client-core/Cargo.toml
COPY dragonfly-client-core/src ./dragonfly-client-core/src
COPY dragonfly-client-config/Cargo.toml ./dragonfly-client-config/Cargo.toml
COPY dragonfly-client-config/src ./dragonfly-client-config/src
COPY dragonfly-client-config/build.rs ./dragonfly-client-config/build.rs
COPY dragonfly-client-storage/Cargo.toml ./dragonfly-client-storage/Cargo.toml
COPY dragonfly-client-storage/src ./dragonfly-client-storage/src
COPY dragonfly-client-storage/benches ./dragonfly-client-storage/benches
COPY dragonfly-client-backend/Cargo.toml ./dragonfly-client-backend/Cargo.toml
COPY dragonfly-client-backend/src ./dragonfly-client-backend/src
COPY dragonfly-client-backend/examples/plugin/Cargo.toml ./dragonfly-client-backend/examples/plugin/Cargo.toml
COPY dragonfly-client-backend/examples/plugin/src ./dragonfly-client-backend/examples/plugin/src
COPY dragonfly-client-util/Cargo.toml ./dragonfly-client-util/Cargo.toml
COPY dragonfly-client-util/src ./dragonfly-client-util/src
COPY dragonfly-client-init/Cargo.toml ./dragonfly-client-init/Cargo.toml
COPY dragonfly-client-init/src ./dragonfly-client-init/src
# TARGETPLATFORM is provided by BuildKit for multi-platform builds. On arm64
# the jemalloc build is told to use lg(page size) = 16, i.e. 64 KiB pages.
ARG TARGETPLATFORM
RUN case "${TARGETPLATFORM}" in \
    "linux/arm64") export JEMALLOC_SYS_WITH_LG_PAGE=16;; \
    esac && \
    cargo build --verbose --bin dfget --bin dfdaemon --bin dfcache
# Diagnostic tools installed into /usr/local/bin and copied into the final image.
RUN cargo install flamegraph --root /usr/local
RUN cargo install bottom --locked --root /usr/local
RUN cargo install tokio-console --locked --root /usr/local

# Stage 2: download the grpc_health_probe binary matching the build machine's
# architecture (ppc64le, arm64, otherwise amd64).
FROM public.ecr.aws/docker/library/alpine:3.20 AS health
ENV GRPC_HEALTH_PROBE_VERSION=v0.4.24
RUN if [ "$(uname -m)" = "ppc64le" ]; then \
    wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-ppc64le; \
    elif [ "$(uname -m)" = "aarch64" ]; then \
    wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-arm64; \
    else \
    wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-amd64; \
    fi && \
    chmod +x /bin/grpc_health_probe

# Stage 3: build the Go-based tools (pprof and grpcurl).
FROM public.ecr.aws/docker/library/golang:1.23.0-alpine3.20 AS pprof
RUN go install github.com/google/pprof@latest
RUN go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest

# Final stage: Debian base with troubleshooting packages, the debug-profile
# binaries from target/debug, and the tools built in the earlier stages.
FROM public.ecr.aws/debian/debian:bookworm-slim
RUN apt-get update && apt-get install -y --no-install-recommends iperf3 fio curl infiniband-diags ibverbs-utils \
    iotop sysstat bash-completion procps apache2-utils ca-certificates binutils bpfcc-tools \
    dnsutils iputils-ping vim linux-perf llvm lsof socat strace dstat net-tools \
    && rm -rf /var/lib/apt/lists/*
COPY --from=builder /app/client/target/debug/dfget /usr/local/bin/dfget
COPY --from=builder /app/client/target/debug/dfdaemon /usr/local/bin/dfdaemon
COPY --from=builder /app/client/target/debug/dfcache /usr/local/bin/dfcache
COPY --from=builder /usr/local/bin/flamegraph /usr/local/bin/
COPY --from=builder /usr/local/bin/btm /usr/local/bin/
COPY --from=builder /usr/local/bin/tokio-console /usr/local/bin/
COPY --from=pprof /go/bin/pprof /bin/pprof
COPY --from=pprof /go/bin/grpcurl /bin/grpcurl
COPY --from=health /bin/grpc_health_probe /bin/grpc_health_probe
ENTRYPOINT ["/usr/local/bin/dfdaemon"]

View File

@ -1,13 +1,12 @@
FROM public.ecr.aws/docker/library/rust:1.85.0 AS builder
FROM rust:1.82.0 AS builder
RUN apt-get update && apt-get install -y \
openssl libclang-dev pkg-config protobuf-compiler \
&& rm -rf /var/lib/apt/lists/*
openssl libclang-dev pkg-config protobuf-compiler \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app/client
COPY Cargo.toml Cargo.lock ./
COPY .cargo ./cargo
COPY dragonfly-client/Cargo.toml ./dragonfly-client/Cargo.toml
COPY dragonfly-client/src ./dragonfly-client/src
@ -21,7 +20,6 @@ COPY dragonfly-client-config/build.rs ./dragonfly-client-config/build.rs
COPY dragonfly-client-storage/Cargo.toml ./dragonfly-client-storage/Cargo.toml
COPY dragonfly-client-storage/src ./dragonfly-client-storage/src
COPY dragonfly-client-storage/benches ./dragonfly-client-storage/benches
COPY dragonfly-client-backend/Cargo.toml ./dragonfly-client-backend/Cargo.toml
COPY dragonfly-client-backend/src ./dragonfly-client-backend/src
@ -35,13 +33,9 @@ COPY dragonfly-client-util/src ./dragonfly-client-util/src
COPY dragonfly-client-init/Cargo.toml ./dragonfly-client-init/Cargo.toml
COPY dragonfly-client-init/src ./dragonfly-client-init/src
ARG TARGETPLATFORM
RUN case "${TARGETPLATFORM}" in \
"linux/arm64") export JEMALLOC_SYS_WITH_LG_PAGE=16;; \
esac && \
cargo build --release --verbose --bin dfinit
RUN cargo build --release --verbose --bin dfinit
FROM public.ecr.aws/debian/debian:bookworm-slim
FROM debian:bookworm-slim
RUN apt-get update && apt-get install -y --no-install-recommends wget \
&& rm -rf /var/lib/apt/lists/*

View File

@ -5,7 +5,7 @@ After=network-online.target
After=network.target
[Service]
ExecStart=/usr/bin/dfdaemon --config /etc/dragonfly/dfdaemon.yaml --console
ExecStart=/usr/bin/dfdaemon --config /etc/dragonfly/dfdaemon.yaml --verbose
Type=simple
Environment=HOME=/root

View File

@ -69,7 +69,7 @@ cargo build --release --bin dfdaemon
```bash
# prepare client.yaml by yourself.
./target/release/dfdaemon --config client.yaml -l info --console
./target/release/dfdaemon --config client.yaml -l info --verbose
```
## FlameGraph

View File

@ -15,6 +15,8 @@ dragonfly-client-util.workspace = true
dragonfly-api.workspace = true
reqwest.workspace = true
reqwest-middleware.workspace = true
reqwest-retry = "0.7"
reqwest-tracing = "0.5"
tokio.workspace = true
tokio-util.workspace = true
rustls.workspace = true
@ -24,14 +26,12 @@ url.workspace = true
tracing.workspace = true
opendal.workspace = true
percent-encoding.workspace = true
futures.workspace = true
reqwest-retry = "0.7"
reqwest-tracing = "0.5"
libloading = "0.8.8"
futures = "0.3.31"
libloading = "0.8.6"
[dev-dependencies]
tempfile.workspace = true
wiremock = "0.6.4"
wiremock = "0.6.2"
rustls-pki-types.workspace = true
rustls-pemfile.workspace = true
hyper.workspace = true

View File

@ -14,7 +14,7 @@ cargo build --all && mv target/debug/libhdfs.so {plugin_dir}/backend/libhdfs.so
## Run Client with Plugin
```shell
$ cargo run --bin dfdaemon -- --config {config_dir}/config.yaml -l info --console
$ cargo run --bin dfdaemon -- --config {config_dir}/config.yaml -l info --verbose
INFO load [http] builtin backend
INFO load [https] builtin backend
INFO load [hdfs] plugin backend

View File

@ -31,7 +31,6 @@ pub const HDFS_SCHEME: &str = "hdfs";
const DEFAULT_NAMENODE_PORT: u16 = 9870;
/// Hdfs is a struct that implements the Backend trait.
#[derive(Default)]
pub struct Hdfs {
/// scheme is the scheme of the HDFS.
scheme: String,
@ -40,6 +39,7 @@ pub struct Hdfs {
/// Hdfs implements the Backend trait.
impl Hdfs {
/// new returns a new HDFS backend.
#[instrument(skip_all)]
pub fn new() -> Self {
Self {
scheme: HDFS_SCHEME.to_string(),
@ -47,6 +47,7 @@ impl Hdfs {
}
/// operator initializes the operator with the parsed URL and HDFS config.
#[instrument(skip_all)]
pub fn operator(
&self,
url: Url,
@ -83,6 +84,7 @@ impl Hdfs {
#[tonic::async_trait]
impl super::Backend for Hdfs {
/// scheme returns the scheme of the HDFS backend.
#[instrument(skip_all)]
fn scheme(&self) -> String {
self.scheme.clone()
}

View File

@ -43,6 +43,7 @@ pub struct HTTP {
/// HTTP implements the http interface.
impl HTTP {
/// new returns a new HTTP.
#[instrument(skip_all)]
pub fn new(scheme: &str) -> Result<HTTP> {
// Default TLS client config with no validation.
let client_config_builder = rustls::ClientConfig::builder()
@ -50,22 +51,11 @@ impl HTTP {
.with_custom_certificate_verifier(NoVerifier::new())
.with_no_client_auth();
// Disable automatic compression to prevent double-decompression issues.
//
// Problem scenario:
// 1. Origin server supports gzip and returns "content-encoding: gzip" header.
// 2. Backend decompresses the response and stores uncompressed content to disk.
// 3. When user's client downloads via dfdaemon proxy, the original "content-encoding: gzip"
// header is forwarded to it.
// 4. User's client attempts to decompress the already-decompressed content, causing errors.
//
// Solution: Disable all compression formats (gzip, brotli, zstd, deflate) to ensure
// we receive and store uncompressed content, eliminating the double-decompression issue.
let client = reqwest::Client::builder()
.no_gzip()
.no_brotli()
.no_zstd()
.no_deflate()
.gzip(true)
.brotli(true)
.zstd(true)
.deflate(true)
.use_preconfigured_tls(client_config_builder)
.pool_max_idle_per_host(super::POOL_MAX_IDLE_PER_HOST)
.tcp_keepalive(super::KEEP_ALIVE_INTERVAL)
@ -85,6 +75,7 @@ impl HTTP {
}
/// client returns a new reqwest client.
#[instrument(skip_all)]
fn client(
&self,
client_cert: Option<Vec<CertificateDer<'static>>>,
@ -99,22 +90,11 @@ impl HTTP {
.with_root_certificates(root_cert_store)
.with_no_client_auth();
// Disable automatic compression to prevent double-decompression issues.
//
// Problem scenario:
// 1. Origin server supports gzip and returns "content-encoding: gzip" header.
// 2. Backend decompresses the response and stores uncompressed content to disk.
// 3. When user's client downloads via dfdaemon proxy, the original "content-encoding: gzip"
// header is forwarded to it.
// 4. User's client attempts to decompress the already-decompressed content, causing errors.
//
// Solution: Disable all compression formats (gzip, brotli, zstd, deflate) to ensure
// we receive and store uncompressed content, eliminating the double-decompression issue.
let client = reqwest::Client::builder()
.no_gzip()
.no_brotli()
.no_zstd()
.no_deflate()
.gzip(true)
.brotli(true)
.zstd(true)
.deflate(true)
.use_preconfigured_tls(client_config_builder)
.build()?;
@ -137,6 +117,7 @@ impl HTTP {
#[tonic::async_trait]
impl super::Backend for HTTP {
/// scheme returns the scheme of the HTTP backend.
#[instrument(skip_all)]
fn scheme(&self) -> String {
self.scheme.clone()
}
@ -160,13 +141,6 @@ impl super::Backend for HTTP {
.client(request.client_cert)?
.get(&request.url)
.headers(header)
// Add Range header to ensure Content-Length is returned in response headers.
// Some servers (especially when using Transfer-Encoding: chunked; see
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Transfer-Encoding) may not
// include Content-Length in HEAD requests. Using "bytes=0-" requests the
// entire file starting from byte 0, forcing the server to include file size
// information in the response headers.
.header(reqwest::header::RANGE, "bytes=0-")
.timeout(request.timeout)
.send()
.await

View File

@ -23,10 +23,11 @@ use libloading::Library;
use reqwest::header::HeaderMap;
use rustls_pki_types::CertificateDer;
use std::path::Path;
use std::str::FromStr;
use std::{collections::HashMap, pin::Pin, time::Duration};
use std::{fmt::Debug, fs};
use tokio::io::{AsyncRead, AsyncReadExt};
use tracing::{error, info, warn};
use tracing::{error, info, instrument, warn};
use url::Url;
pub mod hdfs;
@ -46,7 +47,7 @@ const HTTP2_KEEP_ALIVE_INTERVAL: Duration = Duration::from_secs(300);
const HTTP2_KEEP_ALIVE_TIMEOUT: Duration = Duration::from_secs(20);
/// MAX_RETRY_TIMES is the max retry times for the request.
const MAX_RETRY_TIMES: u32 = 1;
const MAX_RETRY_TIMES: u32 = 3;
/// NAME is the name of the package.
pub const NAME: &str = "backend";
@ -166,7 +167,7 @@ where
}
/// The File Entry of a directory, including some relevant file metadata.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
#[derive(Debug, PartialEq, Eq)]
pub struct DirEntry {
/// url is the url of the entry.
pub url: String,
@ -226,6 +227,7 @@ pub struct BackendFactory {
/// https://github.com/dragonflyoss/client/tree/main/dragonfly-client-backend/examples/plugin/.
impl BackendFactory {
/// new returns a new BackendFactory.
#[instrument(skip_all)]
pub fn new(plugin_dir: Option<&Path>) -> Result<Self> {
let mut backend_factory = Self::default();
backend_factory.load_builtin_backends()?;
@ -240,12 +242,14 @@ impl BackendFactory {
Ok(backend_factory)
}
/// unsupported_download_directory returns whether the scheme does not support directory download.
pub fn unsupported_download_directory(scheme: &str) -> bool {
scheme == http::HTTP_SCHEME || scheme == http::HTTPS_SCHEME
/// supported_download_directory returns whether the scheme supports directory download.
#[instrument(skip_all)]
pub fn supported_download_directory(scheme: &str) -> bool {
object_storage::Scheme::from_str(scheme).is_ok() || scheme == hdfs::HDFS_SCHEME
}
/// build returns the backend by the scheme of the url.
#[instrument(skip_all)]
pub fn build(&self, url: &str) -> Result<&(dyn Backend + Send + Sync)> {
let url = Url::parse(url).or_err(ErrorType::ParseError)?;
let scheme = url.scheme();
@ -256,6 +260,7 @@ impl BackendFactory {
}
/// load_builtin_backends loads the builtin backends.
#[instrument(skip_all)]
fn load_builtin_backends(&mut self) -> Result<()> {
self.backends.insert(
"http".to_string(),
@ -325,12 +330,13 @@ impl BackendFactory {
}
/// load_plugin_backends loads the plugin backends.
#[instrument(skip_all)]
fn load_plugin_backends(&mut self, plugin_dir: &Path) -> Result<()> {
let backend_plugin_dir = plugin_dir.join(NAME);
if !backend_plugin_dir.exists() {
warn!(
"skip loading plugin backends, because the plugin directory {} does not exist",
backend_plugin_dir.display()
plugin_dir.display()
);
return Ok(());
}
@ -430,15 +436,9 @@ mod tests {
let result = BackendFactory::new(Some(&plugin_dir));
assert!(result.is_err());
let err_msg = format!("{}", result.err().unwrap());
assert!(
err_msg.starts_with("PluginError cause:"),
"error message should start with 'PluginError cause:'"
);
assert!(
err_msg.contains(&lib_path.display().to_string()),
"error message should contain library path"
assert_eq!(
format!("{}", result.err().unwrap()),
format!("PluginError cause: {}: file too short", lib_path.display()),
);
}

View File

@ -177,6 +177,7 @@ pub struct ObjectStorage {
/// ObjectStorage implements the ObjectStorage trait.
impl ObjectStorage {
/// Returns ObjectStorage that implements the Backend trait.
#[instrument(skip_all)]
pub fn new(scheme: Scheme) -> ClientResult<ObjectStorage> {
// Initialize the reqwest client.
let client = reqwest::Client::builder()
@ -195,6 +196,7 @@ impl ObjectStorage {
}
/// operator initializes the operator with the parsed URL and object storage.
#[instrument(skip_all)]
pub fn operator(
&self,
parsed_url: &super::object_storage::ParsedURL,
@ -221,6 +223,7 @@ impl ObjectStorage {
}
/// s3_operator initializes the S3 operator with the parsed URL and object storage.
#[instrument(skip_all)]
pub fn s3_operator(
&self,
parsed_url: &super::object_storage::ParsedURL,
@ -273,6 +276,7 @@ impl ObjectStorage {
}
/// gcs_operator initializes the GCS operator with the parsed URL and object storage.
#[instrument(skip_all)]
pub fn gcs_operator(
&self,
parsed_url: &super::object_storage::ParsedURL,
@ -307,6 +311,7 @@ impl ObjectStorage {
}
/// abs_operator initializes the ABS operator with the parsed URL and object storage.
#[instrument(skip_all)]
pub fn abs_operator(
&self,
parsed_url: &super::object_storage::ParsedURL,
@ -349,6 +354,7 @@ impl ObjectStorage {
}
/// oss_operator initializes the OSS operator with the parsed URL and object storage.
#[instrument(skip_all)]
pub fn oss_operator(
&self,
parsed_url: &super::object_storage::ParsedURL,
@ -392,6 +398,7 @@ impl ObjectStorage {
}
/// obs_operator initializes the OBS operator with the parsed URL and object storage.
#[instrument(skip_all)]
pub fn obs_operator(
&self,
parsed_url: &super::object_storage::ParsedURL,
@ -480,6 +487,7 @@ impl ObjectStorage {
#[tonic::async_trait]
impl crate::Backend for ObjectStorage {
/// scheme returns the scheme of the object storage as an owned String,
/// obtained by calling `to_string` on the `scheme` field.
#[instrument(skip_all)]
fn scheme(&self) -> String {
self.scheme.to_string()
}

View File

@ -13,7 +13,6 @@ build = "build.rs"
[dependencies]
dragonfly-client-core.workspace = true
dragonfly-client-util.workspace = true
local-ip-address.workspace = true
clap.workspace = true
regex.workspace = true
serde.workspace = true
@ -22,16 +21,13 @@ validator.workspace = true
humantime.workspace = true
serde_yaml.workspace = true
tokio.workspace = true
tempfile.workspace = true
serde_json.workspace = true
bytesize.workspace = true
bytesize-serde.workspace = true
tonic.workspace = true
rustls-pki-types.workspace = true
rcgen.workspace = true
reqwest.workspace = true
home = "0.5.11"
local-ip-address = "0.6.3"
hostname = "^0.4"
humantime-serde = "1.1.1"
serde_regex = "1.1.0"
http-serde = "2.1.1"

File diff suppressed because it is too large Load Diff

View File

@ -173,7 +173,7 @@ pub struct CRIO {
pub registries: Vec<CRIORegistry>,
}
/// PodmanRegistry is the registry configuration for podman.
/// CRIORegistry is the registry configuration for cri-o.
#[derive(Debug, Clone, Default, Validate, Deserialize, Serialize, PartialEq, Eq)]
#[serde(default, rename_all = "camelCase")]
pub struct PodmanRegistry {
@ -352,62 +352,6 @@ impl Config {
#[cfg(test)]
mod tests {
use super::*;
use std::path::Path;
#[test]
fn test_default_dfinit_config_path() {
    // The default config file is `dfinit.yaml` inside the default config directory.
    assert_eq!(
        default_dfinit_config_path(),
        crate::default_config_dir().join("dfinit.yaml")
    );
}
#[test]
fn test_default_dfinit_log_dir() {
    // The default log directory is the default log dir joined with NAME.
    assert_eq!(
        default_dfinit_log_dir(),
        crate::default_log_dir().join(NAME)
    );
}
#[test]
fn test_container_runtime_default_paths() {
    // cri-o and podman are expected to default to the same registries file.
    let registries_conf = Path::new("/etc/containers/registries.conf");
    let containerd_conf = Path::new("/etc/containerd/config.toml");
    let docker_conf = Path::new("/etc/docker/daemon.json");

    assert_eq!(
        default_container_runtime_containerd_config_path(),
        containerd_conf
    );
    assert_eq!(default_container_runtime_docker_config_path(), docker_conf);
    assert_eq!(
        default_container_runtime_crio_config_path(),
        registries_conf
    );
    assert_eq!(
        default_container_runtime_podman_config_path(),
        registries_conf
    );
}
#[test]
fn test_default_unqualified_search_registries() {
    // Both runtimes are expected to default to the same ordered registry list.
    let expected = vec![
        "registry.fedoraproject.org",
        "registry.access.redhat.com",
        "docker.io",
    ];

    assert_eq!(
        default_container_runtime_crio_unqualified_search_registries(),
        expected
    );
    assert_eq!(
        default_container_runtime_podman_unqualified_search_registries(),
        expected
    );
}
#[test]
fn serialize_container_runtime() {
@ -529,43 +473,4 @@ containerRuntime:
panic!("failed to deserialize");
}
}
// Deserializes a dfinit config whose container runtime is podman and checks
// that the config path, the unqualified search registries and the registry
// list are populated from the YAML document.
#[test]
fn deserialize_container_runtime_podman_correctly() {
// YAML fixture fed to serde_yaml below.
let raw_data = r#"
proxy:
addr: "hello"
containerRuntime:
podman:
configPath: "test_path"
unqualifiedSearchRegistries:
- "reg1"
- "reg2"
registries:
- prefix: "prefix1"
location: "location1"
- prefix: "prefix2"
location: "location2"
"#;
let cfg: Config = serde_yaml::from_str(raw_data).expect("failed to deserialize");
// Any runtime variant other than Podman means the document was misread.
if let Some(ContainerRuntimeConfig::Podman(c)) = cfg.container_runtime.config {
assert_eq!(PathBuf::from("test_path"), c.config_path);
assert_eq!(vec!["reg1", "reg2"], c.unqualified_search_registries);
assert_eq!(
vec![
PodmanRegistry {
location: "location1".to_string(),
prefix: "prefix1".to_string()
},
PodmanRegistry {
location: "location2".to_string(),
prefix: "prefix2".to_string()
},
],
c.registries
);
} else {
panic!("failed to deserialize");
}
}
}

View File

@ -0,0 +1,25 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use std::path::PathBuf;
/// NAME is the name of dfstore.
pub const NAME: &str = "dfstore";
/// default_dfstore_log_dir returns the default log directory for dfstore:
/// the crate-wide default log directory with NAME appended.
pub fn default_dfstore_log_dir() -> PathBuf {
    let mut log_dir = crate::default_log_dir();
    log_dir.push(NAME);
    log_dir
}

View File

@ -21,6 +21,7 @@ pub mod dfcache;
pub mod dfdaemon;
pub mod dfget;
pub mod dfinit;
pub mod dfstore;
/// SERVICE_NAME is the name of the service.
pub const SERVICE_NAME: &str = "dragonfly";
@ -104,7 +105,7 @@ pub fn default_lock_dir() -> PathBuf {
/// default_plugin_dir is the default plugin directory for client.
pub fn default_plugin_dir() -> PathBuf {
#[cfg(target_os = "linux")]
return PathBuf::from("/usr/local/lib/dragonfly/plugins/");
return PathBuf::from("/var/lib/dragonfly/plugins/");
#[cfg(target_os = "macos")]
return home::home_dir().unwrap().join(".dragonfly").join("plugins");
@ -148,20 +149,3 @@ impl clap::builder::TypedValueParser for VersionValueParser {
Ok(false)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use clap::{builder::TypedValueParser, Command};
    use std::ffi::OsStr;

    /// Parsing the literal "false" through VersionValueParser must succeed
    /// and yield `false`.
    #[test]
    fn version_value_parser_references_non_real_values() {
        let command = Command::new("test_app");
        let parsed = VersionValueParser.parse_ref(&command, None, OsStr::new("false"));

        assert!(parsed.is_ok());
        assert!(!parsed.unwrap());
    }
}

View File

@ -22,3 +22,4 @@ hyper-util.workspace = true
opendal.workspace = true
url.workspace = true
headers.workspace = true
libloading = "0.8.6"

View File

@ -62,10 +62,6 @@ pub enum DFError {
#[error{"piece {0} state is failed"}]
PieceStateIsFailed(String),
/// DownloadPieceFinished is the error when the download piece finished timeout.
#[error{"download piece {0} finished timeout"}]
DownloadPieceFinished(String),
/// WaitForPieceFinishedTimeout is the error when the wait for piece finished timeout.
#[error{"wait for piece {0} finished timeout"}]
WaitForPieceFinishedTimeout(String),

View File

@ -23,6 +23,7 @@ tokio.workspace = true
anyhow.workspace = true
tracing.workspace = true
toml_edit.workspace = true
toml.workspace = true
url.workspace = true
tempfile.workspace = true
serde_json.workspace = true

View File

@ -64,8 +64,12 @@ struct Args {
)]
log_max_files: usize,
#[arg(long, default_value_t = false, help = "Specify whether to print log")]
console: bool,
#[arg(
long = "verbose",
default_value_t = false,
help = "Specify whether to print log"
)]
verbose: bool,
#[arg(
short = 'V',
@ -90,12 +94,8 @@ async fn main() -> Result<(), anyhow::Error> {
args.log_level,
args.log_max_files,
None,
None,
None,
None,
None,
false,
args.console,
args.verbose,
);
// Load config.

View File

@ -50,6 +50,8 @@ impl ContainerRuntime {
/// run runs the container runtime to initialize runtime environment for the dfdaemon.
#[instrument(skip_all)]
pub async fn run(&self) -> Result<()> {
// If containerd is enabled, override the default containerd
// configuration.
match &self.engine {
None => Ok(()),
Some(Engine::Containerd(containerd)) => containerd.run().await,

View File

@ -22,22 +22,13 @@ tracing.workspace = true
prost-wkt-types.workspace = true
tokio.workspace = true
tokio-util.workspace = true
crc32fast.workspace = true
sha2.workspace = true
crc.workspace = true
base16ct.workspace = true
fs2.workspace = true
bytes.workspace = true
bytesize.workspace = true
num_cpus = "1.17"
num_cpus = "1.0"
bincode = "1.3.3"
walkdir = "2.5.0"
rayon = "1.10.0"
[dev-dependencies]
tempfile.workspace = true
criterion = "0.5"
[[bench]]
name = "cache"
harness = false
[[bench]]
name = "lru_cache"
harness = false
tempdir = "0.3"

View File

@ -1,468 +0,0 @@
/*
* Copyright 2025 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use bytes::Bytes;
use bytesize::ByteSize;
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use dragonfly_client_config::dfdaemon::{Config, Storage};
use dragonfly_client_storage::{cache::Cache, metadata::Piece};
use std::sync::Arc;
use tokio::io::AsyncReadExt;
use tokio::runtime::Runtime;
// Number of pieces to write/read in each benchmark.
const PIECE_COUNT: usize = 100;
/// create_config builds a dfdaemon Config whose storage cache capacity is
/// `capacity`; every other field keeps its default value.
fn create_config(capacity: ByteSize) -> Config {
    let storage = Storage {
        cache_capacity: capacity,
        ..Default::default()
    };

    Config {
        storage,
        ..Default::default()
    }
}
/// create_piece builds piece metadata of the given `length`, with number 0,
/// offset 0, an empty digest, no parent, zeroed upload counters and both
/// timestamps set to the current UTC time.
fn create_piece(length: u64) -> Piece {
    // Sampled in the same order as the original struct literal evaluated them.
    let updated_at = chrono::Utc::now().naive_utc();
    let created_at = chrono::Utc::now().naive_utc();

    Piece {
        number: 0,
        offset: 0,
        length,
        digest: String::new(),
        parent_id: None,
        uploading_count: 0,
        uploaded_count: 0,
        updated_at,
        created_at,
        finished_at: None,
    }
}
/// put_task benchmarks `Cache::put_task` for task sizes of 10MB, 100MB and
/// 1GB. Every iteration starts from a fresh cache created with a 2GB
/// capacity, so only the `put_task` call itself is measured.
pub fn put_task(c: &mut Criterion) {
    let rt: Runtime = Runtime::new().unwrap();
    let mut group = c.benchmark_group("Put Task");

    // One benchmark is registered per size; the label is the benchmark parameter.
    for (label, task_size) in [
        ("10MB", ByteSize::mb(10)),
        ("100MB", ByteSize::mb(100)),
        ("1GB", ByteSize::gb(1)),
    ] {
        group.bench_with_input(
            BenchmarkId::new("Put Task", label),
            &task_size,
            |b, size| {
                b.iter_batched(
                    // Setup (not timed): a fresh cache.
                    || rt.block_on(async { Cache::new(Arc::new(create_config(ByteSize::gb(2)))) }),
                    // Routine (timed): register the task in the cache.
                    |mut cache| {
                        rt.block_on(async {
                            cache.put_task("task", black_box(size.as_u64())).await;
                        });
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
/// delete_task benchmarks `Cache::delete_task` for tasks of 10MB, 100MB and
/// 1GB. The untimed setup creates a cache with a 2GB capacity and registers
/// the task; the timed routine deletes it.
pub fn delete_task(c: &mut Criterion) {
    let rt: Runtime = Runtime::new().unwrap();
    let mut group = c.benchmark_group("Delete Task");

    // One benchmark is registered per size; the label is the benchmark parameter.
    for (label, task_size) in [
        ("10MB", ByteSize::mb(10)),
        ("100MB", ByteSize::mb(100)),
        ("1GB", ByteSize::gb(1)),
    ] {
        group.bench_with_input(
            BenchmarkId::new("Delete Task", label),
            &task_size,
            |b, size| {
                b.iter_batched(
                    // Setup (not timed): a cache that already contains the task.
                    || {
                        let mut cache = rt.block_on(async {
                            Cache::new(Arc::new(create_config(ByteSize::gb(2))))
                        });
                        rt.block_on(async {
                            cache.put_task("task", black_box(size.as_u64())).await;
                        });
                        cache
                    },
                    // Routine (timed): remove the task again.
                    |mut cache| {
                        rt.block_on(async {
                            cache.delete_task("task").await.unwrap();
                        });
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
/// write_piece benchmarks writing PIECE_COUNT pieces of 4MB, 10MB and 16MB
/// into the cache. The untimed setup creates a cache sized to hold exactly
/// PIECE_COUNT pieces and registers the task; the timed routine writes the
/// pieces one after another.
pub fn write_piece(c: &mut Criterion) {
    let rt = Runtime::new().unwrap();
    let mut group = c.benchmark_group("Write Piece");

    // One benchmark is registered per piece size; the label is the benchmark parameter.
    for (label, piece_size) in [
        ("4MB", ByteSize::mb(4)),
        ("10MB", ByteSize::mb(10)),
        ("16MB", ByteSize::mb(16)),
    ] {
        // Payload of one piece, filled with the byte value 1.
        let data = vec![1u8; piece_size.as_u64() as usize];
        // Cache capacity and task length: room for all pieces of this size.
        let capacity = piece_size * PIECE_COUNT as u64;

        group.bench_with_input(
            BenchmarkId::new("Write Piece", label),
            &data,
            |b, data| {
                b.iter_batched(
                    // Setup (not timed): an empty cache with the task registered.
                    || {
                        let mut cache = rt
                            .block_on(async { Cache::new(Arc::new(create_config(capacity))) });
                        rt.block_on(async {
                            cache.put_task("task", capacity.as_u64()).await;
                        });
                        cache
                    },
                    // Routine (timed): write every piece of the task.
                    |cache| {
                        rt.block_on(async {
                            for i in 0..PIECE_COUNT {
                                cache
                                    .write_piece(
                                        "task",
                                        &format!("piece{}", i),
                                        Bytes::copy_from_slice(data),
                                    )
                                    .await
                                    .unwrap();
                            }
                        });
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
pub fn read_piece(c: &mut Criterion) {
let rt = Runtime::new().unwrap();
let mut group = c.benchmark_group("Read Piece");
group.bench_with_input(
BenchmarkId::new("Read Piece", "4MB"),
&vec![1u8; ByteSize::mb(4).as_u64() as usize],
|b, data| {
b.iter_batched(
|| {
let mut cache = rt.block_on(async {
Cache::new(Arc::new(create_config(
ByteSize::mb(4) * PIECE_COUNT as u64,
)))
});
rt.block_on(async {
cache
.put_task("task", (ByteSize::mb(4) * PIECE_COUNT as u64).as_u64())
.await;
for i in 0..PIECE_COUNT {
cache
.write_piece(
"task",
&format!("piece{}", i),
Bytes::copy_from_slice(data),
)
.await
.unwrap();
}
});
cache
},
|cache| {
rt.block_on(async {
for i in 0..PIECE_COUNT {
let mut reader = cache
.read_piece(
"task",
&format!("piece{}", i),
create_piece(data.len() as u64),
None,
)
.await
.unwrap();
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
}
});
},
criterion::BatchSize::SmallInput,
);
},
);
group.bench_with_input(
BenchmarkId::new("Read Piece", "10MB"),
&vec![1u8; ByteSize::mb(10).as_u64() as usize],
|b, data| {
b.iter_batched(
|| {
let mut cache = rt.block_on(async {
Cache::new(Arc::new(create_config(
ByteSize::mb(10) * PIECE_COUNT as u64,
)))
});
rt.block_on(async {
cache
.put_task("task", (ByteSize::mb(10) * PIECE_COUNT as u64).as_u64())
.await;
for i in 0..PIECE_COUNT {
cache
.write_piece(
"task",
&format!("piece{}", i),
Bytes::copy_from_slice(data),
)
.await
.unwrap();
}
});
cache
},
|cache| {
rt.block_on(async {
for i in 0..PIECE_COUNT {
let mut reader = cache
.read_piece(
"task",
&format!("piece{}", i),
create_piece(data.len() as u64),
None,
)
.await
.unwrap();
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
}
});
},
criterion::BatchSize::SmallInput,
);
},
);
group.bench_with_input(
BenchmarkId::new("Read Piece", "16MB"),
&vec![1u8; ByteSize::mb(16).as_u64() as usize],
|b, data| {
b.iter_batched(
|| {
let mut cache = rt.block_on(async {
Cache::new(Arc::new(create_config(
ByteSize::mb(16) * PIECE_COUNT as u64,
)))
});
rt.block_on(async {
cache
.put_task("task", (ByteSize::mb(16) * PIECE_COUNT as u64).as_u64())
.await;
for i in 0..PIECE_COUNT {
cache
.write_piece(
"task",
&format!("piece{}", i),
Bytes::copy_from_slice(data),
)
.await
.unwrap();
}
});
cache
},
|cache| {
rt.block_on(async {
for i in 0..PIECE_COUNT {
let mut reader = cache
.read_piece(
"task",
&format!("piece{}", i),
create_piece(data.len() as u64),
None,
)
.await
.unwrap();
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
}
});
},
criterion::BatchSize::SmallInput,
);
},
);
group.finish();
}
// Registers the four cache benchmarks under the `benches` group and generates
// the benchmark entry point through criterion_main.
criterion_group!(benches, put_task, delete_task, write_piece, read_piece,);
criterion_main!(benches);

View File

@ -1,448 +0,0 @@
/*
* Copyright 2025 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use bytesize::ByteSize;
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use dragonfly_client_storage::cache::lru_cache::LruCache;
// Number of operations to perform in each benchmark
const OPERATION_COUNT: usize = 1000;
/// lru_cache_put benchmarks OPERATION_COUNT `put` calls with distinct keys
/// into an empty LruCache of capacity OPERATION_COUNT. The stored value is
/// the byte count of the labelled size (4MB, 10MB, 16MB) as a u64.
pub fn lru_cache_put(c: &mut Criterion) {
    let mut group = c.benchmark_group("Lru Cache Put");

    // One benchmark is registered per size; the label is the benchmark parameter.
    for (label, value_size) in [
        ("4MB", ByteSize::mb(4)),
        ("10MB", ByteSize::mb(10)),
        ("16MB", ByteSize::mb(16)),
    ] {
        group.bench_with_input(
            BenchmarkId::new("Lru Cache Put", label),
            &value_size,
            |b, size| {
                b.iter_batched(
                    // Setup (not timed): an empty cache.
                    || LruCache::new(OPERATION_COUNT),
                    // Routine (timed): insert every key once.
                    |mut cache| {
                        for i in 0..OPERATION_COUNT {
                            cache.put(format!("key{}", i), size.as_u64());
                        }
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
/// lru_cache_get benchmarks OPERATION_COUNT `get` calls against a cache that
/// was pre-filled with the same keys. The stored value is the byte count of
/// the labelled size (4MB, 10MB, 16MB) as a u64.
pub fn lru_cache_get(c: &mut Criterion) {
    let mut group = c.benchmark_group("Lru Cache Get");

    // One benchmark is registered per size; the label is the benchmark parameter.
    for (label, value_size) in [
        ("4MB", ByteSize::mb(4)),
        ("10MB", ByteSize::mb(10)),
        ("16MB", ByteSize::mb(16)),
    ] {
        group.bench_with_input(
            BenchmarkId::new("Lru Cache Get", label),
            &value_size,
            |b, size| {
                b.iter_batched(
                    // Setup (not timed): a cache holding every key.
                    || {
                        let mut cache = LruCache::new(OPERATION_COUNT);
                        for i in 0..OPERATION_COUNT {
                            cache.put(format!("key{}", i), size.as_u64());
                        }
                        cache
                    },
                    // Routine (timed): look every key up in insertion order.
                    |mut cache| {
                        for i in 0..OPERATION_COUNT {
                            black_box(cache.get(&format!("key{}", i)));
                        }
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
/// lru_cache_peek benchmarks OPERATION_COUNT `peek` calls against a cache
/// that was pre-filled with the same keys. The stored value is the byte
/// count of the labelled size (4MB, 10MB, 16MB) as a u64.
pub fn lru_cache_peek(c: &mut Criterion) {
    let mut group = c.benchmark_group("Lru Cache Peek");

    // One benchmark is registered per size; the label is the benchmark parameter.
    for (label, value_size) in [
        ("4MB", ByteSize::mb(4)),
        ("10MB", ByteSize::mb(10)),
        ("16MB", ByteSize::mb(16)),
    ] {
        group.bench_with_input(
            BenchmarkId::new("Lru Cache Peek", label),
            &value_size,
            |b, size| {
                b.iter_batched(
                    // Setup (not timed): a cache holding every key.
                    || {
                        let mut cache = LruCache::new(OPERATION_COUNT);
                        for i in 0..OPERATION_COUNT {
                            cache.put(format!("key{}", i), size.as_u64());
                        }
                        cache
                    },
                    // Routine (timed): peek at every key in insertion order.
                    |cache| {
                        for i in 0..OPERATION_COUNT {
                            black_box(cache.peek(&format!("key{}", i)));
                        }
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
/// lru_cache_contains benchmarks OPERATION_COUNT `contains` calls against a
/// cache that was pre-filled with the same keys. The stored value is the
/// byte count of the labelled size (4MB, 10MB, 16MB) as a u64.
pub fn lru_cache_contains(c: &mut Criterion) {
    let mut group = c.benchmark_group("Lru Cache Contains");

    // One benchmark is registered per size; the label is the benchmark parameter.
    for (label, value_size) in [
        ("4MB", ByteSize::mb(4)),
        ("10MB", ByteSize::mb(10)),
        ("16MB", ByteSize::mb(16)),
    ] {
        group.bench_with_input(
            BenchmarkId::new("Lru Cache Contains", label),
            &value_size,
            |b, size| {
                b.iter_batched(
                    // Setup (not timed): a cache holding every key.
                    || {
                        let mut cache = LruCache::new(OPERATION_COUNT);
                        for i in 0..OPERATION_COUNT {
                            cache.put(format!("key{}", i), size.as_u64());
                        }
                        cache
                    },
                    // Routine (timed): membership check for every key.
                    |cache| {
                        for i in 0..OPERATION_COUNT {
                            black_box(cache.contains(&format!("key{}", i)));
                        }
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
/// lru_cache_pop benchmarks removing every key with `pop` from a cache that
/// was pre-filled with OPERATION_COUNT keys. The stored value is the byte
/// count of the labelled size (4MB, 10MB, 16MB) as a u64.
pub fn lru_cache_pop(c: &mut Criterion) {
    let mut group = c.benchmark_group("Lru Cache Pop");

    // One benchmark is registered per size; the label is the benchmark parameter.
    for (label, value_size) in [
        ("4MB", ByteSize::mb(4)),
        ("10MB", ByteSize::mb(10)),
        ("16MB", ByteSize::mb(16)),
    ] {
        group.bench_with_input(
            BenchmarkId::new("Lru Cache Pop", label),
            &value_size,
            |b, size| {
                b.iter_batched(
                    // Setup (not timed): a cache holding every key.
                    || {
                        let mut cache = LruCache::new(OPERATION_COUNT);
                        for i in 0..OPERATION_COUNT {
                            cache.put(format!("key{}", i), size.as_u64());
                        }
                        cache
                    },
                    // Routine (timed): pop every key in insertion order.
                    |mut cache| {
                        for i in 0..OPERATION_COUNT {
                            black_box(cache.pop(&format!("key{}", i)));
                        }
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
/// lru_cache_pop_lru benchmarks draining a cache that was pre-filled with
/// OPERATION_COUNT keys by calling `pop_lru` until it is empty. The stored
/// value is the byte count of the labelled size (4MB, 10MB, 16MB) as a u64.
pub fn lru_cache_pop_lru(c: &mut Criterion) {
    let mut group = c.benchmark_group("Lru Cache Pop Lru");

    // One benchmark is registered per size; the label is the benchmark parameter.
    for (label, value_size) in [
        ("4MB", ByteSize::mb(4)),
        ("10MB", ByteSize::mb(10)),
        ("16MB", ByteSize::mb(16)),
    ] {
        group.bench_with_input(
            BenchmarkId::new("Lru Cache Pop Lru", label),
            &value_size,
            |b, size| {
                b.iter_batched(
                    // Setup (not timed): a cache holding every key.
                    || {
                        let mut cache = LruCache::new(OPERATION_COUNT);
                        for i in 0..OPERATION_COUNT {
                            cache.put(format!("key{}", i), size.as_u64());
                        }
                        cache
                    },
                    // Routine (timed): drain from the least recently used end.
                    |mut cache| {
                        while !cache.is_empty() {
                            black_box(cache.pop_lru());
                        }
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
// Registers the six LruCache benchmarks under the `benches` group and
// generates the benchmark entry point through criterion_main.
criterion_group!(
benches,
lru_cache_put,
lru_cache_get,
lru_cache_peek,
lru_cache_contains,
lru_cache_pop,
lru_cache_pop_lru,
);
criterion_main!(benches);

View File

@ -1,509 +0,0 @@
/*
* Copyright 2025 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use std::{borrow::Borrow, collections::HashMap, hash::Hash, hash::Hasher};
/// KeyRef is a reference to the key, stored as a raw pointer so the map key
/// does not need its own copy of the key. The Hash and PartialEq
/// implementations dereference the pointer, so the pointee must remain valid
/// while the KeyRef is in use.
#[derive(Debug, Clone, Copy)]
struct KeyRef<K> {
// k is the raw pointer to the key; `LruCache::put` points it at the `key`
// field of the boxed Entry stored under this KeyRef.
k: *const K,
}
/// KeyRef implements Hash by hashing the pointed-to key, not the pointer value.
impl<K: Hash> Hash for KeyRef<K> {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        // The pointer is dereferenced without any validity check.
        let key: &K = unsafe { &*self.k };
        key.hash(state);
    }
}
/// KeyRef implements PartialEq by comparing the pointed-to keys, not the
/// pointer addresses.
impl<K: PartialEq> PartialEq for KeyRef<K> {
    fn eq(&self, other: &Self) -> bool {
        // Both pointers are dereferenced without any validity check.
        let (lhs, rhs): (&K, &K) = unsafe { (&*self.k, &*other.k) };
        lhs.eq(rhs)
    }
}
/// KeyRef implements Eq whenever the key type is Eq; equality is decided by
/// the PartialEq implementation for KeyRef.
impl<K: Eq> Eq for KeyRef<K> {}
/// KeyWrapper is a wrapper for the key. It is `repr(transparent)`, which is
/// what `KeyWrapper::from_ref` relies on when it reinterprets a `&K` as a
/// `&KeyWrapper<K>`. The cache uses it as the borrowed form for map lookups.
#[repr(transparent)]
struct KeyWrapper<K: ?Sized>(K);
/// KeyWrapper implements reference conversion.
impl<K: ?Sized> KeyWrapper<K> {
/// from_ref creates a new KeyWrapper from a reference to the key.
///
/// No data is copied: the reference is cast through raw pointers to a
/// reference to the wrapper type.
fn from_ref(key: &K) -> &Self {
// Pointer cast from K to KeyWrapper<K>; KeyWrapper is declared
// #[repr(transparent)] over K.
unsafe { &*(key as *const K as *const KeyWrapper<K>) }
}
}
/// KeyWrapper implements Hash by delegating to the wrapped key.
impl<K: ?Sized + Hash> Hash for KeyWrapper<K> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(&self.0, state)
    }
}
/// KeyWrapper implements PartialEq by comparing the wrapped keys.
impl<K: ?Sized + PartialEq> PartialEq for KeyWrapper<K> {
    #![allow(unknown_lints)]
    #[allow(clippy::unconditional_recursion)]
    fn eq(&self, other: &Self) -> bool {
        // Compares the inner K values, so this is not a recursive call.
        PartialEq::eq(&self.0, &other.0)
    }
}
/// KeyWrapper implements Eq whenever the wrapped key type is Eq; equality is
/// decided by the PartialEq implementation for KeyWrapper.
impl<K: ?Sized + Eq> Eq for KeyWrapper<K> {}
/// KeyRef implements Borrow<KeyWrapper<Q>> so that a map keyed by KeyRef<K>
/// can be queried with any borrowed form Q of the key.
impl<K, Q> Borrow<KeyWrapper<Q>> for KeyRef<K>
where
    K: Borrow<Q>,
    Q: ?Sized,
{
    /// borrow dereferences the stored key pointer, borrows the key as Q and
    /// wraps that reference in a KeyWrapper.
    fn borrow(&self) -> &KeyWrapper<Q> {
        // The pointer is dereferenced without any validity check.
        let key: &K = unsafe { &*self.k };
        let borrowed: &Q = key.borrow();
        KeyWrapper::from_ref(borrowed)
    }
}
/// Entry is a cache entry: a key/value pair that is also a node of the
/// doubly linked list the cache uses to track recency.
struct Entry<K, V> {
// key is the owned key of the entry.
key: K,
// value is the cached value.
value: V,
// prev is the neighbouring entry towards the head of the list, if any.
prev: Option<*mut Entry<K, V>>,
// next is the neighbouring entry towards the tail of the list, if any.
next: Option<*mut Entry<K, V>>,
}
/// Entry implements Drop for Entry.
impl<K, V> Entry<K, V> {
/// new creates a new Entry.
fn new(key: K, value: V) -> Self {
Self {
key,
value,
prev: None,
next: None,
}
}
}
/// LruCache is a least recently used cache. Entries are owned by a HashMap
/// and additionally threaded onto a doubly linked list through raw pointers.
pub struct LruCache<K, V> {
// capacity is the entry count at which `put` starts evicting the tail.
capacity: usize,
// map owns every entry (boxed) and is keyed by a pointer to the entry's key.
map: HashMap<KeyRef<K>, Box<Entry<K, V>>>,
// head is the entry that was attached most recently, if any.
head: Option<*mut Entry<K, V>>,
// tail is the entry evicted first by `put` and returned by `pop_lru`, if any.
tail: Option<*mut Entry<K, V>>,
_marker: std::marker::PhantomData<K>,
}
/// LruCache implements LruCache.
impl<K: Hash + Eq, V> LruCache<K, V> {
/// new creates a new LruCache.
pub fn new(capacity: usize) -> Self {
Self {
capacity,
map: HashMap::new(),
head: None,
tail: None,
_marker: std::marker::PhantomData,
}
}
/// get gets the value of the key.
pub fn get<'a, Q>(&'a mut self, k: &Q) -> Option<&'a V>
where
K: Borrow<Q>,
Q: Hash + Eq + ?Sized,
{
if let Some(entry) = self.map.get_mut(KeyWrapper::from_ref(k)) {
let entry_ptr: *mut Entry<K, V> = &mut **entry;
self.detach(entry_ptr);
self.attach(entry_ptr);
Some(&unsafe { &*entry_ptr }.value)
} else {
None
}
}
/// put puts the key and value into the cache.
pub fn put(&mut self, key: K, mut value: V) -> Option<V> {
if let Some(existing_entry) = self.map.get_mut(KeyWrapper::from_ref(&key)) {
let entry = existing_entry.as_mut();
std::mem::swap(&mut entry.value, &mut value);
let entry_ptr: *mut Entry<K, V> = entry;
self.detach(entry_ptr);
self.attach(entry_ptr);
return Some(value);
}
let mut evicted_value = None;
if self.map.len() >= self.capacity {
if let Some(tail) = self.tail {
self.detach(tail);
unsafe {
if let Some(entry) = self.map.remove(KeyWrapper::from_ref(&(*tail).key)) {
evicted_value = Some(entry.value);
}
}
}
}
let new_entry = Box::new(Entry::new(key, value));
let key_ptr: *const K = &new_entry.key;
let entry_ptr = Box::into_raw(new_entry);
unsafe {
self.attach(entry_ptr);
self.map
.insert(KeyRef { k: key_ptr }, Box::from_raw(entry_ptr));
}
evicted_value
}
/// detach detaches the entry from the cache.
fn detach(&mut self, entry: *mut Entry<K, V>) {
unsafe {
let prev = (*entry).prev;
let next = (*entry).next;
match prev {
Some(prev) => (*prev).next = next,
None => self.head = next,
}
match next {
Some(next) => (*next).prev = prev,
None => self.tail = prev,
}
(*entry).prev = None;
(*entry).next = None;
}
}
/// attach attaches the entry to the cache.
fn attach(&mut self, entry: *mut Entry<K, V>) {
match self.head {
Some(head) => {
unsafe {
(*entry).next = Some(head);
(*head).prev = Some(entry);
}
self.head = Some(entry);
}
None => {
self.head = Some(entry);
self.tail = Some(entry);
}
}
}
/// contains checks whether the key exists in the cache.
pub fn contains<Q>(&self, k: &Q) -> bool
where
K: Borrow<Q>,
Q: Hash + Eq + ?Sized,
{
self.map.contains_key(KeyWrapper::from_ref(k))
}
/// peek peeks the value of the key. It does not move the key to the front of the cache.
pub fn peek<'a, Q>(&'a self, k: &Q) -> Option<&'a V>
where
K: Borrow<Q>,
Q: Hash + Eq + ?Sized,
{
self.map
.get(KeyWrapper::from_ref(k))
.map(|entry| &entry.value)
}
/// pop_lru pops the least recently used value from the cache.
pub fn pop_lru(&mut self) -> Option<(K, V)> {
if self.is_empty() {
return None;
}
let tail = self.tail?;
self.detach(tail);
unsafe {
self.map
.remove(KeyWrapper::from_ref(&(*tail).key))
.map(|entry| (entry.key, entry.value))
}
}
/// pop removes the entry stored for the given key and returns its key and value.
/// It returns `None` when the key does not exist.
pub fn pop<Q>(&mut self, k: &Q) -> Option<(K, V)>
where
    K: Borrow<Q>,
    Q: Hash + Eq + ?Sized,
{
    let boxed = self.map.remove(KeyWrapper::from_ref(k))?;

    // Turn the box into a raw pointer so that the entry can be unlinked from the
    // recency list, then take ownership back to move the key and value out.
    let raw = Box::into_raw(boxed);
    self.detach(raw);
    let owned = unsafe { Box::from_raw(raw) };
    Some((owned.key, owned.value))
}
/// is_empty checks whether the cache is empty, i.e. whether the underlying map
/// holds no entries.
pub fn is_empty(&self) -> bool {
self.map.is_empty()
}
}
// NOTE(review): these impls are written by hand, presumably because the raw entry
// pointers used for the recency list (see detach/attach) prevent the compiler from
// deriving Send and Sync automatically. Confirm against the LruCache field definitions
// that no pointer is ever shared outside of the cache.
unsafe impl<K: Send, V: Send> Send for LruCache<K, V> {}
unsafe impl<K: Sync, V: Sync> Sync for LruCache<K, V> {}
/// Drop clears the map and resets both ends of the recency list, so that no pointer
/// to a removed entry is left behind in `head` or `tail`.
impl<K, V> Drop for LruCache<K, V> {
    fn drop(&mut self) {
        // The boxed entries are stored as map values; clearing the map drops them.
        self.map.clear();
        (self.head, self.tail) = (None, None);
    }
}
#[cfg(test)]
mod tests {
use super::*;
/// Verifies that a freshly created cache is empty and stores the requested capacity
/// unchanged, including the boundary values 0 and usize::MAX.
#[test]
fn test_new() {
let test_cases = vec![
// Normal capacity.
(5, 5),
// Minimum meaningful capacity.
(1, 1),
// Zero capacity.
(0, 0),
// Maximum capacity.
(usize::MAX, usize::MAX),
];
for (capacity, expected_capacity) in test_cases {
let cache: LruCache<String, i32> = LruCache::new(capacity);
assert!(cache.is_empty());
assert_eq!(cache.capacity, expected_capacity);
}
}
/// Verifies the value returned by each put (None for a new key, the previous value for
/// an update, the evicted value on overflow) and then reads the final contents with get.
/// The table is order dependent: updating key2 leaves key1 as the oldest entry.
#[test]
fn test_get() {
let mut cache: LruCache<String, i32> = LruCache::new(3);
let test_cases = vec![
// Initial insertions.
("key1", 1, None),
("key2", 2, None),
("key3", 3, None),
// Update existing key.
("key2", 22, Some(2)),
// Eviction of oldest key.
("key4", 4, Some(1)),
];
for (key, value, expected_result) in test_cases {
let result = cache.put(key.to_string(), value);
assert_eq!(result, expected_result);
}
// Verify final cache state.
assert_eq!(cache.get(&"key1".to_string()), None);
assert_eq!(cache.get(&"key2".to_string()).copied(), Some(22));
assert_eq!(cache.get(&"key3".to_string()).copied(), Some(3));
assert_eq!(cache.get(&"key4".to_string()).copied(), Some(4));
}
/// Verifies that get refreshes the recency of a key: after key1 and key3 have been
/// read, inserting a fourth key into a cache of capacity 3 evicts key2, which is the
/// only key that was not read.
#[test]
fn test_get_after_eviction() {
    let mut cache = LruCache::new(3);
    assert_eq!(cache.get(&"nonexistent".to_string()), None);

    // Prepare cache with initial values.
    for (key, value) in [("key1", 1), ("key2", 2), ("key3", 3)] {
        cache.put(key.to_string(), value);
    }

    // The order of the lookups matters, it defines which key is the least recently used.
    let test_cases = vec![
        ("key1", Some(1)),
        ("nonexistent", None),
        ("key1", Some(1)),
        ("key3", Some(3)),
    ];
    for (key, expected_value) in test_cases {
        assert_eq!(cache.get(&key.to_string()).copied(), expected_value);
    }

    // Test eviction after getting.
    cache.put("key4".to_string(), 4);
    assert_eq!(cache.get(&"key1".to_string()).copied(), Some(1));
    assert_eq!(cache.get(&"key2".to_string()), None);
    assert_eq!(cache.get(&"key3".to_string()).copied(), Some(3));
    assert_eq!(cache.get(&"key4".to_string()).copied(), Some(4));
}
/// Verifies the value returned by put when inserting within capacity, when overflowing
/// the capacity (the oldest value is returned) and when updating an existing key (the
/// previous value is returned), then checks the final cache contents.
#[test]
fn test_put() {
let mut cache = LruCache::new(3);
let test_cases = vec![
// Initial insertions within capacity.
("key1", 1, None),
("key2", 2, None),
("key3", 3, None),
// Overflow capacity, should evict oldest.
("key4", 4, Some(1)),
("key5", 5, Some(2)),
// Update existing key.
("key4", 44, Some(4)),
];
for (key, value, expected_result) in test_cases {
let result = cache.put(key.to_string(), value);
assert_eq!(result, expected_result);
}
// Verify final cache state.
assert_eq!(cache.get(&"key1".to_string()), None);
assert_eq!(cache.get(&"key2".to_string()), None);
assert_eq!(cache.get(&"key3".to_string()).copied(), Some(3));
assert_eq!(cache.get(&"key4".to_string()).copied(), Some(44));
assert_eq!(cache.get(&"key5".to_string()).copied(), Some(5));
}
/// Verifies that peek returns the stored values without refreshing their recency:
/// although key1 is peeked, it is still the entry evicted by the next insertion.
#[test]
fn test_peek() {
let mut cache: LruCache<String, i32> = LruCache::new(3);
assert_eq!(cache.peek(&"nonexistent".to_string()), None);
// Prepare cache with initial values.
for (key, value) in [("key1", 1), ("key2", 2), ("key3", 3)] {
cache.put(key.to_string(), value);
}
let test_cases = vec![
("nonexistent", None),
("key1", Some(1)),
("key2", Some(2)),
("key3", Some(3)),
];
for (key, expected_value) in test_cases {
assert_eq!(cache.peek(&key.to_string()).copied(), expected_value);
}
// Test eviction after peeking.
cache.put("key4".to_string(), 4);
assert_eq!(cache.peek(&"key1".to_string()), None);
assert_eq!(cache.peek(&"key2".to_string()).copied(), Some(2));
assert_eq!(cache.peek(&"key3".to_string()).copied(), Some(3));
assert_eq!(cache.peek(&"key4".to_string()).copied(), Some(4));
}
/// Verifies that contains reports the presence of keys without refreshing their
/// recency: although key1 is checked, it is still the entry evicted by the next insertion.
#[test]
fn test_contains() {
let mut cache: LruCache<String, i32> = LruCache::new(3);
assert!(!cache.contains(&"nonexistent".to_string()));
// Prepare cache with initial values.
for (key, value) in [("key1", 1), ("key2", 2), ("key3", 3)] {
cache.put(key.to_string(), value);
}
let test_cases = vec![
("nonexistent", false),
("key1", true),
("key2", true),
("key3", true),
];
for (key, expected_result) in test_cases {
assert_eq!(cache.contains(&key.to_string()), expected_result);
}
// Test eviction after contains.
cache.put("key4".to_string(), 4);
assert!(!cache.contains(&"key1".to_string()));
assert!(cache.contains(&"key2".to_string()));
assert!(cache.contains(&"key3".to_string()));
assert!(cache.contains(&"key4".to_string()));
}
/// Verifies that pop_lru returns None on an empty cache and otherwise hands back the
/// entries in insertion order (oldest first) until the cache is empty again.
#[test]
fn test_pop_lru() {
let mut cache: LruCache<String, i32> = LruCache::new(3);
assert_eq!(cache.pop_lru(), None);
for (key, value) in [("key1", 1), ("key2", 2), ("key3", 3)] {
cache.put(key.to_string(), value);
}
assert_eq!(cache.pop_lru(), Some(("key1".to_string(), 1)));
assert_eq!(cache.pop_lru(), Some(("key2".to_string(), 2)));
assert_eq!(cache.pop_lru(), Some(("key3".to_string(), 3)));
assert_eq!(cache.pop_lru(), None);
assert!(cache.is_empty());
}
/// Verifies that pop returns the key and value of an existing entry exactly once:
/// the first pop of each key yields the pair, the second pop of the same key yields None.
#[test]
fn test_pop() {
let mut cache: LruCache<String, i32> = LruCache::new(3);
let test_cases = vec![
("key1".to_string(), Some(("key1".to_string(), 1))),
("key2".to_string(), Some(("key2".to_string(), 2))),
("key3".to_string(), Some(("key3".to_string(), 3))),
("key1".to_string(), None),
("key2".to_string(), None),
("key3".to_string(), None),
];
cache.put("key1".to_string(), 1);
cache.put("key2".to_string(), 2);
cache.put("key3".to_string(), 3);
for (key, expected) in test_cases {
assert_eq!(cache.pop(&key), expected);
}
assert!(cache.is_empty());
}
}

View File

@ -1,989 +0,0 @@
/*
* Copyright 2025 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use bytes::Bytes;
use dragonfly_api::common::v2::Range;
use dragonfly_client_config::dfdaemon::Config;
use dragonfly_client_core::{Error, Result};
use lru_cache::LruCache;
use std::cmp::{max, min};
use std::collections::HashMap;
use std::io::Cursor;
use std::sync::Arc;
use tokio::io::{AsyncRead, BufReader};
use tokio::sync::RwLock;
use tracing::info;
pub mod lru_cache;
/// Task is the task content in the cache.
///
/// The pieces are held behind an `Arc`, so cloning a Task yields a handle to the
/// same piece map rather than a copy of the pieces.
#[derive(Clone, Debug)]
struct Task {
/// content_length is the length of the task content, as passed to `Task::new`.
/// It is the value the cache adds to and subtracts from its size for this task.
content_length: u64,
/// pieces is the pieces content of the task, keyed by piece id.
pieces: Arc<RwLock<HashMap<String, Bytes>>>,
}
/// Task implements the task content in the cache.
impl Task {
/// new creates a new task.
fn new(content_length: u64) -> Self {
Self {
content_length,
pieces: Arc::new(RwLock::new(HashMap::new())),
}
}
/// write_piece writes the piece content to the task.
async fn write_piece(&self, id: &str, piece: Bytes) {
let mut pieces = self.pieces.write().await;
pieces.insert(id.to_string(), piece);
}
/// read_piece reads the piece content from the task.
async fn read_piece(&self, id: &str) -> Option<Bytes> {
let pieces = self.pieces.read().await;
pieces.get(id).cloned()
}
/// contains checks whether the piece exists in the task.
async fn contains(&self, id: &str) -> bool {
let pieces = self.pieces.read().await;
pieces.contains_key(id)
}
/// content_length returns the content length of the task.
fn content_length(&self) -> u64 {
self.content_length
}
}
/// Cache is the cache for storing piece content by LRU algorithm.
///
/// Cache storage:
/// 1. Users can preheat task by caching to memory (via CacheTask) or to disk (via Task).
/// For more details, refer to https://github.com/dragonflyoss/api/blob/main/proto/dfdaemon.proto#L174.
/// 2. If the download hits the memory cache, it will be faster than reading from the disk, because there is no
/// page cache for the first read.
///
///```text
/// +--------+
/// │ Source │
/// +--------+
/// ^ ^ Preheat
/// │ │ |
/// +-----------------+ │ │ +----------------------------+
/// │ Other Peers │ │ │ │ Peer | │
/// │ │ │ │ │ v │
/// │ +----------+ │ │ │ │ +----------+ │
/// │ │ Cache |<--|----------|<-Miss--| Cache |--Hit-->|<----Download CacheTask
/// │ +----------+ │ │ │ +----------+ │
/// │ │ │ │ │
/// │ +----------+ │ │ │ +----------+ │
/// │ │ Disk |<--|----------|<-Miss--| Disk |--Hit-->|<----Download Task
/// │ +----------+ │ │ +----------+ │
/// │ │ │ ^ │
/// │ │ │ | │
/// +-----------------+ +----------------------------+
/// |
/// Preheat
///```
/// Cache keeps the cached tasks together with the total cached size and the capacity, both in bytes.
// NOTE(review): Clone copies `size` and `capacity` by value while `tasks` stays shared
// through the Arc, so two clones that both call put_task or delete_task can disagree
// about the cached size. Confirm that only one instance mutates the cache.
#[derive(Clone)]
pub struct Cache {
/// config is the configuration of the dfdaemon.
config: Arc<Config>,
/// size is the size of the cache in bytes. It is the sum of the content lengths of
/// the tasks added by put_task, not the number of piece bytes actually written.
size: u64,
/// capacity is the maximum capacity of the cache in bytes, read from
/// `config.storage.cache_capacity` when the cache is created.
capacity: u64,
/// tasks stores the tasks with their task id, ordered by recency of use.
tasks: Arc<RwLock<LruCache<String, Task>>>,
}
/// Cache implements the cache for storing piece content by LRU algorithm.
impl Cache {
/// new creates an empty cache. The capacity in bytes is taken from
/// `config.storage.cache_capacity`.
pub fn new(config: Arc<Config>) -> Self {
    let capacity = config.storage.cache_capacity.as_u64();

    // The LRU cache is created with a capacity of usize::MAX so that it never evicts
    // tasks by entry count. Eviction is driven by the byte size tracked in this
    // struct, which uses pop_lru to drop the least recently used task.
    let tasks = Arc::new(RwLock::new(LruCache::new(usize::MAX)));

    Cache {
        config,
        size: 0,
        capacity,
        tasks,
    }
}
/// read_piece reads the piece from the cache.
pub async fn read_piece(
&self,
task_id: &str,
piece_id: &str,
piece: super::metadata::Piece,
range: Option<Range>,
) -> Result<impl AsyncRead> {
let mut tasks = self.tasks.write().await;
let Some(task) = tasks.get(task_id) else {
return Err(Error::TaskNotFound(task_id.to_string()));
};
let Some(piece_content) = task.read_piece(piece_id).await else {
return Err(Error::PieceNotFound(piece_id.to_string()));
};
drop(tasks);
// Calculate the range of bytes to return based on the range provided.
let (target_offset, target_length) = if let Some(range) = range {
let target_offset = max(piece.offset, range.start) - piece.offset;
let target_length = min(
piece.offset + piece.length - 1,
range.start + range.length - 1,
) - target_offset
- piece.offset
+ 1;
(target_offset as usize, target_length as usize)
} else {
(0, piece.length as usize)
};
// Check if the target range is valid.
let begin = target_offset;
let end = target_offset + target_length;
if begin >= piece_content.len() || end > piece_content.len() {
return Err(Error::InvalidParameter);
}
let content = piece_content.slice(begin..end);
let reader =
BufReader::with_capacity(self.config.storage.read_buffer_size, Cursor::new(content));
Ok(reader)
}
/// write_piece writes the piece content to the cache.
pub async fn write_piece(&self, task_id: &str, piece_id: &str, content: Bytes) -> Result<()> {
let mut tasks = self.tasks.write().await;
let Some(task) = tasks.get(task_id) else {
return Err(Error::TaskNotFound(task_id.to_string()));
};
if task.contains(piece_id).await {
return Ok(());
}
task.write_piece(piece_id, content).await;
Ok(())
}
/// put_task puts a new task into the cache, constrained by the capacity of the cache.
///
/// Behavior of `put_task`:
/// 1. A task with a content length of 0 is not cached.
/// 2. A task whose content length is larger than the capacity is not cached.
/// 3. If a task with the same id is cached already, it is replaced by a new empty
///    task and its content length is released from the cache size first.
/// 4. Least recently used tasks are evicted until the new task fits.
pub async fn put_task(&mut self, task_id: &str, content_length: u64) {
    // If the content length is 0, we don't cache the task.
    if content_length == 0 {
        return;
    }

    // If the content length is larger than the cache capacity, the task cannot be cached.
    if content_length > self.capacity {
        info!(
            "task {} is too large and cannot be cached: {}",
            task_id, content_length
        );
        return;
    }

    let mut tasks = self.tasks.write().await;

    // Release the size of a task cached under the same id. Without this step the
    // replaced task would stay accounted for in the size forever.
    if let Some((_, replaced)) = tasks.pop(task_id) {
        self.size = self.size.saturating_sub(replaced.content_length());
    }

    // Evict the least recently used tasks until the new task fits.
    while self.size + content_length > self.capacity {
        match tasks.pop_lru() {
            Some((_, task)) => {
                self.size = self.size.saturating_sub(task.content_length());
            }
            None => {
                break;
            }
        }
    }

    let task = Task::new(content_length);
    tasks.put(task_id.to_string(), task);
    self.size += content_length;
}
/// delete_task removes the task from the cache and releases its content length from
/// the cache size.
///
/// Returns `Error::TaskNotFound` when the task is not cached.
pub async fn delete_task(&mut self, task_id: &str) -> Result<()> {
    let mut tasks = self.tasks.write().await;
    let Some((_, task)) = tasks.pop(task_id) else {
        return Err(Error::TaskNotFound(task_id.to_string()));
    };

    // The task map is shared between clones of the cache while the size is not, so
    // the removed task may never have been added to this size. Saturate instead of
    // underflowing in that case.
    self.size = self.size.saturating_sub(task.content_length());
    Ok(())
}
/// contains_task reports whether a task with the given id is cached. Only the read
/// lock is taken and the recency of the task is not refreshed.
pub async fn contains_task(&self, id: &str) -> bool {
    let guard = self.tasks.read().await;
    let found = guard.contains(id);
    found
}
/// contains_piece reports whether the piece has been written to the specified task.
/// It returns false when the task itself is not cached. The task is looked up with
/// `peek`, so its recency is not refreshed.
pub async fn contains_piece(&self, task_id: &str, piece_id: &str) -> bool {
    let guard = self.tasks.read().await;
    let found = match guard.peek(task_id) {
        Some(task) => task.contains(piece_id).await,
        None => false,
    };
    found
}
}
#[cfg(test)]
mod tests {
use super::super::metadata::Piece;
use super::*;
use bytesize::ByteSize;
use dragonfly_api::common::v2::Range;
use dragonfly_client_config::dfdaemon::Storage;
use tokio::io::AsyncReadExt;
#[tokio::test]
async fn test_new() {
let test_cases = vec![
// Default configuration with 64MiB capacity.
(Config::default(), 0, ByteSize::mib(64).as_u64()),
// Custom configuration with 100MiB capacity.
(
Config {
storage: Storage {
cache_capacity: ByteSize::mib(100),
..Default::default()
},
..Default::default()
},
0,
ByteSize::mib(100).as_u64(),
),
// Zero capacity configuration.
(
Config {
storage: Storage {
cache_capacity: ByteSize::b(0),
..Default::default()
},
..Default::default()
},
0,
0,
),
];
for (config, expected_size, expected_capacity) in test_cases {
let cache = Cache::new(Arc::new(config));
assert_eq!(cache.size, expected_size);
assert_eq!(cache.capacity, expected_capacity);
}
}
/// Verifies contains_task against a scripted sequence of operations. Tasks are added
/// and removed directly on the underlying LRU cache, so the size accounting of
/// put_task is bypassed here.
#[tokio::test]
async fn test_contains_task() {
let config = Config {
storage: Storage {
cache_capacity: ByteSize::mib(10),
..Default::default()
},
..Default::default()
};
let cache = Cache::new(Arc::new(config));
let test_cases = vec![
// Test non-existent task.
("check", "non_existent", 0, false),
// Add and verify task.
("add", "task1", ByteSize::mib(1).as_u64(), true),
("check", "task1", 0, true),
// Remove and verify task.
("remove", "task1", 0, false),
("check", "task1", 0, false),
// Test multiple tasks.
("add", "task1", ByteSize::mib(1).as_u64(), true),
("add", "task2", ByteSize::mib(2).as_u64(), true),
("check", "task1", 0, true),
("check", "task2", 0, true),
("check", "task3", 0, false),
];
for (operation, task_id, content_length, expected_result) in test_cases {
match operation {
"check" => {
assert_eq!(cache.contains_task(task_id).await, expected_result);
}
"add" => {
let task = Task::new(content_length);
cache.tasks.write().await.put(task_id.to_string(), task);
assert_eq!(cache.contains_task(task_id).await, expected_result);
}
"remove" => {
// pop_lru removes the least recently used task, not the task named in
// the row. The table only uses "remove" while a single task is cached.
cache.tasks.write().await.pop_lru();
assert_eq!(cache.contains_task(task_id).await, expected_result);
}
_ => panic!("Unknown operation."),
}
}
}
/// Verifies which tasks put_task accepts for a cache of 10MiB: empty tasks and tasks
/// larger than the capacity are rejected, tasks up to and including the capacity are cached.
#[tokio::test]
async fn test_put_task() {
let config = Config {
storage: Storage {
cache_capacity: ByteSize::mib(10),
..Default::default()
},
..Default::default()
};
let mut cache = Cache::new(Arc::new(config));
let test_cases = vec![
// Empty task should not be cached.
("empty_task", 0, false),
// Task equal to capacity should be cached.
("equal_capacity", ByteSize::mib(10).as_u64(), true),
// Task exceeding capacity should not be cached.
("exceed_capacity", ByteSize::mib(10).as_u64() + 1, false),
// Normal sized task should be cached.
("normal_task", ByteSize::mib(1).as_u64(), true),
];
for (task_id, size, should_exist) in test_cases {
// put_task is only called for non-zero sizes; the empty task is never put.
if size > 0 {
cache.put_task(task_id, size).await;
}
assert_eq!(cache.contains_task(task_id).await, should_exist);
}
}
/// Verifies the size-based eviction of put_task: with a capacity of 5MiB, putting a
/// third 2MiB task evicts the least recently used task (lru_task_1) and keeps the
/// other two.
#[tokio::test]
async fn test_put_task_lru() {
let config = Config {
storage: Storage {
cache_capacity: ByteSize::mib(5),
..Default::default()
},
..Default::default()
};
let mut cache = Cache::new(Arc::new(config));
let test_cases = vec![
// Add tasks until eviction triggers.
("lru_task_1", ByteSize::mib(2).as_u64(), true),
("lru_task_2", ByteSize::mib(2).as_u64(), true),
// Third task triggers eviction.
("lru_task_3", ByteSize::mib(2).as_u64(), true),
// Verify eviction results.
("lru_task_1", 0, false),
("lru_task_2", 0, true),
("lru_task_3", 0, true),
];
for (task_id, size, should_exist) in test_cases {
// Rows with a size of 0 only check the presence of the task.
if size > 0 {
cache.put_task(task_id, size).await;
}
assert_eq!(cache.contains_task(task_id).await, should_exist);
}
}
/// Verifies delete_task: deleting a cached task succeeds and removes it, deleting an
/// unknown task id (including the empty id) returns an error and leaves the cache unchanged.
#[tokio::test]
async fn test_delete_task() {
let config = Config {
storage: Storage {
cache_capacity: ByteSize::mib(10),
..Default::default()
},
..Default::default()
};
let mut cache = Cache::new(Arc::new(config));
cache.put_task("task1", ByteSize::mib(1).as_u64()).await;
cache.put_task("task2", ByteSize::mib(1).as_u64()).await;
cache.put_task("task3", ByteSize::mib(1).as_u64()).await;
let test_cases = vec![
("task1", true),
("task2", true),
("task3", true),
("nonexistent", false),
("", false),
("large_task", false),
];
for (task_id, exists) in test_cases {
assert_eq!(cache.contains_task(task_id).await, exists);
let result = cache.delete_task(task_id).await;
if exists {
assert!(result.is_ok());
} else {
assert!(result.is_err());
}
assert!(!cache.contains_task(task_id).await);
}
// None of the ids may be present after the loop, whether deleted or never cached.
assert!(!cache.contains_task("task1").await);
assert!(!cache.contains_task("task2").await);
assert!(!cache.contains_task("task3").await);
assert!(!cache.contains_task("nonexistent").await);
assert!(!cache.contains_task("").await);
assert!(!cache.contains_task("large_task").await);
}
#[tokio::test]
async fn test_contains_piece() {
let config = Config {
storage: Storage {
cache_capacity: ByteSize::mib(10),
..Default::default()
},
..Default::default()
};
let mut cache = Cache::new(Arc::new(config));
let test_cases = vec![
// Check non-existent task.
("check", "non_existent", "piece1", "", false),
// Check empty piece ID in non-existent task.
("check", "non_existent", "", "", false),
// Add task and verify empty task behavior.
("add_task", "task1", "", "", true),
("check", "task1", "piece1", "", false),
// Add piece and verify existence.
("add_piece", "task1", "piece1", "test data", true),
("check", "task1", "piece1", "", true),
// Check empty piece ID in existing task.
("check", "task1", "", "", false),
// Check non-existent piece in existing task.
("check", "task1", "non_existent_piece", "", false),
// Test piece ID with special characters.
("add_piece", "task1", "piece#$%^&*", "test data", true),
("check", "task1", "piece#$%^&*", "", true),
];
for (operation, task_id, piece_id, content, expected_result) in test_cases {
match operation {
"check" => {
assert_eq!(
cache.contains_piece(task_id, piece_id).await,
expected_result
);
}
"add_task" => {
cache.put_task(task_id, 1000).await;
assert!(cache.contains_task(task_id).await);
}
"add_piece" => {
cache
.write_piece(task_id, piece_id, Bytes::from(content))
.await
.unwrap();
assert_eq!(
cache.contains_piece(task_id, piece_id).await,
expected_result
);
}
_ => panic!("Unknown operation."),
}
}
}
#[tokio::test]
async fn test_write_piece() {
let config = Config {
storage: Storage {
cache_capacity: ByteSize::mib(10),
..Default::default()
},
..Default::default()
};
let mut cache = Cache::new(Arc::new(config));
// Test writing to non-existent task.
let test_data = b"test data".to_vec();
let result = cache
.write_piece("non_existent", "piece1", Bytes::from(test_data))
.await;
assert!(matches!(result, Err(Error::TaskNotFound(_))));
// Create a task for testing.
cache.put_task("task1", ByteSize::mib(1).as_u64()).await;
assert!(cache.contains_task("task1").await);
let test_cases = vec![
("piece1", b"hello world".to_vec()),
("piece2", b"rust programming".to_vec()),
("piece3", b"dragonfly cache".to_vec()),
("piece4", b"unit testing".to_vec()),
("piece5", b"async await".to_vec()),
("piece6", b"error handling".to_vec()),
("piece7", vec![0u8; 1024]),
("piece8", vec![1u8; 2048]),
];
for (piece_id, content) in &test_cases {
let result = cache
.write_piece("task1", piece_id, Bytes::copy_from_slice(content))
.await;
assert!(result.is_ok());
assert!(cache.contains_piece("task1", piece_id).await);
let piece = Piece {
number: 0,
offset: 0,
length: content.len() as u64,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
};
let mut reader = cache
.read_piece("task1", piece_id, piece, None)
.await
.unwrap();
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
assert_eq!(buffer, *content);
}
// Test attempting to overwrite existing pieces.
// The write should succeed (return Ok) but content should not change.
for (piece_id, original_content) in &test_cases {
let new_content = format!("updated content for {}", piece_id);
let result = cache
.write_piece("task1", piece_id, Bytes::from(new_content))
.await;
assert!(result.is_ok());
// Verify content remains unchanged.
let piece = Piece {
number: 0,
offset: 0,
length: original_content.len() as u64,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
};
let mut reader = cache
.read_piece("task1", piece_id, piece, None)
.await
.unwrap();
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
assert_eq!(buffer, *original_content);
}
}
#[tokio::test]
async fn test_read_piece() {
let config = Config {
storage: Storage {
cache_capacity: ByteSize::mib(100),
..Default::default()
},
..Default::default()
};
let mut cache = Cache::new(Arc::new(config));
let piece = Piece {
number: 0,
offset: 0,
length: 11,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
};
let result = cache
.read_piece("non_existent", "piece1", piece.clone(), None)
.await;
assert!(matches!(result, Err(Error::TaskNotFound(_))));
cache.put_task("task1", ByteSize::mib(50).as_u64()).await;
let result = cache
.read_piece("task1", "non_existent", piece.clone(), None)
.await;
assert!(matches!(result, Err(Error::PieceNotFound(_))));
let test_pieces = vec![
// Small pieces for basic functionality testing.
(
"piece1",
b"hello world".to_vec(),
Piece {
number: 0,
offset: 0,
length: 11,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
},
vec![
(None, b"hello world".to_vec()),
(
Some(Range {
start: 0,
length: 5,
}),
b"hello".to_vec(),
),
],
),
(
"piece2",
b"rust lang".to_vec(),
Piece {
number: 1,
offset: 11,
length: 9,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
},
vec![
(None, b"rust lang".to_vec()),
(
Some(Range {
start: 11,
length: 4,
}),
b"rust".to_vec(),
),
],
),
(
"piece3",
b"unit test".to_vec(),
Piece {
number: 2,
offset: 20,
length: 9,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
},
vec![
(None, b"unit test".to_vec()),
(
Some(Range {
start: 20,
length: 4,
}),
b"unit".to_vec(),
),
],
),
// Large piece for boundary testing.
(
"large_piece",
{
let size = ByteSize::mib(50).as_u64();
(0..size).map(|i| (i % 256) as u8).collect()
},
Piece {
number: 2,
offset: 0,
length: ByteSize::mib(50).as_u64(),
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
},
vec![
// Full read.
(
None,
(0..ByteSize::mib(50).as_u64())
.map(|i| (i % 256) as u8)
.collect(),
),
// Read first 1MiB.
(
Some(Range {
start: 0,
length: ByteSize::mib(1).as_u64(),
}),
(0..ByteSize::mib(1).as_u64())
.map(|i| (i % 256) as u8)
.collect(),
),
// Read last 1MiB.
(
Some(Range {
start: ByteSize::mib(49).as_u64(),
length: ByteSize::mib(1).as_u64(),
}),
(ByteSize::mib(49).as_u64()..ByteSize::mib(50).as_u64())
.map(|i| (i % 256) as u8)
.collect(),
),
],
),
];
// Write all pieces.
for (id, content, _, _) in &test_pieces {
cache
.write_piece("task1", id, Bytes::copy_from_slice(content))
.await
.unwrap();
}
// Test all pieces with their read ranges.
for (id, _, piece, ranges) in &test_pieces {
for (range, expected_content) in ranges {
let mut reader = cache
.read_piece("task1", id, piece.clone(), *range)
.await
.unwrap();
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
assert_eq!(&buffer, expected_content);
}
}
}
#[tokio::test]
async fn test_concurrent_read_same_piece() {
let config = Config {
storage: Storage {
cache_capacity: ByteSize::mib(10),
..Default::default()
},
..Default::default()
};
let mut cache = Cache::new(Arc::new(config));
cache.put_task("task1", ByteSize::mib(1).as_u64()).await;
let content = b"test data for concurrent read".to_vec();
cache
.write_piece("task1", "piece1", Bytes::from(content.clone()))
.await
.unwrap();
let cache_arc = Arc::new(cache);
let mut join_set = tokio::task::JoinSet::new();
// Spawn concurrent readers.
for i in 0..50 {
let cache_clone = cache_arc.clone();
let expected_content = content.clone();
join_set.spawn(async move {
let piece = Piece {
number: 0,
offset: 0,
length: expected_content.len() as u64,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
};
let range = if i % 2 == 0 {
None
} else {
Some(Range {
start: 0,
length: 5,
})
};
let mut reader = cache_clone
.read_piece("task1", "piece1", piece, range)
.await
.unwrap_or_else(|e| panic!("Reader {} failed: {:?}.", i, e));
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
if let Some(range) = range {
assert_eq!(buffer, &expected_content[..range.length as usize]);
} else {
assert_eq!(buffer, expected_content);
}
});
}
while let Some(result) = join_set.join_next().await {
assert!(result.is_ok());
}
}
#[tokio::test]
async fn test_concurrent_write_different_pieces() {
let config = Config {
storage: Storage {
cache_capacity: ByteSize::mib(10),
..Default::default()
},
..Default::default()
};
let mut cache = Cache::new(Arc::new(config));
cache.put_task("task1", ByteSize::mib(1).as_u64()).await;
let cache_arc = Arc::new(cache);
let mut join_set = tokio::task::JoinSet::new();
// Spawn concurrent writers.
for i in 0..50 {
let cache_clone = cache_arc.clone();
let content = format!("content for piece {}", i).into_bytes();
join_set.spawn(async move {
let piece_id = format!("piece{}", i);
let result = cache_clone
.write_piece("task1", &piece_id, Bytes::from(content.clone()))
.await;
assert!(result.is_ok());
let piece = Piece {
number: 0,
offset: 0,
length: content.len() as u64,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
};
let mut reader = cache_clone
.read_piece("task1", &piece_id, piece, None)
.await
.unwrap();
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
assert_eq!(buffer, content);
});
}
while let Some(result) = join_set.join_next().await {
assert!(result.is_ok());
}
}
#[tokio::test]
async fn test_concurrent_write_same_piece() {
let config = Config {
storage: Storage {
cache_capacity: ByteSize::mib(10),
..Default::default()
},
..Default::default()
};
let mut cache = Cache::new(Arc::new(config));
cache.put_task("task1", ByteSize::mib(1).as_u64()).await;
let original_content = b"original content".to_vec();
cache
.write_piece("task1", "piece1", Bytes::from(original_content.clone()))
.await
.unwrap();
let cache_arc = Arc::new(cache);
let mut join_set = tokio::task::JoinSet::new();
// Spawn concurrent writers.
for i in 0..50 {
let cache_clone = cache_arc.clone();
let new_content = format!("new content from writer {}", i).into_bytes();
join_set.spawn(async move {
let result = cache_clone
.write_piece("task1", "piece1", Bytes::from(new_content))
.await;
assert!(result.is_ok());
});
}
while let Some(result) = join_set.join_next().await {
assert!(result.is_ok());
}
let piece = Piece {
number: 0,
offset: 0,
length: original_content.len() as u64,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
};
let mut reader = cache_arc
.read_piece("task1", "piece1", piece, None)
.await
.unwrap();
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
assert_eq!(buffer, original_content);
}
}

View File

@ -14,11 +14,10 @@
* limitations under the License.
*/
use bytesize::ByteSize;
use crc::*;
use dragonfly_api::common::v2::Range;
use dragonfly_client_config::dfdaemon::Config;
use dragonfly_client_core::{Error, Result};
use dragonfly_client_util::fs::fallocate;
use dragonfly_client_core::Result;
use std::cmp::{max, min};
use std::path::{Path, PathBuf};
use std::sync::Arc;
@ -28,7 +27,6 @@ use tokio::io::{
};
use tokio_util::io::InspectReader;
use tracing::{error, info, instrument, warn};
use walkdir::WalkDir;
/// DEFAULT_CONTENT_DIR is the default directory for store content.
pub const DEFAULT_CONTENT_DIR: &str = "content";
@ -69,6 +67,7 @@ pub struct WritePersistentCacheTaskResponse {
/// Content implements the content storage.
impl Content {
/// new returns a new content.
#[instrument(skip_all)]
pub async fn new(config: Arc<Config>, dir: &Path) -> Result<Content> {
let dir = dir.join(DEFAULT_CONTENT_DIR);
@ -87,39 +86,12 @@ impl Content {
/// available_space returns the available space of the disk.
pub fn available_space(&self) -> Result<u64> {
let dist_threshold = self.config.gc.policy.dist_threshold;
if dist_threshold != ByteSize::default() {
let usage_space = WalkDir::new(&self.dir)
.into_iter()
.filter_map(|entry| entry.ok())
.filter_map(|entry| entry.metadata().ok())
.filter(|metadata| metadata.is_file())
.fold(0, |acc, m| acc + m.len());
if usage_space >= dist_threshold.as_u64() {
warn!(
"usage space {} is greater than dist threshold {}, no need to calculate available space",
usage_space, dist_threshold
);
return Ok(0);
}
return Ok(dist_threshold.as_u64() - usage_space);
}
let stat = fs2::statvfs(&self.dir)?;
Ok(stat.available_space())
}
/// total_space returns the total space of the disk.
pub fn total_space(&self) -> Result<u64> {
// If the dist_threshold is set, return it directly.
let dist_threshold = self.config.gc.policy.dist_threshold;
if dist_threshold != ByteSize::default() {
return Ok(dist_threshold.as_u64());
}
let stat = fs2::statvfs(&self.dir)?;
Ok(stat.total_space())
}
@ -139,100 +111,93 @@ impl Content {
Ok(true)
}
/// is_same_dev_inode checks if the source and target are the same device and inode.
async fn is_same_dev_inode<P: AsRef<Path>, Q: AsRef<Path>>(
/// hard_link_or_copy_task hard links or copies the task content to the destination.
#[instrument(skip_all)]
pub async fn hard_link_or_copy_task(
&self,
source: P,
target: Q,
) -> Result<bool> {
let source_metadata = fs::metadata(source).await?;
let target_metadata = fs::metadata(target).await?;
task: &crate::metadata::Task,
to: &Path,
range: Option<Range>,
) -> Result<()> {
let task_path = self.get_task_path(task.id.as_str());
#[cfg(unix)]
{
use std::os::unix::fs::MetadataExt;
Ok(source_metadata.dev() == target_metadata.dev()
&& source_metadata.ino() == target_metadata.ino())
}
// Copy the task content to the destination by range
// if the range is specified.
if let Some(range) = range {
// If the range length is 0, no need to copy. Need to open the file to
// ensure the file exists.
if range.length == 0 {
info!("range length is 0, no need to copy");
File::create(to).await.inspect_err(|err| {
error!("create {:?} failed: {}", to, err);
})?;
#[cfg(not(unix))]
{
Err(Error::IO(io::Error::new(
io::ErrorKind::Unsupported,
"platform not supported",
)))
}
}
/// is_same_dev_inode_as_task resolves the content path of the task and delegates to
/// `is_same_dev_inode` to compare it with `to`.
pub async fn is_same_dev_inode_as_task(&self, task_id: &str, to: &Path) -> Result<bool> {
    self.is_same_dev_inode(self.get_task_path(task_id), to).await
}
/// create_task creates a new task content.
///
/// Behavior of `create_task`:
/// 1. If the task already exists, return the task path.
/// 2. If the task does not exist, create the task directory and file.
#[instrument(skip_all)]
pub async fn create_task(&self, task_id: &str, length: u64) -> Result<PathBuf> {
let task_path = self.get_task_path(task_id);
if task_path.exists() {
return Ok(task_path);
}
let task_dir = self.dir.join(DEFAULT_TASK_DIR).join(&task_id[..3]);
fs::create_dir_all(&task_dir).await.inspect_err(|err| {
error!("create {:?} failed: {}", task_dir, err);
})?;
let f = fs::File::create(task_dir.join(task_id))
.await
.inspect_err(|err| {
error!("create {:?} failed: {}", task_dir, err);
})?;
fallocate(&f, length).await.inspect_err(|err| {
error!("fallocate {:?} failed: {}", task_dir, err);
})?;
Ok(task_dir.join(task_id))
}
/// Hard links the task content to the destination.
///
/// Behavior of `hard_link_task`:
/// 1. If the destination exists:
/// 1.1. If the source and destination share the same device and inode, return immediately.
/// 1.2. Otherwise, return an error.
/// 2. If the destination does not exist:
/// 2.1. If the hard link succeeds, return immediately.
/// 2.2. If the hard link fails, copy the task content to the destination once the task is finished, then return immediately.
#[instrument(skip_all)]
pub async fn hard_link_task(&self, task_id: &str, to: &Path) -> Result<()> {
let task_path = self.get_task_path(task_id);
if let Err(err) = fs::hard_link(task_path.clone(), to).await {
if err.kind() == std::io::ErrorKind::AlreadyExists {
if let Ok(true) = self.is_same_dev_inode(&task_path, to).await {
info!("hard already exists, no need to operate");
return Ok(());
}
return Ok(());
}
self.copy_task_by_range(task.id.as_str(), to, range)
.await
.inspect_err(|err| {
error!("copy range {:?} to {:?} failed: {}", task_path, to, err);
})?;
info!("copy range {:?} to {:?} success", task_path, to);
return Ok(());
}
// If the hard link fails, copy the task content to the destination.
fs::remove_file(to).await.unwrap_or_else(|err| {
info!("remove {:?} failed: {}", to, err);
});
if let Err(err) = self.hard_link_task(task.id.as_str(), to).await {
warn!("hard link {:?} to {:?} failed: {}", task_path, to, err);
return Err(Error::IO(err));
// If the task is empty, no need to copy. Need to open the file to
// ensure the file exists.
if task.is_empty() {
info!("task is empty, no need to copy");
File::create(to).await.inspect_err(|err| {
error!("create {:?} failed: {}", to, err);
})?;
return Ok(());
}
self.copy_task(task.id.as_str(), to)
.await
.inspect_err(|err| {
error!("copy {:?} to {:?} failed: {}", task_path, to, err);
})?;
info!("copy {:?} to {:?} success", task_path, to);
return Ok(());
}
info!("hard link {:?} to {:?} success", task_path, to);
Ok(())
}
/// hard_link_task creates a hard link at `link` that points to the content file of the task.
#[instrument(skip_all)]
async fn hard_link_task(&self, task_id: &str, link: &Path) -> Result<()> {
    let task_path = self.get_task_path(task_id);
    Ok(fs::hard_link(task_path, link).await?)
}
/// copy_task copies the task content to the destination.
#[instrument(skip_all)]
pub async fn copy_task(&self, task_id: &str, to: &Path) -> Result<()> {
async fn copy_task(&self, task_id: &str, to: &Path) -> Result<()> {
// Ensure the parent directory of the destination exists.
if let Some(parent) = to.parent() {
if !parent.exists() {
fs::create_dir_all(parent).await.inspect_err(|err| {
error!("failed to create directory {:?}: {}", parent, err);
})?;
}
}
fs::copy(self.get_task_path(task_id), to).await?;
info!("copy to {:?} success", to);
Ok(())
}
@ -268,6 +233,7 @@ impl Content {
}
/// delete_task deletes the task content.
#[instrument(skip_all)]
pub async fn delete_task(&self, task_id: &str) -> Result<()> {
info!("delete task content: {}", task_id);
let task_path = self.get_task_path(task_id);
@ -353,18 +319,19 @@ impl Content {
Ok((range_reader, reader))
}
/// write_piece writes the piece to the content and calculates the hash of the piece by crc32.
/// write_piece_with_crc32_castagnoli writes the piece to the content with crc32 castagnoli.
/// Calculate the hash of the piece by crc32 castagnoli with hardware acceleration.
#[instrument(skip_all)]
pub async fn write_piece<R: AsyncRead + Unpin + ?Sized>(
pub async fn write_piece_with_crc32_castagnoli<R: AsyncRead + Unpin + ?Sized>(
&self,
task_id: &str,
offset: u64,
expected_length: u64,
reader: &mut R,
) -> Result<WritePieceResponse> {
// Open the file and seek to the offset.
let task_path = self.get_task_path(task_id);
let task_path = self.create_or_get_task_path(task_id).await?;
let mut f = OpenOptions::new()
.create(true)
.truncate(false)
.write(true)
.open(task_path.as_path())
@ -377,15 +344,16 @@ impl Content {
error!("seek {:?} failed: {}", task_path, err);
})?;
let reader = BufReader::with_capacity(self.config.storage.write_buffer_size, reader);
let mut writer = BufWriter::with_capacity(self.config.storage.write_buffer_size, f);
// Copy the piece to the file while updating the CRC32 value.
let mut hasher = crc32fast::Hasher::new();
let reader = BufReader::with_capacity(self.config.storage.write_buffer_size, reader);
let crc = Crc::<u32, Table<16>>::new(&CRC_32_ISCSI);
let mut digest = crc.digest();
let mut tee = InspectReader::new(reader, |bytes| {
hasher.update(bytes);
digest.update(bytes);
});
let mut writer = BufWriter::with_capacity(self.config.storage.write_buffer_size, f);
let length = io::copy(&mut tee, &mut writer).await.inspect_err(|err| {
error!("copy {:?} failed: {}", task_path, err);
})?;
@ -394,21 +362,15 @@ impl Content {
error!("flush {:?} failed: {}", task_path, err);
})?;
if length != expected_length {
return Err(Error::Unknown(format!(
"expected length {} but got {}",
expected_length, length
)));
}
// Calculate the hash of the piece.
Ok(WritePieceResponse {
length,
hash: hasher.finalize().to_string(),
hash: digest.finalize().to_string(),
})
}
/// get_task_path returns the task path by task id.
#[instrument(skip_all)]
fn get_task_path(&self, task_id: &str) -> PathBuf {
// The task needs split by the first 3 characters of task id(sha256) to
// avoid too many files in one directory.
@ -416,76 +378,63 @@ impl Content {
self.dir.join(DEFAULT_TASK_DIR).join(sub_dir).join(task_id)
}
/// is_same_dev_inode_as_persistent_cache_task resolves the content path of the persistent
/// cache task and delegates to `is_same_dev_inode` to compare it with `to`.
pub async fn is_same_dev_inode_as_persistent_cache_task(
    &self,
    task_id: &str,
    to: &Path,
) -> Result<bool> {
    self.is_same_dev_inode(self.get_persistent_cache_task_path(task_id), to)
        .await
}
/// create_persistent_cache_task creates a new persistent cache task content.
///
/// Behavior of `create_persistent_cache_task`:
/// 1. If the persistent cache task already exists, return the persistent cache task path.
/// 2. If the persistent cache task does not exist, create the persistent cache task directory and file.
/// create_or_get_task_path creates parent directories or returns the task path by task id.
#[instrument(skip_all)]
pub async fn create_persistent_cache_task(
&self,
task_id: &str,
length: u64,
) -> Result<PathBuf> {
let task_path = self.get_persistent_cache_task_path(task_id);
if task_path.exists() {
return Ok(task_path);
}
let task_dir = self
.dir
.join(DEFAULT_PERSISTENT_CACHE_TASK_DIR)
.join(&task_id[..3]);
async fn create_or_get_task_path(&self, task_id: &str) -> Result<PathBuf> {
let task_dir = self.dir.join(DEFAULT_TASK_DIR).join(&task_id[..3]);
fs::create_dir_all(&task_dir).await.inspect_err(|err| {
error!("create {:?} failed: {}", task_dir, err);
})?;
let f = fs::File::create(task_dir.join(task_id))
.await
.inspect_err(|err| {
error!("create {:?} failed: {}", task_dir, err);
})?;
fallocate(&f, length).await.inspect_err(|err| {
error!("fallocate {:?} failed: {}", task_dir, err);
})?;
Ok(task_dir.join(task_id))
}
/// Hard links the persistent cache task content to the destination.
///
/// Behavior of `hard_link_persistent_cache_task`:
/// 1. If the destination exists:
/// 1.1. If the source and destination share the same device and inode, return immediately.
/// 1.2. Otherwise, return an error.
/// 2. If the destination does not exist:
/// 2.1. If the hard link succeeds, return immediately.
/// 2.2. If the hard link fails, copy the persistent cache task content to the destination once the task is finished, then return immediately.
/// hard_link_or_copy_persistent_cache_task hard links or copies the task content to the destination.
#[instrument(skip_all)]
pub async fn hard_link_persistent_cache_task(&self, task_id: &str, to: &Path) -> Result<()> {
let task_path = self.get_persistent_cache_task_path(task_id);
if let Err(err) = fs::hard_link(task_path.clone(), to).await {
if err.kind() == std::io::ErrorKind::AlreadyExists {
if let Ok(true) = self.is_same_dev_inode(&task_path, to).await {
info!("hard already exists, no need to operate");
return Ok(());
}
pub async fn hard_link_or_copy_persistent_cache_task(
&self,
task: &crate::metadata::PersistentCacheTask,
to: &Path,
) -> Result<()> {
// Ensure the parent directory of the destination exists.
if let Some(parent) = to.parent() {
if !parent.exists() {
fs::create_dir_all(parent).await.inspect_err(|err| {
error!("failed to create directory {:?}: {}", parent, err);
})?;
}
}
// Get the persistent cache task path.
let task_path = self.get_persistent_cache_task_path(task.id.as_str());
// If the hard link fails, copy the task content to the destination.
fs::remove_file(to).await.unwrap_or_else(|err| {
info!("remove {:?} failed: {}", to, err);
});
if let Err(err) = self.hard_link_task(task.id.as_str(), to).await {
warn!("hard link {:?} to {:?} failed: {}", task_path, to, err);
// If the persistent cache task is empty, no need to copy. Need to open the file to
// ensure the file exists.
if task.is_empty() {
info!("persistent cache task is empty, no need to copy");
File::create(to).await.inspect_err(|err| {
error!("create {:?} failed: {}", to, err);
})?;
return Ok(());
}
warn!("hard link {:?} to {:?} failed: {}", task_path, to, err);
return Err(Error::IO(err));
self.copy_task(task.id.as_str(), to)
.await
.inspect_err(|err| {
error!("copy {:?} to {:?} failed: {}", task_path, to, err);
})?;
info!("copy {:?} to {:?} success", task_path, to);
return Ok(());
}
info!("hard link {:?} to {:?} success", task_path, to);
@ -494,100 +443,19 @@ impl Content {
/// copy_persistent_cache_task copies the content file of the persistent cache task to `to`
/// and logs the destination on success.
#[instrument(skip_all)]
pub async fn copy_persistent_cache_task(&self, task_id: &str, to: &Path) -> Result<()> {
    let task_path = self.get_persistent_cache_task_path(task_id);
    fs::copy(task_path, to).await?;

    info!("copy to {:?} success", to);
    Ok(())
}
/// read_persistent_cache_piece reads the persistent cache piece from the content.
#[instrument(skip_all)]
pub async fn read_persistent_cache_piece(
pub async fn write_persistent_cache_task(
&self,
task_id: &str,
offset: u64,
length: u64,
range: Option<Range>,
) -> Result<impl AsyncRead> {
let task_path = self.get_persistent_cache_task_path(task_id);
from: &Path,
) -> Result<WritePersistentCacheTaskResponse> {
// Open the file to copy the content.
let from_f = File::open(from).await?;
// Calculate the target offset and length based on the range.
let (target_offset, target_length) = calculate_piece_range(offset, length, range);
let f = File::open(task_path.as_path()).await.inspect_err(|err| {
error!("open {:?} failed: {}", task_path, err);
})?;
let mut f_reader = BufReader::with_capacity(self.config.storage.read_buffer_size, f);
f_reader
.seek(SeekFrom::Start(target_offset))
.await
.inspect_err(|err| {
error!("seek {:?} failed: {}", task_path, err);
})?;
Ok(f_reader.take(target_length))
}
/// read_persistent_cache_piece_with_dual_read return two readers, one is the range reader, and the other is the
/// full reader of the persistent cache piece. It is used for cache the piece content to the proxy cache.
#[instrument(skip_all)]
pub async fn read_persistent_cache_piece_with_dual_read(
&self,
task_id: &str,
offset: u64,
length: u64,
range: Option<Range>,
) -> Result<(impl AsyncRead, impl AsyncRead)> {
let task_path = self.get_persistent_cache_task_path(task_id);
// Calculate the target offset and length based on the range.
let (target_offset, target_length) = calculate_piece_range(offset, length, range);
let f = File::open(task_path.as_path()).await.inspect_err(|err| {
error!("open {:?} failed: {}", task_path, err);
})?;
let mut f_range_reader = BufReader::with_capacity(self.config.storage.read_buffer_size, f);
f_range_reader
.seek(SeekFrom::Start(target_offset))
.await
.inspect_err(|err| {
error!("seek {:?} failed: {}", task_path, err);
})?;
let range_reader = f_range_reader.take(target_length);
// Create full reader of the piece.
let f = File::open(task_path.as_path()).await.inspect_err(|err| {
error!("open {:?} failed: {}", task_path, err);
})?;
let mut f_reader = BufReader::with_capacity(self.config.storage.read_buffer_size, f);
f_reader
.seek(SeekFrom::Start(offset))
.await
.inspect_err(|err| {
error!("seek {:?} failed: {}", task_path, err);
})?;
let reader = f_reader.take(length);
Ok((range_reader, reader))
}
/// write_persistent_cache_piece writes the persistent cache piece to the content and
/// calculates the hash of the piece by crc32.
#[instrument(skip_all)]
pub async fn write_persistent_cache_piece<R: AsyncRead + Unpin + ?Sized>(
&self,
task_id: &str,
offset: u64,
expected_length: u64,
reader: &mut R,
) -> Result<WritePieceResponse> {
// Open the file and seek to the offset.
let task_path = self.get_persistent_cache_task_path(task_id);
let mut f = OpenOptions::new()
.truncate(false)
let task_path = self
.create_or_get_persistent_cache_task_path(task_id)
.await?;
let to_f = OpenOptions::new()
.create_new(true)
.write(true)
.open(task_path.as_path())
.await
@ -595,19 +463,16 @@ impl Content {
error!("open {:?} failed: {}", task_path, err);
})?;
f.seek(SeekFrom::Start(offset)).await.inspect_err(|err| {
error!("seek {:?} failed: {}", task_path, err);
})?;
// Copy the content to the file while updating the CRC32 value.
let mut reader = BufReader::with_capacity(self.config.storage.write_buffer_size, from_f);
let crc = Crc::<u32, Table<16>>::new(&CRC_32_ISCSI);
let mut digest = crc.digest();
let reader = BufReader::with_capacity(self.config.storage.write_buffer_size, reader);
let mut writer = BufWriter::with_capacity(self.config.storage.write_buffer_size, f);
// Copy the piece to the file while updating the CRC32 value.
let mut hasher = crc32fast::Hasher::new();
let mut tee = InspectReader::new(reader, |bytes| {
hasher.update(bytes);
let mut tee = InspectReader::new(&mut reader, |bytes| {
digest.update(bytes);
});
let mut writer = BufWriter::with_capacity(self.config.storage.write_buffer_size, to_f);
let length = io::copy(&mut tee, &mut writer).await.inspect_err(|err| {
error!("copy {:?} failed: {}", task_path, err);
})?;
@ -616,21 +481,14 @@ impl Content {
error!("flush {:?} failed: {}", task_path, err);
})?;
if length != expected_length {
return Err(Error::Unknown(format!(
"expected length {} but got {}",
expected_length, length
)));
}
// Calculate the hash of the piece.
Ok(WritePieceResponse {
Ok(WritePersistentCacheTaskResponse {
length,
hash: hasher.finalize().to_string(),
hash: digest.finalize().to_string(),
})
}
/// delete_persistent_cache_task deletes the persistent cache task content.
#[instrument(skip_all)]
pub async fn delete_persistent_cache_task(&self, task_id: &str) -> Result<()> {
info!("delete persistent cache task content: {}", task_id);
let persistent_cache_task_path = self.get_persistent_cache_task_path(task_id);
@ -643,6 +501,7 @@ impl Content {
}
/// get_persistent_cache_task_path returns the persistent cache task path by task id.
#[instrument(skip_all)]
fn get_persistent_cache_task_path(&self, task_id: &str) -> PathBuf {
// The persistent cache task needs split by the first 3 characters of task id(sha256) to
// avoid too many files in one directory.
@ -651,6 +510,21 @@ impl Content {
.join(&task_id[..3])
.join(task_id)
}
/// create_or_get_persistent_cache_task_path creates parent directories or returns the persistent cache task path by task id.
#[instrument(skip_all)]
async fn create_or_get_persistent_cache_task_path(&self, task_id: &str) -> Result<PathBuf> {
let task_dir = self
.dir
.join(DEFAULT_PERSISTENT_CACHE_TASK_DIR)
.join(&task_id[..3]);
fs::create_dir_all(&task_dir).await.inspect_err(|err| {
error!("create {:?} failed: {}", task_dir, err);
})?;
Ok(task_dir.join(task_id))
}
}
/// calculate_piece_range calculates the target offset and length based on the piece range and
@ -669,319 +543,9 @@ pub fn calculate_piece_range(offset: u64, length: u64, range: Option<Range>) ->
#[cfg(test)]
mod tests {
use super::*;
use std::io::Cursor;
use tempfile::tempdir;
#[tokio::test]
async fn test_create_task() {
    let task_id = "60409bd0ec44160f44c53c39b3fe1c5fdfb23faded0228c68bee83bc15a200e3";
    let root = tempdir().unwrap();
    let content = Content::new(Arc::new(Config::default()), root.path())
        .await
        .unwrap();

    // The first call creates the content file below the 3-character prefix directory.
    let created = content.create_task(task_id, 0).await.unwrap();
    assert!(created.exists());
    assert_eq!(created, root.path().join("content/tasks/604/60409bd0ec44160f44c53c39b3fe1c5fdfb23faded0228c68bee83bc15a200e3"));

    // The second call finds the existing file and returns the same path.
    let existing = content.create_task(task_id, 0).await.unwrap();
    assert_eq!(created, existing);
}
#[tokio::test]
async fn test_hard_link_task() {
    let task_id = "c71d239df91726fc519c6eb72d318ec65820627232b2f796219e87dcf35d0ab4";
    let root = tempdir().unwrap();
    let content = Content::new(Arc::new(Config::default()), root.path())
        .await
        .unwrap();
    content.create_task(task_id, 0).await.unwrap();

    // The destination is a file named after the task id in the temporary directory.
    let link = root.path().join(task_id);
    content.hard_link_task(task_id, &link).await.unwrap();
    assert!(link.exists());

    // Linking a second time onto the same destination must succeed as well.
    content.hard_link_task(task_id, &link).await.unwrap();
}
#[tokio::test]
async fn test_copy_task() {
    let task_id = "bfd3c02fb31a7373e25b405fd5fd3082987ccfbaf210889153af9e65bbf13002";
    let root = tempdir().unwrap();
    let content = Content::new(Arc::new(Config::default()), root.path())
        .await
        .unwrap();
    content.create_task(task_id, 64).await.unwrap();

    // Copy the task content to a file named after the task id and check it exists.
    let destination = root.path().join(task_id);
    content.copy_task(task_id, &destination).await.unwrap();
    assert!(destination.exists());
}
#[tokio::test]
async fn test_delete_task() {
    let task_id = "4e19f03b0fceb38f23ff4f657681472a53ef335db3660ae5494912570b7a2bb7";
    let root = tempdir().unwrap();
    let content = Content::new(Arc::new(Config::default()), root.path())
        .await
        .unwrap();

    // The content file must exist after creation ...
    let created = content.create_task(task_id, 0).await.unwrap();
    assert!(created.exists());

    // ... and be gone after deletion.
    content.delete_task(task_id).await.unwrap();
    assert!(!created.exists());
}
#[tokio::test]
async fn test_read_piece() {
    let task_id = "c794a3bbae81e06d1c8d362509bdd42a7c105b0fb28d80ffe27f94b8f04fc845";
    let root = tempdir().unwrap();
    let content = Content::new(Arc::new(Config::default()), root.path())
        .await
        .unwrap();
    content.create_task(task_id, 13).await.unwrap();

    // Store a single 13-byte piece at offset 0.
    let payload = b"hello, world!";
    let mut source = Cursor::new(payload);
    content
        .write_piece(task_id, 0, 13, &mut source)
        .await
        .unwrap();

    // Without a range the whole piece is read back.
    let mut piece_reader = content.read_piece(task_id, 0, 13, None).await.unwrap();
    let mut whole = Vec::new();
    piece_reader.read_to_end(&mut whole).await.unwrap();
    assert_eq!(whole, payload);

    // With a range only the requested prefix is read back.
    let prefix = Range {
        start: 0,
        length: 5,
    };
    let mut range_reader = content
        .read_piece(task_id, 0, 13, Some(prefix))
        .await
        .unwrap();
    let mut partial = Vec::new();
    range_reader.read_to_end(&mut partial).await.unwrap();
    assert_eq!(partial, b"hello");
}
#[tokio::test]
async fn test_write_piece() {
    let task_id = "60b48845606946cea72084f14ed5cce61ec96e69f80a30f891a6963dccfd5b4f";
    let root = tempdir().unwrap();
    let content = Content::new(Arc::new(Config::default()), root.path())
        .await
        .unwrap();
    content.create_task(task_id, 4).await.unwrap();

    // Write a 4-byte piece and check the reported length and hash.
    let mut source = Cursor::new(b"test");
    let written = content
        .write_piece(task_id, 0, 4, &mut source)
        .await
        .unwrap();
    assert_eq!(written.length, 4);
    assert!(!written.hash.is_empty());
}
#[tokio::test]
async fn test_create_persistent_task() {
    let task_id = "c4f108ab1d2b8cfdffe89ea9676af35123fa02e3c25167d62538f630d5d44745";
    let root = tempdir().unwrap();
    let content = Content::new(Arc::new(Config::default()), root.path())
        .await
        .unwrap();

    // The first call creates the content file below the 3-character prefix directory.
    let created = content
        .create_persistent_cache_task(task_id, 0)
        .await
        .unwrap();
    assert!(created.exists());
    assert_eq!(created, root.path().join("content/persistent-cache-tasks/c4f/c4f108ab1d2b8cfdffe89ea9676af35123fa02e3c25167d62538f630d5d44745"));

    // The second call must return the same path.
    let existing = content
        .create_persistent_cache_task(task_id, 0)
        .await
        .unwrap();
    assert_eq!(created, existing);
}
#[tokio::test]
async fn test_hard_link_persistent_cache_task() {
    let task_id = "5e81970eb2b048910cc84cab026b951f2ceac0a09c72c0717193bb6e466e11cd";
    let root = tempdir().unwrap();
    let content = Content::new(Arc::new(Config::default()), root.path())
        .await
        .unwrap();
    content
        .create_persistent_cache_task(task_id, 0)
        .await
        .unwrap();

    // The destination is a file named after the task id in the temporary directory.
    let link = root.path().join(task_id);
    content
        .hard_link_persistent_cache_task(task_id, &link)
        .await
        .unwrap();
    assert!(link.exists());

    // Linking a second time onto the same destination must succeed as well.
    content
        .hard_link_persistent_cache_task(task_id, &link)
        .await
        .unwrap();
}
#[tokio::test]
async fn test_copy_persistent_cache_task() {
    let task_id = "194b9c2018429689fb4e596a506c7e9db564c187b9709b55b33b96881dfb6dd5";
    let root = tempdir().unwrap();
    let content = Content::new(Arc::new(Config::default()), root.path())
        .await
        .unwrap();
    content
        .create_persistent_cache_task(task_id, 64)
        .await
        .unwrap();

    // Copy the content to a file named after the task id and check it exists.
    let destination = root.path().join(task_id);
    content
        .copy_persistent_cache_task(task_id, &destination)
        .await
        .unwrap();
    assert!(destination.exists());
}
#[tokio::test]
async fn test_delete_persistent_cache_task() {
    let task_id = "17430ba545c3ce82790e9c9f77e64dca44bb6d6a0c9e18be175037c16c73713d";
    let root = tempdir().unwrap();
    let content = Content::new(Arc::new(Config::default()), root.path())
        .await
        .unwrap();

    // The content file must exist after creation ...
    let created = content
        .create_persistent_cache_task(task_id, 0)
        .await
        .unwrap();
    assert!(created.exists());

    // ... and be gone after deletion.
    content.delete_persistent_cache_task(task_id).await.unwrap();
    assert!(!created.exists());
}
#[tokio::test]
async fn test_read_persistent_cache_piece() {
    let task_id = "9cb27a4af09aee4eb9f904170217659683f4a0ea7cd55e1a9fbcb99ddced659a";
    let root = tempdir().unwrap();
    let content = Content::new(Arc::new(Config::default()), root.path())
        .await
        .unwrap();
    content
        .create_persistent_cache_task(task_id, 13)
        .await
        .unwrap();

    // Store a single 13-byte piece at offset 0.
    let payload = b"hello, world!";
    let mut source = Cursor::new(payload);
    content
        .write_persistent_cache_piece(task_id, 0, 13, &mut source)
        .await
        .unwrap();

    // Without a range the whole piece is read back.
    let mut piece_reader = content
        .read_persistent_cache_piece(task_id, 0, 13, None)
        .await
        .unwrap();
    let mut whole = Vec::new();
    piece_reader.read_to_end(&mut whole).await.unwrap();
    assert_eq!(whole, payload);

    // With a range only the requested prefix is read back.
    let prefix = Range {
        start: 0,
        length: 5,
    };
    let mut range_reader = content
        .read_persistent_cache_piece(task_id, 0, 13, Some(prefix))
        .await
        .unwrap();
    let mut partial = Vec::new();
    range_reader.read_to_end(&mut partial).await.unwrap();
    assert_eq!(partial, b"hello");
}
#[tokio::test]
async fn test_write_persistent_cache_piece() {
    let task_id = "ca1afaf856e8a667fbd48093ca3ca1b8eeb4bf735912fbe551676bc5817a720a";
    let root = tempdir().unwrap();
    let content = Content::new(Arc::new(Config::default()), root.path())
        .await
        .unwrap();
    content
        .create_persistent_cache_task(task_id, 4)
        .await
        .unwrap();

    // Write a 4-byte piece and check the reported length and hash.
    let mut source = Cursor::new(b"test");
    let written = content
        .write_persistent_cache_piece(task_id, 0, 4, &mut source)
        .await
        .unwrap();
    assert_eq!(written.length, 4);
    assert!(!written.hash.is_empty());
}
#[tokio::test]
async fn test_has_enough_space() {
    let root = tempdir().unwrap();

    // With the default configuration: one byte fits, u64::MAX bytes do not.
    let content = Content::new(Arc::new(Config::default()), root.path())
        .await
        .unwrap();
    assert!(content.has_enough_space(1).unwrap());
    assert!(!content.has_enough_space(u64::MAX).unwrap());

    // With a 10 MiB dist threshold and a 1 MiB file already stored, exactly 9 MiB remain.
    let mut limited = Config::default();
    limited.gc.policy.dist_threshold = ByteSize::mib(10);
    let content = Content::new(Arc::new(limited), root.path())
        .await
        .unwrap();

    let one_mib_path = root
        .path()
        .join(DEFAULT_CONTENT_DIR)
        .join(DEFAULT_TASK_DIR)
        .join("1mib");
    let mut one_mib_file = File::create(&one_mib_path).await.unwrap();
    let zeros = vec![0u8; ByteSize::mib(1).as_u64() as usize];
    one_mib_file.write_all(&zeros).await.unwrap();
    one_mib_file.flush().await.unwrap();

    let nine_mib = ByteSize::mib(9).as_u64();
    assert!(!content.has_enough_space(nine_mib + 1).unwrap());
    assert!(content.has_enough_space(nine_mib).unwrap());
}
#[tokio::test]
async fn test_calculate_piece_range() {
async fn should_calculate_piece_range() {
let test_cases = vec![
(1, 4, None, 1, 4),
(

View File

@ -14,7 +14,6 @@
* limitations under the License.
*/
use chrono::NaiveDateTime;
use dragonfly_api::common::v2::Range;
use dragonfly_client_config::dfdaemon::Config;
use dragonfly_client_core::{Error, Result};
@ -25,11 +24,8 @@ use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use tokio::io::AsyncRead;
use tokio::time::sleep;
use tokio_util::either::Either;
use tracing::{debug, error, info, instrument, warn};
use tracing::{debug, error, instrument, warn};
pub mod cache;
pub mod content;
pub mod metadata;
pub mod storage_engine;
@ -47,24 +43,19 @@ pub struct Storage {
/// content implements the content storage.
content: content::Content,
/// cache implements the cache storage.
cache: cache::Cache,
}
/// Storage implements the storage.
impl Storage {
/// new builds a storage from its parts: the metadata store (created from `dir` and
/// `log_dir`), the content store (created from `dir`) and the cache, all sharing `config`.
#[instrument(skip_all)]
pub async fn new(config: Arc<Config>, dir: &Path, log_dir: PathBuf) -> Result<Self> {
    // Fields are initialized in written order; `config` is moved in last because the
    // other constructors each take a clone of it.
    Ok(Storage {
        metadata: metadata::Metadata::new(config.clone(), dir, &log_dir)?,
        content: content::Content::new(config.clone(), dir).await?,
        cache: cache::Cache::new(config.clone()),
        config,
    })
}
@ -83,48 +74,28 @@ impl Storage {
self.content.has_enough_space(content_length)
}
/// hard_link_task hard links the task content to the destination.
/// hard_link_or_copy_task hard links or copies the task content to the destination.
#[instrument(skip_all)]
pub async fn hard_link_task(&self, task_id: &str, to: &Path) -> Result<()> {
self.content.hard_link_task(task_id, to).await
pub async fn hard_link_or_copy_task(
&self,
task: &metadata::Task,
to: &Path,
range: Option<Range>,
) -> Result<()> {
self.content.hard_link_or_copy_task(task, to, range).await
}
/// copy_task copies the task content to the destination.
/// download_task_started updates the metadata of the task when the task downloads started.
#[instrument(skip_all)]
pub async fn copy_task(&self, id: &str, to: &Path) -> Result<()> {
self.content.copy_task(id, to).await
}
/// is_same_dev_inode_as_task asks the content storage whether the content of task `id`
/// and the destination `to` share the same device and inode.
pub async fn is_same_dev_inode_as_task(&self, id: &str, to: &Path) -> Result<bool> {
    let content = &self.content;
    content.is_same_dev_inode_as_task(id, to).await
}
/// prepare_download_task_started records the start of a task download in the metadata
/// without supplying a piece length, content length or response header.
pub async fn prepare_download_task_started(&self, id: &str) -> Result<metadata::Task> {
    let metadata = &self.metadata;
    metadata.download_task_started(id, None, None, None)
}
/// download_task_started updates the metadata of the task and create task content
/// when the task downloads started.
#[instrument(skip_all)]
pub async fn download_task_started(
pub fn download_task_started(
&self,
id: &str,
piece_length: u64,
content_length: u64,
piece_length: Option<u64>,
content_length: Option<u64>,
response_header: Option<HeaderMap>,
) -> Result<metadata::Task> {
self.content.create_task(id, content_length).await?;
self.metadata.download_task_started(
id,
Some(piece_length),
Some(content_length),
response_header,
)
self.metadata
.download_task_started(id, piece_length, content_length, response_header)
}
/// download_task_finished updates the metadata of the task when the task downloads finished.
@ -189,36 +160,17 @@ impl Storage {
self.content.delete_task(id).await.unwrap_or_else(|err| {
error!("delete task content failed: {}", err);
});
let mut cache = self.cache.clone();
cache.delete_task(id).await.unwrap_or_else(|err| {
info!("delete task from cache failed: {}", err);
});
}
/// hard_link_persistent_cache_task hard links the persistent cache task content to the destination.
/// hard_link_or_copy_persistent_cache_task hard links or copies the persistent cache task content to the destination.
#[instrument(skip_all)]
pub async fn hard_link_persistent_cache_task(&self, task_id: &str, to: &Path) -> Result<()> {
self.content
.hard_link_persistent_cache_task(task_id, to)
.await
}
/// copy_persistent_cache_task copies the persistent cache task content to the destination.
#[instrument(skip_all)]
pub async fn copy_persistent_cache_task(&self, id: &str, to: &Path) -> Result<()> {
    let content = &self.content;
    content.copy_persistent_cache_task(id, to).await
}
/// is_same_dev_inode_as_persistent_cache_task checks if the persistent cache task content is on the same device inode as the
/// destination.
pub async fn is_same_dev_inode_as_persistent_cache_task(
pub async fn hard_link_or_copy_persistent_cache_task(
&self,
id: &str,
task: &metadata::PersistentCacheTask,
to: &Path,
) -> Result<bool> {
) -> Result<()> {
self.content
.is_same_dev_inode_as_persistent_cache_task(id, to)
.hard_link_or_copy_persistent_cache_task(task, to)
.await
}
@ -231,17 +183,8 @@ impl Storage {
piece_length: u64,
content_length: u64,
) -> Result<metadata::PersistentCacheTask> {
let metadata = self.metadata.create_persistent_cache_task_started(
id,
ttl,
piece_length,
content_length,
)?;
self.content
.create_persistent_cache_task(id, content_length)
.await?;
Ok(metadata)
self.metadata
.create_persistent_cache_task_started(id, ttl, piece_length, content_length)
}
/// create_persistent_cache_task_finished updates the metadata of the persistent cache task
@ -262,30 +205,23 @@ impl Storage {
}
/// download_persistent_cache_task_started updates the metadata of the persistent cache task
/// and creates the persistent cache task content when the persistent cache task downloads started.
/// when the persistent cache task downloads started.
#[instrument(skip_all)]
pub async fn download_persistent_cache_task_started(
pub fn download_persistent_cache_task_started(
&self,
id: &str,
ttl: Duration,
persistent: bool,
piece_length: u64,
content_length: u64,
created_at: NaiveDateTime,
) -> Result<metadata::PersistentCacheTask> {
let metadata = self.metadata.download_persistent_cache_task_started(
self.metadata.download_persistent_cache_task_started(
id,
ttl,
persistent,
piece_length,
content_length,
created_at,
)?;
self.content
.create_persistent_cache_task(id, content_length)
.await?;
Ok(metadata)
)
}
/// download_persistent_cache_task_finished updates the metadata of the persistent cache task when the persistent cache task downloads finished.
@ -376,7 +312,7 @@ impl Storage {
) -> Result<metadata::Piece> {
let response = self
.content
.write_persistent_cache_piece(task_id, offset, length, reader)
.write_piece_with_crc32_castagnoli(task_id, offset, reader)
.await?;
let digest = Digest::new(Algorithm::Crc32, response.hash);
@ -406,7 +342,6 @@ impl Storage {
}
/// download_piece_from_source_finished is used for downloading piece from source.
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all)]
pub async fn download_piece_from_source_finished<R: AsyncRead + Unpin + ?Sized>(
&self,
@ -415,34 +350,13 @@ impl Storage {
offset: u64,
length: u64,
reader: &mut R,
timeout: Duration,
) -> Result<metadata::Piece> {
tokio::select! {
piece = self.handle_downloaded_from_source_finished(piece_id, task_id, offset, length, reader) => {
piece
}
_ = sleep(timeout) => {
Err(Error::DownloadPieceFinished(piece_id.to_string()))
}
}
}
// handle_downloaded_from_source_finished handles the downloaded piece from source.
#[instrument(skip_all)]
async fn handle_downloaded_from_source_finished<R: AsyncRead + Unpin + ?Sized>(
&self,
piece_id: &str,
task_id: &str,
offset: u64,
length: u64,
reader: &mut R,
) -> Result<metadata::Piece> {
let response = self
.content
.write_piece(task_id, offset, length, reader)
.write_piece_with_crc32_castagnoli(task_id, offset, reader)
.await?;
let digest = Digest::new(Algorithm::Crc32, response.hash);
self.metadata.download_piece_finished(
piece_id,
offset,
@ -453,45 +367,19 @@ impl Storage {
}
/// download_piece_from_parent_finished is used for downloading piece from parent.
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all)]
pub async fn download_piece_from_parent_finished<R: AsyncRead + Unpin + ?Sized>(
&self,
piece_id: &str,
task_id: &str,
offset: u64,
length: u64,
expected_digest: &str,
parent_id: &str,
reader: &mut R,
timeout: Duration,
) -> Result<metadata::Piece> {
tokio::select! {
piece = self.handle_downloaded_piece_from_parent_finished(piece_id, task_id, offset, length, expected_digest, parent_id, reader) => {
piece
}
_ = sleep(timeout) => {
Err(Error::DownloadPieceFinished(piece_id.to_string()))
}
}
}
// handle_downloaded_piece_from_parent_finished handles the downloaded piece from parent.
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all)]
async fn handle_downloaded_piece_from_parent_finished<R: AsyncRead + Unpin + ?Sized>(
&self,
piece_id: &str,
task_id: &str,
offset: u64,
length: u64,
expected_digest: &str,
parent_id: &str,
reader: &mut R,
) -> Result<metadata::Piece> {
let response = self
.content
.write_piece(task_id, offset, length, reader)
.write_piece_with_crc32_castagnoli(task_id, offset, reader)
.await?;
let length = response.length;
@ -538,24 +426,6 @@ impl Storage {
// Get the piece metadata and return the content of the piece.
match self.metadata.get_piece(piece_id) {
Ok(Some(piece)) => {
if self.cache.contains_piece(task_id, piece_id).await {
match self
.cache
.read_piece(task_id, piece_id, piece.clone(), range)
.await
{
Ok(reader) => {
// Finish uploading the task.
self.metadata.upload_task_finished(task_id)?;
debug!("get piece from cache: {}", piece_id);
return Ok(Either::Left(reader));
}
Err(err) => {
return Err(err);
}
}
}
match self
.content
.read_piece(task_id, piece.offset, piece.length, range)
@ -564,7 +434,55 @@ impl Storage {
Ok(reader) => {
// Finish uploading the task.
self.metadata.upload_task_finished(task_id)?;
Ok(Either::Right(reader))
Ok(reader)
}
Err(err) => {
// Failed uploading the task.
self.metadata.upload_task_failed(task_id)?;
Err(err)
}
}
}
Ok(None) => {
// Failed uploading the task.
self.metadata.upload_task_failed(task_id)?;
Err(Error::PieceNotFound(piece_id.to_string()))
}
Err(err) => {
// Failed uploading the task.
self.metadata.upload_task_failed(task_id)?;
Err(err)
}
}
}
/// upload_piece_with_dual_read returns the dual reader of the piece, one is the range reader, and the other is the
/// full reader of the piece. It is used for cache the piece content to the proxy cache.
#[instrument(skip_all)]
pub async fn upload_piece_with_dual_read(
&self,
piece_id: &str,
task_id: &str,
range: Option<Range>,
) -> Result<(impl AsyncRead, impl AsyncRead)> {
// Wait for the piece to be finished.
self.wait_for_piece_finished(piece_id).await?;
// Start uploading the task.
self.metadata.upload_task_started(task_id)?;
// Get the piece metadata and return the content of the piece.
match self.metadata.get_piece(piece_id) {
Ok(Some(piece)) => {
match self
.content
.read_piece_with_dual_read(task_id, piece.offset, piece.length, range)
.await
{
Ok(dual_reader) => {
// Finish uploading the task.
self.metadata.upload_task_finished(task_id)?;
Ok(dual_reader)
}
Err(err) => {
// Failed uploading the task.
@ -587,6 +505,7 @@ impl Storage {
}
/// get_piece returns the piece metadata.
#[instrument(skip_all)]
pub fn get_piece(&self, piece_id: &str) -> Result<Option<metadata::Piece>> {
self.metadata.get_piece(piece_id)
}
@ -598,13 +517,13 @@ impl Storage {
}
/// get_pieces returns the piece metadatas.
#[instrument(skip_all)]
pub fn get_pieces(&self, task_id: &str) -> Result<Vec<metadata::Piece>> {
self.metadata.get_pieces(task_id)
}
/// piece_id returns the piece id.
#[inline]
#[instrument(skip_all)]
pub fn piece_id(&self, task_id: &str, number: u32) -> String {
self.metadata.piece_id(task_id, number)
}
@ -629,7 +548,6 @@ impl Storage {
}
/// download_persistent_cache_piece_from_parent_finished is used for downloading persistent cache piece from parent.
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all)]
pub async fn download_persistent_cache_piece_from_parent_finished<
R: AsyncRead + Unpin + ?Sized,
@ -638,14 +556,13 @@ impl Storage {
piece_id: &str,
task_id: &str,
offset: u64,
length: u64,
expected_digest: &str,
parent_id: &str,
reader: &mut R,
) -> Result<metadata::Piece> {
let response = self
.content
.write_persistent_cache_piece(task_id, offset, length, reader)
.write_piece_with_crc32_castagnoli(task_id, offset, reader)
.await?;
let length = response.length;
@ -696,7 +613,7 @@ impl Storage {
Ok(Some(piece)) => {
match self
.content
.read_persistent_cache_piece(task_id, piece.offset, piece.length, range)
.read_piece(task_id, piece.offset, piece.length, range)
.await
{
Ok(reader) => {
@ -744,6 +661,7 @@ impl Storage {
/// persistent_cache_piece_id returns the persistent cache piece id.
#[inline]
#[instrument(skip_all)]
pub fn persistent_cache_piece_id(&self, task_id: &str, number: u32) -> String {
self.metadata.piece_id(task_id, number)
}
@ -751,12 +669,12 @@ impl Storage {
/// wait_for_piece_finished waits for the piece to be finished.
#[instrument(skip_all)]
async fn wait_for_piece_finished(&self, piece_id: &str) -> Result<metadata::Piece> {
// Total timeout for downloading a piece, combining the download time and the time to write to storage.
let wait_timeout = tokio::time::sleep(
self.config.download.piece_timeout + self.config.storage.write_piece_timeout,
);
tokio::pin!(wait_timeout);
// Initialize the timeout of piece.
let piece_timeout = tokio::time::sleep(self.config.download.piece_timeout);
tokio::pin!(piece_timeout);
// Initialize the interval of piece.
let mut wait_for_piece_count = 0;
let mut interval = tokio::time::interval(DEFAULT_WAIT_FOR_PIECE_FINISHED_INTERVAL);
loop {
tokio::select! {
@ -770,8 +688,13 @@ impl Storage {
debug!("wait piece finished success");
return Ok(piece);
}
if wait_for_piece_count > 0 {
debug!("wait piece finished");
}
wait_for_piece_count += 1;
}
_ = &mut wait_timeout => {
_ = &mut piece_timeout => {
self.metadata.wait_for_piece_finished_failed(piece_id).unwrap_or_else(|err| error!("delete piece metadata failed: {}", err));
return Err(Error::WaitForPieceFinishedTimeout(piece_id.to_string()));
}
@ -785,12 +708,12 @@ impl Storage {
&self,
piece_id: &str,
) -> Result<metadata::Piece> {
// Total timeout for downloading a piece, combining the download time and the time to write to storage.
let wait_timeout = tokio::time::sleep(
self.config.download.piece_timeout + self.config.storage.write_piece_timeout,
);
tokio::pin!(wait_timeout);
// Initialize the timeout of piece.
let piece_timeout = tokio::time::sleep(self.config.download.piece_timeout);
tokio::pin!(piece_timeout);
// Initialize the interval of piece.
let mut wait_for_piece_count = 0;
let mut interval = tokio::time::interval(DEFAULT_WAIT_FOR_PIECE_FINISHED_INTERVAL);
loop {
tokio::select! {
@ -804,8 +727,13 @@ impl Storage {
debug!("wait piece finished success");
return Ok(piece);
}
if wait_for_piece_count > 0 {
debug!("wait piece finished");
}
wait_for_piece_count += 1;
}
_ = &mut wait_timeout => {
_ = &mut piece_timeout => {
self.metadata.wait_for_piece_finished_failed(piece_id).unwrap_or_else(|err| error!("delete piece metadata failed: {}", err));
return Err(Error::WaitForPieceFinishedTimeout(piece_id.to_string()));
}

View File

@ -17,7 +17,8 @@
use chrono::{NaiveDateTime, Utc};
use dragonfly_client_config::dfdaemon::Config;
use dragonfly_client_core::{Error, Result};
use dragonfly_client_util::{digest, http::headermap_to_hashmap};
use dragonfly_client_util::http::headermap_to_hashmap;
use rayon::prelude::*;
use reqwest::header::HeaderMap;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
@ -300,20 +301,6 @@ impl Piece {
None => None,
}
}
/// calculate_digest computes a CRC32 checksum over the piece metadata fields, fed in the
/// order number, offset, length and content digest, and returns it rendered as a digest
/// string. The result is used to check the integrity of the piece metadata.
pub fn calculate_digest(&self) -> String {
    // The numeric fields are hashed as big-endian bytes, the content digest as raw bytes.
    let mut crc = crc32fast::Hasher::new();
    crc.update(&self.number.to_be_bytes());
    crc.update(&self.offset.to_be_bytes());
    crc.update(&self.length.to_be_bytes());
    crc.update(self.digest.as_bytes());

    // The checksum is turned into its decimal string form before being wrapped in a Digest.
    let checksum = crc.finalize();
    digest::Digest::new(digest::Algorithm::Crc32, checksum.to_string()).to_string()
}
}
/// Metadata manages the metadata of [Task], [Piece] and [PersistentCacheTask].
@ -526,7 +513,7 @@ impl<E: StorageEngineOwned> Metadata<E> {
.collect::<Result<Vec<Box<[u8]>>>>()?;
tasks
.iter()
.par_iter()
.map(|task| Task::deserialize_from(task))
.collect()
}
@ -596,7 +583,6 @@ impl<E: StorageEngineOwned> Metadata<E> {
persistent: bool,
piece_length: u64,
content_length: u64,
created_at: NaiveDateTime,
) -> Result<PersistentCacheTask> {
let task = match self.db.get::<PersistentCacheTask>(id.as_bytes())? {
Some(mut task) => {
@ -615,7 +601,7 @@ impl<E: StorageEngineOwned> Metadata<E> {
piece_length,
content_length,
updated_at: Utc::now().naive_utc(),
created_at,
created_at: Utc::now().naive_utc(),
..Default::default()
},
};
@ -840,6 +826,7 @@ impl<E: StorageEngineOwned> Metadata<E> {
}
/// get_piece gets the piece metadata.
#[instrument(skip_all)]
pub fn get_piece(&self, piece_id: &str) -> Result<Option<Piece>> {
self.db.get(piece_id.as_bytes())
}
@ -851,7 +838,6 @@ impl<E: StorageEngineOwned> Metadata<E> {
}
/// get_pieces gets the piece metadatas.
#[instrument(skip_all)]
pub fn get_pieces(&self, task_id: &str) -> Result<Vec<Piece>> {
let pieces = self
.db
@ -863,7 +849,7 @@ impl<E: StorageEngineOwned> Metadata<E> {
.collect::<Result<Vec<Box<[u8]>>>>()?;
pieces
.iter()
.par_iter()
.map(|piece| Piece::deserialize_from(piece))
.collect()
}
@ -888,7 +874,7 @@ impl<E: StorageEngineOwned> Metadata<E> {
.collect::<Result<Vec<Box<[u8]>>>>()?;
let piece_ids_refs = piece_ids
.iter()
.par_iter()
.map(|id| {
let id_ref = id.as_ref();
info!(
@ -906,6 +892,7 @@ impl<E: StorageEngineOwned> Metadata<E> {
/// piece_id returns the piece id.
#[inline]
#[instrument(skip_all)]
pub fn piece_id(&self, task_id: &str, number: u32) -> String {
format!("{}-{}", task_id, number)
}
@ -938,25 +925,11 @@ impl Metadata<RocksdbStorageEngine> {
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
#[test]
fn test_calculate_digest() {
    // Fixed metadata whose digest is known in advance.
    let expected = "crc32:3299754941";
    let actual = Piece {
        number: 1,
        offset: 0,
        length: 1024,
        digest: "crc32:1929153120".to_string(),
        ..Default::default()
    }
    .calculate_digest();

    assert_eq!(actual, expected);
}
use tempdir::TempDir;
#[test]
fn should_create_metadata() {
let dir = tempdir().unwrap();
let dir = TempDir::new("metadata").unwrap();
let log_dir = dir.path().join("log");
let metadata = Metadata::new(Arc::new(Config::default()), dir.path(), &log_dir).unwrap();
assert!(metadata.get_tasks().unwrap().is_empty());
@ -968,7 +941,7 @@ mod tests {
#[test]
fn test_task_lifecycle() {
let dir = tempdir().unwrap();
let dir = TempDir::new("metadata").unwrap();
let log_dir = dir.path().join("log");
let metadata = Metadata::new(Arc::new(Config::default()), dir.path(), &log_dir).unwrap();
let task_id = "d3c4e940ad06c47fc36ac67801e6f8e36cb400e2391708620bc7e865b102062c";
@ -1028,7 +1001,7 @@ mod tests {
#[test]
fn test_piece_lifecycle() {
let dir = tempdir().unwrap();
let dir = TempDir::new("metadata").unwrap();
let log_dir = dir.path().join("log");
let metadata = Metadata::new(Arc::new(Config::default()), dir.path(), &log_dir).unwrap();
let task_id = "d3c4e940ad06c47fc36ac67801e6f8e36cb400e2391708620bc7e865b102062c";

View File

@ -65,7 +65,6 @@ pub trait Operations {
fn iter<O: DatabaseObject>(&self) -> Result<impl Iterator<Item = Result<(Box<[u8]>, O)>>>;
/// iter_raw iterates all objects without serialization.
#[allow(clippy::type_complexity)]
fn iter_raw<O: DatabaseObject>(
&self,
) -> Result<impl Iterator<Item = Result<(Box<[u8]>, Box<[u8]>)>>>;
@ -77,7 +76,6 @@ pub trait Operations {
) -> Result<impl Iterator<Item = Result<(Box<[u8]>, O)>>>;
/// prefix_iter_raw iterates all objects with prefix without serialization.
#[allow(clippy::type_complexity)]
fn prefix_iter_raw<O: DatabaseObject>(
&self,
prefix: &[u8],

View File

@ -24,7 +24,7 @@ use std::{
ops::Deref,
path::{Path, PathBuf},
};
use tracing::{info, warn};
use tracing::{info, instrument, warn};
/// RocksdbStorageEngine is a storage engine based on rocksdb.
pub struct RocksdbStorageEngine {
@ -67,6 +67,7 @@ impl RocksdbStorageEngine {
const DEFAULT_LOG_MAX_FILES: usize = 10;
/// open opens a rocksdb storage engine with the given directory and column families.
#[instrument(skip_all)]
pub fn open(dir: &Path, log_dir: &PathBuf, cf_names: &[&str], keep: bool) -> Result<Self> {
info!("initializing metadata directory: {:?} {:?}", dir, cf_names);
// Initialize rocksdb options.
@ -134,6 +135,7 @@ impl RocksdbStorageEngine {
/// RocksdbStorageEngine implements the storage engine operations.
impl Operations for RocksdbStorageEngine {
/// get gets the object by key.
#[instrument(skip_all)]
fn get<O: DatabaseObject>(&self, key: &[u8]) -> Result<Option<O>> {
let cf = cf_handle::<O>(self)?;
let value = self.get_cf(cf, key).or_err(ErrorType::StorageError)?;
@ -144,6 +146,7 @@ impl Operations for RocksdbStorageEngine {
}
/// is_exist checks if the object exists by key.
#[instrument(skip_all)]
fn is_exist<O: DatabaseObject>(&self, key: &[u8]) -> Result<bool> {
let cf = cf_handle::<O>(self)?;
Ok(self
@ -153,6 +156,7 @@ impl Operations for RocksdbStorageEngine {
}
/// put puts the object by key.
#[instrument(skip_all)]
fn put<O: DatabaseObject>(&self, key: &[u8], value: &O) -> Result<()> {
let cf = cf_handle::<O>(self)?;
self.put_cf(cf, key, value.serialized()?)
@ -161,6 +165,7 @@ impl Operations for RocksdbStorageEngine {
}
/// delete deletes the object by key.
#[instrument(skip_all)]
fn delete<O: DatabaseObject>(&self, key: &[u8]) -> Result<()> {
let cf = cf_handle::<O>(self)?;
let mut options = WriteOptions::default();
@ -172,6 +177,7 @@ impl Operations for RocksdbStorageEngine {
}
/// iter iterates all objects.
#[instrument(skip_all)]
fn iter<O: DatabaseObject>(&self) -> Result<impl Iterator<Item = Result<(Box<[u8]>, O)>>> {
let cf = cf_handle::<O>(self)?;
let iter = self.iterator_cf(cf, rocksdb::IteratorMode::Start);
@ -182,6 +188,7 @@ impl Operations for RocksdbStorageEngine {
}
/// iter_raw iterates all objects without serialization.
#[instrument(skip_all)]
fn iter_raw<O: DatabaseObject>(
&self,
) -> Result<impl Iterator<Item = Result<(Box<[u8]>, Box<[u8]>)>>> {
@ -195,6 +202,7 @@ impl Operations for RocksdbStorageEngine {
}
/// prefix_iter iterates all objects with prefix.
#[instrument(skip_all)]
fn prefix_iter<O: DatabaseObject>(
&self,
prefix: &[u8],
@ -208,6 +216,7 @@ impl Operations for RocksdbStorageEngine {
}
/// prefix_iter_raw iterates all objects with prefix without serialization.
#[instrument(skip_all)]
fn prefix_iter_raw<O: DatabaseObject>(
&self,
prefix: &[u8],
@ -220,6 +229,7 @@ impl Operations for RocksdbStorageEngine {
}
/// batch_delete deletes objects by keys.
#[instrument(skip_all)]
fn batch_delete<O: DatabaseObject>(&self, keys: Vec<&[u8]>) -> Result<()> {
let cf = cf_handle::<O>(self)?;
let mut batch = rocksdb::WriteBatch::default();
@ -236,7 +246,7 @@ impl Operations for RocksdbStorageEngine {
}
/// RocksdbStorageEngine implements the rocksdb of the storage engine.
impl StorageEngine<'_> for RocksdbStorageEngine {}
impl<'db> StorageEngine<'db> for RocksdbStorageEngine {}
/// cf_handle returns the column family handle for the given object.
fn cf_handle<T>(db: &rocksdb::DB) -> Result<&rocksdb::ColumnFamily>
@ -247,399 +257,3 @@ where
db.cf_handle(cf_name)
.ok_or_else(|| Error::ColumnFamilyNotFound(cf_name.to_string()))
}
#[cfg(test)]
mod tests {
use super::*;
use serde::{Deserialize, Serialize};
use tempfile::tempdir;
// Object is a minimal serializable record used by the tests below to exercise the
// storage engine operations.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
struct Object {
// id is also used (as bytes) as the storage key by most tests in this module.
id: String,
// value is an arbitrary payload compared after the object is read back.
value: i32,
}
// NAMESPACE is what create_test_engine passes to RocksdbStorageEngine::open as the
// column family name for Object.
impl DatabaseObject for Object {
const NAMESPACE: &'static str = "object";
}
fn create_test_engine() -> RocksdbStorageEngine {
let temp_dir = tempdir().unwrap();
let log_dir = temp_dir.path().to_path_buf();
RocksdbStorageEngine::open(temp_dir.path(), &log_dir, &[Object::NAMESPACE], false).unwrap()
}
#[test]
fn test_put_and_get() {
    let engine = create_test_engine();
    let stored = Object {
        id: "1".to_string(),
        value: 42,
    };
    let key = stored.id.as_bytes();

    engine.put::<Object>(key, &stored).unwrap();

    // A stored object must be read back unchanged.
    let loaded = engine.get::<Object>(key).unwrap().unwrap();
    assert_eq!(stored, loaded);
}
#[test]
fn test_is_exist() {
    let engine = create_test_engine();
    let candidate = Object {
        id: "2".to_string(),
        value: 100,
    };
    let key = candidate.id.as_bytes();

    // The key is absent before the write...
    let before = engine.is_exist::<Object>(key).unwrap();
    assert!(!before);

    // ...and present after it.
    engine.put::<Object>(key, &candidate).unwrap();
    let after = engine.is_exist::<Object>(key).unwrap();
    assert!(after);
}
#[test]
fn test_delete() {
    let engine = create_test_engine();
    let doomed = Object {
        id: "3".to_string(),
        value: 200,
    };
    let key = doomed.id.as_bytes();
    let exists = || engine.is_exist::<Object>(key).unwrap();

    // The object is visible once written.
    engine.put::<Object>(key, &doomed).unwrap();
    assert!(exists());

    // After deletion it must no longer be reported as existing.
    engine.delete::<Object>(key).unwrap();
    assert!(!exists());
}
#[test]
fn test_batch_delete() {
    let engine = create_test_engine();

    // Three objects with ids "1".."3" and matching values.
    let objects: Vec<Object> = (1..=3)
        .map(|n: i32| Object {
            id: n.to_string(),
            value: n,
        })
        .collect();

    // Write each object and confirm it is visible straight away.
    objects.iter().for_each(|object| {
        let key = object.id.as_bytes();
        engine.put::<Object>(key, object).unwrap();
        assert!(engine.is_exist::<Object>(key).unwrap());
    });

    // Delete all of them in a single batch.
    let mut keys: Vec<&[u8]> = Vec::with_capacity(objects.len());
    for object in &objects {
        keys.push(object.id.as_bytes());
    }
    engine.batch_delete::<Object>(keys).unwrap();

    // None of the objects may remain.
    for object in &objects {
        let still_there = engine.is_exist::<Object>(object.id.as_bytes()).unwrap();
        assert!(!still_there);
    }
}
#[test]
fn test_iter() {
    let engine = create_test_engine();

    // Three objects with ids "1".."3" and values 10, 20, 30.
    let objects: Vec<Object> = (1..=3)
        .map(|n: i32| Object {
            id: n.to_string(),
            value: n * 10,
        })
        .collect();

    objects
        .iter()
        .for_each(|object| engine.put::<Object>(object.id.as_bytes(), object).unwrap());

    // Drain the full iterator; any per-item error fails the test.
    let retrieved_objects = engine
        .iter::<Object>()
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();
    assert_eq!(retrieved_objects.len(), objects.len());

    // Every written object must appear among the iterated values.
    for expected in &objects {
        let found = retrieved_objects
            .iter()
            .map(|(_, value)| value)
            .any(|value| value.id == expected.id && value.value == expected.value);
        assert!(found, "could not find object with id {:?}", expected.id);
    }
}
#[test]
fn test_prefix_iter() {
    // Builds a (key, object) pair whose key is the given prefix followed by the suffix.
    fn entry(prefix: &[u8], suffix: &[u8], id: &str, value: i32) -> (Vec<u8>, Object) {
        (
            [prefix, suffix].concat(),
            Object {
                id: id.to_string(),
                value,
            },
        )
    }

    let engine = create_test_engine();

    // RocksDB prefix extractor is configured with fixed_prefix(64) in the open method,
    // so the two key groups differ in their first 64 bytes.
    let prefix_a = [b'a'; 64];
    let prefix_b = [b'b'; 64];

    let objects_with_prefix_a = vec![
        entry(&prefix_a, b"_suffix1", "prefix_id_a1", 100),
        entry(&prefix_a, b"_suffix2", "prefix_id_a2", 200),
    ];
    let objects_with_prefix_b = vec![
        entry(&prefix_b, b"_suffix1", "prefix_id_b1", 300),
        entry(&prefix_b, b"_suffix2", "prefix_id_b2", 400),
    ];

    // Store both groups, prefix 'a' first.
    for (key, object) in objects_with_prefix_a
        .iter()
        .chain(objects_with_prefix_b.iter())
    {
        engine.put::<Object>(key, object).unwrap();
    }

    let retrieved_objects = engine
        .prefix_iter::<Object>(&prefix_a)
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    assert_eq!(
        retrieved_objects.len(),
        objects_with_prefix_a.len(),
        "expected {} objects with prefix 'a', but got {}",
        objects_with_prefix_a.len(),
        retrieved_objects.len()
    );

    // Reports whether an equivalent object is among the iterated values.
    let was_retrieved = |wanted: &Object| {
        retrieved_objects
            .iter()
            .any(|(_, value)| value.id == wanted.id && value.value == wanted.value)
    };

    // Verify each object with prefix is correctly retrieved.
    for (key, object) in &objects_with_prefix_a {
        assert!(
            was_retrieved(object),
            "could not find object with key {:?}",
            key
        );
    }

    // Verify objects with different prefix are not retrieved.
    for (key, object) in &objects_with_prefix_b {
        assert!(
            !was_retrieved(object),
            "found object with different prefix: {:?}",
            key
        );
    }
}
#[test]
fn test_iter_raw() {
    let engine = create_test_engine();

    // Three objects with ids "1".."3" and values 10, 20, 30.
    let objects: Vec<Object> = (1..=3)
        .map(|n: i32| Object {
            id: n.to_string(),
            value: n * 10,
        })
        .collect();

    objects
        .iter()
        .for_each(|object| engine.put::<Object>(object.id.as_bytes(), object).unwrap());

    // The raw iterator yields the stored bytes without deserializing them.
    let retrieved_objects = engine
        .iter_raw::<Object>()
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();
    assert_eq!(retrieved_objects.len(), objects.len());

    // Verify each object can be deserialized from the raw bytes; a value that fails
    // to deserialize simply does not count as a match.
    for expected in &objects {
        let found = retrieved_objects.iter().any(|(_, raw)| {
            Object::deserialize_from(raw)
                .map(|decoded| decoded.id == expected.id && decoded.value == expected.value)
                .unwrap_or(false)
        });

        assert!(
            found,
            "could not find or deserialize object with key {:?}",
            expected.id
        );
    }
}
#[test]
fn test_prefix_iter_raw() {
    // Builds a (key, object) pair whose key is the given prefix followed by the suffix.
    fn entry(prefix: &[u8], suffix: &[u8], id: &str, value: i32) -> (Vec<u8>, Object) {
        (
            [prefix, suffix].concat(),
            Object {
                id: id.to_string(),
                value,
            },
        )
    }

    let engine = create_test_engine();

    // RocksDB prefix extractor is configured with fixed_prefix(64) in the open method,
    // so the two key groups differ in their first 64 bytes.
    let prefix_a = [b'a'; 64];
    let prefix_b = [b'b'; 64];

    let objects_with_prefix_a = vec![
        entry(&prefix_a, b"_raw_suffix1", "raw_prefix_id_a1", 100),
        entry(&prefix_a, b"_raw_suffix2", "raw_prefix_id_a2", 200),
    ];
    let objects_with_prefix_b = vec![
        entry(&prefix_b, b"_raw_suffix1", "raw_prefix_id_b1", 300),
        entry(&prefix_b, b"_raw_suffix2", "raw_prefix_id_b2", 400),
    ];

    // Store both groups, prefix 'a' first.
    for (key, object) in objects_with_prefix_a
        .iter()
        .chain(objects_with_prefix_b.iter())
    {
        engine.put::<Object>(key, object).unwrap();
    }

    let retrieved_objects = engine
        .prefix_iter_raw::<Object>(&prefix_a)
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();

    assert_eq!(
        retrieved_objects.len(),
        objects_with_prefix_a.len(),
        "expected {} raw objects with prefix 'a', but got {}",
        objects_with_prefix_a.len(),
        retrieved_objects.len()
    );

    // Verify each object with prefix can be deserialized from raw bytes; a value that
    // fails to deserialize simply does not count as a match.
    for (_, expected) in &objects_with_prefix_a {
        let found = retrieved_objects.iter().any(|(_, raw)| {
            Object::deserialize_from(raw)
                .map(|decoded| decoded.id == expected.id && decoded.value == expected.value)
                .unwrap_or(false)
        });

        assert!(
            found,
            "could not find or deserialize object with key {:?}",
            expected.id
        );
    }

    // Verify objects with different prefix are not retrieved, this time comparing keys.
    for (key, _) in &objects_with_prefix_b {
        let found = retrieved_objects
            .iter()
            .map(|(raw_key, _)| raw_key.as_ref())
            .any(|raw_key| raw_key == key.as_slice());
        assert!(!found, "found object with different prefix: {:?}", key);
    }
}
#[test]
fn test_column_family_not_found() {
    // A type with a different namespace that hasn't been registered with the engine.
    #[derive(Debug, Serialize, Deserialize, PartialEq)]
    struct UnregisteredObject {
        data: String,
    }

    impl DatabaseObject for UnregisteredObject {
        const NAMESPACE: &'static str = "unregistered";
    }

    let engine = create_test_engine();

    // Looking the type up must fail, and the error must name the missing column family.
    let result = engine.get::<UnregisteredObject>(b"unregistered");
    assert!(result.is_err());

    if let Some(err) = result.err() {
        let rendered = format!("{:?}", err);
        assert!(rendered.contains("ColumnFamilyNotFound"));
    }
}
}

View File

@ -13,6 +13,7 @@ edition.workspace = true
dragonfly-client-core.workspace = true
dragonfly-api.workspace = true
reqwest.workspace = true
hyper.workspace = true
http-range-header.workspace = true
http.workspace = true
tracing.workspace = true
@ -23,17 +24,13 @@ rustls-pki-types.workspace = true
rustls-pemfile.workspace = true
sha2.workspace = true
uuid.workspace = true
sysinfo.workspace = true
hex.workspace = true
crc32fast.workspace = true
openssl.workspace = true
lazy_static.workspace = true
bytesize.workspace = true
lru.workspace = true
tokio.workspace = true
rustix = { version = "1.0.8", features = ["fs"] }
blake3.workspace = true
crc.workspace = true
base16ct.workspace = true
base64 = "0.22.1"
pnet = "0.35.0"
wyhash = "0.5.0"
[dev-dependencies]
tempfile.workspace = true

View File

@ -14,10 +14,11 @@
* limitations under the License.
*/
use dragonfly_client_core::{Error as ClientError, Result as ClientResult};
use crc::*;
use dragonfly_client_core::Result as ClientResult;
use sha2::Digest as Sha2Digest;
use std::fmt;
use std::io::{self, Read};
use std::io::Read;
use std::path::Path;
use std::str::FromStr;
use tracing::instrument;
@ -31,6 +32,9 @@ pub enum Algorithm {
/// Crc32 is crc32 algorithm for generate digest.
Crc32,
/// Blake3 is blake3 algorithm for generate digest.
Blake3,
/// Sha256 is sha256 algorithm for generate digest.
Sha256,
@ -44,6 +48,7 @@ impl fmt::Display for Algorithm {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Algorithm::Crc32 => write!(f, "crc32"),
Algorithm::Blake3 => write!(f, "blake3"),
Algorithm::Sha256 => write!(f, "sha256"),
Algorithm::Sha512 => write!(f, "sha512"),
}
@ -58,6 +63,7 @@ impl FromStr for Algorithm {
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"crc32" => Ok(Algorithm::Crc32),
"blake3" => Ok(Algorithm::Blake3),
"sha256" => Ok(Algorithm::Sha256),
"sha512" => Ok(Algorithm::Sha512),
_ => Err(format!("invalid digest algorithm: {}", s)),
@ -112,36 +118,10 @@ impl FromStr for Digest {
}
let algorithm = match parts[0] {
"crc32" => {
if parts[1].len() != 10 {
return Err(format!(
"invalid crc32 digest length: {}, expected 10",
parts[1].len()
));
}
Algorithm::Crc32
}
"sha256" => {
if parts[1].len() != 64 {
return Err(format!(
"invalid sha256 digest length: {}, expected 64",
parts[1].len()
));
}
Algorithm::Sha256
}
"sha512" => {
if parts[1].len() != 128 {
return Err(format!(
"invalid sha512 digest length: {}, expected 128",
parts[1].len()
));
}
Algorithm::Sha512
}
"crc32" => Algorithm::Crc32,
"blake3" => Algorithm::Blake3,
"sha256" => Algorithm::Sha256,
"sha512" => Algorithm::Sha512,
_ => return Err(format!("invalid digest algorithm: {}", parts[0])),
};
@ -149,58 +129,47 @@ impl FromStr for Digest {
}
}
/// calculate_file_digest calculates the digest of a file.
/// calculate_file_hash calculates the hash of a file.
#[instrument(skip_all)]
pub fn calculate_file_digest(algorithm: Algorithm, path: &Path) -> ClientResult<Digest> {
pub fn calculate_file_hash(algorithm: Algorithm, path: &Path) -> ClientResult<Digest> {
let f = std::fs::File::open(path)?;
let mut reader = io::BufReader::new(f);
let mut reader = std::io::BufReader::new(f);
match algorithm {
Algorithm::Crc32 => {
let mut buffer = [0; 4096];
let mut hasher = crc32fast::Hasher::new();
let crc = Crc::<u32, Table<16>>::new(&CRC_32_ISCSI);
let mut digest = crc.digest();
loop {
match reader.read(&mut buffer) {
Ok(0) => break,
Ok(n) => hasher.update(&buffer[..n]),
Err(ref err) if err.kind() == io::ErrorKind::Interrupted => continue,
Err(err) => return Err(err.into()),
};
let n = reader.read(&mut buffer)?;
if n == 0 {
break;
}
digest.update(&buffer[..n]);
}
Ok(Digest::new(algorithm, hasher.finalize().to_string()))
Ok(Digest::new(algorithm, digest.finalize().to_string()))
}
Algorithm::Blake3 => {
let mut hasher = blake3::Hasher::new();
std::io::copy(&mut reader, &mut hasher)?;
Ok(Digest::new(
algorithm,
base16ct::lower::encode_string(hasher.finalize().as_bytes()),
))
}
Algorithm::Sha256 => {
let mut hasher = sha2::Sha256::new();
io::copy(&mut reader, &mut hasher)?;
std::io::copy(&mut reader, &mut hasher)?;
Ok(Digest::new(algorithm, hex::encode(hasher.finalize())))
}
Algorithm::Sha512 => {
let mut hasher = sha2::Sha512::new();
io::copy(&mut reader, &mut hasher)?;
std::io::copy(&mut reader, &mut hasher)?;
Ok(Digest::new(algorithm, hex::encode(hasher.finalize())))
}
}
}
/// verify_file_digest recomputes the digest of the file at `file_path` using the algorithm
/// of `expected_digest` and compares the string forms of the two digests.
///
/// Errors from calculating the file digest are returned as-is. When the digests differ,
/// `DigestMismatch` is returned carrying the expected digest followed by the actual one.
pub fn verify_file_digest(expected_digest: Digest, file_path: &Path) -> ClientResult<()> {
    let actual = calculate_file_digest(expected_digest.algorithm(), file_path)?.to_string();
    let expected = expected_digest.to_string();

    if actual == expected {
        return Ok(());
    }

    Err(ClientError::DigestMismatch(expected, actual))
}
#[cfg(test)]
mod tests {
use super::*;
@ -210,6 +179,7 @@ mod tests {
#[test]
fn test_algorithm_display() {
assert_eq!(Algorithm::Crc32.to_string(), "crc32");
assert_eq!(Algorithm::Blake3.to_string(), "blake3");
assert_eq!(Algorithm::Sha256.to_string(), "sha256");
assert_eq!(Algorithm::Sha512.to_string(), "sha512");
}
@ -217,6 +187,7 @@ mod tests {
#[test]
fn test_algorithm_from_str() {
assert_eq!("crc32".parse::<Algorithm>(), Ok(Algorithm::Crc32));
assert_eq!("blake3".parse::<Algorithm>(), Ok(Algorithm::Blake3));
assert_eq!("sha256".parse::<Algorithm>(), Ok(Algorithm::Sha256));
assert_eq!("sha512".parse::<Algorithm>(), Ok(Algorithm::Sha512));
assert!("invalid".parse::<Algorithm>().is_err());
@ -229,50 +200,31 @@ mod tests {
}
#[test]
fn test_calculate_file_digest() {
fn test_calculate_file_hash() {
let content = b"test content";
let temp_file = tempfile::NamedTempFile::new().expect("failed to create temp file");
let path = temp_file.path();
let mut file = File::create(path).expect("failed to create file");
file.write_all(content).expect("failed to write to file");
let expected_blake3 = "ead3df8af4aece7792496936f83b6b6d191a7f256585ce6b6028db161278017e";
let digest =
calculate_file_hash(Algorithm::Blake3, path).expect("failed to calculate Blake3 hash");
assert_eq!(digest.encoded(), expected_blake3);
let expected_sha256 = "6ae8a75555209fd6c44157c0aed8016e763ff435a19cf186f76863140143ff72";
let digest = calculate_file_digest(Algorithm::Sha256, path)
.expect("failed to calculate Sha256 hash");
let digest =
calculate_file_hash(Algorithm::Sha256, path).expect("failed to calculate Sha256 hash");
assert_eq!(digest.encoded(), expected_sha256);
let expected_sha512 = "0cbf4caef38047bba9a24e621a961484e5d2a92176a859e7eb27df343dd34eb98d538a6c5f4da1ce302ec250b821cc001e46cc97a704988297185a4df7e99602";
let digest = calculate_file_digest(Algorithm::Sha512, path)
.expect("failed to calculate Sha512 hash");
let digest =
calculate_file_hash(Algorithm::Sha512, path).expect("failed to calculate Sha512 hash");
assert_eq!(digest.encoded(), expected_sha512);
let expected_crc32 = "1475635037";
let expected_crc32 = "422618885";
let digest =
calculate_file_digest(Algorithm::Crc32, path).expect("failed to calculate Crc32 hash");
calculate_file_hash(Algorithm::Crc32, path).expect("failed to calculate Sha512 hash");
assert_eq!(digest.encoded(), expected_crc32);
}
#[test]
fn test_verify_file_digest() {
let content = b"test content";
let temp_file = tempfile::NamedTempFile::new().expect("failed to create temp file");
let path = temp_file.path();
let mut file = File::create(path).expect("failed to create file");
file.write_all(content).expect("failed to write to file");
let expected_sha256_digest = Digest::new(
Algorithm::Sha256,
"6ae8a75555209fd6c44157c0aed8016e763ff435a19cf186f76863140143ff72".to_string(),
);
assert!(verify_file_digest(expected_sha256_digest, path).is_ok());
let expected_sha512_digest = Digest::new(
Algorithm::Sha512,
"0cbf4caef38047bba9a24e621a961484e5d2a92176a859e7eb27df343dd34eb98d538a6c5f4da1ce302ec250b821cc001e46cc97a704988297185a4df7e99602".to_string(),
);
assert!(verify_file_digest(expected_sha512_digest, path).is_ok());
let expected_crc32_digest = Digest::new(Algorithm::Crc32, "1475635037".to_string());
assert!(verify_file_digest(expected_crc32_digest, path).is_ok());
}
}

View File

@ -1,54 +0,0 @@
/*
* Copyright 2025 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_client_core::Result;
use tokio::fs;
/// fallocate allocates the space for the file, only on Linux.
///
/// A `length` of zero returns `Ok(())` right away without touching the file. On Linux the
/// file is first resized to `length` via `set_len`, then `fallocate` is issued with the
/// `KEEP_SIZE` flag over the range starting at offset 0 and spanning `length` bytes. The call
/// is repeated while it reports `INTR`; any other errno is returned as `Error::IO`. On every
/// other platform nothing is allocated and `Ok(())` is returned.
#[allow(unused_variables)]
pub async fn fallocate(f: &fs::File, length: u64) -> Result<()> {
    // Nothing to allocate for zero length. Skipping also avoids potential fallocate errors.
    if length == 0 {
        return Ok(());
    }

    #[cfg(target_os = "linux")]
    {
        use dragonfly_client_core::Error;
        use rustix::fs::{fallocate, FallocateFlags};
        use std::os::unix::io::AsFd;
        use tokio::io;

        // Resize the file up front; this may truncate a file that is currently longer.
        f.set_len(length).await?;

        let descriptor = f.as_fd();
        loop {
            let errno = match fallocate(descriptor, FallocateFlags::KEEP_SIZE, 0, length) {
                Ok(_) => return Ok(()),
                Err(errno) => errno,
            };

            // An interrupted call did not complete, so issue it again.
            if errno == rustix::io::Errno::INTR {
                continue;
            }

            return Err(Error::IO(io::Error::from_raw_os_error(
                errno.raw_os_error(),
            )));
        }
    }

    #[cfg(not(target_os = "linux"))]
    Ok(())
}

View File

@ -20,6 +20,7 @@ use dragonfly_client_core::{
Error, Result,
};
use http::header::{self, HeaderMap};
use tracing::instrument;
/// Credentials is the credentials for the basic auth.
pub struct Credentials {
@ -33,6 +34,7 @@ pub struct Credentials {
/// Credentials is the basic auth.
impl Credentials {
/// new returns a new Credentials.
#[instrument(skip_all)]
pub fn new(username: &str, password: &str) -> Credentials {
Self {
username: username.to_string(),

View File

@ -21,10 +21,12 @@ use dragonfly_client_core::{
};
use reqwest::header::{HeaderMap, HeaderName, HeaderValue};
use std::collections::HashMap;
use tracing::instrument;
pub mod basic_auth;
/// headermap_to_hashmap converts a headermap to a hashmap.
#[instrument(skip_all)]
pub fn headermap_to_hashmap(header: &HeaderMap<HeaderValue>) -> HashMap<String, String> {
let mut hashmap: HashMap<String, String> = HashMap::with_capacity(header.len());
for (k, v) in header {
@ -37,6 +39,7 @@ pub fn headermap_to_hashmap(header: &HeaderMap<HeaderValue>) -> HashMap<String,
}
/// hashmap_to_headermap converts a hashmap to a headermap.
#[instrument(skip_all)]
pub fn hashmap_to_headermap(header: &HashMap<String, String>) -> Result<HeaderMap<HeaderValue>> {
let mut headermap = HeaderMap::with_capacity(header.len());
for (k, v) in header {
@ -49,6 +52,7 @@ pub fn hashmap_to_headermap(header: &HashMap<String, String>) -> Result<HeaderMa
}
/// header_vec_to_hashmap converts a vector of header string to a hashmap.
#[instrument(skip_all)]
pub fn header_vec_to_hashmap(raw_header: Vec<String>) -> Result<HashMap<String, String>> {
let mut header = HashMap::with_capacity(raw_header.len());
for h in raw_header {
@ -61,11 +65,13 @@ pub fn header_vec_to_hashmap(raw_header: Vec<String>) -> Result<HashMap<String,
}
/// header_vec_to_headermap converts a vector of header string to a reqwest headermap.
#[instrument(skip_all)]
pub fn header_vec_to_headermap(raw_header: Vec<String>) -> Result<HeaderMap> {
hashmap_to_headermap(&header_vec_to_hashmap(raw_header)?)
}
/// get_range gets the range from http header.
#[instrument(skip_all)]
pub fn get_range(header: &HeaderMap, content_length: u64) -> Result<Option<Range>> {
match header.get(reqwest::header::RANGE) {
Some(range) => {
@ -79,6 +85,7 @@ pub fn get_range(header: &HeaderMap, content_length: u64) -> Result<Option<Range
/// parse_range_header parses a Range header string as per RFC 7233,
/// supported Range Header: "Range": "bytes=100-200", "Range": "bytes=-50",
/// "Range": "bytes=150-", "Range": "bytes=0-0,-1".
#[instrument(skip_all)]
pub fn parse_range_header(range_header_value: &str, content_length: u64) -> Result<Range> {
let parsed_ranges =
http_range_header::parse_range_header(range_header_value).or_err(ErrorType::ParseError)?;

View File

@ -20,10 +20,13 @@ use dragonfly_client_core::{
Result,
};
use sha2::{Digest, Sha256};
use std::io::{self, Read};
use std::hash::Hasher;
use std::io::Read;
use std::path::PathBuf;
use tracing::instrument;
use url::Url;
use uuid::Uuid;
use wyhash::WyHash;
/// SEED_PEER_SUFFIX is the suffix of the seed peer.
const SEED_PEER_SUFFIX: &str = "seed";
@ -31,34 +34,6 @@ const SEED_PEER_SUFFIX: &str = "seed";
/// PERSISTENT_CACHE_TASK_SUFFIX is the suffix of the persistent cache task.
const PERSISTENT_CACHE_TASK_SUFFIX: &str = "persistent-cache-task";
/// TaskIDParameter is the parameter of the task id. It selects which inputs
/// `IDGenerator::task_id` hashes to produce the id.
pub enum TaskIDParameter {
/// Content uses the content to generate the task id: the id is the hex-encoded SHA-256
/// digest of the content bytes.
Content(String),
/// URLBased uses the url, piece_length, tag, application and filtered_query_params to generate
/// the task id.
URLBased {
/// url is the url of the task. It must parse as a URL, otherwise generating the id fails
/// with a parse error.
url: String,
/// piece_length is added to the hash in its decimal string form when it is present.
piece_length: Option<u64>,
/// tag is added to the hash when it is present.
tag: Option<String>,
/// application is added to the hash when it is present.
application: Option<String>,
/// filtered_query_params lists the query parameter names that are removed from the url
/// before the url is hashed.
filtered_query_params: Vec<String>,
},
}
/// PersistentCacheTaskIDParameter is the parameter of the persistent cache task id. It selects
/// which inputs `IDGenerator::persistent_cache_task_id` feeds into its CRC32 hasher.
pub enum PersistentCacheTaskIDParameter {
/// Content uses the content to generate the persistent cache task id: the id is the CRC32
/// of the content bytes rendered as a decimal string.
Content(String),
/// FileContentBased uses the file path, piece_length, tag and application to generate the persistent cache task id.
FileContentBased {
/// path is the file whose bytes are read and hashed. Opening or reading it may fail, in
/// which case generating the id returns the error.
path: PathBuf,
/// piece_length is added to the hash in its decimal string form when it is present.
piece_length: Option<u64>,
/// tag is added to the hash when it is present.
tag: Option<String>,
/// application is added to the hash when it is present.
application: Option<String>,
},
}
/// IDGenerator is used to generate the id for the resources.
#[derive(Debug)]
pub struct IDGenerator {
@ -75,6 +50,7 @@ pub struct IDGenerator {
/// IDGenerator implements the IDGenerator.
impl IDGenerator {
/// new creates a new IDGenerator.
#[instrument(skip_all)]
pub fn new(ip: String, hostname: String, is_seed_peer: bool) -> Self {
IDGenerator {
ip,
@ -85,6 +61,7 @@ impl IDGenerator {
/// host_id generates the host id.
#[inline]
#[instrument(skip_all)]
pub fn host_id(&self) -> String {
if self.is_seed_peer {
return format!("{}-{}-{}", self.ip, self.hostname, "seed");
@ -95,124 +72,101 @@ impl IDGenerator {
/// task_id generates the task id.
#[inline]
pub fn task_id(&self, parameter: TaskIDParameter) -> Result<String> {
match parameter {
TaskIDParameter::Content(content) => {
Ok(hex::encode(Sha256::digest(content.as_bytes())))
}
TaskIDParameter::URLBased {
url,
piece_length,
tag,
application,
filtered_query_params,
} => {
// Filter the query parameters.
let url = Url::parse(url.as_str()).or_err(ErrorType::ParseError)?;
let query = url
.query_pairs()
.filter(|(k, _)| !filtered_query_params.contains(&k.to_string()));
#[instrument(skip_all)]
pub fn task_id(
&self,
url: &str,
tag: Option<&str>,
application: Option<&str>,
filtered_query_params: Vec<String>,
) -> Result<String> {
// Filter the query parameters.
let url = Url::parse(url).or_err(ErrorType::ParseError)?;
let query = url
.query_pairs()
.filter(|(k, _)| !filtered_query_params.contains(&k.to_string()));
let mut artifact_url = url.clone();
if query.clone().count() == 0 {
artifact_url.set_query(None);
} else {
artifact_url.query_pairs_mut().clear().extend_pairs(query);
}
let artifact_url_str = artifact_url.to_string();
let final_url = if artifact_url_str.ends_with('/') && artifact_url.path() == "/" {
artifact_url_str.trim_end_matches('/').to_string()
} else {
artifact_url_str
};
// Initialize the hasher.
let mut hasher = Sha256::new();
// Add the url to generate the task id.
hasher.update(final_url);
// Add the tag to generate the task id.
if let Some(tag) = tag {
hasher.update(tag);
}
// Add the application to generate the task id.
if let Some(application) = application {
hasher.update(application);
}
// Add the piece length to generate the task id.
if let Some(piece_length) = piece_length {
hasher.update(piece_length.to_string());
}
hasher.update(TaskType::Standard.as_str_name().as_bytes());
// Generate the task id.
Ok(hex::encode(hasher.finalize()))
}
let mut artifact_url = url.clone();
if query.clone().count() == 0 {
artifact_url.set_query(None);
} else {
artifact_url.query_pairs_mut().clear().extend_pairs(query);
}
let artifact_url_str = artifact_url.to_string();
let final_url = if artifact_url_str.ends_with('/') && artifact_url.path() == "/" {
artifact_url_str.trim_end_matches('/').to_string()
} else {
artifact_url_str
};
// Initialize the hasher.
let mut hasher = Sha256::new();
// Add the url to generate the task id.
hasher.update(final_url);
// Add the tag to generate the task id.
if let Some(tag) = tag {
hasher.update(tag);
}
// Add the application to generate the task id.
if let Some(application) = application {
hasher.update(application);
}
// Generate the task id.
Ok(hex::encode(hasher.finalize()))
}
/// persistent_cache_task_id generates the persistent cache task id.
#[inline]
#[instrument(skip_all)]
pub fn persistent_cache_task_id(
&self,
parameter: PersistentCacheTaskIDParameter,
path: &PathBuf,
tag: Option<&str>,
application: Option<&str>,
) -> Result<String> {
let mut hasher = crc32fast::Hasher::new();
match parameter {
PersistentCacheTaskIDParameter::Content(content) => {
hasher.update(content.as_bytes());
Ok(hasher.finalize().to_string())
// Calculate the hash of the file.
let f = std::fs::File::open(path)?;
let mut buffer = [0; 4096];
let mut reader = std::io::BufReader::with_capacity(buffer.len(), f);
let mut hasher = WyHash::default();
loop {
let n = reader.read(&mut buffer)?;
if n == 0 {
break;
}
PersistentCacheTaskIDParameter::FileContentBased {
path,
piece_length,
tag,
application,
} => {
// Calculate the hash of the file.
let f = std::fs::File::open(path)?;
let mut buffer = [0; 4096];
let mut reader = io::BufReader::with_capacity(buffer.len(), f);
loop {
match reader.read(&mut buffer) {
Ok(0) => break,
Ok(n) => hasher.update(&buffer[..n]),
Err(ref err) if err.kind() == io::ErrorKind::Interrupted => continue,
Err(err) => return Err(err.into()),
};
}
// Add the tag to generate the persistent cache task id.
if let Some(tag) = tag {
hasher.update(tag.as_bytes());
}
// Add the application to generate the persistent cache task id.
if let Some(application) = application {
hasher.update(application.as_bytes());
}
// Add the piece length to generate the persistent cache task id.
if let Some(piece_length) = piece_length {
hasher.update(piece_length.to_string().as_bytes());
}
hasher.update(TaskType::PersistentCache.as_str_name().as_bytes());
// Generate the task id by crc32.
Ok(hasher.finalize().to_string())
}
hasher.write(&buffer[..n]);
}
// Add the tag to generate the persistent cache task id.
if let Some(tag) = tag {
hasher.write(tag.as_bytes());
}
// Add the application to generate the persistent cache task id.
if let Some(application) = application {
hasher.write(application.as_bytes());
}
// Generate the task id by wyhash.
let id = hasher.finish().to_string();
// Generate the persistent cache task ID. The original ID is too short, so we calculate the SHA-256
// hash to ensure it can be prefix-searched by the storage engine.
let mut hasher = Sha256::new();
hasher.update(id);
Ok(hex::encode(hasher.finalize()))
}
/// peer_id generates the peer id.
#[inline]
#[instrument(skip_all)]
pub fn peer_id(&self) -> String {
if self.is_seed_peer {
return format!(
@ -228,6 +182,7 @@ impl IDGenerator {
}
/// task_type generates the task type by the task id.
#[instrument(skip_all)]
pub fn task_type(&self, id: &str) -> TaskType {
if id.ends_with(PERSISTENT_CACHE_TASK_SUFFIX) {
return TaskType::PersistentCache;
@ -267,140 +222,81 @@ mod tests {
let test_cases = vec![
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
TaskIDParameter::URLBased {
url: "https://example.com".to_string(),
piece_length: Some(1024_u64),
tag: Some("foo".to_string()),
application: Some("bar".to_string()),
filtered_query_params: vec![],
},
"27554d06dfc788c2c2c60e01960152ffbd4b145fc103fcb80b432b4dc238a6fe",
"https://example.com",
Some("foo"),
Some("bar"),
vec![],
"160fa7f001d9d2e893130894fbb60a5fb006e1d61bff82955f2946582bc9de1d",
),
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
TaskIDParameter::URLBased {
url: "https://example.com".to_string(),
piece_length: None,
tag: Some("foo".to_string()),
application: Some("bar".to_string()),
filtered_query_params: vec![],
},
"06408fbf247ddaca478f8cb9565fe5591c28efd0994b8fea80a6a87d3203c5ca",
"https://example.com",
Some("foo"),
None,
vec![],
"2773851c628744fb7933003195db436ce397c1722920696c4274ff804d86920b",
),
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
TaskIDParameter::URLBased {
url: "https://example.com".to_string(),
piece_length: None,
tag: Some("foo".to_string()),
application: None,
filtered_query_params: vec![],
},
"3c3f230ef9f191dd2821510346a7bc138e4894bee9aee184ba250a3040701d2a",
"https://example.com",
None,
Some("bar"),
vec![],
"63dee2822037636b0109876b58e95692233840753a882afa69b9b5ee82a6c57d",
),
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
TaskIDParameter::URLBased {
url: "https://example.com".to_string(),
piece_length: None,
tag: None,
application: Some("bar".to_string()),
filtered_query_params: vec![],
},
"c9f9261b7305c24371244f9f149f5d4589ed601348fdf22d7f6f4b10658fdba2",
),
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
TaskIDParameter::URLBased {
url: "https://example.com".to_string(),
piece_length: Some(1024_u64),
tag: None,
application: None,
filtered_query_params: vec![],
},
"9f7c9aafbc6f30f8f41a96ca77eeae80c5b60964b3034b0ee43ccf7b2f9e52b8",
),
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
TaskIDParameter::URLBased {
url: "https://example.com?foo=foo&bar=bar".to_string(),
piece_length: None,
tag: None,
application: None,
filtered_query_params: vec!["foo".to_string(), "bar".to_string()],
},
"457b4328cde278e422c9e243f7bfd1e97f511fec43a80f535cf6b0ef6b086776",
),
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
TaskIDParameter::Content("This is a test file".to_string()),
"e2d0fe1585a63ec6009c8016ff8dda8b17719a637405a4e23c0ff81339148249",
"https://example.com?foo=foo&bar=bar",
None,
None,
vec!["foo".to_string(), "bar".to_string()],
"100680ad546ce6a577f42f52df33b4cfdca756859e664b8d7de329b150d09ce9",
),
];
for (generator, parameter, expected_id) in test_cases {
let task_id = generator.task_id(parameter).unwrap();
for (generator, url, tag, application, filtered_query_params, expected_id) in test_cases {
let task_id = generator
.task_id(url, tag, application, filtered_query_params)
.unwrap();
assert_eq!(task_id, expected_id);
}
}
#[test]
fn should_generate_persistent_cache_task_id() {
let dir = tempdir().unwrap();
let file_path = dir.path().join("testfile");
let mut f = File::create(&file_path).unwrap();
f.write_all("This is a test file".as_bytes()).unwrap();
let test_cases = vec![
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
PersistentCacheTaskIDParameter::FileContentBased {
path: file_path.clone(),
piece_length: Some(1024_u64),
tag: Some("tag1".to_string()),
application: Some("app1".to_string()),
},
"3490958009",
"This is a test file",
Some("tag1"),
Some("app1"),
"ed401a8aa6b9a47b426d2aa01245127d9ac2d1b7974ca866719da59b5456ac4d",
),
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
PersistentCacheTaskIDParameter::FileContentBased {
path: file_path.clone(),
piece_length: None,
tag: None,
application: Some("app1".to_string()),
},
"735741469",
"This is a test file",
None,
Some("app1"),
"4cbb2c5142f609e98a7d9a887c6404c7432475a52d6c64c52d543b5614a99c63",
),
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
PersistentCacheTaskIDParameter::FileContentBased {
path: file_path.clone(),
piece_length: None,
tag: Some("tag1".to_string()),
application: None,
},
"3954905097",
),
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
PersistentCacheTaskIDParameter::FileContentBased {
path: file_path.clone(),
piece_length: Some(1024_u64),
tag: None,
application: None,
},
"4162557545",
),
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
PersistentCacheTaskIDParameter::Content("This is a test file".to_string()),
"107352521",
"This is a test file",
Some("tag1"),
None,
"65094f31f9997904f779a27ed0d1ce460c9c4082f214e7626a179f2ea491d34e",
),
];
for (generator, parameter, expected_id) in test_cases {
let task_id = generator.persistent_cache_task_id(parameter).unwrap();
for (generator, file_content, tag, application, expected_id) in test_cases {
let dir = tempdir().unwrap();
let file_path = dir.path().join("testfile");
let mut f = File::create(&file_path).unwrap();
f.write_all(file_content.as_bytes()).unwrap();
let task_id = generator
.persistent_cache_task_id(&file_path, tag, application)
.unwrap();
assert_eq!(task_id, expected_id);
}
}

View File

@ -15,8 +15,6 @@
*/
pub mod digest;
pub mod fs;
pub mod http;
pub mod id_generator;
pub mod net;
pub mod tls;

View File

@ -1,230 +0,0 @@
/*
* Copyright 2025 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use bytesize::ByteSize;
use pnet::datalink::{self, NetworkInterface};
use std::cmp::min;
use std::net::IpAddr;
use std::sync::Arc;
use std::time::Duration;
use sysinfo::Networks;
use tokio::sync::Mutex;
use tracing::{info, warn};
/// Interface represents a network interface with its information.
///
/// Cloning an Interface shares the same `network_data_mutex`, because the mutex is held
/// behind an `Arc`.
#[derive(Debug, Clone, Default)]
pub struct Interface {
/// name is the name of the network interface. `Interface::new` sets it to "unknown" when
/// no interface owns the requested IP address.
pub name: String,
/// bandwidth is the bandwidth of the network interface in bps. `Interface::new` sets it to
/// the smaller of the link speed and the rate limit, or to the rate limit alone when the
/// interface or its speed can not be determined.
pub bandwidth: u64,
/// network_data_mutex is a mutex to protect access to network data. It is locked for the
/// whole duration of `get_network_data`.
network_data_mutex: Arc<Mutex<()>>,
}
/// NetworkData represents the network data for a specific interface, as returned by
/// `Interface::get_network_data`.
#[derive(Debug, Clone, Default)]
pub struct NetworkData {
/// max_rx_bandwidth is the maximum receive bandwidth of the interface in bps. It is filled
/// from the `bandwidth` of the Interface.
pub max_rx_bandwidth: u64,
/// rx_bandwidth is the current receive bandwidth of the interface in bps. It is `None` when
/// no network data could be found for the interface.
pub rx_bandwidth: Option<u64>,
/// max_tx_bandwidth is the maximum transmit bandwidth of the interface in bps. It is filled
/// from the `bandwidth` of the Interface.
pub max_tx_bandwidth: u64,
/// tx_bandwidth is the current transmit bandwidth of the interface in bps. It is `None` when
/// no network data could be found for the interface.
pub tx_bandwidth: Option<u64>,
}
/// Interface methods provide functionality to get network interface information.
impl Interface {
    /// DEFAULT_NETWORKS_REFRESH_INTERVAL is the default interval for refreshing network data.
    /// It is also the sampling window that `get_network_data` sleeps for between two refreshes.
    const DEFAULT_NETWORKS_REFRESH_INTERVAL: Duration = Duration::from_secs(2);

    /// new creates a new Interface instance based on the provided IP address and rate limit.
    ///
    /// The rate limit is converted to bits per second and caps the resulting bandwidth:
    /// - when no interface owns `ip`, the interface is named "unknown" and uses the rate limit;
    /// - when the link speed is known, the bandwidth is the smaller of speed and rate limit;
    /// - when the link speed is unknown, the bandwidth is the rate limit.
    pub fn new(ip: IpAddr, rate_limit: ByteSize) -> Interface {
        let rate_limit = Self::byte_size_to_bits(rate_limit); // convert to bps
        let Some(interface) = Self::get_network_interface_by_ip(ip) else {
            warn!(
                "can not find interface for IP address {}, network interface unknown with bandwidth {} bps",
                ip, rate_limit
            );

            return Interface {
                name: "unknown".to_string(),
                bandwidth: rate_limit,
                network_data_mutex: Arc::new(Mutex::new(())),
            };
        };

        // Resolve the effective bandwidth first so the Interface is built in a single place.
        let bandwidth = match Self::get_speed(&interface.name) {
            Some(speed) => {
                let bandwidth = min(Self::megabits_to_bits(speed), rate_limit);
                info!(
                    "network interface {} with bandwidth {} bps",
                    interface.name, bandwidth
                );

                bandwidth
            }
            None => {
                warn!(
                    "can not get speed, network interface {} with bandwidth {} bps",
                    interface.name, rate_limit
                );

                rate_limit
            }
        };

        Interface {
            name: interface.name,
            bandwidth,
            network_data_mutex: Arc::new(Mutex::new(())),
        }
    }

    /// get_network_data retrieves the network data for the interface.
    ///
    /// The call takes at least DEFAULT_NETWORKS_REFRESH_INTERVAL to complete, because the
    /// current rates are derived from the traffic observed between two refreshes. The mutex is
    /// held for the whole call. When the interface is not present in the refreshed data, only
    /// the maximum bandwidths are filled and the current rates are `None`.
    pub async fn get_network_data(&self) -> NetworkData {
        // Lock the mutex to ensure exclusive access to network data.
        let _guard = self.network_data_mutex.lock().await;

        // Initialize sysinfo network.
        let mut networks = Networks::new_with_refreshed_list();

        // Sleep to calculate the network traffic difference over
        // the DEFAULT_NETWORKS_REFRESH_INTERVAL.
        tokio::time::sleep(Self::DEFAULT_NETWORKS_REFRESH_INTERVAL).await;

        // Refresh network information.
        networks.refresh();
        let Some(network_data) = networks.get(self.name.as_str()) else {
            warn!("can not find network data for interface {}", self.name);
            return NetworkData {
                max_rx_bandwidth: self.bandwidth,
                max_tx_bandwidth: self.bandwidth,
                ..Default::default()
            };
        };

        // Convert a byte count observed over the refresh interval into bits per second.
        let interval_secs = Self::DEFAULT_NETWORKS_REFRESH_INTERVAL.as_secs_f64();
        let to_bps = |bytes: u64| (Self::bytes_to_bits(bytes) as f64 / interval_secs).round() as u64;

        // Calculate the receive bandwidth in bits per second.
        let rx_bandwidth = to_bps(network_data.received());

        // Calculate the transmit bandwidth in bits per second.
        let tx_bandwidth = to_bps(network_data.transmitted());

        NetworkData {
            max_rx_bandwidth: self.bandwidth,
            rx_bandwidth: Some(rx_bandwidth),
            max_tx_bandwidth: self.bandwidth,
            tx_bandwidth: Some(tx_bandwidth),
        }
    }

    /// get_speed returns the speed of the network interface in Mbps.
    ///
    /// On Linux the value is read from `/sys/class/net/<name>/speed`; `None` is returned when
    /// the file can not be read or its content is not an unsigned integer. On other platforms
    /// a warning is logged and `None` is returned.
    pub fn get_speed(name: &str) -> Option<u64> {
        #[cfg(target_os = "linux")]
        {
            let speed_path = format!("/sys/class/net/{}/speed", name);
            std::fs::read_to_string(&speed_path)
                .ok()
                .and_then(|speed_str| speed_str.trim().parse::<u64>().ok())
        }

        #[cfg(not(target_os = "linux"))]
        {
            warn!("can not get interface {} speed on non-linux platform", name);
            None
        }
    }

    /// get_network_interface_by_ip returns the network interface that has the specified
    /// IP address, or `None` when no interface is configured with it.
    pub fn get_network_interface_by_ip(ip: IpAddr) -> Option<NetworkInterface> {
        datalink::interfaces()
            .into_iter()
            .find(|interface| interface.ips.iter().any(|ip_net| ip_net.ip() == ip))
    }

    /// byte_size_to_bits converts a ByteSize to bits. The result saturates at `u64::MAX`
    /// instead of overflowing for very large sizes.
    pub fn byte_size_to_bits(size: ByteSize) -> u64 {
        size.as_u64().saturating_mul(8)
    }

    /// megabits_to_bits converts megabits to bits. The result saturates at `u64::MAX`
    /// instead of overflowing for very large values.
    pub fn megabits_to_bits(size: u64) -> u64 {
        size.saturating_mul(1_000_000) // 1 Mbit = 1,000,000 bits
    }

    /// bytes_to_bits converts bytes to bits. The result saturates at `u64::MAX` instead of
    /// overflowing for very large values.
    pub fn bytes_to_bits(size: u64) -> u64 {
        size.saturating_mul(8) // 1 byte = 8 bits
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use bytesize::ByteSize;

    /// Checks that decimal byte sizes are multiplied by eight.
    #[test]
    fn test_byte_size_to_bits() {
        let cases = [
            (ByteSize::kb(1), 8_000u64),
            (ByteSize::mb(1), 8_000_000u64),
            (ByteSize::gb(1), 8_000_000_000u64),
            (ByteSize::b(0), 0u64),
        ];

        for (size, want) in cases {
            assert_eq!(Interface::byte_size_to_bits(size), want);
        }
    }

    /// Checks that one megabit is treated as 1,000,000 bits.
    #[test]
    fn test_megabits_to_bits() {
        let cases = [
            (1u64, 1_000_000u64),
            (1000u64, 1_000_000_000u64),
            (0u64, 0u64),
        ];

        for (megabits, want) in cases {
            assert_eq!(Interface::megabits_to_bits(megabits), want);
        }
    }

    /// Checks that one byte is treated as eight bits.
    #[test]
    fn test_bytes_to_bits() {
        let cases = [(1u64, 8u64), (1000u64, 8_000u64), (0u64, 0u64)];

        for (bytes, want) in cases {
            assert_eq!(Interface::bytes_to_bits(bytes), want);
        }
    }
}

View File

@ -16,35 +16,14 @@
use dragonfly_client_core::error::{ErrorType, OrErr};
use dragonfly_client_core::{Error as ClientError, Result as ClientResult};
use lazy_static::lazy_static;
use lru::LruCache;
use rcgen::{Certificate, CertificateParams, KeyPair};
use rustls_pki_types::{CertificateDer, PrivateKeyDer, ServerName, UnixTime};
use std::num::NonZeroUsize;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use std::sync::Arc;
use std::vec::Vec;
use std::{fs, io};
use tracing::instrument;
/// DEFAULT_CERTS_CACHE_CAPACITY is the default capacity of the certificates cache, i.e. the
/// number of entries each of the LRU caches below is created with.
const DEFAULT_CERTS_CACHE_CAPACITY: usize = 1000;
/// CertKeyPair is the type of the certificate and private key pair: the certificate chain
/// together with its private key.
type CertKeyPair = (Vec<CertificateDer<'static>>, PrivateKeyDer<'static>);
lazy_static! {
/// SELF_SIGNED_CERTS is an LRU cache, keyed by host, that stores the self-signed certificates
/// signed by a CA certificate to avoid generating the same certificates multiple times.
static ref SELF_SIGNED_CERTS: Arc<Mutex<LruCache<String, CertKeyPair>>> =
Arc::new(Mutex::new(LruCache::new(NonZeroUsize::new(DEFAULT_CERTS_CACHE_CAPACITY).unwrap())));
/// SIMPLE_SELF_SIGNED_CERTS is an LRU cache, keyed by host, that stores the simple self-signed
/// certificates to avoid generating the same certificates multiple times.
static ref SIMPLE_SELF_SIGNED_CERTS: Arc<Mutex<LruCache<String, CertKeyPair>>> =
Arc::new(Mutex::new(LruCache::new(NonZeroUsize::new(DEFAULT_CERTS_CACHE_CAPACITY).unwrap())));
}
/// NoVerifier is a verifier that does not verify the server certificate.
/// It is used for testing and should not be used in production.
#[derive(Debug)]
@ -145,15 +124,8 @@ pub fn generate_cert_from_pem(cert_path: &PathBuf) -> ClientResult<Vec<Certifica
#[instrument(skip_all)]
pub fn generate_self_signed_certs_by_ca_cert(
ca_cert: &Certificate,
host: &str,
subject_alt_names: Vec<String>,
) -> ClientResult<(Vec<CertificateDer<'static>>, PrivateKeyDer<'static>)> {
let mut cache = SELF_SIGNED_CERTS.lock().unwrap();
if let Some((certs, key)) = cache.get(host) {
return Ok((certs.clone(), key.clone_key()));
};
drop(cache);
// Sign certificate with CA certificate by given subject alternative names.
let params = CertificateParams::new(subject_alt_names);
let cert = Certificate::from_params(params).or_err(ErrorType::CertificateError)?;
@ -171,23 +143,14 @@ pub fn generate_self_signed_certs_by_ca_cert(
let key = rustls_pemfile::private_key(&mut key_pem_reader)?
.ok_or_else(|| ClientError::Unknown("failed to load private key".to_string()))?;
let mut cache = SELF_SIGNED_CERTS.lock().unwrap();
cache.push(host.to_string(), (certs.clone(), key.clone_key()));
Ok((certs, key))
}
/// generate_simple_self_signed_certs generates a simple self-signed certificates
#[instrument(skip_all)]
pub fn generate_simple_self_signed_certs(
host: &str,
subject_alt_names: impl Into<Vec<String>>,
) -> ClientResult<(Vec<CertificateDer<'static>>, PrivateKeyDer<'static>)> {
let mut cache = SIMPLE_SELF_SIGNED_CERTS.lock().unwrap();
if let Some((certs, key)) = cache.get(host) {
return Ok((certs.clone(), key.clone_key()));
};
drop(cache);
let cert = rcgen::generate_simple_self_signed(subject_alt_names)
.or_err(ErrorType::CertificateError)?;
let key = rustls_pki_types::PrivateKeyDer::Pkcs8(cert.serialize_private_key_der().into());
@ -196,8 +159,6 @@ pub fn generate_simple_self_signed_certs(
.or_err(ErrorType::CertificateError)?
.into()];
let mut cache = SIMPLE_SELF_SIGNED_CERTS.lock().unwrap();
cache.push(host.to_string(), (certs.clone(), key.clone_key()));
Ok((certs, key))
}
@ -370,10 +331,9 @@ Z+yQ5jhu/fmSBNhqO/8Lp+Y=
&ca_key_file.path().to_path_buf(),
)
.unwrap();
let host = "example.com";
let subject_alt_names = vec![host.to_string()];
let subject_alt_names = vec!["example.com".to_string()];
let result = generate_self_signed_certs_by_ca_cert(&ca_cert, host, subject_alt_names);
let result = generate_self_signed_certs_by_ca_cert(&ca_cert, subject_alt_names);
assert!(result.is_ok());
let (certs, key) = result.unwrap();
assert!(!certs.is_empty());

View File

@ -18,6 +18,10 @@ path = "src/bin/dfdaemon/main.rs"
name = "dfget"
path = "src/bin/dfget/main.rs"
[[bin]]
name = "dfstore"
path = "src/bin/dfstore/main.rs"
[[bin]]
name = "dfcache"
path = "src/bin/dfcache/main.rs"
@ -34,6 +38,8 @@ hyper.workspace = true
hyper-util.workspace = true
hyper-rustls.workspace = true
tracing.workspace = true
validator.workspace = true
humantime.workspace = true
serde.workspace = true
chrono.workspace = true
prost-wkt-types.workspace = true
@ -51,42 +57,39 @@ http.workspace = true
openssl.workspace = true
clap.workspace = true
anyhow.workspace = true
bytes.workspace = true
blake3.workspace = true
bytesize.workspace = true
humantime.workspace = true
uuid.workspace = true
percent-encoding.workspace = true
tokio-rustls.workspace = true
serde_json.workspace = true
lru.workspace = true
fs2.workspace = true
lazy_static.workspace = true
futures.workspace = true
local-ip-address.workspace = true
sysinfo.workspace = true
tracing-appender = "0.2.3"
lazy_static = "1.5"
tracing-log = "0.2"
tracing-subscriber = { version = "0.3", features = ["env-filter", "time", "chrono"] }
tracing-panic = "0.1.2"
tracing-opentelemetry = "0.30.0"
opentelemetry = { version = "0.29.1", default-features = false, features = ["trace"] }
opentelemetry-otlp = { version = "0.29.0", default-features = false, features = ["trace", "grpc-tonic", "http-proto", "reqwest-blocking-client"] }
opentelemetry_sdk = { version = "0.29.0", default-features = false, features = ["trace", "rt-tokio"] }
opentelemetry-semantic-conventions = { version = "0.30.0", features = ["semconv_experimental"] }
tracing-appender = "0.2.3"
rolling-file = "0.2.0"
pprof = { version = "0.15", features = ["flamegraph", "protobuf-codec"] }
tracing-opentelemetry = "0.18.0"
tracing-flame = "0.2.0"
opentelemetry = { version = "0.18.0", default-features = false, features = ["trace", "rt-tokio"] }
opentelemetry-jaeger = { version = "0.17.0", features = ["rt-tokio"] }
pprof = { version = "0.14", features = ["flamegraph", "protobuf-codec"] }
prometheus = { version = "0.13", features = ["process"] }
tonic-health = "0.12.3"
tower = { version = "0.4.13", features = ["limit", "load-shed", "buffer"] }
indicatif = "0.18.0"
hashring = "0.3.6"
leaky-bucket = "1.1.2"
http-body-util = "0.1.3"
termion = "4.0.5"
tabled = "0.20.0"
path-absolutize = "3.1.1"
bytes = "1.10"
sysinfo = "0.32.1"
tower = "0.4.13"
indicatif = "0.17.11"
dashmap = "6.1.0"
fastrand = "2.3.0"
glob = "0.3.3"
console-subscriber = "0.4.1"
hashring = "0.3.6"
fslock = "0.2.1"
leaky-bucket = "1.1.2"
http-body-util = "0.1.2"
futures-util = "0.3.31"
termion = "4.0.3"
tabled = "0.18.0"
path-absolutize = "3.1.1"
[dev-dependencies]
tempfile.workspace = true
@ -117,6 +120,11 @@ assets = [
"usr/bin/dfcache",
"755",
],
[
"../target/x86_64-unknown-linux-gnu/release/dfstore",
"usr/bin/dfstore",
"755",
],
[
"../ci/dfdaemon.service",
"lib/systemd/system/dfdaemon.service",
@ -159,6 +167,11 @@ assets = [
"usr/bin/dfcache",
"755",
],
[
"../target/x86_64-unknown-linux-musl/release/dfstore",
"usr/bin/dfstore",
"755",
],
[
"../ci/dfdaemon.service",
"lib/systemd/system/dfdaemon.service",
@ -201,6 +214,11 @@ assets = [
"usr/bin/dfcache",
"755",
],
[
"../target/aarch64-unknown-linux-gnu/release/dfstore",
"usr/bin/dfstore",
"755",
],
[
"../ci/dfdaemon.service",
"lib/systemd/system/dfdaemon.service",
@ -243,6 +261,11 @@ assets = [
"usr/bin/dfcache",
"755",
],
[
"../target/aarch64-unknown-linux-musl/release/dfstore",
"usr/bin/dfstore",
"755",
],
[
"../ci/dfdaemon.service",
"lib/systemd/system/dfdaemon.service",
@ -270,6 +293,7 @@ assets = [
{ source = "../target/x86_64-unknown-linux-gnu/release/dfget", dest = "/usr/bin/dfget", mode = "755" },
{ source = "../target/x86_64-unknown-linux-gnu/release/dfdaemon", dest = "/usr/bin/dfdaemon", mode = "755" },
{ source = "../target/x86_64-unknown-linux-gnu/release/dfcache", dest = "/usr/bin/dfcache", mode = "755" },
{ source = "../target/x86_64-unknown-linux-gnu/release/dfstore", dest = "/usr/bin/dfstore", mode = "755" },
{ source = "../ci/dfdaemon.service", dest = "/lib/systemd/system/dfdaemon.service", config = true, mode = "644" },
{ source = "../CONTRIBUTING.md", dest = "/usr/share/doc/client/CONTRIBUTING.md", mode = "644", doc = true },
{ source = "../LICENSE", dest = "/usr/share/doc/client/LICENSE.md", mode = "644", doc = true },
@ -281,6 +305,7 @@ assets = [
{ source = "../target/x86_64-unknown-linux-musl/release/dfget", dest = "/usr/bin/dfget", mode = "755" },
{ source = "../target/x86_64-unknown-linux-musl/release/dfdaemon", dest = "/usr/bin/dfdaemon", mode = "755" },
{ source = "../target/x86_64-unknown-linux-musl/release/dfcache", dest = "/usr/bin/dfcache", mode = "755" },
{ source = "../target/x86_64-unknown-linux-musl/release/dfstore", dest = "/usr/bin/dfstore", mode = "755" },
{ source = "../ci/dfdaemon.service", dest = "/lib/systemd/system/dfdaemon.service", config = true, mode = "644" },
{ source = "../CONTRIBUTING.md", dest = "/usr/share/doc/client/CONTRIBUTING.md", mode = "644", doc = true },
{ source = "../LICENSE", dest = "/usr/share/doc/client/LICENSE.md", mode = "644", doc = true },
@ -293,6 +318,7 @@ assets = [
{ source = "../target/aarch64-unknown-linux-gnu/release/dfget", dest = "/usr/bin/dfget", mode = "755" },
{ source = "../target/aarch64-unknown-linux-gnu/release/dfdaemon", dest = "/usr/bin/dfdaemon", mode = "755" },
{ source = "../target/aarch64-unknown-linux-gnu/release/dfcache", dest = "/usr/bin/dfcache", mode = "755" },
{ source = "../target/aarch64-unknown-linux-gnu/release/dfstore", dest = "/usr/bin/dfstore", mode = "755" },
{ source = "../ci/dfdaemon.service", dest = "/lib/systemd/system/dfdaemon.service", config = true, mode = "644" },
{ source = "../CONTRIBUTING.md", dest = "/usr/share/doc/client/CONTRIBUTING.md", mode = "644", doc = true },
{ source = "../LICENSE", dest = "/usr/share/doc/client/LICENSE.md", mode = "644", doc = true },
@ -304,6 +330,7 @@ assets = [
{ source = "../target/aarch64-unknown-linux-musl/release/dfget", dest = "/usr/bin/dfget", mode = "755" },
{ source = "../target/aarch64-unknown-linux-musl/release/dfdaemon", dest = "/usr/bin/dfdaemon", mode = "755" },
{ source = "../target/aarch64-unknown-linux-musl/release/dfcache", dest = "/usr/bin/dfcache", mode = "755" },
{ source = "../target/aarch64-unknown-linux-musl/release/dfstore", dest = "/usr/bin/dfstore", mode = "755" },
{ source = "../ci/dfdaemon.service", dest = "/lib/systemd/system/dfdaemon.service", config = true, mode = "644" },
{ source = "../CONTRIBUTING.md", dest = "/usr/share/doc/client/CONTRIBUTING.md", mode = "644", doc = true },
{ source = "../LICENSE", dest = "/usr/share/doc/client/LICENSE.md", mode = "644", doc = true },

View File

@ -14,9 +14,10 @@
* limitations under the License.
*/
use crate::grpc::scheduler::SchedulerClient;
use crate::grpc::{manager::ManagerClient, scheduler::SchedulerClient};
use crate::shutdown;
use dragonfly_api::common::v2::{Build, Cpu, Disk, Host, Memory, Network};
use dragonfly_api::manager::v2::{DeleteSeedPeerRequest, SourceType, UpdateSeedPeerRequest};
use dragonfly_api::scheduler::v2::{AnnounceHostRequest, DeleteHostRequest};
use dragonfly_client_config::{
dfdaemon::{Config, HostType},
@ -24,13 +25,91 @@ use dragonfly_client_config::{
};
use dragonfly_client_core::error::{ErrorType, OrErr};
use dragonfly_client_core::Result;
use dragonfly_client_util::net::Interface;
use std::env;
use std::sync::Arc;
use std::sync::{Arc, Mutex};
use std::time::Duration;
use sysinfo::System;
use tokio::sync::mpsc;
use tracing::{debug, error, info, instrument};
use tracing::{error, info, instrument};
/// ManagerAnnouncer registers this dfdaemon with the manager as a seed peer
/// when seed-peer mode is enabled, and removes that registration again once
/// shutdown is requested.
pub struct ManagerAnnouncer {
/// config is the shared dfdaemon configuration; the announcer reads the
/// seed peer, host and upload server settings from it.
config: Arc<Config>,
/// manager_client is the grpc client used to update and delete the seed
/// peer record on the manager.
manager_client: Arc<ManagerClient>,
/// shutdown is the shutdown listener; `run` clones it and waits on it
/// before leaving.
shutdown: shutdown::Shutdown,
/// _shutdown_complete is the sender half of the shutdown-complete channel.
/// It is never read here, only held for the lifetime of the announcer.
/// NOTE(review): presumably dropping it is what tells the receiver that
/// this announcer has finished - confirm against the caller.
_shutdown_complete: mpsc::UnboundedSender<()>,
}
/// ManagerAnnouncer implements the manager announcer of the dfdaemon.
impl ManagerAnnouncer {
    /// new assembles a manager announcer from the shared configuration, the
    /// manager grpc client and the shutdown plumbing.
    #[instrument(skip_all)]
    pub fn new(
        config: Arc<Config>,
        manager_client: Arc<ManagerClient>,
        shutdown: shutdown::Shutdown,
        shutdown_complete_tx: mpsc::UnboundedSender<()>,
    ) -> Self {
        Self {
            _shutdown_complete: shutdown_complete_tx,
            shutdown,
            manager_client,
            config,
        }
    }

    /// run keeps the manager informed about this dfdaemon until shutdown.
    ///
    /// When seed-peer mode is enabled, the seed peer is registered with the
    /// manager, the task then blocks until the shutdown signal arrives, and
    /// finally the seed peer is deleted from the manager. When seed-peer mode
    /// is disabled, the task only waits for the shutdown signal.
    ///
    /// Returns an error if either manager request fails.
    #[instrument(skip_all)]
    pub async fn run(&self) -> Result<()> {
        // Work on a private copy of the shutdown listener.
        let mut shutdown = self.shutdown.clone();

        // A regular peer has nothing to announce to the manager; just wait
        // for the shutdown signal.
        if !self.config.seed_peer.enable {
            shutdown.recv().await;
            info!("announce to manager shutting down");
            return Ok(());
        }

        // Register the seed peer with the manager. The upload server port is
        // reported as both the port and the download port.
        let register_request = UpdateSeedPeerRequest {
            source_type: SourceType::SeedPeerSource.into(),
            hostname: self.config.host.hostname.clone(),
            r#type: self.config.seed_peer.kind.to_string(),
            idc: self.config.host.idc.clone(),
            location: self.config.host.location.clone(),
            ip: self.config.host.ip.unwrap().to_string(),
            port: self.config.upload.server.port as i32,
            download_port: self.config.upload.server.port as i32,
            seed_peer_cluster_id: self.config.seed_peer.cluster_id,
        };
        self.manager_client
            .update_seed_peer(register_request)
            .await?;

        // Block until the shutdown signal is received.
        shutdown.recv().await;

        // Remove the seed peer from the manager before leaving.
        let unregister_request = DeleteSeedPeerRequest {
            source_type: SourceType::SeedPeerSource.into(),
            hostname: self.config.host.hostname.clone(),
            ip: self.config.host.ip.unwrap().to_string(),
            seed_peer_cluster_id: self.config.seed_peer.cluster_id,
        };
        self.manager_client
            .delete_seed_peer(unregister_request)
            .await?;

        info!("announce to manager shutting down");
        Ok(())
    }
}
/// SchedulerAnnouncer is used to announce the dfdaemon information to the scheduler.
pub struct SchedulerAnnouncer {
@ -43,8 +122,8 @@ pub struct SchedulerAnnouncer {
/// scheduler_client is the grpc client of the scheduler.
scheduler_client: Arc<SchedulerClient>,
/// interface is the network interface.
interface: Arc<Interface>,
// system is the system information.
system: Arc<Mutex<System>>,
/// shutdown is used to shutdown the announcer.
shutdown: shutdown::Shutdown,
@ -56,11 +135,11 @@ pub struct SchedulerAnnouncer {
/// SchedulerAnnouncer implements the scheduler announcer of the dfdaemon.
impl SchedulerAnnouncer {
/// new creates a new scheduler announcer.
#[instrument(skip_all)]
pub async fn new(
config: Arc<Config>,
host_id: String,
scheduler_client: Arc<SchedulerClient>,
interface: Arc<Interface>,
shutdown: shutdown::Shutdown,
shutdown_complete_tx: mpsc::UnboundedSender<()>,
) -> Result<Self> {
@ -68,7 +147,7 @@ impl SchedulerAnnouncer {
config,
host_id,
scheduler_client,
interface,
system: Arc::new(Mutex::new(System::new_all())),
shutdown,
_shutdown_complete: shutdown_complete_tx,
};
@ -76,12 +155,13 @@ impl SchedulerAnnouncer {
// Initialize the scheduler announcer.
announcer
.scheduler_client
.init_announce_host(announcer.make_announce_host_request(Duration::ZERO).await?)
.init_announce_host(announcer.make_announce_host_request(Duration::ZERO)?)
.await?;
Ok(announcer)
}
/// run announces the dfdaemon information to the scheduler.
#[instrument(skip_all)]
pub async fn run(&self) {
// Clone the shutdown channel.
let mut shutdown = self.shutdown.clone();
@ -91,7 +171,7 @@ impl SchedulerAnnouncer {
loop {
tokio::select! {
_ = interval.tick() => {
let request = match self.make_announce_host_request(interval.period()).await {
let request = match self.make_announce_host_request(interval.period()) {
Ok(request) => request,
Err(err) => {
error!("make announce host request failed: {}", err);
@ -120,7 +200,7 @@ impl SchedulerAnnouncer {
/// make_announce_host_request makes the announce host request.
#[instrument(skip_all)]
async fn make_announce_host_request(&self, interval: Duration) -> Result<AnnounceHostRequest> {
fn make_announce_host_request(&self, interval: Duration) -> Result<AnnounceHostRequest> {
// If the seed peer is enabled, we should announce the seed peer to the scheduler.
let host_type = if self.config.seed_peer.enable {
self.config.seed_peer.kind
@ -129,7 +209,7 @@ impl SchedulerAnnouncer {
};
// Refresh the system information.
let mut sys = System::new_all();
let mut sys = self.system.lock().unwrap();
sys.refresh_all();
// Get the process information.
@ -156,25 +236,25 @@ impl SchedulerAnnouncer {
free: sys.free_memory(),
};
// Wait for getting the network data.
let network_data = self.interface.get_network_data().await;
debug!(
"network data: rx bandwidth {}/{} bps, tx bandwidth {}/{} bps",
network_data.rx_bandwidth.unwrap_or(0),
network_data.max_rx_bandwidth,
network_data.tx_bandwidth.unwrap_or(0),
network_data.max_tx_bandwidth
);
// Get the network information.
let network = Network {
// TODO: Get the count of the tcp connection.
tcp_connection_count: 0,
// TODO: Get the count of the upload tcp connection.
upload_tcp_connection_count: 0,
idc: self.config.host.idc.clone(),
location: self.config.host.location.clone(),
max_rx_bandwidth: network_data.max_rx_bandwidth,
rx_bandwidth: network_data.rx_bandwidth,
max_tx_bandwidth: network_data.max_tx_bandwidth,
tx_bandwidth: network_data.tx_bandwidth,
..Default::default()
// TODO: Get the network download rate, refer to
// https://docs.rs/sysinfo/latest/sysinfo/struct.NetworkData.html#method.received.
download_rate: 0,
download_rate_limit: self.config.download.rate_limit.as_u64(),
// TODO: Get the network upload rate, refer to
// https://docs.rs/sysinfo/latest/sysinfo/struct.NetworkData.html#method.transmitted
upload_rate: 0,
upload_rate_limit: self.config.upload.rate_limit.as_u64(),
};
// Get the disk information.

View File

@ -23,9 +23,7 @@ use dragonfly_client_core::{
error::{ErrorType, OrErr},
Error, Result,
};
use dragonfly_client_util::fs::fallocate;
use indicatif::{ProgressBar, ProgressState, ProgressStyle};
use local_ip_address::local_ip;
use path_absolutize::*;
use std::path::{Path, PathBuf};
use std::time::Duration;
@ -50,13 +48,6 @@ pub struct ExportCommand {
)]
transfer_from_dfdaemon: bool,
#[arg(
long = "force-hard-link",
default_value_t = false,
help = "Specify whether the download file must be hard linked to the output path. If hard link is failed, download will be failed. If it is false, dfdaemon will copy the file to the output path if hard link is failed."
)]
force_hard_link: bool,
#[arg(
long = "application",
default_value = "",
@ -86,13 +77,6 @@ pub struct ExportCommand {
)]
timeout: Duration,
#[arg(
long = "digest",
required = false,
help = "Verify the integrity of the downloaded file using the specified digest, support sha256, sha512, crc32. If the digest is not specified, the downloaded file will not be verified. Format: <algorithm>:<digest>, e.g. sha256:1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef, crc32:12345678"
)]
digest: Option<String>,
#[arg(
short = 'e',
long = "endpoint",
@ -123,19 +107,17 @@ pub struct ExportCommand {
)]
log_max_files: usize,
#[arg(long, default_value_t = false, help = "Specify whether to print log")]
console: bool,
#[arg(
long = "verbose",
default_value_t = false,
help = "Specify whether to print log"
)]
verbose: bool,
}
/// Implement the execute for ExportCommand.
impl ExportCommand {
/// Executes the export command with comprehensive validation and advanced error handling.
///
/// This function serves as the main entry point for the dfcache export command execution.
/// It handles the complete workflow including argument parsing, validation, logging setup,
/// dfdaemon client connection, and export operation execution. The function provides
/// sophisticated error reporting with colored terminal output, including specialized
/// handling for backend errors with HTTP status codes and headers.
/// execute executes the export command.
pub async fn execute(&self) -> Result<()> {
// Parse command line arguments.
Args::parse();
@ -147,12 +129,8 @@ impl ExportCommand {
self.log_level,
self.log_max_files,
None,
None,
None,
None,
None,
false,
self.console,
self.verbose,
);
// Validate the command line arguments.
@ -442,13 +420,7 @@ impl ExportCommand {
Ok(())
}
/// Executes the export operation to retrieve cached files from the persistent cache system.
///
/// This function handles the core export functionality by downloading a cached file from the
/// dfdaemon persistent cache system. It supports two transfer modes: direct file transfer
/// by dfdaemon (hardlink/copy) or streaming piece content through the client for manual
/// file assembly. The operation provides real-time progress feedback and handles file
/// creation, directory setup, and efficient piece-by-piece writing with sparse file allocation.
/// run runs the export command.
async fn run(&self, dfdaemon_download_client: DfdaemonDownloadClient) -> Result<()> {
// Dfcache needs to notify dfdaemon to transfer the piece content of downloading file via unix domain socket
// when the `transfer_from_dfdaemon` is true. Otherwise, dfdaemon will download the file and hardlink or
@ -476,9 +448,6 @@ impl ExportCommand {
.or_err(ErrorType::ParseError)?,
),
need_piece_content,
force_hard_link: self.force_hard_link,
digest: self.digest.clone(),
remote_ip: Some(local_ip().unwrap().to_string()),
})
.await
.inspect_err(|err| {
@ -512,8 +481,8 @@ impl ExportCommand {
};
// Initialize progress bar.
let progress_bar = ProgressBar::new(0);
progress_bar.set_style(
let pb = ProgressBar::new(0);
pb.set_style(
ProgressStyle::with_template(
"[{elapsed_precise}] [{wide_bar}] {bytes}/{total_bytes} ({bytes_per_sec}, {eta})",
)
@ -534,15 +503,7 @@ impl ExportCommand {
Some(download_persistent_cache_task_response::Response::DownloadPersistentCacheTaskStartedResponse(
response,
)) => {
if let Some(f) = &f {
fallocate(f, response.content_length)
.await
.inspect_err(|err| {
error!("fallocate {:?} failed: {}", self.output, err);
})?;
}
progress_bar.set_length(response.content_length);
pb.set_length(response.content_length);
}
Some(download_persistent_cache_task_response::Response::DownloadPieceFinishedResponse(
response,
@ -566,23 +527,18 @@ impl ExportCommand {
};
downloaded += piece.length;
let position = min(downloaded + piece.length, progress_bar.length().unwrap_or(0));
progress_bar.set_position(position);
let position = min(downloaded + piece.length, pb.length().unwrap_or(0));
pb.set_position(position);
}
None => {}
}
}
progress_bar.finish_with_message("downloaded");
pb.finish_with_message("downloaded");
Ok(())
}
/// Validates command line arguments for the export operation to ensure safe file output.
///
/// This function performs essential validation of the output path to prevent file conflicts
/// and ensure the target location is suitable for export operations. It checks parent
/// directory existence, prevents accidental file overwrites, and validates path accessibility
/// before allowing the export operation to proceed.
/// validate_args validates the command line arguments.
fn validate_args(&self) -> Result<()> {
let absolute_path = Path::new(&self.output).absolutize()?;
match absolute_path.parent() {

View File

@ -14,17 +14,14 @@
* limitations under the License.
*/
use bytesize::ByteSize;
use clap::Parser;
use dragonfly_api::dfdaemon::v2::UploadPersistentCacheTaskRequest;
use dragonfly_client::resource::piece::MIN_PIECE_LENGTH;
use dragonfly_client_config::dfcache::default_dfcache_persistent_replica_count;
use dragonfly_client_core::{
error::{ErrorType, OrErr},
Error, Result,
};
use indicatif::{ProgressBar, ProgressStyle};
use local_ip_address::local_ip;
use path_absolutize::*;
use std::path::{Path, PathBuf};
use std::time::Duration;
@ -43,10 +40,11 @@ pub struct ImportCommand {
path: PathBuf,
#[arg(
long = "content-for-calculating-task-id",
help = "Specify the content used to calculate the persistent cache task ID. If it is set, use its value to calculate the task ID, Otherwise, calculate the persistent cache task ID based on url, piece-length, tag, application, and filtered-query-params."
long = "id",
required = false,
help = "Specify the id of the persistent cache task, its length must be 64 bytes. If id is none, dfdaemon will generate the new task id based on the file content, tag and application by wyhash algorithm."
)]
content_for_calculating_task_id: Option<String>,
id: Option<String>,
#[arg(
long = "persistent-replica-count",
@ -55,17 +53,10 @@ pub struct ImportCommand {
)]
persistent_replica_count: u64,
#[arg(
long = "piece-length",
required = false,
help = "Specify the piece length for downloading file. If the piece length is not specified, the piece length will be calculated according to the file size. Different piece lengths will be divided into different persistent cache tasks. The value needs to be set with human readable format and needs to be greater than or equal to 4mib, for example: 4mib, 1gib"
)]
piece_length: Option<ByteSize>,
#[arg(
long = "application",
required = false,
help = "Different applications for the same url will be divided into different persistent cache tasks"
help = "Caller application which is used for statistics and access control"
)]
application: Option<String>,
@ -122,19 +113,17 @@ pub struct ImportCommand {
)]
log_max_files: usize,
#[arg(long, default_value_t = false, help = "Specify whether to print log")]
console: bool,
#[arg(
long = "verbose",
default_value_t = false,
help = "Specify whether to print log"
)]
verbose: bool,
}
/// Implement the execute for ImportCommand.
impl ImportCommand {
/// Executes the import sub command with comprehensive validation and error handling.
///
/// This function serves as the main entry point for the dfcache import command execution.
/// It handles the complete workflow including argument parsing, validation, logging setup,
/// dfdaemon client connection, and import operation execution. The function provides
/// detailed error reporting with colored terminal output and follows a fail-fast approach
/// with immediate process termination on any critical failures.
/// execute executes the import sub command.
pub async fn execute(&self) -> Result<()> {
// Parse command line arguments.
Args::parse();
@ -146,12 +135,8 @@ impl ImportCommand {
self.log_level,
self.log_max_files,
None,
None,
None,
None,
None,
false,
self.console,
self.verbose,
);
// Validate the command line arguments.
@ -332,34 +317,27 @@ impl ImportCommand {
Ok(())
}
/// Executes the cache import operation by uploading a file to the persistent cache system.
///
/// This function handles the core import functionality by uploading a local file to the
/// dfdaemon persistent cache system. It provides visual feedback through a progress spinner,
/// converts the file path to absolute format, and configures the cache task with specified
/// parameters including TTL, replica count, and piece length. The operation is asynchronous
/// and provides completion feedback with the generated task ID.
/// run runs the import sub command.
async fn run(&self, dfdaemon_download_client: DfdaemonDownloadClient) -> Result<()> {
let absolute_path = Path::new(&self.path).absolutize()?;
info!("import file: {}", absolute_path.to_string_lossy());
let progress_bar = ProgressBar::new_spinner();
progress_bar.enable_steady_tick(DEFAULT_PROGRESS_BAR_STEADY_TICK_INTERVAL);
progress_bar.set_style(
let pb = ProgressBar::new_spinner();
pb.enable_steady_tick(DEFAULT_PROGRESS_BAR_STEADY_TICK_INTERVAL);
pb.set_style(
ProgressStyle::with_template("{spinner:.blue} {msg}")
.unwrap()
.tick_strings(&["", "", "", "", "", "", "", ""]),
);
progress_bar.set_message("Importing...");
pb.set_message("Importing...");
let persistent_cache_task = dfdaemon_download_client
.upload_persistent_cache_task(UploadPersistentCacheTaskRequest {
content_for_calculating_task_id: self.content_for_calculating_task_id.clone(),
task_id: self.id.clone(),
path: absolute_path.to_string_lossy().to_string(),
persistent_replica_count: self.persistent_replica_count,
tag: self.tag.clone(),
application: self.application.clone(),
piece_length: self.piece_length.map(|piece_length| piece_length.as_u64()),
ttl: Some(
prost_wkt_types::Duration::try_from(self.ttl).or_err(ErrorType::ParseError)?,
),
@ -367,28 +345,22 @@ impl ImportCommand {
prost_wkt_types::Duration::try_from(self.timeout)
.or_err(ErrorType::ParseError)?,
),
remote_ip: Some(local_ip().unwrap().to_string()),
})
.await?;
progress_bar.finish_with_message(format!("Done: {}", persistent_cache_task.id));
pb.finish_with_message(format!("Done: {}", persistent_cache_task.id));
Ok(())
}
/// Validates command line arguments for the import operation to ensure safe and correct execution.
///
/// This function performs comprehensive validation of import-specific parameters to prevent
/// invalid operations and ensure the import request meets all system requirements. It validates
/// TTL boundaries, file existence and type, and piece length constraints before allowing the
/// import operation to proceed.
/// validate_args validates the command line arguments.
fn validate_args(&self) -> Result<()> {
if self.ttl < Duration::from_secs(5 * 60)
|| self.ttl > Duration::from_secs(7 * 24 * 60 * 60)
{
return Err(Error::ValidationError(format!(
"ttl must be between 5 minutes and 7 days, but got {}",
self.ttl.as_secs()
)));
if let Some(id) = self.id.as_ref() {
if id.len() != 64 {
return Err(Error::ValidationError(format!(
"id length must be 64 bytes, but got {}",
id.len()
)));
}
}
if self.path.is_dir() {
@ -405,16 +377,6 @@ impl ImportCommand {
)));
}
if let Some(piece_length) = self.piece_length {
if piece_length.as_u64() < MIN_PIECE_LENGTH {
return Err(Error::ValidationError(format!(
"piece length {} bytes is less than the minimum piece length {} bytes",
piece_length.as_u64(),
MIN_PIECE_LENGTH
)));
}
}
Ok(())
}
}

View File

@ -106,12 +106,7 @@ async fn main() -> anyhow::Result<()> {
Ok(())
}
/// Creates and validates a dfdaemon download client with health checking.
///
/// This function establishes a connection to the dfdaemon service via Unix domain socket
/// and performs a health check to ensure the service is running and ready to handle
/// download requests. Only after successful health verification does it return the
/// download client for actual use.
/// get_dfdaemon_download_client gets a dfdaemon download client and checks its health.
pub async fn get_dfdaemon_download_client(endpoint: PathBuf) -> Result<DfdaemonDownloadClient> {
// Check dfdaemon's health.
let health_client = HealthClient::new_unix(endpoint.clone()).await?;

View File

@ -22,7 +22,6 @@ use dragonfly_client_core::{
Error, Result,
};
use humantime::format_duration;
use local_ip_address::local_ip;
use std::time::Duration;
use tabled::{
settings::{object::Rows, Alignment, Modify, Style},
@ -68,19 +67,17 @@ pub struct StatCommand {
)]
log_max_files: usize,
#[arg(long, default_value_t = false, help = "Specify whether to print log")]
console: bool,
#[arg(
long = "verbose",
default_value_t = false,
help = "Specify whether to print log"
)]
verbose: bool,
}
/// Implement the execute for StatCommand.
impl StatCommand {
/// Executes the stat command with comprehensive error handling and user feedback.
///
/// This function serves as the main entry point for the dfcache stat command execution.
/// It handles the complete lifecycle including argument parsing, logging initialization,
/// dfdaemon client setup, and command execution with detailed error reporting. The
/// function provides colored terminal output for better user experience and exits
/// with appropriate status codes on failure.
/// execute executes the stat command.
pub async fn execute(&self) -> Result<()> {
// Parse command line arguments.
Args::parse();
@ -92,12 +89,8 @@ impl StatCommand {
self.log_level,
self.log_max_files,
None,
None,
None,
None,
None,
false,
self.console,
self.verbose,
);
// Get dfdaemon download client.
@ -240,17 +233,11 @@ impl StatCommand {
Ok(())
}
/// Executes the stat command to retrieve and display persistent cache task information.
///
/// This function queries the dfdaemon service for detailed information about a specific
/// persistent cache task and presents it in a formatted table for user consumption.
/// It handles data conversion from raw protocol buffer values to human-readable formats
/// including byte sizes, durations, and timestamps with proper timezone conversion.
/// run runs the stat command.
async fn run(&self, dfdaemon_download_client: DfdaemonDownloadClient) -> Result<()> {
let task = dfdaemon_download_client
.stat_persistent_cache_task(StatPersistentCacheTaskRequest {
task_id: self.id.clone(),
remote_ip: Some(local_ip().unwrap().to_string()),
})
.await?;

View File

@ -15,7 +15,7 @@
*/
use clap::Parser;
use dragonfly_client::announcer::SchedulerAnnouncer;
use dragonfly_client::announcer::{ManagerAnnouncer, SchedulerAnnouncer};
use dragonfly_client::dynconfig::Dynconfig;
use dragonfly_client::gc::GC;
use dragonfly_client::grpc::{
@ -30,9 +30,10 @@ use dragonfly_client::shutdown;
use dragonfly_client::stats::Stats;
use dragonfly_client::tracing::init_tracing;
use dragonfly_client_backend::BackendFactory;
use dragonfly_client_config::{dfdaemon, VersionValueParser};
use dragonfly_client_config::dfdaemon;
use dragonfly_client_config::VersionValueParser;
use dragonfly_client_storage::Storage;
use dragonfly_client_util::{id_generator::IDGenerator, net::Interface};
use dragonfly_client_util::id_generator::IDGenerator;
use std::net::SocketAddr;
use std::path::PathBuf;
use std::sync::Arc;
@ -91,8 +92,12 @@ struct Args {
)]
log_max_files: usize,
#[arg(long, default_value_t = true, help = "Specify whether to print log")]
console: bool,
#[arg(
long = "verbose",
default_value_t = true,
help = "Specify whether to print log"
)]
verbose: bool,
#[arg(
short = 'V',
@ -145,13 +150,9 @@ async fn main() -> Result<(), anyhow::Error> {
args.log_dir.clone(),
args.log_level,
args.log_max_files,
config.tracing.protocol.clone(),
config.tracing.endpoint.clone(),
config.tracing.path.clone(),
Some(config.tracing.headers.clone()),
Some(config.host.clone()),
config.seed_peer.enable,
args.console,
config.tracing.addr.to_owned(),
config.tracing.flamegraph,
args.verbose,
);
// Initialize storage.
@ -229,9 +230,6 @@ async fn main() -> Result<(), anyhow::Error> {
)?;
let persistent_cache_task = Arc::new(persistent_cache_task);
let interface = Interface::new(config.host.ip.unwrap(), config.upload.rate_limit);
let interface = Arc::new(interface);
// Initialize health server.
let health = Health::new(
SocketAddr::new(config.health.server.ip.unwrap(), config.health.server.port),
@ -261,12 +259,19 @@ async fn main() -> Result<(), anyhow::Error> {
shutdown_complete_tx.clone(),
);
// Initialize manager announcer.
let manager_announcer = ManagerAnnouncer::new(
config.clone(),
manager_client.clone(),
shutdown.clone(),
shutdown_complete_tx.clone(),
);
// Initialize scheduler announcer.
let scheduler_announcer = SchedulerAnnouncer::new(
config.clone(),
id_generator.host_id(),
scheduler_client.clone(),
interface.clone(),
shutdown.clone(),
shutdown_complete_tx.clone(),
)
@ -281,14 +286,12 @@ async fn main() -> Result<(), anyhow::Error> {
SocketAddr::new(config.upload.server.ip.unwrap(), config.upload.server.port),
task.clone(),
persistent_cache_task.clone(),
interface.clone(),
shutdown.clone(),
shutdown_complete_tx.clone(),
);
// Initialize download grpc server.
let mut dfdaemon_download_grpc = DfdaemonDownloadServer::new(
config.clone(),
config.download.server.socket_path.clone(),
task.clone(),
persistent_cache_task.clone(),
@ -330,6 +333,10 @@ async fn main() -> Result<(), anyhow::Error> {
info!("stats server exited");
},
_ = tokio::spawn(async move { manager_announcer.run().await.unwrap_or_else(|err| error!("announcer manager failed: {}", err))} ) => {
info!("announcer manager exited");
},
_ = tokio::spawn(async move { scheduler_announcer.run().await }) => {
info!("announcer scheduler exited");
},

View File

@ -14,33 +14,30 @@
* limitations under the License.
*/
use bytesize::ByteSize;
use clap::Parser;
use dragonfly_api::common::v2::{Download, Hdfs, ObjectStorage, TaskType};
use dragonfly_api::dfdaemon::v2::{
download_task_response, DownloadTaskRequest, ListTaskEntriesRequest,
};
use dragonfly_api::dfdaemon::v2::{download_task_response, DownloadTaskRequest};
use dragonfly_api::errordetails::v2::Backend;
use dragonfly_client::grpc::dfdaemon_download::DfdaemonDownloadClient;
use dragonfly_client::grpc::health::HealthClient;
use dragonfly_client::resource::piece::MIN_PIECE_LENGTH;
use dragonfly_client::metrics::{
collect_backend_request_failure_metrics, collect_backend_request_finished_metrics,
collect_backend_request_started_metrics,
};
use dragonfly_client::tracing::init_tracing;
use dragonfly_client_backend::{hdfs, object_storage, BackendFactory, DirEntry};
use dragonfly_client_backend::{hdfs, object_storage, BackendFactory, DirEntry, HeadRequest};
use dragonfly_client_config::VersionValueParser;
use dragonfly_client_config::{self, dfdaemon, dfget};
use dragonfly_client_core::error::{ErrorType, OrErr};
use dragonfly_client_core::error::{BackendError, ErrorType, OrErr};
use dragonfly_client_core::{Error, Result};
use dragonfly_client_util::{fs::fallocate, http::header_vec_to_hashmap};
use glob::Pattern;
use dragonfly_client_util::http::{header_vec_to_hashmap, header_vec_to_headermap};
use indicatif::{MultiProgress, ProgressBar, ProgressState, ProgressStyle};
use local_ip_address::local_ip;
use path_absolutize::*;
use percent_encoding::percent_decode_str;
use std::collections::{HashMap, HashSet};
use std::path::{Component, Path, PathBuf};
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use std::time::{Duration, Instant};
use std::{cmp::min, fmt::Write};
use termion::{color, style};
use tokio::fs::{self, OpenOptions};
@ -102,19 +99,6 @@ struct Args {
)]
transfer_from_dfdaemon: bool,
#[arg(
long = "force-hard-link",
default_value_t = false,
help = "Specify whether the download file must be hard linked to the output path. If hard link is failed, download will be failed. If it is false, dfdaemon will copy the file to the output path if hard link is failed."
)]
force_hard_link: bool,
#[arg(
long = "content-for-calculating-task-id",
help = "Specify the content used to calculate the task ID. If it is set, use its value to calculate the task ID, Otherwise, calculate the task ID based on URL, piece-length, tag, application, and filtered-query-params."
)]
content_for_calculating_task_id: Option<String>,
#[arg(
short = 'O',
long = "output",
@ -139,11 +123,12 @@ struct Args {
timeout: Duration,
#[arg(
short = 'd',
long = "digest",
required = false,
help = "Verify the integrity of the downloaded file using the specified digest, support sha256, sha512, crc32. If the digest is not specified, the downloaded file will not be verified. Format: <algorithm>:<digest>. Examples: sha256:1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef, crc32:12345678"
default_value = "",
help = "Verify the integrity of the downloaded file using the specified digest, e.g. md5:86d3f3a95c324c9479bd8986968f4327"
)]
digest: Option<String>,
digest: String,
#[arg(
short = 'p',
@ -153,24 +138,17 @@ struct Args {
)]
priority: i32,
#[arg(
long = "piece-length",
required = false,
help = "Specify the piece length for downloading file. If the piece length is not specified, the piece length will be calculated according to the file size. Different piece lengths will be divided into different tasks. The value needs to be set with human readable format and needs to be greater than or equal to 4mib, for example: 4mib, 1gib"
)]
piece_length: Option<ByteSize>,
#[arg(
long = "application",
default_value = "",
help = "Different applications for the same URL will be divided into different tasks"
help = "Caller application which is used for statistics and access control"
)]
application: String,
#[arg(
long = "tag",
default_value = "",
help = "Different tags for the same URL will be divided into different tasks"
help = "Different tags for the same url will be divided into different tasks"
)]
tag: String,
@ -178,24 +156,17 @@ struct Args {
short = 'H',
long = "header",
required = false,
help = "Specify the header for downloading file. Examples: --header='Content-Type: application/json' --header='Accept: application/json'"
help = "Specify the header for downloading file, e.g. --header='Content-Type: application/json' --header='Accept: application/json'"
)]
header: Option<Vec<String>>,
#[arg(
long = "filtered-query-param",
required = false,
help = "Filter the query parameters of the downloaded URL. If the download URL is the same, it will be scheduled as the same task. Examples: --filtered-query-param='signature' --filtered-query-param='timeout'"
help = "Filter the query parameters of the downloaded URL. If the download URL is the same, it will be scheduled as the same task, e.g. --filtered-query-param='signature' --filtered-query-param='timeout'"
)]
filtered_query_params: Option<Vec<String>>,
#[arg(
long = "include-files",
required = false,
help = "Filter files to download in a directory using glob patterns relative to the root URL's path. Examples: --include-files='*.txt' --include-files='subdir/file.txt'"
)]
include_files: Option<Vec<String>>,
#[arg(
long = "disable-back-to-source",
default_value_t = false,
@ -282,8 +253,12 @@ struct Args {
)]
log_max_files: usize,
#[arg(long, default_value_t = false, help = "Specify whether to print log")]
console: bool,
#[arg(
long = "verbose",
default_value_t = false,
help = "Specify whether to print log"
)]
verbose: bool,
#[arg(
short = 'V',
@ -308,12 +283,8 @@ async fn main() -> anyhow::Result<()> {
args.log_level,
args.log_max_files,
None,
None,
None,
None,
None,
false,
args.console,
args.verbose,
);
// Validate command line arguments.
@ -603,12 +574,7 @@ async fn main() -> anyhow::Result<()> {
Ok(())
}
/// Runs the dfget command to download files or directories from a given URL.
///
/// This function serves as the main entry point for the dfget download operation.
/// It handles both single file downloads and directory downloads based on the URL format.
/// The function performs path normalization, validates the URL scheme's capabilities,
/// and delegates to the appropriate download handler.
/// run runs the dfget command.
async fn run(mut args: Args, dfdaemon_download_client: DfdaemonDownloadClient) -> Result<()> {
// Get the absolute path of the output file.
args.output = Path::new(&args.output).absolutize()?.into();
@ -618,7 +584,7 @@ async fn run(mut args: Args, dfdaemon_download_client: DfdaemonDownloadClient) -
// then download all files in the directory. Otherwise, download the single file.
let scheme = args.url.scheme();
if args.url.path().ends_with('/') {
if BackendFactory::unsupported_download_directory(scheme) {
if !BackendFactory::supported_download_directory(scheme) {
return Err(Error::Unsupported(format!("{} download directory", scheme)));
};
@ -628,13 +594,7 @@ async fn run(mut args: Args, dfdaemon_download_client: DfdaemonDownloadClient) -
download(args, ProgressBar::new(0), dfdaemon_download_client).await
}
/// Downloads all files in a directory from various storage backends (object storage, HDFS, etc.).
///
/// This function handles directory-based downloads by recursively fetching all entries
/// in the specified directory. It supports filtering files based on include patterns,
/// enforces download limits, and performs concurrent downloads with configurable
/// concurrency control. The function creates the necessary directory structure
/// locally and downloads files while preserving the remote directory hierarchy.
/// download_dir downloads all files in the directory.
async fn download_dir(args: Args, download_client: DfdaemonDownloadClient) -> Result<()> {
// Initialize the object storage config and the hdfs config.
let object_storage = Some(ObjectStorage {
@ -651,17 +611,12 @@ async fn download_dir(args: Args, download_client: DfdaemonDownloadClient) -> Re
delegation_token: args.hdfs_delegation_token.clone(),
});
// Get all entries in the directory.
let mut entries = get_entries(&args, object_storage, hdfs, download_client.clone()).await?;
if let Some(ref include_files) = args.include_files {
entries = filter_entries(&args.url, entries, include_files)?;
}
// If the entries is empty, then return directly.
// Get all entries in the directory. If the directory is empty, then return directly.
let entries = get_entries(args.clone(), object_storage, hdfs).await?;
if entries.is_empty() {
warn!("no entries found in directory {}", args.url);
warn!("directory {} is empty", args.url);
return Ok(());
}
};
// If the actual file count is greater than the max_files, then reject the downloading.
let count = entries.iter().filter(|entry| !entry.is_dir).count();
@ -732,13 +687,7 @@ async fn download_dir(args: Args, download_client: DfdaemonDownloadClient) -> Re
Ok(())
}
/// Downloads a single file from various storage backends using the dfdaemon service.
///
/// This function handles single file downloads by communicating with a dfdaemon client.
/// It supports multiple storage protocols (object storage, HDFS, HTTP/HTTPS) and provides
/// two transfer modes: direct download by dfdaemon or streaming piece content through
/// the client. The function includes progress tracking, file creation, and proper error
/// handling throughout the download process.
/// download downloads the single file.
async fn download(
args: Args,
progress_bar: ProgressBar,
@ -785,7 +734,7 @@ async fn download(
.download_task(DownloadTaskRequest {
download: Some(Download {
url: args.url.to_string(),
digest: args.digest,
digest: Some(args.digest),
// NOTE: Dfget does not support range download.
range: None,
r#type: TaskType::Standard as i32,
@ -794,7 +743,7 @@ async fn download(
priority: args.priority,
filtered_query_params,
request_header: header_vec_to_hashmap(args.header.unwrap_or_default())?,
piece_length: args.piece_length.map(|piece_length| piece_length.as_u64()),
piece_length: None,
output_path,
timeout: Some(
prost_wkt_types::Duration::try_from(args.timeout)
@ -808,9 +757,7 @@ async fn download(
need_piece_content,
object_storage,
hdfs,
force_hard_link: args.force_hard_link,
content_for_calculating_task_id: args.content_for_calculating_task_id,
remote_ip: Some(local_ip().unwrap().to_string()),
load_to_cache: false,
}),
})
.await
@ -866,14 +813,6 @@ async fn download(
})? {
match message.response {
Some(download_task_response::Response::DownloadTaskStartedResponse(response)) => {
if let Some(f) = &f {
fallocate(f, response.content_length)
.await
.inspect_err(|err| {
error!("fallocate {:?} failed: {}", args.output, err);
})?;
}
progress_bar.set_length(response.content_length);
}
Some(download_task_response::Response::DownloadPieceFinishedResponse(response)) => {
@ -910,116 +849,69 @@ async fn download(
Ok(())
}
/// Retrieves all directory entries from a remote storage location.
///
/// This function communicates with the dfdaemon service to list all entries
/// (files and subdirectories) in the specified directory URL. It supports
/// various storage backends including object storage and HDFS by passing
/// the appropriate credentials and configuration. The function converts
/// the gRPC response into a local `DirEntry` format for further processing.
/// get_entries gets all entries in the directory.
async fn get_entries(
args: &Args,
args: Args,
object_storage: Option<ObjectStorage>,
hdfs: Option<Hdfs>,
download_client: DfdaemonDownloadClient,
) -> Result<Vec<DirEntry>> {
info!("list task entries: {:?}", args.url);
// List task entries.
let response = download_client
.list_task_entries(ListTaskEntriesRequest {
// Initialize backend factory and build backend.
let backend_factory = BackendFactory::new(None)?;
let backend = backend_factory.build(args.url.as_str())?;
// Collect backend request started metrics.
collect_backend_request_started_metrics(backend.scheme().as_str(), http::Method::HEAD.as_str());
// Record the start time.
let start_time = Instant::now();
let response = backend
.head(HeadRequest {
// NOTE: Mock a task id for head request.
task_id: Uuid::new_v4().to_string(),
url: args.url.to_string(),
request_header: header_vec_to_hashmap(args.header.clone().unwrap_or_default())?,
timeout: None,
certificate_chain: Vec::new(),
http_header: Some(header_vec_to_headermap(
args.header.clone().unwrap_or_default(),
)?),
timeout: args.timeout,
client_cert: None,
object_storage,
hdfs,
remote_ip: Some(local_ip().unwrap().to_string()),
})
.await
.inspect_err(|err| {
error!("list task entries failed: {}", err);
.inspect_err(|_err| {
// Collect backend request failure metrics.
collect_backend_request_failure_metrics(
backend.scheme().as_str(),
http::Method::HEAD.as_str(),
);
})?;
Ok(response
.entries
.into_iter()
.map(|entry| DirEntry {
url: entry.url,
content_length: entry.content_length as usize,
is_dir: entry.is_dir,
})
.collect())
}
// Return error when response is failed.
if !response.success {
// Collect backend request failure metrics.
collect_backend_request_failure_metrics(
backend.scheme().as_str(),
http::Method::HEAD.as_str(),
);
/// Filters directory entries based on include patterns and validates their URLs.
///
/// This function takes a collection of directory entries and filters them based on
/// glob patterns specified in `include_files`. It performs URL validation to ensure
/// all entries have valid URLs and that their paths fall within the scope of the
/// root URL. When an entry matches a pattern, both the entry and its parent
/// directory (if it exists) are included in the result.
fn filter_entries(
url: &Url,
entries: Vec<DirEntry>,
include_files: &[String],
) -> Result<Vec<DirEntry>> {
let patterns: Vec<Pattern> = include_files
.iter()
.filter_map(|include_file| Pattern::new(include_file).ok())
.collect();
// Build a HashMap of DirEntry objects keyed by relative paths for filtering and
// validates URLs and ensures paths are within the root URL's scope.
let mut entries_by_relative_path = HashMap::with_capacity(entries.len());
for entry in entries {
let entry_url = Url::parse(&entry.url).map_err(|err| {
error!("failed to parse entry URL '{}': {}", entry.url, err);
Error::ValidationError(format!("invalid URL: {}", entry.url))
})?;
let entry_path = entry_url.path();
match entry_path.strip_prefix(url.path()) {
Some(relative_path) => entries_by_relative_path
.insert(relative_path.trim_start_matches('/').to_string(), entry),
None => {
error!(
"entry path '{}' does not belong to the root path",
entry_path
);
return Err(Error::ValidationError(format!(
"path '{}' is outside the expected scope",
entry_path
)));
}
};
return Err(Error::BackendError(Box::new(BackendError {
message: response.error_message.unwrap_or_default(),
status_code: Some(response.http_status_code.unwrap_or_default()),
header: Some(response.http_header.unwrap_or_default()),
})));
}
// Filter entries by matching relative paths against patterns, including
// parent directories for matches.
let mut filtered_entries = HashSet::new();
for (relative_path, entry) in &entries_by_relative_path {
if patterns.iter().any(|pat| pat.matches(relative_path)) {
filtered_entries.insert(entry.clone());
if let Some(parent) = std::path::Path::new(relative_path).parent() {
if let Some(parent_entry) =
entries_by_relative_path.get(&parent.join("").to_string_lossy().to_string())
{
filtered_entries.insert(parent_entry.clone());
}
}
}
}
// Collect backend request finished metrics.
collect_backend_request_finished_metrics(
backend.scheme().as_str(),
http::Method::HEAD.as_str(),
start_time.elapsed(),
);
Ok(filtered_entries.into_iter().collect())
Ok(response.entries)
}
/// Constructs the local output path for a directory entry based on its remote URL.
///
/// This function maps a remote directory entry to its corresponding local file system
/// path by replacing the remote root directory with the local output directory.
/// It handles URL percent-decoding to ensure proper path construction and maintains
/// the relative directory structure from the remote source.
/// make_output_by_entry makes the output path by the entry information.
fn make_output_by_entry(url: Url, output: &Path, entry: DirEntry) -> Result<PathBuf> {
// Get the root directory of the download directory and the output root directory.
let root_dir = url.path().to_string();
@ -1037,12 +929,7 @@ fn make_output_by_entry(url: Url, output: &Path, entry: DirEntry) -> Result<Path
.into())
}
/// Creates and validates a dfdaemon download client with health checking.
///
/// This function establishes a connection to the dfdaemon service via Unix domain socket
/// and performs a health check to ensure the service is running and ready to handle
/// download requests. Only after successful health verification does it return the
/// download client for actual use.
/// get_dfdaemon_download_client gets a dfdaemon download client and checks its health.
async fn get_dfdaemon_download_client(endpoint: PathBuf) -> Result<DfdaemonDownloadClient> {
// Check dfdaemon's health.
let health_client = HealthClient::new_unix(endpoint.clone()).await?;
@ -1053,13 +940,7 @@ async fn get_dfdaemon_download_client(endpoint: PathBuf) -> Result<DfdaemonDownl
Ok(dfdaemon_download_client)
}
/// Validates command line arguments for consistency and safety requirements.
///
/// This function performs comprehensive validation of the download arguments to ensure
/// they are logically consistent and safe to execute. It checks URL-output path matching,
/// directory existence, file conflicts, piece length constraints, and glob pattern validity.
/// The validation prevents common user errors and potential security issues before
/// starting the download process.
/// validate_args validates the command line arguments.
fn validate_args(args: &Args) -> Result<()> {
// If the URL is a directory, the output path should be a directory.
if args.url.path().ends_with('/') && !args.output.is_dir() {
@ -1098,52 +979,9 @@ fn validate_args(args: &Args) -> Result<()> {
}
}
if let Some(piece_length) = args.piece_length {
if piece_length.as_u64() < MIN_PIECE_LENGTH {
return Err(Error::ValidationError(format!(
"piece length {} bytes is less than the minimum piece length {} bytes",
piece_length.as_u64(),
MIN_PIECE_LENGTH
)));
}
}
if let Some(ref include_files) = args.include_files {
for include_file in include_files {
if Pattern::new(include_file).is_err() {
return Err(Error::ValidationError(format!(
"invalid glob pattern in include_files: '{}'",
include_file
)));
}
if !is_normal_relative_path(include_file) {
return Err(Error::ValidationError(format!(
"path is not a normal relative path in include_files: '{}'. It must not contain '..', '.', or start with '/'.",
include_file
)));
}
}
}
Ok(())
}
/// Reports whether `path` is a relative path made up solely of normal components.
///
/// The path is rejected when it is absolute, or when iterating its components
/// yields anything other than `Component::Normal`, such as a parent directory
/// reference (`..`) or a leading current directory reference (`.`). Note that
/// `Path::components` normalises away an interior `.` (as in `a/./b`), so such
/// a path is accepted, and an empty string has no components and is accepted too.
fn is_normal_relative_path(path: &str) -> bool {
    let candidate = Path::new(path);

    // Absolute paths are never acceptable.
    if !candidate.is_relative() {
        return false;
    }

    // Any component that is not a plain name disqualifies the path.
    for component in candidate.components() {
        match component {
            Component::Normal(_) => {}
            _ => return false,
        }
    }

    true
}
#[cfg(test)]
mod tests {
use super::*;
@ -1307,346 +1145,4 @@ mod tests {
let result = make_output_by_entry(url, output, entry);
assert!(result.is_err());
}
// Verifies filter_entries against a fixed remote tree: exact matches, glob
// matches, inclusion of the parent directory of a match, patterns that match
// nothing, and rejection of an entry whose URL cannot be parsed.
//
// Every case states explicitly whether it must succeed (`Some(urls)`) or fail
// with `Error::ValidationError` (`None`), so an unexpected error in a case that
// should succeed fails the test instead of being silently accepted.
#[test]
fn should_filter_entries() {
    // Builds a directory entry; directories in the fixture have length 10.
    fn dir(url: &str) -> DirEntry {
        DirEntry {
            url: url.to_string(),
            content_length: 10,
            is_dir: true,
        }
    }

    // Builds a file entry; files in the fixture have length 100.
    fn file(url: &str) -> DirEntry {
        DirEntry {
            url: url.to_string(),
            content_length: 100,
            is_dir: false,
        }
    }

    let root = Url::parse("http://example.com/root/").unwrap();

    // The remote tree shared by every case that is expected to succeed.
    let tree = vec![
        dir("http://example.com/root/dir/"),
        file("http://example.com/root/dir/file.txt"),
        file("http://example.com/root/dir/file2.txt"),
        dir("http://example.com/root/dir/subdir/"),
        file("http://example.com/root/dir/subdir/file3.txt"),
        file("http://example.com/root/dir/subdir/file4.png"),
    ];

    // Each case is (entries, include patterns, expected outcome).
    let test_cases: Vec<(Vec<DirEntry>, Vec<&str>, Option<Vec<&str>>)> = vec![
        // An exact file match also pulls in its parent directory.
        (
            tree.clone(),
            vec!["dir/file.txt"],
            Some(vec![
                "http://example.com/root/dir/",
                "http://example.com/root/dir/file.txt",
            ]),
        ),
        // Several patterns are combined.
        (
            tree.clone(),
            vec!["dir/file.txt", "dir/subdir/file4.png"],
            Some(vec![
                "http://example.com/root/dir/",
                "http://example.com/root/dir/file.txt",
                "http://example.com/root/dir/subdir/",
                "http://example.com/root/dir/subdir/file4.png",
            ]),
        ),
        // A glob restricted to one extension in a sub directory.
        (
            tree.clone(),
            vec!["dir/subdir/*.png"],
            Some(vec![
                "http://example.com/root/dir/subdir/",
                "http://example.com/root/dir/subdir/file4.png",
            ]),
        ),
        // A wildcard under the directory selects the whole tree.
        (
            tree.clone(),
            vec!["dir/*"],
            Some(vec![
                "http://example.com/root/dir/",
                "http://example.com/root/dir/file.txt",
                "http://example.com/root/dir/file2.txt",
                "http://example.com/root/dir/subdir/",
                "http://example.com/root/dir/subdir/file3.txt",
                "http://example.com/root/dir/subdir/file4.png",
            ]),
        ),
        // A directory pattern selects only the directory entry itself.
        (
            tree.clone(),
            vec!["dir/"],
            Some(vec!["http://example.com/root/dir/"]),
        ),
        // A pattern that matches nothing yields an empty result.
        (tree.clone(), vec!["test"], Some(vec![])),
        // An entry with an unparsable URL must be rejected.
        (
            vec![file("http://example.com/root/dir/file.txt"), file(" ")],
            vec!["dir/file.txt"],
            None,
        ),
    ];

    for (entries, include_files, expected) in test_cases {
        let include_files: Vec<String> = include_files
            .iter()
            .map(|include_file| include_file.to_string())
            .collect();
        let result = filter_entries(&root, entries, &include_files);

        match expected {
            Some(expected_urls) => {
                let filtered_entries = result.unwrap_or_else(|err| {
                    panic!(
                        "filter_entries failed for patterns {:?}: {:?}",
                        include_files, err
                    )
                });

                assert_eq!(
                    filtered_entries.len(),
                    expected_urls.len(),
                    "unexpected entry count for patterns {:?}",
                    include_files
                );
                for expected_url in expected_urls {
                    assert!(
                        filtered_entries
                            .iter()
                            .any(|filtered_entry| filtered_entry.url == expected_url),
                        "missing entry {} for patterns {:?}",
                        expected_url,
                        include_files
                    );
                }
            }
            None => assert!(
                matches!(result, Err(Error::ValidationError(_))),
                "expected a validation error for patterns {:?}",
                include_files
            ),
        }
    }
}
}

View File

@ -0,0 +1,132 @@
/*
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use clap::{Parser, Subcommand};
use dragonfly_client::tracing::init_tracing;
use dragonfly_client_config::VersionValueParser;
use dragonfly_client_config::{dfdaemon, dfstore};
use std::path::PathBuf;
use tracing::Level;
// Args is the top-level command line definition of dfstore, parsed with clap.
// Regular `//` comments are used here on purpose: `///` doc comments on clap
// derive items are picked up as help text.
//
// clap's built-in version flag is disabled (`disable_version_flag = true`);
// a custom `-V/--version` argument is declared at the end of the struct.
#[derive(Debug, Parser)]
#[command(
name = dfstore::NAME,
author,
version,
about = "dfstore is a storage command line based on P2P technology in Dragonfly.",
long_about = "A storage command line based on P2P technology in Dragonfly that can rely on different types of object storage, \
such as S3 or OSS, to provide stable object storage capabilities. It uses the entire P2P network as a cache when storing objects. \
Rely on S3 or OSS as the backend to ensure storage reliability. In the process of object storage, \
P2P cache is effectively used for fast read and write storage.",
disable_version_flag = true
)]
struct Args {
// Endpoint of dfdaemon's gRPC server, given as a path; defaults to the value
// returned by dfdaemon::default_download_unix_socket_path().
#[arg(
short = 'e',
long = "endpoint",
default_value_os_t = dfdaemon::default_download_unix_socket_path(),
help = "Endpoint of dfdaemon's GRPC server"
)]
endpoint: PathBuf,
// Logging level, parsed into a tracing Level; defaults to "info".
#[arg(
short = 'l',
long,
default_value = "info",
help = "Specify the logging level [trace, debug, info, warn, error]"
)]
log_level: Level,
// Log directory; defaults to the value returned by dfstore::default_dfstore_log_dir().
#[arg(
long,
default_value_os_t = dfstore::default_dfstore_log_dir(),
help = "Specify the log directory"
)]
log_dir: PathBuf,
// Maximum number of log files; defaults to 6.
#[arg(
long,
default_value_t = 6,
help = "Specify the max number of log files"
)]
log_max_files: usize,
// Whether to print log; passed as the last argument of init_tracing in main.
// NOTE(review): the default is already `true`, so passing `--verbose`
// presumably cannot change the value — confirm the default is intended.
#[arg(
long = "verbose",
default_value_t = true,
help = "Specify whether to print log"
)]
verbose: bool,
// Custom version flag replacing clap's built-in one; the value is parsed by
// VersionValueParser.
#[arg(
short = 'V',
long = "version",
help = "Print version information",
default_value_t = false,
action = clap::ArgAction::SetTrue,
value_parser = VersionValueParser
)]
version: bool,
// The subcommand selected on the command line.
#[command(subcommand)]
command: Command,
}
// Command enumerates the dfstore subcommands: `cp` (Copy) and `rm` (Remove).
// Each variant wraps the argument struct of its subcommand. Regular `//`
// comments are used because `///` doc comments on clap derive items are
// picked up as help text.
#[derive(Debug, Clone, Subcommand)]
#[command()]
pub enum Command {
// `cp`: download a file from, or upload a local file to, object storage.
#[command(
name = "cp",
author,
version,
about = "Download or upload files using object storage in Dragonfly",
long_about = "Download a file from object storage in Dragonfly or upload a local file to object storage in Dragonfly"
)]
Copy(CopyCommand),
// `rm`: remove a file from object storage together with its P2P cache.
#[command(
name = "rm",
author,
version,
about = "Remove a file from Dragonfly object storage",
long_about = "Remove the P2P cache in Dragonfly and remove the file stored in the object storage."
)]
Remove(RemoveCommand),
}
// CopyCommand holds the arguments of the `cp` subcommand. It declares no
// fields, so the subcommand currently takes no options of its own.
/// Download or upload files using object storage in Dragonfly.
#[derive(Debug, Clone, Parser)]
pub struct CopyCommand {}
// RemoveCommand holds the arguments of the `rm` subcommand. It declares no
// fields, so the subcommand currently takes no options of its own.
/// Remove a file from Dragonfly object storage.
#[derive(Debug, Clone, Parser)]
pub struct RemoveCommand {}
fn main() {
// Parse command line arguments.
let args = Args::parse();
// Initialize tracing.
let _guards = init_tracing(
dfstore::NAME,
args.log_dir,
args.log_level,
args.log_max_files,
None,
false,
args.verbose,
);
}

View File

@ -25,7 +25,7 @@ use dragonfly_client_core::{Error, Result};
use std::sync::Arc;
use tokio::sync::{mpsc, Mutex, RwLock};
use tonic_health::pb::health_check_response::ServingStatus;
use tracing::{debug, error, info, instrument};
use tracing::{error, info, instrument};
use url::Url;
/// Data is the dynamic configuration of the dfdaemon.
@ -65,6 +65,7 @@ pub struct Dynconfig {
/// Dynconfig is the implementation of Dynconfig.
impl Dynconfig {
/// new creates a new Dynconfig.
#[instrument(skip_all)]
pub async fn new(
config: Arc<Config>,
manager_client: Arc<ManagerClient>,
@ -87,6 +88,7 @@ impl Dynconfig {
}
/// run starts the dynconfig server.
#[instrument(skip_all)]
pub async fn run(&self) {
// Clone the shutdown channel.
let mut shutdown = self.shutdown.clone();
@ -96,10 +98,9 @@ impl Dynconfig {
loop {
tokio::select! {
_ = interval.tick() => {
match self.refresh().await {
Err(err) => error!("refresh dynconfig failed: {}", err),
Ok(_) => debug!("refresh dynconfig success"),
}
if let Err(err) = self.refresh().await {
error!("refresh dynconfig failed: {}", err);
};
}
_ = shutdown.recv() => {
// Dynconfig server shutting down with signals.
@ -162,7 +163,6 @@ impl Dynconfig {
location: self.config.host.location.clone(),
version: CARGO_PKG_VERSION.to_string(),
commit: GIT_COMMIT_SHORT_HASH.to_string(),
scheduler_cluster_id: self.config.host.scheduler_cluster_id.unwrap_or(0),
})
.await
}

View File

@ -53,6 +53,7 @@ pub struct GC {
impl GC {
/// new creates a new GC.
#[instrument(skip_all)]
pub fn new(
config: Arc<Config>,
host_id: String,
@ -72,6 +73,7 @@ impl GC {
}
/// run runs the garbage collector.
#[instrument(skip_all)]
pub async fn run(&self) {
// Clone the shutdown channel.
let mut shutdown = self.shutdown.clone();
@ -125,7 +127,6 @@ impl GC {
}
}
info!("evict by task ttl done");
Ok(())
}
@ -152,8 +153,6 @@ impl GC {
if let Err(err) = self.evict_task_space(need_evict_space as u64).await {
info!("failed to evict task by disk usage: {}", err);
}
info!("evict task by disk usage done");
}
Ok(())
@ -233,7 +232,7 @@ impl GC {
/// evict_persistent_cache_task_by_ttl evicts the persistent cache task by ttl.
#[instrument(skip_all)]
async fn evict_persistent_cache_task_by_ttl(&self) -> Result<()> {
info!("start to evict by persistent cache task ttl");
info!("start to evict by persistent cache task ttl * 2");
for task in self.storage.get_persistent_cache_tasks()? {
// If the persistent cache task is expired and not uploading, evict the persistent cache task.
if task.is_expired() {
@ -242,7 +241,6 @@ impl GC {
}
}
info!("evict by persistent cache task ttl done");
Ok(())
}
@ -272,8 +270,6 @@ impl GC {
{
info!("failed to evict task by disk usage: {}", err);
}
info!("evict persistent cache task by disk usage done");
}
Ok(())
@ -312,7 +308,7 @@ impl GC {
}
// Evict the task.
self.storage.delete_persistent_cache_task(&task.id).await;
self.storage.delete_task(&task.id).await;
// Update the evicted space.
let task_space = task.content_length();

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -21,27 +21,27 @@ use dragonfly_client_core::{
use hyper_util::rt::TokioIo;
use std::path::PathBuf;
use tokio::net::UnixStream;
use tonic::service::interceptor::InterceptedService;
use tonic::transport::ClientTlsConfig;
use tonic::transport::{Channel, Endpoint, Uri};
use tonic::{service::interceptor::InterceptedService, transport::ClientTlsConfig};
use tonic_health::pb::{
health_client::HealthClient as HealthGRPCClient, HealthCheckRequest, HealthCheckResponse,
};
use tower::service_fn;
use tracing::{error, instrument};
use super::interceptor::InjectTracingInterceptor;
use super::interceptor::TracingInterceptor;
/// HealthClient is a wrapper of HealthGRPCClient.
#[derive(Clone)]
pub struct HealthClient {
/// client is the grpc client of the health service.
client: HealthGRPCClient<InterceptedService<Channel, InjectTracingInterceptor>>,
client: HealthGRPCClient<InterceptedService<Channel, TracingInterceptor>>,
}
/// HealthClient implements the grpc client of the health.
impl HealthClient {
/// new creates a new HealthClient.
#[instrument(skip_all)]
pub async fn new(addr: &str, client_tls_config: Option<ClientTlsConfig>) -> Result<Self> {
let channel = match client_tls_config {
Some(client_tls_config) => Channel::from_shared(addr.to_string())
@ -73,13 +73,14 @@ impl HealthClient {
.or_err(ErrorType::ConnectError)?,
};
let client = HealthGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
let client = HealthGRPCClient::with_interceptor(channel, TracingInterceptor)
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX);
Ok(Self { client })
}
/// new_unix creates a new HealthClient with unix domain socket.
#[instrument(skip_all)]
pub async fn new_unix(socket_path: PathBuf) -> Result<Self> {
// Ignore the uri because it is not used.
let channel = Endpoint::try_from("http://[::]:50051")
@ -97,8 +98,7 @@ impl HealthClient {
error!("connect failed: {}", err);
})
.or_err(ErrorType::ConnectError)?;
let client = HealthGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
let client = HealthGRPCClient::with_interceptor(channel, TracingInterceptor)
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX);
Ok(Self { client })
@ -137,6 +137,7 @@ impl HealthClient {
}
/// make_request creates a new request with timeout.
#[instrument(skip_all)]
fn make_request<T>(request: T) -> tonic::Request<T> {
let mut request = tonic::Request::new(request);
request.set_timeout(super::REQUEST_TIMEOUT);

View File

@ -17,30 +17,11 @@
use tonic::{metadata, service::Interceptor, Request, Status};
use tracing_opentelemetry::OpenTelemetrySpanExt;
/// MetadataMap is a tracing metadata map container for span context.
/// MetadataMap is a tracing metadata map container.
struct MetadataMap<'a>(&'a mut metadata::MetadataMap);
/// Extractor implementation that lets the otel propagator read values out of the
/// wrapped tonic metadata map.
impl opentelemetry::propagation::Extractor for MetadataMap<'_> {
    /// get looks up `key` in the metadata map. Returns None when the key is absent or
    /// when its value can't be converted to &str.
    fn get(&self, key: &str) -> Option<&str> {
        let value = self.0.get(key)?;
        value.to_str().ok()
    }

    /// keys returns the names of all keys in the metadata map, both ascii and binary.
    fn keys(&self) -> Vec<&str> {
        let mut names = Vec::new();
        for key in self.0.keys() {
            let name = match key {
                tonic::metadata::KeyRef::Ascii(ascii) => ascii.as_str(),
                tonic::metadata::KeyRef::Binary(binary) => binary.as_str(),
            };
            names.push(name);
        }

        names
    }
}
/// MetadataMap implements the otel tracing Injector.
impl opentelemetry::propagation::Injector for MetadataMap<'_> {
impl<'a> opentelemetry::propagation::Injector for MetadataMap<'a> {
/// set a key-value pair to the injector.
fn set(&mut self, key: &str, value: String) {
if let Ok(key) = metadata::MetadataKey::from_bytes(key.as_bytes()) {
@ -51,12 +32,12 @@ impl opentelemetry::propagation::Injector for MetadataMap<'_> {
}
}
/// InjectTracingInterceptor is a auto-inject tracing gRPC interceptor.
/// TracingInterceptor is a auto-inject tracing gRPC interceptor.
#[derive(Clone)]
pub struct InjectTracingInterceptor;
pub struct TracingInterceptor;
/// InjectTracingInterceptor implements the tonic Interceptor interface.
impl Interceptor for InjectTracingInterceptor {
/// TracingInterceptor implements the tonic Interceptor interface.
impl Interceptor for TracingInterceptor {
/// call and inject tracing context into global propagator.
fn call(&mut self, mut request: Request<()>) -> std::result::Result<Request<()>, Status> {
let context = tracing::Span::current().context();
@ -67,20 +48,3 @@ impl Interceptor for InjectTracingInterceptor {
Ok(request)
}
}
/// ExtractTracingInterceptor is an auto-extract tracing gRPC interceptor.
#[derive(Clone)]
pub struct ExtractTracingInterceptor;

/// ExtractTracingInterceptor implements the tonic Interceptor interface.
impl Interceptor for ExtractTracingInterceptor {
    /// call extracts the tracing context from the request metadata with the global
    /// propagator and stores it in the request extensions.
    fn call(&mut self, mut request: Request<()>) -> std::result::Result<Request<()>, Status> {
        // Scope the metadata borrow so the extensions can be borrowed mutably afterwards.
        let extracted = {
            let carrier = MetadataMap(request.metadata_mut());
            opentelemetry::global::get_text_map_propagator(|propagator| {
                propagator.extract(&carrier)
            })
        };

        request.extensions_mut().insert(extracted);
        Ok(request)
    }
}

View File

@ -27,21 +27,22 @@ use dragonfly_client_core::{
use std::sync::Arc;
use tonic::{service::interceptor::InterceptedService, transport::Channel};
use tonic_health::pb::health_check_response::ServingStatus;
use tracing::{error, instrument};
use tracing::{error, instrument, warn};
use url::Url;
use super::interceptor::InjectTracingInterceptor;
use super::interceptor::TracingInterceptor;
/// ManagerClient is a wrapper of ManagerGRPCClient.
#[derive(Clone)]
pub struct ManagerClient {
/// client is the grpc client of the manager.
pub client: ManagerGRPCClient<InterceptedService<Channel, InjectTracingInterceptor>>,
pub client: ManagerGRPCClient<InterceptedService<Channel, TracingInterceptor>>,
}
/// ManagerClient implements the grpc client of the manager.
impl ManagerClient {
/// new creates a new ManagerClient.
#[instrument(skip_all)]
pub async fn new(config: Arc<Config>, addr: String) -> Result<Self> {
let domain_name = Url::parse(addr.as_str())?
.host_str()
@ -98,7 +99,7 @@ impl ManagerClient {
.or_err(ErrorType::ConnectError)?,
};
let client = ManagerGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
let client = ManagerGRPCClient::with_interceptor(channel, TracingInterceptor)
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX);
Ok(Self { client })
@ -132,6 +133,7 @@ impl ManagerClient {
}
/// make_request creates a new request with timeout.
#[instrument(skip_all)]
fn make_request<T>(request: T) -> tonic::Request<T> {
let mut request = tonic::Request::new(request);
request.set_timeout(super::REQUEST_TIMEOUT);

View File

@ -34,12 +34,8 @@ pub mod scheduler;
/// CONNECT_TIMEOUT is the timeout for GRPC connection.
pub const CONNECT_TIMEOUT: Duration = Duration::from_secs(2);
/// REQUEST_TIMEOUT is the timeout for GRPC requests, default is 15 seconds.
/// Note: This timeout is used for the whole request, including waiting for the scheduler
/// to schedule the task, refer to https://d7y.io/docs/next/reference/configuration/scheduler/.
/// The scheduler's `scheduler.retryInterval`, `scheduler.retryBackToSourceLimit` and `scheduler.retryLimit`
/// options are used by the scheduler to schedule the task.
pub const REQUEST_TIMEOUT: Duration = Duration::from_secs(15);
/// REQUEST_TIMEOUT is the timeout for GRPC requests.
pub const REQUEST_TIMEOUT: Duration = Duration::from_secs(5);
/// TCP_KEEPALIVE is the keepalive duration for TCP connection.
pub const TCP_KEEPALIVE: Duration = Duration::from_secs(3600);
@ -50,11 +46,11 @@ pub const HTTP2_KEEP_ALIVE_INTERVAL: Duration = Duration::from_secs(300);
/// HTTP2_KEEP_ALIVE_TIMEOUT is the timeout for HTTP2 keep alive.
pub const HTTP2_KEEP_ALIVE_TIMEOUT: Duration = Duration::from_secs(20);
/// MAX_FRAME_SIZE is the max frame size for GRPC, default is 4MB.
pub const MAX_FRAME_SIZE: u32 = 4 * 1024 * 1024;
/// MAX_FRAME_SIZE is the max frame size for GRPC, default is 12MB.
pub const MAX_FRAME_SIZE: u32 = 12 * 1024 * 1024;
/// INITIAL_WINDOW_SIZE is the initial window size for GRPC, default is 512KB.
pub const INITIAL_WINDOW_SIZE: u32 = 512 * 1024;
/// INITIAL_WINDOW_SIZE is the initial window size for GRPC, default is 12MB.
pub const INITIAL_WINDOW_SIZE: u32 = 12 * 1024 * 1024;
/// BUFFER_SIZE is the buffer size for GRPC, default is 64KB.
pub const BUFFER_SIZE: usize = 64 * 1024;

View File

@ -40,7 +40,7 @@ use tonic::transport::Channel;
use tracing::{error, info, instrument, Instrument};
use url::Url;
use super::interceptor::InjectTracingInterceptor;
use super::interceptor::TracingInterceptor;
/// VNode is the virtual node of the hashring.
#[derive(Debug, Copy, Clone, Hash, PartialEq)]
@ -79,6 +79,7 @@ pub struct SchedulerClient {
/// SchedulerClient implements the grpc client of the scheduler.
impl SchedulerClient {
/// new creates a new SchedulerClient.
#[instrument(skip_all)]
pub async fn new(config: Arc<Config>, dynconfig: Arc<Dynconfig>) -> Result<Self> {
let client = Self {
config,
@ -191,10 +192,9 @@ impl SchedulerClient {
})
.or_err(ErrorType::ConnectError)?;
let mut client =
SchedulerGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX);
let mut client = SchedulerGRPCClient::with_interceptor(channel, TracingInterceptor)
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX);
client.announce_host(request).await?;
Ok(())
}
@ -245,10 +245,9 @@ impl SchedulerClient {
})
.or_err(ErrorType::ConnectError)?;
let mut client =
SchedulerGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX);
let mut client = SchedulerGRPCClient::with_interceptor(channel, TracingInterceptor)
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX);
client.announce_host(request).await?;
Ok(())
}
@ -304,10 +303,9 @@ impl SchedulerClient {
})
.or_err(ErrorType::ConnectError)?;
let mut client =
SchedulerGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX);
let mut client = SchedulerGRPCClient::with_interceptor(channel, TracingInterceptor)
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX);
client.delete_host(request).await?;
Ok(())
}
@ -459,7 +457,7 @@ impl SchedulerClient {
&self,
task_id: &str,
peer_id: Option<&str>,
) -> Result<SchedulerGRPCClient<InterceptedService<Channel, InjectTracingInterceptor>>> {
) -> Result<SchedulerGRPCClient<InterceptedService<Channel, TracingInterceptor>>> {
// Update scheduler addresses of the client.
self.update_available_scheduler_addrs().await?;
@ -518,7 +516,7 @@ impl SchedulerClient {
};
Ok(
SchedulerGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
SchedulerGRPCClient::with_interceptor(channel, TracingInterceptor)
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX),
)
@ -621,6 +619,7 @@ impl SchedulerClient {
}
/// make_request creates a new request with timeout.
#[instrument(skip_all)]
fn make_request<T>(request: T) -> tonic::Request<T> {
let mut request = tonic::Request::new(request);
request.set_timeout(super::REQUEST_TIMEOUT);

View File

@ -36,6 +36,7 @@ pub struct Health {
/// Health implements the health server.
impl Health {
/// new creates a new Health.
#[instrument(skip_all)]
pub fn new(
addr: SocketAddr,
shutdown: shutdown::Shutdown,
@ -49,6 +50,7 @@ impl Health {
}
/// run starts the health server.
#[instrument(skip_all)]
pub async fn run(&self) {
// Clone the shutdown channel.
let mut shutdown = self.shutdown.clone();
@ -69,6 +71,7 @@ impl Health {
_ = shutdown.recv() => {
// Health server shutting down with signals.
info!("health server shutting down");
return
}
}
}

View File

@ -26,8 +26,9 @@ use prometheus::{
};
use std::net::SocketAddr;
use std::path::Path;
use std::sync::Arc;
use std::sync::{Arc, Mutex};
use std::time::Duration;
use sysinfo::{ProcessRefreshKind, ProcessesToUpdate, RefreshKind, System, UpdateKind};
use tokio::sync::mpsc;
use tracing::{error, info, instrument, warn};
use warp::{Filter, Rejection, Reply};
@ -184,6 +185,14 @@ lazy_static! {
&[]
).expect("metric can be created");
/// PROXY_REQUEST_VIA_DFDAEMON_AND_CACHE_HITS_COUNT is used to count the number of proxy request via
/// dfdaemon and cache hits.
pub static ref PROXY_REQUEST_VIA_DFDAEMON_AND_CACHE_HITS_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("proxy_request_via_dfdaemon_and_cache_hits_total", "Counter of the number of cache hits of the proxy request via dfdaemon.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&[]
).expect("metric can be created");
/// UPDATE_TASK_COUNT is used to count the number of update tasks.
pub static ref UPDATE_TASK_COUNT: IntCounterVec =
IntCounterVec::new(
@ -212,21 +221,7 @@ lazy_static! {
&["type"]
).expect("metric can be created");
/// LIST_TASK_ENTRIES_COUNT is used to count the number of list task entries.
pub static ref LIST_TASK_ENTRIES_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("list_task_entries_total", "Counter of the number of the list task entries.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type"]
).expect("metric can be created");
/// LIST_TASK_ENTRIES_FAILURE_COUNT is used to count the failed number of list task entries.
pub static ref LIST_TASK_ENTRIES_FAILURE_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("list_task_entries_failure_total", "Counter of the number of failed of the list task entries.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type"]
).expect("metric can be created");
/// DELETE_TASK_COUNT is used to count the number of delete tasks.
/// DELETE_TASK_COUNT is used to count the number of delete tasks.
pub static ref DELETE_TASK_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("delete_task_total", "Counter of the number of the delete task.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
@ -267,9 +262,24 @@ lazy_static! {
Opts::new("disk_usage_space_total", "Gauge of the disk usage space in bytes").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&[]
).expect("metric can be created");
/// DISK_WRITTEN_BYTES is used to record the disk written bytes.
pub static ref DISK_WRITTEN_BYTES: IntGaugeVec =
IntGaugeVec::new(
Opts::new("disk_written_bytes", "Gauge of the disk written bytes.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&[]
).expect("metric can be created");
/// DISK_READ_BYTES is used to record the disk read bytes.
pub static ref DISK_READ_BYTES: IntGaugeVec =
IntGaugeVec::new(
Opts::new("disk_read_bytes", "Gauge of the disk read bytes.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&[]
).expect("metric can be created");
}
/// register_custom_metrics registers all custom metrics.
#[instrument(skip_all)]
fn register_custom_metrics() {
REGISTRY
.register(Box::new(VERSION_GAUGE.clone()))
@ -335,6 +345,12 @@ fn register_custom_metrics() {
.register(Box::new(PROXY_REQUEST_VIA_DFDAEMON_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(
PROXY_REQUEST_VIA_DFDAEMON_AND_CACHE_HITS_COUNT.clone(),
))
.expect("metric can be registered");
REGISTRY
.register(Box::new(UPDATE_TASK_COUNT.clone()))
.expect("metric can be registered");
@ -351,14 +367,6 @@ fn register_custom_metrics() {
.register(Box::new(STAT_TASK_FAILURE_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(LIST_TASK_ENTRIES_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(LIST_TASK_ENTRIES_FAILURE_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(DELETE_TASK_COUNT.clone()))
.expect("metric can be registered");
@ -382,9 +390,18 @@ fn register_custom_metrics() {
REGISTRY
.register(Box::new(DISK_USAGE_SPACE.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(DISK_WRITTEN_BYTES.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(DISK_READ_BYTES.clone()))
.expect("metric can be registered");
}
/// reset_custom_metrics resets all custom metrics.
#[instrument(skip_all)]
fn reset_custom_metrics() {
VERSION_GAUGE.reset();
DOWNLOAD_TASK_COUNT.reset();
@ -402,18 +419,19 @@ fn reset_custom_metrics() {
PROXY_REQUEST_COUNT.reset();
PROXY_REQUEST_FAILURE_COUNT.reset();
PROXY_REQUEST_VIA_DFDAEMON_COUNT.reset();
PROXY_REQUEST_VIA_DFDAEMON_AND_CACHE_HITS_COUNT.reset();
UPDATE_TASK_COUNT.reset();
UPDATE_TASK_FAILURE_COUNT.reset();
STAT_TASK_COUNT.reset();
STAT_TASK_FAILURE_COUNT.reset();
LIST_TASK_ENTRIES_COUNT.reset();
LIST_TASK_ENTRIES_FAILURE_COUNT.reset();
DELETE_TASK_COUNT.reset();
DELETE_TASK_FAILURE_COUNT.reset();
DELETE_HOST_COUNT.reset();
DELETE_HOST_FAILURE_COUNT.reset();
DISK_SPACE.reset();
DISK_USAGE_SPACE.reset();
DISK_WRITTEN_BYTES.reset();
DISK_READ_BYTES.reset();
}
/// TaskSize represents the size of the task.
@ -744,6 +762,14 @@ pub fn collect_proxy_request_via_dfdaemon_metrics() {
.inc();
}
/// collect_proxy_request_via_dfdaemon_and_cache_hits_metrics increments the counter of
/// proxy requests via dfdaemon that hit the cache.
pub fn collect_proxy_request_via_dfdaemon_and_cache_hits_metrics() {
    // The counter is declared without labels, so the label list is empty.
    let counter = PROXY_REQUEST_VIA_DFDAEMON_AND_CACHE_HITS_COUNT.with_label_values(&[]);
    counter.inc();
}
/// collect_update_task_started_metrics collects the update task started metrics.
pub fn collect_update_task_started_metrics(typ: i32) {
UPDATE_TASK_COUNT
@ -772,20 +798,6 @@ pub fn collect_stat_task_failure_metrics(typ: i32) {
.inc();
}
/// collect_list_task_entries_started_metrics increments the list task entries counter
/// for the given task type.
pub fn collect_list_task_entries_started_metrics(typ: i32) {
    // The numeric task type is used as the value of the `type` label.
    let type_label = typ.to_string();
    let counter = LIST_TASK_ENTRIES_COUNT.with_label_values(&[type_label.as_str()]);
    counter.inc();
}
/// collect_list_task_entries_failure_metrics increments the failed list task entries
/// counter for the given task type.
pub fn collect_list_task_entries_failure_metrics(typ: i32) {
    // The numeric task type is used as the value of the `type` label.
    let type_label = typ.to_string();
    let counter = LIST_TASK_ENTRIES_FAILURE_COUNT.with_label_values(&[type_label.as_str()]);
    counter.inc();
}
/// collect_delete_task_started_metrics collects the delete task started metrics.
pub fn collect_delete_task_started_metrics(typ: i32) {
DELETE_TASK_COUNT
@ -811,7 +823,7 @@ pub fn collect_delete_host_failure_metrics() {
}
/// collect_disk_metrics collects the disk metrics.
pub fn collect_disk_metrics(path: &Path) {
pub fn collect_disk_metrics(path: &Path, system: &Arc<Mutex<System>>) {
// Collect disk space metrics.
let stats = match fs2::statvfs(path) {
Ok(stats) => stats,
@ -828,6 +840,24 @@ pub fn collect_disk_metrics(path: &Path) {
DISK_USAGE_SPACE
.with_label_values(&[])
.set(usage_space as i64);
// Collect disk bandwidth metrics.
let mut sys = system.lock().unwrap();
sys.refresh_processes_specifics(
ProcessesToUpdate::All,
true,
ProcessRefreshKind::new()
.with_disk_usage()
.with_exe(UpdateKind::Always),
);
let process = sys.process(sysinfo::get_current_pid().unwrap()).unwrap();
DISK_WRITTEN_BYTES
.with_label_values(&[])
.set(process.disk_usage().written_bytes as i64);
DISK_READ_BYTES
.with_label_values(&[])
.set(process.disk_usage().read_bytes as i64);
}
/// Metrics is the metrics server.
@ -836,6 +866,9 @@ pub struct Metrics {
/// config is the configuration of the dfdaemon.
config: Arc<Config>,
// system is the system information, only used for collecting disk metrics.
system: Arc<Mutex<System>>,
/// shutdown is used to shutdown the metrics server.
shutdown: shutdown::Shutdown,
@ -846,6 +879,7 @@ pub struct Metrics {
/// Metrics implements the metrics server.
impl Metrics {
/// new creates a new Metrics.
#[instrument(skip_all)]
pub fn new(
config: Arc<Config>,
shutdown: shutdown::Shutdown,
@ -853,12 +887,20 @@ impl Metrics {
) -> Self {
Self {
config,
system: Arc::new(Mutex::new(System::new_with_specifics(
RefreshKind::new().with_processes(
ProcessRefreshKind::new()
.with_disk_usage()
.with_exe(UpdateKind::Always),
),
))),
shutdown,
_shutdown_complete: shutdown_complete_tx,
}
}
/// run starts the metrics server.
#[instrument(skip_all)]
pub async fn run(&self) {
// Clone the shutdown channel.
let mut shutdown = self.shutdown.clone();
@ -879,6 +921,7 @@ impl Metrics {
// Clone the config.
let config = self.config.clone();
let system = self.system.clone();
// Create the metrics server address.
let addr = SocketAddr::new(
@ -890,7 +933,7 @@ impl Metrics {
let get_metrics_route = warp::path!("metrics")
.and(warp::get())
.and(warp::path::end())
.and_then(move || Self::get_metrics_handler(config.clone()));
.and_then(move || Self::get_metrics_handler(config.clone(), system.clone()));
// Delete the metrics route.
let delete_metrics_route = warp::path!("metrics")
@ -909,15 +952,19 @@ impl Metrics {
_ = shutdown.recv() => {
// Metrics server shutting down with signals.
info!("metrics server shutting down");
return
}
}
}
/// get_metrics_handler handles the metrics request of getting.
#[instrument(skip_all)]
async fn get_metrics_handler(config: Arc<Config>) -> Result<impl Reply, Rejection> {
async fn get_metrics_handler(
config: Arc<Config>,
system: Arc<Mutex<System>>,
) -> Result<impl Reply, Rejection> {
// Collect the disk space metrics.
collect_disk_metrics(config.storage.dir.as_path());
collect_disk_metrics(config.storage.dir.as_path(), &system);
// Encode custom metrics.
let encoder = TextEncoder::new();

View File

@ -0,0 +1,227 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::resource::task::Task;
use dragonfly_api::common::v2::Range;
use dragonfly_api::dfdaemon::v2::DownloadTaskRequest;
use dragonfly_client_core::{Error, Result};
use dragonfly_client_util::http::{get_range, hashmap_to_headermap};
use lru::LruCache;
use std::cmp::{max, min};
use std::num::NonZeroUsize;
use std::sync::{Arc, Mutex};
/// Cache is the cache for storing http response by LRU algorithm.
#[derive(Clone)]
pub struct Cache {
/// pieces stores the piece cache data with piece id and value.
pieces: Arc<Mutex<LruCache<String, bytes::Bytes>>>,
/// task is the task manager.
task: Arc<Task>,
}
/// Cache implements the cache for storing http response by LRU algorithm.
impl Cache {
/// new creates a new cache with the specified capacity.
pub fn new(capacity: usize, task: Arc<Task>) -> Result<Self> {
let capacity = NonZeroUsize::new(capacity).ok_or(Error::InvalidParameter)?;
let pieces = Arc::new(Mutex::new(LruCache::new(capacity)));
Ok(Cache { pieces, task })
}
/// get_by_request gets the content from the cache by the request.
pub async fn get_by_request(
&self,
request: &DownloadTaskRequest,
) -> Result<Option<bytes::Bytes>> {
let Some(download) = &request.download else {
return Err(Error::InvalidParameter);
};
let task_id = self.task.id_generator.task_id(
&download.url,
download.tag.as_deref(),
download.application.as_deref(),
download.filtered_query_params.clone(),
)?;
let Some(task) = self.task.get(&task_id)? else {
return Ok(None);
};
let (Some(content_length), Some(piece_length)) =
(task.content_length(), task.piece_length())
else {
return Ok(None);
};
let Ok(request_header) = hashmap_to_headermap(&download.request_header) else {
return Ok(None);
};
let Ok(range) = get_range(&request_header, content_length) else {
return Ok(None);
};
let interested_pieces =
self.task
.piece
.calculate_interested(piece_length, content_length, range)?;
// Calculate the content capacity based on the interested pieces and push the content into
// the bytes.
let content_capacity = interested_pieces.len() * piece_length as usize;
let mut content = bytes::BytesMut::with_capacity(content_capacity);
for interested_piece in interested_pieces {
let piece_id = self.task.piece.id(&task_id, interested_piece.number);
let Some(piece_content) = self.get_piece(&piece_id) else {
return Ok(None);
};
// Calculate the target offset and length based on the range.
let (piece_target_offset, piece_target_length) =
calculate_piece_range(interested_piece.offset, interested_piece.length, range);
let begin = piece_target_offset;
let end = piece_target_offset + piece_target_length;
if begin >= piece_content.len() || end > piece_content.len() {
return Err(Error::InvalidParameter);
}
let piece_content = piece_content.slice(begin..end);
content.extend_from_slice(&piece_content);
}
Ok(Some(content.freeze()))
}
/// get_piece gets the piece content from the cache.
pub fn get_piece(&self, id: &str) -> Option<bytes::Bytes> {
let mut pieces = self.pieces.lock().unwrap();
pieces.get(id).cloned()
}
/// add_piece create the piece content into the cache, if the key already exists, no operation will
/// be performed.
pub fn add_piece(&self, id: &str, content: bytes::Bytes) {
let mut pieces = self.pieces.lock().unwrap();
if pieces.contains(id) {
return;
}
pieces.put(id.to_string(), content);
}
/// contains_piece checks whether the piece exists in the cache.
pub fn contains_piece(&self, id: &str) -> bool {
let pieces = self.pieces.lock().unwrap();
pieces.contains(id)
}
}
/// calculate_piece_range calculates which part of a piece is covered by the request range.
///
/// `offset` and `length` describe the piece inside the whole content, `range` is the
/// requested range of the whole content. The result is `(target_offset, target_length)`,
/// where `target_offset` is relative to the start of the piece.
///
/// Without a range the whole piece is returned as `(0, length)`. When the range is empty
/// or does not overlap the piece, `(0, 0)` is returned instead of underflowing the
/// unsigned arithmetic.
pub fn calculate_piece_range(offset: u64, length: u64, range: Option<Range>) -> (usize, usize) {
    let Some(range) = range else {
        return (0, length as usize);
    };

    // Use half-open intervals [start, end) so that no `- 1` is needed; subtracting from an
    // unsigned zero would panic in debug builds and wrap around in release builds.
    let piece_end = offset.saturating_add(length);
    let range_end = range.start.saturating_add(range.length);

    let overlap_start = max(offset, range.start);
    let overlap_end = min(piece_end, range_end);
    if overlap_end <= overlap_start {
        // Nothing of this piece is requested.
        return (0, 0);
    }

    (
        (overlap_start - offset) as usize,
        (overlap_end - overlap_start) as usize,
    )
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks calculate_piece_range against pieces with and without a request range,
    /// including ranges that start before the piece and ranges that end after it.
    #[tokio::test]
    async fn should_calculate_piece_range() {
        let range = |start: u64, length: u64| Some(Range { start, length });

        // Each case is (piece offset, piece length, request range, expected (offset, length)).
        let cases = [
            (1, 4, None, (0, 4)),
            (1, 4, range(1, 4), (0, 4)),
            (1, 4, range(2, 1), (1, 1)),
            (1, 4, range(1, 1), (0, 1)),
            (1, 4, range(4, 1), (3, 1)),
            (1, 4, range(0, 2), (0, 1)),
            (1, 4, range(4, 3), (3, 1)),
        ];

        for (piece_offset, piece_length, request_range, expected) in cases {
            let actual = calculate_piece_range(piece_offset, piece_length, request_range);
            assert_eq!(actual, expected);
        }
    }
}

View File

@ -14,10 +14,9 @@
* limitations under the License.
*/
use bytesize::ByteSize;
use dragonfly_api::common::v2::Priority;
use reqwest::header::HeaderMap;
use tracing::error;
use tracing::{error, instrument};
/// DRAGONFLY_TAG_HEADER is the header key of tag in http request.
pub const DRAGONFLY_TAG_HEADER: &str = "X-Dragonfly-Tag";
@ -52,43 +51,8 @@ pub const DRAGONFLY_USE_P2P_HEADER: &str = "X-Dragonfly-Use-P2P";
/// If the value is "false", the range request will fetch the range content.
pub const DRAGONFLY_PREFETCH_HEADER: &str = "X-Dragonfly-Prefetch";
/// DRAGONFLY_OUTPUT_PATH_HEADER is the header key of absolute output path in http request.
///
/// If `X-Dragonfly-Output-Path` is set, the downloaded file will be saved to the specified path.
/// Dfdaemon will try to create hard link to the output path before starting the download. If hard link creation fails,
/// it will copy the file to the output path after the download is completed.
/// For more details refer to https://github.com/dragonflyoss/design/blob/main/systems-analysis/file-download-workflow-with-hard-link/README.md.
pub const DRAGONFLY_OUTPUT_PATH_HEADER: &str = "X-Dragonfly-Output-Path";
/// DRAGONFLY_FORCE_HARD_LINK_HEADER is the header key of force hard link in http request.
///
/// `X-Dragonfly-Force-Hard-Link` is the flag to indicate whether the download file must be hard linked to the output path.
/// For more details refer to https://github.com/dragonflyoss/design/blob/main/systems-analysis/file-download-workflow-with-hard-link/README.md.
pub const DRAGONFLY_FORCE_HARD_LINK_HEADER: &str = "X-Dragonfly-Force-Hard-Link";
/// DRAGONFLY_PIECE_LENGTH_HEADER is the header key of piece length in http request.
/// If the value is set, the piece length will be used to download the file.
/// Different piece length will generate different task id. The value needs to
/// be set with human readable format and needs to be greater than or equal
/// to 4mib, for example: 4mib, 1gib
pub const DRAGONFLY_PIECE_LENGTH_HEADER: &str = "X-Dragonfly-Piece-Length";
/// DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID_HEADER is the header key of content for calculating task id.
/// If DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID_HEADER is set, use its value to calculate the task ID.
/// Otherwise, calculate the task ID based on `url`, `piece_length`, `tag`, `application`, and `filtered_query_params`.
pub const DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID_HEADER: &str =
"X-Dragonfly-Content-For-Calculating-Task-ID";
/// DRAGONFLY_TASK_DOWNLOAD_FINISHED_HEADER is the response header key to indicate whether the task download finished.
/// When the task download is finished, the response will include this header with the value `"true"`,
/// indicating that the download hit the local cache.
pub const DRAGONFLY_TASK_DOWNLOAD_FINISHED_HEADER: &str = "X-Dragonfly-Task-Download-Finished";
/// DRAGONFLY_TASK_ID_HEADER is the response header key of task id. Client will calculate the task ID
/// based on `url`, `piece_length`, `tag`, `application`, and `filtered_query_params`.
pub const DRAGONFLY_TASK_ID_HEADER: &str = "X-Dragonfly-Task-ID";
/// get_tag gets the tag from http header.
#[instrument(skip_all)]
pub fn get_tag(header: &HeaderMap) -> Option<String> {
header
.get(DRAGONFLY_TAG_HEADER)
@ -97,6 +61,7 @@ pub fn get_tag(header: &HeaderMap) -> Option<String> {
}
/// get_application gets the application from http header.
#[instrument(skip_all)]
pub fn get_application(header: &HeaderMap) -> Option<String> {
header
.get(DRAGONFLY_APPLICATION_HEADER)
@ -105,6 +70,7 @@ pub fn get_application(header: &HeaderMap) -> Option<String> {
}
/// get_priority gets the priority from http header.
#[instrument(skip_all)]
pub fn get_priority(header: &HeaderMap) -> i32 {
let default_priority = Priority::Level6 as i32;
match header.get(DRAGONFLY_PRIORITY_HEADER) {
@ -126,6 +92,7 @@ pub fn get_priority(header: &HeaderMap) -> i32 {
}
/// get_registry gets the custom address of container registry from http header.
#[instrument(skip_all)]
pub fn get_registry(header: &HeaderMap) -> Option<String> {
header
.get(DRAGONFLY_REGISTRY_HEADER)
@ -134,6 +101,7 @@ pub fn get_registry(header: &HeaderMap) -> Option<String> {
}
/// get_filters gets the filters from http header.
#[instrument(skip_all)]
pub fn get_filtered_query_params(
header: &HeaderMap,
default_filtered_query_params: Vec<String>,
@ -151,6 +119,7 @@ pub fn get_filtered_query_params(
}
/// get_use_p2p gets the use p2p from http header.
#[instrument(skip_all)]
pub fn get_use_p2p(header: &HeaderMap) -> bool {
match header.get(DRAGONFLY_USE_P2P_HEADER) {
Some(value) => match value.to_str() {
@ -165,6 +134,7 @@ pub fn get_use_p2p(header: &HeaderMap) -> bool {
}
/// get_prefetch gets the prefetch from http header.
#[instrument(skip_all)]
pub fn get_prefetch(header: &HeaderMap) -> Option<bool> {
match header.get(DRAGONFLY_PREFETCH_HEADER) {
Some(value) => match value.to_str() {
@ -177,229 +147,3 @@ pub fn get_prefetch(header: &HeaderMap) -> Option<bool> {
None => None,
}
}
/// get_output_path reads the output path from the `X-Dragonfly-Output-Path` header.
/// Returns None when the header is missing or its value can't be converted to a string.
pub fn get_output_path(header: &HeaderMap) -> Option<String> {
    let raw = header.get(DRAGONFLY_OUTPUT_PATH_HEADER)?;
    let output_path = raw.to_str().ok()?;
    Some(output_path.to_string())
}
/// get_force_hard_link reads the force hard link flag from the
/// `X-Dragonfly-Force-Hard-Link` header. The flag is true only when the value equals
/// "true" ignoring ascii case; a missing or unreadable header yields false.
pub fn get_force_hard_link(header: &HeaderMap) -> bool {
    header
        .get(DRAGONFLY_FORCE_HARD_LINK_HEADER)
        .map_or(false, |raw| match raw.to_str() {
            Ok(flag) => flag.eq_ignore_ascii_case("true"),
            Err(err) => {
                error!("get force hard link from header failed: {}", err);
                false
            }
        })
}
/// get_piece_length gets the piece length from http header.
pub fn get_piece_length(header: &HeaderMap) -> Option<ByteSize> {
match header.get(DRAGONFLY_PIECE_LENGTH_HEADER) {
Some(piece_length) => match piece_length.to_str() {
Ok(piece_length) => match piece_length.parse::<ByteSize>() {
Ok(piece_length) => Some(piece_length),
Err(err) => {
error!("parse piece length from header failed: {}", err);
None
}
},
Err(err) => {
error!("get piece length from header failed: {}", err);
None
}
},
None => None,
}
}
/// get_content_for_calculating_task_id reads the content used for calculating the task id
/// from the `X-Dragonfly-Content-For-Calculating-Task-ID` header. Returns None when the
/// header is missing or its value can't be converted to a string.
pub fn get_content_for_calculating_task_id(header: &HeaderMap) -> Option<String> {
    let raw = header.get(DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID_HEADER)?;
    let content = raw.to_str().ok()?;
    Some(content.to_string())
}
#[cfg(test)]
mod tests {
    //! Unit tests for the header helper functions. Each test covers the
    //! header-present case and the header-absent case, plus invalid values
    //! where the helper has a fallback.

    use super::*;
    use reqwest::header::{HeaderMap, HeaderValue};

    #[test]
    fn test_get_tag() {
        let mut headers = HeaderMap::new();
        headers.insert(DRAGONFLY_TAG_HEADER, HeaderValue::from_static("test-tag"));
        assert_eq!(get_tag(&headers), Some("test-tag".to_string()));

        let empty_headers = HeaderMap::new();
        assert_eq!(get_tag(&empty_headers), None);
    }

    #[test]
    fn test_get_application() {
        let mut headers = HeaderMap::new();
        headers.insert(
            DRAGONFLY_APPLICATION_HEADER,
            HeaderValue::from_static("test-app"),
        );
        assert_eq!(get_application(&headers), Some("test-app".to_string()));

        let empty_headers = HeaderMap::new();
        assert_eq!(get_application(&empty_headers), None);
    }

    #[test]
    fn test_get_priority() {
        let mut headers = HeaderMap::new();
        headers.insert(DRAGONFLY_PRIORITY_HEADER, HeaderValue::from_static("5"));
        assert_eq!(get_priority(&headers), 5);

        // A missing header falls back to Level6.
        let empty_headers = HeaderMap::new();
        assert_eq!(get_priority(&empty_headers), Priority::Level6 as i32);

        // A non-numeric value falls back to Level6 as well.
        headers.insert(
            DRAGONFLY_PRIORITY_HEADER,
            HeaderValue::from_static("invalid"),
        );
        assert_eq!(get_priority(&headers), Priority::Level6 as i32);
    }

    #[test]
    fn test_get_registry() {
        let mut headers = HeaderMap::new();
        headers.insert(
            DRAGONFLY_REGISTRY_HEADER,
            HeaderValue::from_static("test-registry"),
        );
        assert_eq!(get_registry(&headers), Some("test-registry".to_string()));

        let empty_headers = HeaderMap::new();
        assert_eq!(get_registry(&empty_headers), None);
    }

    #[test]
    fn test_get_filtered_query_params() {
        let mut headers = HeaderMap::new();
        headers.insert(
            DRAGONFLY_FILTERED_QUERY_PARAMS_HEADER,
            HeaderValue::from_static("param1,param2"),
        );
        assert_eq!(
            get_filtered_query_params(&headers, vec!["default".to_string()]),
            vec!["param1".to_string(), "param2".to_string()]
        );

        // Without the header the supplied default list is returned.
        let empty_headers = HeaderMap::new();
        assert_eq!(
            get_filtered_query_params(&empty_headers, vec!["default".to_string()]),
            vec!["default".to_string()]
        );
    }

    #[test]
    fn test_get_use_p2p() {
        let mut headers = HeaderMap::new();
        headers.insert(DRAGONFLY_USE_P2P_HEADER, HeaderValue::from_static("true"));
        assert!(get_use_p2p(&headers));

        headers.insert(DRAGONFLY_USE_P2P_HEADER, HeaderValue::from_static("false"));
        assert!(!get_use_p2p(&headers));

        let empty_headers = HeaderMap::new();
        assert!(!get_use_p2p(&empty_headers));
    }

    #[test]
    fn test_get_prefetch() {
        let mut headers = HeaderMap::new();
        headers.insert(DRAGONFLY_PREFETCH_HEADER, HeaderValue::from_static("true"));
        assert_eq!(get_prefetch(&headers), Some(true));

        headers.insert(DRAGONFLY_PREFETCH_HEADER, HeaderValue::from_static("false"));
        assert_eq!(get_prefetch(&headers), Some(false));

        let empty_headers = HeaderMap::new();
        assert_eq!(get_prefetch(&empty_headers), None);
    }

    #[test]
    fn test_get_output_path() {
        let mut headers = HeaderMap::new();
        headers.insert(
            DRAGONFLY_OUTPUT_PATH_HEADER,
            HeaderValue::from_static("/path/to/output"),
        );
        assert_eq!(
            get_output_path(&headers),
            Some("/path/to/output".to_string())
        );

        let empty_headers = HeaderMap::new();
        assert_eq!(get_output_path(&empty_headers), None);
    }

    #[test]
    fn test_get_force_hard_link() {
        let mut headers = HeaderMap::new();
        headers.insert(
            DRAGONFLY_FORCE_HARD_LINK_HEADER,
            HeaderValue::from_static("true"),
        );
        assert!(get_force_hard_link(&headers));

        headers.insert(
            DRAGONFLY_FORCE_HARD_LINK_HEADER,
            HeaderValue::from_static("false"),
        );
        assert!(!get_force_hard_link(&headers));

        let empty_headers = HeaderMap::new();
        assert!(!get_force_hard_link(&empty_headers));
    }

    #[test]
    fn test_get_piece_length() {
        let mut headers = HeaderMap::new();
        headers.insert(
            DRAGONFLY_PIECE_LENGTH_HEADER,
            HeaderValue::from_static("4mib"),
        );
        assert_eq!(get_piece_length(&headers), Some(ByteSize::mib(4)));

        let empty_headers = HeaderMap::new();
        assert_eq!(get_piece_length(&empty_headers), None);

        // An unparsable value yields None rather than an error.
        headers.insert(
            DRAGONFLY_PIECE_LENGTH_HEADER,
            HeaderValue::from_static("invalid"),
        );
        assert_eq!(get_piece_length(&headers), None);

        // Zero is accepted here and yields a zero-byte size.
        headers.insert(DRAGONFLY_PIECE_LENGTH_HEADER, HeaderValue::from_static("0"));
        assert_eq!(get_piece_length(&headers), Some(ByteSize::b(0)));
    }

    #[test]
    fn test_get_content_for_calculating_task_id() {
        let mut headers = HeaderMap::new();
        headers.insert(
            DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID_HEADER,
            HeaderValue::from_static("test-content"),
        );
        assert_eq!(
            get_content_for_calculating_task_id(&headers),
            Some("test-content".to_string())
        );

        // Check the function under test on the empty map. This assertion
        // previously called get_registry, so the None path was never covered.
        let empty_headers = HeaderMap::new();
        assert_eq!(get_content_for_calculating_task_id(&empty_headers), None);
    }
}

View File

@ -16,13 +16,15 @@
use crate::grpc::{dfdaemon_download::DfdaemonDownloadClient, REQUEST_TIMEOUT};
use crate::metrics::{
collect_proxy_request_failure_metrics, collect_proxy_request_started_metrics,
collect_download_piece_traffic_metrics, collect_proxy_request_failure_metrics,
collect_proxy_request_started_metrics,
collect_proxy_request_via_dfdaemon_and_cache_hits_metrics,
collect_proxy_request_via_dfdaemon_metrics,
};
use crate::resource::{piece::MIN_PIECE_LENGTH, task::Task};
use crate::resource::task::Task;
use crate::shutdown;
use bytes::Bytes;
use dragonfly_api::common::v2::{Download, TaskType};
use dragonfly_api::common::v2::{Download, TaskType, TrafficType};
use dragonfly_api::dfdaemon::v2::{
download_task_response, DownloadTaskRequest, DownloadTaskStartedResponse,
};
@ -34,8 +36,8 @@ use dragonfly_client_util::{
http::{hashmap_to_headermap, headermap_to_hashmap},
tls::{generate_self_signed_certs_by_ca_cert, generate_simple_self_signed_certs, NoVerifier},
};
use futures::TryStreamExt;
use http_body_util::{combinators::BoxBody, BodyExt, Empty, StreamBody};
use futures_util::TryStreamExt;
use http_body_util::{combinators::BoxBody, BodyExt, Empty, Full, StreamBody};
use hyper::body::Frame;
use hyper::client::conn::http1::Builder as ClientBuilder;
use hyper::server::conn::http1::Builder as ServerBuilder;
@ -46,28 +48,25 @@ use hyper_util::{
client::legacy::Client,
rt::{tokio::TokioIo, TokioExecutor},
};
use lazy_static::lazy_static;
use rcgen::Certificate;
use rustls::{RootCertStore, ServerConfig};
use rustls_pki_types::CertificateDer;
use std::collections::HashMap;
use std::net::SocketAddr;
use std::sync::Arc;
use tokio::io::{AsyncWriteExt, BufReader, BufWriter};
use std::time::Duration;
use tokio::io::{AsyncReadExt, AsyncWriteExt, BufReader};
use tokio::net::TcpListener;
use tokio::net::TcpStream;
use tokio::sync::{mpsc, Barrier};
use tokio::time::sleep;
use tokio_rustls::TlsAcceptor;
use tokio_util::io::ReaderStream;
use tracing::{debug, error, info, instrument, Instrument, Span};
pub mod cache;
pub mod header;
lazy_static! {
/// SUPPORTED_HTTP_PROTOCOLS is the supported HTTP protocols, including http/1.1 and http/1.0.
static ref SUPPORTED_HTTP_PROTOCOLS: Vec<Vec<u8>> = vec![b"http/1.1".to_vec(), b"http/1.0".to_vec()];
}
/// Response is the response of the proxy server.
pub type Response = hyper::Response<BoxBody<Bytes, ClientError>>;
@ -76,6 +75,9 @@ pub struct Proxy {
/// config is the configuration of the dfdaemon.
config: Arc<Config>,
/// cache is the cache manager for storing the piece content.
cache: Arc<cache::Cache>,
/// task is the task manager.
task: Arc<Task>,
@ -99,6 +101,7 @@ pub struct Proxy {
/// Proxy implements the proxy server.
impl Proxy {
/// new creates a new Proxy.
#[instrument(skip_all)]
pub fn new(
config: Arc<Config>,
task: Arc<Task>,
@ -107,6 +110,7 @@ impl Proxy {
) -> Self {
let mut proxy = Self {
config: config.clone(),
cache: Arc::new(cache::Cache::new(config.proxy.cache_capacity, task.clone()).unwrap()),
task: task.clone(),
addr: SocketAddr::new(config.proxy.server.ip.unwrap(), config.proxy.server.port),
registry_cert: Arc::new(None),
@ -143,9 +147,9 @@ impl Proxy {
}
/// run starts the proxy server.
#[instrument(skip_all)]
pub async fn run(&self, grpc_server_started_barrier: Arc<Barrier>) -> ClientResult<()> {
let mut shutdown = self.shutdown.clone();
let read_buffer_size = self.config.proxy.read_buffer_size;
// When the grpc server is started, notify the barrier. If the shutdown signal is received
// before barrier is waited successfully, the server will shutdown immediately.
@ -165,23 +169,6 @@ impl Proxy {
DfdaemonDownloadClient::new_unix(self.config.download.server.socket_path.clone())
.await?;
#[derive(Clone)]
struct Context {
config: Arc<Config>,
task: Arc<Task>,
dfdaemon_download_client: DfdaemonDownloadClient,
registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>,
server_ca_cert: Arc<Option<Certificate>>,
}
let context = Context {
config: self.config.clone(),
task: self.task.clone(),
dfdaemon_download_client,
registry_cert: self.registry_cert.clone(),
server_ca_cert: self.server_ca_cert.clone(),
};
let listener = TcpListener::bind(self.addr).await?;
info!("proxy server listening on {}", self.addr);
@ -196,21 +183,21 @@ impl Proxy {
let io = TokioIo::new(tcp);
debug!("accepted connection from {}", remote_address);
let context = context.clone();
let config = self.config.clone();
let cache = self.cache.clone();
let task = self.task.clone();
let dfdaemon_download_client = dfdaemon_download_client.clone();
let registry_cert = self.registry_cert.clone();
let server_ca_cert = self.server_ca_cert.clone();
tokio::task::spawn(async move {
if let Err(err) = ServerBuilder::new()
.keep_alive(true)
.max_buf_size(read_buffer_size)
.preserve_header_case(true)
.title_case_headers(true)
.serve_connection(
io,
service_fn(move |request|{
let context = context.clone();
async move {
handler(context.config, context.task, request, context.dfdaemon_download_client, context.registry_cert, context.server_ca_cert, remote_address.ip()).await
}
} ),
service_fn(move |request| handler(config.clone(), cache.clone(), task.clone(), request, dfdaemon_download_client.clone(), registry_cert.clone(), server_ca_cert.clone())),
)
.with_upgrades()
.await
@ -231,21 +218,16 @@ impl Proxy {
}
/// handler handles the request from the client.
#[instrument(skip_all, fields(url, method, remote_ip))]
#[instrument(skip_all, fields(uri, method))]
pub async fn handler(
config: Arc<Config>,
cache: Arc<cache::Cache>,
task: Arc<Task>,
request: Request<hyper::body::Incoming>,
dfdaemon_download_client: DfdaemonDownloadClient,
registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>,
server_ca_cert: Arc<Option<Certificate>>,
remote_ip: std::net::IpAddr,
) -> ClientResult<Response> {
// Span record the url and method.
Span::current().record("url", request.uri().to_string().as_str());
Span::current().record("method", request.method().as_str());
Span::current().record("remote_ip", remote_ip.to_string().as_str());
// Record the proxy request started metrics. The metrics will be recorded
// when the request is kept alive.
collect_proxy_request_started_metrics();
@ -256,9 +238,9 @@ pub async fn handler(
if Method::CONNECT == request.method() {
return registry_mirror_https_handler(
config,
cache,
task,
request,
remote_ip,
dfdaemon_download_client,
registry_cert,
server_ca_cert,
@ -268,22 +250,26 @@ pub async fn handler(
return registry_mirror_http_handler(
config,
cache,
task,
request,
remote_ip,
dfdaemon_download_client,
registry_cert,
)
.await;
}
// Span record the uri and method.
Span::current().record("uri", request.uri().to_string().as_str());
Span::current().record("method", request.method().as_str());
// Handle CONNECT request.
if Method::CONNECT == request.method() {
return https_handler(
config,
cache,
task,
request,
remote_ip,
dfdaemon_download_client,
registry_cert,
server_ca_cert,
@ -293,9 +279,9 @@ pub async fn handler(
http_handler(
config,
cache,
task,
request,
remote_ip,
dfdaemon_download_client,
registry_cert,
)
@ -306,18 +292,18 @@ pub async fn handler(
#[instrument(skip_all)]
pub async fn registry_mirror_http_handler(
config: Arc<Config>,
cache: Arc<cache::Cache>,
task: Arc<Task>,
request: Request<hyper::body::Incoming>,
remote_ip: std::net::IpAddr,
dfdaemon_download_client: DfdaemonDownloadClient,
registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>,
) -> ClientResult<Response> {
let request = make_registry_mirror_request(config.clone(), request)?;
return http_handler(
config,
cache,
task,
request,
remote_ip,
dfdaemon_download_client,
registry_cert,
)
@ -328,9 +314,9 @@ pub async fn registry_mirror_http_handler(
#[instrument(skip_all)]
pub async fn registry_mirror_https_handler(
config: Arc<Config>,
cache: Arc<cache::Cache>,
task: Arc<Task>,
request: Request<hyper::body::Incoming>,
remote_ip: std::net::IpAddr,
dfdaemon_download_client: DfdaemonDownloadClient,
registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>,
server_ca_cert: Arc<Option<Certificate>>,
@ -338,9 +324,9 @@ pub async fn registry_mirror_https_handler(
let request = make_registry_mirror_request(config.clone(), request)?;
return https_handler(
config,
cache,
task,
request,
remote_ip,
dfdaemon_download_client,
registry_cert,
server_ca_cert,
@ -352,9 +338,9 @@ pub async fn registry_mirror_https_handler(
#[instrument(skip_all)]
pub async fn http_handler(
config: Arc<Config>,
cache: Arc<cache::Cache>,
task: Arc<Task>,
request: Request<hyper::body::Incoming>,
remote_ip: std::net::IpAddr,
dfdaemon_download_client: DfdaemonDownloadClient,
registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>,
) -> ClientResult<Response> {
@ -388,10 +374,10 @@ pub async fn http_handler(
);
return proxy_via_dfdaemon(
config,
cache,
task,
&rule,
request,
remote_ip,
dfdaemon_download_client,
)
.await;
@ -407,10 +393,10 @@ pub async fn http_handler(
);
return proxy_via_dfdaemon(
config,
cache,
task,
&Rule::default(),
request,
remote_ip,
dfdaemon_download_client,
)
.await;
@ -437,9 +423,9 @@ pub async fn http_handler(
#[instrument(skip_all)]
pub async fn https_handler(
config: Arc<Config>,
cache: Arc<cache::Cache>,
task: Arc<Task>,
request: Request<hyper::body::Incoming>,
remote_ip: std::net::IpAddr,
dfdaemon_download_client: DfdaemonDownloadClient,
registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>,
server_ca_cert: Arc<Option<Certificate>>,
@ -449,17 +435,15 @@ pub async fn https_handler(
// Proxy the request directly to the remote server.
if let Some(host) = request.uri().host() {
let host = host.to_string();
let port = request.uri().port_u16().unwrap_or(443);
tokio::task::spawn(async move {
match hyper::upgrade::on(request).await {
Ok(upgraded) => {
if let Err(e) = upgraded_tunnel(
config,
cache,
task,
upgraded,
host,
port,
remote_ip,
dfdaemon_download_client,
registry_cert,
server_ca_cert,
@ -486,11 +470,10 @@ pub async fn https_handler(
#[instrument(skip_all)]
async fn upgraded_tunnel(
config: Arc<Config>,
cache: Arc<cache::Cache>,
task: Arc<Task>,
upgraded: Upgraded,
host: String,
port: u16,
remote_ip: std::net::IpAddr,
dfdaemon_download_client: DfdaemonDownloadClient,
registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>,
server_ca_cert: Arc<Option<Certificate>>,
@ -502,11 +485,11 @@ async fn upgraded_tunnel(
let (server_certs, server_key) = match server_ca_cert.as_ref() {
Some(server_ca_cert) => {
info!("generate self-signed certificate by CA certificate");
generate_self_signed_certs_by_ca_cert(server_ca_cert, host.as_ref(), subject_alt_names)?
generate_self_signed_certs_by_ca_cert(server_ca_cert, subject_alt_names)?
}
None => {
info!("generate simple self-signed certificate");
generate_simple_self_signed_certs(host.as_ref(), subject_alt_names)?
generate_simple_self_signed_certs(subject_alt_names)?
}
};
@ -515,7 +498,7 @@ async fn upgraded_tunnel(
.with_no_client_auth()
.with_single_cert(server_certs, server_key)
.or_err(ErrorType::TLSConfigError)?;
server_config.alpn_protocols = SUPPORTED_HTTP_PROTOCOLS.clone();
server_config.alpn_protocols = vec![b"http/1.1".to_vec(), b"http/1.0".to_vec()];
let tls_acceptor = TlsAcceptor::from(Arc::new(server_config));
let tls_stream = tls_acceptor.accept(TokioIo::new(upgraded)).await?;
@ -535,11 +518,10 @@ async fn upgraded_tunnel(
service_fn(move |request| {
upgraded_handler(
config.clone(),
cache.clone(),
task.clone(),
host.clone(),
port,
request,
remote_ip,
dfdaemon_download_client.clone(),
registry_cert.clone(),
)
@ -555,20 +537,18 @@ async fn upgraded_tunnel(
}
/// upgraded_handler handles the upgraded https request from the client.
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all, fields(url, method))]
#[instrument(skip_all, fields(uri, method))]
pub async fn upgraded_handler(
config: Arc<Config>,
cache: Arc<cache::Cache>,
task: Arc<Task>,
host: String,
port: u16,
mut request: Request<hyper::body::Incoming>,
remote_ip: std::net::IpAddr,
dfdaemon_download_client: DfdaemonDownloadClient,
registry_cert: Arc<Option<Vec<CertificateDer<'static>>>>,
) -> ClientResult<Response> {
// Span record the url and method.
Span::current().record("url", request.uri().to_string().as_str());
// Span record the uri and method.
Span::current().record("uri", request.uri().to_string().as_str());
Span::current().record("method", request.method().as_str());
// Authenticate the request with the basic auth.
@ -587,18 +567,8 @@ pub async fn upgraded_handler(
// If the scheme is not set, set the scheme to https.
if request.uri().scheme().is_none() {
let builder = http::uri::Builder::new();
*request.uri_mut() = builder
.scheme("https")
.authority(format!("{}:{}", host, port))
.path_and_query(
request
.uri()
.path_and_query()
.map(|v| v.as_str())
.unwrap_or("/"),
)
.build()
*request.uri_mut() = format!("https://{}{}", host, request.uri())
.parse()
.or_err(ErrorType::ParseError)?;
}
@ -615,10 +585,10 @@ pub async fn upgraded_handler(
);
return proxy_via_dfdaemon(
config,
cache,
task,
&rule,
request,
remote_ip,
dfdaemon_download_client,
)
.await;
@ -634,10 +604,10 @@ pub async fn upgraded_handler(
);
return proxy_via_dfdaemon(
config,
cache,
task,
&Rule::default(),
request,
remote_ip,
dfdaemon_download_client,
)
.await;
@ -664,27 +634,55 @@ pub async fn upgraded_handler(
#[instrument(skip_all, fields(host_id, task_id, peer_id))]
async fn proxy_via_dfdaemon(
config: Arc<Config>,
cache: Arc<cache::Cache>,
task: Arc<Task>,
rule: &Rule,
request: Request<hyper::body::Incoming>,
remote_ip: std::net::IpAddr,
dfdaemon_download_client: DfdaemonDownloadClient,
) -> ClientResult<Response> {
// Collect the metrics for the proxy request via dfdaemon.
collect_proxy_request_via_dfdaemon_metrics();
// Make the download task request.
let download_task_request =
match make_download_task_request(config.clone(), rule, request, remote_ip) {
Ok(download_task_request) => download_task_request,
Err(err) => {
error!("make download task request failed: {}", err);
return Ok(make_error_response(
http::StatusCode::INTERNAL_SERVER_ERROR,
None,
));
}
};
let download_task_request = match make_download_task_request(config.clone(), rule, request) {
Ok(download_task_request) => download_task_request,
Err(err) => {
error!("make download task request failed: {}", err);
return Ok(make_error_response(
http::StatusCode::INTERNAL_SERVER_ERROR,
None,
));
}
};
// Get the content from the cache by the request.
match cache.get_by_request(&download_task_request).await {
Ok(None) => {
debug!("cache miss");
}
Ok(Some(content)) => {
info!("cache hit");
// Collect the download piece traffic metrics and the proxy request via dfdaemon and
// cache hits metrics.
collect_proxy_request_via_dfdaemon_and_cache_hits_metrics();
collect_download_piece_traffic_metrics(
&TrafficType::LocalPeer,
TaskType::Standard as i32,
content.len() as u64,
);
let body_boxed = Full::new(content).map_err(ClientError::from).boxed();
return Ok(Response::new(body_boxed));
}
Err(err) => {
error!("get content from cache failed: {}", err);
return Ok(make_error_response(
http::StatusCode::INTERNAL_SERVER_ERROR,
None,
));
}
}
// Download the task by the dfdaemon download client.
let response = match dfdaemon_download_client
@ -751,29 +749,25 @@ async fn proxy_via_dfdaemon(
));
};
// Write the task data to the reader.
let (reader, mut writer) = tokio::io::duplex(256 * 1024);
// Write the status code to the writer.
let (sender, mut receiver) = mpsc::channel(10 * 1024);
// Get the read buffer size from the config.
let read_buffer_size = config.proxy.read_buffer_size;
// Write the task data to the reader.
let (reader, writer) = tokio::io::duplex(read_buffer_size);
let mut writer = BufWriter::with_capacity(read_buffer_size, writer);
let reader_stream = ReaderStream::with_capacity(reader, read_buffer_size);
// Construct the response body.
let reader_stream = ReaderStream::new(reader);
let stream_body = StreamBody::new(reader_stream.map_ok(Frame::data).map_err(ClientError::from));
let boxed_body = stream_body.boxed();
// Construct the response.
let mut response = Response::new(boxed_body);
*response.headers_mut() = make_response_headers(
message.task_id.as_str(),
download_task_started_response.clone(),
)?;
*response.headers_mut() = make_response_headers(download_task_started_response.clone())?;
*response.status_mut() = http::StatusCode::OK;
// Get the read buffer size from the config.
let read_buffer_size = config.proxy.read_buffer_size;
// Return the response if the client return the first piece.
let mut initialized = false;
@ -807,6 +801,10 @@ async fn proxy_via_dfdaemon(
),
) = message.response
{
// Sleep for a while to avoid the out stream is aborted. If the task is small, proxy read the piece
// before the task download is finished. It will cause `user body write aborted` error.
sleep(Duration::from_millis(1)).await;
// Send the none response to the client, if the first piece is received.
if !initialized {
debug!("first piece received, send response");
@ -826,9 +824,9 @@ async fn proxy_via_dfdaemon(
return;
};
let piece_range_reader = match task
let (piece_range_reader, piece_reader) = match task
.piece
.download_from_local_into_async_read(
.download_from_local_into_dual_async_read(
task.piece
.id(message.task_id.as_str(), piece.number)
.as_str(),
@ -840,7 +838,7 @@ async fn proxy_via_dfdaemon(
)
.await
{
Ok(piece_range_reader) => piece_range_reader,
Ok(dual_reader) => dual_reader,
Err(err) => {
error!("download piece reader error: {}", err);
if let Err(err) = writer.shutdown().await {
@ -855,10 +853,14 @@ async fn proxy_via_dfdaemon(
let piece_range_reader =
BufReader::with_capacity(read_buffer_size, piece_range_reader);
// Write the piece data to the pipe in order.
finished_piece_readers.insert(piece.number, piece_range_reader);
while let Some(mut piece_range_reader) =
finished_piece_readers.remove(&need_piece_number)
// Write the piece data to the pipe in order and store the piece reader
// in the cache.
finished_piece_readers
.insert(piece.number, (piece_range_reader, piece_reader));
while let Some((mut piece_range_reader, piece_reader)) =
finished_piece_readers
.get_mut(&need_piece_number)
.map(|(range_reader, reader)| (range_reader, reader))
{
debug!("copy piece {} to stream", need_piece_number);
if let Err(err) =
@ -872,6 +874,31 @@ async fn proxy_via_dfdaemon(
return;
}
// If the piece is not in the cache, add it to the cache.
let piece_id =
task.piece.id(message.task_id.as_str(), need_piece_number);
if !cache.contains_piece(&piece_id) {
let mut content =
bytes::BytesMut::with_capacity(piece.length as usize);
loop {
let n = match piece_reader.read_buf(&mut content).await {
Ok(n) => n,
Err(err) => {
error!("read piece reader error: {}", err);
break;
}
};
// When the piece reader reads to the end, add the piece
// to the cache.
if n == 0 {
cache.add_piece(&piece_id, content.freeze());
break;
}
}
}
need_piece_number += 1;
}
} else {
@ -1019,6 +1046,7 @@ async fn proxy_via_https(
}
/// make_registry_mirror_request makes a registry mirror request by the request.
#[instrument(skip_all)]
fn make_registry_mirror_request(
config: Arc<Config>,
mut request: Request<hyper::body::Incoming>,
@ -1052,11 +1080,11 @@ fn make_registry_mirror_request(
}
/// make_download_task_request makes a download task request by the request.
#[instrument(skip_all)]
fn make_download_task_request(
config: Arc<Config>,
rule: &Rule,
request: Request<hyper::body::Incoming>,
remote_ip: std::net::IpAddr,
) -> ClientResult<DownloadTaskRequest> {
// Convert the Reqwest header to the Hyper header.
let mut header = request.headers().clone();
@ -1064,17 +1092,6 @@ fn make_download_task_request(
// Registry will return the 403 status code if the Host header is set.
header.remove(reqwest::header::HOST);
// Validate the request arguments.
let piece_length = header::get_piece_length(&header).map(|piece_length| piece_length.as_u64());
if let Some(piece_length) = piece_length {
if piece_length < MIN_PIECE_LENGTH {
return Err(ClientError::ValidationError(format!(
"piece length {} is less than the minimum piece length {}",
piece_length, MIN_PIECE_LENGTH
)));
}
}
Ok(DownloadTaskRequest {
download: Some(Download {
url: make_download_url(request.uri(), rule.use_tls, rule.redirect.clone())?,
@ -1090,9 +1107,8 @@ fn make_download_task_request(
rule.filtered_query_params.clone(),
),
request_header: headermap_to_hashmap(&header),
piece_length,
// Need the absolute path.
output_path: header::get_output_path(&header),
piece_length: None,
output_path: None,
timeout: None,
need_back_to_source: false,
disable_back_to_source: config.proxy.disable_back_to_source,
@ -1102,15 +1118,14 @@ fn make_download_task_request(
hdfs: None,
is_prefetch: false,
need_piece_content: false,
force_hard_link: header::get_force_hard_link(&header),
content_for_calculating_task_id: header::get_content_for_calculating_task_id(&header),
remote_ip: Some(remote_ip.to_string()),
load_to_cache: false,
}),
})
}
/// need_prefetch returns whether the prefetch is needed by the configuration and the request
/// header.
#[instrument(skip_all)]
fn need_prefetch(config: Arc<Config>, header: &http::HeaderMap) -> bool {
// If the header not contains the range header, the request does not need prefetch.
if !header.contains_key(reqwest::header::RANGE) {
@ -1124,10 +1139,11 @@ fn need_prefetch(config: Arc<Config>, header: &http::HeaderMap) -> bool {
}
// Return the prefetch value from the configuration.
config.proxy.prefetch
return config.proxy.prefetch;
}
/// make_download_url makes a download url by the given uri.
#[instrument(skip_all)]
fn make_download_url(
uri: &hyper::Uri,
use_tls: bool,
@ -1152,8 +1168,8 @@ fn make_download_url(
}
/// make_response_headers makes the response headers.
#[instrument(skip_all)]
fn make_response_headers(
task_id: &str,
mut download_task_started_response: DownloadTaskStartedResponse,
) -> ClientResult<hyper::header::HeaderMap> {
// Insert the content range header to the response header.
@ -1174,28 +1190,18 @@ fn make_response_headers(
);
};
if download_task_started_response.is_finished {
download_task_started_response.response_header.insert(
header::DRAGONFLY_TASK_DOWNLOAD_FINISHED_HEADER.to_string(),
"true".to_string(),
);
}
download_task_started_response.response_header.insert(
header::DRAGONFLY_TASK_ID_HEADER.to_string(),
task_id.to_string(),
);
hashmap_to_headermap(&download_task_started_response.response_header)
}
/// find_matching_rule looks up the first rule whose regex matches the url and
/// returns a clone of it. It returns None when no rule list is supplied or when
/// no rule in the list matches.
#[instrument(skip_all)]
fn find_matching_rule(rules: Option<&[Rule]>, url: &str) -> Option<Rule> {
    let candidates = rules?;
    candidates
        .iter()
        .find(|candidate| candidate.regex.is_match(url))
        .cloned()
}
/// make_error_response makes an error response with the given status and message.
#[instrument(skip_all)]
fn make_error_response(status: http::StatusCode, header: Option<http::HeaderMap>) -> Response {
let mut response = Response::new(empty());
*response.status_mut() = status;
@ -1209,6 +1215,7 @@ fn make_error_response(status: http::StatusCode, header: Option<http::HeaderMap>
}
/// empty returns an empty body.
#[instrument(skip_all)]
fn empty() -> BoxBody<Bytes, ClientError> {
Empty::<Bytes>::new()
.map_err(|never| match never {})

View File

@ -15,7 +15,6 @@
*/
use crate::grpc::{scheduler::SchedulerClient, REQUEST_TIMEOUT};
use chrono::DateTime;
use dragonfly_api::common::v2::{
PersistentCachePeer, PersistentCacheTask as CommonPersistentCacheTask, Piece, TrafficType,
};
@ -84,6 +83,7 @@ pub struct PersistentCacheTask {
/// PersistentCacheTask is the implementation of PersistentCacheTask.
impl PersistentCacheTask {
/// new creates a new PersistentCacheTask.
#[instrument(skip_all)]
pub fn new(
config: Arc<Config>,
id_generator: Arc<IDGenerator>,
@ -104,7 +104,7 @@ impl PersistentCacheTask {
id_generator,
storage,
scheduler_client,
piece,
piece: piece.clone(),
})
}
@ -128,29 +128,22 @@ impl PersistentCacheTask {
let ttl = Duration::try_from(request.ttl.ok_or(Error::UnexpectedResponse)?)
.or_err(ErrorType::ParseError)?;
// Get the content length of the file asynchronously.
let content_length = tokio::fs::metadata(path.as_path())
.await
// Get the content length of the file.
let content_length = std::fs::metadata(path.as_path())
.inspect_err(|err| {
error!("get file metadata error: {}", err);
})?
.len();
// Get the piece length of the file.
let piece_length = match request.piece_length {
Some(piece_length) => self
.piece
.calculate_piece_length(piece::PieceLengthStrategy::FixedPieceLength(piece_length)),
None => {
self.piece
.calculate_piece_length(piece::PieceLengthStrategy::OptimizeByFileLength(
content_length,
))
}
};
let piece_length = self.piece.calculate_piece_length(
piece::PieceLengthStrategy::OptimizeByFileLength,
content_length,
);
// Notify the scheduler that the persistent cache task is started.
self.scheduler_client
match self
.scheduler_client
.upload_persistent_cache_task_started(UploadPersistentCacheTaskStartedRequest {
host_id: host_id.to_string(),
task_id: task_id.to_string(),
@ -166,7 +159,13 @@ impl PersistentCacheTask {
ttl: request.ttl,
})
.await
.inspect_err(|err| error!("upload persistent cache task started: {}", err))?;
{
Ok(_) => {}
Err(err) => {
error!("upload persistent cache task started: {}", err);
return Err(err);
}
}
// Check if the storage has enough space to store the persistent cache task.
let has_enough_space = self.storage.has_enough_space(content_length)?;
@ -432,11 +431,6 @@ impl PersistentCacheTask {
let ttl = Duration::try_from(response.ttl.ok_or(Error::InvalidParameter)?)
.or_err(ErrorType::ParseError)?;
// Convert prost_wkt_types::Timestamp to chrono::DateTime.
let created_at = response.created_at.ok_or(Error::InvalidParameter)?;
let created_at = DateTime::from_timestamp(created_at.seconds, created_at.nanos as u32)
.ok_or(Error::InvalidParameter)?;
// If the persistent cache task is not found, check if the storage has enough space to
// store the persistent cache task.
if let Ok(None) = self.get(task_id) {
@ -449,40 +443,13 @@ impl PersistentCacheTask {
}
}
let task = self
.storage
.download_persistent_cache_task_started(
task_id,
ttl,
request.persistent,
response.piece_length,
response.content_length,
created_at.naive_utc(),
)
.await?;
// Attempt to create a hard link from the task file to the output path.
//
// Behavior based on force_hard_link setting:
// 1. force_hard_link is true:
// - Success: Continue processing
// - Failure: Return error immediately
// 2. force_hard_link is false:
// - Success: Continue processing
// - Failure: Fall back to copying the file instead
if let Some(output_path) = &request.output_path {
if let Err(err) = self
.storage
.hard_link_persistent_cache_task(task_id, Path::new(output_path.as_str()))
.await
{
if request.force_hard_link {
return Err(err);
}
}
}
Ok(task)
self.storage.download_persistent_cache_task_started(
task_id,
ttl,
request.persistent,
response.piece_length,
response.content_length,
)
}
/// download_finished updates the metadata of the persistent cache task when the task downloads finished.
@ -501,17 +468,16 @@ impl PersistentCacheTask {
Ok(())
}
/// is_same_dev_inode checks if the persistent cache task is on the same device inode as the given path.
pub async fn is_same_dev_inode(&self, id: &str, to: &Path) -> ClientResult<bool> {
self.storage
.is_same_dev_inode_as_persistent_cache_task(id, to)
.await
}
//// copy_task copies the persistent cache task content to the destination.
/// hard_link_or_copy hard links or copies the persistent cache task content to the destination.
#[instrument(skip_all)]
pub async fn copy_task(&self, id: &str, to: &Path) -> ClientResult<()> {
self.storage.copy_persistent_cache_task(id, to).await
pub async fn hard_link_or_copy(
&self,
task: &metadata::PersistentCacheTask,
to: &Path,
) -> ClientResult<()> {
self.storage
.hard_link_or_copy_persistent_cache_task(task, to)
.await
}
/// download downloads a persistent cache task.
@ -747,7 +713,7 @@ impl PersistentCacheTask {
})? {
// Check if the schedule count is exceeded.
schedule_count += 1;
if schedule_count > self.config.scheduler.max_schedule_count {
if schedule_count >= self.config.scheduler.max_schedule_count {
in_stream_tx
.send_timeout(
AnnouncePersistentCachePeerRequest {
@ -1011,8 +977,7 @@ impl PersistentCacheTask {
host: peer.host,
})
.collect(),
)
.await;
);
let mut piece_collector_rx = piece_collector.run().await;
// Initialize the interrupt. If download from parent failed with scheduler or download
@ -1143,13 +1108,13 @@ impl PersistentCacheTask {
REQUEST_TIMEOUT,
)
.await
.unwrap_or_else(|err| {
.inspect_err(|err| {
error!(
"send DownloadPieceFinishedRequest for piece {} failed: {:?}",
piece_id, err
);
interrupt.store(true, Ordering::SeqCst);
});
})?;
// Send the download progress.
download_progress_tx
@ -1169,13 +1134,13 @@ impl PersistentCacheTask {
REQUEST_TIMEOUT,
)
.await
.unwrap_or_else(|err| {
.inspect_err(|err| {
error!(
"send DownloadPieceFinishedResponse for piece {} failed: {:?}",
piece_id, err
);
interrupt.store(true, Ordering::SeqCst);
});
})?;
info!(
"finished persistent cache piece {} from parent {:?}",
@ -1370,12 +1335,12 @@ impl PersistentCacheTask {
REQUEST_TIMEOUT,
)
.await
.unwrap_or_else(|err| {
.inspect_err(|err| {
error!(
"send DownloadPieceFinishedResponse for piece {} failed: {:?}",
piece_id, err
);
});
})?;
// Store the finished piece.
finished_pieces.push(interested_piece.clone());

View File

@ -30,7 +30,6 @@ use dragonfly_client_util::id_generator::IDGenerator;
use leaky_bucket::RateLimiter;
use reqwest::header::{self, HeaderMap};
use std::collections::HashMap;
use std::io::Cursor;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::io::{AsyncRead, AsyncReadExt};
@ -40,21 +39,18 @@ use tracing::{error, info, instrument, Span};
/// than MAX_PIECE_COUNT, the piece length will be optimized by the file length.
/// When piece length became the MAX_PIECE_LENGTH, the piece count
/// probably will be upper than MAX_PIECE_COUNT.
pub const MAX_PIECE_COUNT: u64 = 500;
const MAX_PIECE_COUNT: u64 = 500;
/// MIN_PIECE_LENGTH is the minimum piece length.
pub const MIN_PIECE_LENGTH: u64 = 4 * 1024 * 1024;
const MIN_PIECE_LENGTH: u64 = 4 * 1024 * 1024;
/// MAX_PIECE_LENGTH is the maximum piece length.
pub const MAX_PIECE_LENGTH: u64 = 64 * 1024 * 1024;
const MAX_PIECE_LENGTH: u64 = 16 * 1024 * 1024;
/// PieceLengthStrategy sets the optimization strategy of piece length.
pub enum PieceLengthStrategy {
/// OptimizeByFileLength optimizes the piece length by the file length.
OptimizeByFileLength(u64),
/// FixedPieceLength sets the fixed piece length.
FixedPieceLength(u64),
OptimizeByFileLength,
}
/// Piece represents a piece manager.
@ -68,8 +64,8 @@ pub struct Piece {
/// storage is the local storage.
storage: Arc<Storage>,
/// downloader is the piece downloader.
downloader: Arc<dyn piece_downloader::Downloader>,
/// downloader_factory is the piece downloader factory.
downloader_factory: Arc<piece_downloader::DownloaderFactory>,
/// backend_factory is the backend factory.
backend_factory: Arc<BackendFactory>,
@ -87,6 +83,7 @@ pub struct Piece {
/// Piece implements the piece manager.
impl Piece {
/// new returns a new Piece.
#[instrument(skip_all)]
pub fn new(
config: Arc<Config>,
id_generator: Arc<IDGenerator>,
@ -97,11 +94,10 @@ impl Piece {
config: config.clone(),
id_generator,
storage,
downloader: piece_downloader::DownloaderFactory::new(
downloader_factory: Arc::new(piece_downloader::DownloaderFactory::new(
config.storage.server.protocol.as_str(),
config.clone(),
)?
.build(),
)?),
backend_factory,
download_rate_limiter: Arc::new(
RateLimiter::builder()
@ -135,20 +131,17 @@ impl Piece {
/// id generates a new piece id.
#[inline]
#[instrument(skip_all)]
pub fn id(&self, task_id: &str, number: u32) -> String {
self.storage.piece_id(task_id, number)
}
/// get gets a piece from the local storage.
#[instrument(skip_all)]
pub fn get(&self, piece_id: &str) -> Result<Option<metadata::Piece>> {
self.storage.get_piece(piece_id)
}
/// get_all gets all pieces of a task from the local storage.
pub fn get_all(&self, task_id: &str) -> Result<Vec<metadata::Piece>> {
self.storage.get_pieces(task_id)
}
/// calculate_interested calculates the interested pieces by content_length and range.
pub fn calculate_interested(
&self,
@ -304,9 +297,13 @@ impl Piece {
}
/// calculate_piece_size calculates the piece size by content_length.
pub fn calculate_piece_length(&self, strategy: PieceLengthStrategy) -> u64 {
pub fn calculate_piece_length(
&self,
strategy: PieceLengthStrategy,
content_length: u64,
) -> u64 {
match strategy {
PieceLengthStrategy::OptimizeByFileLength(content_length) => {
PieceLengthStrategy::OptimizeByFileLength => {
let piece_length = (content_length as f64 / MAX_PIECE_COUNT as f64) as u64;
let actual_piece_length = piece_length.next_power_of_two();
@ -319,7 +316,6 @@ impl Piece {
(false, _) => MIN_PIECE_LENGTH,
}
}
PieceLengthStrategy::FixedPieceLength(piece_length) => piece_length,
}
}
@ -340,7 +336,6 @@ impl Piece {
) -> Result<impl AsyncRead> {
// Span record the piece_id.
Span::current().record("piece_id", piece_id);
Span::current().record("piece_length", length);
// Acquire the upload rate limiter.
if !disable_rate_limit {
@ -351,7 +346,38 @@ impl Piece {
self.storage
.upload_piece(piece_id, task_id, range)
.await
.inspect(|_| {
.inspect(|_reader| {
collect_upload_piece_traffic_metrics(
self.id_generator.task_type(task_id) as i32,
length,
);
})
}
/// upload_from_local_into_async_read. It will return two readers, one is the range reader, and the other is the
/// full reader of the piece.
#[instrument(skip_all, fields(piece_id))]
pub async fn upload_from_local_into_dual_async_read(
&self,
piece_id: &str,
task_id: &str,
length: u64,
range: Option<Range>,
disable_rate_limit: bool,
) -> Result<(impl AsyncRead, impl AsyncRead)> {
// Span record the piece_id.
Span::current().record("piece_id", piece_id);
// Acquire the upload rate limiter.
if !disable_rate_limit {
self.upload_rate_limiter.acquire(length as usize).await;
}
// Upload the piece content.
self.storage
.upload_piece_with_dual_read(piece_id, task_id, range)
.await
.inspect(|_reader| {
collect_upload_piece_traffic_metrics(
self.id_generator.task_type(task_id) as i32,
length,
@ -372,7 +398,6 @@ impl Piece {
) -> Result<impl AsyncRead> {
// Span record the piece_id.
Span::current().record("piece_id", piece_id);
Span::current().record("piece_length", length);
// Acquire the download rate limiter.
if !disable_rate_limit {
@ -389,6 +414,38 @@ impl Piece {
self.storage.upload_piece(piece_id, task_id, range).await
}
/// download_from_local_into_dual_async_read returns two readers, one is the range reader, and
/// the other is the full reader of the piece.
#[instrument(skip_all, fields(piece_id))]
pub async fn download_from_local_into_dual_async_read(
&self,
piece_id: &str,
task_id: &str,
length: u64,
range: Option<Range>,
disable_rate_limit: bool,
is_prefetch: bool,
) -> Result<(impl AsyncRead, impl AsyncRead)> {
// Span record the piece_id.
Span::current().record("piece_id", piece_id);
// Acquire the download rate limiter.
if !disable_rate_limit {
if is_prefetch {
// Acquire the prefetch rate limiter.
self.prefetch_rate_limiter.acquire(length as usize).await;
} else {
// Acquire the download rate limiter.
self.download_rate_limiter.acquire(length as usize).await;
}
}
// Upload the piece content.
self.storage
.upload_piece_with_dual_read(piece_id, task_id, range)
.await
}
/// download_from_local downloads a single piece from local cache. Fake the download piece
/// from the local cache, just collect the metrics.
#[instrument(skip_all)]
@ -415,7 +472,14 @@ impl Piece {
) -> Result<metadata::Piece> {
// Span record the piece_id.
Span::current().record("piece_id", piece_id);
Span::current().record("piece_length", length);
if is_prefetch {
// Acquire the prefetch rate limiter.
self.prefetch_rate_limiter.acquire(length as usize).await;
} else {
// Acquire the download rate limiter.
self.download_rate_limiter.acquire(length as usize).await;
}
// Record the start of downloading piece.
let piece = self
@ -426,18 +490,9 @@ impl Piece {
// If the piece is downloaded by the other thread,
// return the piece directly.
if piece.is_finished() {
info!("finished piece {} from local", piece_id);
return Ok(piece);
}
if is_prefetch {
// Acquire the prefetch rate limiter.
self.prefetch_rate_limiter.acquire(length as usize).await;
} else {
// Acquire the download rate limiter.
self.download_rate_limiter.acquire(length as usize).await;
}
// Create a dfdaemon client.
let host = parent.host.clone().ok_or_else(|| {
error!("peer host is empty");
@ -449,7 +504,8 @@ impl Piece {
})?;
let (content, offset, digest) = self
.downloader
.downloader_factory
.build()
.download_piece(
format!("{}:{}", host.ip, host.port).as_str(),
number,
@ -463,7 +519,6 @@ impl Piece {
error!("set piece metadata failed: {}", err)
};
})?;
let mut reader = Cursor::new(content);
// Record the finish of downloading piece.
match self
@ -472,11 +527,9 @@ impl Piece {
piece_id,
task_id,
offset,
length,
digest.as_str(),
parent.id.as_str(),
&mut reader,
self.config.storage.write_piece_timeout,
&mut content.as_slice(),
)
.await
{
@ -518,7 +571,14 @@ impl Piece {
) -> Result<metadata::Piece> {
// Span record the piece_id.
Span::current().record("piece_id", piece_id);
Span::current().record("piece_length", length);
if is_prefetch {
// Acquire the prefetch rate limiter.
self.prefetch_rate_limiter.acquire(length as usize).await;
} else {
// Acquire the download rate limiter.
self.download_rate_limiter.acquire(length as usize).await;
}
// Record the start of downloading piece.
let piece = self
@ -529,18 +589,9 @@ impl Piece {
// If the piece is downloaded by the other thread,
// return the piece directly.
if piece.is_finished() {
info!("finished piece {} from local", piece_id);
return Ok(piece);
}
if is_prefetch {
// Acquire the prefetch rate limiter.
self.prefetch_rate_limiter.acquire(length as usize).await;
} else {
// Acquire the download rate limiter.
self.download_rate_limiter.acquire(length as usize).await;
}
// Add range header to the request by offset and length.
let mut request_header = request_header.clone();
request_header.insert(
@ -638,7 +689,6 @@ impl Piece {
offset,
length,
&mut response.reader,
self.config.storage.write_piece_timeout,
)
.await
{
@ -664,6 +714,7 @@ impl Piece {
/// persistent_cache_id generates a new persistent cache piece id.
#[inline]
#[instrument(skip_all)]
pub fn persistent_cache_id(&self, task_id: &str, number: u32) -> String {
self.storage.persistent_cache_piece_id(task_id, number)
}
@ -701,7 +752,6 @@ impl Piece {
) -> Result<impl AsyncRead> {
// Span record the piece_id.
Span::current().record("piece_id", piece_id);
Span::current().record("piece_length", length);
// Acquire the upload rate limiter.
self.upload_rate_limiter.acquire(length as usize).await;
@ -710,7 +760,7 @@ impl Piece {
self.storage
.upload_persistent_cache_piece(piece_id, task_id, range)
.await
.inspect(|_| {
.inspect(|_reader| {
collect_upload_piece_traffic_metrics(
self.id_generator.task_type(task_id) as i32,
length,
@ -731,7 +781,6 @@ impl Piece {
) -> Result<impl AsyncRead> {
// Span record the piece_id.
Span::current().record("piece_id", piece_id);
Span::current().record("piece_length", length);
// Acquire the download rate limiter.
if !disable_rate_limit {
@ -776,7 +825,6 @@ impl Piece {
) -> Result<metadata::Piece> {
// Span record the piece_id.
Span::current().record("piece_id", piece_id);
Span::current().record("piece_length", length);
if is_prefetch {
// Acquire the prefetch rate limiter.
@ -795,7 +843,6 @@ impl Piece {
// If the piece is downloaded by the other thread,
// return the piece directly.
if piece.is_finished() {
info!("finished persistent cache piece {} from local", piece_id);
return Ok(piece);
}
@ -814,7 +861,8 @@ impl Piece {
})?;
let (content, offset, digest) = self
.downloader
.downloader_factory
.build()
.download_persistent_cache_piece(
format!("{}:{}", host.ip, host.port).as_str(),
number,
@ -832,7 +880,6 @@ impl Piece {
error!("set persistent cache piece metadata failed: {}", err)
};
})?;
let mut reader = Cursor::new(content);
// Record the finish of downloading piece.
match self
@ -841,10 +888,9 @@ impl Piece {
piece_id,
task_id,
offset,
length,
digest.as_str(),
parent.id.as_str(),
&mut reader,
&mut content.as_slice(),
)
.await
{
@ -879,7 +925,7 @@ mod tests {
use tempfile::tempdir;
#[tokio::test]
async fn test_calculate_interested() {
async fn should_calculate_interested() {
let temp_dir = tempdir().unwrap();
let config = Config::default();

View File

@ -28,8 +28,6 @@ use tokio::task::JoinSet;
use tokio_stream::StreamExt;
use tracing::{error, info, instrument, Instrument};
const DEFAULT_WAIT_FOR_PIECE_FROM_DIFFERENT_PARENTS: Duration = Duration::from_millis(5);
/// CollectedParent is the parent peer collected from the parent.
#[derive(Clone, Debug)]
pub struct CollectedParent {
@ -69,14 +67,15 @@ pub struct PieceCollector {
/// interested_pieces is the pieces interested by the collector.
interested_pieces: Vec<metadata::Piece>,
/// collected_pieces is a map to store the collected pieces from different parents.
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>,
/// collected_pieces is the pieces collected from peers.
collected_pieces: Arc<DashMap<u32, String>>,
}
/// PieceCollector is used to collect pieces from peers.
impl PieceCollector {
/// new creates a new PieceCollector.
pub async fn new(
#[instrument(skip_all)]
pub fn new(
config: Arc<Config>,
host_id: &str,
task_id: &str,
@ -85,7 +84,7 @@ impl PieceCollector {
) -> Self {
let collected_pieces = Arc::new(DashMap::with_capacity(interested_pieces.len()));
for interested_piece in &interested_pieces {
collected_pieces.insert(interested_piece.number, Vec::new());
collected_pieces.insert(interested_piece.number, String::new());
}
Self {
@ -107,8 +106,8 @@ impl PieceCollector {
let parents = self.parents.clone();
let interested_pieces = self.interested_pieces.clone();
let collected_pieces = self.collected_pieces.clone();
let collected_piece_timeout = self.config.download.collected_piece_timeout;
let (collected_piece_tx, collected_piece_rx) = mpsc::channel(128 * 1024);
let collected_piece_timeout = self.config.download.piece_timeout;
let (collected_piece_tx, collected_piece_rx) = mpsc::channel(10 * 1024);
tokio::spawn(
async move {
Self::collect_from_parents(
@ -132,25 +131,7 @@ impl PieceCollector {
collected_piece_rx
}
/// collect_from_parents collects pieces from multiple parents with load balancing strategy.
///
/// The collection process works in two phases:
/// 1. **Synchronization Phase**: Waits for a configured duration (DEFAULT_WAIT_FOR_PIECE_FROM_DIFFERENT_PARENTS)
/// to collect the same piece information from different parents. This allows the collector
/// to gather multiple sources for each piece.
///
/// 2. **Selection Phase**: After the wait period, randomly selects one parent from the available
/// candidates for each piece and forwards it to the piece downloader.
///
/// **Load Balancing Strategy**:
/// The random parent selection is designed to distribute download load across multiple parents
/// during concurrent piece downloads. This approach ensures:
/// - Optimal utilization of bandwidth from multiple parent nodes
/// - Prevention of overwhelming any single parent with too many requests
/// - Better overall download performance through parallel connections
///
/// This strategy is particularly effective when downloading multiple pieces simultaneously,
/// as it naturally spreads the workload across the available parent pool.
/// collect_from_parents collects pieces from parents.
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all)]
async fn collect_from_parents(
@ -159,7 +140,7 @@ impl PieceCollector {
task_id: &str,
parents: Vec<CollectedParent>,
interested_pieces: Vec<metadata::Piece>,
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>,
collected_pieces: Arc<DashMap<u32, String>>,
collected_piece_tx: Sender<CollectedPiece>,
collected_piece_timeout: Duration,
) -> Result<()> {
@ -172,8 +153,9 @@ impl PieceCollector {
host_id: String,
task_id: String,
parent: CollectedParent,
parents: Vec<CollectedParent>,
interested_pieces: Vec<metadata::Piece>,
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>,
collected_pieces: Arc<DashMap<u32, String>>,
collected_piece_tx: Sender<CollectedPiece>,
collected_piece_timeout: Duration,
) -> Result<CollectedParent> {
@ -186,18 +168,15 @@ impl PieceCollector {
})?;
// Create a dfdaemon client.
let dfdaemon_upload_client = DfdaemonUploadClient::new(
config,
format!("http://{}:{}", host.ip, host.port),
false,
)
.await
.inspect_err(|err| {
error!(
"create dfdaemon upload client from parent {} failed: {}",
parent.id, err
);
})?;
let dfdaemon_upload_client =
DfdaemonUploadClient::new(config, format!("http://{}:{}", host.ip, host.port))
.await
.inspect_err(|err| {
error!(
"create dfdaemon upload client from parent {} failed: {}",
parent.id, err
);
})?;
let response = dfdaemon_upload_client
.sync_pieces(SyncPiecesRequest {
@ -221,36 +200,26 @@ impl PieceCollector {
error!("sync pieces from parent {} failed: {}", parent.id, err);
})? {
let message = message?;
if let Some(mut parents) = collected_pieces.get_mut(&message.number) {
parents.push(parent.clone());
} else {
continue;
}
// Wait for collecting the piece from different parents when the first
// piece is collected.
tokio::time::sleep(DEFAULT_WAIT_FOR_PIECE_FROM_DIFFERENT_PARENTS).await;
let parents = match collected_pieces.remove(&message.number) {
Some((_, parents)) => parents,
None => continue,
};
let parent = match parents.get(fastrand::usize(..parents.len())) {
Some(parent) => parent,
None => {
error!(
"collected_pieces does not contain parent for piece {}",
message.number
);
continue;
}
};
let mut parent_id =
match collected_pieces.try_get_mut(&message.number).try_unwrap() {
Some(parent_id) => parent_id,
None => continue,
};
parent_id.push_str(&parent.id);
info!(
"picked up piece {}-{} metadata from parent {}",
"received piece {}-{} metadata from parent {}",
task_id, message.number, parent.id
);
let parent = parents
.iter()
.find(|parent| parent.id == parent_id.as_str())
.ok_or_else(|| {
error!("parent {} not found", parent_id.as_str());
Error::InvalidPeer(parent_id.clone())
})?;
collected_piece_tx
.send(CollectedPiece {
number: message.number,
@ -261,6 +230,12 @@ impl PieceCollector {
.inspect_err(|err| {
error!("send CollectedPiece failed: {}", err);
})?;
// Release the lock of the piece with parent_id.
drop(parent_id);
// Remove the piece from collected_pieces.
collected_pieces.remove(&message.number);
}
Ok(parent)
@ -272,6 +247,7 @@ impl PieceCollector {
host_id.to_string(),
task_id.to_string(),
parent.clone(),
parents.clone(),
interested_pieces.clone(),
collected_pieces.clone(),
collected_piece_tx.clone(),
@ -323,14 +299,15 @@ pub struct PersistentCachePieceCollector {
/// interested_pieces is the pieces interested by the collector.
interested_pieces: Vec<metadata::Piece>,
/// collected_pieces is a map to store the collected pieces from different parents.
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>,
/// collected_pieces is the pieces collected from peers.
collected_pieces: Arc<DashMap<u32, String>>,
}
/// PersistentCachePieceCollector is used to collect persistent cache pieces from peers.
impl PersistentCachePieceCollector {
/// new creates a new PieceCollector.
pub async fn new(
#[instrument(skip_all)]
pub fn new(
config: Arc<Config>,
host_id: &str,
task_id: &str,
@ -339,7 +316,7 @@ impl PersistentCachePieceCollector {
) -> Self {
let collected_pieces = Arc::new(DashMap::with_capacity(interested_pieces.len()));
for interested_piece in &interested_pieces {
collected_pieces.insert(interested_piece.number, Vec::new());
collected_pieces.insert(interested_piece.number, String::new());
}
Self {
@ -386,25 +363,7 @@ impl PersistentCachePieceCollector {
collected_piece_rx
}
/// collect_from_parents collects pieces from multiple parents with load balancing strategy.
///
/// The collection process works in two phases:
/// 1. **Synchronization Phase**: Waits for a configured duration (DEFAULT_WAIT_FOR_PIECE_FROM_DIFFERENT_PARENTS)
/// to collect the same piece information from different parents. This allows the collector
/// to gather multiple sources for each piece.
///
/// 2. **Selection Phase**: After the wait period, randomly selects one parent from the available
/// candidates for each piece and forwards it to the piece downloader.
///
/// **Load Balancing Strategy**:
/// The random parent selection is designed to distribute download load across multiple parents
/// during concurrent piece downloads. This approach ensures:
/// - Optimal utilization of bandwidth from multiple parent nodes
/// - Prevention of overwhelming any single parent with too many requests
/// - Better overall download performance through parallel connections
///
/// This strategy is particularly effective when downloading multiple pieces simultaneously,
/// as it naturally spreads the workload across the available parent pool.
/// collect_from_parents collects pieces from parents.
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all)]
async fn collect_from_parents(
@ -413,7 +372,7 @@ impl PersistentCachePieceCollector {
task_id: &str,
parents: Vec<CollectedParent>,
interested_pieces: Vec<metadata::Piece>,
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>,
collected_pieces: Arc<DashMap<u32, String>>,
collected_piece_tx: Sender<CollectedPiece>,
collected_piece_timeout: Duration,
) -> Result<()> {
@ -426,8 +385,9 @@ impl PersistentCachePieceCollector {
host_id: String,
task_id: String,
parent: CollectedParent,
parents: Vec<CollectedParent>,
interested_pieces: Vec<metadata::Piece>,
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>,
collected_pieces: Arc<DashMap<u32, String>>,
collected_piece_tx: Sender<CollectedPiece>,
collected_piece_timeout: Duration,
) -> Result<CollectedParent> {
@ -440,18 +400,15 @@ impl PersistentCachePieceCollector {
})?;
// Create a dfdaemon client.
let dfdaemon_upload_client = DfdaemonUploadClient::new(
config,
format!("http://{}:{}", host.ip, host.port),
false,
)
.await
.inspect_err(|err| {
error!(
"create dfdaemon upload client from parent {} failed: {}",
parent.id, err
);
})?;
let dfdaemon_upload_client =
DfdaemonUploadClient::new(config, format!("http://{}:{}", host.ip, host.port))
.await
.inspect_err(|err| {
error!(
"create dfdaemon upload client from parent {} failed: {}",
parent.id, err
);
})?;
let response = dfdaemon_upload_client
.sync_persistent_cache_pieces(SyncPersistentCachePiecesRequest {
@ -481,36 +438,26 @@ impl PersistentCachePieceCollector {
);
})? {
let message = message?;
if let Some(mut parents) = collected_pieces.get_mut(&message.number) {
parents.push(parent.clone());
} else {
continue;
}
// Wait for collecting the piece from different parents when the first
// piece is collected.
tokio::time::sleep(DEFAULT_WAIT_FOR_PIECE_FROM_DIFFERENT_PARENTS).await;
let parents = match collected_pieces.remove(&message.number) {
Some((_, parents)) => parents,
None => continue,
};
let parent = match parents.get(fastrand::usize(..parents.len())) {
Some(parent) => parent,
None => {
error!(
"collected_pieces does not contain parent for piece {}",
message.number
);
continue;
}
};
let mut parent_id =
match collected_pieces.try_get_mut(&message.number).try_unwrap() {
Some(parent_id) => parent_id,
None => continue,
};
parent_id.push_str(&parent.id);
info!(
"picked up piece {}-{} metadata from parent {}",
"received persistent cache piece {}-{} metadata from parent {}",
task_id, message.number, parent.id
);
let parent = parents
.iter()
.find(|parent| parent.id == parent_id.as_str())
.ok_or_else(|| {
error!("parent {} not found", parent_id.as_str());
Error::InvalidPeer(parent_id.clone())
})?;
collected_piece_tx
.send(CollectedPiece {
number: message.number,
@ -521,6 +468,12 @@ impl PersistentCachePieceCollector {
.inspect_err(|err| {
error!("send CollectedPiece failed: {}", err);
})?;
// Release the lock of the piece with parent_id.
drop(parent_id);
// Remove the piece from collected_pieces.
collected_pieces.remove(&message.number);
}
Ok(parent)
@ -532,6 +485,7 @@ impl PersistentCachePieceCollector {
host_id.to_string(),
task_id.to_string(),
parent.clone(),
parents.clone(),
interested_pieces.clone(),
collected_pieces.clone(),
collected_piece_tx.clone(),

View File

@ -18,24 +18,13 @@ use crate::grpc::dfdaemon_upload::DfdaemonUploadClient;
use dragonfly_api::dfdaemon::v2::{DownloadPersistentCachePieceRequest, DownloadPieceRequest};
use dragonfly_client_config::dfdaemon::Config;
use dragonfly_client_core::{Error, Result};
use dragonfly_client_storage::metadata;
use std::collections::HashMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::Mutex;
use tracing::{debug, error, instrument};
/// DEFAULT_DOWNLOADER_CAPACITY is the default capacity of the downloader to store the clients.
const DEFAULT_DOWNLOADER_CAPACITY: usize = 2000;
/// DEFAULT_DOWNLOADER_IDLE_TIMEOUT is the default idle timeout for the downloader.
const DEFAULT_DOWNLOADER_IDLE_TIMEOUT: Duration = Duration::from_secs(30);
use tracing::{error, instrument};
/// Downloader is the interface for downloading pieces, which is implemented by different
/// protocols. The downloader is used to download pieces from the other peers.
#[tonic::async_trait]
pub trait Downloader: Send + Sync {
pub trait Downloader {
/// download_piece downloads a piece from the other peer by different protocols.
async fn download_piece(
&self,
@ -66,13 +55,10 @@ pub struct DownloaderFactory {
/// DownloadFactory implements the DownloadFactory trait.
impl DownloaderFactory {
/// new returns a new DownloadFactory.
#[instrument(skip_all)]
pub fn new(protocol: &str, config: Arc<Config>) -> Result<Self> {
let downloader = match protocol {
"grpc" => Arc::new(GRPCDownloader::new(
config.clone(),
DEFAULT_DOWNLOADER_CAPACITY,
DEFAULT_DOWNLOADER_IDLE_TIMEOUT,
)),
"grpc" => Arc::new(GRPCDownloader::new(config.clone())),
_ => {
error!("downloader unsupported protocol: {}", protocol);
return Err(Error::InvalidParameter);
@ -83,181 +69,24 @@ impl DownloaderFactory {
}
/// build returns the downloader.
pub fn build(&self) -> Arc<dyn Downloader> {
#[instrument(skip_all)]
pub fn build(&self) -> Arc<dyn Downloader + Send + Sync> {
self.downloader.clone()
}
}
/// RequestGuard is the guard for the request.
struct RequestGuard {
/// active_requests is the number of the active requests.
active_requests: Arc<AtomicUsize>,
}
/// RequestGuard implements the guard for the request to add or subtract the active requests.
impl RequestGuard {
/// new returns a new RequestGuard.
fn new(active_requests: Arc<AtomicUsize>) -> Self {
active_requests.fetch_add(1, Ordering::SeqCst);
Self { active_requests }
}
}
/// RequestGuard implements the Drop trait.
impl Drop for RequestGuard {
/// drop subtracts the active requests.
fn drop(&mut self) {
self.active_requests.fetch_sub(1, Ordering::SeqCst);
}
}
/// DfdaemonUploadClientEntry is the entry of the dfdaemon upload client.
#[derive(Clone)]
struct DfdaemonUploadClientEntry {
/// client is the dfdaemon upload client.
client: DfdaemonUploadClient,
/// active_requests is the number of the active requests.
active_requests: Arc<AtomicUsize>,
/// actived_at is the time when the client is the last active time.
actived_at: Arc<std::sync::Mutex<Instant>>,
}
/// GRPCDownloader is the downloader for downloading pieces by the gRPC protocol.
/// It will reuse the dfdaemon upload clients to download pieces from the other peers by
/// peer's address.
pub struct GRPCDownloader {
/// config is the configuration of the dfdaemon.
config: Arc<Config>,
/// clients is the map of the dfdaemon upload clients.
clients: Arc<Mutex<HashMap<String, DfdaemonUploadClientEntry>>>,
/// capacity is the capacity of the dfdaemon upload clients. If the number of the
/// clients exceeds the capacity, it will clean up the idle clients.
capacity: usize,
/// client_idle_timeout is the idle timeout for the client. If the client is idle for a long
/// time, it will be removed when cleaning up the idle clients.
idle_timeout: Duration,
/// cleanup_at is the time when the client is the last cleanup time.
cleanup_at: Arc<Mutex<Instant>>,
}
/// GRPCDownloader implements the downloader with the gRPC protocol.
impl GRPCDownloader {
/// new returns a new GRPCDownloader.
pub fn new(config: Arc<Config>, capacity: usize, idle_timeout: Duration) -> Self {
Self {
config,
clients: Arc::new(Mutex::new(HashMap::new())),
capacity,
idle_timeout,
cleanup_at: Arc::new(Mutex::new(Instant::now())),
}
}
/// client returns the dfdaemon upload client by the address.
///
/// Opterations:
/// 1. If the client entry exists, it will return the client directly to reuse the client by
/// the address.
/// 2. If the client entry does not exist, it will create a new client entry and insert it
/// into the clients map.
async fn client(&self, addr: &str) -> Result<DfdaemonUploadClient> {
let now = Instant::now();
// Cleanup the idle clients first to avoid the clients exceeding the capacity and the
// clients are idle for a long time.
self.cleanup_idle_client_entries().await;
let clients = self.clients.lock().await;
if let Some(entry) = clients.get(addr) {
debug!("reusing client: {}", addr);
*entry.actived_at.lock().unwrap() = now;
return Ok(entry.client.clone());
}
drop(clients);
// If there are many concurrent requests to create the client, it will create multiple
// clients for the same address. But it will reuse the same client by entry operation.
debug!("creating client: {}", addr);
let client =
DfdaemonUploadClient::new(self.config.clone(), format!("http://{}", addr), true)
.await?;
let mut clients = self.clients.lock().await;
let entry = clients
.entry(addr.to_string())
.or_insert(DfdaemonUploadClientEntry {
client: client.clone(),
active_requests: Arc::new(AtomicUsize::new(0)),
actived_at: Arc::new(std::sync::Mutex::new(now)),
});
// If it is created by other concurrent requests and reused client, need to update the
// last active time.
*entry.actived_at.lock().unwrap() = now;
Ok(entry.client.clone())
}
/// get_client_entry returns the client entry by the address.
async fn get_client_entry(&self, addr: &str) -> Option<DfdaemonUploadClientEntry> {
let clients = self.clients.lock().await;
clients.get(addr).cloned()
}
/// remove_client_entry removes the client entry if it is idle.
async fn remove_client_entry(&self, addr: &str) {
let mut clients = self.clients.lock().await;
if let Some(entry) = clients.get(addr) {
if entry.active_requests.load(Ordering::SeqCst) == 0 {
clients.remove(addr);
}
}
}
/// cleanup_idle_clients cleans up the idle clients, which are idle for a long time or have no
/// active requests.
async fn cleanup_idle_client_entries(&self) {
let now = Instant::now();
// Avoid hot cleanup for the clients.
let cleanup_at = self.cleanup_at.lock().await;
let interval = self.idle_timeout / 2;
if now.duration_since(*cleanup_at) < interval {
debug!("avoid hot cleanup");
return;
}
drop(cleanup_at);
let mut clients = self.clients.lock().await;
let exceeds_capacity = clients.len() > self.capacity;
clients.retain(|addr, entry| {
let active_requests = entry.active_requests.load(Ordering::SeqCst);
let is_active = active_requests > 0;
let actived_at = entry.actived_at.lock().unwrap();
let idel_duration = now.duration_since(*actived_at);
let is_recent = idel_duration <= self.idle_timeout;
// Retain the client if it is active or not exceeds the capacity and is recent.
let should_retain = is_active || (!exceeds_capacity && is_recent);
if !should_retain {
debug!(
"removing idle client: {}, exceeds_capacity: {}, idle_duration: {}s",
addr,
exceeds_capacity,
idel_duration.as_secs(),
);
}
should_retain
});
// Update the cleanup time.
*self.cleanup_at.lock().await = now;
#[instrument(skip_all)]
pub fn new(config: Arc<Config>) -> Self {
Self { config }
}
}
@ -273,15 +102,10 @@ impl Downloader for GRPCDownloader {
host_id: &str,
task_id: &str,
) -> Result<(Vec<u8>, u64, String)> {
let client = self.client(addr).await?;
let dfdaemon_upload_client =
DfdaemonUploadClient::new(self.config.clone(), format!("http://{}", addr)).await?;
let entry = self
.get_client_entry(addr)
.await
.ok_or(Error::UnexpectedResponse)?;
let request_guard = RequestGuard::new(entry.active_requests.clone());
let response = match client
let response = dfdaemon_upload_client
.download_piece(
DownloadPieceRequest {
host_id: host_id.to_string(),
@ -290,17 +114,7 @@ impl Downloader for GRPCDownloader {
},
self.config.download.piece_timeout,
)
.await
{
Ok(response) => response,
Err(err) => {
// If the request fails, it will drop the request guard and remove the client
// entry to avoid using the invalid client.
drop(request_guard);
self.remove_client_entry(addr).await;
return Err(err);
}
};
.await?;
let Some(piece) = response.piece else {
return Err(Error::InvalidParameter);
@ -310,26 +124,6 @@ impl Downloader for GRPCDownloader {
return Err(Error::InvalidParameter);
};
// Calculate the digest of the piece metadata and compare it with the expected digest,
// it verifies the integrity of the piece metadata.
let piece_metadata = metadata::Piece {
number,
length: piece.length,
offset: piece.offset,
digest: piece.digest.clone(),
..Default::default()
};
if let Some(expected_digest) = response.digest {
let digest = piece_metadata.calculate_digest();
if expected_digest != digest {
return Err(Error::DigestMismatch(
expected_digest.to_string(),
digest.to_string(),
));
}
}
Ok((content, piece.offset, piece.digest))
}
@ -343,15 +137,10 @@ impl Downloader for GRPCDownloader {
host_id: &str,
task_id: &str,
) -> Result<(Vec<u8>, u64, String)> {
let client = self.client(addr).await?;
let dfdaemon_upload_client =
DfdaemonUploadClient::new(self.config.clone(), format!("http://{}", addr)).await?;
let entry = self
.get_client_entry(addr)
.await
.ok_or(Error::UnexpectedResponse)?;
let request_guard = RequestGuard::new(entry.active_requests.clone());
let response = match client
let response = dfdaemon_upload_client
.download_persistent_cache_piece(
DownloadPersistentCachePieceRequest {
host_id: host_id.to_string(),
@ -360,17 +149,7 @@ impl Downloader for GRPCDownloader {
},
self.config.download.piece_timeout,
)
.await
{
Ok(response) => response,
Err(err) => {
// If the request fails, it will drop the request guard and remove the client
// entry to avoid using the invalid client.
drop(request_guard);
self.remove_client_entry(addr).await;
return Err(err);
}
};
.await?;
let Some(piece) = response.piece else {
return Err(Error::InvalidParameter);
@ -380,26 +159,6 @@ impl Downloader for GRPCDownloader {
return Err(Error::InvalidParameter);
};
// Calculate the digest of the piece metadata and compare it with the expected digest,
// it verifies the integrity of the piece metadata.
let piece_metadata = metadata::Piece {
number,
length: piece.length,
offset: piece.offset,
digest: piece.digest.clone(),
..Default::default()
};
if let Some(expected_digest) = response.digest {
let digest = piece_metadata.calculate_digest();
if expected_digest != digest {
return Err(Error::DigestMismatch(
expected_digest.to_string(),
digest.to_string(),
));
}
}
Ok((content, piece.offset, piece.digest))
}
}

View File

@ -20,8 +20,7 @@ use crate::metrics::{
collect_backend_request_started_metrics,
};
use dragonfly_api::common::v2::{
Download, Hdfs, ObjectStorage, Peer, Piece, SizeScope, Task as CommonTask, TaskType,
TrafficType,
Download, Hdfs, ObjectStorage, Peer, Piece, Range, Task as CommonTask, TrafficType,
};
use dragonfly_api::dfdaemon::{
self,
@ -49,7 +48,6 @@ use dragonfly_client_util::{
id_generator::IDGenerator,
};
use reqwest::header::HeaderMap;
use std::collections::HashMap;
use std::path::Path;
use std::sync::{
atomic::{AtomicBool, Ordering},
@ -64,7 +62,7 @@ use tokio::sync::{
use tokio::task::JoinSet;
use tokio_stream::{wrappers::ReceiverStream, StreamExt};
use tonic::{Request, Status};
use tracing::{debug, error, info, instrument, warn, Instrument};
use tracing::{debug, error, info, instrument, Instrument};
use super::*;
@ -92,6 +90,7 @@ pub struct Task {
/// Task implements the task manager.
impl Task {
/// new returns a new Task.
#[instrument(skip_all)]
pub fn new(
config: Arc<Config>,
id_generator: Arc<IDGenerator>,
@ -118,7 +117,6 @@ impl Task {
}
/// get returns the metadata of the task with the given id, as reported by the storage.
#[instrument(skip_all)]
pub fn get(&self, id: &str) -> ClientResult<Option<metadata::Task>> {
    // Pure delegation: the storage result, including any error, is passed through unchanged.
    let task = self.storage.get_task(id)?;
    Ok(task)
}
@ -130,30 +128,8 @@ impl Task {
id: &str,
request: Download,
) -> ClientResult<metadata::Task> {
let task = self.storage.prepare_download_task_started(id).await?;
let task = self.storage.download_task_started(id, None, None, None)?;
if task.content_length.is_some() && task.piece_length.is_some() {
// Attempt to create a hard link from the task file to the output path.
//
// Behavior based on force_hard_link setting:
// 1. force_hard_link is true:
// - Success: Continue processing
// - Failure: Return error immediately
// 2. force_hard_link is false:
// - Success: Continue processing
// - Failure: Fall back to copying the file instead
if let Some(output_path) = &request.output_path {
if let Err(err) = self
.storage
.hard_link_task(id, Path::new(output_path.as_str()))
.await
{
if request.force_hard_link {
return Err(err);
}
}
}
return Ok(task);
}
@ -225,54 +201,26 @@ impl Task {
None => return Err(Error::InvalidContentLength),
};
let piece_length = match request.piece_length {
Some(piece_length) => self
.piece
.calculate_piece_length(piece::PieceLengthStrategy::FixedPieceLength(piece_length)),
None => {
self.piece
.calculate_piece_length(piece::PieceLengthStrategy::OptimizeByFileLength(
content_length,
))
}
};
let piece_length = self.piece.calculate_piece_length(
piece::PieceLengthStrategy::OptimizeByFileLength,
content_length,
);
// If the task is not finished, check if the storage has enough space to
// store the task.
if !task.is_finished() && !self.storage.has_enough_space(content_length)? {
return Err(Error::NoSpace(format!(
"not enough space to store the task: content_length={}",
"not enough space to store the persistent cache task: content_length={}",
content_length
)));
}
let task = self
.storage
.download_task_started(id, piece_length, content_length, response.http_header)
.await;
// Attempt to create a hard link from the task file to the output path.
//
// Behavior based on force_hard_link setting:
// 1. force_hard_link is true:
// - Success: Continue processing
// - Failure: Return error immediately
// 2. force_hard_link is false:
// - Success: Continue processing
// - Failure: Fall back to copying the file instead
if let Some(output_path) = &request.output_path {
if let Err(err) = self
.storage
.hard_link_task(id, Path::new(output_path.as_str()))
.await
{
if request.force_hard_link {
return Err(err);
}
}
}
task
self.storage.download_task_started(
id,
Some(piece_length),
Some(content_length),
response.http_header,
)
}
/// download_finished updates the metadata of the task when the task downloads finished.
@ -299,15 +247,15 @@ impl Task {
self.storage.prefetch_task_failed(id).await
}
/// is_same_dev_inode asks the storage whether the task identified by `id` is on the same
/// device inode as the path `to`.
pub async fn is_same_dev_inode(&self, id: &str, to: &Path) -> ClientResult<bool> {
    // Pure delegation: the storage result, including any error, is passed through unchanged.
    let is_same = self.storage.is_same_dev_inode_as_task(id, to).await?;
    Ok(is_same)
}
/// copy_task copies the task content to the destination.
/// hard_link_or_copy hard links or copies the task content to the destination.
#[instrument(skip_all)]
pub async fn copy_task(&self, id: &str, to: &Path) -> ClientResult<()> {
self.storage.copy_task(id, to).await
pub async fn hard_link_or_copy(
&self,
task: &metadata::Task,
to: &Path,
range: Option<Range>,
) -> ClientResult<()> {
self.storage.hard_link_or_copy_task(task, to, range).await
}
/// download downloads a task.
@ -318,7 +266,7 @@ impl Task {
task: &metadata::Task,
host_id: &str,
peer_id: &str,
request: Download,
mut request: Download,
download_progress_tx: Sender<Result<DownloadTaskResponse, Status>>,
) -> ClientResult<()> {
// Get the id of the task.
@ -336,6 +284,9 @@ impl Task {
return Err(Error::InvalidPieceLength);
};
// Add the piece length to the request for register task.
request.piece_length = Some(piece_length);
// Calculate the interested pieces to download.
let interested_pieces =
match self
@ -388,7 +339,6 @@ impl Task {
range: request.range,
response_header: task.response_header.clone(),
pieces,
is_finished: task.is_finished(),
},
),
),
@ -598,7 +548,7 @@ impl Task {
})? {
// Check if the schedule count is exceeded.
schedule_count += 1;
if schedule_count > self.config.scheduler.max_schedule_count {
if schedule_count >= self.config.scheduler.max_schedule_count {
in_stream_tx
.send_timeout(
AnnouncePeerRequest {
@ -996,8 +946,7 @@ impl Task {
host: peer.host,
})
.collect(),
)
.await;
);
let mut piece_collector_rx = piece_collector.run().await;
// Initialize the interrupt. If download from parent failed with scheduler or download
@ -1129,13 +1078,13 @@ impl Task {
REQUEST_TIMEOUT,
)
.await
.unwrap_or_else(|err| {
.inspect_err(|err| {
error!(
"send DownloadPieceFinishedRequest for piece {} failed: {:?}",
piece_id, err
);
interrupt.store(true, Ordering::SeqCst);
});
})?;
// Send the download progress.
download_progress_tx
@ -1155,13 +1104,13 @@ impl Task {
REQUEST_TIMEOUT,
)
.await
.unwrap_or_else(|err| {
.inspect_err(|err| {
error!(
"send DownloadPieceFinishedResponse for piece {} failed: {:?}",
piece_id, err
);
interrupt.store(true, Ordering::SeqCst);
});
})?;
info!(
"finished piece {} from parent {:?}",
@ -1385,9 +1334,9 @@ impl Task {
},
REQUEST_TIMEOUT,
)
.await.unwrap_or_else(|err| {
.await.inspect_err(|err| {
error!("send DownloadPieceBackToSourceFinishedRequest for piece {} failed: {:?}", piece_id, err);
});
})?;
// Send the download progress.
download_progress_tx
@ -1407,12 +1356,12 @@ impl Task {
REQUEST_TIMEOUT,
)
.await
.unwrap_or_else(|err| {
.inspect_err(|err| {
error!(
"send DownloadPieceFinishedResponse for piece {} failed: {:?}",
piece_id, err
);
});
})?;
info!("finished piece {} from source", piece_id);
Ok(metadata)
@ -1577,11 +1526,6 @@ impl Task {
}
};
if !piece.is_finished() {
debug!("piece {} is not finished, skip it", piece_id);
continue;
}
// Fake the download from the local.
self.piece.download_from_local(task_id, piece.length);
info!("finished piece {} from local", piece_id,);
@ -1642,12 +1586,12 @@ impl Task {
REQUEST_TIMEOUT,
)
.await
.unwrap_or_else(|err| {
.inspect_err(|err| {
error!(
"send DownloadPieceFinishedResponse for piece {} failed: {:?}",
piece_id, err
);
});
})?;
// Store the finished piece.
finished_pieces.push(interested_piece.clone());
@ -1754,12 +1698,12 @@ impl Task {
REQUEST_TIMEOUT,
)
.await
.unwrap_or_else(|err| {
.inspect_err(|err| {
error!(
"send DownloadPieceFinishedResponse for piece {} failed: {:?}",
piece_id, err
);
});
})?;
info!("finished piece {} from source", piece_id);
Ok(metadata)
@ -1821,74 +1765,7 @@ impl Task {
/// stat_task returns the task metadata.
#[instrument(skip_all)]
pub async fn stat(
&self,
task_id: &str,
host_id: &str,
local_only: bool,
) -> ClientResult<CommonTask> {
if local_only {
let Some(task_metadata) = self.storage.get_task(task_id).inspect_err(|err| {
error!("get task {} from local storage error: {:?}", task_id, err);
})?
else {
return Err(Error::TaskNotFound(task_id.to_owned()));
};
let piece_metadatas = self.piece.get_all(task_id).inspect_err(|err| {
error!(
"get pieces for task {} from local storage error: {:?}",
task_id, err
);
})?;
let pieces = piece_metadatas
.into_iter()
.filter(|piece| piece.is_finished())
.map(|piece| {
// The traffic_type indicates whether the first download was from the source or hit the remote peer cache.
// If the parent_id exists, the piece was downloaded from a remote peer. Otherwise, it was
// downloaded from the source.
let traffic_type = match piece.parent_id {
None => TrafficType::BackToSource,
Some(_) => TrafficType::RemotePeer,
};
Piece {
number: piece.number,
parent_id: piece.parent_id.clone(),
offset: piece.offset,
length: piece.length,
digest: piece.digest.clone(),
content: None,
traffic_type: Some(traffic_type as i32),
cost: piece.prost_cost(),
created_at: Some(prost_wkt_types::Timestamp::from(piece.created_at)),
}
})
.collect::<Vec<Piece>>();
return Ok(CommonTask {
id: task_metadata.id,
r#type: TaskType::Standard as i32,
url: String::new(),
digest: None,
tag: None,
application: None,
filtered_query_params: Vec::new(),
request_header: HashMap::new(),
content_length: task_metadata.content_length.unwrap_or(0),
piece_count: pieces.len() as u32,
size_scope: SizeScope::Normal as i32,
pieces,
state: String::new(),
peer_count: 0,
has_available_peer: false,
created_at: Some(prost_wkt_types::Timestamp::from(task_metadata.created_at)),
updated_at: Some(prost_wkt_types::Timestamp::from(task_metadata.updated_at)),
});
}
pub async fn stat(&self, task_id: &str, host_id: &str) -> ClientResult<CommonTask> {
let task = self
.scheduler_client
.stat_task(StatTaskRequest {
@ -1934,54 +1811,3 @@ impl Task {
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;
    use tempfile::tempdir;

    // test_delete_task_not_found walks a task through the storage: an unknown id yields
    // None, a started task can be fetched, and a deleted task yields None again.
    #[tokio::test]
    async fn test_delete_task_not_found() {
        // Set up a scratch directory holding both the storage data and the log directory.
        let workspace = tempdir().unwrap();
        let log_dir = workspace.path().join("log");
        std::fs::create_dir_all(&log_dir).unwrap();

        // Build the storage on top of the default configuration.
        let config = Arc::new(Config::default());
        let storage = Arc::new(
            Storage::new(config.clone(), workspace.path(), log_dir)
                .await
                .unwrap(),
        );

        // An id that was never stored must not resolve to a task.
        let missing_id = "non-existent-task-id";
        assert!(
            storage.get_task(missing_id).unwrap().is_none(),
            "non-existent tasks should return None"
        );

        // Start a download so that the task metadata is persisted.
        let task_id = "test-task-id";
        storage
            .download_task_started(task_id, 1024, 4096, None)
            .await
            .unwrap();
        assert!(
            storage.get_task(task_id).unwrap().is_some(),
            "task should exist"
        );

        // After the deletion the task must no longer be retrievable.
        storage.delete_task(task_id).await;
        assert!(
            storage.get_task(task_id).unwrap().is_none(),
            "task should be deleted"
        );
    }
}

View File

@ -109,100 +109,3 @@ pub async fn shutdown_signal() {
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use tokio::time::{sleep, Duration};

    // A signal triggered from another task must wake up a receiver and mark it as shut down.
    #[tokio::test]
    async fn test_shutdown_trigger_and_recv() {
        let mut receiver = Shutdown::new();

        // Fire the signal from a separate task after a short delay, giving the receiver
        // below time to start waiting.
        let sender = receiver.clone();
        tokio::spawn(async move {
            sleep(Duration::from_millis(10)).await;
            sender.trigger();
        });

        receiver.recv().await;
        assert!(receiver.is_shutdown());
    }

    // Every clone created before the trigger must observe the signal.
    #[tokio::test]
    async fn test_shutdown_multiple_receivers() {
        let mut first = Shutdown::new();
        let mut second = first.clone();
        let mut third = first.clone();

        first.trigger();

        // Receive on each instance in turn, then check the flag on each of them.
        for receiver in [&mut first, &mut second, &mut third] {
            receiver.recv().await;
        }
        for receiver in [&first, &second, &third] {
            assert!(receiver.is_shutdown());
        }
    }

    // Clones taken after the shutdown share the flag; clones taken before it still get
    // the signal.
    #[tokio::test]
    async fn test_shutdown_clone_behavior() {
        // Shut down first, clone afterwards.
        let mut triggered = Shutdown::new();
        triggered.trigger();
        triggered.recv().await;
        assert!(triggered.is_shutdown());

        let late_clone = triggered.clone();
        assert_eq!(triggered.is_shutdown(), late_clone.is_shutdown());

        // Clone first, shut down afterwards.
        let mut source = Shutdown::new();
        let mut early_clone = source.clone();
        source.trigger();

        source.recv().await;
        early_clone.recv().await;
        assert!(source.is_shutdown());
        assert!(early_clone.is_shutdown());
    }

    // Receiving again after the shutdown was already observed must not block.
    #[tokio::test]
    async fn test_shutdown_already_triggered() {
        let mut shutdown = Shutdown::new();
        shutdown.trigger();
        shutdown.recv().await;
        assert!(shutdown.is_shutdown());

        // The second recv is expected to return immediately (in less than 5ms).
        let started_at = std::time::Instant::now();
        shutdown.recv().await;
        let elapsed = started_at.elapsed();
        assert!(elapsed < Duration::from_millis(5));
    }
}

View File

@ -67,6 +67,7 @@ pub struct Stats {
/// Stats implements the stats server.
impl Stats {
/// new creates a new Stats.
#[instrument(skip_all)]
pub fn new(
addr: SocketAddr,
shutdown: shutdown::Shutdown,
@ -80,6 +81,7 @@ impl Stats {
}
/// run starts the stats server.
#[instrument(skip_all)]
pub async fn run(&self) {
// Clone the shutdown channel.
let mut shutdown = self.shutdown.clone();
@ -108,6 +110,7 @@ impl Stats {
_ = shutdown.recv() => {
// Stats server shutting down with signals.
info!("stats server shutting down");
return
}
}
}

View File

@ -14,19 +14,14 @@
* limitations under the License.
*/
use dragonfly_client_config::dfdaemon::Host;
use opentelemetry::{global, trace::TracerProvider, KeyValue};
use opentelemetry_otlp::{WithExportConfig, WithTonicConfig};
use opentelemetry_sdk::{propagation::TraceContextPropagator, Resource};
use opentelemetry::sdk::propagation::TraceContextPropagator;
use rolling_file::*;
use std::fs;
use std::path::PathBuf;
use std::str::FromStr;
use std::time::Duration;
use tonic::metadata::{MetadataKey, MetadataMap, MetadataValue};
use tracing::{info, Level};
use tracing_appender::non_blocking::WorkerGuard;
use tracing_opentelemetry::OpenTelemetryLayer;
use tracing_flame::FlameLayer;
use tracing_log::LogTracer;
use tracing_subscriber::{
filter::LevelFilter,
fmt::{time::ChronoLocal, Layer},
@ -34,9 +29,6 @@ use tracing_subscriber::{
EnvFilter, Registry,
};
/// SPAN_EXPORTER_TIMEOUT is the timeout for the span exporter.
const SPAN_EXPORTER_TIMEOUT: Duration = Duration::from_secs(10);
/// init_tracing initializes the tracing system.
#[allow(clippy::too_many_arguments)]
pub fn init_tracing(
@ -44,13 +36,9 @@ pub fn init_tracing(
log_dir: PathBuf,
log_level: Level,
log_max_files: usize,
otel_protocol: Option<String>,
otel_endpoint: Option<String>,
otel_path: Option<PathBuf>,
otel_headers: Option<reqwest::header::HeaderMap>,
host: Option<Host>,
is_seed_peer: bool,
console: bool,
jaeger_addr: Option<String>,
flamegraph: bool,
verbose: bool,
) -> Vec<WorkerGuard> {
let mut guards = vec![];
@ -59,7 +47,7 @@ pub fn init_tracing(
guards.push(stdout_guard);
// Initialize stdout layer.
let stdout_filter = if console {
let stdout_filter = if verbose {
LevelFilter::DEBUG
} else {
LevelFilter::OFF
@ -102,116 +90,41 @@ pub fn init_tracing(
let env_filter = EnvFilter::try_from_default_env()
.unwrap_or_else(|_| EnvFilter::default().add_directive(log_level.into()));
// Enable console subscriber layer for tracing spawn tasks on `127.0.0.1:6669` when log level is TRACE.
let console_subscriber_layer = if log_level == Level::TRACE {
Some(console_subscriber::spawn())
// Setup flame layer.
let flame_layer = if flamegraph {
let (flame_layer, _guard) = FlameLayer::with_file(log_dir.join("tracing.folded"))
.expect("failed to create flame layer");
Some(flame_layer)
} else {
None
};
let subscriber = Registry::default()
.with(env_filter)
.with(console_subscriber_layer)
.with(file_logging_layer)
.with(stdout_logging_layer);
.with(stdout_logging_layer)
.with(flame_layer);
// If OTLP protocol and endpoint are provided, set up OpenTelemetry tracing.
if let (Some(protocol), Some(endpoint)) = (otel_protocol, otel_endpoint) {
let otlp_exporter = match protocol.as_str() {
"grpc" => {
let mut metadata = MetadataMap::new();
if let Some(headers) = otel_headers {
for (key, value) in headers.iter() {
metadata.insert(
MetadataKey::from_str(key.as_str())
.expect("failed to create metadata key"),
MetadataValue::from_str(value.to_str().unwrap())
.expect("failed to create metadata value"),
);
}
}
// Setup jaeger layer.
if let Some(jaeger_addr) = jaeger_addr {
opentelemetry::global::set_text_map_propagator(TraceContextPropagator::new());
let tracer = opentelemetry_jaeger::new_agent_pipeline()
.with_service_name(name)
.with_endpoint(jaeger_addr)
.install_batch(opentelemetry::runtime::Tokio)
.expect("install");
let jaeger_layer = tracing_opentelemetry::layer().with_tracer(tracer);
let subscriber = subscriber.with(jaeger_layer);
let endpoint_url = url::Url::parse(&format!("http://{}", endpoint))
.expect("failed to parse OTLP endpoint URL");
opentelemetry_otlp::SpanExporter::builder()
.with_tonic()
.with_endpoint(endpoint_url)
.with_timeout(SPAN_EXPORTER_TIMEOUT)
.with_metadata(metadata)
.build()
.expect("failed to create OTLP exporter")
}
"http" | "https" => {
let mut endpoint_url = url::Url::parse(&format!("{}://{}", protocol, endpoint))
.expect("failed to parse OTLP endpoint URL");
if let Some(path) = otel_path {
endpoint_url = endpoint_url
.join(path.to_str().unwrap())
.expect("failed to join OTLP endpoint path");
}
opentelemetry_otlp::SpanExporter::builder()
.with_http()
.with_endpoint(endpoint_url.as_str())
.with_protocol(opentelemetry_otlp::Protocol::HttpJson)
.with_timeout(SPAN_EXPORTER_TIMEOUT)
.build()
.expect("failed to create OTLP exporter")
}
_ => {
panic!("unsupported OTLP protocol: {}", protocol);
}
};
let host = host.unwrap();
let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder()
.with_batch_exporter(otlp_exporter)
.with_resource(
Resource::builder()
.with_service_name(format!("{}-{}", name, host.ip.unwrap()))
.with_schema_url(
[
KeyValue::new(
opentelemetry_semantic_conventions::attribute::SERVICE_NAMESPACE,
"dragonfly",
),
KeyValue::new(
opentelemetry_semantic_conventions::attribute::HOST_NAME,
host.hostname,
),
KeyValue::new(
opentelemetry_semantic_conventions::attribute::HOST_IP,
host.ip.unwrap().to_string(),
),
],
opentelemetry_semantic_conventions::SCHEMA_URL,
)
.with_attribute(opentelemetry::KeyValue::new(
"host.idc",
host.idc.unwrap_or_default(),
))
.with_attribute(opentelemetry::KeyValue::new(
"host.location",
host.location.unwrap_or_default(),
))
.with_attribute(opentelemetry::KeyValue::new("host.seed_peer", is_seed_peer))
.build(),
)
.build();
let tracer = provider.tracer(name.to_string());
global::set_tracer_provider(provider.clone());
global::set_text_map_propagator(TraceContextPropagator::new());
let jaeger_layer = OpenTelemetryLayer::new(tracer);
subscriber.with(jaeger_layer).init();
tracing::subscriber::set_global_default(subscriber)
.expect("failed to set global subscriber");
} else {
subscriber.init();
tracing::subscriber::set_global_default(subscriber)
.expect("failed to set global subscriber");
}
std::panic::set_hook(Box::new(tracing_panic::panic_hook));
LogTracer::init().expect("failed to init LogTracer");
info!(
"tracing initialized directory: {}, level: {}",
log_dir.as_path().display(),

View File

@ -1,2 +1,2 @@
[toolchain]
channel = "1.85.0"
channel = "1.82.0"