Compare commits

..

No commits in common. "main" and "v0.1.5" have entirely different histories.
main ... v0.1.5

128 changed files with 7467 additions and 40351 deletions

View File

@ -1,2 +0,0 @@
[build]
rustflags = ["--cfg", "tokio_unstable"]

View File

@ -1,16 +0,0 @@
# Set to true to add reviewers to pull requests
addReviewers: true
# Set to true to add assignees to pull requests
addAssignees: author
# A list of reviewers to be added to pull requests (GitHub user name)
reviewers:
- gaius-qi
- yxxhero
- chlins
- CormickKneey
- xujihui1985
# A number of reviewers added to the pull request
numberOfReviewers: 3

View File

@ -1,11 +0,0 @@
name: "Auto Assign"
on:
pull_request_target:
types: [opened, reopened, ready_for_review]
jobs:
add-assignee:
runs-on: ubuntu-latest
steps:
- uses: kentaro-m/auto-assign-action@9f6dbe84a80c6e7639d1b9698048b201052a2a94

View File

@ -19,28 +19,20 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4
- name: Rust cache
uses: Swatinem/rust-cache@v2
with:
cache-on-failure: true
- name: Install Protoc
uses: arduino/setup-protoc@v2
with:
repo-token: ${{ secrets.GH_TOKEN }}
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable
- name: Set up Clang
uses: egor-tensin/setup-clang@v1
- name: Install stable toolchain
uses: actions-rs/toolchain@v1
with:
version: latest
platform: x64
profile: minimal
toolchain: stable
override: true
- name: Run cargo check
run: |
cargo check --all --all-targets
uses: actions-rs/cargo@v1
with:
command: check
test:
name: Run tests
@ -50,24 +42,8 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4
- name: Rust cache
uses: Swatinem/rust-cache@v2
with:
cache-on-failure: true
- name: Install Protoc
uses: arduino/setup-protoc@v2
with:
repo-token: ${{ secrets.GH_TOKEN }}
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable
- name: Set up Clang
uses: egor-tensin/setup-clang@v1
with:
version: latest
platform: x64
- name: Install cargo-llvm-cov
uses: taiki-e/install-action@v2
@ -78,8 +54,8 @@ jobs:
run: cargo llvm-cov --all-features --workspace --lcov --output-path lcov.info
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: lcov.info
verbose: true
fail_ci_if_error: true

View File

@ -8,10 +8,10 @@ on:
- v*
jobs:
push_client_image_to_registry:
name: Push Client Image
runs-on: [self-hosted, Linux, X64]
timeout-minutes: 600
push_image_to_registry:
name: Push Image
runs-on: ubuntu-latest
timeout-minutes: 360
steps:
- name: Check out code
uses: actions/checkout@v4
@ -22,10 +22,9 @@ jobs:
id: get_version
run: |
VERSION=${GITHUB_REF#refs/tags/}
if [[ ${GITHUB_REF} == "refs/heads/main" || ${GITHUB_REF} =~ refs/pull/([0-9]+)/merge ]]; then
if [[ ${GITHUB_REF} == "refs/heads/main" ]]; then
VERSION=latest
fi
echo "VERSION=${VERSION}" >> $GITHUB_OUTPUT
- name: Get Git Revision
@ -36,7 +35,7 @@ jobs:
- name: PrepareReg Names
run: |
echo IMAGE_REPOSITORY=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
echo IMAGE_REPOSITORY=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
- name: Setup QEMU
uses: docker/setup-qemu-action@v3
@ -45,7 +44,7 @@ jobs:
uses: docker/setup-buildx-action@v3
- name: Cache Docker layers
uses: actions/cache@v4
uses: actions/cache@v3
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-${{ github.sha }}
@ -67,10 +66,9 @@ jobs:
password: ${{ secrets.GITHUB_TOKEN }}
- name: Push to Registry
uses: docker/build-push-action@v6
uses: docker/build-push-action@v5
with:
context: .
file: ci/Dockerfile
platforms: linux/amd64,linux/arm64
labels: |-
org.opencontainers.image.source=https://github.com/${{ github.repository }}
@ -81,214 +79,6 @@ jobs:
tags: |
dragonflyoss/client:${{ steps.get_version.outputs.VERSION }}
ghcr.io/${{ env.IMAGE_REPOSITORY }}:${{ steps.get_version.outputs.VERSION }}
push: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }}
push: true
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache-new
- name: Run Trivy vulnerability scanner in tarball mode
uses: aquasecurity/trivy-action@dc5a429b52fcf669ce959baa2c2dd26090d2a6c4
with:
image-ref: dragonflyoss/client:${{ steps.get_version.outputs.VERSION }}
severity: 'CRITICAL,HIGH'
format: 'sarif'
output: 'trivy-results.sarif'
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@76621b61decf072c1cee8dd1ce2d2a82d33c17ed
with:
sarif_file: 'trivy-results.sarif'
- name: Move cache
run: |
rm -rf /tmp/.buildx-cache
mv /tmp/.buildx-cache-new /tmp/.buildx-cache
push_client_debug_image_to_registry:
name: Push Client Debug Image
runs-on: [self-hosted, Linux, X64]
timeout-minutes: 600
steps:
- name: Check out code
uses: actions/checkout@v4
with:
submodules: recursive
- name: Get Version
id: get_version
run: |
VERSION=${GITHUB_REF#refs/tags/}
if [[ ${GITHUB_REF} == "refs/heads/main" || ${GITHUB_REF} =~ refs/pull/([0-9]+)/merge ]]; then
VERSION=latest
fi
echo "VERSION=${VERSION}" >> $GITHUB_OUTPUT
- name: Get Git Revision
id: vars
shell: bash
run: |
echo "git_revision=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
- name: PrepareReg Names
run: |
echo IMAGE_REPOSITORY=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
- name: Setup QEMU
uses: docker/setup-qemu-action@v3
- name: Setup Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Cache Docker layers
uses: actions/cache@v4
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-debug-${{ github.sha }}
restore-keys: |
${{ runner.os }}-buildx-debug-
- name: Login Docker Hub
uses: docker/login-action@v3
with:
registry: docker.io
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Push to Registry
uses: docker/build-push-action@v6
with:
context: .
file: ci/Dockerfile.debug
platforms: linux/amd64,linux/arm64
labels: |-
org.opencontainers.image.source=https://github.com/${{ github.repository }}
org.opencontainers.image.revision=${{ github.sha }}
build-args: |
GITVERSION=git-${{ steps.vars.outputs.git_revision }}
VERSION=${{ steps.get_version.outputs.VERSION }}-debug
tags: |
dragonflyoss/client:${{ steps.get_version.outputs.VERSION }}-debug
ghcr.io/${{ env.IMAGE_REPOSITORY }}:${{ steps.get_version.outputs.VERSION }}-debug
push: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }}
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache-new
- name: Run Trivy vulnerability scanner in tarball mode
uses: aquasecurity/trivy-action@dc5a429b52fcf669ce959baa2c2dd26090d2a6c4
with:
image-ref: dragonflyoss/client:${{ steps.get_version.outputs.VERSION }}-debug
severity: 'CRITICAL,HIGH'
format: 'sarif'
output: 'trivy-results.sarif'
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@76621b61decf072c1cee8dd1ce2d2a82d33c17ed
with:
sarif_file: 'trivy-results.sarif'
- name: Move cache
run: |
rm -rf /tmp/.buildx-cache
mv /tmp/.buildx-cache-new /tmp/.buildx-cache
push_dfinit_image_to_registry:
name: Push Dfinit Image
runs-on: [self-hosted, Linux, X64]
timeout-minutes: 600
steps:
- name: Check out code
uses: actions/checkout@v4
with:
submodules: recursive
- name: Get Version
id: get_version
run: |
VERSION=${GITHUB_REF#refs/tags/}
if [[ ${GITHUB_REF} == "refs/heads/main" || ${GITHUB_REF} =~ refs/pull/([0-9]+)/merge ]]; then
VERSION=latest
fi
echo "VERSION=${VERSION}" >> $GITHUB_OUTPUT
- name: Get Git Revision
id: vars
shell: bash
run: |
echo "git_revision=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
- name: PrepareReg Names
run: |
echo IMAGE_REPOSITORY=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
- name: Setup QEMU
uses: docker/setup-qemu-action@v3
- name: Setup Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Cache Docker layers
uses: actions/cache@v4
with:
path: /tmp/.buildx-cache
key: ${{ runner.os }}-buildx-${{ github.sha }}
restore-keys: |
${{ runner.os }}-buildx-
- name: Login Docker Hub
uses: docker/login-action@v3
with:
registry: docker.io
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Push to Registry
uses: docker/build-push-action@v6
with:
context: .
file: ci/Dockerfile.dfinit
platforms: linux/amd64,linux/arm64
labels: |-
org.opencontainers.image.source=https://github.com/${{ github.repository }}
org.opencontainers.image.revision=${{ github.sha }}
build-args: |
GITVERSION=git-${{ steps.vars.outputs.git_revision }}
VERSION=${{ steps.get_version.outputs.VERSION }}
tags: |
dragonflyoss/dfinit:${{ steps.get_version.outputs.VERSION }}
ghcr.io/${{ env.IMAGE_REPOSITORY }}/dfinit:${{ steps.get_version.outputs.VERSION }}
push: ${{ github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/') }}
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache-new
- name: Run Trivy vulnerability scanner in tarball mode
uses: aquasecurity/trivy-action@dc5a429b52fcf669ce959baa2c2dd26090d2a6c4
with:
image-ref: dragonflyoss/dfinit:${{ steps.get_version.outputs.VERSION }}
severity: 'CRITICAL,HIGH'
format: 'sarif'
output: 'trivy-results.sarif'
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@76621b61decf072c1cee8dd1ce2d2a82d33c17ed
with:
sarif_file: 'trivy-results.sarif'
- name: Move cache
run: |
rm -rf /tmp/.buildx-cache
mv /tmp/.buildx-cache-new /tmp/.buildx-cache

View File

@ -15,32 +15,24 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4
- name: Rust cache
uses: Swatinem/rust-cache@v2
with:
cache-on-failure: true
- name: Install Protoc
uses: arduino/setup-protoc@v2
with:
repo-token: ${{ secrets.GH_TOKEN }}
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable
- name: Install stable toolchain
uses: actions-rs/toolchain@v1
with:
components: rustfmt, clippy
toolchain: 1.85.0
- name: Set up Clang
uses: egor-tensin/setup-clang@v1
with:
version: latest
platform: x64
profile: minimal
toolchain: stable
override: true
- name: Run cargo fmt
run: |
cargo fmt --all -- --check
uses: actions-rs/cargo@v1
with:
command: fmt
args: --all -- --check
- name: Run cargo clippy
run: |
cargo clippy --all --all-targets -- -D warnings
uses: actions-rs/cargo@v1
with:
command: clippy
args: -- -D warnings

View File

@ -1,20 +0,0 @@
name: PR Label
on:
pull_request:
types: [opened, labeled, unlabeled, synchronize]
permissions:
contents: read
jobs:
classify:
name: Classify PR
runs-on: ubuntu-latest
steps:
- name: PR impact specified
uses: mheap/github-action-required-labels@8afbe8ae6ab7647d0c9f0cfa7c2f939650d22509 # v5.5
with:
mode: exactly
count: 1
labels: 'bug, enhancement, documentation, dependencies'

View File

@ -6,9 +6,8 @@ on:
- v*
jobs:
build:
release-github:
runs-on: ubuntu-latest
timeout-minutes: 480
strategy:
matrix:
target:
@ -18,17 +17,6 @@ jobs:
- aarch64-unknown-linux-musl
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
tool-cache: false
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Checkout
uses: actions/checkout@v4
@ -51,118 +39,67 @@ jobs:
with:
target: ${{ matrix.target }}
- name: Install cargo-deb
uses: taiki-e/cache-cargo-install-action@b33c63d3b3c85540f4eba8a4f71a5cc0ce030855
with:
# Don't upgrade cargo-deb, refer to https://github.com/kornelski/cargo-deb/issues/169.
tool: cargo-deb@2.10.0
- name: Install cargo-generate-rpm
uses: taiki-e/install-action@daa3c1f1f9a9d46f686d9fc2f65773d0c293688b
with:
tool: cargo-generate-rpm
- name: Install dependencies
run: |
sudo apt-get update -y
sudo apt-get install -y protobuf-compiler libclang-dev
sudo apt-get update
sudo apt-get install protobuf-compiler
- name: Build binaries
shell: bash
run: |
cargo build --release --bins --workspace --exclude hdfs --target ${{ matrix.target }}
cargo build --release --bins --target ${{ matrix.target }}
- name: Build archive client(DEB)
- name: Build archive dfget
shell: bash
run: |
binary_name="dragonfly-client"
binary_name="dfget"
dirname="$binary_name-${{ env.VERSION }}-${{ matrix.target }}"
cargo deb -p dragonfly-client --no-build --target ${{ matrix.target }} --variant ${{ matrix.target }} --compress-type gzip --output $dirname.deb
echo "CLIENT_DEB_ASSET=$dirname.deb" >> $GITHUB_ENV
- name: Build archive client(RPM)
shell: bash
run: |
binary_name="dragonfly-client"
dirname="$binary_name-${{ env.VERSION }}-${{ matrix.target }}"
cargo generate-rpm -p dragonfly-client --target ${{ matrix.target }} --variant ${{ matrix.target }} --payload-compress none --output $dirname.rpm
echo "CLIENT_RPM_ASSET=$dirname.rpm" >> $GITHUB_ENV
- name: Build archive client(TAR)
shell: bash
run: |
binary_name="dragonfly-client"
dirname="$binary_name-${{ env.VERSION }}-${{ matrix.target }}"
mkdir -p "$dirname"
mv "target/${{ matrix.target }}/release/dfget" "$dirname"
mv "target/${{ matrix.target }}/release/dfdaemon" "$dirname"
mv "target/${{ matrix.target }}/release/dfcache" "$dirname"
mv "target/${{ matrix.target }}/release/dfinit" "$dirname"
mv CONTRIBUTING.md LICENSE README.md "$dirname"
mv "target/${{ matrix.target }}/release/$binary_name" "$dirname"
tar -czf "$dirname.tar.gz" "$dirname"
echo "CLIENT_TAR_ASSET=$dirname.tar.gz" >> $GITHUB_ENV
echo "DFGET_ASSET=$dirname.tar.gz" >> $GITHUB_ENV
- name: Upload Release Artifacts
uses: actions/upload-artifact@v4
with:
name: release-${{ matrix.target }}
path: |
${{ env.CLIENT_RPM_ASSET }}
${{ env.CLIENT_DEB_ASSET }}
${{ env.CLIENT_TAR_ASSET }}
- name: Build archive dfdaemon
shell: bash
run: |
binary_name="dfdaemon"
dirname="$binary_name-${{ env.VERSION }}-${{ matrix.target }}"
mv "target/${{ matrix.target }}/release/$binary_name" "$dirname"
tar -czf "$dirname.tar.gz" "$dirname"
echo "DFDAEMON_ASSET=$dirname.tar.gz" >> $GITHUB_ENV
release-github:
runs-on: ubuntu-latest
timeout-minutes: 360
needs: [build]
permissions:
contents: write
steps:
- name: Download Release Artifacts
uses: actions/download-artifact@v5
with:
path: releases
pattern: release-*
merge-multiple: true
- name: Build archive dfstore
shell: bash
run: |
binary_name="dfstore"
dirname="$binary_name-${{ env.VERSION }}-${{ matrix.target }}"
mv "target/${{ matrix.target }}/release/$binary_name" "$dirname"
tar -czf "$dirname.tar.gz" "$dirname"
echo "DFSTORE_ASSET=$dirname.tar.gz" >> $GITHUB_ENV
- name: Release
uses: ncipollo/release-action@v1
uses: softprops/action-gh-release@v1
with:
artifacts: "releases/*"
generateReleaseNotes: true
allowUpdates: true
generate_release_notes: true
draft: true
files: |
${{ env.DFGET_ASSET }}
${{ env.DFDAEMON_ASSET }}
${{ env.DFSTORE_ASSET }}
publish-crates:
runs-on: ubuntu-latest
timeout-minutes: 360
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
tool-cache: false
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: true
- name: Checkout
uses: actions/checkout@v4
- name: Install Rust
uses: dtolnay/rust-toolchain@stable
with:
toolchain: 1.85.0
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install protobuf-compiler libclang-dev
sudo apt-get install protobuf-compiler
- uses: katyo/publish-crates@v2
with:
registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }}
ignore-unpublished-changes: true
args: --locked

View File

@ -1,31 +0,0 @@
name: Close stale issues and PRs
on:
workflow_dispatch:
schedule:
- cron: "0 0 * * *"
permissions:
issues: write
pull-requests: write
jobs:
stale:
runs-on: ubuntu-latest
steps:
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
id: stale
with:
delete-branch: true
days-before-close: 7
days-before-stale: 90
days-before-pr-close: 7
days-before-pr-stale: 120
stale-issue-label: "stale"
exempt-issue-labels: bug,wip,on-hold
exempt-pr-labels: bug,wip,on-hold
exempt-all-milestones: true
stale-issue-message: 'This issue is stale because it has been open 90 days with no activity.'
close-issue-message: 'This issue was closed because it has been stalled for 7 days with no activity.'
stale-pr-message: 'This PR is stale because it has been open 120 days with no activity.'
close-pr-message: 'This PR was closed because it has been stalled for 7 days with no activity.'

14
.gitignore vendored
View File

@ -1,6 +1,10 @@
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
/bin/
# Test binary, built with `go test -c`
*.test
@ -45,11 +49,9 @@ Temporary Items
# will have compiled files and executables
/target/
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
# These are backup files generated by rustfmt
**/*.rs.bk
# Ignore unuseful files
scripts/certs
# Ignore .vscode folder
.vscode

View File

@ -80,7 +80,7 @@ Just remember one principle:
> WE ARE LOOKING FORWARD TO ANY PR FROM YOU.
Since you are ready to improve dfdaemon with a PR,
Since you are ready to improve dfdameon with a PR,
we suggest you could take a look at the PR rules here.
- [Workspace Preparation](#workspace-preparation)

6217
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,125 +1,77 @@
[workspace]
resolver = "2"
members = [
"dragonfly-client",
"dragonfly-client-backend",
"dragonfly-client-config",
"dragonfly-client-core",
"dragonfly-client-init",
"dragonfly-client-storage",
"dragonfly-client-util",
"dragonfly-client-backend/examples/plugin",
]
[workspace.package]
version = "1.0.10"
[package]
name = "dragonfly-client"
version = "0.1.5"
authors = ["The Dragonfly Developers"]
homepage = "https://d7y.io/"
repository = "https://github.com/dragonflyoss/client.git"
description = "Dragonfly client written in Rust"
keywords = ["dragonfly", "dragonfly-client", "p2p", "container", "docker-image"]
license = "Apache-2.0"
readme = "README.md"
edition = "2021"
[workspace.dependencies]
dragonfly-client = { path = "dragonfly-client", version = "1.0.10" }
dragonfly-client-core = { path = "dragonfly-client-core", version = "1.0.10" }
dragonfly-client-config = { path = "dragonfly-client-config", version = "1.0.10" }
dragonfly-client-storage = { path = "dragonfly-client-storage", version = "1.0.10" }
dragonfly-client-backend = { path = "dragonfly-client-backend", version = "1.0.10" }
dragonfly-client-util = { path = "dragonfly-client-util", version = "1.0.10" }
dragonfly-client-init = { path = "dragonfly-client-init", version = "1.0.10" }
dragonfly-api = "2.1.57"
thiserror = "2.0"
futures = "0.3.31"
reqwest = { version = "0.12.4", features = [
"stream",
"native-tls",
"default-tls",
"rustls-tls",
"gzip",
"brotli",
"zstd",
"deflate",
"blocking",
] }
reqwest-middleware = "0.4"
rcgen = { version = "0.12.1", features = ["x509-parser"] }
hyper = { version = "1.6", features = ["full"] }
hyper-util = { version = "0.1.16", features = [
"client",
"client-legacy",
"tokio",
"server-auto",
"http1",
"http2",
] }
hyper-rustls = { version = "0.26", features = ["http1", "http2", "logging"] }
http-range-header = "0.4.2"
[[bin]]
name = "dfdaemon"
path = "src/bin/dfdaemon/main.rs"
[[bin]]
name = "dfget"
path = "src/bin/dfget/main.rs"
[[bin]]
name = "dfstore"
path = "src/bin/dfstore/main.rs"
[dependencies]
clap = { version = "4.1.13", features = [ "derive" ] }
home = "0.5.4"
tracing = "0.1"
url = "2.5.4"
rustls = { version = "0.22.4", features = ["tls12"] }
rustls-pki-types = "1.12.0"
rustls-pemfile = "2.2.0"
sha2 = "0.10"
crc32fast = "1.5.0"
uuid = { version = "1.16", features = ["v4"] }
hex = "0.4"
rocksdb = "0.22.0"
tracing-log = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
tracing-appender = "0.2.3"
tracing-opentelemetry = "0.18.0"
humantime = "2.1.0"
serde = { version = "1.0", features = ["derive"] }
serde_yaml = "0.9"
http = "1"
tonic = { version = "0.12.2", features = ["tls"] }
tonic-reflection = "0.12.3"
tokio = { version = "1.47.1", features = ["full", "tracing"] }
tokio-util = { version = "0.7.16", features = ["full"] }
tokio-stream = "0.1.17"
serde_json = "1.0"
validator = { version = "0.16", features = ["derive"] }
thiserror = "1.0"
opentelemetry = { version = "0.18.0", default-features = false, features = ["trace", "rt-tokio"] }
opentelemetry-jaeger = { version = "0.17.0", features = ["rt-tokio"] }
lazy_static = "1.4"
prometheus = "0.13.3"
warp = "0.3.5"
headers = "0.4.1"
regex = "1.11.1"
humantime = "2.1.0"
prost-wkt-types = "0.6"
chrono = { version = "0.4.41", features = ["serde", "clock"] }
openssl = { version = "0.10", features = ["vendored"] }
opendal = { version = "0.48.0", features = [
"services-s3",
"services-azblob",
"services-gcs",
"services-oss",
"services-obs",
"services-cos",
"services-webhdfs",
] }
clap = { version = "4.5.45", features = ["derive"] }
anyhow = "1.0.98"
toml_edit = "0.22.26"
toml = "0.8.23"
bytesize = { version = "1.3.3", features = ["serde"] }
bytesize-serde = "0.2.1"
percent-encoding = "2.3.1"
tempfile = "3.20.0"
tokio-rustls = "0.25.0-alpha.4"
serde_json = "1.0.142"
lru = "0.12.5"
tonic = { version = "0.9.2", features = ["gzip"] }
tonic-health = "0.9.2"
tonic-reflection = "0.9.2"
tokio-stream = "0.1.14"
reqwest = { version = "0.11.18", features = ["stream"] }
futures = "0.3.28"
tokio = { version = "1.29.1", features = ["full"] }
tokio-util = { version = "0.7.8", features = ["full"] }
bytes = "1.4"
hostname = "^0.3"
local-ip-address = "0.5.3"
rocksdb = "0.21.0"
num_cpus = "1.0"
chrono = { version = "0.4.26", features = ["serde"] }
dragonfly-api = "2.0.73"
sysinfo = "0.29.6"
sha2 = "0.10"
hex = "0.4"
uuid = { version = "1.4", features = ["v4"] }
url = "2.4.0"
async-stream = "0.3.5"
anyhow = "1.0.75"
base16ct = { version = "0.2", features = ["alloc"] }
http = "0.2"
rand = "0.8.5"
prost-wkt-types = "0.4"
tower = "0.4.13"
indicatif = "0.17.7"
dashmap = "5.5.3"
fs2 = "0.4.3"
lazy_static = "1.5"
bytes = "1.10"
local-ip-address = "0.6.5"
sysinfo = { version = "0.32.1", default-features = false, features = ["component", "disk", "network", "system", "user"] }
[profile.release]
opt-level = 3
lto = "thin"
codegen-units = 1
panic = "abort"
strip = "symbols"
[profile.dev]
opt-level = 0
debug = true
incremental = true
strip = false
[profile.bench]
debug = true
hashring = "0.3.3"
libc = "0.2"
fslock = "0.2.1"
openssl = { version = "0.10", features = ["vendored"] }

32
Dockerfile Normal file
View File

@ -0,0 +1,32 @@
ARG BASE_IMAGE=rust
FROM ${BASE_IMAGE} as builder
RUN apt-get update && apt-get install -y openssl libclang-dev pkg-config protobuf-compiler
WORKDIR /app/client
COPY Cargo.toml ./
COPY src/ src/
RUN cargo build --release
FROM alpine:3.17 as health
ENV GRPC_HEALTH_PROBE_VERSION v0.4.24
RUN if [ "$(uname -m)" = "ppc64le" ]; then \
wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-ppc64le; \
elif [ "$(uname -m)" = "aarch64" ]; then \
wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-arm64; \
else \
wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-amd64; \
fi && \
chmod +x /bin/grpc_health_probe
FROM ${BASE_IMAGE}
COPY --from=builder /app/client/target/release/dfget /usr/local/bin/dfget
COPY --from=builder /app/client/target/release/dfdaemon /usr/local/bin/dfdaemon
COPY --from=builder /app/client/target/release/dfstore /usr/local/bin/dfstore
COPY --from=health /bin/grpc_health_probe /bin/grpc_health_probe
ENTRYPOINT ["/usr/local/bin/dfdaemon"]

View File

@ -2,12 +2,11 @@
[![GitHub release](https://img.shields.io/github/release/dragonflyoss/client.svg)](https://github.com/dragonflyoss/client/releases)
[![CI](https://github.com/dragonflyoss/client/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/dragonflyoss/client/actions/workflows/ci.yml)
[![Coverage](https://codecov.io/gh/dragonflyoss/client/branch/main/graph/badge.svg)](https://codecov.io/gh/dragonflyoss/dfdaemon)
[![Coverage](https://codecov.io/gh/dragonflyoss/client/branch/main/graph/badge.svg)](https://codecov.io/gh/dragonflyoss/dfdameon)
[![Open Source Helpers](https://www.codetriage.com/dragonflyoss/client/badges/users.svg)](https://www.codetriage.com/dragonflyoss/client)
[![Discussions](https://img.shields.io/badge/discussions-on%20github-blue?style=flat-square)](https://github.com/dragonflyoss/dragonfly/discussions)
[![Discussions](https://img.shields.io/badge/discussions-on%20github-blue?style=flat-square)](https://github.com/dragonflyoss/Dragonfly2/discussions)
[![Twitter](https://img.shields.io/twitter/url?style=social&url=https%3A%2F%2Ftwitter.com%2Fdragonfly_oss)](https://twitter.com/dragonfly_oss)
[![LICENSE](https://img.shields.io/github/license/dragonflyoss/dragonfly.svg?style=flat-square)](https://github.com/dragonflyoss/dragonfly/blob/main/LICENSE)
[![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2Fdragonflyoss%2Fclient.svg?type=shield)](https://app.fossa.com/projects/git%2Bgithub.com%2Fdragonflyoss%2Fclient?ref=badge_shield)
[![LICENSE](https://img.shields.io/github/license/dragonflyoss/Dragonfly2.svg?style=flat-square)](https://github.com/dragonflyoss/Dragonfly2/blob/main/LICENSE)
Dragonfly client written in Rust. It can serve as both a peer and a seed peer.
@ -20,9 +19,9 @@ You can find the full documentation on the [d7y.io](https://d7y.io).
Join the conversation and help the community.
- **Slack Channel**: [#dragonfly](https://cloud-native.slack.com/messages/dragonfly/) on [CNCF Slack](https://slack.cncf.io/)
- **Github Discussions**: [Dragonfly Discussion Forum](https://github.com/dragonflyoss/dragonfly/discussions)
- **Discussion Group**: <dragonfly-discuss@googlegroups.com>
- **Developer Group**: <dragonfly-developers@googlegroups.com>
- **Maintainer Group**: <dragonfly-maintainers@googlegroups.com>
- **Github Discussions**: [Dragonfly Discussion Forum](https://github.com/dragonflyoss/Dragonfly2/discussions)
- **Twitter**: [@dragonfly_oss](https://twitter.com/dragonfly_oss)
- **DingTalk**: [22880028764](https://qr.dingtalk.com/action/joingroup?code=v1,k1,pkV9IbsSyDusFQdByPSK3HfCG61ZCLeb8b/lpQ3uUqI=&_dt_no_comment=1&origin=11)

View File

@ -1,79 +0,0 @@
FROM public.ecr.aws/docker/library/rust:1.85.0 AS builder
WORKDIR /app/client
RUN apt-get update && apt-get install -y \
openssl libclang-dev pkg-config protobuf-compiler git \
&& rm -rf /var/lib/apt/lists/*
COPY Cargo.toml Cargo.lock ./
COPY .cargo ./cargo
COPY dragonfly-client/Cargo.toml ./dragonfly-client/Cargo.toml
COPY dragonfly-client/src ./dragonfly-client/src
COPY dragonfly-client-core/Cargo.toml ./dragonfly-client-core/Cargo.toml
COPY dragonfly-client-core/src ./dragonfly-client-core/src
COPY dragonfly-client-config/Cargo.toml ./dragonfly-client-config/Cargo.toml
COPY dragonfly-client-config/src ./dragonfly-client-config/src
COPY dragonfly-client-config/build.rs ./dragonfly-client-config/build.rs
COPY dragonfly-client-storage/Cargo.toml ./dragonfly-client-storage/Cargo.toml
COPY dragonfly-client-storage/src ./dragonfly-client-storage/src
COPY dragonfly-client-storage/benches ./dragonfly-client-storage/benches
COPY dragonfly-client-backend/Cargo.toml ./dragonfly-client-backend/Cargo.toml
COPY dragonfly-client-backend/src ./dragonfly-client-backend/src
COPY dragonfly-client-backend/examples/plugin/Cargo.toml ./dragonfly-client-backend/examples/plugin/Cargo.toml
COPY dragonfly-client-backend/examples/plugin/src ./dragonfly-client-backend/examples/plugin/src
COPY dragonfly-client-util/Cargo.toml ./dragonfly-client-util/Cargo.toml
COPY dragonfly-client-util/src ./dragonfly-client-util/src
COPY dragonfly-client-init/Cargo.toml ./dragonfly-client-init/Cargo.toml
COPY dragonfly-client-init/src ./dragonfly-client-init/src
ARG TARGETPLATFORM
RUN case "${TARGETPLATFORM}" in \
"linux/arm64") export JEMALLOC_SYS_WITH_LG_PAGE=16;; \
esac && \
cargo build --release --verbose --bin dfget --bin dfdaemon --bin dfcache
RUN cargo install tokio-console --locked --root /usr/local
FROM public.ecr.aws/docker/library/alpine:3.20 AS health
ENV GRPC_HEALTH_PROBE_VERSION=v0.4.24
RUN if [ "$(uname -m)" = "ppc64le" ]; then \
wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-ppc64le; \
elif [ "$(uname -m)" = "aarch64" ]; then \
wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-arm64; \
else \
wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-amd64; \
fi && \
chmod +x /bin/grpc_health_probe
FROM public.ecr.aws/docker/library/golang:1.23.0-alpine3.20 AS pprof
RUN go install github.com/google/pprof@latest
RUN go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest
FROM public.ecr.aws/debian/debian:bookworm-slim
RUN apt-get update && apt-get install -y --no-install-recommends iperf3 fio curl \
iotop sysstat bash-completion procps apache2-utils ca-certificates binutils \
dnsutils iputils-ping llvm graphviz lsof strace dstat net-tools \
&& rm -rf /var/lib/apt/lists/*
COPY --from=builder /app/client/target/release/dfget /usr/local/bin/dfget
COPY --from=builder /app/client/target/release/dfdaemon /usr/local/bin/dfdaemon
COPY --from=builder /app/client/target/release/dfcache /usr/local/bin/dfcache
COPY --from=builder /usr/local/bin/tokio-console /usr/local/bin/
COPY --from=pprof /go/bin/pprof /bin/pprof
COPY --from=pprof /go/bin/grpcurl /bin/grpcurl
COPY --from=health /bin/grpc_health_probe /bin/grpc_health_probe
ENTRYPOINT ["/usr/local/bin/dfdaemon"]

View File

@ -1,83 +0,0 @@
# Debug image for the Dragonfly client. It ships binaries built with the debug
# profile (no --release) together with profiling and troubleshooting tools.

# Stage "builder": compile the client binaries and the Rust-based diagnostic tools.
FROM public.ecr.aws/docker/library/rust:1.85.0 AS builder

WORKDIR /app/client

RUN apt-get update && apt-get install -y \
    openssl libclang-dev pkg-config protobuf-compiler git \
    && rm -rf /var/lib/apt/lists/*

COPY Cargo.toml Cargo.lock ./
# Cargo only discovers its configuration in a directory named `.cargo`, so the
# destination must keep the leading dot; copying to `./cargo` would silently
# drop the workspace build configuration (e.g. rustflags).
COPY .cargo ./.cargo

COPY dragonfly-client/Cargo.toml ./dragonfly-client/Cargo.toml
COPY dragonfly-client/src ./dragonfly-client/src

COPY dragonfly-client-core/Cargo.toml ./dragonfly-client-core/Cargo.toml
COPY dragonfly-client-core/src ./dragonfly-client-core/src

COPY dragonfly-client-config/Cargo.toml ./dragonfly-client-config/Cargo.toml
COPY dragonfly-client-config/src ./dragonfly-client-config/src
COPY dragonfly-client-config/build.rs ./dragonfly-client-config/build.rs

COPY dragonfly-client-storage/Cargo.toml ./dragonfly-client-storage/Cargo.toml
COPY dragonfly-client-storage/src ./dragonfly-client-storage/src
COPY dragonfly-client-storage/benches ./dragonfly-client-storage/benches

COPY dragonfly-client-backend/Cargo.toml ./dragonfly-client-backend/Cargo.toml
COPY dragonfly-client-backend/src ./dragonfly-client-backend/src

COPY dragonfly-client-backend/examples/plugin/Cargo.toml ./dragonfly-client-backend/examples/plugin/Cargo.toml
COPY dragonfly-client-backend/examples/plugin/src ./dragonfly-client-backend/examples/plugin/src

COPY dragonfly-client-util/Cargo.toml ./dragonfly-client-util/Cargo.toml
COPY dragonfly-client-util/src ./dragonfly-client-util/src

COPY dragonfly-client-init/Cargo.toml ./dragonfly-client-init/Cargo.toml
COPY dragonfly-client-init/src ./dragonfly-client-init/src

ARG TARGETPLATFORM
# On linux/arm64 JEMALLOC_SYS_WITH_LG_PAGE=16 is exported before the build.
# No --release flag: the binaries land in target/debug.
RUN case "${TARGETPLATFORM}" in \
    "linux/arm64") export JEMALLOC_SYS_WITH_LG_PAGE=16;; \
    esac && \
    cargo build --verbose --bin dfget --bin dfdaemon --bin dfcache

# Diagnostic tools installed into /usr/local/bin so the final stage can copy them.
RUN cargo install flamegraph --root /usr/local
RUN cargo install bottom --locked --root /usr/local
RUN cargo install tokio-console --locked --root /usr/local

# Stage "health": download the grpc_health_probe binary matching the build machine architecture.
FROM public.ecr.aws/docker/library/alpine:3.20 AS health

ENV GRPC_HEALTH_PROBE_VERSION=v0.4.24

RUN if [ "$(uname -m)" = "ppc64le" ]; then \
    wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-ppc64le; \
    elif [ "$(uname -m)" = "aarch64" ]; then \
    wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-arm64; \
    else \
    wget -qO/bin/grpc_health_probe https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/${GRPC_HEALTH_PROBE_VERSION}/grpc_health_probe-linux-amd64; \
    fi && \
    chmod +x /bin/grpc_health_probe

# Stage "pprof": build the Go-based pprof and grpcurl tools.
FROM public.ecr.aws/docker/library/golang:1.23.0-alpine3.20 AS pprof

RUN go install github.com/google/pprof@latest
RUN go install github.com/fullstorydev/grpcurl/cmd/grpcurl@latest

# Final stage: runtime image with troubleshooting packages and all collected binaries.
FROM public.ecr.aws/debian/debian:bookworm-slim

RUN apt-get update && apt-get install -y --no-install-recommends iperf3 fio curl infiniband-diags ibverbs-utils \
    iotop sysstat bash-completion procps apache2-utils ca-certificates binutils bpfcc-tools \
    dnsutils iputils-ping vim linux-perf llvm lsof socat strace dstat net-tools \
    && rm -rf /var/lib/apt/lists/*

COPY --from=builder /app/client/target/debug/dfget /usr/local/bin/dfget
COPY --from=builder /app/client/target/debug/dfdaemon /usr/local/bin/dfdaemon
COPY --from=builder /app/client/target/debug/dfcache /usr/local/bin/dfcache
COPY --from=builder /usr/local/bin/flamegraph /usr/local/bin/
COPY --from=builder /usr/local/bin/btm /usr/local/bin/
COPY --from=builder /usr/local/bin/tokio-console /usr/local/bin/
COPY --from=pprof /go/bin/pprof /bin/pprof
COPY --from=pprof /go/bin/grpcurl /bin/grpcurl
COPY --from=health /bin/grpc_health_probe /bin/grpc_health_probe

ENTRYPOINT ["/usr/local/bin/dfdaemon"]

View File

@ -1,51 +0,0 @@
# Image for dfinit: builds the release binary and ships it on a slim Debian base.

# Stage "builder": compile the dfinit binary.
FROM public.ecr.aws/docker/library/rust:1.85.0 AS builder

RUN apt-get update && apt-get install -y \
    openssl libclang-dev pkg-config protobuf-compiler \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app/client

COPY Cargo.toml Cargo.lock ./
# Cargo only discovers its configuration in a directory named `.cargo`, so the
# destination must keep the leading dot; copying to `./cargo` would silently
# drop the workspace build configuration (e.g. rustflags).
COPY .cargo ./.cargo

COPY dragonfly-client/Cargo.toml ./dragonfly-client/Cargo.toml
COPY dragonfly-client/src ./dragonfly-client/src

COPY dragonfly-client-core/Cargo.toml ./dragonfly-client-core/Cargo.toml
COPY dragonfly-client-core/src ./dragonfly-client-core/src

COPY dragonfly-client-config/Cargo.toml ./dragonfly-client-config/Cargo.toml
COPY dragonfly-client-config/src ./dragonfly-client-config/src
COPY dragonfly-client-config/build.rs ./dragonfly-client-config/build.rs

COPY dragonfly-client-storage/Cargo.toml ./dragonfly-client-storage/Cargo.toml
COPY dragonfly-client-storage/src ./dragonfly-client-storage/src
COPY dragonfly-client-storage/benches ./dragonfly-client-storage/benches

COPY dragonfly-client-backend/Cargo.toml ./dragonfly-client-backend/Cargo.toml
COPY dragonfly-client-backend/src ./dragonfly-client-backend/src

COPY dragonfly-client-backend/examples/plugin/Cargo.toml ./dragonfly-client-backend/examples/plugin/Cargo.toml
COPY dragonfly-client-backend/examples/plugin/src ./dragonfly-client-backend/examples/plugin/src

COPY dragonfly-client-util/Cargo.toml ./dragonfly-client-util/Cargo.toml
COPY dragonfly-client-util/src ./dragonfly-client-util/src

COPY dragonfly-client-init/Cargo.toml ./dragonfly-client-init/Cargo.toml
COPY dragonfly-client-init/src ./dragonfly-client-init/src

ARG TARGETPLATFORM
# On linux/arm64 JEMALLOC_SYS_WITH_LG_PAGE=16 is exported before the build.
RUN case "${TARGETPLATFORM}" in \
    "linux/arm64") export JEMALLOC_SYS_WITH_LG_PAGE=16;; \
    esac && \
    cargo build --release --verbose --bin dfinit

# Final stage: runtime image containing only wget and the dfinit binary.
FROM public.ecr.aws/debian/debian:bookworm-slim

RUN apt-get update && apt-get install -y --no-install-recommends wget \
    && rm -rf /var/lib/apt/lists/*

COPY --from=builder /app/client/target/release/dfinit /usr/local/bin/dfinit

ENTRYPOINT ["/usr/local/bin/dfinit"]

View File

@ -1,25 +0,0 @@
# systemd unit that runs the Dragonfly dfdaemon as a long-lived service.
[Unit]
Description=dfdaemon is a high performance P2P download daemon
Documentation=https://github.com/dragonflyoss/client
# Order startup after the network targets.
After=network-online.target
After=network.target
[Service]
# Start dfdaemon with the system-wide configuration file, passing --console.
ExecStart=/usr/bin/dfdaemon --config /etc/dragonfly/dfdaemon.yaml --console
Type=simple
Environment=HOME=/root
# NOTE(review): on-success restarts the service only after a clean exit; crashes and
# non-zero exits are not restarted. Confirm this is intended rather than on-failure/always.
Restart=on-success
# Limit CPU usage to 1 CPU core.
CPUQuota=100%
# Relative CPU scheduling weight of this unit compared with other units.
CPUWeight=400
# Limit memory usage to 8G; beyond that the process will be OOM killed.
MemoryMax=8G
[Install]
WantedBy=multi-user.target

Binary file not shown.

Before

Width:  |  Height:  |  Size: 40 KiB

View File

@ -1,98 +0,0 @@
# Performance Optimization Guidance
This document describes a reference benchmarking process designed to
assist with performance analysis and optimization of the **client**.
It provides a testing framework that is as general as possible,
so that developers can adapt it
to their specific circumstances across various platforms.
## Flow
![architecture](images/performance-testing-arch.png)
## Preparation
### Step 1: Setup Dragonfly
- Please refer to [official doc](https://d7y.io/docs/next/getting-started/installation/helm-charts/).
### Step 2: Start a file server
- Start with docker:
```bash
export FILE_SERVER_PORT=12345
docker run -d --rm -p ${FILE_SERVER_PORT}:80 --name dragonfly-fs dragonflyoss/file-server:latest
```
- Check the file server is ready:
```bash
# return success if ready
curl -s -o /dev/null \
-w "%{http_code}" \
http://localhost:12345/nano \
| grep -q "200" \
&& echo "Success" \
|| echo "Failed"
```
- Optional:
> You can build your own image; for reference, see the [**Dockerfile**](https://github.com/dragonflyoss/perf-tests/blob/main/tools/file-server/Dockerfile).
### Step 3: Install test tools
- Request Generator: [**oha**](https://github.com/hatoo/oha)
```bash
brew install oha
```
- Profiling: [**flamegraph**](https://github.com/flamegraph-rs/flamegraph)
```bash
cargo install flamegraph
```
### Step 4: Setup Dragonfly Peer
> Document: [Install with binary](https://d7y.io/docs/next/getting-started/installation/binaries/).
- Compile the target binary
```bash
cargo build --release --bin dfdaemon
```
- Connect to Dragonfly
```bash
# prepare client.yaml by yourself.
./target/release/dfdaemon --config client.yaml -l info --console
```
## FlameGraph
Now, let's start the benchmark with the following parameters:
- $FILE_SERVER_ADDRESS
- $CLIENT_PROXY_ADDRESS
### Collect Flamegraph
- Capture the flamegraph:
```bash
## stop after all requests done.
sudo flamegraph -o my_flamegraph.svg --pid 3442
```
- Make the request:
```bash
oha -c 1000 \
-n 100 \
--rand-regex-url $FILE_SERVER_ADDRESS/\(nano\|micro\|small\|medium\|large\) \
-x $CLIENT_PROXY_ADDRESS
```

View File

@ -1,40 +0,0 @@
[package]
name = "dragonfly-client-backend"
description = "Backend for the dragonfly client"
version.workspace = true
authors.workspace = true
homepage.workspace = true
repository.workspace = true
keywords.workspace = true
license.workspace = true
edition.workspace = true
[dependencies]
dragonfly-client-core.workspace = true
dragonfly-client-util.workspace = true
dragonfly-api.workspace = true
reqwest.workspace = true
reqwest-middleware.workspace = true
tokio.workspace = true
tokio-util.workspace = true
rustls.workspace = true
rustls-pki-types.workspace = true
tonic.workspace = true
url.workspace = true
tracing.workspace = true
opendal.workspace = true
percent-encoding.workspace = true
futures.workspace = true
reqwest-retry = "0.7"
reqwest-tracing = "0.5"
libloading = "0.8.8"
[dev-dependencies]
tempfile.workspace = true
wiremock = "0.6.4"
rustls-pki-types.workspace = true
rustls-pemfile.workspace = true
hyper.workspace = true
hyper-util.workspace = true
tokio-rustls.workspace = true
rcgen.workspace = true

View File

@ -1,19 +0,0 @@
[package]
name = "hdfs"
description = "An example of HDFS plugin for the Dragonfly client backend"
version.workspace = true
authors.workspace = true
homepage.workspace = true
repository.workspace = true
keywords.workspace = true
license.workspace = true
edition.workspace = true
publish = false
[lib]
crate-type = ["dylib"]
[dependencies]
dragonfly-client-core.workspace = true
dragonfly-client-backend.workspace = true
tonic.workspace = true

View File

@ -1,27 +0,0 @@
# Example of Backend Plugin
An example of HDFS plugin for the Dragonfly client backend.
## Build Plugin
Build the plugin and move it to the plugin directory. If you use the plugin on macOS,
replace `libhdfs.so` with `libhdfs.dylib`.
```shell
cargo build --all && mv target/debug/libhdfs.so {plugin_dir}/backend/libhdfs.so
```
## Run Client with Plugin
```shell
$ cargo run --bin dfdaemon -- --config {config_dir}/config.yaml -l info --console
INFO load [http] builtin backend
INFO load [https] builtin backend
INFO load [hdfs] plugin backend
```
## Download Task with Plugin
```shell
cargo run --bin dfget hdfs://example.com/file -O file
```

View File

@ -1,56 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_client_backend::{Backend, Body, GetRequest, GetResponse, HeadRequest, HeadResponse};
use dragonfly_client_core::{Error, Result};
/// Hdfs is the example backend plugin type; it carries no state.
struct Hdfs;

/// Inherent constructor for the Hdfs plugin.
impl Hdfs {
    /// new creates the Hdfs plugin backend.
    pub fn new() -> Self {
        Hdfs
    }
}
/// Backend implementation for the example Hdfs plugin. Both operations only
/// print the requested URL and report that they are unimplemented.
#[tonic::async_trait]
impl Backend for Hdfs {
    /// scheme reports the URL scheme served by this backend.
    fn scheme(&self) -> String {
        String::from("hdfs")
    }

    /// head prints the requested URL and returns `Error::Unimplemented`.
    async fn head(&self, request: HeadRequest) -> Result<HeadResponse> {
        let url = &request.url;
        println!("HDFS head url: {}", url);

        Err(Error::Unimplemented)
    }

    /// get prints the requested URL and returns `Error::Unimplemented`.
    async fn get(&self, request: GetRequest) -> Result<GetResponse<Body>> {
        let url = &request.url;
        println!("HDFS get url: {}", url);

        Err(Error::Unimplemented)
    }
}
/// register_plugin constructs the HDFS plugin and hands it back as a boxed
/// `Backend` trait object. The symbol is exported unmangled (`#[no_mangle]`)
/// and is used to register the HDFS plugin to the Backend.
#[no_mangle]
pub fn register_plugin() -> Box<dyn Backend + Send + Sync> {
    let plugin = Hdfs::new();
    Box::new(plugin)
}

View File

@ -1,272 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_api::common;
use dragonfly_client_core::error::BackendError;
use dragonfly_client_core::{Error as ClientError, Result as ClientResult};
use opendal::{layers::TimeoutLayer, Metakey, Operator};
use percent_encoding::percent_decode_str;
use std::time::Duration;
use tokio_util::io::StreamReader;
use tracing::{error, info, instrument};
use url::Url;
/// HDFS_SCHEME is the scheme of the HDFS.
pub const HDFS_SCHEME: &str = "hdfs";

/// DEFAULT_NAMENODE_PORT is the default port of the HDFS namenode, used when
/// the request URL does not carry an explicit port.
const DEFAULT_NAMENODE_PORT: u16 = 9870;

/// Hdfs is a struct that implements the Backend trait.
pub struct Hdfs {
    /// scheme is the scheme of the HDFS.
    scheme: String,
}

/// Default builds the backend with the HDFS scheme. A derived `Default` would
/// leave `scheme` empty, so the default value would report an empty scheme.
impl Default for Hdfs {
    fn default() -> Self {
        Self {
            scheme: HDFS_SCHEME.to_string(),
        }
    }
}
/// Inherent methods of the HDFS backend.
impl Hdfs {
    /// new returns a new HDFS backend.
    pub fn new() -> Self {
        let scheme = HDFS_SCHEME.to_string();
        Self { scheme }
    }

    /// operator initializes the operator with the parsed URL and HDFS config.
    ///
    /// Returns `ClientError::InvalidURI` when the URL has no host. The port
    /// falls back to `DEFAULT_NAMENODE_PORT` when the URL does not specify one.
    pub fn operator(
        &self,
        url: Url,
        config: Option<common::v2::Hdfs>,
        timeout: Duration,
    ) -> ClientResult<Operator> {
        // Get the host and port from the URL; the host is mandatory.
        let host = match url.host_str() {
            Some(host) => host.to_string(),
            None => return Err(ClientError::InvalidURI(url.to_string())),
        };
        let port = url.port().unwrap_or(DEFAULT_NAMENODE_PORT);
        let endpoint = format!("http://{}:{}", host, port);

        // Initialize the HDFS operator rooted at "/".
        let mut builder = opendal::services::Webhdfs::default()
            .root("/")
            .endpoint(endpoint.as_str());

        // Apply the delegation token only when the HDFS config provides one.
        let delegation_token = config
            .as_ref()
            .and_then(|config| config.delegation_token.as_ref());
        if let Some(delegation_token) = delegation_token {
            builder = builder.delegation(delegation_token.as_str());
        }

        let operator = Operator::new(builder)?.finish();
        Ok(operator.layer(TimeoutLayer::new().with_timeout(timeout)))
    }
}
/// Implement the Backend trait for Hdfs.
#[tonic::async_trait]
impl super::Backend for Hdfs {
/// scheme returns the scheme of the HDFS backend.
fn scheme(&self) -> String {
self.scheme.clone()
}
/// head gets the header of the request.
#[instrument(skip_all)]
async fn head(&self, request: super::HeadRequest) -> ClientResult<super::HeadResponse> {
info!(
"head request {} {}: {:?}",
request.task_id, request.url, request.http_header
);
// Parse the URL.
let url = Url::parse(request.url.as_ref())
.map_err(|_| ClientError::InvalidURI(request.url.clone()))?;
let decoded_path = percent_decode_str(url.path())
.decode_utf8_lossy()
.to_string();
// Initialize the operator with the parsed URL and HDFS config.
let operator = self.operator(url.clone(), request.hdfs, request.timeout)?;
// Get the entries if url point to a directory.
let entries = if url.path().ends_with('/') {
operator
.list_with(decoded_path.as_str())
.recursive(true)
.metakey(Metakey::ContentLength | Metakey::Mode)
.await // Do the list op here.
.map_err(|err| {
error!(
"list request failed {} {}: {}",
request.task_id, request.url, err
);
ClientError::BackendError(Box::new(BackendError {
message: err.to_string(),
status_code: None,
header: None,
}))
})?
.into_iter()
.map(|entry| {
let metadata = entry.metadata();
let mut url = url.clone();
url.set_path(entry.path());
super::DirEntry {
url: url.to_string(),
content_length: metadata.content_length() as usize,
is_dir: metadata.is_dir(),
}
})
.collect()
} else {
Vec::new()
};
// Stat the path to get the response from HDFS operator.
let response = operator
.stat_with(decoded_path.as_str())
.await
.map_err(|err| {
error!(
"stat request failed {} {}: {}",
request.task_id, request.url, err
);
ClientError::BackendError(Box::new(BackendError {
message: err.to_string(),
status_code: None,
header: None,
}))
})?;
info!(
"head response {} {}: {}",
request.task_id,
request.url,
response.content_length()
);
Ok(super::HeadResponse {
success: true,
content_length: Some(response.content_length()),
http_header: None,
http_status_code: None,
error_message: None,
entries,
})
}
/// get returns content of requested file.
#[instrument(skip_all)]
async fn get(
&self,
request: super::GetRequest,
) -> ClientResult<super::GetResponse<super::Body>> {
info!(
"get request {} {}: {:?}",
request.piece_id, request.url, request.http_header
);
// Parse the URL.
let url = Url::parse(request.url.as_ref())
.map_err(|_| ClientError::InvalidURI(request.url.clone()))?;
let decoded_path = percent_decode_str(url.path())
.decode_utf8_lossy()
.to_string();
// Initialize the operator with the parsed URL and HDFS config.
let operator_reader = self
.operator(url.clone(), request.hdfs, request.timeout)?
.reader(decoded_path.as_ref())
.await
.map_err(|err| {
error!(
"get request failed {} {}: {}",
request.piece_id, request.url, err
);
ClientError::BackendError(Box::new(BackendError {
message: err.to_string(),
status_code: None,
header: None,
}))
})?;
let stream = match request.range {
Some(range) => operator_reader
.into_bytes_stream(range.start..range.start + range.length)
.await
.map_err(|err| {
error!(
"get request failed {} {}: {}",
request.piece_id, request.url, err
);
ClientError::BackendError(Box::new(BackendError {
message: err.to_string(),
status_code: None,
header: None,
}))
})?,
None => operator_reader.into_bytes_stream(..).await.map_err(|err| {
error!(
"get request failed {} {}: {}",
request.piece_id, request.url, err
);
ClientError::BackendError(Box::new(BackendError {
message: err.to_string(),
status_code: None,
header: None,
}))
})?,
};
Ok(crate::GetResponse {
success: true,
http_header: None,
http_status_code: Some(reqwest::StatusCode::OK),
reader: Box::new(StreamReader::new(stream)),
error_message: None,
})
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A URL with host and port must yield an operator.
    #[tokio::test]
    async fn should_get_operator() {
        let url = Url::parse("hdfs://127.0.0.1:9870/file").unwrap();
        let backend = Hdfs::new();

        if let Err(err) = backend.operator(url, None, Duration::from_secs(10)) {
            panic!("can not get hdfs operator, due to: {}", err);
        }
    }

    /// A URL without a host must be rejected with `InvalidURI`.
    #[test]
    fn should_return_error_when_url_not_valid() {
        let url = Url::parse("hdfs:/127.0.0.1:9870/file").unwrap();
        let backend = Hdfs::new();

        let result = backend.operator(url, None, Duration::from_secs(10));
        assert!(matches!(result, Err(ClientError::InvalidURI(..))));
    }
}

View File

@ -1,626 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_client_core::{Error, Result};
use dragonfly_client_util::tls::NoVerifier;
use futures::TryStreamExt;
use reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
use reqwest_tracing::TracingMiddleware;
use rustls_pki_types::CertificateDer;
use std::io::{Error as IOError, ErrorKind};
use tokio_util::io::StreamReader;
use tracing::{debug, error, instrument};
/// HTTP_SCHEME is the HTTP scheme.
pub const HTTP_SCHEME: &str = "http";
/// HTTPS_SCHEME is the HTTPS scheme.
pub const HTTPS_SCHEME: &str = "https";
/// HTTP is the HTTP backend. It serves the scheme it was constructed with and
/// keeps one shared client for requests that carry no client certificate.
pub struct HTTP {
/// scheme is the scheme of the HTTP backend, as passed to `new`.
scheme: String,
/// client is the shared reqwest client wrapped with the tracing and retry
/// middleware. It is built without TLS certificate validation.
client: ClientWithMiddleware,
}
/// HTTP implements the http interface.
impl HTTP {
    /// new returns a new HTTP backend for the given scheme.
    ///
    /// The shared client performs no TLS certificate validation and is used
    /// for every request that does not carry its own certificates.
    pub fn new(scheme: &str) -> Result<HTTP> {
        // Default TLS client config with no validation.
        let client_config_builder = rustls::ClientConfig::builder()
            .dangerous()
            .with_custom_certificate_verifier(NoVerifier::new())
            .with_no_client_auth();

        let client = Self::uncompressed_client_builder()
            .use_preconfigured_tls(client_config_builder)
            .pool_max_idle_per_host(super::POOL_MAX_IDLE_PER_HOST)
            .tcp_keepalive(super::KEEP_ALIVE_INTERVAL)
            .build()?;

        Ok(Self {
            scheme: scheme.to_string(),
            client: Self::with_middleware(client),
        })
    }

    /// uncompressed_client_builder returns a reqwest client builder with every
    /// automatic decompression format disabled.
    ///
    /// Disable automatic compression to prevent double-decompression issues.
    ///
    /// Problem scenario:
    /// 1. Origin server supports gzip and returns "content-encoding: gzip" header.
    /// 2. Backend decompresses the response and stores uncompressed content to disk.
    /// 3. When user's client downloads via dfdaemon proxy, the original
    ///    "content-encoding: gzip" header is forwarded to it.
    /// 4. User's client attempts to decompress the already-decompressed content,
    ///    causing errors.
    ///
    /// Solution: Disable all compression formats (gzip, brotli, zstd, deflate) so
    /// the content is received and stored exactly as the origin sent it, and the
    /// forwarded "content-encoding" header stays accurate.
    fn uncompressed_client_builder() -> reqwest::ClientBuilder {
        reqwest::Client::builder()
            .no_gzip()
            .no_brotli()
            .no_zstd()
            .no_deflate()
    }

    /// with_middleware wraps a reqwest client with the tracing middleware and the
    /// transient-error retry middleware (exponential backoff, at most
    /// `MAX_RETRY_TIMES` retries).
    fn with_middleware(client: reqwest::Client) -> ClientWithMiddleware {
        let retry_policy =
            ExponentialBackoff::builder().build_with_max_retries(super::MAX_RETRY_TIMES);

        ClientBuilder::new(client)
            .with(TracingMiddleware::default())
            .with(RetryTransientMiddleware::new_with_policy(retry_policy))
            .build()
    }

    /// client returns the reqwest client to use for a request.
    ///
    /// With `client_cert` set, a new client is built whose TLS config trusts
    /// exactly those certificates as roots. Without it, a clone of the shared
    /// non-validating client is returned.
    fn client(
        &self,
        client_cert: Option<Vec<CertificateDer<'static>>>,
    ) -> Result<ClientWithMiddleware> {
        match client_cert {
            Some(client_cert) => {
                // The certificates are owned here, so they are moved into the
                // store instead of being cloned.
                let mut root_cert_store = rustls::RootCertStore::empty();
                root_cert_store.add_parsable_certificates(client_cert);

                // TLS client config using the custom CA store for lookups.
                let client_config_builder = rustls::ClientConfig::builder()
                    .with_root_certificates(root_cert_store)
                    .with_no_client_auth();

                let client = Self::uncompressed_client_builder()
                    .use_preconfigured_tls(client_config_builder)
                    .build()?;

                Ok(Self::with_middleware(client))
            }
            // Default TLS client config with no validation.
            None => Ok(self.client.clone()),
        }
    }
}
/// Backend implements the Backend trait.
#[tonic::async_trait]
impl super::Backend for HTTP {
/// scheme returns the scheme of the HTTP backend.
fn scheme(&self) -> String {
self.scheme.clone()
}
/// head gets the header of the request.
#[instrument(skip_all)]
async fn head(&self, request: super::HeadRequest) -> Result<super::HeadResponse> {
debug!(
"head request {} {}: {:?}",
request.task_id, request.url, request.http_header
);
// The header of the request is required.
let header = request.http_header.ok_or(Error::InvalidParameter)?;
// The signature in the signed URL generated by the object storage client will include
// the request method. Therefore, the signed URL of the GET method cannot be requested
// through the HEAD method. Use GET request to replace of HEAD request
// to get header and status code.
let response = self
.client(request.client_cert)?
.get(&request.url)
.headers(header)
// Add Range header to ensure Content-Length is returned in response headers.
// Some servers (especially when using Transfer-Encoding: chunked,
// refer to https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Transfer-Encoding.) may not
// include Content-Length in HEAD requests. Using "bytes=0-" requests the
// entire file starting from byte 0, forcing the server to include file size
// information in the response headers.
.header(reqwest::header::RANGE, "bytes=0-")
.timeout(request.timeout)
.send()
.await
.inspect_err(|err| {
error!(
"head request failed {} {}: {}",
request.task_id, request.url, err
);
})?;
let header = response.headers().clone();
let status_code = response.status();
let content_length = response.content_length();
debug!(
"head response {} {}: {:?} {:?} {:?}",
request.task_id, request.url, status_code, content_length, header
);
// Drop the response body to avoid reading it.
drop(response);
Ok(super::HeadResponse {
success: status_code.is_success(),
content_length,
http_header: Some(header),
http_status_code: Some(status_code),
error_message: Some(status_code.to_string()),
entries: Vec::new(),
})
}
/// get gets the content of the request.
#[instrument(skip_all)]
async fn get(&self, request: super::GetRequest) -> Result<super::GetResponse<super::Body>> {
debug!(
"get request {} {} {}: {:?}",
request.task_id, request.piece_id, request.url, request.http_header
);
// The header of the request is required.
let header = request.http_header.ok_or(Error::InvalidParameter)?;
let response = self
.client(request.client_cert)?
.get(&request.url)
.headers(header)
.timeout(request.timeout)
.send()
.await
.inspect_err(|err| {
error!(
"get request failed {} {} {}: {}",
request.task_id, request.piece_id, request.url, err
);
})?;
let header = response.headers().clone();
let status_code = response.status();
let reader = Box::new(StreamReader::new(
response
.bytes_stream()
.map_err(|err| IOError::new(ErrorKind::Other, err)),
));
debug!(
"get response {} {}: {:?} {:?}",
request.task_id, request.piece_id, status_code, header
);
Ok(super::GetResponse {
success: status_code.is_success(),
http_header: Some(header),
http_status_code: Some(status_code),
reader,
error_message: Some(status_code.to_string()),
})
}
}
#[cfg(test)]
mod tests {
use crate::{
http::{HTTP, HTTPS_SCHEME, HTTP_SCHEME},
Backend, GetRequest, HeadRequest,
};
use dragonfly_client_util::tls::{load_certs_from_pem, load_key_from_pem};
use hyper_util::rt::{TokioExecutor, TokioIo};
use reqwest::{header::HeaderMap, StatusCode};
use std::{sync::Arc, time::Duration};
use tokio::net::TcpListener;
use tokio_rustls::rustls::ServerConfig;
use tokio_rustls::TlsAcceptor;
use wiremock::{
matchers::{method, path},
Mock, ResponseTemplate,
};
// Generate the certificate and private key by script(`scripts/generate_certs.sh`).
const SERVER_CERT: &str = r#"""
-----BEGIN CERTIFICATE-----
MIIDsDCCApigAwIBAgIUWuckNOpaPERz+QMACyqCqFJwYIYwDQYJKoZIhvcNAQEL
BQAwYjELMAkGA1UEBhMCQ04xEDAOBgNVBAgMB0JlaWppbmcxEDAOBgNVBAcMB0Jl
aWppbmcxEDAOBgNVBAoMB1Rlc3QgQ0ExCzAJBgNVBAsMAklUMRAwDgYDVQQDDAdU
ZXN0IENBMB4XDTI0MTAxMTEyMTEwN1oXDTI2MDIyMzEyMTEwN1owaDELMAkGA1UE
BhMCQ04xEDAOBgNVBAgMB0JlaWppbmcxEDAOBgNVBAcMB0JlaWppbmcxFDASBgNV
BAoMC1Rlc3QgU2VydmVyMQswCQYDVQQLDAJJVDESMBAGA1UEAwwJbG9jYWxob3N0
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAiA9wEge3Jq8qw8Ix9z6t
ss7ttK/49TMddhnQuqoYrFKjYliuvfbRZOU1nBP7+5XSAliPDCRNPS17JSwsXJk2
bstc69fruDpYmthualSTsUYSwJJqzJjy5mlwSPtBsombcSHrUasMce5C4iXJX8Wx
1O8ZCwuI5LUKxLujt+ZWnYfp5lzDcDhgD6wIzcMk67jv2edcWhqGkKmQbbmmK3Ve
DJRa56NCh0F2U1SW0KCXTzoC1YU/bbB4UCfvHouMzCRNTr3VcrfL5aBIn/z/f6Xt
atQkqFa/T1/lOQ0miMqNyBW58NxkPsTaJm2kVZ21hF2Dvo8MU/8Ras0J0aL8sc4n
LwIDAQABo1gwVjAUBgNVHREEDTALgglsb2NhbGhvc3QwHQYDVR0OBBYEFJP+jy8a
tCfnu6nekyZugvq8XT2gMB8GA1UdIwQYMBaAFOwXKq7J6STkwLUWC1xKwq1Psy63
MA0GCSqGSIb3DQEBCwUAA4IBAQCu8nqnuzNn3E9dNC8ptV7ga1zb7cGdL3ZT5W3d
10gmPo3YijWoCj4snattX9zxI8ThAY7uX6jrR0/HRXGJIw5JnlBmykdgyrQYEDzU
FUL0GGabJNxZ+zDV77P+3WdgCx3F7wLQk+x+etMPvYuWC8RMse7W6dB1INyMT/l6
k1rV73KTupSNJrYhqw0RnmNHIctkwiZLLpzLFj91BHjK5ero7VV4s7vnx+gtO/zQ
FnIyiyfYYcSpVMhhaNkeCtWOfgVYU/m4XXn5bwEOhMN6q0JcdBPnT6kd2otLhiIo
/WeyWEUeZ4rQhS7C1i31AYtNtVnnvI7BrsI4czYdcJcj3CM+
-----END CERTIFICATE-----
"""#;
const SERVER_KEY: &str = r#"""
-----BEGIN PRIVATE KEY-----
MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCID3ASB7cmryrD
wjH3Pq2yzu20r/j1Mx12GdC6qhisUqNiWK699tFk5TWcE/v7ldICWI8MJE09LXsl
LCxcmTZuy1zr1+u4Olia2G5qVJOxRhLAkmrMmPLmaXBI+0GyiZtxIetRqwxx7kLi
JclfxbHU7xkLC4jktQrEu6O35ladh+nmXMNwOGAPrAjNwyTruO/Z51xaGoaQqZBt
uaYrdV4MlFrno0KHQXZTVJbQoJdPOgLVhT9tsHhQJ+8ei4zMJE1OvdVyt8vloEif
/P9/pe1q1CSoVr9PX+U5DSaIyo3IFbnw3GQ+xNombaRVnbWEXYO+jwxT/xFqzQnR
ovyxzicvAgMBAAECggEABqHVkTfe1p+PBGx34tG/4nQxwIRxLJG31no+jeAdYOLF
AEeulqezbmIroyTMA0uQKWscy0V/gXUi3avHOOktp72Vv9fxy98F/fyBPx3YEvLa
69DMnl0qPl06CvLlTey6km8RKxUrRq9S2NoTydD+m1fC9jCIhvHkrNExIXjtaewU
PvAHJy4ho+hVLo40udmQ4i1gnEWYUtjkr65ujuOAlWrlScHGvOrATbrfcaufPi/S
5A/h8UlfahBstmh3a2tBLZlNl82s5ZKsVM1Oq1Vk9hAX5DP2JBAmuZKgX/xSDdpR
62VUQGqp1WLgble5vR6ZUFo5+Jiw1uxe9jmNUg9mMQKBgQC8giG3DeeU6+rX9LVz
cklF4jioU5LMdYutwXbtuGIWgXeJo8r0fzrgBtBVGRn7anS7YnYA+67h+A8SC6MO
SXvktpHIC3Egge2Q9dRrWA4YCpkIxlOQ5ofCqovvCg9kq9sYqGz6lMr3RrzOWkUW
+0hF1CHCV0+KGFeIvTYVIKSsJwKBgQC4xiTsaShmwJ6HdR59jOmij+ccCPQTt2IO
eGcniY2cHIoX9I7nn7Yah6JbMT0c8j75KA+pfCrK3FpRNrb71cI1iqBHedZXpRaV
eshJztmw3AKtxQPNwRYrKYpY/M0ShAduppELeshZz1kubQU3sD4adrhcGCDXkctb
dP44IpipuQKBgC+W5q4Q65L0ECCe3aQciRUEbGtKVfgaAL5H5h9TeifWXXg5Coa5
DAL8lWG2aZHIKVoZHFNZNqhDeIKEv5BeytFNqfYHtXKQeoorFYpX+47kNgg6EWS2
XjWt2o/pSUOQA0rxUjnckHTmvcmWjnSj0XYXfMJUSndBd+/EXL/ussPnAoGAGE5Q
Wxz2KJYcBHuemCtqLG07nI988/8Ckh66ixPoIeoLLF2KUuPKg7Dl5ZMTk/Q13nar
oMLpqifUZayJ45TZ6EslDGH1lS/tSZqOME9aiY5Xd95bwrwsm17qiQwwOchOZfrZ
R6ZOJqpE8/t5XTr84GRPmiW+ZD0UgCJisqWyaVkCgYEAtupQDst0hmZ0KnJSIZ5U
R6skHABhmwNU5lOPUBIzHVorbAaKDKd4iFbBI5wnBuWxXY0SANl2HYX3gZaPccH4
wzvR3jZ1B4UlEBXl2V+VRbrXyPTN4uUF42AkSGuOsK4O878wW8noX+ZZTk7gydTN
Z+yQ5jhu/fmSBNhqO/8Lp+Y=
-----END PRIVATE KEY-----
"""#;
const CA_CERT: &str = r#"""
-----BEGIN CERTIFICATE-----
MIIDpTCCAo2gAwIBAgIULqNbOr0fRj05VwIKlYdDt8HwxsUwDQYJKoZIhvcNAQEL
BQAwYjELMAkGA1UEBhMCQ04xEDAOBgNVBAgMB0JlaWppbmcxEDAOBgNVBAcMB0Jl
aWppbmcxEDAOBgNVBAoMB1Rlc3QgQ0ExCzAJBgNVBAsMAklUMRAwDgYDVQQDDAdU
ZXN0IENBMB4XDTI0MTAxMTEyMTEwNloXDTI3MDgwMTEyMTEwNlowYjELMAkGA1UE
BhMCQ04xEDAOBgNVBAgMB0JlaWppbmcxEDAOBgNVBAcMB0JlaWppbmcxEDAOBgNV
BAoMB1Rlc3QgQ0ExCzAJBgNVBAsMAklUMRAwDgYDVQQDDAdUZXN0IENBMIIBIjAN
BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAvDQCTmptzEmjwAkk6vsnEbch0Gt+
Xp3bEEE1YhW89Jy6/bmclEINXsoRxpgkx4XnW0bcoDcqWBES82sFsQtEFWkP0Q3S
8CQtpymDIuSj63xSVJWG8/cobzwztJfVQjBJwfmdnamXcjtqGHaGo3RjaHurSBTT
Tft+gUvCuzFAblK+liQuQWRMq7JBwONgVzoMYoWSi+JJpEUcy/T+oznn9jNAW8Do
FnXi1xvbRv6JiGOsYH1t869j5R8BkpjyGlZ6RYfPhiKtTg4K/ufnkkKteHzGZfcV
HW2tqXyIkUl4j/+041nYtnyUuOZgLs2sJ33PER7GwVgi3sWG8AsNolRHUQIDAQAB
o1MwUTAdBgNVHQ4EFgQU7BcqrsnpJOTAtRYLXErCrU+zLrcwHwYDVR0jBBgwFoAU
7BcqrsnpJOTAtRYLXErCrU+zLrcwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0B
AQsFAAOCAQEADFoewfDAIqf8OAhFFcTYiTTu16sbTzZTzRfxSa0R0oOmSl8338If
71q8Yx65gFlu7FMiVRaVASzupwDhtLpqr6oVxLlmNW4fM0Bb+2CbmRuwhlm6ymBo
NXtRh5AkWAxHOp124Rmrr3WB9r+zvZ2kxuWPvN/cOq4H4VAp/F0cBtKPRDw/W0IQ
hDvG4OanBOKLE9Q7VH2kHXb6fJ4imKIztYcU4hOenKdUhfkCIBiIFgntUcEAaEpU
FnJ4fV4c4aJ+9D3VyPlrdiBqIPI0Wms9YqqG2b8EDid561Jj7paIR2wLn0/Gq61b
ePv3eLH0ZmBhSyl4+q/V56Z1TdZU46QZlg==
-----END CERTIFICATE-----
"""#;
/// WRONG_CA_CERT is a PEM-encoded certificate used by the negative HTTPS tests:
/// requests that pass it as `client_cert` are expected to fail against the server
/// started with `SERVER_CERT`/`SERVER_KEY`.
///
/// The raw string is delimited by `r#"` ... `"#`, so the two extra `"` characters
/// at the start and end are part of the string content.
/// NOTE(review): presumably the PEM loader ignores text outside the BEGIN/END markers — confirm.
const WRONG_CA_CERT: &str = r#"""
-----BEGIN CERTIFICATE-----
MIIDqTCCApGgAwIBAgIUW+6n+025VMqvZd4wm+Xdfzu4o38wDQYJKoZIhvcNAQEL
BQAwZDELMAkGA1UEBhMCQ04xEDAOBgNVBAgMB0JlaWppbmcxEDAOBgNVBAcMB0Jl
aWppbmcxETAPBgNVBAoMCFdyb25nIENBMQswCQYDVQQLDAJJVDERMA8GA1UEAwwI
V3JvbmcgQ0EwHhcNMjQxMDExMTIxMTA2WhcNMjcwODAxMTIxMTA2WjBkMQswCQYD
VQQGEwJDTjEQMA4GA1UECAwHQmVpamluZzEQMA4GA1UEBwwHQmVpamluZzERMA8G
A1UECgwIV3JvbmcgQ0ExCzAJBgNVBAsMAklUMREwDwYDVQQDDAhXcm9uZyBDQTCC
ASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALThl83CHSlT+xHONWqjOlsG
z+qeYcdZRxVJZQWJ9DrfTBcE64fqXnRIMesZbZNGi0d4XyfiJDB8AxVRAD/lVHQi
WR8LHglV/Hd7NjYG3bMQSkRHf5oleKjm1KDLvvnoD25YhqZsVDSCe+V4JkPc6xun
SGU/WJluyzy0j49KJXjKJTzpkFsvYF91s8oYMCjwVMuYxcZLA7OCUgb9phlfZBND
S9Dc5HI99O+0Uxfvfa/nRp85n2WpEJWQruGaazHFP/k842iR6zXIFclySE7n+1IG
SBLJqZ4IYfS0NisTEozD/LcuEJ87/PZ7ag0zFhu7MpnD55JeJP8cq8pISHj8gJcC
AwEAAaNTMFEwHQYDVR0OBBYEFLmV6Oqgwc1kIrv4JKLzn5qpKbvAMB8GA1UdIwQY
MBaAFLmV6Oqgwc1kIrv4JKLzn5qpKbvAMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZI
hvcNAQELBQADggEBAEJ+DbjdAZdJltIkHeIwFx9S4VnhA+Dw5+EBY03XzYo3HB/i
qSQTvYz4laZppierxuR8Z5O6DPOxNJ4pXhXDcn2e2TzlBq+P0fUE9z2w+QBQyTEl
6J2W5ce6dh9ke601pSMedLFDiARDGLkRDsIuEh91i62o+O3gNRkD/OWvjHAorQTf
BOP2lbcTYGg6wMPOUMBHg73E/pyXVXeN9x1qN7dCWN4zDwInII7iUA6BQ0zECJAD
sYhAYqHktkJsl0K4gJVanpnUhAC+SMD3+LRdjwMBp4mk+q3p2FMJMkACK3ffpn9j
TrIVG3cErZoBC6zqBs/Ibe9q3gdHGqS3QLAKy/k=
-----END CERTIFICATE-----
"""#;
/// Starts an HTTPS server for tests and returns its base URL.
///
/// The server presents the certificate chain parsed from `cert_pem` with the
/// private key parsed from `key_pem`, requires no client authentication, binds
/// to an ephemeral port on `127.0.0.1`, and answers every request with
/// `200 OK` and the body `OK`. The returned URL has the form
/// `https://localhost:<port>`.
///
/// # Panics
///
/// Panics if the PEM inputs cannot be parsed, the TLS configuration is
/// rejected, or the listener cannot be bound.
async fn start_https_server(cert_pem: &str, key_pem: &str) -> String {
    let server_certs = load_certs_from_pem(cert_pem).unwrap();
    let server_key = load_key_from_pem(key_pem).unwrap();

    // Setup the server.
    let config = ServerConfig::builder()
        .with_no_client_auth()
        .with_single_cert(server_certs, server_key.clone_key())
        .unwrap();
    let acceptor = TlsAcceptor::from(Arc::new(config));
    let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
    let addr = listener.local_addr().unwrap();

    tokio::spawn(async move {
        loop {
            let (stream, _) = listener.accept().await.unwrap();
            let acceptor = acceptor.clone();
            tokio::spawn(async move {
                // A failed handshake is an expected outcome when the client does not
                // trust the server certificate (the wrong-CA tests), so drop the
                // connection quietly instead of panicking the worker task.
                let stream = match acceptor.accept(stream).await {
                    Ok(stream) => stream,
                    Err(_) => return,
                };

                // Always return 200 OK with OK as its body for any requests.
                let service = hyper::service::service_fn(|_| async {
                    Ok::<_, hyper::Error>(hyper::Response::new("OK".to_string()))
                });

                // Connection-level errors are irrelevant to the tests; the task
                // result was never observed by the caller.
                let _ = hyper_util::server::conn::auto::Builder::new(TokioExecutor::new())
                    .serve_connection(TokioIo::new(stream), service)
                    .await;
            });
        }
    });

    format!("https://localhost:{}", addr.port())
}
/// A HEAD request with an empty header map against a mock endpoint answering
/// 200 must report `StatusCode::OK`.
#[tokio::test]
async fn should_get_head_response() {
    let mock_server = wiremock::MockServer::start().await;
    let template =
        ResponseTemplate::new(200).insert_header("Content-Type", "text/html; charset=UTF-8");
    Mock::given(method("GET"))
        .and(path("/head"))
        .respond_with(template)
        .mount(&mock_server)
        .await;

    let request = HeadRequest {
        task_id: "test".to_string(),
        url: format!("{}/head", mock_server.uri()),
        http_header: Some(HeaderMap::new()),
        timeout: std::time::Duration::from_secs(5),
        client_cert: None,
        object_storage: None,
        hdfs: None,
    };
    let backend = HTTP::new(HTTP_SCHEME).unwrap();
    let resp = backend.head(request).await.unwrap();

    assert_eq!(resp.http_status_code, Some(StatusCode::OK));
}
/// A HEAD request built with `http_header: None` must return an error.
///
/// NOTE(review): the mock does serve `/head`, so the failure presumably comes
/// from the missing header map rather than a missing resource — confirm
/// against `HTTP::head`.
#[tokio::test]
async fn should_return_error_response_when_head_notexists() {
    let mock_server = wiremock::MockServer::start().await;
    let template =
        ResponseTemplate::new(200).insert_header("Content-Type", "text/html; charset=UTF-8");
    Mock::given(method("GET"))
        .and(path("/head"))
        .respond_with(template)
        .mount(&mock_server)
        .await;

    let request = HeadRequest {
        task_id: "test".to_string(),
        url: format!("{}/head", mock_server.uri()),
        http_header: None,
        timeout: std::time::Duration::from_secs(5),
        client_cert: None,
        object_storage: None,
        hdfs: None,
    };
    let backend = HTTP::new(HTTP_SCHEME).unwrap();
    let outcome = backend.head(request).await;

    assert!(outcome.is_err());
}
/// A GET request against a mock endpoint answering 200 with body `OK` must
/// report `StatusCode::OK` and yield that body.
#[tokio::test]
async fn should_get_response() {
    let mock_server = wiremock::MockServer::start().await;
    let template = ResponseTemplate::new(200)
        .insert_header("Content-Type", "text/html; charset=UTF-8")
        .set_body_string("OK");
    Mock::given(method("GET"))
        .and(path("/get"))
        .respond_with(template)
        .mount(&mock_server)
        .await;

    let request = GetRequest {
        task_id: "test".to_string(),
        piece_id: "test".to_string(),
        url: format!("{}/get", mock_server.uri()),
        range: None,
        http_header: Some(HeaderMap::new()),
        timeout: std::time::Duration::from_secs(5),
        client_cert: None,
        object_storage: None,
        hdfs: None,
    };
    let backend = HTTP::new(HTTP_SCHEME).unwrap();
    let mut resp = backend.get(request).await.unwrap();

    assert_eq!(resp.http_status_code, Some(StatusCode::OK));
    assert_eq!(resp.text().await.unwrap(), "OK");
}
/// A HEAD request over HTTPS must succeed when the request carries the
/// certificates loaded from `CA_CERT`.
#[tokio::test]
async fn should_get_head_response_with_self_signed_cert() {
    let url = start_https_server(SERVER_CERT, SERVER_KEY).await;

    let request = HeadRequest {
        task_id: "test".to_string(),
        url,
        http_header: Some(HeaderMap::new()),
        timeout: Duration::from_secs(5),
        client_cert: Some(load_certs_from_pem(CA_CERT).unwrap()),
        object_storage: None,
        hdfs: None,
    };
    let backend = HTTP::new(HTTPS_SCHEME).unwrap();
    let resp = backend.head(request).await.unwrap();

    assert_eq!(resp.http_status_code, Some(StatusCode::OK));
}
/// A HEAD request over HTTPS must fail when the request carries the
/// certificates loaded from `WRONG_CA_CERT`.
#[tokio::test]
async fn should_return_error_response_when_head_with_wrong_cert() {
    let url = start_https_server(SERVER_CERT, SERVER_KEY).await;

    let request = HeadRequest {
        task_id: "test".to_string(),
        url,
        http_header: Some(HeaderMap::new()),
        timeout: Duration::from_secs(5),
        client_cert: Some(load_certs_from_pem(WRONG_CA_CERT).unwrap()),
        object_storage: None,
        hdfs: None,
    };
    let backend = HTTP::new(HTTPS_SCHEME).unwrap();
    let outcome = backend.head(request).await;

    assert!(outcome.is_err());
}
/// A GET request over HTTPS must succeed and return the body `OK` when the
/// request carries the certificates loaded from `CA_CERT`.
#[tokio::test]
async fn should_get_response_with_self_signed_cert() {
    let url = start_https_server(SERVER_CERT, SERVER_KEY).await;

    let request = GetRequest {
        task_id: "test".to_string(),
        piece_id: "test".to_string(),
        url,
        range: None,
        http_header: Some(HeaderMap::new()),
        timeout: std::time::Duration::from_secs(5),
        client_cert: Some(load_certs_from_pem(CA_CERT).unwrap()),
        object_storage: None,
        hdfs: None,
    };
    let backend = HTTP::new(HTTPS_SCHEME).unwrap();
    let mut resp = backend.get(request).await.unwrap();

    assert_eq!(resp.http_status_code, Some(StatusCode::OK));
    assert_eq!(resp.text().await.unwrap(), "OK");
}
/// A GET request over HTTPS must fail when the request carries the
/// certificates loaded from `WRONG_CA_CERT`.
#[tokio::test]
async fn should_return_error_response_when_get_with_wrong_cert() {
    let url = start_https_server(SERVER_CERT, SERVER_KEY).await;

    let request = GetRequest {
        task_id: "test".to_string(),
        piece_id: "test".to_string(),
        url,
        range: None,
        http_header: Some(HeaderMap::new()),
        timeout: std::time::Duration::from_secs(5),
        client_cert: Some(load_certs_from_pem(WRONG_CA_CERT).unwrap()),
        object_storage: None,
        hdfs: None,
    };
    let backend = HTTP::new(HTTPS_SCHEME).unwrap();
    let outcome = backend.get(request).await;

    assert!(outcome.is_err());
}
/// A HEAD request over HTTPS with `client_cert: None` must still succeed
/// against the test server.
///
/// NOTE(review): presumably the backend skips certificate verification when no
/// certificates are supplied — confirm against the HTTP backend.
#[tokio::test]
async fn should_get_head_response_with_no_verifier() {
    let url = start_https_server(SERVER_CERT, SERVER_KEY).await;

    let request = HeadRequest {
        task_id: "test".to_string(),
        url,
        http_header: Some(HeaderMap::new()),
        timeout: Duration::from_secs(5),
        client_cert: None,
        object_storage: None,
        hdfs: None,
    };
    let backend = HTTP::new(HTTPS_SCHEME).unwrap();
    let resp = backend.head(request).await.unwrap();

    assert_eq!(resp.http_status_code, Some(StatusCode::OK));
}
/// A GET request over HTTPS with `client_cert: None` must still succeed
/// against the test server and return the body `OK`.
///
/// NOTE(review): presumably the backend skips certificate verification when no
/// certificates are supplied — confirm against the HTTP backend.
#[tokio::test]
async fn should_get_response_with_no_verifier() {
    let url = start_https_server(SERVER_CERT, SERVER_KEY).await;

    let backend = HTTP::new(HTTPS_SCHEME).unwrap();
    let request = GetRequest {
        task_id: "test".to_string(),
        piece_id: "test".to_string(),
        url,
        range: None,
        http_header: Some(HeaderMap::new()),
        timeout: std::time::Duration::from_secs(5),
        client_cert: None,
        object_storage: None,
        hdfs: None,
    };
    let mut resp = backend.get(request).await.unwrap();

    assert_eq!(resp.http_status_code, Some(StatusCode::OK));
    assert_eq!(resp.text().await.unwrap(), "OK");
}
}

View File

@ -1,512 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_api::common::v2::{Hdfs, ObjectStorage, Range};
use dragonfly_client_core::{
error::{ErrorType, OrErr},
Error, Result,
};
use libloading::Library;
use reqwest::header::HeaderMap;
use rustls_pki_types::CertificateDer;
use std::path::Path;
use std::{collections::HashMap, pin::Pin, time::Duration};
use std::{fmt::Debug, fs};
use tokio::io::{AsyncRead, AsyncReadExt};
use tracing::{error, info, warn};
use url::Url;
pub mod hdfs;
pub mod http;
pub mod object_storage;
/// POOL_MAX_IDLE_PER_HOST is the maximum number of idle connections kept per host.
const POOL_MAX_IDLE_PER_HOST: usize = 1024;
/// KEEP_ALIVE_INTERVAL is the keep-alive interval for TCP connections (60 seconds).
const KEEP_ALIVE_INTERVAL: Duration = Duration::from_secs(60);
/// HTTP2_KEEP_ALIVE_INTERVAL is the keep-alive interval for HTTP/2 connections (300 seconds).
const HTTP2_KEEP_ALIVE_INTERVAL: Duration = Duration::from_secs(300);
/// HTTP2_KEEP_ALIVE_TIMEOUT is the keep-alive timeout for HTTP/2 connections (20 seconds).
const HTTP2_KEEP_ALIVE_TIMEOUT: Duration = Duration::from_secs(20);
/// MAX_RETRY_TIMES is the maximum number of retries for a request.
const MAX_RETRY_TIMES: u32 = 1;
/// NAME is the name of the package. It is also the name of the sub-directory of
/// the plugin directory from which backend plugins are loaded.
pub const NAME: &str = "backend";
/// Body is the body of a backend response: a boxed asynchronous reader that
/// can be sent across threads and does not need pinning.
pub type Body = Box<dyn AsyncRead + Send + Unpin>;
/// HeadRequest is the head request for a backend, i.e. the input of `Backend::head`.
pub struct HeadRequest {
/// task_id is the id of the task.
pub task_id: String,
/// url is the url of the request.
pub url: String,
/// http_header is the headers of the request, if any.
pub http_header: Option<HeaderMap>,
/// timeout is the timeout of the request.
pub timeout: Duration,
/// client_cert is the client certificates (DER encoded) for the request, if any.
pub client_cert: Option<Vec<CertificateDer<'static>>>,
/// object_storage is the object storage related information, if any.
pub object_storage: Option<ObjectStorage>,
/// hdfs is the hdfs related information, if any.
pub hdfs: Option<Hdfs>,
}
/// HeadResponse is the head response for a backend, i.e. the output of `Backend::head`.
#[derive(Debug)]
pub struct HeadResponse {
/// success indicates whether the request succeeded.
pub success: bool,
/// content_length is the content length of the response, if known.
pub content_length: Option<u64>,
/// http_header is the headers of the response, if any.
pub http_header: Option<HeaderMap>,
/// http_status_code is the status code of the response, if any.
pub http_status_code: Option<reqwest::StatusCode>,
/// entries is the information of the entries in the directory.
pub entries: Vec<DirEntry>,
/// error_message is the error message of the response, if any.
pub error_message: Option<String>,
}
/// GetRequest is the get request for a backend, i.e. the input of `Backend::get`.
pub struct GetRequest {
/// task_id is the id of the task.
pub task_id: String,
/// piece_id is the id of the piece.
pub piece_id: String,
/// url is the url of the request.
pub url: String,
/// range is the range of the request, if any.
pub range: Option<Range>,
/// http_header is the headers of the request, if any.
pub http_header: Option<HeaderMap>,
/// timeout is the timeout of the request.
pub timeout: Duration,
/// client_cert is the client certificates (DER encoded) for the request, if any.
pub client_cert: Option<Vec<CertificateDer<'static>>>,
/// object_storage is the object storage related information, if any.
pub object_storage: Option<ObjectStorage>,
/// hdfs is the hdfs related information, if any.
pub hdfs: Option<Hdfs>,
}
/// GetResponse is the get response for a backend. It is generic over the
/// asynchronous reader `R` that yields the response content.
pub struct GetResponse<R>
where
R: AsyncRead + Unpin,
{
/// success indicates whether the request succeeded.
pub success: bool,
/// http_header is the headers of the response, if any.
pub http_header: Option<HeaderMap>,
/// http_status_code is the status code of the response, if any.
pub http_status_code: Option<reqwest::StatusCode>,
/// reader is the asynchronous reader over the content of the response.
pub reader: R,
/// error_message is the error message of the response, if any.
pub error_message: Option<String>,
}
/// Convenience helpers for consuming a [`GetResponse`].
impl<R> GetResponse<R>
where
    R: AsyncRead + Unpin,
{
    /// text reads the remaining content of `reader` to its end and returns it
    /// as a `String`.
    ///
    /// # Errors
    ///
    /// Returns an error if reading from `reader` fails, which includes content
    /// that is not valid UTF-8.
    pub async fn text(&mut self) -> Result<String> {
        let mut content = String::new();
        // `R: Unpin`, so the reader can be driven through a plain mutable borrow.
        let reader = &mut self.reader;
        reader.read_to_string(&mut content).await?;
        Ok(content)
    }
}
/// DirEntry is an entry of a directory, including some relevant file metadata.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct DirEntry {
/// url is the url of the entry.
pub url: String,
/// content_length is the content length of the entry.
pub content_length: usize,
/// is_dir indicates whether the entry is a directory.
pub is_dir: bool,
}
/// Backend is the interface every backend implements, both the builtin ones and
/// the ones loaded from plugins.
#[tonic::async_trait]
pub trait Backend {
/// scheme returns the scheme of the backend.
fn scheme(&self) -> String;
/// head handles the head request and returns the metadata of the requested url.
async fn head(&self, request: HeadRequest) -> Result<HeadResponse>;
/// get handles the get request and returns a response whose body streams the content.
async fn get(&self, request: GetRequest) -> Result<GetResponse<Body>>;
}
/// BackendFactory is the factory of the backends. It maps a url scheme to the
/// backend that serves it.
#[derive(Default)]
pub struct BackendFactory {
/// backends is the backends of the factory keyed by scheme, including the
/// plugin backends and the builtin backends.
backends: HashMap<String, Box<dyn Backend + Send + Sync>>,
/// libraries stores the dynamic libraries of the loaded plugins. A `Library`
/// that is not saved is dropped when it goes out of scope, which results in
/// null pointer errors when the backends it provided are used.
libraries: Vec<Library>,
}
/// BackendFactory implements the factory of the backend. It supports loading builtin
/// backends and plugin backends.
///
/// The builtin backends are http, https, etc., which are implemented
/// by the HTTP struct.
///
/// The plugin backends are shared libraries, which are loaded
/// by the `register_plugin` function. The file name of the shared
/// library is the scheme of the backend. The shared library
/// should implement the Backend trait. Default plugin directory
/// is `/var/lib/dragonfly/plugins/` in linux and `~/.dragonfly/plugins`
/// in macos. The plugin directory can be set by the dfdaemon configuration.
///
/// For example:
/// If implement a plugin backend named `hdfs`, the shared library
/// should be named `libhdfs.so` or `libhdfs.dylib` and move the file to the backend plugin directory
/// `/var/lib/dragonfly/plugins/backend/` in linux or `~/.dragonfly/plugins/backend/`
/// in macos. When the dfdaemon starts, it will load the `hdfs` plugin backend in the
/// backend plugin directory. So the dfdaemon or dfget can use the `hdfs` plugin backend
/// to download the file by the url `hdfs://example.com/file`.
/// The backend plugin implementation can refer to
/// https://github.com/dragonflyoss/client/tree/main/dragonfly-client-backend/examples/plugin/.
impl BackendFactory {
    /// new returns a new BackendFactory with the builtin backends loaded and,
    /// when `plugin_dir` is given, the plugin backends found below it.
    ///
    /// # Errors
    ///
    /// Returns an error if a builtin backend cannot be created or a plugin
    /// fails to load.
    pub fn new(plugin_dir: Option<&Path>) -> Result<Self> {
        let mut backend_factory = Self::default();
        backend_factory.load_builtin_backends()?;

        if let Some(plugin_dir) = plugin_dir {
            backend_factory
                .load_plugin_backends(plugin_dir)
                .inspect_err(|err| {
                    error!("failed to load plugin backends: {}", err);
                })?;
        }

        Ok(backend_factory)
    }

    /// unsupported_download_directory returns whether the scheme does not support directory download.
    pub fn unsupported_download_directory(scheme: &str) -> bool {
        scheme == http::HTTP_SCHEME || scheme == http::HTTPS_SCHEME
    }

    /// build returns the backend by the scheme of the url.
    ///
    /// # Errors
    ///
    /// Returns a parse error if `url` is not a valid url, and
    /// `Error::InvalidParameter` if no backend is registered for its scheme.
    pub fn build(&self, url: &str) -> Result<&(dyn Backend + Send + Sync)> {
        let url = Url::parse(url).or_err(ErrorType::ParseError)?;
        self.backends
            .get(url.scheme())
            .map(|boxed_backend| &**boxed_backend)
            .ok_or(Error::InvalidParameter)
    }

    /// load_builtin_backends loads the builtin backends.
    fn load_builtin_backends(&mut self) -> Result<()> {
        self.backends.insert(
            "http".to_string(),
            Box::new(http::HTTP::new(http::HTTP_SCHEME)?),
        );
        info!("load [http] builtin backend");

        self.backends.insert(
            "https".to_string(),
            Box::new(http::HTTP::new(http::HTTPS_SCHEME)?),
        );
        info!("load [https] builtin backend");

        self.backends.insert(
            "s3".to_string(),
            Box::new(object_storage::ObjectStorage::new(
                object_storage::Scheme::S3,
            )?),
        );
        info!("load [s3] builtin backend");

        self.backends.insert(
            "gs".to_string(),
            Box::new(object_storage::ObjectStorage::new(
                object_storage::Scheme::GCS,
            )?),
        );
        info!("load [gcs] builtin backend");

        self.backends.insert(
            "abs".to_string(),
            Box::new(object_storage::ObjectStorage::new(
                object_storage::Scheme::ABS,
            )?),
        );
        info!("load [abs] builtin backend");

        self.backends.insert(
            "oss".to_string(),
            Box::new(object_storage::ObjectStorage::new(
                object_storage::Scheme::OSS,
            )?),
        );
        info!("load [oss] builtin backend");

        self.backends.insert(
            "obs".to_string(),
            Box::new(object_storage::ObjectStorage::new(
                object_storage::Scheme::OBS,
            )?),
        );
        info!("load [obs] builtin backend");

        self.backends.insert(
            "cos".to_string(),
            Box::new(object_storage::ObjectStorage::new(
                object_storage::Scheme::COS,
            )?),
        );
        info!("load [cos] builtin backend");

        self.backends
            .insert("hdfs".to_string(), Box::new(hdfs::Hdfs::new()));
        info!("load [hdfs] builtin backend");

        Ok(())
    }

    /// load_plugin_backends loads the plugin backends from the `backend`
    /// sub-directory of `plugin_dir`.
    ///
    /// Every entry of that directory is opened as a shared library and must
    /// export `register_plugin`. A library whose file stem starts with `lib`
    /// is registered under the rest of the stem as its scheme, replacing any
    /// backend already registered for that scheme. A missing directory is not
    /// an error, the plugins are just skipped.
    ///
    /// # Errors
    ///
    /// Returns an error if the directory cannot be read, or a plugin error if
    /// an entry cannot be loaded as a library or lacks `register_plugin`.
    fn load_plugin_backends(&mut self, plugin_dir: &Path) -> Result<()> {
        let backend_plugin_dir = plugin_dir.join(NAME);
        if !backend_plugin_dir.exists() {
            warn!(
                "skip loading plugin backends, because the plugin directory {} does not exist",
                backend_plugin_dir.display()
            );
            return Ok(());
        }

        for entry in fs::read_dir(backend_plugin_dir)? {
            let path = entry?.path();

            // SAFETY: loading a library runs its initialization routines. The
            // plugin directory is operator-controlled and its content is trusted.
            let library =
                unsafe { Library::new(path.as_os_str()) }.or_err(ErrorType::PluginError)?;

            // The symbol borrows the library, so it is confined to this scope and
            // released before the library is moved into `self.libraries`.
            {
                // SAFETY: the plugin contract requires `register_plugin` to have
                // exactly this signature; it cannot be checked at runtime.
                let register_plugin: libloading::Symbol<
                    unsafe extern "C" fn() -> Box<dyn Backend + Send + Sync>,
                > = unsafe { library.get(b"register_plugin") }.or_err(ErrorType::PluginError)?;

                // Load shared libraries by register_plugin function,
                // file name is the scheme of the backend.
                let file_stem = path.file_stem().map(|stem| stem.to_string_lossy());
                if let Some(plugin_name) = file_stem
                    .as_deref()
                    .and_then(|stem| stem.strip_prefix("lib"))
                {
                    // SAFETY: see the contract on `register_plugin` above.
                    let backend = unsafe { register_plugin() };
                    self.backends.insert(plugin_name.to_string(), backend);
                    info!("load [{}] plugin backend", plugin_name);
                }
            }

            // Keep the library loaded for as long as the factory lives, because
            // the registered backend's code lives inside it.
            self.libraries.push(library);
        }

        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    #[test]
    fn should_create_backend_factory_without_plugin_dir() {
        let result = BackendFactory::new(None);
        assert!(result.is_ok());
    }

    #[test]
    fn should_load_builtin_backends() {
        let factory = BackendFactory::new(None).unwrap();
        let expected_backends = vec![
            "http", "https", "s3", "gs", "abs", "oss", "obs", "cos", "hdfs",
        ];
        for backend in expected_backends {
            assert!(factory.backends.contains_key(backend));
        }
    }

    #[test]
    fn should_load_plugin_backends() {
        // Create plugin directory.
        let dir = tempdir().unwrap();
        let plugin_dir = dir.path().join("plugin");
        std::fs::create_dir(&plugin_dir).unwrap();

        let backend_dir = plugin_dir.join(NAME);
        std::fs::create_dir(&backend_dir).unwrap();

        build_example_plugin(&backend_dir);

        let result = BackendFactory::new(Some(&plugin_dir));
        assert!(result.is_ok());
        let factory = result.unwrap();
        assert!(factory.backends.contains_key("hdfs"));
    }

    #[test]
    fn should_skip_loading_plugins_when_plugin_dir_is_invalid() {
        let dir = tempdir().unwrap();
        let plugin_dir = dir.path().join("non_existent_plugin_dir");

        let factory = BackendFactory::new(Some(&plugin_dir)).unwrap();
        assert_eq!(factory.backends.len(), 9);
    }

    #[test]
    fn should_return_error_when_plugin_loading_fails() {
        let dir = tempdir().unwrap();
        let plugin_dir = dir.path().join("plugin");
        std::fs::create_dir(&plugin_dir).unwrap();

        let backend_dir = plugin_dir.join(NAME);
        std::fs::create_dir(&backend_dir).unwrap();

        // Invalid plugin that cannot be loaded.
        let lib_path = backend_dir.join("libinvalid_plugin.so");
        std::fs::write(&lib_path, b"invalid content").unwrap();

        let result = BackendFactory::new(Some(&plugin_dir));
        assert!(result.is_err());
        let err_msg = format!("{}", result.err().unwrap());

        assert!(
            err_msg.starts_with("PluginError cause:"),
            "error message should start with 'PluginError cause:'"
        );

        assert!(
            err_msg.contains(&lib_path.display().to_string()),
            "error message should contain library path"
        );
    }

    #[test]
    fn should_build_correct_backend() {
        // Create plugin directory.
        let dir = tempdir().unwrap();
        let plugin_dir = dir.path().join("plugin");
        std::fs::create_dir(&plugin_dir).unwrap();

        let backend_dir = plugin_dir.join(NAME);
        std::fs::create_dir(&backend_dir).unwrap();

        build_example_plugin(&backend_dir);

        let factory = BackendFactory::new(Some(&plugin_dir)).unwrap();
        let schemes = vec![
            "http", "https", "s3", "gs", "abs", "oss", "obs", "cos", "hdfs",
        ];

        for scheme in schemes {
            let result = factory.build(&format!("{}://example.com/key", scheme));
            assert!(result.is_ok());
            let backend = result.unwrap();
            assert_eq!(backend.scheme(), scheme);
        }
    }

    #[test]
    fn should_return_error_when_backend_scheme_is_not_support() {
        let factory = BackendFactory::new(None).unwrap();
        let result = factory.build("github://example.com");
        assert!(result.is_err());
        assert_eq!(format!("{}", result.err().unwrap()), "invalid parameter");
    }

    #[test]
    fn should_return_error_when_backend_scheme_is_invalid() {
        let factory = BackendFactory::new(None).unwrap();
        let result = factory.build("invalid_scheme://example.com");
        assert!(result.is_err());
        assert_eq!(
            format!("{}", result.err().unwrap()),
            "ParseError cause: relative URL without a base",
        );
    }

    // build_example_plugin builds the example plugin and places the resulting
    // shared library into `backend_dir`.
    fn build_example_plugin(backend_dir: &Path) {
        // Build example plugin.
        let status = std::process::Command::new("cargo")
            .arg("build")
            .current_dir("./examples/plugin")
            .status()
            .unwrap();
        assert!(status.success());

        let plugin_file = if cfg!(target_os = "macos") {
            "libhdfs.dylib"
        } else {
            "libhdfs.so"
        };

        // Copy instead of rename: a rename fails when the temporary directory is
        // on a different mount point than the target directory, and it removes
        // the artifact that another test running in parallel may be about to use.
        std::fs::copy(
            format!("../target/debug/{}", plugin_file),
            backend_dir.join(plugin_file),
        )
        .unwrap();
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -1,37 +0,0 @@
[package]
name = "dragonfly-client-config"
description = "Configuration for the dragonfly client"
version.workspace = true
authors.workspace = true
homepage.workspace = true
repository.workspace = true
keywords.workspace = true
license.workspace = true
edition.workspace = true
build = "build.rs"
[dependencies]
dragonfly-client-core.workspace = true
dragonfly-client-util.workspace = true
local-ip-address.workspace = true
clap.workspace = true
regex.workspace = true
serde.workspace = true
tracing.workspace = true
validator.workspace = true
humantime.workspace = true
serde_yaml.workspace = true
tokio.workspace = true
tempfile.workspace = true
serde_json.workspace = true
bytesize.workspace = true
bytesize-serde.workspace = true
tonic.workspace = true
rustls-pki-types.workspace = true
rcgen.workspace = true
reqwest.workspace = true
home = "0.5.11"
hostname = "^0.4"
humantime-serde = "1.1.1"
serde_regex = "1.1.0"
http-serde = "2.1.1"

View File

@ -1,86 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use std::env;
use std::path::Path;
use std::process::Command;
use std::time::{SystemTime, UNIX_EPOCH};
/// Commit represents the git commit information exported to the build as
/// environment variables.
struct Commit {
/// hash is the full hash of the commit.
hash: String,
/// short_hash is the abbreviated hash of the commit.
short_hash: String,
/// date is the date of the commit, in git's `short` date format.
date: String,
}
/// get_commit_from_git returns the git commit information of the latest commit.
///
/// Returns `None` when `../.git` does not exist, when `git` cannot be run or
/// exits unsuccessfully, or when its output is not valid UTF-8 or does not
/// contain the three expected fields. The build must never fail just because
/// commit metadata is unavailable.
fn get_commit_from_git() -> Option<Commit> {
    if !Path::new("../.git").exists() {
        return None;
    }

    let output = match Command::new("git")
        .args([
            "log",
            "-1",
            "--date=short",
            "--format=%H %h %cd",
            "--abbrev=9",
        ])
        .output()
    {
        Ok(output) if output.status.success() => output,
        _ => return None,
    };

    // Treat undecodable output like any other failure instead of panicking.
    let stdout = String::from_utf8(output.stdout).ok()?;

    // The format string yields "<hash> <short hash> <date>".
    let mut parts = stdout.split_whitespace().map(str::to_string);
    Some(Commit {
        hash: parts.next()?,
        short_hash: parts.next()?,
        date: parts.next()?,
    })
}
fn main() {
// Set the environment variables for the build platform.
let target = env::var("TARGET").unwrap_or_default();
println!("cargo:rustc-env=BUILD_PLATFORM={}", target);
// Set the environment variables for the build time.
if let Ok(build_time) = SystemTime::now().duration_since(UNIX_EPOCH) {
println!("cargo:rustc-env=BUILD_TIMESTAMP={}", build_time.as_secs());
}
// Get the commit information from git.
if let Some(commit) = get_commit_from_git() {
// Set the environment variables for the git commit.
println!("cargo:rustc-env=GIT_COMMIT_HASH={}", commit.hash);
// Set the environment variables for the git commit short.
println!(
"cargo:rustc-env=GIT_COMMIT_SHORT_HASH={}",
commit.short_hash
);
// Set the environment variables for the git commit date.
println!("cargo:rustc-env=GIT_COMMIT_DATE={}", commit.date);
}
}

View File

@ -1,36 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use std::path::PathBuf;
/// NAME is the name of dfcache.
pub const NAME: &str = "dfcache";
/// DEFAULT_OUTPUT_FILE_MODE defines the default file mode (`0o644`) for output files
/// when downloading with dfcache using the `--transfer-from-dfdaemon=true` option.
pub const DEFAULT_OUTPUT_FILE_MODE: u32 = 0o644;
/// default_dfcache_log_dir returns the default log directory for dfcache: the
/// `NAME` sub-directory of the crate-wide default log directory.
#[inline]
pub fn default_dfcache_log_dir() -> PathBuf {
    let log_root = crate::default_log_dir();
    log_root.join(NAME)
}
/// default_dfcache_persistent_replica_count returns the default replica count
/// of the persistent cache task.
#[inline]
pub fn default_dfcache_persistent_replica_count() -> u64 {
    const REPLICA_COUNT: u64 = 2;
    REPLICA_COUNT
}

File diff suppressed because it is too large Load Diff

View File

@ -1,29 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use std::path::PathBuf;
/// NAME is the name of dfget.
pub const NAME: &str = "dfget";
/// DEFAULT_OUTPUT_FILE_MODE defines the default file mode (`0o644`) for output files
/// when downloading with dfget using the `--transfer-from-dfdaemon=true` option.
pub const DEFAULT_OUTPUT_FILE_MODE: u32 = 0o644;
/// default_dfget_log_dir returns the default log directory for dfget: the
/// `NAME` sub-directory of the crate-wide default log directory.
pub fn default_dfget_log_dir() -> PathBuf {
    let log_root = crate::default_log_dir();
    log_root.join(NAME)
}

View File

@ -1,571 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::dfdaemon::default_proxy_server_port;
use dragonfly_client_core::error::{ErrorType, OrErr};
use dragonfly_client_core::Result;
use serde::{ser::SerializeStruct, Deserialize, Serialize};
use std::fs;
use std::net::Ipv4Addr;
use std::path::PathBuf;
use tracing::{info, instrument};
use validator::Validate;
/// NAME is the name of dfinit. It is also the name of the sub-directory used
/// for the dfinit logs.
pub const NAME: &str = "dfinit";
/// default_dfinit_config_path returns the default config path for dfinit:
/// `dfinit.yaml` inside the crate-wide default config directory.
#[inline]
pub fn default_dfinit_config_path() -> PathBuf {
    let config_dir = crate::default_config_dir();
    config_dir.join("dfinit.yaml")
}
/// default_dfinit_log_dir returns the default log directory for dfinit: the
/// `NAME` sub-directory of the crate-wide default log directory.
pub fn default_dfinit_log_dir() -> PathBuf {
    let log_root = crate::default_log_dir();
    log_root.join(NAME)
}
/// default_container_runtime_containerd_config_path returns the default path
/// of the containerd configuration file.
#[inline]
fn default_container_runtime_containerd_config_path() -> PathBuf {
    let config_path = "/etc/containerd/config.toml";
    config_path.into()
}
/// default_container_runtime_docker_config_path returns the default path of
/// the docker configuration file.
#[inline]
fn default_container_runtime_docker_config_path() -> PathBuf {
    let config_path = "/etc/docker/daemon.json";
    config_path.into()
}
/// default_container_runtime_crio_config_path returns the default path of the
/// cri-o registries configuration file.
#[inline]
fn default_container_runtime_crio_config_path() -> PathBuf {
    let config_path = "/etc/containers/registries.conf";
    config_path.into()
}
/// default_container_runtime_podman_config_path returns the default path of
/// the podman registries configuration file.
#[inline]
fn default_container_runtime_podman_config_path() -> PathBuf {
    let config_path = "/etc/containers/registries.conf";
    config_path.into()
}
/// default_container_runtime_crio_unqualified_search_registries returns the default
/// unqualified search registries of cri-o, in the order they are tried, refer to
/// https://github.com/containers/image/blob/main/docs/containers-registries.conf.5.md#global-settings.
#[inline]
fn default_container_runtime_crio_unqualified_search_registries() -> Vec<String> {
    [
        "registry.fedoraproject.org",
        "registry.access.redhat.com",
        "docker.io",
    ]
    .iter()
    .map(|registry| registry.to_string())
    .collect()
}
/// default_container_runtime_podman_unqualified_search_registries returns the default
/// unqualified search registries of podman, in the order they are tried, refer to
/// https://github.com/containers/image/blob/main/docs/containers-registries.conf.5.md#global-settings.
#[inline]
fn default_container_runtime_podman_unqualified_search_registries() -> Vec<String> {
    [
        "registry.fedoraproject.org",
        "registry.access.redhat.com",
        "docker.io",
    ]
    .iter()
    .map(|registry| registry.to_string())
    .collect()
}
/// default_proxy_addr returns the default proxy address of dfdaemon: an
/// `http://` url made of the IPv4 loopback address and the default proxy
/// server port.
#[inline]
fn default_proxy_addr() -> String {
    let host = Ipv4Addr::LOCALHOST;
    let port = default_proxy_server_port();
    format!("http://{}:{}", host, port)
}
/// default_container_runtime_containerd_registry_capabilities returns the default
/// capabilities of the containerd registry: `pull` and `resolve`.
#[inline]
fn default_container_runtime_containerd_registry_capabilities() -> Vec<String> {
    ["pull", "resolve"]
        .iter()
        .map(|capability| capability.to_string())
        .collect()
}
/// ContainerdRegistry is the registry configuration for containerd. Fields are
/// (de)serialized in camelCase and missing fields fall back to their defaults.
#[derive(Debug, Clone, Default, Validate, Deserialize, Serialize)]
#[serde(default, rename_all = "camelCase")]
pub struct ContainerdRegistry {
/// host_namespace is the location where container images and artifacts are sourced,
/// refer to https://github.com/containerd/containerd/blob/main/docs/hosts.md#registry-host-namespace.
/// The registry host namespace portion is [registry_host_name|IP address][:port], such as
/// docker.io, ghcr.io, gcr.io, etc.
pub host_namespace: String,
/// server_addr specifies the default server for this registry host namespace, refer to
/// https://github.com/containerd/containerd/blob/main/docs/hosts.md#server-field.
pub server_addr: String,
/// capabilities is the list of capabilities in containerd configuration, refer to
/// https://github.com/containerd/containerd/blob/main/docs/hosts.md#capabilities-field.
/// When the field is missing it defaults to the value of
/// `default_container_runtime_containerd_registry_capabilities`.
#[serde(default = "default_container_runtime_containerd_registry_capabilities")]
pub capabilities: Vec<String>,
/// skip_verify is the flag to skip verifying the server's certificate, refer to
/// https://github.com/containerd/containerd/blob/main/docs/hosts.md#bypass-tls-verification-example.
pub skip_verify: Option<bool>,
/// ca (Certificate Authority Certification) can be set to a path or an array of paths each pointing
/// to a ca file for use in authenticating with the registry namespace, refer to
/// https://github.com/containerd/containerd/blob/main/docs/hosts.md#ca-field.
pub ca: Option<Vec<String>>,
}
/// Containerd is the containerd configuration for dfinit.
///
/// Fields are (de)serialized with camelCase keys; missing fields fall back to their defaults.
#[derive(Debug, Clone, Default, Validate, Deserialize, Serialize)]
#[serde(default, rename_all = "camelCase")]
pub struct Containerd {
/// config_path is the path of containerd configuration file.
/// Defaults to the value of default_container_runtime_containerd_config_path.
#[serde(default = "default_container_runtime_containerd_config_path")]
pub config_path: PathBuf,
/// registries is the list of containerd registries.
pub registries: Vec<ContainerdRegistry>,
}
/// CRIORegistry is the registry configuration for cri-o.
///
/// Fields are (de)serialized with camelCase keys; missing fields fall back to their defaults.
#[derive(Debug, Clone, Default, Validate, Deserialize, Serialize, PartialEq, Eq)]
#[serde(default, rename_all = "camelCase")]
pub struct CRIORegistry {
/// prefix is the prefix of the user-specified image name, refer to
/// https://github.com/containers/image/blob/main/docs/containers-registries.conf.5.md#choosing-a-registry-toml-table.
pub prefix: String,
/// location accepts the same format as the prefix field, and specifies the physical location of the prefix-rooted namespace,
/// refer to https://github.com/containers/image/blob/main/docs/containers-registries.conf.5.md#remapping-and-mirroring-registries.
pub location: String,
}
/// CRIO is the cri-o configuration for dfinit.
///
/// Fields are (de)serialized with camelCase keys; missing fields fall back to their defaults.
#[derive(Debug, Clone, Default, Validate, Deserialize, Serialize)]
#[serde(default, rename_all = "camelCase")]
pub struct CRIO {
/// config_path is the path of the cri-o registries configuration file.
/// Defaults to the value of default_container_runtime_crio_config_path.
#[serde(default = "default_container_runtime_crio_config_path")]
pub config_path: PathBuf,
/// unqualified_search_registries is an array of host[:port] registries to try when pulling an unqualified image, in order.
/// Refer to https://github.com/containers/image/blob/main/docs/containers-registries.conf.5.md#global-settings.
#[serde(default = "default_container_runtime_crio_unqualified_search_registries")]
pub unqualified_search_registries: Vec<String>,
/// registries is the list of cri-o registries, refer to
/// https://github.com/containers/image/blob/main/docs/containers-registries.conf.5.md#namespaced-registry-settings.
pub registries: Vec<CRIORegistry>,
}
/// PodmanRegistry is the registry configuration for podman.
///
/// Fields are (de)serialized with camelCase keys; missing fields fall back to their defaults.
#[derive(Debug, Clone, Default, Validate, Deserialize, Serialize, PartialEq, Eq)]
#[serde(default, rename_all = "camelCase")]
pub struct PodmanRegistry {
/// prefix is the prefix of the user-specified image name, refer to
/// https://github.com/containers/image/blob/main/docs/containers-registries.conf.5.md#choosing-a-registry-toml-table.
pub prefix: String,
/// location accepts the same format as the prefix field, and specifies the physical location of the prefix-rooted namespace,
/// refer to https://github.com/containers/image/blob/main/docs/containers-registries.conf.5.md#remapping-and-mirroring-registries.
pub location: String,
}
/// Podman is the podman configuration for dfinit.
///
/// Fields are (de)serialized with camelCase keys; missing fields fall back to their defaults.
#[derive(Debug, Clone, Default, Validate, Deserialize, Serialize)]
#[serde(default, rename_all = "camelCase")]
pub struct Podman {
/// config_path is the path of the podman registries configuration file.
/// Defaults to the value of default_container_runtime_podman_config_path.
#[serde(default = "default_container_runtime_podman_config_path")]
pub config_path: PathBuf,
/// unqualified_search_registries is an array of host[:port] registries to try when pulling an unqualified image, in order.
/// Refer to https://github.com/containers/image/blob/main/docs/containers-registries.conf.5.md#global-settings.
#[serde(default = "default_container_runtime_podman_unqualified_search_registries")]
pub unqualified_search_registries: Vec<String>,
/// registries is the list of podman registries, refer to
/// https://github.com/containers/image/blob/main/docs/containers-registries.conf.5.md#namespaced-registry-settings.
pub registries: Vec<PodmanRegistry>,
}
/// Docker is the docker configuration for dfinit.
///
/// Fields are (de)serialized with camelCase keys; missing fields fall back to their defaults.
#[derive(Debug, Clone, Default, Validate, Deserialize, Serialize)]
#[serde(default, rename_all = "camelCase")]
pub struct Docker {
/// config_path is the path of docker configuration file.
/// Defaults to the value of default_container_runtime_docker_config_path.
#[serde(default = "default_container_runtime_docker_config_path")]
pub config_path: PathBuf,
}
/// ContainerRuntime is the container runtime configuration for dfinit.
#[derive(Debug, Clone, Default, Validate, Deserialize, Serialize)]
#[serde(default, rename_all = "camelCase")]
pub struct ContainerRuntime {
/// config is the selected container runtime, or None when no runtime is configured.
/// It is flattened, so the runtime key (for example `containerd`) appears directly
/// under the container runtime section instead of under a `config` key.
#[serde(flatten)]
pub config: Option<ContainerRuntimeConfig>,
}
/// ContainerRuntimeConfig is the container runtime configuration for dfinit.
///
/// Exactly one runtime is held at a time; Serialize and Deserialize are implemented
/// by hand for this type rather than derived.
#[derive(Debug, Clone)]
pub enum ContainerRuntimeConfig {
/// Containerd holds the containerd configuration.
Containerd(Containerd),
/// Docker holds the docker configuration.
Docker(Docker),
/// CRIO holds the cri-o configuration.
CRIO(CRIO),
/// Podman holds the podman configuration.
Podman(Podman),
}
/// Serialize is the implementation of the Serialize trait for ContainerRuntimeConfig.
///
/// Each variant is written as a struct with a single field whose key is the runtime
/// name (`containerd`, `docker`, `crio` or `podman`) and whose value is that runtime's
/// configuration.
impl Serialize for ContainerRuntimeConfig {
    fn serialize<S>(&self, serializer: S) -> std::prelude::v1::Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        /// emit writes `cfg` as the only field of a struct named `runtime`.
        fn emit<S, T>(
            serializer: S,
            runtime: &'static str,
            cfg: &T,
        ) -> std::prelude::v1::Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
            T: Serialize,
        {
            let mut state = serializer.serialize_struct(runtime, 1)?;
            state.serialize_field(runtime, cfg)?;
            state.end()
        }

        match self {
            Self::Containerd(cfg) => emit(serializer, "containerd", cfg),
            Self::Docker(cfg) => emit(serializer, "docker", cfg),
            Self::CRIO(cfg) => emit(serializer, "crio", cfg),
            Self::Podman(cfg) => emit(serializer, "podman", cfg),
        }
    }
}
/// Deserialize is the implementation of the Deserialize trait for ContainerRuntimeConfig.
impl<'de> Deserialize<'de> for ContainerRuntimeConfig {
fn deserialize<D>(deserializer: D) -> std::prelude::v1::Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
#[derive(Deserialize)]
struct ContainerRuntimeHelper {
containerd: Option<Containerd>,
docker: Option<Docker>,
crio: Option<CRIO>,
podman: Option<Podman>,
}
let helper = ContainerRuntimeHelper::deserialize(deserializer)?;
match helper {
ContainerRuntimeHelper {
containerd: Some(containerd),
..
} => Ok(ContainerRuntimeConfig::Containerd(containerd)),
ContainerRuntimeHelper {
docker: Some(docker),
..
} => Ok(ContainerRuntimeConfig::Docker(docker)),
ContainerRuntimeHelper {
crio: Some(crio), ..
} => Ok(ContainerRuntimeConfig::CRIO(crio)),
ContainerRuntimeHelper {
podman: Some(podman),
..
} => Ok(ContainerRuntimeConfig::Podman(podman)),
_ => {
use serde::de::Error;
Err(D::Error::custom(
"expected containerd or docker or crio or podman",
))
}
}
}
}
/// Proxy is the proxy server configuration for dfdaemon.
///
/// Fields are (de)serialized with camelCase keys; missing fields fall back to their defaults.
#[derive(Debug, Clone, Validate, Deserialize, Serialize)]
#[serde(default, rename_all = "camelCase")]
pub struct Proxy {
/// addr is the proxy server address of dfdaemon.
/// Defaults to the value of default_proxy_addr.
#[serde(default = "default_proxy_addr")]
pub addr: String,
}
/// Proxy implements Default.
impl Default for Proxy {
fn default() -> Self {
Self {
addr: default_proxy_addr(),
}
}
}
/// Config is the configuration for dfinit.
///
/// Fields are (de)serialized with camelCase keys; missing fields fall back to their defaults.
/// Both fields are marked for nested validation.
#[derive(Debug, Clone, Default, Validate, Deserialize, Serialize)]
#[serde(default, rename_all = "camelCase")]
pub struct Config {
/// proxy is the configuration of the dfdaemon's HTTP/HTTPS proxy.
#[validate]
pub proxy: Proxy,
/// container_runtime is the container runtime configuration.
#[validate]
pub container_runtime: ContainerRuntime,
}
/// Config implements the config operation of dfinit.
impl Config {
    /// load reads the YAML file at `path`, parses it into a Config and validates it.
    ///
    /// A read failure is propagated with `?`, a parse failure is wrapped as
    /// ErrorType::ConfigError and a validation failure as ErrorType::ValidationError.
    #[instrument(skip_all)]
    pub fn load(path: &PathBuf) -> Result<Config> {
        // Read and parse the YAML document.
        let raw = fs::read_to_string(path)?;
        let parsed = serde_yaml::from_str::<Config>(&raw).or_err(ErrorType::ConfigError)?;
        info!("load config from {}", path.display());

        // Only hand the configuration back once it has passed validation.
        parsed.validate().or_err(ErrorType::ValidationError)?;
        Ok(parsed)
    }
}
// Unit tests for the dfinit configuration: default values plus YAML
// serialization and deserialization of the container runtime section.
#[cfg(test)]
mod tests {
use super::*;
use std::path::Path;
// The default config path is dfinit.yaml inside the default config directory.
#[test]
fn test_default_dfinit_config_path() {
let expected = crate::default_config_dir().join("dfinit.yaml");
assert_eq!(default_dfinit_config_path(), expected);
}
// The default log directory is NAME inside the default log directory.
#[test]
fn test_default_dfinit_log_dir() {
let expected = crate::default_log_dir().join(NAME);
assert_eq!(default_dfinit_log_dir(), expected);
}
// Each container runtime has a fixed default configuration path.
#[test]
fn test_container_runtime_default_paths() {
assert_eq!(
default_container_runtime_containerd_config_path(),
Path::new("/etc/containerd/config.toml")
);
assert_eq!(
default_container_runtime_docker_config_path(),
Path::new("/etc/docker/daemon.json")
);
assert_eq!(
default_container_runtime_crio_config_path(),
Path::new("/etc/containers/registries.conf")
);
assert_eq!(
default_container_runtime_podman_config_path(),
Path::new("/etc/containers/registries.conf")
);
}
// cri-o and podman share the same default unqualified search registries.
#[test]
fn test_default_unqualified_search_registries() {
let crio_registries = default_container_runtime_crio_unqualified_search_registries();
assert_eq!(
crio_registries,
vec![
"registry.fedoraproject.org",
"registry.access.redhat.com",
"docker.io"
]
);
let podman_registries = default_container_runtime_podman_unqualified_search_registries();
assert_eq!(
podman_registries,
vec![
"registry.fedoraproject.org",
"registry.access.redhat.com",
"docker.io"
]
);
}
// Serializing a runtime config nests it under the runtime name, both on its
// own and when flattened into a full Config.
#[test]
fn serialize_container_runtime() {
// A bare Containerd config built from Default::default().
let cfg = ContainerRuntimeConfig::Containerd(Containerd {
..Default::default()
});
let res = serde_yaml::to_string(&cfg).unwrap();
let expected = r#"
containerd:
configPath: ''
registries: []"#;
assert_eq!(expected.trim(), res.trim());
// A full Config with a docker runtime.
let runtime_cfg = ContainerRuntimeConfig::Docker(Docker {
config_path: PathBuf::from("/root/.dragonfly/config/dfinit/yaml"),
});
let cfg = Config {
container_runtime: ContainerRuntime {
config: Some(runtime_cfg),
},
proxy: Proxy {
addr: String::from("hello"),
},
};
let res = serde_yaml::to_string(&cfg).unwrap();
let expected = r#"
proxy:
addr: hello
containerRuntime:
docker:
configPath: /root/.dragonfly/config/dfinit/yaml"#;
assert_eq!(expected.trim(), res.trim());
// A full Config with a containerd runtime.
let runtime_cfg = ContainerRuntimeConfig::Containerd(Containerd {
config_path: PathBuf::from("/root/.dragonfly/config/dfinit/yaml"),
..Default::default()
});
let cfg = Config {
container_runtime: ContainerRuntime {
config: Some(runtime_cfg),
},
proxy: Proxy {
addr: String::from("hello"),
},
};
let res = serde_yaml::to_string(&cfg).unwrap();
let expected = r#"
proxy:
addr: hello
containerRuntime:
containerd:
configPath: /root/.dragonfly/config/dfinit/yaml
registries: []"#;
assert_eq!(expected.trim(), res.trim());
}
// Deserialization accepts a config without a runtime section and one with containerd.
#[test]
fn deserialize_container_runtime_correctly() {
// No containerRuntime section: the runtime config is None.
let raw_data = r#"
proxy:
addr: "hello"
"#;
let cfg: Config = serde_yaml::from_str(raw_data).expect("failed to deserialize");
assert!(cfg.container_runtime.config.is_none());
assert_eq!("hello".to_string(), cfg.proxy.addr);
// A containerd section yields the Containerd variant.
let raw_data = r#"
proxy:
addr: "hello"
containerRuntime:
containerd:
configPath: "test_path"
"#;
let cfg: Config = serde_yaml::from_str(raw_data).expect("failed to deserialize");
assert_eq!("hello".to_string(), cfg.proxy.addr);
if let Some(ContainerRuntimeConfig::Containerd(c)) = cfg.container_runtime.config {
assert_eq!(PathBuf::from("test_path"), c.config_path);
} else {
panic!("failed to deserialize");
}
}
// A crio section yields the CRIO variant with its registries preserved in order.
#[test]
fn deserialize_container_runtime_crio_correctly() {
let raw_data = r#"
proxy:
addr: "hello"
containerRuntime:
crio:
configPath: "test_path"
unqualifiedSearchRegistries:
- "reg1"
- "reg2"
registries:
- prefix: "prefix1"
location: "location1"
- prefix: "prefix2"
location: "location2"
"#;
let cfg: Config = serde_yaml::from_str(raw_data).expect("failed to deserialize");
if let Some(ContainerRuntimeConfig::CRIO(c)) = cfg.container_runtime.config {
assert_eq!(PathBuf::from("test_path"), c.config_path);
assert_eq!(vec!["reg1", "reg2"], c.unqualified_search_registries);
assert_eq!(
vec![
CRIORegistry {
location: "location1".to_string(),
prefix: "prefix1".to_string()
},
CRIORegistry {
location: "location2".to_string(),
prefix: "prefix2".to_string()
},
],
c.registries
);
} else {
panic!("failed to deserialize");
}
}
// A podman section yields the Podman variant with its registries preserved in order.
#[test]
fn deserialize_container_runtime_podman_correctly() {
let raw_data = r#"
proxy:
addr: "hello"
containerRuntime:
podman:
configPath: "test_path"
unqualifiedSearchRegistries:
- "reg1"
- "reg2"
registries:
- prefix: "prefix1"
location: "location1"
- prefix: "prefix2"
location: "location2"
"#;
let cfg: Config = serde_yaml::from_str(raw_data).expect("failed to deserialize");
if let Some(ContainerRuntimeConfig::Podman(c)) = cfg.container_runtime.config {
assert_eq!(PathBuf::from("test_path"), c.config_path);
assert_eq!(vec!["reg1", "reg2"], c.unqualified_search_registries);
assert_eq!(
vec![
PodmanRegistry {
location: "location1".to_string(),
prefix: "prefix1".to_string()
},
PodmanRegistry {
location: "location2".to_string(),
prefix: "prefix2".to_string()
},
],
c.registries
);
} else {
panic!("failed to deserialize");
}
}
}

View File

@ -1,167 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use clap::{Arg, Command};
use std::path::PathBuf;
pub mod dfcache;
pub mod dfdaemon;
pub mod dfget;
pub mod dfinit;
/// SERVICE_NAME is the name of the service.
pub const SERVICE_NAME: &str = "dragonfly";
/// NAME is the name of the package.
pub const NAME: &str = "client";
/// CARGO_PKG_VERSION is the version of the cargo package, read at compile time.
pub const CARGO_PKG_VERSION: &str = env!("CARGO_PKG_VERSION");
/// CARGO_PKG_RUSTC_VERSION is the minimum Rust version supported by the package, not the current Rust version.
pub const CARGO_PKG_RUSTC_VERSION: &str = env!("CARGO_PKG_RUST_VERSION");
/// BUILD_PLATFORM is the platform of the build, read from the BUILD_PLATFORM
/// environment variable at compile time.
pub const BUILD_PLATFORM: &str = env!("BUILD_PLATFORM");
/// BUILD_TIMESTAMP is the timestamp of the build, read from the BUILD_TIMESTAMP
/// environment variable at compile time.
pub const BUILD_TIMESTAMP: &str = env!("BUILD_TIMESTAMP");
/// GIT_COMMIT_SHORT_HASH is the short git commit hash of the package, or "unknown"
/// when the GIT_COMMIT_SHORT_HASH environment variable is not set at compile time.
pub const GIT_COMMIT_SHORT_HASH: &str =
    if let Some(short_hash) = option_env!("GIT_COMMIT_SHORT_HASH") {
        short_hash
    } else {
        "unknown"
    };
/// GIT_COMMIT_DATE is the git commit date of the package, or "unknown" when the
/// GIT_COMMIT_DATE environment variable is not set at compile time.
pub const GIT_COMMIT_DATE: &str = if let Some(commit_date) = option_env!("GIT_COMMIT_DATE") {
    commit_date
} else {
    "unknown"
};
/// default_root_dir is the default root directory for client.
///
/// Linux uses `/var/run/dragonfly/`; macOS uses `.dragonfly` under the home directory.
pub fn default_root_dir() -> PathBuf {
    #[cfg(target_os = "linux")]
    let dir = PathBuf::from("/var/run/dragonfly/");

    #[cfg(target_os = "macos")]
    let dir = home::home_dir().unwrap().join(".dragonfly");

    dir
}
/// default_config_dir is the default config directory for client.
///
/// Linux uses `/etc/dragonfly/`; macOS uses `.dragonfly/config` under the home directory.
pub fn default_config_dir() -> PathBuf {
    #[cfg(target_os = "linux")]
    let dir = PathBuf::from("/etc/dragonfly/");

    #[cfg(target_os = "macos")]
    let dir = home::home_dir().unwrap().join(".dragonfly").join("config");

    dir
}
/// default_log_dir is the default log directory for client.
///
/// Linux uses `/var/log/dragonfly/`; macOS uses `.dragonfly/logs` under the home directory.
pub fn default_log_dir() -> PathBuf {
    #[cfg(target_os = "linux")]
    let dir = PathBuf::from("/var/log/dragonfly/");

    #[cfg(target_os = "macos")]
    let dir = home::home_dir().unwrap().join(".dragonfly").join("logs");

    dir
}
/// default_storage_dir is the default storage directory for client.
///
/// Linux uses `/var/lib/dragonfly/`; macOS uses `.dragonfly/storage` under the home directory.
pub fn default_storage_dir() -> PathBuf {
    #[cfg(target_os = "linux")]
    let dir = PathBuf::from("/var/lib/dragonfly/");

    #[cfg(target_os = "macos")]
    let dir = home::home_dir().unwrap().join(".dragonfly").join("storage");

    dir
}
/// default_lock_dir is the default lock directory for client.
///
/// Linux uses `/var/lock/dragonfly/`; macOS uses `.dragonfly` under the home directory.
pub fn default_lock_dir() -> PathBuf {
    #[cfg(target_os = "linux")]
    let dir = PathBuf::from("/var/lock/dragonfly/");

    #[cfg(target_os = "macos")]
    let dir = home::home_dir().unwrap().join(".dragonfly");

    dir
}
/// default_plugin_dir is the default plugin directory for client.
///
/// Linux uses `/usr/local/lib/dragonfly/plugins/`; macOS uses `.dragonfly/plugins`
/// under the home directory.
pub fn default_plugin_dir() -> PathBuf {
    #[cfg(target_os = "linux")]
    let dir = PathBuf::from("/usr/local/lib/dragonfly/plugins/");

    #[cfg(target_os = "macos")]
    let dir = home::home_dir().unwrap().join(".dragonfly").join("plugins");

    dir
}
/// default_cache_dir is the default cache directory for client.
///
/// Linux uses `/var/cache/dragonfly/`; macOS uses `.dragonfly/cache` under the home directory.
pub fn default_cache_dir() -> PathBuf {
    #[cfg(target_os = "linux")]
    let dir = PathBuf::from("/var/cache/dragonfly/");

    #[cfg(target_os = "macos")]
    let dir = home::home_dir().unwrap().join(".dragonfly").join("cache");

    dir
}
/// VersionValueParser is a custom value parser for the version flag.
///
/// It carries no state; its behavior lives in its TypedValueParser implementation.
#[derive(Debug, Clone)]
pub struct VersionValueParser;
/// Implement the TypedValueParser trait for VersionValueParser.
impl clap::builder::TypedValueParser for VersionValueParser {
    type Value = bool;

    /// parse_ref prints the version line and exits the process with status 0 when
    /// `value` is exactly "true"; any other value yields `Ok(false)`.
    fn parse_ref(
        &self,
        cmd: &Command,
        _arg: Option<&Arg>,
        value: &std::ffi::OsStr,
    ) -> Result<Self::Value, clap::Error> {
        // Anything other than "true" means the version was not requested.
        if value != std::ffi::OsStr::new("true") {
            return Ok(false);
        }

        println!(
            "{} {} ({}, {})",
            cmd.get_name(),
            cmd.get_version().unwrap_or("unknown"),
            GIT_COMMIT_SHORT_HASH,
            GIT_COMMIT_DATE,
        );
        std::process::exit(0)
    }
}
// Unit tests for VersionValueParser.
#[cfg(test)]
mod tests {
use super::*;
use clap::{builder::TypedValueParser, Command};
use std::ffi::OsStr;
// A value other than "true" is parsed to Ok(false) without exiting the process.
#[test]
fn version_value_parser_references_non_real_values() {
let parser = VersionValueParser;
let cmd = Command::new("test_app");
let value = OsStr::new("false");
let result = parser.parse_ref(&cmd, None, value);
assert!(result.is_ok());
assert!(!result.unwrap());
}
}

View File

@ -1,24 +0,0 @@
[package]
name = "dragonfly-client-core"
description = "Core library for the dragonfly client"
version.workspace = true
authors.workspace = true
homepage.workspace = true
repository.workspace = true
keywords.workspace = true
license.workspace = true
edition.workspace = true
[dependencies]
reqwest.workspace = true
reqwest-middleware.workspace = true
thiserror.workspace = true
tonic.workspace = true
tonic-reflection.workspace = true
tokio.workspace = true
tokio-stream.workspace = true
hyper.workspace = true
hyper-util.workspace = true
opendal.workspace = true
url.workspace = true
headers.workspace = true

View File

@ -1,238 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use std::{error::Error as ErrorTrait, fmt};
use super::message::Message;
/// ErrorType is the type of the error.
///
/// It categorizes an ExternalError; `as_str` returns the variant name as a string.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ErrorType {
StorageError,
ConfigError,
SerializeError,
ValidationError,
ParseError,
CertificateError,
TLSConfigError,
AsyncRuntimeError,
StreamError,
ConnectError,
PluginError,
}
/// ErrorType implements the display for the error type.
impl ErrorType {
/// as_str returns the string of the error type.
pub fn as_str(&self) -> &'static str {
match self {
ErrorType::StorageError => "StorageError",
ErrorType::ConfigError => "ConfigError",
ErrorType::ValidationError => "ValidationError",
ErrorType::ParseError => "ParseError",
ErrorType::CertificateError => "CertificateError",
ErrorType::SerializeError => "SerializeError",
ErrorType::TLSConfigError => "TLSConfigError",
ErrorType::AsyncRuntimeError => "AsyncRuntimeError",
ErrorType::StreamError => "StreamError",
ErrorType::ConnectError => "ConnectError",
ErrorType::PluginError => "PluginError",
}
}
}
/// ExternalError is the external error.
///
/// It pairs an ErrorType with an optional underlying cause and an optional context message.
#[derive(Debug)]
pub struct ExternalError {
/// etype is the category of the error.
pub etype: ErrorType,
/// cause is the underlying error, if any.
pub cause: Option<Box<dyn ErrorTrait + Send + Sync>>,
/// context is an additional message describing the error, if any.
pub context: Option<Message>,
}
/// ExternalError implements the error trait.
impl ExternalError {
/// new returns a new ExternalError.
pub fn new(etype: ErrorType) -> Self {
ExternalError {
etype,
cause: None,
context: None,
}
}
/// with_context returns a new ExternalError with the context.
pub fn with_context(mut self, message: impl Into<Message>) -> Self {
self.context = Some(message.into());
self
}
/// with_cause returns a new ExternalError with the cause.
pub fn with_cause(mut self, cause: Box<dyn ErrorTrait + Send + Sync>) -> Self {
self.cause = Some(cause);
self
}
/// chain_display returns the display of the error with the previous error.
fn chain_display(
&self,
previous: Option<&ExternalError>,
f: &mut fmt::Formatter<'_>,
) -> fmt::Result {
if previous.map(|p| p.etype != self.etype).unwrap_or(true) {
write!(f, "{}", self.etype.as_str())?
}
if let Some(c) = self.context.as_ref() {
write!(f, " context: {}", c.as_str())?;
}
if let Some(c) = self.cause.as_ref() {
if let Some(e) = c.downcast_ref::<Box<ExternalError>>() {
write!(f, " cause: ")?;
e.chain_display(Some(self), f)
} else {
write!(f, " cause: {}", c)
}
} else {
Ok(())
}
}
}
/// ExternalError implements the display for the error.
impl fmt::Display for ExternalError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.chain_display(None, f)
}
}
/// ExternalError implements the error trait, using only the trait's default methods.
impl ErrorTrait for ExternalError {}
/// OrErr is the trait to extend the result with error.
pub trait OrErr<T, E> {
/// Wrap the E in [Result] with new [ErrorType], the existing E will be the cause.
///
/// This is a shortcut for map_err() + ExternalError::with_cause().
fn or_err(self, et: ErrorType) -> Result<T, ExternalError>
where
E: Into<Box<dyn ErrorTrait + Send + Sync>>;
/// Wrap the E in [Result] with new [ErrorType] and context, the existing E will be the cause.
fn or_context(self, et: ErrorType, context: &'static str) -> Result<T, ExternalError>
where
E: Into<Box<dyn ErrorTrait + Send + Sync>>;
}
/// OrErr implements the OrErr for Result.
impl<T, E> OrErr<T, E> for Result<T, E> {
    /// or_err passes an Ok value through and wraps an Err as the cause of a new
    /// ExternalError of type `et`.
    fn or_err(self, et: ErrorType) -> Result<T, ExternalError>
    where
        E: Into<Box<dyn ErrorTrait + Send + Sync>>,
    {
        match self {
            Ok(value) => Ok(value),
            Err(err) => Err(ExternalError::new(et).with_cause(err.into())),
        }
    }

    /// or_context behaves like or_err and also attaches `context` to the new error.
    fn or_context(self, et: ErrorType, context: &'static str) -> Result<T, ExternalError>
    where
        E: Into<Box<dyn ErrorTrait + Send + Sync>>,
    {
        match self {
            Ok(value) => Ok(value),
            Err(err) => {
                let wrapped = ExternalError::new(et)
                    .with_cause(err.into())
                    .with_context(context);
                Err(wrapped)
            }
        }
    }
}
/// BackendError is the error for backend.
///
/// Its display output is "backend error " followed by the message.
#[derive(Debug, thiserror::Error)]
#[error("backend error {message}")]
pub struct BackendError {
/// message is the error message.
pub message: String,
/// status_code is the status code of the response.
pub status_code: Option<reqwest::StatusCode>,
/// header is the headers of the response.
pub header: Option<reqwest::header::HeaderMap>,
}
/// DownloadFromParentFailed is the error when the download from parent is failed.
///
/// Its display output names both the piece number and the parent id.
#[derive(Debug, thiserror::Error)]
#[error("download piece {piece_number} from parent {parent_id} failed")]
pub struct DownloadFromParentFailed {
/// piece_number is the number of the piece.
pub piece_number: u32,
/// parent_id is the parent id of the piece.
pub parent_id: String,
}
// Unit tests for ExternalError formatting and the OrErr extension trait.
#[cfg(test)]
mod tests {
use super::*;
// Display output includes the error type, then the context, then the cause.
#[test]
fn should_create_error() {
// Context from a static string.
let error = ExternalError::new(ErrorType::StorageError).with_context("error message");
assert_eq!(format!("{}", error), "StorageError context: error message");
// Context from an owned string.
let error = ExternalError::new(ErrorType::StorageError)
.with_context(format!("error message {}", "with owned string"));
assert_eq!(
format!("{}", error),
"StorageError context: error message with owned string"
);
// Context plus a non-ExternalError cause.
let error = ExternalError::new(ErrorType::StorageError)
.with_context(format!("error message {}", "with owned string"))
.with_cause(Box::new(std::io::Error::new(
std::io::ErrorKind::Other,
"inner error",
)));
assert_eq!(
format!("{}", error),
"StorageError context: error message with owned string cause: inner error"
);
}
// or_err and or_context wrap the original error as the cause.
#[test]
fn should_extend_result_with_error() {
let result: Result<(), std::io::Error> = Err(std::io::Error::new(
std::io::ErrorKind::Other,
"inner error",
));
let error = result.or_err(ErrorType::StorageError).unwrap_err();
assert_eq!(format!("{}", error), "StorageError cause: inner error");
let result: Result<(), std::io::Error> = Err(std::io::Error::new(
std::io::ErrorKind::Other,
"inner error",
));
let error = result
.or_context(ErrorType::StorageError, "error message")
.unwrap_err();
assert_eq!(
format!("{}", error),
"StorageError context: error message cause: inner error"
);
}
}

View File

@ -1,59 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use std::borrow::Cow;
/// Message is the message for the error.
///
/// It wraps a Cow so that it can hold either a borrowed static string or an owned String.
#[derive(Debug)]
pub struct Message(Cow<'static, str>);
/// From<&'static str> for Message implements the conversion from &'static str to Message.
impl From<&'static str> for Message {
/// from returns the message from the string.
fn from(s: &'static str) -> Self {
Message(Cow::Borrowed(s))
}
}
/// From<String> for Message implements the conversion from String to Message.
impl From<String> for Message {
/// from returns the message from the string.
fn from(s: String) -> Self {
Message(Cow::Owned(s))
}
}
/// Message implements the message for the error.
impl Message {
/// as_str returns the string of the message.
pub fn as_str(&self) -> &str {
&self.0
}
}
// Unit tests for Message conversions.
#[cfg(test)]
mod tests {
use super::*;
// A Message can be built from a static string and from an owned String.
#[test]
fn test_message() {
let message: Message = "hello".into();
assert_eq!(message.as_str(), "hello");
let message: Message = "world".to_string().into();
assert_eq!(message.as_str(), "world");
}
}

View File

@ -1,268 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
pub mod errors;
pub mod message;
pub use errors::ErrorType;
pub use errors::ExternalError;
pub use errors::OrErr;
pub use errors::{BackendError, DownloadFromParentFailed};
/// DFError is the error for dragonfly.
///
/// Variants marked with `#[from]` get a From conversion from their source error.
/// Variants marked `#[error(transparent)]` forward their display output to the wrapped error.
#[derive(thiserror::Error, Debug)]
pub enum DFError {
/// IO is the error for IO operation.
#[error(transparent)]
IO(#[from] std::io::Error),
/// MpscSend is the error for mpsc send.
#[error("mpsc send: {0}")]
MpscSend(String),
/// SendTimeout is the error for send timeout.
#[error("send timeout")]
SendTimeout,
/// HashRing is the error for hashring.
#[error{"hashring {0} is failed"}]
HashRing(String),
/// NoSpace is the error when there is no space left on device.
#[error("no space left on device: {0}")]
NoSpace(String),
/// HostNotFound is the error when the host is not found.
#[error{"host {0} not found"}]
HostNotFound(String),
/// TaskNotFound is the error when the task is not found.
#[error{"task {0} not found"}]
TaskNotFound(String),
/// PieceNotFound is the error when the piece is not found.
#[error{"piece {0} not found"}]
PieceNotFound(String),
/// PieceStateIsFailed is the error when the piece state is failed.
#[error{"piece {0} state is failed"}]
PieceStateIsFailed(String),
/// DownloadPieceFinished is the error when the download piece finished timeout.
#[error{"download piece {0} finished timeout"}]
DownloadPieceFinished(String),
/// WaitForPieceFinishedTimeout is the error when the wait for piece finished timeout.
#[error{"wait for piece {0} finished timeout"}]
WaitForPieceFinishedTimeout(String),
/// AvailableManagerNotFound is the error when the available manager is not found.
#[error{"available manager not found"}]
AvailableManagerNotFound,
/// AvailableSchedulersNotFound is the error when the available schedulers is not found.
#[error{"available schedulers not found"}]
AvailableSchedulersNotFound,
/// DownloadFromParentFailed is the error when the download from parent is failed.
#[error(transparent)]
DownloadFromParentFailed(DownloadFromParentFailed),
/// ColumnFamilyNotFound is the error when the column family is not found.
#[error{"column family {0} not found"}]
ColumnFamilyNotFound(String),
/// InvalidStateTransition is the error when the state transition is invalid.
#[error{"can not transit from {0} to {1}"}]
InvalidStateTransition(String, String),
/// InvalidState is the error when the state is invalid.
#[error{"invalid state {0}"}]
InvalidState(String),
/// InvalidURI is the error when the uri is invalid.
#[error("invalid uri {0}")]
InvalidURI(String),
/// InvalidPeer is the error when the peer is invalid.
#[error("invalid peer {0}")]
InvalidPeer(String),
/// SchedulerClientNotFound is the error when the scheduler client is not found.
#[error{"scheduler client not found"}]
SchedulerClientNotFound,
/// UnexpectedResponse is the error when the response is unexpected.
#[error{"unexpected response"}]
UnexpectedResponse,
/// DigestMismatch is the error when the digest is mismatch.
#[error{"digest mismatch expected: {0}, actual: {1}"}]
DigestMismatch(String, String),
/// ContentLengthMismatch is the error when the content length is mismatch.
#[error("content length mismatch expected: {0}, actual: {1}")]
ContentLengthMismatch(u64, u64),
/// MaxScheduleCountExceeded is the error when the max schedule count is exceeded.
#[error("max schedule count {0} exceeded")]
MaxScheduleCountExceeded(u32),
/// InvalidContentLength is the error when the content length is invalid.
#[error("invalid content length")]
InvalidContentLength,
/// InvalidPieceLength is the error when the piece length is invalid.
#[error("invalid piece length")]
InvalidPieceLength,
/// InvalidParameter is the error when the parameter is invalid.
#[error("invalid parameter")]
InvalidParameter,
/// Infallible is the error for infallible.
#[error(transparent)]
Infallible(#[from] std::convert::Infallible),
/// Utf8 is the error for utf8.
#[error(transparent)]
Utf8(#[from] std::str::Utf8Error),
/// Unknown is the error when the error is unknown.
#[error("unknown {0}")]
Unknown(String),
/// Unimplemented is the error when the feature is not implemented.
#[error{"unimplemented"}]
Unimplemented,
/// EmptyHTTPRangeError is the error when the range fallback error is empty.
#[error{"RangeUnsatisfiable: Failed to parse range fallback error, please file an issue"}]
EmptyHTTPRangeError,
/// Unauthorized is the error for unauthorized.
#[error{"unauthorized"}]
Unauthorized,
/// TonicStatus is the error for tonic status.
#[error(transparent)]
TonicStatus(#[from] tonic::Status),
/// TonicTransportError is the error for tonic transport.
#[error(transparent)]
TonicTransportError(#[from] tonic::transport::Error),
/// TonicReflectionServerError is the error for tonic reflection server.
#[error(transparent)]
TonicReflectionServerError(#[from] tonic_reflection::server::Error),
/// TokioStreamElapsed is the error for tokio stream elapsed.
#[error(transparent)]
TokioStreamElapsed(#[from] tokio_stream::Elapsed),
/// HeadersError is the error for headers.
#[error(transparent)]
HeadersError(#[from] headers::Error),
/// URLParseError is the error for url parse.
#[error(transparent)]
URLParseError(#[from] url::ParseError),
/// ReqwestError is the error for reqwest.
#[error(transparent)]
ReqwestError(#[from] reqwest::Error),
/// ReqwestMiddlewareError is the error for reqwest middleware.
#[error(transparent)]
ReqwestMiddlewareError(#[from] reqwest_middleware::Error),
/// OpenDALError is the error for opendal.
#[error(transparent)]
OpenDALError(#[from] opendal::Error),
/// HyperError is the error for hyper.
#[error(transparent)]
HyperError(#[from] hyper::Error),
/// BackendError is the error for backend.
#[error(transparent)]
BackendError(Box<BackendError>),
/// HyperUtilClientLegacyError is the error for hyper util client legacy.
#[error(transparent)]
HyperUtilClientLegacyError(#[from] hyper_util::client::legacy::Error),
/// ExternalError is the error for external error.
#[error(transparent)]
ExternalError(#[from] ExternalError),
/// MaxDownloadFilesExceeded is the error for max download files exceeded.
#[error("max number of files to download exceeded: {0}")]
MaxDownloadFilesExceeded(usize),
/// Unsupported is the error for unsupported.
#[error("unsupported {0}")]
Unsupported(String),
/// TokioJoinError is the error for tokio join.
#[error(transparent)]
TokioJoinError(tokio::task::JoinError),
/// ValidationError is the error for validate.
#[error("validate failed: {0}")]
ValidationError(String),
}
/// SendError is the error for send.
impl<T> From<tokio::sync::mpsc::error::SendError<T>> for DFError {
fn from(e: tokio::sync::mpsc::error::SendError<T>) -> Self {
Self::MpscSend(e.to_string())
}
}
/// SendTimeoutError is the error for send timeout.
impl<T> From<tokio::sync::mpsc::error::SendTimeoutError<T>> for DFError {
fn from(err: tokio::sync::mpsc::error::SendTimeoutError<T>) -> Self {
match err {
tokio::sync::mpsc::error::SendTimeoutError::Timeout(_) => Self::SendTimeout,
tokio::sync::mpsc::error::SendTimeoutError::Closed(_) => Self::SendTimeout,
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An `ExternalError` carrying an I/O cause must convert into a `DFError`
    /// whose display text names the error type followed by the cause.
    #[test]
    fn should_convert_externalerror_to_dferror() {
        // Simulates a lower-level call that always fails with an I/O error.
        fn failing_io_call() -> Result<(), std::io::Error> {
            Err(std::io::Error::new(
                std::io::ErrorKind::Other,
                "inner error",
            ))
        }

        // Wraps the I/O failure in an ExternalError and converts it to a DFError.
        fn wrap_as_dferror() -> Result<(), DFError> {
            match failing_io_call() {
                Ok(()) => Ok(()),
                Err(io_error) => {
                    let external = ExternalError::new(crate::error::ErrorType::StorageError)
                        .with_cause(io_error.into());
                    Err(external.into())
                }
            }
        }

        let converted = wrap_as_dferror().unwrap_err();
        assert_eq!(converted.to_string(), "StorageError cause: inner error");
    }
}

View File

@ -1,28 +0,0 @@
[package]
name = "dragonfly-client-init"
description = "Initialize runtime environment of the dfdaemon"
version.workspace = true
authors.workspace = true
homepage.workspace = true
repository.workspace = true
keywords.workspace = true
license.workspace = true
readme.workspace = true
edition.workspace = true
[[bin]]
name = "dfinit"
path = "src/bin/main.rs"
[dependencies]
dragonfly-client.workspace = true
dragonfly-client-config.workspace = true
dragonfly-client-core.workspace = true
clap.workspace = true
tokio.workspace = true
anyhow.workspace = true
tracing.workspace = true
toml_edit.workspace = true
url.workspace = true
tempfile.workspace = true
serde_json.workspace = true

View File

@ -1,113 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use clap::Parser;
use dragonfly_client::tracing::init_tracing;
use dragonfly_client_config::dfinit;
use dragonfly_client_config::VersionValueParser;
use dragonfly_client_init::container_runtime;
use std::path::PathBuf;
use tracing::{error, Level};
// Args declares the command line interface of dfinit. The parser is derived by
// clap from the attributes below; `main` consumes the parsed fields.
// NOTE: plain `//` comments are used on purpose so that no comment text is
// picked up by the clap derive as help output.
#[derive(Debug, Parser)]
#[command(
name = dfinit::NAME,
author,
version,
about = "dfinit is a command line for initializing runtime environment of the dfdaemon",
long_about = "A command line for initializing runtime environment of the dfdaemon, \
For example, if the container's runtime is containerd, then dfinit will modify the mirror configuration of containerd and restart the containerd service. \
It also supports to change configuration of the other container's runtime, such as cri-o, docker, etc.",
disable_version_flag = true
)]
struct Args {
// Path of the dfinit config file; `main` passes it to `dfinit::Config::load`.
// Defaults to `dfinit::default_dfinit_config_path()`.
#[arg(
short = 'c',
long = "config",
default_value_os_t = dfinit::default_dfinit_config_path(),
help = "Specify config file to use")
]
config: PathBuf,
// Logging level handed to `init_tracing`; defaults to "info".
#[arg(
short = 'l',
long,
default_value = "info",
help = "Specify the logging level [trace, debug, info, warn, error]"
)]
log_level: Level,
// Log directory handed to `init_tracing`; defaults to
// `dfinit::default_dfinit_log_dir()`.
#[arg(
long,
default_value_os_t = dfinit::default_dfinit_log_dir(),
help = "Specify the log directory"
)]
log_dir: PathBuf,
// Maximum number of log files handed to `init_tracing`; defaults to 24.
#[arg(
long,
default_value_t = 24,
help = "Specify the max number of log files"
)]
log_max_files: usize,
// Console flag handed to `init_tracing` as its last argument; defaults to false.
#[arg(long, default_value_t = false, help = "Specify whether to print log")]
console: bool,
// Custom -V/--version flag (the built-in one is disabled above). Its value is
// parsed through `VersionValueParser`; `main` never reads this field.
#[arg(
short = 'V',
long = "version",
help = "Print version information",
default_value_t = false,
action = clap::ArgAction::SetTrue,
value_parser = VersionValueParser
)]
version: bool,
}
#[tokio::main]
async fn main() -> Result<(), anyhow::Error> {
// Parse command line arguments.
let args = Args::parse();
// Initialize tracing.
let _guards = init_tracing(
dfinit::NAME,
args.log_dir,
args.log_level,
args.log_max_files,
None,
None,
None,
None,
None,
false,
args.console,
);
// Load config.
let config = dfinit::Config::load(&args.config).inspect_err(|err| {
error!("failed to load config: {}", err);
})?;
// Handle features of the container runtime.
let container_runtime = container_runtime::ContainerRuntime::new(&config);
container_runtime.run().await.inspect_err(|err| {
error!("failed to run container runtime: {}", err);
})?;
Ok(())
}

View File

@ -1,252 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_client::proxy::header::DRAGONFLY_REGISTRY_HEADER;
use dragonfly_client_config::dfinit::{self, ContainerdRegistry};
use dragonfly_client_core::{
error::{ErrorType, OrErr},
Error, Result,
};
use std::path::PathBuf;
use tokio::{self, fs};
use toml_edit::{value, Array, DocumentMut, Item, Table, Value};
use tracing::{info, instrument};
/// Containerd represents the containerd runtime manager.
///
/// It is constructed with `Containerd::new` and performs its work in
/// `Containerd::run`.
#[derive(Debug, Clone)]
pub struct Containerd {
/// config is the configuration for initializing
/// runtime environment for the dfdaemon.
/// `run` reads its `config_path` (the containerd config file) and its
/// `registries`.
config: dfinit::Containerd,
/// proxy_config is the configuration for the dfdaemon's proxy server.
/// Its `addr` becomes the host key of every generated `hosts.toml`.
proxy_config: dfinit::Proxy,
}
/// Containerd implements the containerd runtime manager.
impl Containerd {
/// new creates a new containerd runtime manager.
#[instrument(skip_all)]
pub fn new(config: dfinit::Containerd, proxy_config: dfinit::Proxy) -> Self {
Self {
config,
proxy_config,
}
}
/// run runs the containerd runtime to initialize
/// runtime environment for the dfdaemon.
#[instrument(skip_all)]
pub async fn run(&self) -> Result<()> {
let content = fs::read_to_string(&self.config.config_path).await?;
let mut containerd_config = content
.parse::<DocumentMut>()
.or_err(ErrorType::ParseError)?;
// If containerd supports config_path mode and config_path is not empty,
// add registries to the certs.d directory.
if let Some(config_path) = containerd_config
.get("plugins")
.and_then(|plugins| plugins.get("io.containerd.grpc.v1.cri"))
.and_then(|cri| cri.get("registry"))
.and_then(|registry| registry.get("config_path"))
.and_then(|config_path| config_path.as_str())
.filter(|config_path| !config_path.is_empty())
{
// Rebind config_path to the first entry if multiple paths are present
let config_path = config_path.split(':').next().unwrap_or(config_path);
info!(
"containerd supports config_path mode, config_path: {}",
config_path.to_string()
);
return self
.add_registries(
config_path,
self.config.registries.clone(),
self.proxy_config.clone(),
)
.await;
}
// If containerd does not support mirror mode and config_path not set, create a new
// config_path for the registries.
info!("containerd not supports mirror mode and config_path not set");
let config_path = "/etc/containerd/certs.d";
// Add config_path to the containerd configuration.
let mut registry_table = Table::new();
registry_table.set_implicit(true);
registry_table.insert("config_path", value(config_path));
containerd_config["plugins"]["io.containerd.grpc.v1.cri"]
.as_table_mut()
.ok_or(Error::Unknown(
"io.containerd.grpc.v1.cri not found".to_string(),
))?
.insert("registry", Item::Table(registry_table));
// Override containerd configuration.
info!("override containerd configuration");
fs::write(
&self.config.config_path,
containerd_config.to_string().as_bytes(),
)
.await?;
self.add_registries(
config_path,
self.config.registries.clone(),
self.proxy_config.clone(),
)
.await?;
Ok(())
}
/// add_registries adds registries to the containerd configuration, when containerd supports
/// config_path mode and config_path is not empty.
#[instrument(skip_all)]
pub async fn add_registries(
&self,
config_path: &str,
registries: Vec<ContainerdRegistry>,
proxy_config: dfinit::Proxy,
) -> Result<()> {
for registry in registries {
info!("add registry: {:?}", registry);
let mut registry_table = toml_edit::DocumentMut::new();
registry_table.set_implicit(true);
registry_table.insert("server", value(registry.server_addr.clone()));
let mut host_config_table = Table::new();
host_config_table.set_implicit(true);
// Add capabilities to the host configuration.
let mut capabilities = Array::default();
for capability in registry.capabilities {
capabilities.push(Value::from(capability));
}
host_config_table.insert("capabilities", value(capabilities));
// Add insecure to the host configuration.
if let Some(skip_verify) = registry.skip_verify {
host_config_table.insert("skip_verify", value(skip_verify));
}
// Add ca to the host configuration.
let mut certs = Array::default();
if let Some(ca) = registry.ca {
for cert in ca {
certs.push(Value::from(cert));
}
host_config_table.insert("ca", Item::Value(Value::Array(certs)));
}
// Add X-Dragonfly-Registry header to the host configuration.
let mut headers_table = Table::new();
headers_table.insert(DRAGONFLY_REGISTRY_HEADER, value(registry.server_addr));
host_config_table.insert("header", Item::Table(headers_table));
// Add host configuration to the registry table.
let mut host_table = Table::new();
host_table.set_implicit(true);
host_table.insert(proxy_config.addr.as_str(), Item::Table(host_config_table));
registry_table.insert("host", Item::Table(host_table));
let registry_config_dir = PathBuf::from(config_path).join(registry.host_namespace);
fs::create_dir_all(registry_config_dir.as_os_str()).await?;
fs::write(
registry_config_dir.join("hosts.toml").as_os_str(),
registry_table.to_string().as_bytes(),
)
.await?;
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
use tokio::fs;
// Checks the config_path branch of `Containerd::run`: the containerd config
// already points `registry.config_path` at a temporary certs.d directory, so
// `run` is expected to write <certs.d>/docker.io/hosts.toml with the server,
// host, capabilities, skip_verify, ca and header entries asserted below.
#[tokio::test]
async fn test_containerd_config_with_existing_config_path() {
let temp_dir = TempDir::new().unwrap();
let config_path = temp_dir.path().join("config.toml");
let certs_dir = temp_dir.path().join("certs.d");
let certs_dir_str = certs_dir.to_str().unwrap();
// Create initial containerd config with config_path
let initial_config = format!(
r#"
[plugins]
[plugins."io.containerd.grpc.v1.cri"]
[plugins."io.containerd.grpc.v1.cri".registry]
config_path = "{}"
"#,
certs_dir_str
);
fs::write(&config_path, initial_config).await.unwrap();
// Create Containerd instance
let containerd = Containerd::new(
dfinit::Containerd {
config_path: config_path.clone(),
registries: vec![ContainerdRegistry {
host_namespace: "docker.io".into(),
server_addr: "https://registry.example.com".into(),
skip_verify: Some(true),
ca: Some(vec!["test-ca-cert".into()]),
capabilities: vec!["pull".into(), "resolve".into()],
}],
},
dfinit::Proxy {
addr: "http://127.0.0.1:65001".into(),
},
);
// Run containerd configuration
let result = containerd.run().await;
// On failure, print the error and the current config file to ease debugging
// before the assertion below fails the test.
if let Err(e) = &result {
println!("Error: {:?}", e);
if let Ok(contents) = fs::read_to_string(&config_path).await {
println!("Current config file contents:\n{}", contents);
}
}
assert!(result.is_ok());
// Verify the hosts.toml file content
let hosts_file_path = certs_dir.join("docker.io").join("hosts.toml");
let contents = fs::read_to_string(&hosts_file_path).await.unwrap();
let expected_contents = r#"server = "https://registry.example.com"
[host."http://127.0.0.1:65001"]
capabilities = ["pull", "resolve"]
skip_verify = true
ca = ["test-ca-cert"]
[host."http://127.0.0.1:65001".header]
X-Dragonfly-Registry = "https://registry.example.com"
"#;
// Leading and trailing whitespace is ignored on both sides of the comparison.
assert_eq!(contents.trim(), expected_contents.trim());
}
}

View File

@ -1,164 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_client_config::dfinit;
use dragonfly_client_core::{
error::{ErrorType, OrErr},
Error, Result,
};
use tokio::{self, fs};
use toml_edit::{value, Array, ArrayOfTables, Item, Table, Value};
use tracing::{info, instrument};
use url::Url;
/// CRIO represents the cri-o runtime manager.
///
/// It is constructed with `CRIO::new` and performs its work in `CRIO::run`.
#[derive(Debug, Clone)]
pub struct CRIO {
/// config is the configuration for initializing
/// runtime environment for the dfdaemon.
/// `run` reads its `config_path`, `registries` and
/// `unqualified_search_registries`.
config: dfinit::CRIO,
/// proxy_config is the configuration for the dfdaemon's proxy server.
/// Its `addr` is parsed as a URL to derive the `host:port` mirror location.
proxy_config: dfinit::Proxy,
}
/// CRIO implements the cri-o runtime manager.
impl CRIO {
/// new creates a new cri-o runtime manager.
#[instrument(skip_all)]
pub fn new(config: dfinit::CRIO, proxy_config: dfinit::Proxy) -> Self {
Self {
config,
proxy_config,
}
}
/// run runs the cri-o runtime to initialize
/// runtime environment for the dfdaemon.
#[instrument(skip_all)]
pub async fn run(&self) -> Result<()> {
let mut registries_config_table = toml_edit::DocumentMut::new();
registries_config_table.set_implicit(true);
// Add unqualified-search-registries to registries config.
let mut unqualified_search_registries = Array::default();
for unqualified_search_registry in self.config.unqualified_search_registries.clone() {
unqualified_search_registries.push(Value::from(unqualified_search_registry));
}
registries_config_table.insert(
"unqualified-search-registries",
value(unqualified_search_registries),
);
// Parse proxy address to get host and port.
let proxy_url =
Url::parse(self.proxy_config.addr.as_str()).or_err(ErrorType::ParseError)?;
let proxy_host = proxy_url
.host_str()
.ok_or(Error::Unknown("host not found".to_string()))?;
let proxy_port = proxy_url
.port_or_known_default()
.ok_or(Error::Unknown("port not found".to_string()))?;
let proxy_location = format!("{}:{}", proxy_host, proxy_port);
// Add registries to the registries config.
let mut registries_table = ArrayOfTables::new();
for registry in self.config.registries.clone() {
info!("add registry: {:?}", registry);
let mut registry_mirror_table = Table::new();
registry_mirror_table.set_implicit(true);
registry_mirror_table.insert("insecure", value(true));
registry_mirror_table.insert("location", value(proxy_location.as_str()));
let mut registry_mirrors_table = ArrayOfTables::new();
registry_mirrors_table.push(registry_mirror_table);
let mut registry_table = Table::new();
registry_table.set_implicit(true);
registry_table.insert("prefix", value(registry.prefix));
registry_table.insert("location", value(registry.location));
registry_table.insert("mirror", Item::ArrayOfTables(registry_mirrors_table));
registries_table.push(registry_table);
}
registries_config_table.insert("registry", Item::ArrayOfTables(registries_table));
let registries_config_dir = self
.config
.config_path
.parent()
.ok_or(Error::Unknown("invalid config path".to_string()))?;
fs::create_dir_all(registries_config_dir.as_os_str()).await?;
fs::write(
self.config.config_path.as_os_str(),
registries_config_table.to_string().as_bytes(),
)
.await?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
// Runs `CRIO::run` against a temporary config file and compares the written
// file with the exact expected registries configuration: one registry whose
// mirror location is the host:port of the proxy address.
#[tokio::test]
async fn test_crio_config() {
use tempfile::NamedTempFile;
let crio_config_file = NamedTempFile::new().unwrap();
let crio = CRIO::new(
dfinit::CRIO {
config_path: crio_config_file.path().to_path_buf(),
registries: vec![dfinit::CRIORegistry {
prefix: "registry.example.com".into(),
location: "registry.example.com".into(),
}],
unqualified_search_registries: vec!["registry.example.com".into()],
},
dfinit::Proxy {
addr: "http://127.0.0.1:65001".into(),
},
);
let result = crio.run().await;
assert!(result.is_ok());
// get the contents of the file
let contents = fs::read_to_string(crio_config_file.path().to_path_buf())
.await
.unwrap();
let expected_contents = r#"unqualified-search-registries = ["registry.example.com"]
[[registry]]
prefix = "registry.example.com"
location = "registry.example.com"
[[registry.mirror]]
insecure = true
location = "127.0.0.1:65001"
"#;
// assert that the contents of the file are as expected
assert_eq!(contents, expected_contents);
// clean up
fs::remove_file(crio_config_file.path().to_path_buf())
.await
.unwrap();
}
}

View File

@ -1,253 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_client_config::dfinit;
use dragonfly_client_core::{
error::{ErrorType, OrErr},
Error, Result,
};
use serde_json::{json, Value};
use tokio::{self, fs};
use tracing::{info, instrument};
use url::Url;
/// Docker represents the docker runtime manager.
///
/// It is constructed with `Docker::new` and performs its work in `Docker::run`.
#[derive(Debug, Clone)]
pub struct Docker {
/// config is the configuration for initializing
/// runtime environment for the dfdaemon.
/// `run` reads and rewrites the file at its `config_path`.
config: dfinit::Docker,
/// proxy_config is the configuration for the dfdaemon's proxy server.
/// Its `addr` is parsed as a URL to derive the proxy `host:port`.
proxy_config: dfinit::Proxy,
}
/// Docker implements the docker runtime manager.
impl Docker {
/// new creates a new docker runtime manager.
#[instrument(skip_all)]
pub fn new(config: dfinit::Docker, proxy_config: dfinit::Proxy) -> Self {
Self {
config,
proxy_config,
}
}
/// run runs the docker runtime to initialize
/// runtime environment for the dfdaemon.
#[instrument(skip_all)]
pub async fn run(&self) -> Result<()> {
info!(
"docker feature is enabled, proxy_addr: {}, config_path: {:?}",
self.proxy_config.addr, self.config.config_path,
);
// Parse proxy address to get host and port.
let proxy_url = Url::parse(&self.proxy_config.addr).or_err(ErrorType::ParseError)?;
let proxy_host = proxy_url
.host_str()
.ok_or(Error::Unknown("host not found".to_string()))?;
let proxy_port = proxy_url
.port_or_known_default()
.ok_or(Error::Unknown("port not found".to_string()))?;
let proxy_location = format!("{}:{}", proxy_host, proxy_port);
// Prepare proxies configuration.
let mut proxies_map = serde_json::Map::new();
proxies_map.insert(
"http-proxy".to_string(),
json!(format!("http://{}", proxy_location)),
);
proxies_map.insert(
"https-proxy".to_string(),
json!(format!("http://{}", proxy_location)),
);
let config_path = &self.config.config_path;
let mut docker_config: serde_json::Map<String, Value> = if config_path.exists() {
let contents = fs::read_to_string(config_path).await?;
if contents.trim().is_empty() {
serde_json::Map::new()
} else {
serde_json::from_str(&contents).or_err(ErrorType::ParseError)?
}
} else {
serde_json::Map::new()
};
// Insert or update proxies configuration.
docker_config.insert("proxies".to_string(), Value::Object(proxies_map));
// Create config directory if it doesn't exist.
let config_dir = config_path
.parent()
.ok_or(Error::Unknown("invalid config path".to_string()))?;
fs::create_dir_all(config_dir).await?;
// Write configuration to file.
fs::write(
config_path,
serde_json::to_string_pretty(&Value::Object(docker_config))
.or_err(ErrorType::SerializeError)?,
)
.await?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::NamedTempFile;
use tokio::fs;
// An existing but empty config file: `run` must succeed and write both proxy
// entries pointing at the dfdaemon proxy.
#[tokio::test]
async fn test_docker_config_empty() {
let docker_config_file = NamedTempFile::new().unwrap();
let docker = Docker::new(
dfinit::Docker {
config_path: docker_config_file.path().to_path_buf(),
},
dfinit::Proxy {
addr: "http://127.0.0.1:5000".into(),
},
);
let result = docker.run().await;
println!("{:?}", result);
assert!(result.is_ok());
// Read and verify configuration.
let contents = fs::read_to_string(docker_config_file.path()).await.unwrap();
let config: serde_json::Value = serde_json::from_str(&contents).unwrap();
// Verify proxies configuration.
assert_eq!(config["proxies"]["http-proxy"], "http://127.0.0.1:5000");
assert_eq!(config["proxies"]["https-proxy"], "http://127.0.0.1:5000");
}
// A config file that already holds unrelated keys: those keys must survive and
// the proxy entries must be added.
#[tokio::test]
async fn test_docker_config_existing() {
let docker_config_file = NamedTempFile::new().unwrap();
let initial_config = r#"
{
"log-driver": "json-file",
"experimental": true
}
"#;
fs::write(docker_config_file.path(), initial_config)
.await
.unwrap();
let docker = Docker::new(
dfinit::Docker {
config_path: docker_config_file.path().to_path_buf(),
},
dfinit::Proxy {
addr: "http://127.0.0.1:5000".into(),
},
);
let result = docker.run().await;
assert!(result.is_ok());
// Read and verify configuration.
let contents = fs::read_to_string(docker_config_file.path()).await.unwrap();
let config: serde_json::Value = serde_json::from_str(&contents).unwrap();
// Verify existing configurations.
assert_eq!(config["log-driver"], "json-file");
assert_eq!(config["experimental"], true);
// Verify proxies configuration.
assert_eq!(config["proxies"]["http-proxy"], "http://127.0.0.1:5000");
assert_eq!(config["proxies"]["https-proxy"], "http://127.0.0.1:5000");
}
// A config file with a trailing comma is not valid JSON: `run` must fail and
// the error text must match the parse error asserted below. The asserted line
// and column depend on the exact whitespace of the fixture.
#[tokio::test]
async fn test_docker_config_invalid_json() {
let docker_config_file = NamedTempFile::new().unwrap();
let invalid_config = r#"
{
"log-driver": "json-file",
"experimental": true,
}
"#;
fs::write(docker_config_file.path(), invalid_config)
.await
.unwrap();
let docker = Docker::new(
dfinit::Docker {
config_path: docker_config_file.path().to_path_buf(),
},
dfinit::Proxy {
addr: "http://127.0.0.1:5000".into(),
},
);
let result = docker.run().await;
assert!(result.is_err());
if let Err(e) = result {
assert_eq!(
format!("{}", e),
"ParseError cause: trailing comma at line 5 column 9"
);
}
}
// A config file that already has a `proxies` object: the http and https proxy
// values must be overwritten and unrelated top-level keys kept. The
// pre-existing `no-proxy` entry is not checked by this test.
#[tokio::test]
async fn test_docker_config_proxies_existing() {
let docker_config_file = NamedTempFile::new().unwrap();
let existing_proxies = r#"
{
"proxies": {
"http-proxy": "http://old-proxy:3128",
"https-proxy": "https://old-proxy:3129",
"no-proxy": "old-no-proxy"
},
"log-driver": "json-file"
}
"#;
fs::write(docker_config_file.path(), existing_proxies)
.await
.unwrap();
let docker = Docker::new(
dfinit::Docker {
config_path: docker_config_file.path().to_path_buf(),
},
dfinit::Proxy {
addr: "http://127.0.0.1:5000".into(),
},
);
let result = docker.run().await;
assert!(result.is_ok());
// Read and verify configuration.
let contents = fs::read_to_string(docker_config_file.path()).await.unwrap();
let config: serde_json::Value = serde_json::from_str(&contents).unwrap();
// Verify existing configurations.
assert_eq!(config["log-driver"], "json-file");
// Verify proxies configuration.
assert_eq!(config["proxies"]["http-proxy"], "http://127.0.0.1:5000");
assert_eq!(config["proxies"]["https-proxy"], "http://127.0.0.1:5000");
}
}

View File

@ -1,124 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_client_config::dfinit::{Config, ContainerRuntimeConfig};
use dragonfly_client_core::Result;
use tracing::{info, instrument};
pub mod containerd;
pub mod crio;
pub mod docker;
pub mod podman;
/// Engine represents config of the container runtime engine.
///
/// Each variant wraps the runtime manager that `ContainerRuntime::run`
/// dispatches to.
#[derive(Debug, Clone)]
enum Engine {
/// Containerd wraps the containerd runtime manager.
Containerd(containerd::Containerd),
/// Docker wraps the docker runtime manager.
Docker(docker::Docker),
/// Crio wraps the cri-o runtime manager.
Crio(crio::CRIO),
/// Podman wraps the podman runtime manager.
Podman(podman::Podman),
}
/// ContainerRuntime represents the container runtime manager.
pub struct ContainerRuntime {
/// engine is the runtime selected from the config by `get_engine`;
/// it is `None` when the config sets no container runtime, in which
/// case `run` does nothing and returns `Ok(())`.
engine: Option<Engine>,
}
/// ContainerRuntime implements the container runtime manager.
impl ContainerRuntime {
    /// new builds the manager, selecting the engine described by `config`.
    #[instrument(skip_all)]
    pub fn new(config: &Config) -> Self {
        let engine = Self::get_engine(config);
        Self { engine }
    }

    /// run delegates to the selected engine to initialize the runtime
    /// environment for the dfdaemon; without an engine it succeeds immediately.
    #[instrument(skip_all)]
    pub async fn run(&self) -> Result<()> {
        let Some(engine) = &self.engine else {
            return Ok(());
        };

        match engine {
            Engine::Containerd(manager) => manager.run().await,
            Engine::Docker(manager) => manager.run().await,
            Engine::Crio(manager) => manager.run().await,
            Engine::Podman(manager) => manager.run().await,
        }
    }

    /// get_engine maps the container runtime section of the config to an
    /// engine, pairing it with a copy of the proxy config. Returns `None`
    /// when no container runtime is configured.
    #[instrument(skip_all)]
    fn get_engine(config: &Config) -> Option<Engine> {
        let Some(runtime_config) = config.container_runtime.config.as_ref() else {
            info!("container runtime engine is not set");
            return None;
        };

        let proxy = config.proxy.clone();
        let engine = match runtime_config {
            ContainerRuntimeConfig::Containerd(settings) => {
                Engine::Containerd(containerd::Containerd::new(settings.clone(), proxy))
            }
            ContainerRuntimeConfig::Docker(settings) => {
                Engine::Docker(docker::Docker::new(settings.clone(), proxy))
            }
            ContainerRuntimeConfig::CRIO(settings) => {
                Engine::Crio(crio::CRIO::new(settings.clone(), proxy))
            }
            ContainerRuntimeConfig::Podman(settings) => {
                Engine::Podman(podman::Podman::new(settings.clone(), proxy))
            }
        };

        info!("container runtime engine is {:?}", engine);
        Some(engine)
    }
}
#[cfg(test)]
mod test {
    use dragonfly_client_config::dfinit::Containerd;

    use super::*;

    /// Builds a runtime manager from a default config whose container runtime
    /// section selects `runtime_config`.
    fn runtime_with(runtime_config: ContainerRuntimeConfig) -> ContainerRuntime {
        let config = Config {
            container_runtime: dragonfly_client_config::dfinit::ContainerRuntime {
                config: Some(runtime_config),
            },
            ..Default::default()
        };
        ContainerRuntime::new(&config)
    }

    /// Without a configured container runtime, `run` is a successful no-op.
    #[tokio::test]
    async fn should_return_ok_if_container_runtime_not_set() {
        let runtime = ContainerRuntime::new(&Config::default());
        let outcome = runtime.run().await;
        assert!(outcome.is_ok());
    }

    /// A configured container runtime must yield an engine.
    #[test]
    fn should_get_engine_from_config() {
        let containerd_runtime =
            runtime_with(ContainerRuntimeConfig::Containerd(Containerd::default()));
        assert!(containerd_runtime.engine.is_some());

        let crio_runtime = runtime_with(ContainerRuntimeConfig::CRIO(Default::default()));
        assert!(crio_runtime.engine.is_some());
    }
}

View File

@ -1,163 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_client_config::dfinit;
use dragonfly_client_core::{
error::{ErrorType, OrErr},
Error, Result,
};
use tokio::{self, fs};
use toml_edit::{value, Array, ArrayOfTables, Item, Table, Value};
use tracing::{info, instrument};
use url::Url;
/// Podman represents the podman runtime manager.
///
/// It is constructed with `Podman::new` and performs its work in `Podman::run`.
#[derive(Debug, Clone)]
pub struct Podman {
/// config is the configuration for initializing
/// runtime environment for the dfdaemon.
/// `run` reads its `config_path`, `registries` and
/// `unqualified_search_registries`.
config: dfinit::Podman,
/// proxy_config is the configuration for the dfdaemon's proxy server.
/// Its `addr` is parsed as a URL to derive the `host:port` mirror location.
proxy_config: dfinit::Proxy,
}
/// Podman implements the podman runtime manager.
impl Podman {
/// new creates a new podman runtime manager.
#[instrument(skip_all)]
pub fn new(config: dfinit::Podman, proxy_config: dfinit::Proxy) -> Self {
Self {
config,
proxy_config,
}
}
/// run runs the podman runtime to initialize
/// runtime environment for the dfdaemon.
#[instrument(skip_all)]
pub async fn run(&self) -> Result<()> {
let mut registries_config_table = toml_edit::DocumentMut::new();
registries_config_table.set_implicit(true);
// Add unqualified-search-registries to registries config.
let mut unqualified_search_registries = Array::default();
for unqualified_search_registry in self.config.unqualified_search_registries.clone() {
unqualified_search_registries.push(Value::from(unqualified_search_registry));
}
registries_config_table.insert(
"unqualified-search-registries",
value(unqualified_search_registries),
);
// Parse proxy address to get host and port.
let proxy_url =
Url::parse(self.proxy_config.addr.as_str()).or_err(ErrorType::ParseError)?;
let proxy_host = proxy_url
.host_str()
.ok_or(Error::Unknown("host not found".to_string()))?;
let proxy_port = proxy_url
.port_or_known_default()
.ok_or(Error::Unknown("port not found".to_string()))?;
let proxy_location = format!("{}:{}", proxy_host, proxy_port);
// Add registries to the registries config.
let mut registries_table = ArrayOfTables::new();
for registry in self.config.registries.clone() {
info!("add registry: {:?}", registry);
let mut registry_mirror_table = Table::new();
registry_mirror_table.set_implicit(true);
registry_mirror_table.insert("insecure", value(true));
registry_mirror_table.insert("location", value(proxy_location.as_str()));
let mut registry_mirrors_table = ArrayOfTables::new();
registry_mirrors_table.push(registry_mirror_table);
let mut registry_table = Table::new();
registry_table.set_implicit(true);
registry_table.insert("prefix", value(registry.prefix));
registry_table.insert("location", value(registry.location));
registry_table.insert("mirror", Item::ArrayOfTables(registry_mirrors_table));
registries_table.push(registry_table);
}
registries_config_table.insert("registry", Item::ArrayOfTables(registries_table));
let registries_config_dir = self
.config
.config_path
.parent()
.ok_or(Error::Unknown("invalid config path".to_string()))?;
fs::create_dir_all(registries_config_dir.as_os_str()).await?;
fs::write(
self.config.config_path.as_os_str(),
registries_config_table.to_string().as_bytes(),
)
.await?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
// Runs `Podman::run` against a temporary config file and compares the written
// file with the exact expected registries configuration: one registry whose
// mirror location is the host:port of the proxy address.
#[tokio::test]
async fn test_podman_config() {
use tempfile::NamedTempFile;
let podman_config_file = NamedTempFile::new().unwrap();
let podman = Podman::new(
dfinit::Podman {
config_path: podman_config_file.path().to_path_buf(),
registries: vec![dfinit::PodmanRegistry {
prefix: "registry.example.com".into(),
location: "registry.example.com".into(),
}],
unqualified_search_registries: vec!["registry.example.com".into()],
},
dfinit::Proxy {
addr: "http://127.0.0.1:5000".into(),
},
);
let result = podman.run().await;
assert!(result.is_ok());
// get the contents of the file
let contents = fs::read_to_string(podman_config_file.path().to_path_buf())
.await
.unwrap();
let expected_contents = r#"unqualified-search-registries = ["registry.example.com"]
[[registry]]
prefix = "registry.example.com"
location = "registry.example.com"
[[registry.mirror]]
insecure = true
location = "127.0.0.1:5000"
"#;
// assert that the contents of the file are as expected
assert_eq!(contents, expected_contents);
// clean up
fs::remove_file(podman_config_file.path().to_path_buf())
.await
.unwrap();
}
}

View File

@ -1,43 +0,0 @@
# Manifest of the dragonfly-client-storage crate. Fields set to `.workspace = true` are
# inherited from the workspace manifest.
[package]
name = "dragonfly-client-storage"
description = "Storage for the dragonfly client"
version.workspace = true
authors.workspace = true
homepage.workspace = true
repository.workspace = true
keywords.workspace = true
license.workspace = true
edition.workspace = true
[dependencies]
dragonfly-client-core.workspace = true
dragonfly-client-config.workspace = true
dragonfly-client-util.workspace = true
dragonfly-api.workspace = true
chrono.workspace = true
reqwest.workspace = true
rocksdb.workspace = true
serde.workspace = true
tracing.workspace = true
prost-wkt-types.workspace = true
tokio.workspace = true
tokio-util.workspace = true
crc32fast.workspace = true
fs2.workspace = true
bytes.workspace = true
bytesize.workspace = true
# The versions below are declared in this crate instead of being inherited from the workspace.
num_cpus = "1.17"
bincode = "1.3.3"
walkdir = "2.5.0"
[dev-dependencies]
tempfile.workspace = true
criterion = "0.5"
# Benchmark targets. `harness = false` turns off the default libtest harness for each target.
[[bench]]
name = "cache"
harness = false
[[bench]]
name = "lru_cache"
harness = false

View File

@ -1,468 +0,0 @@
/*
* Copyright 2025 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use bytes::Bytes;
use bytesize::ByteSize;
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use dragonfly_client_config::dfdaemon::{Config, Storage};
use dragonfly_client_storage::{cache::Cache, metadata::Piece};
use std::sync::Arc;
use tokio::io::AsyncReadExt;
use tokio::runtime::Runtime;
// Number of pieces written to (and read back from) the cache in each piece benchmark
// iteration. It also scales the cache capacity and the task content length used there.
const PIECE_COUNT: usize = 100;
/// create_config returns a default dfdaemon config whose storage cache capacity is overridden
/// with the given capacity.
fn create_config(capacity: ByteSize) -> Config {
    let storage = Storage {
        cache_capacity: capacity,
        ..Default::default()
    };

    Config {
        storage,
        ..Default::default()
    }
}
/// create_piece returns piece metadata with the given length. Number, offset and the upload
/// counters are zero, the digest is empty, and both timestamps are taken from the current time.
fn create_piece(length: u64) -> Piece {
    // Sample the clock in the same order the fields are initialized: updated_at, then created_at.
    let updated_at = chrono::Utc::now().naive_utc();
    let created_at = chrono::Utc::now().naive_utc();

    Piece {
        number: 0,
        offset: 0,
        length,
        digest: String::new(),
        parent_id: None,
        uploading_count: 0,
        uploaded_count: 0,
        updated_at,
        created_at,
        finished_at: None,
    }
}
/// put_task benchmarks `Cache::put_task` for 10MB, 100MB and 1GB content lengths. Every
/// iteration starts from a fresh cache configured with a 2GB capacity.
pub fn put_task(c: &mut Criterion) {
    let rt: Runtime = Runtime::new().unwrap();
    let mut group = c.benchmark_group("Put Task");

    let cases = [
        ("10MB", ByteSize::mb(10)),
        ("100MB", ByteSize::mb(100)),
        ("1GB", ByteSize::gb(1)),
    ];
    for (label, task_size) in cases {
        group.bench_with_input(
            BenchmarkId::new("Put Task", label),
            &task_size,
            |bencher, content_length| {
                bencher.iter_batched(
                    // Setup: build the cache outside of the measured routine.
                    || rt.block_on(async { Cache::new(Arc::new(create_config(ByteSize::gb(2)))) }),
                    // Routine: register the task in the cache.
                    |mut cache| {
                        rt.block_on(async {
                            cache
                                .put_task("task", black_box(content_length.as_u64()))
                                .await;
                        });
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
/// delete_task benchmarks `Cache::delete_task` for tasks of 10MB, 100MB and 1GB. The task is
/// put into a fresh 2GB cache during setup, so only the deletion is measured.
pub fn delete_task(c: &mut Criterion) {
    let rt: Runtime = Runtime::new().unwrap();
    let mut group = c.benchmark_group("Delete Task");

    let cases = [
        ("10MB", ByteSize::mb(10)),
        ("100MB", ByteSize::mb(100)),
        ("1GB", ByteSize::gb(1)),
    ];
    for (label, task_size) in cases {
        group.bench_with_input(
            BenchmarkId::new("Delete Task", label),
            &task_size,
            |bencher, content_length| {
                bencher.iter_batched(
                    // Setup: create the cache and register the task that will be deleted.
                    || {
                        let mut cache = rt.block_on(async {
                            Cache::new(Arc::new(create_config(ByteSize::gb(2))))
                        });
                        rt.block_on(async {
                            cache
                                .put_task("task", black_box(content_length.as_u64()))
                                .await;
                        });
                        cache
                    },
                    // Routine: delete the task.
                    |mut cache| {
                        rt.block_on(async {
                            cache.delete_task("task").await.unwrap();
                        });
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
/// write_piece benchmarks writing `PIECE_COUNT` pieces of 4MB, 10MB and 16MB into the cache.
/// The cache capacity and the task content length are both `piece size * PIECE_COUNT`.
pub fn write_piece(c: &mut Criterion) {
    let rt = Runtime::new().unwrap();
    let mut group = c.benchmark_group("Write Piece");

    let cases = [
        ("4MB", ByteSize::mb(4)),
        ("10MB", ByteSize::mb(10)),
        ("16MB", ByteSize::mb(16)),
    ];
    for (label, piece_size) in cases {
        // Content of a single piece and the total size of all pieces of the task.
        let payload = vec![1u8; piece_size.as_u64() as usize];
        let task_size = piece_size * PIECE_COUNT as u64;

        group.bench_with_input(
            BenchmarkId::new("Write Piece", label),
            &payload,
            |bencher, data| {
                bencher.iter_batched(
                    // Setup: create the cache and register the task the pieces belong to.
                    || {
                        let mut cache = rt
                            .block_on(async { Cache::new(Arc::new(create_config(task_size))) });
                        rt.block_on(async {
                            cache.put_task("task", task_size.as_u64()).await;
                        });
                        cache
                    },
                    // Routine: write every piece of the task.
                    |cache| {
                        rt.block_on(async {
                            for i in 0..PIECE_COUNT {
                                let piece_id = format!("piece{}", i);
                                cache
                                    .write_piece("task", &piece_id, Bytes::copy_from_slice(data))
                                    .await
                                    .unwrap();
                            }
                        });
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
pub fn read_piece(c: &mut Criterion) {
let rt = Runtime::new().unwrap();
let mut group = c.benchmark_group("Read Piece");
group.bench_with_input(
BenchmarkId::new("Read Piece", "4MB"),
&vec![1u8; ByteSize::mb(4).as_u64() as usize],
|b, data| {
b.iter_batched(
|| {
let mut cache = rt.block_on(async {
Cache::new(Arc::new(create_config(
ByteSize::mb(4) * PIECE_COUNT as u64,
)))
});
rt.block_on(async {
cache
.put_task("task", (ByteSize::mb(4) * PIECE_COUNT as u64).as_u64())
.await;
for i in 0..PIECE_COUNT {
cache
.write_piece(
"task",
&format!("piece{}", i),
Bytes::copy_from_slice(data),
)
.await
.unwrap();
}
});
cache
},
|cache| {
rt.block_on(async {
for i in 0..PIECE_COUNT {
let mut reader = cache
.read_piece(
"task",
&format!("piece{}", i),
create_piece(data.len() as u64),
None,
)
.await
.unwrap();
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
}
});
},
criterion::BatchSize::SmallInput,
);
},
);
group.bench_with_input(
BenchmarkId::new("Read Piece", "10MB"),
&vec![1u8; ByteSize::mb(10).as_u64() as usize],
|b, data| {
b.iter_batched(
|| {
let mut cache = rt.block_on(async {
Cache::new(Arc::new(create_config(
ByteSize::mb(10) * PIECE_COUNT as u64,
)))
});
rt.block_on(async {
cache
.put_task("task", (ByteSize::mb(10) * PIECE_COUNT as u64).as_u64())
.await;
for i in 0..PIECE_COUNT {
cache
.write_piece(
"task",
&format!("piece{}", i),
Bytes::copy_from_slice(data),
)
.await
.unwrap();
}
});
cache
},
|cache| {
rt.block_on(async {
for i in 0..PIECE_COUNT {
let mut reader = cache
.read_piece(
"task",
&format!("piece{}", i),
create_piece(data.len() as u64),
None,
)
.await
.unwrap();
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
}
});
},
criterion::BatchSize::SmallInput,
);
},
);
group.bench_with_input(
BenchmarkId::new("Read Piece", "16MB"),
&vec![1u8; ByteSize::mb(16).as_u64() as usize],
|b, data| {
b.iter_batched(
|| {
let mut cache = rt.block_on(async {
Cache::new(Arc::new(create_config(
ByteSize::mb(16) * PIECE_COUNT as u64,
)))
});
rt.block_on(async {
cache
.put_task("task", (ByteSize::mb(16) * PIECE_COUNT as u64).as_u64())
.await;
for i in 0..PIECE_COUNT {
cache
.write_piece(
"task",
&format!("piece{}", i),
Bytes::copy_from_slice(data),
)
.await
.unwrap();
}
});
cache
},
|cache| {
rt.block_on(async {
for i in 0..PIECE_COUNT {
let mut reader = cache
.read_piece(
"task",
&format!("piece{}", i),
create_piece(data.len() as u64),
None,
)
.await
.unwrap();
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
}
});
},
criterion::BatchSize::SmallInput,
);
},
);
group.finish();
}
// Register the cache benchmarks in the `benches` group and generate the benchmark entry point.
criterion_group!(benches, put_task, delete_task, write_piece, read_piece,);
criterion_main!(benches);

View File

@ -1,448 +0,0 @@
/*
* Copyright 2025 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use bytesize::ByteSize;
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use dragonfly_client_storage::cache::lru_cache::LruCache;
// Number of operations to perform in each benchmark iteration. It is also used as the
// capacity of every benchmarked LruCache.
const OPERATION_COUNT: usize = 1000;
/// lru_cache_put benchmarks inserting `OPERATION_COUNT` distinct keys into an empty cache,
/// using 4MB, 10MB and 16MB sizes (in bytes) as the stored values.
pub fn lru_cache_put(c: &mut Criterion) {
    let mut group = c.benchmark_group("Lru Cache Put");

    let cases = [
        ("4MB", ByteSize::mb(4)),
        ("10MB", ByteSize::mb(10)),
        ("16MB", ByteSize::mb(16)),
    ];
    for (label, value_size) in cases {
        group.bench_with_input(
            BenchmarkId::new("Lru Cache Put", label),
            &value_size,
            |bencher, size| {
                bencher.iter_batched(
                    // Setup: an empty cache.
                    || LruCache::new(OPERATION_COUNT),
                    // Routine: insert every key.
                    |mut cache| {
                        for i in 0..OPERATION_COUNT {
                            cache.put(format!("key{}", i), size.as_u64());
                        }
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
/// lru_cache_get benchmarks `LruCache::get` for every key of a cache pre-filled with
/// `OPERATION_COUNT` entries, using 4MB, 10MB and 16MB sizes (in bytes) as the stored values.
pub fn lru_cache_get(c: &mut Criterion) {
    let mut group = c.benchmark_group("Lru Cache Get");

    let cases = [
        ("4MB", ByteSize::mb(4)),
        ("10MB", ByteSize::mb(10)),
        ("16MB", ByteSize::mb(16)),
    ];
    for (label, value_size) in cases {
        group.bench_with_input(
            BenchmarkId::new("Lru Cache Get", label),
            &value_size,
            |bencher, size| {
                bencher.iter_batched(
                    // Setup: fill the cache.
                    || {
                        let mut filled = LruCache::new(OPERATION_COUNT);
                        for i in 0..OPERATION_COUNT {
                            filled.put(format!("key{}", i), size.as_u64());
                        }
                        filled
                    },
                    // Routine: look every key up.
                    |mut cache| {
                        for i in 0..OPERATION_COUNT {
                            black_box(cache.get(&format!("key{}", i)));
                        }
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
/// lru_cache_peek benchmarks `LruCache::peek` for every key of a cache pre-filled with
/// `OPERATION_COUNT` entries, using 4MB, 10MB and 16MB sizes (in bytes) as the stored values.
pub fn lru_cache_peek(c: &mut Criterion) {
    let mut group = c.benchmark_group("Lru Cache Peek");

    let cases = [
        ("4MB", ByteSize::mb(4)),
        ("10MB", ByteSize::mb(10)),
        ("16MB", ByteSize::mb(16)),
    ];
    for (label, value_size) in cases {
        group.bench_with_input(
            BenchmarkId::new("Lru Cache Peek", label),
            &value_size,
            |bencher, size| {
                bencher.iter_batched(
                    // Setup: fill the cache.
                    || {
                        let mut filled = LruCache::new(OPERATION_COUNT);
                        for i in 0..OPERATION_COUNT {
                            filled.put(format!("key{}", i), size.as_u64());
                        }
                        filled
                    },
                    // Routine: peek at every key.
                    |cache| {
                        for i in 0..OPERATION_COUNT {
                            black_box(cache.peek(&format!("key{}", i)));
                        }
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
/// lru_cache_contains benchmarks `LruCache::contains` for every key of a cache pre-filled with
/// `OPERATION_COUNT` entries, using 4MB, 10MB and 16MB sizes (in bytes) as the stored values.
pub fn lru_cache_contains(c: &mut Criterion) {
    let mut group = c.benchmark_group("Lru Cache Contains");

    let cases = [
        ("4MB", ByteSize::mb(4)),
        ("10MB", ByteSize::mb(10)),
        ("16MB", ByteSize::mb(16)),
    ];
    for (label, value_size) in cases {
        group.bench_with_input(
            BenchmarkId::new("Lru Cache Contains", label),
            &value_size,
            |bencher, size| {
                bencher.iter_batched(
                    // Setup: fill the cache.
                    || {
                        let mut filled = LruCache::new(OPERATION_COUNT);
                        for i in 0..OPERATION_COUNT {
                            filled.put(format!("key{}", i), size.as_u64());
                        }
                        filled
                    },
                    // Routine: test every key for membership.
                    |cache| {
                        for i in 0..OPERATION_COUNT {
                            black_box(cache.contains(&format!("key{}", i)));
                        }
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
/// lru_cache_pop benchmarks `LruCache::pop` for every key of a cache pre-filled with
/// `OPERATION_COUNT` entries, using 4MB, 10MB and 16MB sizes (in bytes) as the stored values.
pub fn lru_cache_pop(c: &mut Criterion) {
    let mut group = c.benchmark_group("Lru Cache Pop");

    let cases = [
        ("4MB", ByteSize::mb(4)),
        ("10MB", ByteSize::mb(10)),
        ("16MB", ByteSize::mb(16)),
    ];
    for (label, value_size) in cases {
        group.bench_with_input(
            BenchmarkId::new("Lru Cache Pop", label),
            &value_size,
            |bencher, size| {
                bencher.iter_batched(
                    // Setup: fill the cache.
                    || {
                        let mut filled = LruCache::new(OPERATION_COUNT);
                        for i in 0..OPERATION_COUNT {
                            filled.put(format!("key{}", i), size.as_u64());
                        }
                        filled
                    },
                    // Routine: remove every key by name.
                    |mut cache| {
                        for i in 0..OPERATION_COUNT {
                            black_box(cache.pop(&format!("key{}", i)));
                        }
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
/// lru_cache_pop_lru benchmarks draining a cache pre-filled with `OPERATION_COUNT` entries via
/// `LruCache::pop_lru`, using 4MB, 10MB and 16MB sizes (in bytes) as the stored values.
pub fn lru_cache_pop_lru(c: &mut Criterion) {
    let mut group = c.benchmark_group("Lru Cache Pop Lru");

    let cases = [
        ("4MB", ByteSize::mb(4)),
        ("10MB", ByteSize::mb(10)),
        ("16MB", ByteSize::mb(16)),
    ];
    for (label, value_size) in cases {
        group.bench_with_input(
            BenchmarkId::new("Lru Cache Pop Lru", label),
            &value_size,
            |bencher, size| {
                bencher.iter_batched(
                    // Setup: fill the cache.
                    || {
                        let mut filled = LruCache::new(OPERATION_COUNT);
                        for i in 0..OPERATION_COUNT {
                            filled.put(format!("key{}", i), size.as_u64());
                        }
                        filled
                    },
                    // Routine: pop the least recently used entry until the cache is empty.
                    |mut cache| {
                        while !cache.is_empty() {
                            black_box(cache.pop_lru());
                        }
                    },
                    criterion::BatchSize::SmallInput,
                );
            },
        );
    }

    group.finish();
}
// Register the LruCache benchmarks in the `benches` group and generate the benchmark entry point.
criterion_group!(
benches,
lru_cache_put,
lru_cache_get,
lru_cache_peek,
lru_cache_contains,
lru_cache_pop,
lru_cache_pop_lru,
);
criterion_main!(benches);

View File

@ -1,509 +0,0 @@
/*
* Copyright 2025 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use std::{borrow::Borrow, collections::HashMap, hash::Hash, hash::Hasher};
/// KeyRef is a raw-pointer handle to a key that is stored inside a heap-allocated [`Entry`].
///
/// It is the key type of the cache's map, so every key is stored only once (inside its entry).
/// Hashing and equality are forwarded to the key it points to.
#[derive(Debug, Clone, Copy)]
struct KeyRef<K> {
    k: *const K,
}

/// KeyRef implements Hash by hashing the key it points to.
impl<K: Hash> Hash for KeyRef<K> {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        // SAFETY: a KeyRef is only stored in `LruCache::map`, and an entry is freed only after
        // its KeyRef has been removed from the map, so `k` points to a live key.
        unsafe {
            let key = &*self.k;
            key.hash(state)
        }
    }
}

/// KeyRef implements PartialEq by comparing the keys the two handles point to.
impl<K: PartialEq> PartialEq for KeyRef<K> {
    fn eq(&self, other: &Self) -> bool {
        // SAFETY: see `Hash for KeyRef`; both handles point to live keys.
        unsafe {
            let key1 = &*self.k;
            let key2 = &*other.k;
            key1.eq(key2)
        }
    }
}

/// KeyRef implements Eq for KeyRef.
impl<K: Eq> Eq for KeyRef<K> {}

/// KeyWrapper is a transparent wrapper around a (possibly unsized) borrowed key. It is the
/// borrowed form used to look entries up in a map keyed by [`KeyRef`].
#[repr(transparent)]
struct KeyWrapper<K: ?Sized>(K);

/// KeyWrapper implements reference conversion.
impl<K: ?Sized> KeyWrapper<K> {
    /// from_ref reinterprets a reference to the key as a reference to a KeyWrapper.
    fn from_ref(key: &K) -> &Self {
        // SAFETY: KeyWrapper is `#[repr(transparent)]` over K, so both types have the same
        // layout and the cast keeps the pointer valid for the same lifetime.
        unsafe { &*(key as *const K as *const KeyWrapper<K>) }
    }
}

/// KeyWrapper implements Hash by hashing the wrapped key.
impl<K: ?Sized + Hash> Hash for KeyWrapper<K> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.0.hash(state)
    }
}

/// KeyWrapper implements PartialEq by comparing the wrapped keys.
impl<K: ?Sized + PartialEq> PartialEq for KeyWrapper<K> {
    #![allow(unknown_lints)]
    // NOTE(review): the comparison is delegated to the wrapped keys, so the clippy lint
    // silenced below is presumably a false positive here.
    #[allow(clippy::unconditional_recursion)]
    fn eq(&self, other: &Self) -> bool {
        self.0.eq(&other.0)
    }
}

/// KeyWrapper implements Eq for KeyWrapper.
impl<K: ?Sized + Eq> Eq for KeyWrapper<K> {}

/// KeyRef implements Borrow<KeyWrapper<Q>>, which lets the map be queried with any borrowed
/// form `Q` of the key type `K` (for example `str` for `String` keys).
impl<K, Q> Borrow<KeyWrapper<Q>> for KeyRef<K>
where
    K: Borrow<Q>,
    Q: ?Sized,
{
    /// borrow borrows the key.
    fn borrow(&self) -> &KeyWrapper<Q> {
        // SAFETY: see `Hash for KeyRef`; `k` points to a live key.
        unsafe {
            let key = &*self.k;
            KeyWrapper::from_ref(key.borrow())
        }
    }
}

/// Entry is a cache entry and a node of the doubly linked recency list.
struct Entry<K, V> {
    key: K,
    value: V,
    prev: Option<*mut Entry<K, V>>,
    next: Option<*mut Entry<K, V>>,
}

/// Entry implements the constructor of a cache entry.
impl<K, V> Entry<K, V> {
    /// new creates a new Entry that is not linked into any list.
    fn new(key: K, value: V) -> Self {
        Self {
            key,
            value,
            prev: None,
            next: None,
        }
    }
}

/// LruCache is a least recently used cache.
///
/// Every entry is a separate heap allocation that is referenced by raw pointer from the map
/// and from the recency list (`head` is the most recently used entry, `tail` the least recently
/// used one). Entries are deliberately not kept as `Box` values in the map: a `Box` asserts
/// unique access to its content, so using list pointers after the box has been moved (for
/// example by a rehash of the map) is not allowed. Ownership is turned back into a `Box` only
/// when an entry has been removed from both the map and the list.
pub struct LruCache<K, V> {
    capacity: usize,
    map: HashMap<KeyRef<K>, *mut Entry<K, V>>,
    head: Option<*mut Entry<K, V>>,
    tail: Option<*mut Entry<K, V>>,
    // Marks that the cache logically owns the heap-allocated entries.
    _marker: std::marker::PhantomData<Box<Entry<K, V>>>,
}

/// LruCache implements the cache operations.
impl<K: Hash + Eq, V> LruCache<K, V> {
    /// new creates a new, empty LruCache.
    ///
    /// An entry is evicted by `put` when the cache already holds `capacity` entries. Because
    /// an empty cache has nothing to evict, a capacity of 0 behaves like a capacity of 1.
    pub fn new(capacity: usize) -> Self {
        Self {
            capacity,
            map: HashMap::new(),
            head: None,
            tail: None,
            _marker: std::marker::PhantomData,
        }
    }

    /// get gets the value of the key and marks the key as most recently used. It returns
    /// None if the key does not exist.
    pub fn get<'a, Q>(&'a mut self, k: &Q) -> Option<&'a V>
    where
        K: Borrow<Q>,
        Q: Hash + Eq + ?Sized,
    {
        let entry_ptr = *self.map.get(KeyWrapper::from_ref(k))?;
        self.detach(entry_ptr);
        self.attach(entry_ptr);

        // SAFETY: the pointer was just read from the map, so it refers to a live entry. The
        // returned reference borrows `self`, so the entry can not be removed while it is alive.
        Some(unsafe { &(*entry_ptr).value })
    }

    /// put puts the key and value into the cache and marks the key as most recently used.
    ///
    /// If the key already exists, its value is replaced and the previous value is returned.
    /// Otherwise, if the cache is full, the least recently used entry is evicted and its value
    /// is returned. In all other cases None is returned.
    pub fn put(&mut self, key: K, mut value: V) -> Option<V> {
        if let Some(&entry_ptr) = self.map.get(KeyWrapper::from_ref(&key)) {
            // SAFETY: the pointer was just read from the map, so it refers to a live entry, and
            // `&mut self` guarantees that nothing else accesses the entry.
            unsafe { std::mem::swap(&mut (*entry_ptr).value, &mut value) };
            self.detach(entry_ptr);
            self.attach(entry_ptr);
            return Some(value);
        }

        let mut evicted_value = None;
        if self.map.len() >= self.capacity {
            if let Some(tail) = self.tail {
                let evicted = self.unlink(tail);
                evicted_value = Some(evicted.value);
            }
        }

        let entry_ptr = Box::into_raw(Box::new(Entry::new(key, value)));

        // SAFETY: `entry_ptr` comes from `Box::into_raw`, so it is valid. The key is never
        // moved or mutated while the entry is allocated, so the pointer stays valid for as
        // long as the KeyRef is stored in the map.
        let key_ptr: *const K = unsafe { &(*entry_ptr).key };
        self.attach(entry_ptr);
        self.map.insert(KeyRef { k: key_ptr }, entry_ptr);
        evicted_value
    }

    /// unlink removes the entry from the recency list and from the map and returns the
    /// ownership of its allocation. The entry must currently be stored in the cache.
    fn unlink(&mut self, entry: *mut Entry<K, V>) -> Box<Entry<K, V>> {
        self.detach(entry);

        // SAFETY: the caller passes an entry of this cache, so the pointer is valid and was
        // created by `Box::into_raw`. The map key is removed while the entry is still
        // allocated, and afterwards neither the map nor the list refers to the entry.
        unsafe {
            self.map.remove(KeyWrapper::from_ref(&(*entry).key));
            Box::from_raw(entry)
        }
    }

    /// detach detaches the entry from the recency list.
    fn detach(&mut self, entry: *mut Entry<K, V>) {
        // SAFETY: `entry` and its neighbours are live entries owned by this cache.
        unsafe {
            let prev = (*entry).prev;
            let next = (*entry).next;

            // Bypass the entry, or move the head/tail if the entry was at an end of the list.
            match prev {
                Some(prev) => (*prev).next = next,
                None => self.head = next,
            }

            match next {
                Some(next) => (*next).prev = prev,
                None => self.tail = prev,
            }

            (*entry).prev = None;
            (*entry).next = None;
        }
    }

    /// attach attaches the entry to the front (most recently used end) of the recency list.
    fn attach(&mut self, entry: *mut Entry<K, V>) {
        match self.head {
            Some(head) => {
                // SAFETY: `entry` and `head` are live entries owned by this cache.
                unsafe {
                    (*entry).next = Some(head);
                    (*head).prev = Some(entry);
                }

                self.head = Some(entry);
            }
            None => {
                // The list is empty, so the entry becomes both head and tail.
                self.head = Some(entry);
                self.tail = Some(entry);
            }
        }
    }

    /// contains checks whether the key exists in the cache. It does not change the recency
    /// order.
    pub fn contains<Q>(&self, k: &Q) -> bool
    where
        K: Borrow<Q>,
        Q: Hash + Eq + ?Sized,
    {
        self.map.contains_key(KeyWrapper::from_ref(k))
    }

    /// peek peeks the value of the key. It does not move the key to the front of the cache.
    pub fn peek<'a, Q>(&'a self, k: &Q) -> Option<&'a V>
    where
        K: Borrow<Q>,
        Q: Hash + Eq + ?Sized,
    {
        let entry_ptr = *self.map.get(KeyWrapper::from_ref(k))?;

        // SAFETY: the pointer was just read from the map, so it refers to a live entry. The
        // returned reference borrows `self`, so the entry can not be removed while it is alive.
        Some(unsafe { &(*entry_ptr).value })
    }

    /// pop_lru pops the least recently used key and value from the cache. It returns None if
    /// the cache is empty.
    pub fn pop_lru(&mut self) -> Option<(K, V)> {
        let tail = self.tail?;
        let entry = self.unlink(tail);
        Some((entry.key, entry.value))
    }

    /// pop removes and returns the key and value for a given key, if it does not exist, it
    /// returns None.
    pub fn pop<Q>(&mut self, k: &Q) -> Option<(K, V)>
    where
        K: Borrow<Q>,
        Q: Hash + Eq + ?Sized,
    {
        let entry_ptr = self.map.remove(KeyWrapper::from_ref(k))?;
        self.detach(entry_ptr);

        // SAFETY: the pointer was created by `Box::into_raw` in `put` and has just been removed
        // from the map and the list, so ownership can be taken back exactly once.
        let entry = unsafe { Box::from_raw(entry_ptr) };
        Some((entry.key, entry.value))
    }

    /// is_empty checks whether the cache is empty.
    pub fn is_empty(&self) -> bool {
        self.map.is_empty()
    }
}

// SAFETY: the raw pointers only refer to entries that are exclusively owned by the cache, so
// the cache can be sent or shared whenever its keys and values can.
unsafe impl<K: Send, V: Send> Send for LruCache<K, V> {}
unsafe impl<K: Sync, V: Sync> Sync for LruCache<K, V> {}

/// LruCache implements Drop to free every entry that is still stored in the cache.
impl<K, V> Drop for LruCache<K, V> {
    fn drop(&mut self) {
        self.head = None;
        self.tail = None;

        // Draining does not hash or compare the keys, so a KeyRef is never dereferenced after
        // its entry has been freed.
        for (_, entry_ptr) in self.map.drain() {
            // SAFETY: every pointer in the map was created by `Box::into_raw` in `put` and is
            // freed exactly once, here.
            drop(unsafe { Box::from_raw(entry_ptr) });
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
/// test_new checks that a new cache is empty and stores the requested capacity.
#[test]
fn test_new() {
let test_cases = vec![
// Normal capacity.
(5, 5),
// Minimum meaningful capacity.
(1, 1),
// Zero capacity.
(0, 0),
// Maximum capacity.
(usize::MAX, usize::MAX),
];
for (capacity, expected_capacity) in test_cases {
let cache: LruCache<String, i32> = LruCache::new(capacity);
assert!(cache.is_empty());
assert_eq!(cache.capacity, expected_capacity);
}
}
/// test_get checks the values returned by `get` after inserts, an update and an eviction.
#[test]
fn test_get() {
let mut cache: LruCache<String, i32> = LruCache::new(3);
let test_cases = vec![
// Initial insertions.
("key1", 1, None),
("key2", 2, None),
("key3", 3, None),
// Update existing key.
("key2", 22, Some(2)),
// Eviction of oldest key.
("key4", 4, Some(1)),
];
for (key, value, expected_result) in test_cases {
let result = cache.put(key.to_string(), value);
assert_eq!(result, expected_result);
}
// Verify final cache state.
assert_eq!(cache.get(&"key1".to_string()), None);
assert_eq!(cache.get(&"key2".to_string()).copied(), Some(22));
assert_eq!(cache.get(&"key3".to_string()).copied(), Some(3));
assert_eq!(cache.get(&"key4".to_string()).copied(), Some(4));
}
/// test_get_after_evction checks that `get` refreshes the recency of a key, so that a later
/// `put` evicts the key that was not read.
#[test]
fn test_get_after_evction() {
let mut cache = LruCache::new(3);
assert_eq!(cache.get(&"nonexistent".to_string()), None);
// Prepare cache with initial values.
for (key, value) in [("key1", 1), ("key2", 2), ("key3", 3)] {
cache.put(key.to_string(), value);
}
let test_cases = vec![
("key1", Some(1)),
("nonexistent", None),
("key1", Some(1)),
("key3", Some(3)),
];
for (key, expected_value) in test_cases {
assert_eq!(cache.get(&key.to_string()).copied(), expected_value);
}
// Test eviction after getting.
cache.put("key4".to_string(), 4);
assert_eq!(cache.get(&"key1".to_string()).copied(), Some(1));
assert_eq!(cache.get(&"key2".to_string()), None);
assert_eq!(cache.get(&"key3".to_string()).copied(), Some(3));
assert_eq!(cache.get(&"key4".to_string()).copied(), Some(4));
}
/// test_put checks the return value of `put` for inserts, evictions and an update.
#[test]
fn test_put() {
let mut cache = LruCache::new(3);
let test_cases = vec![
// Initial insertions within capacity.
("key1", 1, None),
("key2", 2, None),
("key3", 3, None),
// Overflow capacity, should evict oldest.
("key4", 4, Some(1)),
("key5", 5, Some(2)),
// Update existing key.
("key4", 44, Some(4)),
];
for (key, value, expected_result) in test_cases {
let result = cache.put(key.to_string(), value);
assert_eq!(result, expected_result);
}
// Verify final cache state.
assert_eq!(cache.get(&"key1".to_string()), None);
assert_eq!(cache.get(&"key2".to_string()), None);
assert_eq!(cache.get(&"key3".to_string()).copied(), Some(3));
assert_eq!(cache.get(&"key4".to_string()).copied(), Some(44));
assert_eq!(cache.get(&"key5".to_string()).copied(), Some(5));
}
/// test_peek checks that `peek` returns values without refreshing recency: the first
/// inserted key is still the one evicted afterwards.
#[test]
fn test_peek() {
let mut cache: LruCache<String, i32> = LruCache::new(3);
assert_eq!(cache.peek(&"nonexistent".to_string()), None);
// Prepare cache with initial values.
for (key, value) in [("key1", 1), ("key2", 2), ("key3", 3)] {
cache.put(key.to_string(), value);
}
let test_cases = vec![
("nonexistent", None),
("key1", Some(1)),
("key2", Some(2)),
("key3", Some(3)),
];
for (key, expected_value) in test_cases {
assert_eq!(cache.peek(&key.to_string()).copied(), expected_value);
}
// Test eviction after peeking.
cache.put("key4".to_string(), 4);
assert_eq!(cache.peek(&"key1".to_string()), None);
assert_eq!(cache.peek(&"key2".to_string()).copied(), Some(2));
assert_eq!(cache.peek(&"key3".to_string()).copied(), Some(3));
assert_eq!(cache.peek(&"key4".to_string()).copied(), Some(4));
}
/// test_contains checks that `contains` reports membership without refreshing recency: the
/// first inserted key is still the one evicted afterwards.
#[test]
fn test_contains() {
let mut cache: LruCache<String, i32> = LruCache::new(3);
assert!(!cache.contains(&"nonexistent".to_string()));
// Prepare cache with initial values.
for (key, value) in [("key1", 1), ("key2", 2), ("key3", 3)] {
cache.put(key.to_string(), value);
}
let test_cases = vec![
("nonexistent", false),
("key1", true),
("key2", true),
("key3", true),
];
for (key, expected_result) in test_cases {
assert_eq!(cache.contains(&key.to_string()), expected_result);
}
// Test eviction after contains.
cache.put("key4".to_string(), 4);
assert!(!cache.contains(&"key1".to_string()));
assert!(cache.contains(&"key2".to_string()));
assert!(cache.contains(&"key3".to_string()));
assert!(cache.contains(&"key4".to_string()));
}
/// test_pop_lru checks that `pop_lru` returns the entries in insertion order and returns None
/// on an empty cache.
#[test]
fn test_pop_lru() {
let mut cache: LruCache<String, i32> = LruCache::new(3);
assert_eq!(cache.pop_lru(), None);
for (key, value) in [("key1", 1), ("key2", 2), ("key3", 3)] {
cache.put(key.to_string(), value);
}
assert_eq!(cache.pop_lru(), Some(("key1".to_string(), 1)));
assert_eq!(cache.pop_lru(), Some(("key2".to_string(), 2)));
assert_eq!(cache.pop_lru(), Some(("key3".to_string(), 3)));
assert_eq!(cache.pop_lru(), None);
assert!(cache.is_empty());
}
/// test_pop checks that `pop` returns the key and value once and None for keys that were
/// already removed.
#[test]
fn test_pop() {
let mut cache: LruCache<String, i32> = LruCache::new(3);
let test_cases = vec![
("key1".to_string(), Some(("key1".to_string(), 1))),
("key2".to_string(), Some(("key2".to_string(), 2))),
("key3".to_string(), Some(("key3".to_string(), 3))),
("key1".to_string(), None),
("key2".to_string(), None),
("key3".to_string(), None),
];
cache.put("key1".to_string(), 1);
cache.put("key2".to_string(), 2);
cache.put("key3".to_string(), 3);
for (key, expected) in test_cases {
assert_eq!(cache.pop(&key), expected);
}
assert!(cache.is_empty());
}
}

View File

@ -1,989 +0,0 @@
/*
* Copyright 2025 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use bytes::Bytes;
use dragonfly_api::common::v2::Range;
use dragonfly_client_config::dfdaemon::Config;
use dragonfly_client_core::{Error, Result};
use lru_cache::LruCache;
use std::cmp::{max, min};
use std::collections::HashMap;
use std::io::Cursor;
use std::sync::Arc;
use tokio::io::{AsyncRead, BufReader};
use tokio::sync::RwLock;
use tracing::info;
pub mod lru_cache;
/// Task is the task content in the cache.
///
/// Cloning a Task clones the `Arc`, so all clones share the same piece map.
#[derive(Clone, Debug)]
struct Task {
/// content_length is the length of the task content.
content_length: u64,
/// pieces is the pieces content of the task, keyed by piece id.
pieces: Arc<RwLock<HashMap<String, Bytes>>>,
}
/// Task implements the per-task piece store used by the cache.
impl Task {
    /// new creates a task with no pieces that records the given content length.
    fn new(content_length: u64) -> Self {
        let pieces = Arc::new(RwLock::new(HashMap::new()));
        Self {
            content_length,
            pieces,
        }
    }

    /// write_piece stores the piece content under the piece id.
    async fn write_piece(&self, id: &str, piece: Bytes) {
        self.pieces.write().await.insert(id.to_string(), piece);
    }

    /// read_piece returns the content stored under the piece id, or `None` if absent.
    async fn read_piece(&self, id: &str) -> Option<Bytes> {
        self.pieces.read().await.get(id).cloned()
    }

    /// contains reports whether a piece with the given id has been written.
    async fn contains(&self, id: &str) -> bool {
        self.pieces.read().await.contains_key(id)
    }

    /// content_length returns the content length recorded when the task was created.
    fn content_length(&self) -> u64 {
        self.content_length
    }
}
/// Cache is the cache for storing piece content by LRU algorithm.
///
/// Cache storage:
/// 1. Users can preheat task by caching to memory (via CacheTask) or to disk (via Task).
/// For more details, refer to https://github.com/dragonflyoss/api/blob/main/proto/dfdaemon.proto#L174.
/// 2. If the download hits the memory cache, it will be faster than reading from the disk, because there is no
/// page cache for the first read.
///
///```text
/// +--------+
/// │ Source │
/// +--------+
/// ^ ^ Preheat
/// │ │ |
/// +-----------------+ │ │ +----------------------------+
/// │ Other Peers │ │ │ │ Peer | │
/// │ │ │ │ │ v │
/// │ +----------+ │ │ │ │ +----------+ │
/// │ │ Cache |<--|----------|<-Miss--| Cache |--Hit-->|<----Download CacheTask
/// │ +----------+ │ │ │ +----------+ │
/// │ │ │ │ │
/// │ +----------+ │ │ │ +----------+ │
/// │ │ Disk |<--|----------|<-Miss--| Disk |--Hit-->|<----Download Task
/// │ +----------+ │ │ +----------+ │
/// │ │ │ ^ │
/// │ │ │ | │
/// +-----------------+ +----------------------------+
/// |
/// Preheat
///```
///
/// NOTE(review): `Cache` derives `Clone`. A clone shares `tasks` through the `Arc`, but
/// `size` is a plain `u64` that is copied, so size updates made through one clone are
/// not visible to the others.
#[derive(Clone)]
pub struct Cache {
/// config is the configuration of the dfdaemon.
config: Arc<Config>,
/// size is the size of the cache in bytes, tracked as the sum of the content lengths
/// of the tasks put through this handle.
size: u64,
/// capacity is the maximum capacity of the cache in bytes.
capacity: u64,
/// tasks stores the tasks with their task id.
tasks: Arc<RwLock<LruCache<String, Task>>>,
}
/// Cache implements the cache for storing piece content by LRU algorithm.
impl Cache {
/// new creates a new cache with the specified capacity.
pub fn new(config: Arc<Config>) -> Self {
Cache {
config: config.clone(),
size: 0,
capacity: config.storage.cache_capacity.as_u64(),
// LRU cache capacity is set to usize::MAX to avoid evicting tasks. LRU cache will evict tasks
// by cache capacity(cache size) itself, and used pop_lru to evict the least recently
// used task.
tasks: Arc::new(RwLock::new(LruCache::new(usize::MAX))),
}
}
/// read_piece reads the piece from the cache.
pub async fn read_piece(
&self,
task_id: &str,
piece_id: &str,
piece: super::metadata::Piece,
range: Option<Range>,
) -> Result<impl AsyncRead> {
let mut tasks = self.tasks.write().await;
let Some(task) = tasks.get(task_id) else {
return Err(Error::TaskNotFound(task_id.to_string()));
};
let Some(piece_content) = task.read_piece(piece_id).await else {
return Err(Error::PieceNotFound(piece_id.to_string()));
};
drop(tasks);
// Calculate the range of bytes to return based on the range provided.
let (target_offset, target_length) = if let Some(range) = range {
let target_offset = max(piece.offset, range.start) - piece.offset;
let target_length = min(
piece.offset + piece.length - 1,
range.start + range.length - 1,
) - target_offset
- piece.offset
+ 1;
(target_offset as usize, target_length as usize)
} else {
(0, piece.length as usize)
};
// Check if the target range is valid.
let begin = target_offset;
let end = target_offset + target_length;
if begin >= piece_content.len() || end > piece_content.len() {
return Err(Error::InvalidParameter);
}
let content = piece_content.slice(begin..end);
let reader =
BufReader::with_capacity(self.config.storage.read_buffer_size, Cursor::new(content));
Ok(reader)
}
/// write_piece writes the piece content to the cache.
pub async fn write_piece(&self, task_id: &str, piece_id: &str, content: Bytes) -> Result<()> {
let mut tasks = self.tasks.write().await;
let Some(task) = tasks.get(task_id) else {
return Err(Error::TaskNotFound(task_id.to_string()));
};
if task.contains(piece_id).await {
return Ok(());
}
task.write_piece(piece_id, content).await;
Ok(())
}
/// put_task puts a new task into the cache, constrained by the capacity of the cache.
///
/// Tasks with a content length of 0, or larger than the whole cache capacity, are not
/// cached. Otherwise least recently used tasks are evicted until the new task fits, and
/// an empty task entry is registered under `task_id`. Putting a task id that is already
/// cached replaces the existing entry, including its pieces.
pub async fn put_task(&mut self, task_id: &str, content_length: u64) {
    // If the content length is 0, we don't cache the task.
    if content_length == 0 {
        return;
    }

    // If the content length is larger than the cache capacity, the task cannot be cached.
    if content_length > self.capacity {
        info!(
            "task {} is too large and cannot be cached: {}",
            task_id, content_length
        );
        return;
    }

    let mut tasks = self.tasks.write().await;

    // Release the bytes accounted to an existing entry with the same id before adding
    // the new one; otherwise the task would be counted twice in `size`.
    if let Some((_, existing)) = tasks.pop(task_id) {
        self.size = self.size.saturating_sub(existing.content_length());
    }

    // Evict least recently used tasks until the new task fits. The arithmetic saturates
    // because `size` is tracked per handle and may not have seen every task held in the
    // shared LRU.
    while self.size.saturating_add(content_length) > self.capacity {
        let Some((_, evicted)) = tasks.pop_lru() else {
            break;
        };
        self.size = self.size.saturating_sub(evicted.content_length());
    }

    tasks.put(task_id.to_string(), Task::new(content_length));
    self.size = self.size.saturating_add(content_length);
}
/// delete_task removes the task and its pieces from the cache and releases the bytes
/// accounted to it.
///
/// # Errors
///
/// Returns `Error::TaskNotFound` if the task is not in the cache.
pub async fn delete_task(&mut self, task_id: &str) -> Result<()> {
    let mut tasks = self.tasks.write().await;
    let Some((_, task)) = tasks.pop(task_id) else {
        return Err(Error::TaskNotFound(task_id.to_string()));
    };

    // `Cache` is `Clone` and `size` is copied per handle while the LRU is shared, so this
    // handle may remove a task it never accounted for. Saturate instead of underflowing.
    self.size = self.size.saturating_sub(task.content_length());
    Ok(())
}
/// contains_task reports whether a task with the given id is currently cached.
pub async fn contains_task(&self, id: &str) -> bool {
    self.tasks.read().await.contains(id)
}
/// contains_piece reports whether the piece is cached under the specified task.
///
/// Returns `false` when the task itself is not cached. The lookup uses `peek` under a
/// read lock rather than `get` under a write lock.
pub async fn contains_piece(&self, task_id: &str, piece_id: &str) -> bool {
    let tasks = self.tasks.read().await;
    match tasks.peek(task_id) {
        Some(task) => task.contains(piece_id).await,
        None => false,
    }
}
}
#[cfg(test)]
mod tests {
use super::super::metadata::Piece;
use super::*;
use bytesize::ByteSize;
use dragonfly_api::common::v2::Range;
use dragonfly_client_config::dfdaemon::Storage;
use tokio::io::AsyncReadExt;
#[tokio::test]
async fn test_new() {
let test_cases = vec![
// Default configuration with 64MiB capacity.
(Config::default(), 0, ByteSize::mib(64).as_u64()),
// Custom configuration with 100MiB capacity.
(
Config {
storage: Storage {
cache_capacity: ByteSize::mib(100),
..Default::default()
},
..Default::default()
},
0,
ByteSize::mib(100).as_u64(),
),
// Zero capacity configuration.
(
Config {
storage: Storage {
cache_capacity: ByteSize::b(0),
..Default::default()
},
..Default::default()
},
0,
0,
),
];
for (config, expected_size, expected_capacity) in test_cases {
let cache = Cache::new(Arc::new(config));
assert_eq!(cache.size, expected_size);
assert_eq!(cache.capacity, expected_capacity);
}
}
// Drives contains_task through an ordered script of operations. Tasks are inserted
// directly into the LRU, bypassing put_task, so `size` is not exercised here.
#[tokio::test]
async fn test_contains_task() {
let config = Config {
storage: Storage {
cache_capacity: ByteSize::mib(10),
..Default::default()
},
..Default::default()
};
let cache = Cache::new(Arc::new(config));
// Each case is (operation, task id, content length, expected contains_task result).
let test_cases = vec![
// Test non-existent task.
("check", "non_existent", 0, false),
// Add and verify task.
("add", "task1", ByteSize::mib(1).as_u64(), true),
("check", "task1", 0, true),
// Remove and verify task.
("remove", "task1", 0, false),
("check", "task1", 0, false),
// Test multiple tasks.
("add", "task1", ByteSize::mib(1).as_u64(), true),
("add", "task2", ByteSize::mib(2).as_u64(), true),
("check", "task1", 0, true),
("check", "task2", 0, true),
("check", "task3", 0, false),
];
for (operation, task_id, content_length, expected_result) in test_cases {
match operation {
"check" => {
assert_eq!(cache.contains_task(task_id).await, expected_result);
}
"add" => {
let task = Task::new(content_length);
cache.tasks.write().await.put(task_id.to_string(), task);
assert_eq!(cache.contains_task(task_id).await, expected_result);
}
"remove" => {
// Removes the least recently used entry, not `task_id` by name. The script
// only uses "remove" while a single task is cached, so the two coincide.
cache.tasks.write().await.pop_lru();
assert_eq!(cache.contains_task(task_id).await, expected_result);
}
_ => panic!("Unknown operation."),
}
}
}
#[tokio::test]
async fn test_put_task() {
    let capacity = ByteSize::mib(10);
    let mut cache = Cache::new(Arc::new(Config {
        storage: Storage {
            cache_capacity: capacity,
            ..Default::default()
        },
        ..Default::default()
    }));

    // Each case is (task id, content length, whether the task is cached afterwards).
    let cases = [
        // Empty task is never put, so it must not be cached.
        ("empty_task", 0, false),
        // Task exactly as large as the capacity still fits and is cached.
        ("equal_capacity", capacity.as_u64(), true),
        // Task exceeding capacity should not be cached.
        ("exceed_capacity", capacity.as_u64() + 1, false),
        // Normal sized task should be cached.
        ("normal_task", ByteSize::mib(1).as_u64(), true),
    ];

    for (task_id, size, should_exist) in cases {
        // Zero-sized cases skip put_task entirely and only check for absence.
        if size != 0 {
            cache.put_task(task_id, size).await;
        }

        assert_eq!(cache.contains_task(task_id).await, should_exist);
    }
}
#[tokio::test]
async fn test_put_task_lru() {
    let mut cache = Cache::new(Arc::new(Config {
        storage: Storage {
            cache_capacity: ByteSize::mib(5),
            ..Default::default()
        },
        ..Default::default()
    }));
    let task_size = ByteSize::mib(2).as_u64();

    // Each case is (task id, content length to put or 0 for check-only, expected presence).
    let cases = [
        // Two 2MiB tasks fit into the 5MiB cache.
        ("lru_task_1", task_size, true),
        ("lru_task_2", task_size, true),
        // The third 2MiB task does not fit and triggers eviction.
        ("lru_task_3", task_size, true),
        // Only the least recently used task has been evicted.
        ("lru_task_1", 0, false),
        ("lru_task_2", 0, true),
        ("lru_task_3", 0, true),
    ];

    for (task_id, size, should_exist) in cases {
        if size != 0 {
            cache.put_task(task_id, size).await;
        }

        assert_eq!(cache.contains_task(task_id).await, should_exist);
    }
}
#[tokio::test]
async fn test_delete_task() {
    let mut cache = Cache::new(Arc::new(Config {
        storage: Storage {
            cache_capacity: ByteSize::mib(10),
            ..Default::default()
        },
        ..Default::default()
    }));

    // Seed the cache with three 1MiB tasks.
    for task_id in ["task1", "task2", "task3"] {
        cache.put_task(task_id, ByteSize::mib(1).as_u64()).await;
    }

    // Each case is (task id, whether the task is cached before deletion).
    let cases = [
        ("task1", true),
        ("task2", true),
        ("task3", true),
        ("nonexistent", false),
        ("", false),
        ("large_task", false),
    ];

    for (task_id, exists) in cases {
        assert_eq!(cache.contains_task(task_id).await, exists);

        // Deleting succeeds exactly when the task was cached.
        let outcome = cache.delete_task(task_id).await;
        assert_eq!(outcome.is_ok(), exists);

        assert!(!cache.contains_task(task_id).await);
    }

    // Nothing is cached once every case has been processed.
    for (task_id, _) in cases {
        assert!(!cache.contains_task(task_id).await);
    }
}
// Drives contains_piece through an ordered script of task and piece operations.
#[tokio::test]
async fn test_contains_piece() {
let config = Config {
storage: Storage {
cache_capacity: ByteSize::mib(10),
..Default::default()
},
..Default::default()
};
let mut cache = Cache::new(Arc::new(config));
// Each case is (operation, task id, piece id, piece content, expected result).
// The content is only used by "add_piece"; the expected result is ignored by "add_task".
let test_cases = vec![
// Check non-existent task.
("check", "non_existent", "piece1", "", false),
// Check empty piece ID in non-existent task.
("check", "non_existent", "", "", false),
// Add task and verify empty task behavior.
("add_task", "task1", "", "", true),
("check", "task1", "piece1", "", false),
// Add piece and verify existence.
("add_piece", "task1", "piece1", "test data", true),
("check", "task1", "piece1", "", true),
// Check empty piece ID in existing task.
("check", "task1", "", "", false),
// Check non-existent piece in existing task.
("check", "task1", "non_existent_piece", "", false),
// Test piece ID with special characters.
("add_piece", "task1", "piece#$%^&*", "test data", true),
("check", "task1", "piece#$%^&*", "", true),
];
for (operation, task_id, piece_id, content, expected_result) in test_cases {
match operation {
"check" => {
assert_eq!(
cache.contains_piece(task_id, piece_id).await,
expected_result
);
}
"add_task" => {
// The task is registered with a fixed content length of 1000 bytes.
cache.put_task(task_id, 1000).await;
assert!(cache.contains_task(task_id).await);
}
"add_piece" => {
cache
.write_piece(task_id, piece_id, Bytes::from(content))
.await
.unwrap();
assert_eq!(
cache.contains_piece(task_id, piece_id).await,
expected_result
);
}
_ => panic!("Unknown operation."),
}
}
}
// Verifies that write_piece rejects unknown tasks, stores new pieces so they can be
// read back unchanged, and leaves existing pieces untouched on a second write.
#[tokio::test]
async fn test_write_piece() {
let config = Config {
storage: Storage {
cache_capacity: ByteSize::mib(10),
..Default::default()
},
..Default::default()
};
let mut cache = Cache::new(Arc::new(config));
// Test writing to non-existent task.
let test_data = b"test data".to_vec();
let result = cache
.write_piece("non_existent", "piece1", Bytes::from(test_data))
.await;
assert!(matches!(result, Err(Error::TaskNotFound(_))));
// Create a task for testing.
cache.put_task("task1", ByteSize::mib(1).as_u64()).await;
assert!(cache.contains_task("task1").await);
// Each case is (piece id, piece content).
let test_cases = vec![
("piece1", b"hello world".to_vec()),
("piece2", b"rust programming".to_vec()),
("piece3", b"dragonfly cache".to_vec()),
("piece4", b"unit testing".to_vec()),
("piece5", b"async await".to_vec()),
("piece6", b"error handling".to_vec()),
("piece7", vec![0u8; 1024]),
("piece8", vec![1u8; 2048]),
];
// First pass: write every piece and read it back in full.
for (piece_id, content) in &test_cases {
let result = cache
.write_piece("task1", piece_id, Bytes::copy_from_slice(content))
.await;
assert!(result.is_ok());
assert!(cache.contains_piece("task1", piece_id).await);
// Only offset and length matter for the read; the other fields are placeholders.
let piece = Piece {
number: 0,
offset: 0,
length: content.len() as u64,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
};
let mut reader = cache
.read_piece("task1", piece_id, piece, None)
.await
.unwrap();
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
assert_eq!(buffer, *content);
}
// Test attempting to overwrite existing pieces.
// The write should succeed (return Ok) but content should not change.
for (piece_id, original_content) in &test_cases {
let new_content = format!("updated content for {}", piece_id);
let result = cache
.write_piece("task1", piece_id, Bytes::from(new_content))
.await;
assert!(result.is_ok());
// Verify content remains unchanged.
let piece = Piece {
number: 0,
offset: 0,
length: original_content.len() as u64,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
};
let mut reader = cache
.read_piece("task1", piece_id, piece, None)
.await
.unwrap();
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
assert_eq!(buffer, *original_content);
}
}
// Verifies the error paths of read_piece (unknown task, unknown piece) and then reads
// several pieces back both in full and through sub-ranges given in task offsets.
#[tokio::test]
async fn test_read_piece() {
let config = Config {
storage: Storage {
cache_capacity: ByteSize::mib(100),
..Default::default()
},
..Default::default()
};
let mut cache = Cache::new(Arc::new(config));
// Placeholder piece metadata used only for the error-path checks below.
let piece = Piece {
number: 0,
offset: 0,
length: 11,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
};
// Reading from a task that was never put fails with TaskNotFound.
let result = cache
.read_piece("non_existent", "piece1", piece.clone(), None)
.await;
assert!(matches!(result, Err(Error::TaskNotFound(_))));
cache.put_task("task1", ByteSize::mib(50).as_u64()).await;
// Reading a piece that was never written fails with PieceNotFound.
let result = cache
.read_piece("task1", "non_existent", piece.clone(), None)
.await;
assert!(matches!(result, Err(Error::PieceNotFound(_))));
// Each entry is (piece id, content, piece metadata, [(range, expected bytes)]).
let test_pieces = vec![
// Small pieces for basic functionality testing.
(
"piece1",
b"hello world".to_vec(),
Piece {
number: 0,
offset: 0,
length: 11,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
},
vec![
(None, b"hello world".to_vec()),
(
Some(Range {
start: 0,
length: 5,
}),
b"hello".to_vec(),
),
],
),
// Piece starting at task offset 11; the range start is a task offset as well.
(
"piece2",
b"rust lang".to_vec(),
Piece {
number: 1,
offset: 11,
length: 9,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
},
vec![
(None, b"rust lang".to_vec()),
(
Some(Range {
start: 11,
length: 4,
}),
b"rust".to_vec(),
),
],
),
// Piece starting at task offset 20.
(
"piece3",
b"unit test".to_vec(),
Piece {
number: 2,
offset: 20,
length: 9,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
},
vec![
(None, b"unit test".to_vec()),
(
Some(Range {
start: 20,
length: 4,
}),
b"unit".to_vec(),
),
],
),
// Large piece for boundary testing.
(
"large_piece",
{
// 50MiB of a repeating 0..=255 byte pattern.
let size = ByteSize::mib(50).as_u64();
(0..size).map(|i| (i % 256) as u8).collect()
},
Piece {
number: 2,
offset: 0,
length: ByteSize::mib(50).as_u64(),
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
},
vec![
// Full read.
(
None,
(0..ByteSize::mib(50).as_u64())
.map(|i| (i % 256) as u8)
.collect(),
),
// Read first 1MiB.
(
Some(Range {
start: 0,
length: ByteSize::mib(1).as_u64(),
}),
(0..ByteSize::mib(1).as_u64())
.map(|i| (i % 256) as u8)
.collect(),
),
// Read last 1MiB.
(
Some(Range {
start: ByteSize::mib(49).as_u64(),
length: ByteSize::mib(1).as_u64(),
}),
(ByteSize::mib(49).as_u64()..ByteSize::mib(50).as_u64())
.map(|i| (i % 256) as u8)
.collect(),
),
],
),
];
// Write all pieces.
for (id, content, _, _) in &test_pieces {
cache
.write_piece("task1", id, Bytes::copy_from_slice(content))
.await
.unwrap();
}
// Test all pieces with their read ranges.
for (id, _, piece, ranges) in &test_pieces {
for (range, expected_content) in ranges {
let mut reader = cache
.read_piece("task1", id, piece.clone(), *range)
.await
.unwrap();
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
assert_eq!(&buffer, expected_content);
}
}
}
// Spawns 50 readers of the same piece, alternating between full reads and reads of the
// first five bytes, and checks that every reader sees the expected content.
#[tokio::test]
async fn test_concurrent_read_same_piece() {
let config = Config {
storage: Storage {
cache_capacity: ByteSize::mib(10),
..Default::default()
},
..Default::default()
};
let mut cache = Cache::new(Arc::new(config));
cache.put_task("task1", ByteSize::mib(1).as_u64()).await;
let content = b"test data for concurrent read".to_vec();
cache
.write_piece("task1", "piece1", Bytes::from(content.clone()))
.await
.unwrap();
// Share one cache handle between all spawned readers.
let cache_arc = Arc::new(cache);
let mut join_set = tokio::task::JoinSet::new();
// Spawn concurrent readers.
for i in 0..50 {
let cache_clone = cache_arc.clone();
let expected_content = content.clone();
join_set.spawn(async move {
let piece = Piece {
number: 0,
offset: 0,
length: expected_content.len() as u64,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
};
// Even readers read the whole piece, odd readers only the first five bytes.
let range = if i % 2 == 0 {
None
} else {
Some(Range {
start: 0,
length: 5,
})
};
let mut reader = cache_clone
.read_piece("task1", "piece1", piece, range)
.await
.unwrap_or_else(|e| panic!("Reader {} failed: {:?}.", i, e));
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
if let Some(range) = range {
assert_eq!(buffer, &expected_content[..range.length as usize]);
} else {
assert_eq!(buffer, expected_content);
}
});
}
// A reader that panicked surfaces here as a join error.
while let Some(result) = join_set.join_next().await {
assert!(result.is_ok());
}
}
// Spawns 50 writers that each write a distinct piece of the same task and read it back,
// checking that concurrent writes to different pieces do not interfere.
#[tokio::test]
async fn test_concurrent_write_different_pieces() {
let config = Config {
storage: Storage {
cache_capacity: ByteSize::mib(10),
..Default::default()
},
..Default::default()
};
let mut cache = Cache::new(Arc::new(config));
cache.put_task("task1", ByteSize::mib(1).as_u64()).await;
// Share one cache handle between all spawned writers.
let cache_arc = Arc::new(cache);
let mut join_set = tokio::task::JoinSet::new();
// Spawn concurrent writers.
for i in 0..50 {
let cache_clone = cache_arc.clone();
let content = format!("content for piece {}", i).into_bytes();
join_set.spawn(async move {
let piece_id = format!("piece{}", i);
let result = cache_clone
.write_piece("task1", &piece_id, Bytes::from(content.clone()))
.await;
assert!(result.is_ok());
let piece = Piece {
number: 0,
offset: 0,
length: content.len() as u64,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
};
// Read the piece back in full and compare it with what this writer wrote.
let mut reader = cache_clone
.read_piece("task1", &piece_id, piece, None)
.await
.unwrap();
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
assert_eq!(buffer, content);
});
}
// A writer that panicked surfaces here as a join error.
while let Some(result) = join_set.join_next().await {
assert!(result.is_ok());
}
}
// Writes a piece once, then spawns 50 writers that try to overwrite it. Every write
// must succeed, and the content written first must still be what is read back.
#[tokio::test]
async fn test_concurrent_write_same_piece() {
let config = Config {
storage: Storage {
cache_capacity: ByteSize::mib(10),
..Default::default()
},
..Default::default()
};
let mut cache = Cache::new(Arc::new(config));
cache.put_task("task1", ByteSize::mib(1).as_u64()).await;
let original_content = b"original content".to_vec();
cache
.write_piece("task1", "piece1", Bytes::from(original_content.clone()))
.await
.unwrap();
// Share one cache handle between all spawned writers.
let cache_arc = Arc::new(cache);
let mut join_set = tokio::task::JoinSet::new();
// Spawn concurrent writers.
for i in 0..50 {
let cache_clone = cache_arc.clone();
let new_content = format!("new content from writer {}", i).into_bytes();
join_set.spawn(async move {
let result = cache_clone
.write_piece("task1", "piece1", Bytes::from(new_content))
.await;
assert!(result.is_ok());
});
}
// A writer that panicked surfaces here as a join error.
while let Some(result) = join_set.join_next().await {
assert!(result.is_ok());
}
// The piece must still hold the content from the very first write.
let piece = Piece {
number: 0,
offset: 0,
length: original_content.len() as u64,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: chrono::Utc::now().naive_utc(),
created_at: chrono::Utc::now().naive_utc(),
finished_at: None,
};
let mut reader = cache_arc
.read_piece("task1", "piece1", piece, None)
.await
.unwrap();
let mut buffer = Vec::new();
reader.read_to_end(&mut buffer).await.unwrap();
assert_eq!(buffer, original_content);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,815 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use chrono::NaiveDateTime;
use dragonfly_api::common::v2::Range;
use dragonfly_client_config::dfdaemon::Config;
use dragonfly_client_core::{Error, Result};
use dragonfly_client_util::digest::{Algorithm, Digest};
use reqwest::header::HeaderMap;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use tokio::io::AsyncRead;
use tokio::time::sleep;
use tokio_util::either::Either;
use tracing::{debug, error, info, instrument, warn};
pub mod cache;
pub mod content;
pub mod metadata;
pub mod storage_engine;
/// DEFAULT_WAIT_FOR_PIECE_FINISHED_INTERVAL is the default interval (100 milliseconds)
/// for waiting for the piece to be finished.
pub const DEFAULT_WAIT_FOR_PIECE_FINISHED_INTERVAL: Duration = Duration::from_millis(100);
/// Storage is the storage of the task.
///
/// It bundles the metadata storage, the content storage and the in-memory piece cache,
/// and its methods forward to whichever of them owns the requested operation.
pub struct Storage {
/// config is the configuration of the dfdaemon.
config: Arc<Config>,
/// metadata implements the metadata storage.
metadata: metadata::Metadata,
/// content implements the content storage.
content: content::Content,
/// cache implements the cache storage.
cache: cache::Cache,
}
/// Storage implements the storage.
impl Storage {
/// new returns a new storage.
pub async fn new(config: Arc<Config>, dir: &Path, log_dir: PathBuf) -> Result<Self> {
let metadata = metadata::Metadata::new(config.clone(), dir, &log_dir)?;
let content = content::Content::new(config.clone(), dir).await?;
let cache = cache::Cache::new(config.clone());
Ok(Storage {
config,
metadata,
content,
cache,
})
}
/// total_space returns the total space of the disk.
///
/// Delegates to the content storage and returns its result unchanged.
pub fn total_space(&self) -> Result<u64> {
self.content.total_space()
}
/// available_space returns the available space of the disk.
///
/// Delegates to the content storage and returns its result unchanged.
pub fn available_space(&self) -> Result<u64> {
self.content.available_space()
}
/// has_enough_space checks if the storage has enough space to store the content.
///
/// `content_length` is the length of the content to be stored. The check itself is
/// performed by the content storage.
pub fn has_enough_space(&self, content_length: u64) -> Result<bool> {
self.content.has_enough_space(content_length)
}
/// hard_link_task hard links the task content to the destination.
///
/// `task_id` identifies the task and `to` is the destination path. The link is created
/// by the content storage.
#[instrument(skip_all)]
pub async fn hard_link_task(&self, task_id: &str, to: &Path) -> Result<()> {
self.content.hard_link_task(task_id, to).await
}
/// copy_task copies the task content to the destination.
///
/// `id` identifies the task and `to` is the destination path. The copy is performed by
/// the content storage.
#[instrument(skip_all)]
pub async fn copy_task(&self, id: &str, to: &Path) -> Result<()> {
self.content.copy_task(id, to).await
}
/// is_same_dev_inode_as_task checks if the task content is on the same device inode as the
/// destination.
///
/// `id` identifies the task and `to` is the destination path. The comparison is
/// performed by the content storage.
pub async fn is_same_dev_inode_as_task(&self, id: &str, to: &Path) -> Result<bool> {
self.content.is_same_dev_inode_as_task(id, to).await
}
/// prepare_download_task_started prepares the metadata of the task when the task downloads
/// started.
///
/// Only the metadata is touched: the task is marked as started with none of the three
/// optional arguments set, and no task content is created.
pub async fn prepare_download_task_started(&self, id: &str) -> Result<metadata::Task> {
self.metadata.download_task_started(id, None, None, None)
}
/// download_task_started creates the task content and updates the metadata of the task
/// when the task downloads started.
///
/// The content is created first; if that fails the error is returned and the metadata
/// is not updated.
#[instrument(skip_all)]
pub async fn download_task_started(
    &self,
    id: &str,
    piece_length: u64,
    content_length: u64,
    response_header: Option<HeaderMap>,
) -> Result<metadata::Task> {
    self.content.create_task(id, content_length).await?;

    // Both lengths are known at this point, so they are recorded as present values.
    let (piece_length, content_length) = (Some(piece_length), Some(content_length));
    self.metadata
        .download_task_started(id, piece_length, content_length, response_header)
}
/// download_task_finished updates the metadata of the task when the task downloads finished.
///
/// Delegates to the metadata storage and returns the task metadata it reports.
#[instrument(skip_all)]
pub fn download_task_finished(&self, id: &str) -> Result<metadata::Task> {
self.metadata.download_task_finished(id)
}
/// download_task_failed updates the metadata of the task when the task downloads failed.
///
/// Delegates to the metadata storage and returns the task metadata it reports.
#[instrument(skip_all)]
pub async fn download_task_failed(&self, id: &str) -> Result<metadata::Task> {
self.metadata.download_task_failed(id)
}
/// prefetch_task_started updates the metadata of the task when the task prefetches started.
///
/// Delegates to the metadata storage and returns the task metadata it reports.
#[instrument(skip_all)]
pub async fn prefetch_task_started(&self, id: &str) -> Result<metadata::Task> {
self.metadata.prefetch_task_started(id)
}
/// prefetch_task_failed updates the metadata of the task when the task prefetches failed.
///
/// Delegates to the metadata storage and returns the task metadata it reports.
#[instrument(skip_all)]
pub async fn prefetch_task_failed(&self, id: &str) -> Result<metadata::Task> {
self.metadata.prefetch_task_failed(id)
}
/// upload_task_finished updates the metadata of the task when task uploads finished.
///
/// Delegates to the metadata storage and returns the task metadata it reports.
#[instrument(skip_all)]
pub fn upload_task_finished(&self, id: &str) -> Result<metadata::Task> {
self.metadata.upload_task_finished(id)
}
/// get_task returns the task metadata.
///
/// The lookup is performed by the metadata storage; the `Option` in the return type
/// presumably distinguishes an unknown task from a lookup error.
#[instrument(skip_all)]
pub fn get_task(&self, id: &str) -> Result<Option<metadata::Task>> {
self.metadata.get_task(id)
}
/// is_task_exists returns whether the task exists.
///
/// Existence is determined by the metadata storage.
#[instrument(skip_all)]
pub fn is_task_exists(&self, id: &str) -> Result<bool> {
self.metadata.is_task_exists(id)
}
/// get_tasks returns the metadata of the tasks known to the metadata storage.
#[instrument(skip_all)]
pub fn get_tasks(&self) -> Result<Vec<metadata::Task>> {
self.metadata.get_tasks()
}
/// delete_task deletes the task metadatas, task content and piece metadatas.
#[instrument(skip_all)]
pub async fn delete_task(&self, id: &str) {
self.metadata
.delete_task(id)
.unwrap_or_else(|err| error!("delete task metadata failed: {}", err));
self.metadata.delete_pieces(id).unwrap_or_else(|err| {
error!("delete piece metadatas failed: {}", err);
});
self.content.delete_task(id).await.unwrap_or_else(|err| {
error!("delete task content failed: {}", err);
});
let mut cache = self.cache.clone();
cache.delete_task(id).await.unwrap_or_else(|err| {
info!("delete task from cache failed: {}", err);
});
}
/// hard_link_persistent_cache_task hard links the persistent cache task content to the destination.
///
/// `task_id` identifies the persistent cache task and `to` is the destination path. The
/// link is created by the content storage.
#[instrument(skip_all)]
pub async fn hard_link_persistent_cache_task(&self, task_id: &str, to: &Path) -> Result<()> {
self.content
.hard_link_persistent_cache_task(task_id, to)
.await
}
/// copy_persistent_cache_task copies the persistent cache task content to the destination.
///
/// `id` identifies the persistent cache task and `to` is the destination path. The copy
/// is performed by the content storage.
#[instrument(skip_all)]
pub async fn copy_persistent_cache_task(&self, id: &str, to: &Path) -> Result<()> {
self.content.copy_persistent_cache_task(id, to).await
}
/// is_same_dev_inode_as_persistent_cache_task checks if the persistent cache task content is on the same device inode as the
/// destination.
///
/// `id` identifies the persistent cache task and `to` is the destination path. The
/// comparison is performed by the content storage.
pub async fn is_same_dev_inode_as_persistent_cache_task(
&self,
id: &str,
to: &Path,
) -> Result<bool> {
self.content
.is_same_dev_inode_as_persistent_cache_task(id, to)
.await
}
/// create_persistent_cache_task_started creates a new persistent cache task.
///
/// The metadata is recorded first and the task content is created afterwards. If
/// creating the content fails, the error is returned and the metadata written by the
/// first step is left in place.
#[instrument(skip_all)]
pub async fn create_persistent_cache_task_started(
    &self,
    id: &str,
    ttl: Duration,
    piece_length: u64,
    content_length: u64,
) -> Result<metadata::PersistentCacheTask> {
    let task = self
        .metadata
        .create_persistent_cache_task_started(id, ttl, piece_length, content_length)?;

    self.content
        .create_persistent_cache_task(id, content_length)
        .await?;

    Ok(task)
}
/// create_persistent_cache_task_finished updates the metadata of the persistent cache task
/// when the persistent cache task creates finished.
///
/// Delegates to the metadata storage and returns the task metadata it reports.
#[instrument(skip_all)]
pub async fn create_persistent_cache_task_finished(
&self,
id: &str,
) -> Result<metadata::PersistentCacheTask> {
self.metadata.create_persistent_cache_task_finished(id)
}
/// create_persistent_cache_task_failed deletes the persistent cache task when
/// the persistent cache task creates failed.
///
/// This is a plain call to `delete_persistent_cache_task`, so it returns nothing.
#[instrument(skip_all)]
pub async fn create_persistent_cache_task_failed(&self, id: &str) {
self.delete_persistent_cache_task(id).await;
}
/// download_persistent_cache_task_started updates the metadata of the persistent cache task
/// and creates the persistent cache task content when the persistent cache task downloads started.
///
/// The metadata is updated first and the task content is created afterwards. If
/// creating the content fails, the error is returned and the metadata written by the
/// first step is left in place.
#[instrument(skip_all)]
pub async fn download_persistent_cache_task_started(
    &self,
    id: &str,
    ttl: Duration,
    persistent: bool,
    piece_length: u64,
    content_length: u64,
    created_at: NaiveDateTime,
) -> Result<metadata::PersistentCacheTask> {
    let task = self.metadata.download_persistent_cache_task_started(
        id,
        ttl,
        persistent,
        piece_length,
        content_length,
        created_at,
    )?;

    self.content
        .create_persistent_cache_task(id, content_length)
        .await?;

    Ok(task)
}
/// download_persistent_cache_task_finished updates the metadata of the persistent cache task when the persistent cache task downloads finished.
///
/// Delegates to the metadata storage and returns the task metadata it reports.
#[instrument(skip_all)]
pub fn download_persistent_cache_task_finished(
&self,
id: &str,
) -> Result<metadata::PersistentCacheTask> {
self.metadata.download_persistent_cache_task_finished(id)
}
/// download_persistent_cache_task_failed updates the metadata of the persistent cache task when the persistent cache task downloads failed.
///
/// Delegates to the metadata storage and returns the task metadata it reports.
#[instrument(skip_all)]
pub async fn download_persistent_cache_task_failed(
&self,
id: &str,
) -> Result<metadata::PersistentCacheTask> {
self.metadata.download_persistent_cache_task_failed(id)
}
/// upload_persistent_cache_task_finished updates the metadata of the persistent cache task when persistent cache task uploads finished.
///
/// Delegates to the metadata storage and returns the task metadata it reports.
#[instrument(skip_all)]
pub fn upload_persistent_cache_task_finished(
&self,
id: &str,
) -> Result<metadata::PersistentCacheTask> {
self.metadata.upload_persistent_cache_task_finished(id)
}
/// get_persistent_cache_task returns the persistent cache task metadata.
///
/// The lookup is forwarded to the metadata store; the `Option` in the return type is
/// whatever the store reports for `id`.
#[instrument(skip_all)]
pub fn get_persistent_cache_task(
&self,
id: &str,
) -> Result<Option<metadata::PersistentCacheTask>> {
self.metadata.get_persistent_cache_task(id)
}
/// persist_persistent_cache_task persists the persistent cache task metadata.
///
/// Thin wrapper: forwards `id` to the metadata store and returns the task metadata
/// (or the error) produced there.
#[instrument(skip_all)]
pub fn persist_persistent_cache_task(&self, id: &str) -> Result<metadata::PersistentCacheTask> {
self.metadata.persist_persistent_cache_task(id)
}
/// is_persistent_cache_task_exists returns whether the persistent cache task exists.
///
/// The check is answered by the metadata store only; the task content is not consulted
/// by this method.
#[instrument(skip_all)]
pub fn is_persistent_cache_task_exists(&self, id: &str) -> Result<bool> {
self.metadata.is_persistent_cache_task_exists(id)
}
/// get_persistent_cache_tasks returns the persistent cache task metadatas.
///
/// Thin wrapper: the list is produced by the metadata store.
#[instrument(skip_all)]
pub fn get_persistent_cache_tasks(&self) -> Result<Vec<metadata::PersistentCacheTask>> {
self.metadata.get_persistent_cache_tasks()
}
/// delete_persistent_cache_task removes the persistent cache task metadata, the piece
/// metadatas and the persistent cache task content, in that order.
///
/// Deletion is best-effort: a failing step is logged and the remaining steps still run,
/// which is why nothing is returned.
#[instrument(skip_all)]
pub async fn delete_persistent_cache_task(&self, id: &str) {
    // Task metadata.
    if let Err(err) = self.metadata.delete_persistent_cache_task(id) {
        error!("delete persistent cache task metadata failed: {}", err);
    }

    // Piece metadatas belonging to the task.
    if let Err(err) = self.metadata.delete_pieces(id) {
        error!("delete persistent cache piece metadatas failed: {}", err);
    }

    // Task content.
    if let Err(err) = self.content.delete_persistent_cache_task(id).await {
        error!("delete persistent cache task content failed: {}", err);
    }
}
/// create_persistent_cache_piece writes a new persistent cache piece from `reader` and
/// records it in the metadata store.
///
/// The content is written first; the hash reported by that write is wrapped in a CRC32
/// digest and stored together with `number`, `offset` and the caller-supplied `length`.
#[instrument(skip_all)]
pub async fn create_persistent_cache_piece<R: AsyncRead + Unpin + ?Sized>(
    &self,
    piece_id: &str,
    task_id: &str,
    number: u32,
    offset: u64,
    length: u64,
    reader: &mut R,
) -> Result<metadata::Piece> {
    let written = self
        .content
        .write_persistent_cache_piece(task_id, offset, length, reader)
        .await?;

    let digest = Digest::new(Algorithm::Crc32, written.hash).to_string();
    self.metadata
        .create_persistent_cache_piece(piece_id, number, offset, length, digest.as_str())
}
/// download_piece_started first waits for the piece to be finished and, when that wait
/// fails, registers the piece as started in the metadata store.
///
/// Returns the finished piece when the wait succeeds. Any wait error (for example the
/// piece is not found, or the wait timed out) is discarded and the result of
/// `metadata.download_piece_started` is returned instead.
#[instrument(skip_all)]
pub async fn download_piece_started(
    &self,
    piece_id: &str,
    number: u32,
) -> Result<metadata::Piece> {
    let waited = self.wait_for_piece_finished(piece_id).await;

    // A failed wait means this caller has to create the piece metadata itself.
    waited.or_else(|_| self.metadata.download_piece_started(piece_id, number))
}
/// download_piece_from_source_finished stores a piece that was downloaded from the
/// source, giving up after `timeout`.
///
/// The actual work is done by `handle_downloaded_from_source_finished`; it is raced
/// against a timer, and `Error::DownloadPieceFinished` carrying the piece id is returned
/// when the timer wins.
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all)]
pub async fn download_piece_from_source_finished<R: AsyncRead + Unpin + ?Sized>(
    &self,
    piece_id: &str,
    task_id: &str,
    offset: u64,
    length: u64,
    reader: &mut R,
    timeout: Duration,
) -> Result<metadata::Piece> {
    let store_piece =
        self.handle_downloaded_from_source_finished(piece_id, task_id, offset, length, reader);

    tokio::select! {
        outcome = store_piece => outcome,
        _ = sleep(timeout) => Err(Error::DownloadPieceFinished(piece_id.to_string())),
    }
}
/// handle_downloaded_from_source_finished writes the piece read from `reader` to the
/// content store and marks the piece as finished in the metadata store.
///
/// The digest stored with the piece is a CRC32 digest built from the hash reported by
/// the write. No parent id is recorded.
#[instrument(skip_all)]
async fn handle_downloaded_from_source_finished<R: AsyncRead + Unpin + ?Sized>(
    &self,
    piece_id: &str,
    task_id: &str,
    offset: u64,
    length: u64,
    reader: &mut R,
) -> Result<metadata::Piece> {
    let written = self
        .content
        .write_piece(task_id, offset, length, reader)
        .await?;

    let digest = Digest::new(Algorithm::Crc32, written.hash).to_string();

    // `None`: there is no parent for this piece.
    self.metadata
        .download_piece_finished(piece_id, offset, length, digest.as_str(), None)
}
/// download_piece_from_parent_finished stores a piece that was downloaded from a parent,
/// giving up after `timeout`.
///
/// The actual work is done by `handle_downloaded_piece_from_parent_finished`; it is
/// raced against a timer, and `Error::DownloadPieceFinished` carrying the piece id is
/// returned when the timer wins.
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all)]
pub async fn download_piece_from_parent_finished<R: AsyncRead + Unpin + ?Sized>(
    &self,
    piece_id: &str,
    task_id: &str,
    offset: u64,
    length: u64,
    expected_digest: &str,
    parent_id: &str,
    reader: &mut R,
    timeout: Duration,
) -> Result<metadata::Piece> {
    let store_piece = self.handle_downloaded_piece_from_parent_finished(
        piece_id,
        task_id,
        offset,
        length,
        expected_digest,
        parent_id,
        reader,
    );

    tokio::select! {
        outcome = store_piece => outcome,
        _ = sleep(timeout) => Err(Error::DownloadPieceFinished(piece_id.to_string())),
    }
}
/// handle_downloaded_piece_from_parent_finished writes the piece read from `reader` to
/// the content store, verifies its digest and marks the piece as finished.
///
/// The CRC32 digest built from the hash reported by the write must equal
/// `expected_digest`; otherwise `Error::DigestMismatch(expected, actual)` is returned and
/// the metadata is not updated. On success the length reported by the write (not the
/// `length` argument) and `parent_id` are recorded.
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all)]
async fn handle_downloaded_piece_from_parent_finished<R: AsyncRead + Unpin + ?Sized>(
    &self,
    piece_id: &str,
    task_id: &str,
    offset: u64,
    length: u64,
    expected_digest: &str,
    parent_id: &str,
    reader: &mut R,
) -> Result<metadata::Piece> {
    let written = self
        .content
        .write_piece(task_id, offset, length, reader)
        .await?;

    // From here on, use the length that was actually written.
    let length = written.length;
    let actual_digest = Digest::new(Algorithm::Crc32, written.hash).to_string();

    // Reject the piece when the parent's data does not match the expected digest.
    if actual_digest != expected_digest {
        return Err(Error::DigestMismatch(
            expected_digest.to_string(),
            actual_digest,
        ));
    }

    self.metadata.download_piece_finished(
        piece_id,
        offset,
        length,
        actual_digest.as_str(),
        Some(parent_id.to_string()),
    )
}
/// download_piece_failed updates the metadata of the piece when the piece downloads failed.
///
/// Thin wrapper: forwards `piece_id` to the metadata store and returns its result.
#[instrument(skip_all)]
pub fn download_piece_failed(&self, piece_id: &str) -> Result<()> {
self.metadata.download_piece_failed(piece_id)
}
/// upload_piece waits for the piece to be finished, marks the task as uploading and
/// returns a reader over the piece data.
///
/// The piece is read from the cache when the cache contains it, otherwise from the
/// content store. Every outcome after `upload_task_started` is paired with exactly one
/// `upload_task_finished` (reader obtained) or `upload_task_failed` (any failure), so
/// the task's upload bookkeeping stays balanced.
///
/// # Errors
///
/// * the error from waiting for the piece (returned before the upload is started);
/// * `Error::PieceNotFound` when the piece metadata is missing;
/// * any error from the metadata store, the cache or the content store.
#[instrument(skip_all)]
pub async fn upload_piece(
    &self,
    piece_id: &str,
    task_id: &str,
    range: Option<Range>,
) -> Result<impl AsyncRead> {
    // Wait for the piece to be finished.
    self.wait_for_piece_finished(piece_id).await?;

    // Start uploading the task.
    self.metadata.upload_task_started(task_id)?;

    // Get the piece metadata and return the content of the piece.
    match self.metadata.get_piece(piece_id) {
        Ok(Some(piece)) => {
            if self.cache.contains_piece(task_id, piece_id).await {
                match self
                    .cache
                    .read_piece(task_id, piece_id, piece.clone(), range)
                    .await
                {
                    Ok(reader) => {
                        // Finish uploading the task.
                        self.metadata.upload_task_finished(task_id)?;
                        debug!("get piece from cache: {}", piece_id);
                        return Ok(Either::Left(reader));
                    }
                    Err(err) => {
                        // Failed uploading the task. Without this call the upload
                        // started above would never be marked as finished or failed,
                        // unlike every other failure path in this function.
                        self.metadata.upload_task_failed(task_id)?;
                        return Err(err);
                    }
                }
            }

            match self
                .content
                .read_piece(task_id, piece.offset, piece.length, range)
                .await
            {
                Ok(reader) => {
                    // Finish uploading the task.
                    self.metadata.upload_task_finished(task_id)?;
                    Ok(Either::Right(reader))
                }
                Err(err) => {
                    // Failed uploading the task.
                    self.metadata.upload_task_failed(task_id)?;
                    Err(err)
                }
            }
        }
        Ok(None) => {
            // Failed uploading the task.
            self.metadata.upload_task_failed(task_id)?;
            Err(Error::PieceNotFound(piece_id.to_string()))
        }
        Err(err) => {
            // Failed uploading the task.
            self.metadata.upload_task_failed(task_id)?;
            Err(err)
        }
    }
}
/// get_piece returns the piece metadata.
///
/// Thin wrapper around the metadata store lookup for `piece_id`.
///
/// NOTE(review): unlike most sibling methods this one carries no `#[instrument]`
/// attribute; it is polled from the wait loop in `wait_for_piece_finished`, so this is
/// presumably deliberate to avoid a span per poll — confirm before adding one.
pub fn get_piece(&self, piece_id: &str) -> Result<Option<metadata::Piece>> {
self.metadata.get_piece(piece_id)
}
/// is_piece_exists returns whether the piece exists.
///
/// The check is answered by the metadata store only.
#[instrument(skip_all)]
pub fn is_piece_exists(&self, piece_id: &str) -> Result<bool> {
self.metadata.is_piece_exists(piece_id)
}
/// get_pieces returns the piece metadatas of the task identified by `task_id`.
///
/// Thin wrapper: the list is produced by the metadata store.
#[instrument(skip_all)]
pub fn get_pieces(&self, task_id: &str) -> Result<Vec<metadata::Piece>> {
self.metadata.get_pieces(task_id)
}
/// piece_id returns the piece id for piece `number` of the task `task_id`.
///
/// The id is derived by the metadata store; this method only forwards its arguments.
#[inline]
pub fn piece_id(&self, task_id: &str, number: u32) -> String {
self.metadata.piece_id(task_id, number)
}
/// download_persistent_cache_piece_started first waits for the persistent cache piece to
/// be finished and, when that wait fails, registers the piece as started in the
/// metadata store.
///
/// Returns the finished piece when the wait succeeds. Any wait error (for example the
/// piece is not found, or the wait timed out) is discarded and the result of
/// `metadata.download_piece_started` is returned instead.
#[instrument(skip_all)]
pub async fn download_persistent_cache_piece_started(
    &self,
    piece_id: &str,
    number: u32,
) -> Result<metadata::Piece> {
    let waited = self
        .wait_for_persistent_cache_piece_finished(piece_id)
        .await;

    // A failed wait means this caller has to create the piece metadata itself.
    waited.or_else(|_| self.metadata.download_piece_started(piece_id, number))
}
/// download_persistent_cache_piece_from_parent_finished writes a persistent cache piece
/// downloaded from a parent, verifies its digest and marks the piece as finished.
///
/// The CRC32 digest built from the hash reported by the write must equal
/// `expected_digest`; otherwise `Error::DigestMismatch(expected, actual)` is returned and
/// the metadata is not updated. On success the length reported by the write (not the
/// `length` argument) and `parent_id` are recorded.
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all)]
pub async fn download_persistent_cache_piece_from_parent_finished<
    R: AsyncRead + Unpin + ?Sized,
>(
    &self,
    piece_id: &str,
    task_id: &str,
    offset: u64,
    length: u64,
    expected_digest: &str,
    parent_id: &str,
    reader: &mut R,
) -> Result<metadata::Piece> {
    let written = self
        .content
        .write_persistent_cache_piece(task_id, offset, length, reader)
        .await?;

    // From here on, use the length that was actually written.
    let length = written.length;
    let actual_digest = Digest::new(Algorithm::Crc32, written.hash).to_string();

    // Reject the piece when the parent's data does not match the expected digest.
    if actual_digest != expected_digest {
        return Err(Error::DigestMismatch(
            expected_digest.to_string(),
            actual_digest,
        ));
    }

    self.metadata.download_piece_finished(
        piece_id,
        offset,
        length,
        actual_digest.as_str(),
        Some(parent_id.to_string()),
    )
}
/// download_persistent_cache_piece_failed updates the metadata of the persistent cache
/// piece when the persistent cache piece downloads failed.
///
/// Thin wrapper: it calls the same `metadata.download_piece_failed` that
/// `download_piece_failed` uses.
#[instrument(skip_all)]
pub fn download_persistent_cache_piece_failed(&self, piece_id: &str) -> Result<()> {
self.metadata.download_piece_failed(piece_id)
}
/// upload_persistent_cache_piece updates the metadata of the piece and_then
/// returns the data of the piece.
#[instrument(skip_all)]
pub async fn upload_persistent_cache_piece(
&self,
piece_id: &str,
task_id: &str,
range: Option<Range>,
) -> Result<impl AsyncRead> {
// Wait for the persistent cache piece to be finished.
self.wait_for_persistent_cache_piece_finished(piece_id)
.await?;
// Start uploading the persistent cache task.
self.metadata
.upload_persistent_cache_task_started(task_id)?;
// Get the persistent cache piece metadata and return the content of the persistent cache piece.
match self.metadata.get_piece(piece_id) {
Ok(Some(piece)) => {
match self
.content
.read_persistent_cache_piece(task_id, piece.offset, piece.length, range)
.await
{
Ok(reader) => {
// Finish uploading the persistent cache task.
self.metadata
.upload_persistent_cache_task_finished(task_id)?;
Ok(reader)
}
Err(err) => {
// Failed uploading the persistent cache task.
self.metadata.upload_persistent_cache_task_failed(task_id)?;
Err(err)
}
}
}
Ok(None) => {
// Failed uploading the persistent cache task.
self.metadata.upload_persistent_cache_task_failed(task_id)?;
Err(Error::PieceNotFound(piece_id.to_string()))
}
Err(err) => {
// Failed uploading the persistent cache task.
self.metadata.upload_persistent_cache_task_failed(task_id)?;
Err(err)
}
}
}
/// get_persistent_cache_piece returns the persistent cache piece metadata.
///
/// Thin wrapper: it performs the same `metadata.get_piece` lookup as `get_piece`.
#[instrument(skip_all)]
pub fn get_persistent_cache_piece(&self, piece_id: &str) -> Result<Option<metadata::Piece>> {
self.metadata.get_piece(piece_id)
}
/// is_persistent_cache_piece_exists returns whether the persistent cache piece exists.
///
/// Thin wrapper: it performs the same `metadata.is_piece_exists` check as
/// `is_piece_exists`.
#[instrument(skip_all)]
pub fn is_persistent_cache_piece_exists(&self, piece_id: &str) -> Result<bool> {
self.metadata.is_piece_exists(piece_id)
}
/// get_persistent_cache_pieces returns the persistent cache piece metadatas of the task
/// identified by `task_id`.
///
/// Thin wrapper: it performs the same `metadata.get_pieces` lookup as `get_pieces`.
pub fn get_persistent_cache_pieces(&self, task_id: &str) -> Result<Vec<metadata::Piece>> {
self.metadata.get_pieces(task_id)
}
/// persistent_cache_piece_id returns the persistent cache piece id for piece `number`
/// of the task `task_id`.
///
/// Thin wrapper: it calls the same `metadata.piece_id` that `piece_id` uses.
#[inline]
pub fn persistent_cache_piece_id(&self, task_id: &str, number: u32) -> String {
self.metadata.piece_id(task_id, number)
}
/// wait_for_piece_finished polls the piece metadata until the piece is finished.
///
/// The metadata is checked on every tick of an interval of
/// `DEFAULT_WAIT_FOR_PIECE_FINISHED_INTERVAL`. The wait ends with:
///
/// * `Ok(piece)` as soon as the piece reports that it is finished;
/// * `Error::PieceNotFound` when the piece metadata does not exist at a poll;
/// * `Error::WaitForPieceFinishedTimeout` once the download piece timeout plus the
///   storage write piece timeout has elapsed. Before returning,
///   `metadata.wait_for_piece_finished_failed` is called and its error, if any, is
///   only logged.
#[instrument(skip_all)]
async fn wait_for_piece_finished(&self, piece_id: &str) -> Result<metadata::Piece> {
    // Total budget: time to download the piece plus time to write it to storage.
    let total_timeout =
        self.config.download.piece_timeout + self.config.storage.write_piece_timeout;
    let deadline = tokio::time::sleep(total_timeout);
    tokio::pin!(deadline);

    let mut poll_interval = tokio::time::interval(DEFAULT_WAIT_FOR_PIECE_FINISHED_INTERVAL);
    loop {
        tokio::select! {
            _ = poll_interval.tick() => {
                let piece = match self.get_piece(piece_id)? {
                    Some(piece) => piece,
                    None => return Err(Error::PieceNotFound(piece_id.to_string())),
                };

                // Done as soon as the piece is finished; otherwise keep polling.
                if piece.is_finished() {
                    debug!("wait piece finished success");
                    return Ok(piece);
                }
            }
            _ = &mut deadline => {
                if let Err(err) = self.metadata.wait_for_piece_finished_failed(piece_id) {
                    error!("delete piece metadata failed: {}", err);
                }

                return Err(Error::WaitForPieceFinishedTimeout(piece_id.to_string()));
            }
        }
    }
}
/// wait_for_persistent_cache_piece_finished polls the persistent cache piece metadata
/// until the piece is finished.
///
/// The metadata is checked on every tick of an interval of
/// `DEFAULT_WAIT_FOR_PIECE_FINISHED_INTERVAL`. The wait ends with:
///
/// * `Ok(piece)` as soon as the piece reports that it is finished;
/// * `Error::PieceNotFound` when the piece metadata does not exist at a poll;
/// * `Error::WaitForPieceFinishedTimeout` once the download piece timeout plus the
///   storage write piece timeout has elapsed. Before returning,
///   `metadata.wait_for_piece_finished_failed` is called and its error, if any, is
///   only logged.
#[instrument(skip_all)]
async fn wait_for_persistent_cache_piece_finished(
    &self,
    piece_id: &str,
) -> Result<metadata::Piece> {
    // Total budget: time to download the piece plus time to write it to storage.
    let total_timeout =
        self.config.download.piece_timeout + self.config.storage.write_piece_timeout;
    let deadline = tokio::time::sleep(total_timeout);
    tokio::pin!(deadline);

    let mut poll_interval = tokio::time::interval(DEFAULT_WAIT_FOR_PIECE_FINISHED_INTERVAL);
    loop {
        tokio::select! {
            _ = poll_interval.tick() => {
                let piece = match self.get_persistent_cache_piece(piece_id)? {
                    Some(piece) => piece,
                    None => return Err(Error::PieceNotFound(piece_id.to_string())),
                };

                // Done as soon as the piece is finished; otherwise keep polling.
                if piece.is_finished() {
                    debug!("wait piece finished success");
                    return Ok(piece);
                }
            }
            _ = &mut deadline => {
                if let Err(err) = self.metadata.wait_for_piece_finished_failed(piece_id) {
                    error!("delete piece metadata failed: {}", err);
                }

                return Err(Error::WaitForPieceFinishedTimeout(piece_id.to_string()));
            }
        }
    }
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,88 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_client_core::{
error::{ErrorType, OrErr},
Result,
};
use serde::{de::DeserializeOwned, Serialize};
pub mod rocksdb;
/// DatabaseObject marks a type that can be stored in the database under a namespace.
///
/// The namespace separates different kinds of objects from each other, for example as
/// column families in rocksdb. Objects are encoded and decoded with bincode.
pub trait DatabaseObject: Serialize + DeserializeOwned {
    /// NAMESPACE is the namespace of the object.
    const NAMESPACE: &'static str;

    /// serialized encodes the object into bytes with bincode.
    ///
    /// An encoding failure is reported as `ErrorType::SerializeError`.
    fn serialized(&self) -> Result<Vec<u8>> {
        let bytes = bincode::serialize(self).or_err(ErrorType::SerializeError)?;
        Ok(bytes)
    }

    /// deserialize_from decodes an object from bytes with bincode.
    ///
    /// A decoding failure is reported as `ErrorType::SerializeError` as well.
    fn deserialize_from(bytes: &[u8]) -> Result<Self> {
        let object: Self = bincode::deserialize(bytes).or_err(ErrorType::SerializeError)?;
        Ok(object)
    }
}
/// StorageEngine defines basic storage engine operations.
///
/// The trait declares no methods of its own: it requires `Operations` and carries the
/// `'db` lifetime parameter.
pub trait StorageEngine<'db>: Operations {}
/// StorageEngineOwned is a marker trait to indicate the storage engine is owned, i.e.
/// it implements `StorageEngine<'db>` for every lifetime `'db`.
pub trait StorageEngineOwned: for<'db> StorageEngine<'db> {}
/// Blanket implementation: every type that is a `StorageEngine` for all lifetimes is
/// automatically a `StorageEngineOwned`.
impl<T: for<'db> StorageEngine<'db>> StorageEngineOwned for T {}
/// Operations defines basic crud operations.
///
/// Every method is generic over the stored object type `O`; the object's
/// `DatabaseObject::NAMESPACE` selects where the operation applies.
pub trait Operations {
/// get gets the object by key, returning `None` when nothing is stored under `key`.
fn get<O: DatabaseObject>(&self, key: &[u8]) -> Result<Option<O>>;
/// is_exist checks if the object exists by key.
fn is_exist<O: DatabaseObject>(&self, key: &[u8]) -> Result<bool>;
/// put puts the object by key.
fn put<O: DatabaseObject>(&self, key: &[u8], value: &O) -> Result<()>;
/// delete deletes the object by key.
fn delete<O: DatabaseObject>(&self, key: &[u8]) -> Result<()>;
/// iter iterates all objects, yielding `(key, deserialized object)` pairs.
fn iter<O: DatabaseObject>(&self) -> Result<impl Iterator<Item = Result<(Box<[u8]>, O)>>>;
/// iter_raw iterates all objects without deserialization, yielding
/// `(key, raw value bytes)` pairs.
#[allow(clippy::type_complexity)]
fn iter_raw<O: DatabaseObject>(
&self,
) -> Result<impl Iterator<Item = Result<(Box<[u8]>, Box<[u8]>)>>>;
/// prefix_iter iterates all objects with prefix, yielding
/// `(key, deserialized object)` pairs.
fn prefix_iter<O: DatabaseObject>(
&self,
prefix: &[u8],
) -> Result<impl Iterator<Item = Result<(Box<[u8]>, O)>>>;
/// prefix_iter_raw iterates all objects with prefix without deserialization, yielding
/// `(key, raw value bytes)` pairs.
#[allow(clippy::type_complexity)]
fn prefix_iter_raw<O: DatabaseObject>(
&self,
prefix: &[u8],
) -> Result<impl Iterator<Item = Result<(Box<[u8]>, Box<[u8]>)>>>;
/// batch_delete deletes objects by keys.
fn batch_delete<O: DatabaseObject>(&self, keys: Vec<&[u8]>) -> Result<()>;
}

View File

@ -1,645 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::storage_engine::{DatabaseObject, Operations, StorageEngine};
use dragonfly_client_core::{
error::{ErrorType, OrErr},
Error, Result,
};
use rocksdb::WriteOptions;
use std::{
ops::Deref,
path::{Path, PathBuf},
};
use tracing::{info, warn};
/// RocksdbStorageEngine is a storage engine based on rocksdb.
///
/// It is a thin wrapper that owns the rocksdb database handle.
pub struct RocksdbStorageEngine {
// inner is the inner rocksdb DB.
inner: rocksdb::DB,
}
/// RocksdbStorageEngine implements deref of the storage engine, so the methods of the
/// inner `rocksdb::DB` can be called directly on the engine.
impl Deref for RocksdbStorageEngine {
/// Target is the inner rocksdb DB.
type Target = rocksdb::DB;
/// deref returns the inner rocksdb DB.
fn deref(&self) -> &Self::Target {
&self.inner
}
}
/// RocksdbStorageEngine implements the storage engine of the rocksdb.
impl RocksdbStorageEngine {
    /// DEFAULT_DIR_NAME is the name of the sub-directory, below the storage directory,
    /// that holds the metadata database.
    const DEFAULT_DIR_NAME: &'static str = "metadata";

    /// DEFAULT_MEMTABLE_MEMORY_BUDGET is the memtable memory budget used when tuning
    /// level style compaction of a column family, default is 512MB.
    const DEFAULT_MEMTABLE_MEMORY_BUDGET: usize = 512 * 1024 * 1024;

    /// DEFAULT_MAX_BACKGROUND_JOBS is the lower bound for the number of rocksdb
    /// background jobs, default is 2.
    const DEFAULT_MAX_BACKGROUND_JOBS: i32 = 2;

    /// DEFAULT_BLOCK_SIZE is the block size of the block based table, default is 64KB.
    const DEFAULT_BLOCK_SIZE: usize = 64 * 1024;

    /// DEFAULT_CACHE_SIZE is the size of the LRU block cache, default is 1GB.
    const DEFAULT_CACHE_SIZE: usize = 1024 * 1024 * 1024;

    /// DEFAULT_LOG_MAX_SIZE is the max size of a rocksdb log file, default is 64MB.
    const DEFAULT_LOG_MAX_SIZE: usize = 64 * 1024 * 1024;

    /// DEFAULT_LOG_MAX_FILES is the number of rocksdb log files to keep, default is 10.
    const DEFAULT_LOG_MAX_FILES: usize = 10;

    /// open opens a rocksdb storage engine below `dir` with the given column families.
    ///
    /// The database lives in `dir/metadata` and its info log is written to `log_dir`.
    /// Missing databases and column families are created. When `keep` is false, any
    /// existing database in that directory is destroyed first; a failure to destroy it
    /// is only logged. A failure to open the database is returned as
    /// `ErrorType::StorageError`.
    pub fn open(dir: &Path, log_dir: &PathBuf, cf_names: &[&str], keep: bool) -> Result<Self> {
        info!("initializing metadata directory: {:?} {:?}", dir, cf_names);

        // The CPU count drives both the thread pool size and the background job count.
        let cpus = num_cpus::get() as i32;

        // Database-wide options.
        let mut db_options = rocksdb::Options::default();
        db_options.create_if_missing(true);
        db_options.create_missing_column_families(true);

        // Compression: LZ4 in general, Zstd for the bottommost level.
        db_options.set_compression_type(rocksdb::DBCompressionType::Lz4);
        db_options.set_bottommost_compression_type(rocksdb::DBCompressionType::Zstd);

        // Parallelism: never go below DEFAULT_MAX_BACKGROUND_JOBS background jobs.
        db_options.increase_parallelism(cpus);
        db_options.set_max_background_jobs(cpus.max(Self::DEFAULT_MAX_BACKGROUND_JOBS));

        // Info log location, verbosity and rotation.
        db_options.set_db_log_dir(log_dir);
        db_options.set_log_level(rocksdb::LogLevel::Info);
        db_options.set_max_log_file_size(Self::DEFAULT_LOG_MAX_SIZE);
        db_options.set_keep_log_file_num(Self::DEFAULT_LOG_MAX_FILES);

        // Block based table: LRU block cache that also holds index and filter blocks.
        let block_cache = rocksdb::Cache::new_lru_cache(Self::DEFAULT_CACHE_SIZE);
        let mut table_options = rocksdb::BlockBasedOptions::default();
        table_options.set_block_cache(&block_cache);
        table_options.set_block_size(Self::DEFAULT_BLOCK_SIZE);
        table_options.set_cache_index_and_filter_blocks(true);
        table_options.set_pin_l0_filter_and_index_blocks_in_cache(true);
        db_options.set_block_based_table_factory(&table_options);

        // Options shared by every column family: fixed 64-byte key prefixes with a
        // memtable prefix bloom filter, tuned for level style compaction.
        let mut cf_options = rocksdb::Options::default();
        cf_options.set_prefix_extractor(rocksdb::SliceTransform::create_fixed_prefix(64));
        cf_options.set_memtable_prefix_bloom_ratio(0.25);
        cf_options.optimize_level_style_compaction(Self::DEFAULT_MEMTABLE_MEMORY_BUDGET);

        // One (name, options) descriptor per requested column family.
        let mut column_families = Vec::with_capacity(cf_names.len());
        for name in cf_names {
            column_families.push((name.to_string(), cf_options.clone()));
        }

        // The database itself lives in a sub-directory of `dir`.
        let db_dir = dir.join(Self::DEFAULT_DIR_NAME);

        // Unless the storage is kept, wipe whatever database is already there.
        if !keep {
            if let Err(err) = rocksdb::DB::destroy(&db_options, &db_dir) {
                warn!("destroy {:?} failed: {}", db_dir, err);
            }
        }

        // Open rocksdb.
        let inner = rocksdb::DB::open_cf_with_opts(&db_options, &db_dir, column_families)
            .or_err(ErrorType::StorageError)?;
        info!("metadata initialized directory: {:?}", db_dir);

        Ok(Self { inner })
    }
}
/// RocksdbStorageEngine implements the storage engine operations.
///
/// Each operation resolves the column family named `O::NAMESPACE` first and fails with
/// `Error::ColumnFamilyNotFound` when it does not exist. Errors reported by rocksdb are
/// returned as `ErrorType::StorageError`.
impl Operations for RocksdbStorageEngine {
    /// get gets the object by key.
    ///
    /// Returns `Ok(None)` when the key is absent. The value is read through a pinned
    /// slice, so it is deserialized without first being copied into a `Vec`.
    fn get<O: DatabaseObject>(&self, key: &[u8]) -> Result<Option<O>> {
        let cf = cf_handle::<O>(self)?;
        let value = self
            .get_pinned_cf(cf, key)
            .or_err(ErrorType::StorageError)?;
        match value {
            Some(value) => Ok(Some(O::deserialize_from(&value)?)),
            None => Ok(None),
        }
    }

    /// is_exist checks if the object exists by key.
    ///
    /// Only presence matters here, so the value is pinned rather than copied.
    fn is_exist<O: DatabaseObject>(&self, key: &[u8]) -> Result<bool> {
        let cf = cf_handle::<O>(self)?;
        Ok(self
            .get_pinned_cf(cf, key)
            .or_err(ErrorType::StorageError)?
            .is_some())
    }

    /// put puts the object by key, using the default write options.
    fn put<O: DatabaseObject>(&self, key: &[u8], value: &O) -> Result<()> {
        let cf = cf_handle::<O>(self)?;
        self.put_cf(cf, key, value.serialized()?)
            .or_err(ErrorType::StorageError)?;
        Ok(())
    }

    /// delete deletes the object by key.
    ///
    /// The delete is issued with the `sync` write option enabled.
    fn delete<O: DatabaseObject>(&self, key: &[u8]) -> Result<()> {
        let cf = cf_handle::<O>(self)?;
        let mut options = WriteOptions::default();
        options.set_sync(true);

        self.delete_cf_opt(cf, key, &options)
            .or_err(ErrorType::StorageError)?;
        Ok(())
    }

    /// iter iterates all objects, starting from the first key of the column family.
    ///
    /// Each item is the key together with the deserialized object, or the error hit
    /// while reading or deserializing that entry.
    fn iter<O: DatabaseObject>(&self) -> Result<impl Iterator<Item = Result<(Box<[u8]>, O)>>> {
        let cf = cf_handle::<O>(self)?;
        let iter = self.iterator_cf(cf, rocksdb::IteratorMode::Start);
        Ok(iter.map(|ele| {
            let (key, value) = ele.or_err(ErrorType::StorageError)?;
            Ok((key, O::deserialize_from(&value)?))
        }))
    }

    /// iter_raw iterates all objects without deserialization.
    fn iter_raw<O: DatabaseObject>(
        &self,
    ) -> Result<impl Iterator<Item = Result<(Box<[u8]>, Box<[u8]>)>>> {
        let cf = cf_handle::<O>(self)?;
        Ok(self
            .iterator_cf(cf, rocksdb::IteratorMode::Start)
            .map(|ele| {
                let (key, value) = ele.or_err(ErrorType::StorageError)?;
                Ok((key, value))
            }))
    }

    /// prefix_iter iterates all objects with prefix.
    ///
    /// Each item is the key together with the deserialized object, or the error hit
    /// while reading or deserializing that entry.
    fn prefix_iter<O: DatabaseObject>(
        &self,
        prefix: &[u8],
    ) -> Result<impl Iterator<Item = Result<(Box<[u8]>, O)>>> {
        let cf = cf_handle::<O>(self)?;
        let iter = self.prefix_iterator_cf(cf, prefix);
        Ok(iter.map(|ele| {
            let (key, value) = ele.or_err(ErrorType::StorageError)?;
            Ok((key, O::deserialize_from(&value)?))
        }))
    }

    /// prefix_iter_raw iterates all objects with prefix without deserialization.
    fn prefix_iter_raw<O: DatabaseObject>(
        &self,
        prefix: &[u8],
    ) -> Result<impl Iterator<Item = Result<(Box<[u8]>, Box<[u8]>)>>> {
        let cf = cf_handle::<O>(self)?;
        Ok(self.prefix_iterator_cf(cf, prefix).map(|ele| {
            let (key, value) = ele.or_err(ErrorType::StorageError)?;
            Ok((key, value))
        }))
    }

    /// batch_delete deletes objects by keys.
    ///
    /// All deletes are collected into a single write batch that is written with the
    /// `sync` write option enabled.
    fn batch_delete<O: DatabaseObject>(&self, keys: Vec<&[u8]>) -> Result<()> {
        let cf = cf_handle::<O>(self)?;
        let mut batch = rocksdb::WriteBatch::default();
        for key in keys {
            batch.delete_cf(cf, key);
        }

        let mut options = WriteOptions::default();
        options.set_sync(true);
        Ok(self
            .write_opt(batch, &options)
            .or_err(ErrorType::StorageError)?)
    }
}
/// RocksdbStorageEngine implements the rocksdb of the storage engine.
///
/// The impl is empty; it holds for any lifetime (`'_`).
impl StorageEngine<'_> for RocksdbStorageEngine {}
/// cf_handle returns the column family handle for the given object.
fn cf_handle<T>(db: &rocksdb::DB) -> Result<&rocksdb::ColumnFamily>
where
T: DatabaseObject,
{
let cf_name = T::NAMESPACE;
db.cf_handle(cf_name)
.ok_or_else(|| Error::ColumnFamilyNotFound(cf_name.to_string()))
}
#[cfg(test)]
mod tests {
use super::*;
use serde::{Deserialize, Serialize};
use tempfile::tempdir;
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
struct Object {
id: String,
value: i32,
}
impl DatabaseObject for Object {
const NAMESPACE: &'static str = "object";
}
fn create_test_engine() -> RocksdbStorageEngine {
let temp_dir = tempdir().unwrap();
let log_dir = temp_dir.path().to_path_buf();
RocksdbStorageEngine::open(temp_dir.path(), &log_dir, &[Object::NAMESPACE], false).unwrap()
}
#[test]
fn test_put_and_get() {
let engine = create_test_engine();
let object = Object {
id: "1".to_string(),
value: 42,
};
engine.put::<Object>(object.id.as_bytes(), &object).unwrap();
let retrieved_object = engine.get::<Object>(object.id.as_bytes()).unwrap().unwrap();
assert_eq!(object, retrieved_object);
}
#[test]
fn test_is_exist() {
let engine = create_test_engine();
let object = Object {
id: "2".to_string(),
value: 100,
};
assert!(!engine.is_exist::<Object>(object.id.as_bytes()).unwrap());
engine.put::<Object>(object.id.as_bytes(), &object).unwrap();
assert!(engine.is_exist::<Object>(object.id.as_bytes()).unwrap());
}
#[test]
fn test_delete() {
let engine = create_test_engine();
let object = Object {
id: "3".to_string(),
value: 200,
};
engine.put::<Object>(object.id.as_bytes(), &object).unwrap();
assert!(engine.is_exist::<Object>(object.id.as_bytes()).unwrap());
engine.delete::<Object>(object.id.as_bytes()).unwrap();
assert!(!engine.is_exist::<Object>(object.id.as_bytes()).unwrap());
}
#[test]
fn test_batch_delete() {
let engine = create_test_engine();
let objects = vec![
Object {
id: "1".to_string(),
value: 1,
},
Object {
id: "2".to_string(),
value: 2,
},
Object {
id: "3".to_string(),
value: 3,
},
];
for object in &objects {
engine.put::<Object>(object.id.as_bytes(), object).unwrap();
assert!(engine.is_exist::<Object>(object.id.as_bytes()).unwrap());
}
let ids: Vec<&[u8]> = objects.iter().map(|object| object.id.as_bytes()).collect();
engine.batch_delete::<Object>(ids).unwrap();
for object in &objects {
assert!(!engine.is_exist::<Object>(object.id.as_bytes()).unwrap());
}
}
#[test]
fn test_iter() {
let engine = create_test_engine();
let objects = vec![
Object {
id: "1".to_string(),
value: 10,
},
Object {
id: "2".to_string(),
value: 20,
},
Object {
id: "3".to_string(),
value: 30,
},
];
for object in &objects {
engine.put::<Object>(object.id.as_bytes(), object).unwrap();
}
let retrieved_objects = engine
.iter::<Object>()
.unwrap()
.collect::<Result<Vec<_>>>()
.unwrap();
assert_eq!(retrieved_objects.len(), objects.len());
for object in &objects {
let found = retrieved_objects
.iter()
.any(|(_, v)| v.id == object.id && v.value == object.value);
assert!(found, "could not find object with id {:?}", object.id);
}
}
#[test]
fn test_prefix_iter() {
let engine = create_test_engine();
// RocksDB prefix extractor is configured with fixed_prefix(64) in the open method.
let prefix_a = [b'a'; 64];
let prefix_b = [b'b'; 64];
// Create test keys with 64-byte identical prefixes.
let key_a1 = [&prefix_a[..], b"_suffix1"].concat();
let key_a2 = [&prefix_a[..], b"_suffix2"].concat();
let key_b1 = [&prefix_b[..], b"_suffix1"].concat();
let key_b2 = [&prefix_b[..], b"_suffix2"].concat();
let objects_with_prefix_a = vec![
(
key_a1.clone(),
Object {
id: "prefix_id_a1".to_string(),
value: 100,
},
),
(
key_a2.clone(),
Object {
id: "prefix_id_a2".to_string(),
value: 200,
},
),
];
let objects_with_prefix_b = vec![
(
key_b1.clone(),
Object {
id: "prefix_id_b1".to_string(),
value: 300,
},
),
(
key_b2.clone(),
Object {
id: "prefix_id_b2".to_string(),
value: 400,
},
),
];
for (key, obj) in &objects_with_prefix_a {
engine.put::<Object>(key, obj).unwrap();
}
for (key, obj) in &objects_with_prefix_b {
engine.put::<Object>(key, obj).unwrap();
}
let retrieved_objects = engine
.prefix_iter::<Object>(&prefix_a)
.unwrap()
.collect::<Result<Vec<_>>>()
.unwrap();
assert_eq!(
retrieved_objects.len(),
objects_with_prefix_a.len(),
"expected {} objects with prefix 'a', but got {}",
objects_with_prefix_a.len(),
retrieved_objects.len()
);
// Verify each object with prefix is correctly retrieved.
for (key, object) in &objects_with_prefix_a {
let found = retrieved_objects
.iter()
.any(|(_, v)| v.id == object.id && v.value == object.value);
assert!(found, "could not find object with key {:?}", key);
}
// Verify objects with different prefix are not retrieved.
for (key, object) in &objects_with_prefix_b {
let found = retrieved_objects
.iter()
.any(|(_, v)| v.id == object.id && v.value == object.value);
assert!(!found, "found object with different prefix: {:?}", key);
}
}
#[test]
fn test_iter_raw() {
let engine = create_test_engine();
let objects = vec![
Object {
id: "1".to_string(),
value: 10,
},
Object {
id: "2".to_string(),
value: 20,
},
Object {
id: "3".to_string(),
value: 30,
},
];
for object in &objects {
engine.put::<Object>(object.id.as_bytes(), object).unwrap();
}
let retrieved_objects = engine
.iter_raw::<Object>()
.unwrap()
.collect::<Result<Vec<_>>>()
.unwrap();
assert_eq!(retrieved_objects.len(), objects.len());
// Verify each object can be deserialized from the raw bytes.
for object in &objects {
let found = retrieved_objects
.iter()
.any(|(_, v)| match Object::deserialize_from(v) {
Ok(deserialized) => {
deserialized.id == object.id && deserialized.value == object.value
}
Err(_) => false,
});
assert!(
found,
"could not find or deserialize object with key {:?}",
object.id
);
}
}
#[test]
fn test_prefix_iter_raw() {
    let engine = create_test_engine();

    // The open method configures RocksDB with a fixed_prefix(64) extractor, so every key
    // below starts with a full 64-byte prefix followed by a distinguishing suffix.
    let prefix_a = [b'a'; 64];
    let prefix_b = [b'b'; 64];
    let make_key = |prefix: &[u8; 64], suffix: &[u8]| [&prefix[..], suffix].concat();

    let objects_with_prefix_a = vec![
        (
            make_key(&prefix_a, b"_raw_suffix1"),
            Object {
                id: "raw_prefix_id_a1".to_string(),
                value: 100,
            },
        ),
        (
            make_key(&prefix_a, b"_raw_suffix2"),
            Object {
                id: "raw_prefix_id_a2".to_string(),
                value: 200,
            },
        ),
    ];
    let objects_with_prefix_b = vec![
        (
            make_key(&prefix_b, b"_raw_suffix1"),
            Object {
                id: "raw_prefix_id_b1".to_string(),
                value: 300,
            },
        ),
        (
            make_key(&prefix_b, b"_raw_suffix2"),
            Object {
                id: "raw_prefix_id_b2".to_string(),
                value: 400,
            },
        ),
    ];

    // Store the 'a' objects first, then the 'b' objects.
    for (key, obj) in objects_with_prefix_a
        .iter()
        .chain(objects_with_prefix_b.iter())
    {
        engine.put::<Object>(key, obj).unwrap();
    }

    // Only entries stored under prefix 'a' should come back from the raw iterator.
    let retrieved_objects = engine
        .prefix_iter_raw::<Object>(&prefix_a)
        .unwrap()
        .collect::<Result<Vec<_>>>()
        .unwrap();
    let expected_count = objects_with_prefix_a.len();
    assert_eq!(
        retrieved_objects.len(),
        expected_count,
        "expected {} raw objects with prefix 'a', but got {}",
        expected_count,
        retrieved_objects.len()
    );

    // Every stored 'a' object must be recoverable by deserializing one of the raw values.
    for (_, expected) in &objects_with_prefix_a {
        let found = retrieved_objects.iter().any(|(_, raw)| {
            Object::deserialize_from(raw)
                .map(|decoded| decoded.id == expected.id && decoded.value == expected.value)
                .unwrap_or(false)
        });
        assert!(
            found,
            "could not find or deserialize object with key {:?}",
            expected.id
        );
    }

    // No key stored under prefix 'b' may appear among the retrieved entries.
    for (key, _) in &objects_with_prefix_b {
        let found = retrieved_objects
            .iter()
            .any(|(raw_key, _)| raw_key.as_ref() == key.as_slice());
        assert!(!found, "found object with different prefix: {:?}", key);
    }
}
#[test]
fn test_column_family_not_found() {
    // A type whose namespace differs from every namespace registered with the test engine.
    #[derive(Debug, Serialize, Deserialize, PartialEq)]
    struct UnregisteredObject {
        data: String,
    }

    impl DatabaseObject for UnregisteredObject {
        const NAMESPACE: &'static str = "unregistered";
    }

    let engine = create_test_engine();
    let result = engine.get::<UnregisteredObject>(b"unregistered");

    // The lookup must fail, and the failure must name the missing column family.
    assert!(result.is_err());
    if let Some(err) = result.err() {
        assert!(format!("{:?}", err).contains("ColumnFamilyNotFound"));
    }
}
}

View File

@ -1,39 +0,0 @@
[package]
name = "dragonfly-client-util"
description = "Utility library for the dragonfly client"
version.workspace = true
authors.workspace = true
homepage.workspace = true
repository.workspace = true
keywords.workspace = true
license.workspace = true
edition.workspace = true
[dependencies]
dragonfly-client-core.workspace = true
dragonfly-api.workspace = true
reqwest.workspace = true
http-range-header.workspace = true
http.workspace = true
tracing.workspace = true
url.workspace = true
rcgen.workspace = true
rustls.workspace = true
rustls-pki-types.workspace = true
rustls-pemfile.workspace = true
sha2.workspace = true
uuid.workspace = true
sysinfo.workspace = true
hex.workspace = true
crc32fast.workspace = true
openssl.workspace = true
lazy_static.workspace = true
bytesize.workspace = true
lru.workspace = true
tokio.workspace = true
rustix = { version = "1.0.8", features = ["fs"] }
base64 = "0.22.1"
pnet = "0.35.0"
[dev-dependencies]
tempfile.workspace = true

View File

@ -1,278 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_client_core::{Error as ClientError, Result as ClientResult};
use sha2::Digest as Sha2Digest;
use std::fmt;
use std::io::{self, Read};
use std::path::Path;
use std::str::FromStr;
use tracing::instrument;
/// SEPARATOR is the separator between the algorithm name and the encoded value in the
/// textual form of a digest, e.g. `sha256:<encoded>`.
pub const SEPARATOR: &str = ":";
/// Algorithm enumerates the hash algorithms that can be used to generate a digest.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Algorithm {
    /// Crc32 selects the crc32 checksum.
    Crc32,

    /// Sha256 selects the sha256 hash.
    Sha256,

    /// Sha512 selects the sha512 hash.
    Sha512,
}

/// Algorithm implements the Display.
impl fmt::Display for Algorithm {
    /// fmt writes the lowercase name of the algorithm.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Algorithm::Crc32 => "crc32",
            Algorithm::Sha256 => "sha256",
            Algorithm::Sha512 => "sha512",
        };

        f.write_str(name)
    }
}

/// Algorithm implements the FromStr.
impl FromStr for Algorithm {
    type Err = String;

    /// from_str parses the lowercase algorithm name produced by Display; any other input
    /// yields an error message naming the rejected value.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        [Algorithm::Crc32, Algorithm::Sha256, Algorithm::Sha512]
            .iter()
            .copied()
            .find(|candidate| candidate.to_string() == s)
            .ok_or_else(|| format!("invalid digest algorithm: {}", s))
    }
}
/// Digest is a content digest: the algorithm that produced it plus the encoded value.
///
/// Its textual form is `<algorithm>:<encoded>`, see the Display and FromStr implementations.
pub struct Digest {
    /// algorithm is the algorithm that is used to generate digest.
    algorithm: Algorithm,

    /// encoded is the encoded digest.
    encoded: String,
}

/// Digest implements the Digest.
impl Digest {
    /// new returns a new Digest. The encoded value is stored as given, without validation.
    pub fn new(algorithm: Algorithm, encoded: String) -> Self {
        Self { algorithm, encoded }
    }

    /// algorithm returns the algorithm of the digest.
    pub fn algorithm(&self) -> Algorithm {
        self.algorithm
    }

    /// encoded returns the encoded digest.
    pub fn encoded(&self) -> &str {
        &self.encoded
    }
}

/// Digest implements the Display.
impl fmt::Display for Digest {
    /// fmt formats the digest as `<algorithm><SEPARATOR><encoded>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}{}{}", self.algorithm, SEPARATOR, self.encoded)
    }
}

/// Digest implements the FromStr.
impl FromStr for Digest {
    type Err = String;

    /// from_str parses a digest string of the form `<algorithm>:<encoded>`.
    ///
    /// The input is split at the first separator. The encoded part must be:
    /// - crc32: a decimal `u32`, i.e. 1 to 10 ASCII digits (no sign),
    /// - sha256: 64 characters,
    /// - sha512: 128 characters.
    ///
    /// An error message is returned when the separator is missing, the algorithm is unknown
    /// or the encoded part does not satisfy the rule above.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let Some((name, encoded)) = s.split_once(SEPARATOR) else {
            return Err(format!("invalid digest: {}", s));
        };

        let algorithm = match name {
            "crc32" => {
                // crc32 digests in this file are rendered with `u32::to_string()`, so they
                // are not zero padded and can be shorter than 10 digits. Requiring exactly
                // 10 characters would reject digests that Display itself produced.
                let is_decimal_u32 = !encoded.is_empty()
                    && encoded.bytes().all(|b| b.is_ascii_digit())
                    && encoded.parse::<u32>().is_ok();
                if !is_decimal_u32 {
                    return Err(format!(
                        "invalid crc32 digest: {}, expected a decimal u32 of 1 to 10 digits",
                        encoded
                    ));
                }

                Algorithm::Crc32
            }
            "sha256" => {
                if encoded.len() != 64 {
                    return Err(format!(
                        "invalid sha256 digest length: {}, expected 64",
                        encoded.len()
                    ));
                }

                Algorithm::Sha256
            }
            "sha512" => {
                if encoded.len() != 128 {
                    return Err(format!(
                        "invalid sha512 digest length: {}, expected 128",
                        encoded.len()
                    ));
                }

                Algorithm::Sha512
            }
            _ => return Err(format!("invalid digest algorithm: {}", name)),
        };

        Ok(Digest::new(algorithm, encoded.to_string()))
    }
}
/// calculate_file_digest reads the file at `path` and returns its digest for `algorithm`.
///
/// The crc32 value is encoded as the decimal string of the checksum, the sha256 and sha512
/// values are hex encoded. Errors from opening or reading the file are returned.
#[instrument(skip_all)]
pub fn calculate_file_digest(algorithm: Algorithm, path: &Path) -> ClientResult<Digest> {
    let mut reader = io::BufReader::new(std::fs::File::open(path)?);

    let encoded = match algorithm {
        Algorithm::Crc32 => {
            let mut hasher = crc32fast::Hasher::new();
            let mut chunk = [0; 4096];
            loop {
                let read = match reader.read(&mut chunk) {
                    Ok(read) => read,
                    // An interrupted read transferred nothing, so simply try again.
                    Err(ref err) if err.kind() == io::ErrorKind::Interrupted => continue,
                    Err(err) => return Err(err.into()),
                };

                // A zero-length read marks the end of the file.
                if read == 0 {
                    break;
                }

                hasher.update(&chunk[..read]);
            }

            hasher.finalize().to_string()
        }
        Algorithm::Sha256 => {
            let mut hasher = sha2::Sha256::new();
            io::copy(&mut reader, &mut hasher)?;
            hex::encode(hasher.finalize())
        }
        Algorithm::Sha512 => {
            let mut hasher = sha2::Sha512::new();
            io::copy(&mut reader, &mut hasher)?;
            hex::encode(hasher.finalize())
        }
    };

    Ok(Digest::new(algorithm, encoded))
}
/// verify_file_digest recomputes the digest of the file at `file_path` with the algorithm of
/// `expected_digest` and compares the textual forms of both digests.
///
/// Returns `ClientError::DigestMismatch(expected, actual)` when they differ, and passes on any
/// error raised while calculating the digest of the file.
pub fn verify_file_digest(expected_digest: Digest, file_path: &Path) -> ClientResult<()> {
    let actual = calculate_file_digest(expected_digest.algorithm(), file_path)?.to_string();
    let expected = expected_digest.to_string();

    if actual == expected {
        return Ok(());
    }

    Err(ClientError::DigestMismatch(expected, actual))
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;
    use std::io::Write;

    /// Content written to the temporary file used by the file digest tests.
    const CONTENT: &[u8] = b"test content";

    /// Expected digests of `CONTENT`.
    const EXPECTED_SHA256: &str =
        "6ae8a75555209fd6c44157c0aed8016e763ff435a19cf186f76863140143ff72";
    const EXPECTED_SHA512: &str = "0cbf4caef38047bba9a24e621a961484e5d2a92176a859e7eb27df343dd34eb98d538a6c5f4da1ce302ec250b821cc001e46cc97a704988297185a4df7e99602";
    const EXPECTED_CRC32: &str = "1475635037";

    /// Creates a temporary file holding `CONTENT`; the file lives as long as the handle.
    fn temp_file_with_content() -> tempfile::NamedTempFile {
        let temp_file = tempfile::NamedTempFile::new().expect("failed to create temp file");
        let mut file = File::create(temp_file.path()).expect("failed to create file");
        file.write_all(CONTENT).expect("failed to write to file");
        temp_file
    }

    #[test]
    fn test_algorithm_display() {
        for (algorithm, name) in [
            (Algorithm::Crc32, "crc32"),
            (Algorithm::Sha256, "sha256"),
            (Algorithm::Sha512, "sha512"),
        ] {
            assert_eq!(algorithm.to_string(), name);
        }
    }

    #[test]
    fn test_algorithm_from_str() {
        for (name, algorithm) in [
            ("crc32", Algorithm::Crc32),
            ("sha256", Algorithm::Sha256),
            ("sha512", Algorithm::Sha512),
        ] {
            assert_eq!(name.parse::<Algorithm>(), Ok(algorithm));
        }

        assert!("invalid".parse::<Algorithm>().is_err());
    }

    #[test]
    fn test_digest_display() {
        let rendered = Digest::new(Algorithm::Sha256, "encoded_hash".to_string()).to_string();
        assert_eq!(rendered, "sha256:encoded_hash");
    }

    #[test]
    fn test_calculate_file_digest() {
        let temp_file = temp_file_with_content();
        let path = temp_file.path();

        for (algorithm, expected, failure) in [
            (
                Algorithm::Sha256,
                EXPECTED_SHA256,
                "failed to calculate Sha256 hash",
            ),
            (
                Algorithm::Sha512,
                EXPECTED_SHA512,
                "failed to calculate Sha512 hash",
            ),
            (
                Algorithm::Crc32,
                EXPECTED_CRC32,
                "failed to calculate Crc32 hash",
            ),
        ] {
            let digest = calculate_file_digest(algorithm, path).expect(failure);
            assert_eq!(digest.encoded(), expected);
        }
    }

    #[test]
    fn test_verify_file_digest() {
        let temp_file = temp_file_with_content();
        let path = temp_file.path();

        for (algorithm, encoded) in [
            (Algorithm::Sha256, EXPECTED_SHA256),
            (Algorithm::Sha512, EXPECTED_SHA512),
            (Algorithm::Crc32, EXPECTED_CRC32),
        ] {
            let expected_digest = Digest::new(algorithm, encoded.to_string());
            assert!(verify_file_digest(expected_digest, path).is_ok());
        }
    }
}

View File

@ -1,54 +0,0 @@
/*
* Copyright 2025 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_client_core::Result;
use tokio::fs;
/// fallocate allocates the space for the file and fills it with zero, only on Linux.
///
/// A `length` of zero returns `Ok(())` immediately on every platform. On Linux the file is
/// first resized to `length` with `set_len`, then `fallocate` is called with the `KEEP_SIZE`
/// flag for the range starting at offset 0 and spanning `length` bytes. On every other
/// target the function does nothing and returns `Ok(())`.
///
/// Errors from `set_len` are returned as is; an errno from `fallocate` other than `INTR` is
/// converted into `Error::IO`.
// `f` is only read inside the Linux-only block, so it is unused on other targets.
#[allow(unused_variables)]
pub async fn fallocate(f: &fs::File, length: u64) -> Result<()> {
// No allocation needed for zero length. Avoids potential fallocate errors.
if length == 0 {
return Ok(());
}
#[cfg(target_os = "linux")]
{
use dragonfly_client_core::Error;
use rustix::fs::{fallocate, FallocateFlags};
use std::os::unix::io::AsFd;
use tokio::io;
// Set length (potential truncation).
f.set_len(length).await?;
let fd = f.as_fd();
let offset = 0;
// KEEP_SIZE: the file size was already set by `set_len` above.
let flags = FallocateFlags::KEEP_SIZE;
// Retry for as long as the call is interrupted (INTR); any other outcome ends the loop.
loop {
match fallocate(fd, flags, offset, length) {
Ok(_) => return Ok(()),
Err(rustix::io::Errno::INTR) => continue,
Err(err) => {
// Rebuild an io::Error from the raw errno so it fits Error::IO.
return Err(Error::IO(io::Error::from_raw_os_error(err.raw_os_error())))
}
}
}
}
// Non-Linux targets: nothing to allocate.
#[cfg(not(target_os = "linux"))]
Ok(())
}

View File

@ -1,176 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use base64::prelude::*;
use dragonfly_client_core::{
error::{ErrorType, OrErr},
Error, Result,
};
use http::header::{self, HeaderMap};
/// Credentials is the credentials for the basic auth.
pub struct Credentials {
    /// username is the username.
    pub username: String,

    /// password is the password.
    pub password: String,
}

/// Credentials implements the basic auth verification.
impl Credentials {
    /// new returns a new Credentials.
    pub fn new(username: &str, password: &str) -> Credentials {
        Self {
            username: username.to_string(),
            password: password.to_string(),
        }
    }

    /// verify verifies the basic auth with the header.
    ///
    /// The `Authorization` header must have the form `Basic <base64(username:password)>`,
    /// with the scheme matched case-insensitively.
    ///
    /// # Errors
    ///
    /// - `Error::Unauthorized` when the header is missing, has no `<scheme> <payload>` shape,
    ///   uses another scheme, the decoded payload has no `:`, or the credentials differ.
    /// - A `ErrorType::ParseError` error when the header value is not a valid string, the
    ///   payload is not valid base64, or the decoded payload is not valid UTF-8.
    pub fn verify(&self, header: &HeaderMap) -> Result<()> {
        let Some(auth_header) = header.get(header::AUTHORIZATION) else {
            return Err(Error::Unauthorized);
        };

        // Fail closed: a value without a space (for example a bare `Basic` or an arbitrary
        // single token) carries no credentials and must not be accepted.
        let Some((typ, payload)) = auth_header
            .to_str()
            .or_err(ErrorType::ParseError)?
            .split_once(' ')
        else {
            return Err(Error::Unauthorized);
        };

        if !typ.eq_ignore_ascii_case("basic") {
            return Err(Error::Unauthorized);
        }

        let decoded = String::from_utf8(
            BASE64_STANDARD
                .decode(payload)
                .or_err(ErrorType::ParseError)?,
        )
        .or_err(ErrorType::ParseError)?;

        // The username ends at the first colon; the password may itself contain colons.
        let Some((username, password)) = decoded.split_once(':') else {
            return Err(Error::Unauthorized);
        };

        if username != self.username || password != self.password {
            return Err(Error::Unauthorized);
        }

        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use http::header::HeaderValue;

    /// Verifies the credentials `user`/`pass` against a header map holding only the given
    /// `Authorization` value.
    fn verify_with_authorization(value: &'static str) -> Result<()> {
        let mut headers = HeaderMap::new();
        headers.insert(header::AUTHORIZATION, HeaderValue::from_static(value));
        Credentials::new("user", "pass").verify(&headers)
    }

    /// Asserts that the verification failed with `Error::Unauthorized`.
    fn assert_unauthorized(result: Result<()>) {
        assert!(result.is_err());
        assert!(matches!(result.unwrap_err(), Error::Unauthorized));
    }

    #[test]
    fn test_verify_no_auth_header() {
        let result = Credentials::new("user", "pass").verify(&HeaderMap::new());
        assert_unauthorized(result);
    }

    #[test]
    fn test_verify_invalid_auth_type() {
        assert_unauthorized(verify_with_authorization("Bearer some_token"));
    }

    #[test]
    fn test_verify_invalid_base64() {
        let result = verify_with_authorization("Basic invalid_base64");
        assert!(result.is_err());

        let expected = format!(
            "{:?} cause: Invalid symbol 95, offset 7.",
            ErrorType::ParseError
        );
        assert_eq!(format!("{}", result.err().unwrap()), expected);
    }

    #[test]
    fn test_verify_invalid_format() {
        // "user" in Base64, i.e. a payload without the `:` separator.
        assert_unauthorized(verify_with_authorization("Basic dXNlcg=="));
    }

    #[test]
    fn test_verify_incorrect_credentials() {
        // "user:pass_error" in Base64.
        assert_unauthorized(verify_with_authorization("Basic dXNlcjpwYXNzX2Vycm9y"));
    }

    #[test]
    fn test_verify_correct_credentials() {
        // "user:pass" in Base64.
        let result = verify_with_authorization("Basic dXNlcjpwYXNz");
        assert!(result.is_ok());
    }
}

View File

@ -1,170 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_api::common::v2::Range;
use dragonfly_client_core::{
error::{ErrorType, OrErr},
Error, Result,
};
use reqwest::header::{HeaderMap, HeaderName, HeaderValue};
use std::collections::HashMap;
pub mod basic_auth;
/// headermap_to_hashmap copies the entries of a headermap into a hashmap of strings.
///
/// Entries whose value cannot be represented by `HeaderValue::to_str` are left out.
pub fn headermap_to_hashmap(header: &HeaderMap<HeaderValue>) -> HashMap<String, String> {
    let mut hashmap: HashMap<String, String> = HashMap::with_capacity(header.len());
    hashmap.extend(header.iter().filter_map(|(name, value)| {
        value
            .to_str()
            .ok()
            .map(|value| (name.to_string(), value.to_string()))
    }));
    hashmap
}
/// hashmap_to_headermap builds a headermap from a hashmap of strings.
///
/// Fails with a `ErrorType::ParseError` error on the first key that is not a valid header
/// name or the first value that is not a valid header value.
pub fn hashmap_to_headermap(header: &HashMap<String, String>) -> Result<HeaderMap<HeaderValue>> {
    let mut headermap = HeaderMap::with_capacity(header.len());
    header.iter().try_for_each(|(key, raw_value)| -> Result<()> {
        // The name is validated before the value, so a bad name is reported first.
        let name = HeaderName::from_bytes(key.as_bytes()).or_err(ErrorType::ParseError)?;
        let value = HeaderValue::from_bytes(raw_value.as_bytes()).or_err(ErrorType::ParseError)?;
        headermap.insert(name, value);
        Ok(())
    })?;

    Ok(headermap)
}
/// header_vec_to_hashmap converts a vector of `name: value` header strings to a hashmap.
///
/// Each string is split at its first `:` and both sides are trimmed. Strings without a `:`
/// are ignored, and a later entry replaces an earlier one with the same name.
pub fn header_vec_to_hashmap(raw_header: Vec<String>) -> Result<HashMap<String, String>> {
    let mut header = HashMap::with_capacity(raw_header.len());
    header.extend(raw_header.iter().filter_map(|line| {
        line.split_once(':')
            .map(|(name, value)| (name.trim().to_string(), value.trim().to_string()))
    }));

    Ok(header)
}
/// header_vec_to_headermap converts a vector of `name: value` header strings to a reqwest
/// headermap, going through `header_vec_to_hashmap` and `hashmap_to_headermap`.
pub fn header_vec_to_headermap(raw_header: Vec<String>) -> Result<HeaderMap> {
    let header = header_vec_to_hashmap(raw_header)?;
    hashmap_to_headermap(&header)
}
/// get_range reads the `Range` header and parses it against `content_length`.
///
/// Returns `Ok(None)` when the header is absent, and an error when the header value is not a
/// valid string or cannot be parsed as a range.
pub fn get_range(header: &HeaderMap, content_length: u64) -> Result<Option<Range>> {
    let Some(value) = header.get(reqwest::header::RANGE) else {
        return Ok(None);
    };

    let value = value.to_str().or_err(ErrorType::ParseError)?;
    parse_range_header(value, content_length).map(Some)
}
/// parse_range_header parses a Range header string as per RFC 7233,
/// supported Range Header: "Range": "bytes=100-200", "Range": "bytes=-50",
/// "Range": "bytes=150-", "Range": "bytes=0-0,-1".
pub fn parse_range_header(range_header_value: &str, content_length: u64) -> Result<Range> {
let parsed_ranges =
http_range_header::parse_range_header(range_header_value).or_err(ErrorType::ParseError)?;
let valid_ranges = parsed_ranges
.validate(content_length)
.or_err(ErrorType::ParseError)?;
// Not support multiple ranges.
let valid_range = valid_ranges
.first()
.ok_or_else(|| Error::EmptyHTTPRangeError)?;
let start = valid_range.start().to_owned();
let length = valid_range.end() - start + 1;
Ok(Range { start, length })
}
#[cfg(test)]
mod tests {
    use super::*;
    use reqwest::header::{HeaderMap, HeaderValue};

    /// Raw `name: value` header lines shared by the vector conversion tests.
    fn raw_header_lines() -> Vec<String> {
        vec![
            "Content-Type: application/json".to_string(),
            "Authorization: Bearer token".to_string(),
        ]
    }

    #[test]
    fn test_headermap_to_hashmap() {
        let mut header = HeaderMap::new();
        header.insert("Content-Type", HeaderValue::from_static("application/json"));
        header.insert("Authorization", HeaderValue::from_static("Bearer token"));

        // Header names come back lowercased.
        let hashmap = headermap_to_hashmap(&header);
        for (name, value) in [
            ("content-type", "application/json"),
            ("authorization", "Bearer token"),
        ] {
            assert_eq!(hashmap.get(name).unwrap(), value);
        }
        assert_eq!(hashmap.get("foo"), None);
    }

    #[test]
    fn test_hashmap_to_headermap() {
        let hashmap: HashMap<String, String> = [
            ("Content-Type", "application/json"),
            ("Authorization", "Bearer token"),
        ]
        .iter()
        .map(|(name, value)| (name.to_string(), value.to_string()))
        .collect();

        let header = hashmap_to_headermap(&hashmap).unwrap();
        assert_eq!(header.get("Content-Type").unwrap(), "application/json");
        assert_eq!(header.get("Authorization").unwrap(), "Bearer token");
    }

    #[test]
    fn test_header_vec_to_hashmap() {
        let hashmap = header_vec_to_hashmap(raw_header_lines()).unwrap();
        assert_eq!(hashmap.get("Content-Type").unwrap(), "application/json");
        assert_eq!(hashmap.get("Authorization").unwrap(), "Bearer token");
    }

    #[test]
    fn test_header_vec_to_headermap() {
        let header = header_vec_to_headermap(raw_header_lines()).unwrap();
        assert_eq!(header.get("Content-Type").unwrap(), "application/json");
        assert_eq!(header.get("Authorization").unwrap(), "Bearer token");
    }

    #[test]
    fn test_get_range() {
        let mut header = HeaderMap::new();
        header.insert(
            reqwest::header::RANGE,
            HeaderValue::from_static("bytes=0-100"),
        );

        let range = get_range(&header, 200).unwrap().unwrap();
        assert_eq!((range.start, range.length), (0, 101));
    }

    #[test]
    fn test_parse_range_header() {
        let range = parse_range_header("bytes=0-100", 200).unwrap();
        assert_eq!((range.start, range.length), (0, 101));
    }
}

View File

@ -1,445 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_api::common::v2::TaskType;
use dragonfly_client_core::{
error::{ErrorType, OrErr},
Result,
};
use sha2::{Digest, Sha256};
use std::io::{self, Read};
use std::path::PathBuf;
use url::Url;
use uuid::Uuid;
/// SEED_PEER_SUFFIX is the suffix of the seed peer; `peer_id` appends it to the ids it
/// generates for a seed peer.
const SEED_PEER_SUFFIX: &str = "seed";
/// PERSISTENT_CACHE_TASK_SUFFIX is the suffix of the persistent cache task; `task_type`
/// treats any id ending with it as a persistent cache task.
const PERSISTENT_CACHE_TASK_SUFFIX: &str = "persistent-cache-task";
/// TaskIDParameter is the parameter of the task id, consumed by `IDGenerator::task_id`.
pub enum TaskIDParameter {
/// Content uses the content to generate the task id: the id is the hex-encoded sha256 of
/// the string.
Content(String),
/// URLBased uses the url, piece_length, tag, application and filtered_query_params to generate
/// the task id.
URLBased {
/// url is the url of the task; it must be parseable by `Url::parse`.
url: String,
/// piece_length is added to the hash only when it is `Some`.
piece_length: Option<u64>,
/// tag is added to the hash only when it is `Some`.
tag: Option<String>,
/// application is added to the hash only when it is `Some`.
application: Option<String>,
/// filtered_query_params lists the query parameter names removed from the url before it
/// is hashed.
filtered_query_params: Vec<String>,
},
}
/// PersistentCacheTaskIDParameter is the parameter of the persistent cache task id, consumed
/// by `IDGenerator::persistent_cache_task_id`.
pub enum PersistentCacheTaskIDParameter {
/// Content uses the content to generate the persistent cache task id: the id is the decimal
/// crc32 of the string.
Content(String),
/// FileContentBased uses the file path, piece_length, tag and application to generate the persistent cache task id.
FileContentBased {
/// path is the file whose content is read and hashed.
path: PathBuf,
/// piece_length is added to the hash only when it is `Some`.
piece_length: Option<u64>,
/// tag is added to the hash only when it is `Some`.
tag: Option<String>,
/// application is added to the hash only when it is `Some`.
application: Option<String>,
},
}
/// IDGenerator is used to generate the id for the resources: host ids, peer ids, task ids
/// and persistent cache task ids.
#[derive(Debug)]
pub struct IDGenerator {
/// ip is the ip of the host; it is the first component of host and peer ids.
ip: String,
/// hostname is the hostname of the host; it is the second component of host and peer ids.
hostname: String,
/// is_seed_peer indicates whether the host is a seed peer; when set, host and peer ids get
/// a seed suffix.
is_seed_peer: bool,
}
/// IDGenerator implements the IDGenerator.
impl IDGenerator {
    /// new creates a new IDGenerator from the host ip, the hostname and the seed peer flag.
    pub fn new(ip: String, hostname: String, is_seed_peer: bool) -> Self {
        IDGenerator {
            ip,
            hostname,
            is_seed_peer,
        }
    }

    /// host_id generates the host id: `<ip>-<hostname>`, followed by `-<SEED_PEER_SUFFIX>`
    /// for a seed peer.
    #[inline]
    pub fn host_id(&self) -> String {
        if self.is_seed_peer {
            // Use the shared constant so that host ids and peer ids cannot drift apart.
            return format!("{}-{}-{}", self.ip, self.hostname, SEED_PEER_SUFFIX);
        }

        format!("{}-{}", self.ip, self.hostname)
    }

    /// task_id generates the task id.
    ///
    /// - `Content`: the hex-encoded sha256 of the content.
    /// - `URLBased`: the hex-encoded sha256 of, in this order, the url without the filtered
    ///   query parameters, the tag, the application and the piece length (each only when
    ///   present), and the name of the standard task type.
    ///
    /// The order of the hashed parts defines the id and must not change.
    ///
    /// # Errors
    ///
    /// Returns a `ErrorType::ParseError` error when the url cannot be parsed.
    #[inline]
    pub fn task_id(&self, parameter: TaskIDParameter) -> Result<String> {
        match parameter {
            TaskIDParameter::Content(content) => {
                Ok(hex::encode(Sha256::digest(content.as_bytes())))
            }
            TaskIDParameter::URLBased {
                url,
                piece_length,
                tag,
                application,
                filtered_query_params,
            } => {
                // Filter the query parameters.
                let url = Url::parse(url.as_str()).or_err(ErrorType::ParseError)?;
                let query = url
                    .query_pairs()
                    .filter(|(k, _)| !filtered_query_params.contains(&k.to_string()));

                // Drop the query entirely when nothing is left, so that no dangling `?`
                // ends up in the hashed url.
                let mut artifact_url = url.clone();
                if query.clone().count() == 0 {
                    artifact_url.set_query(None);
                } else {
                    artifact_url.query_pairs_mut().clear().extend_pairs(query);
                }

                // A url with the root path only is hashed without its trailing slash.
                let artifact_url_str = artifact_url.to_string();
                let final_url = if artifact_url_str.ends_with('/') && artifact_url.path() == "/" {
                    artifact_url_str.trim_end_matches('/').to_string()
                } else {
                    artifact_url_str
                };

                // Initialize the hasher.
                let mut hasher = Sha256::new();

                // Add the url to generate the task id.
                hasher.update(final_url);

                // Add the tag to generate the task id.
                if let Some(tag) = tag {
                    hasher.update(tag);
                }

                // Add the application to generate the task id.
                if let Some(application) = application {
                    hasher.update(application);
                }

                // Add the piece length to generate the task id.
                if let Some(piece_length) = piece_length {
                    hasher.update(piece_length.to_string());
                }

                hasher.update(TaskType::Standard.as_str_name().as_bytes());

                // Generate the task id.
                Ok(hex::encode(hasher.finalize()))
            }
        }
    }

    /// persistent_cache_task_id generates the persistent cache task id.
    ///
    /// - `Content`: the decimal crc32 of the content.
    /// - `FileContentBased`: the decimal crc32 of, in this order, the content of the file,
    ///   the tag, the application and the piece length (each only when present), and the
    ///   name of the persistent cache task type.
    ///
    /// The order of the hashed parts defines the id and must not change.
    ///
    /// # Errors
    ///
    /// Returns the io error raised while opening or reading the file.
    #[inline]
    pub fn persistent_cache_task_id(
        &self,
        parameter: PersistentCacheTaskIDParameter,
    ) -> Result<String> {
        let mut hasher = crc32fast::Hasher::new();

        match parameter {
            PersistentCacheTaskIDParameter::Content(content) => {
                hasher.update(content.as_bytes());
                Ok(hasher.finalize().to_string())
            }
            PersistentCacheTaskIDParameter::FileContentBased {
                path,
                piece_length,
                tag,
                application,
            } => {
                // Calculate the hash of the file.
                let f = std::fs::File::open(path)?;
                let mut buffer = [0; 4096];
                let mut reader = io::BufReader::with_capacity(buffer.len(), f);
                loop {
                    match reader.read(&mut buffer) {
                        Ok(0) => break,
                        Ok(n) => hasher.update(&buffer[..n]),
                        // An interrupted read transferred nothing, so simply try again.
                        Err(ref err) if err.kind() == io::ErrorKind::Interrupted => continue,
                        Err(err) => return Err(err.into()),
                    };
                }

                // Add the tag to generate the persistent cache task id.
                if let Some(tag) = tag {
                    hasher.update(tag.as_bytes());
                }

                // Add the application to generate the persistent cache task id.
                if let Some(application) = application {
                    hasher.update(application.as_bytes());
                }

                // Add the piece length to generate the persistent cache task id.
                if let Some(piece_length) = piece_length {
                    hasher.update(piece_length.to_string().as_bytes());
                }

                hasher.update(TaskType::PersistentCache.as_str_name().as_bytes());

                // Generate the task id by crc32.
                Ok(hasher.finalize().to_string())
            }
        }
    }

    /// peer_id generates the peer id: `<ip>-<hostname>-<uuid v4>`, followed by
    /// `-<SEED_PEER_SUFFIX>` for a seed peer. Every call yields a new uuid.
    #[inline]
    pub fn peer_id(&self) -> String {
        if self.is_seed_peer {
            return format!(
                "{}-{}-{}-{}",
                self.ip,
                self.hostname,
                Uuid::new_v4(),
                SEED_PEER_SUFFIX,
            );
        }

        format!("{}-{}-{}", self.ip, self.hostname, Uuid::new_v4())
    }

    /// task_type generates the task type by the task id: ids ending with
    /// `PERSISTENT_CACHE_TASK_SUFFIX` are persistent cache tasks, all others are standard.
    ///
    /// NOTE(review): `persistent_cache_task_id` returns a decimal crc32 string, which never
    /// carries that suffix, so ids generated by it are reported as standard here — confirm
    /// that callers append the suffix themselves.
    pub fn task_type(&self, id: &str) -> TaskType {
        if id.ends_with(PERSISTENT_CACHE_TASK_SUFFIX) {
            return TaskType::PersistentCache;
        }

        TaskType::Standard
    }
}
// Unit tests for IDGenerator. The expected ids below are pinned values: they change whenever
// the hashed inputs or their order change.
#[cfg(test)]
mod tests {
use super::*;
use std::fs::File;
use std::io::Write;
use tempfile::tempdir;
// host_id is `<ip>-<hostname>` and gets a `-seed` suffix for a seed peer.
#[test]
fn should_generate_host_id() {
let test_cases = vec![
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
"127.0.0.1-localhost",
),
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), true),
"127.0.0.1-localhost-seed",
),
];
for (generator, expected) in test_cases {
assert_eq!(generator.host_id(), expected);
}
}
// task_id depends on every optional parameter that is present, ignores filtered query
// parameters and hashes plain content directly.
#[test]
fn should_generate_task_id() {
let test_cases = vec![
// All optional parameters set.
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
TaskIDParameter::URLBased {
url: "https://example.com".to_string(),
piece_length: Some(1024_u64),
tag: Some("foo".to_string()),
application: Some("bar".to_string()),
filtered_query_params: vec![],
},
"27554d06dfc788c2c2c60e01960152ffbd4b145fc103fcb80b432b4dc238a6fe",
),
// Tag and application only.
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
TaskIDParameter::URLBased {
url: "https://example.com".to_string(),
piece_length: None,
tag: Some("foo".to_string()),
application: Some("bar".to_string()),
filtered_query_params: vec![],
},
"06408fbf247ddaca478f8cb9565fe5591c28efd0994b8fea80a6a87d3203c5ca",
),
// Tag only.
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
TaskIDParameter::URLBased {
url: "https://example.com".to_string(),
piece_length: None,
tag: Some("foo".to_string()),
application: None,
filtered_query_params: vec![],
},
"3c3f230ef9f191dd2821510346a7bc138e4894bee9aee184ba250a3040701d2a",
),
// Application only.
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
TaskIDParameter::URLBased {
url: "https://example.com".to_string(),
piece_length: None,
tag: None,
application: Some("bar".to_string()),
filtered_query_params: vec![],
},
"c9f9261b7305c24371244f9f149f5d4589ed601348fdf22d7f6f4b10658fdba2",
),
// Piece length only.
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
TaskIDParameter::URLBased {
url: "https://example.com".to_string(),
piece_length: Some(1024_u64),
tag: None,
application: None,
filtered_query_params: vec![],
},
"9f7c9aafbc6f30f8f41a96ca77eeae80c5b60964b3034b0ee43ccf7b2f9e52b8",
),
// Every query parameter of the url is filtered out.
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
TaskIDParameter::URLBased {
url: "https://example.com?foo=foo&bar=bar".to_string(),
piece_length: None,
tag: None,
application: None,
filtered_query_params: vec!["foo".to_string(), "bar".to_string()],
},
"457b4328cde278e422c9e243f7bfd1e97f511fec43a80f535cf6b0ef6b086776",
),
// Content based id.
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
TaskIDParameter::Content("This is a test file".to_string()),
"e2d0fe1585a63ec6009c8016ff8dda8b17719a637405a4e23c0ff81339148249",
),
];
for (generator, parameter, expected_id) in test_cases {
let task_id = generator.task_id(parameter).unwrap();
assert_eq!(task_id, expected_id);
}
}
// persistent_cache_task_id hashes the content of a file together with the optional
// parameters that are present, or hashes plain content directly.
#[test]
fn should_generate_persistent_cache_task_id() {
// The file hashed by the FileContentBased cases.
let dir = tempdir().unwrap();
let file_path = dir.path().join("testfile");
let mut f = File::create(&file_path).unwrap();
f.write_all("This is a test file".as_bytes()).unwrap();
let test_cases = vec![
// All optional parameters set.
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
PersistentCacheTaskIDParameter::FileContentBased {
path: file_path.clone(),
piece_length: Some(1024_u64),
tag: Some("tag1".to_string()),
application: Some("app1".to_string()),
},
"3490958009",
),
// Application only.
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
PersistentCacheTaskIDParameter::FileContentBased {
path: file_path.clone(),
piece_length: None,
tag: None,
application: Some("app1".to_string()),
},
"735741469",
),
// Tag only.
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
PersistentCacheTaskIDParameter::FileContentBased {
path: file_path.clone(),
piece_length: None,
tag: Some("tag1".to_string()),
application: None,
},
"3954905097",
),
// Piece length only.
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
PersistentCacheTaskIDParameter::FileContentBased {
path: file_path.clone(),
piece_length: Some(1024_u64),
tag: None,
application: None,
},
"4162557545",
),
// Content based id.
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
PersistentCacheTaskIDParameter::Content("This is a test file".to_string()),
"107352521",
),
];
for (generator, parameter, expected_id) in test_cases {
let task_id = generator.persistent_cache_task_id(parameter).unwrap();
assert_eq!(task_id, expected_id);
}
}
// peer_id contains a random uuid, so only its prefix and the seed suffix are checked.
#[test]
fn should_generate_peer_id() {
let test_cases = vec![
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false),
false,
),
(
IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), true),
true,
),
];
for (generator, is_seed_peer) in test_cases {
let peer_id = generator.peer_id();
assert!(peer_id.starts_with("127.0.0.1-localhost-"));
if is_seed_peer {
assert!(peer_id.ends_with("-seed"));
}
}
}
// task_type is derived from the suffix of the id alone.
#[test]
fn should_generate_task_type() {
let test_cases = vec![
("some-task-id", TaskType::Standard),
(
"some-task-id-persistent-cache-task",
TaskType::PersistentCache,
),
];
let generator = IDGenerator::new("127.0.0.1".to_string(), "localhost".to_string(), false);
for (id, expected_type) in test_cases {
assert_eq!(generator.task_type(id), expected_type);
}
}
}

View File

@ -1,230 +0,0 @@
/*
* Copyright 2025 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use bytesize::ByteSize;
use pnet::datalink::{self, NetworkInterface};
use std::cmp::min;
use std::net::IpAddr;
use std::sync::Arc;
use std::time::Duration;
use sysinfo::Networks;
use tokio::sync::Mutex;
use tracing::{info, warn};
/// Interface represents a network interface with its information.
///
/// It is built by `Interface::new`, which resolves the interface from an IP address and
/// caps its bandwidth by the configured rate limit.
#[derive(Debug, Clone, Default)]
pub struct Interface {
/// name is the name of the network interface, or "unknown" when no interface
/// owns the IP address given to `Interface::new`.
pub name: String,
/// bandwidth is the bandwidth of the network interface in bps. It is the smaller of
/// the link speed and the rate limit, or the rate limit alone when the link speed
/// can not be determined.
pub bandwidth: u64,
/// network_data_mutex is a mutex to protect access to network data. It is held for
/// the whole duration of `get_network_data`, so concurrent callers are serialized.
network_data_mutex: Arc<Mutex<()>>,
}
/// NetworkData represents the network data for a specific interface, as returned by
/// `Interface::get_network_data`.
#[derive(Debug, Clone, Default)]
pub struct NetworkData {
/// max_rx_bandwidth is the maximum receive bandwidth of the interface in bps.
/// It is filled from `Interface::bandwidth`.
pub max_rx_bandwidth: u64,
/// rx_bandwidth is the current receive bandwidth of the interface in bps.
/// It is `None` when no network data could be found for the interface.
pub rx_bandwidth: Option<u64>,
/// max_tx_bandwidth is the maximum transmit bandwidth of the interface in bps.
/// It is filled from `Interface::bandwidth`.
pub max_tx_bandwidth: u64,
/// tx_bandwidth is the current transmit bandwidth of the interface in bps.
/// It is `None` when no network data could be found for the interface.
pub tx_bandwidth: Option<u64>,
}
/// Interface methods provide functionality to get network interface information.
impl Interface {
/// DEFAULT_NETWORKS_REFRESH_INTERVAL is the default interval for refreshing network data.
/// `get_network_data` sleeps for this long between its two network refreshes and divides
/// the traffic counters by it to obtain a per-second rate.
const DEFAULT_NETWORKS_REFRESH_INTERVAL: Duration = Duration::from_secs(2);
/// new creates a new Interface instance based on the provided IP address and rate limit.
///
/// The interface owning `ip` is looked up first. Its bandwidth is the smaller of the link
/// speed and `rate_limit`; when the interface or its speed can not be determined, the
/// bandwidth falls back to `rate_limit` (and the name to "unknown" if no interface matched).
pub fn new(ip: IpAddr, rate_limit: ByteSize) -> Interface {
    // All bandwidth values are handled in bits per second.
    let limit_bps = Self::byte_size_to_bits(rate_limit);

    // Resolve the name and the effective bandwidth, then build the Interface once.
    let (name, bandwidth) = match Self::get_network_interface_by_ip(ip) {
        None => {
            warn!(
                "can not find interface for IP address {}, network interface unknown with bandwidth {} bps",
                ip, limit_bps
            );
            ("unknown".to_string(), limit_bps)
        }
        Some(found) => {
            if let Some(speed) = Self::get_speed(&found.name) {
                // The link speed is reported in Mbps; never exceed the rate limit.
                let capped_bps = min(Self::megabits_to_bits(speed), limit_bps);
                info!(
                    "network interface {} with bandwidth {} bps",
                    found.name, capped_bps
                );
                (found.name, capped_bps)
            } else {
                warn!(
                    "can not get speed, network interface {} with bandwidth {} bps",
                    found.name, limit_bps
                );
                (found.name, limit_bps)
            }
        }
    };

    Interface {
        name,
        bandwidth,
        network_data_mutex: Arc::new(Mutex::new(())),
    }
}
/// get_network_data retrieves the network data for the interface.
///
/// The call takes at least DEFAULT_NETWORKS_REFRESH_INTERVAL: the network list is
/// refreshed, the task sleeps for the interval, the list is refreshed again and the
/// received/transmitted counters are converted into bits per second. If the interface
/// is not present in the refreshed list, only the maximum bandwidths are filled in.
pub async fn get_network_data(&self) -> NetworkData {
    // Hold the mutex for the whole measurement so that callers are serialized.
    let _guard = self.network_data_mutex.lock().await;

    // Take a first snapshot, wait for the sampling window, then refresh so that the
    // counters cover the traffic difference over the window.
    let mut networks = Networks::new_with_refreshed_list();
    tokio::time::sleep(Self::DEFAULT_NETWORKS_REFRESH_INTERVAL).await;
    networks.refresh();

    // Convert a byte count observed over the sampling window into bits per second.
    let window_secs = Self::DEFAULT_NETWORKS_REFRESH_INTERVAL.as_secs_f64();
    let to_bps = |bytes: u64| (Self::bytes_to_bits(bytes) as f64 / window_secs).round() as u64;

    match networks.get(self.name.as_str()) {
        Some(stats) => {
            let rx_bandwidth = to_bps(stats.received());
            let tx_bandwidth = to_bps(stats.transmitted());
            NetworkData {
                max_rx_bandwidth: self.bandwidth,
                rx_bandwidth: Some(rx_bandwidth),
                max_tx_bandwidth: self.bandwidth,
                tx_bandwidth: Some(tx_bandwidth),
            }
        }
        None => {
            warn!("can not find network data for interface {}", self.name);
            NetworkData {
                max_rx_bandwidth: self.bandwidth,
                max_tx_bandwidth: self.bandwidth,
                ..Default::default()
            }
        }
    }
}
/// get_speed returns the speed of the network interface in Mbps.
///
/// On Linux the value is read from `/sys/class/net/<name>/speed`; `None` is returned
/// when the file can not be read or does not contain an unsigned integer. On other
/// platforms a warning is logged and `None` is always returned.
pub fn get_speed(name: &str) -> Option<u64> {
    #[cfg(target_os = "linux")]
    {
        let raw_speed = std::fs::read_to_string(format!("/sys/class/net/{}/speed", name)).ok()?;
        raw_speed.trim().parse::<u64>().ok()
    }
    #[cfg(not(target_os = "linux"))]
    {
        warn!("can not get interface {} speed on non-linux platform", name);
        None
    }
}
/// get_network_interface_by_ip returns the first network interface that has the
/// specified IP address assigned, or `None` when no interface matches.
pub fn get_network_interface_by_ip(ip: IpAddr) -> Option<NetworkInterface> {
    for candidate in datalink::interfaces() {
        // An interface can carry several networks; compare the address of each one.
        if candidate.ips.iter().any(|network| network.ip() == ip) {
            return Some(candidate);
        }
    }
    None
}
/// byte_size_to_bits converts a ByteSize to bits.
///
/// The multiplication saturates at `u64::MAX` instead of overflowing, so a very large
/// size (2 EiB or more) yields the maximum value rather than a panic in debug builds
/// or a wrapped, too-small value in release builds.
pub fn byte_size_to_bits(size: ByteSize) -> u64 {
    size.as_u64().saturating_mul(8) // 1 byte = 8 bits
}
/// megabits_to_bits converts megabits to bits.
///
/// The multiplication saturates at `u64::MAX` instead of overflowing, so an absurdly
/// large input yields the maximum value rather than a panic in debug builds or a
/// wrapped, too-small value in release builds.
pub fn megabits_to_bits(size: u64) -> u64 {
    size.saturating_mul(1_000_000) // 1 Mbit = 1,000,000 bits
}
/// bytes_to_bits converts bytes to bits.
///
/// The multiplication saturates at `u64::MAX` instead of overflowing, so a byte count
/// above `u64::MAX / 8` yields the maximum value rather than a panic in debug builds
/// or a wrapped, too-small value in release builds.
pub fn bytes_to_bits(size: u64) -> u64 {
    size.saturating_mul(8) // 1 byte = 8 bits
}
}
#[cfg(test)]
mod tests {
use super::*;
use bytesize::ByteSize;
#[test]
fn test_byte_size_to_bits() {
    // Decimal units: 1 KB = 1,000 bytes, 1 MB = 1,000,000 bytes, 1 GB = 1,000,000,000 bytes.
    let cases = [
        (ByteSize::kb(1), 8_000u64),
        (ByteSize::mb(1), 8_000_000u64),
        (ByteSize::gb(1), 8_000_000_000u64),
        (ByteSize::b(0), 0u64),
    ];

    for (size, expected_bits) in cases {
        assert_eq!(Interface::byte_size_to_bits(size), expected_bits);
    }
}
#[test]
fn test_megabits_to_bits() {
    // (megabits, expected bits)
    let cases = [
        (1u64, 1_000_000u64),
        (1000u64, 1_000_000_000u64),
        (0u64, 0u64),
    ];

    for (megabits, expected_bits) in cases {
        assert_eq!(Interface::megabits_to_bits(megabits), expected_bits);
    }
}
#[test]
fn test_bytes_to_bits() {
    // (bytes, expected bits)
    let cases = [(1u64, 8u64), (1000u64, 8_000u64), (0u64, 0u64)];

    for (bytes, expected_bits) in cases {
        assert_eq!(Interface::bytes_to_bits(bytes), expected_bits);
    }
}
}

View File

@ -1,425 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_client_core::error::{ErrorType, OrErr};
use dragonfly_client_core::{Error as ClientError, Result as ClientResult};
use lazy_static::lazy_static;
use lru::LruCache;
use rcgen::{Certificate, CertificateParams, KeyPair};
use rustls_pki_types::{CertificateDer, PrivateKeyDer, ServerName, UnixTime};
use std::num::NonZeroUsize;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use std::vec::Vec;
use std::{fs, io};
use tracing::instrument;
/// DEFAULT_CERTS_CACHE_CAPACITY is the default capacity of the certificates cache,
/// i.e. the maximum number of entries each LRU certificate cache in this module holds.
const DEFAULT_CERTS_CACHE_CAPACITY: usize = 1000;
/// CertKeyPair is the type of the certificate and private key pair: a DER certificate
/// chain together with the DER private key that belongs to it.
type CertKeyPair = (Vec<CertificateDer<'static>>, PrivateKeyDer<'static>);
lazy_static! {
/// SELF_SIGNED_CERTS is an LRU cache, keyed by host, that stores the certificates signed
/// by the CA certificate to avoid generating the same certificates multiple times.
/// It holds at most DEFAULT_CERTS_CACHE_CAPACITY entries.
static ref SELF_SIGNED_CERTS: Arc<Mutex<LruCache<String, CertKeyPair>>> =
Arc::new(Mutex::new(LruCache::new(NonZeroUsize::new(DEFAULT_CERTS_CACHE_CAPACITY).unwrap())));
/// SIMPLE_SELF_SIGNED_CERTS is an LRU cache, keyed by host, that stores the simple
/// self-signed certificates to avoid generating the same certificates multiple times.
/// It holds at most DEFAULT_CERTS_CACHE_CAPACITY entries.
static ref SIMPLE_SELF_SIGNED_CERTS: Arc<Mutex<LruCache<String, CertKeyPair>>> =
Arc::new(Mutex::new(LruCache::new(NonZeroUsize::new(DEFAULT_CERTS_CACHE_CAPACITY).unwrap())));
}
/// NoVerifier is a verifier that does not verify the server certificate.
/// It is used for testing and should not be used in production.
///
/// The wrapped crypto provider supplies the signature verification algorithms used
/// to check handshake signatures; the certificate itself is always accepted.
#[derive(Debug)]
pub struct NoVerifier(Arc<rustls::crypto::CryptoProvider>);
/// Implement the NoVerifier.
impl NoVerifier {
    /// new creates a new NoVerifier backed by the default ring crypto provider.
    pub fn new() -> Arc<Self> {
        let provider = rustls::crypto::ring::default_provider();
        Arc::new(Self(Arc::new(provider)))
    }
}
/// Implement the ServerCertVerifier trait for NoVerifier.
impl rustls::client::danger::ServerCertVerifier for NoVerifier {
    /// verify_server_cert accepts every server certificate without inspecting it.
    fn verify_server_cert(
        &self,
        _end_entity: &CertificateDer<'_>,
        _intermediates: &[CertificateDer<'_>],
        _server_name: &ServerName<'_>,
        _ocsp: &[u8],
        _now: UnixTime,
    ) -> Result<rustls::client::danger::ServerCertVerified, rustls::Error> {
        // None of the arguments are consulted: the certificate is asserted to be valid.
        let verified = rustls::client::danger::ServerCertVerified::assertion();
        Ok(verified)
    }

    /// verify_tls12_signature verifies the TLS 1.2 signature with the algorithms of
    /// the wrapped crypto provider.
    fn verify_tls12_signature(
        &self,
        message: &[u8],
        cert: &CertificateDer<'_>,
        dss: &rustls::DigitallySignedStruct,
    ) -> Result<rustls::client::danger::HandshakeSignatureValid, rustls::Error> {
        let algorithms = &self.0.signature_verification_algorithms;
        rustls::crypto::verify_tls12_signature(message, cert, dss, algorithms)
    }

    /// verify_tls13_signature verifies the TLS 1.3 signature with the algorithms of
    /// the wrapped crypto provider.
    fn verify_tls13_signature(
        &self,
        message: &[u8],
        cert: &CertificateDer<'_>,
        dss: &rustls::DigitallySignedStruct,
    ) -> Result<rustls::client::danger::HandshakeSignatureValid, rustls::Error> {
        let algorithms = &self.0.signature_verification_algorithms;
        rustls::crypto::verify_tls13_signature(message, cert, dss, algorithms)
    }

    /// supported_verify_schemes returns the signature schemes supported by the
    /// wrapped crypto provider.
    fn supported_verify_schemes(&self) -> Vec<rustls::SignatureScheme> {
        let algorithms = &self.0.signature_verification_algorithms;
        algorithms.supported_schemes()
    }
}
/// Generate a CA certificate from PEM format files.
///
/// Both files are read fully and must be valid UTF-8. The key is parsed first, then
/// combined with the certificate into the CA parameters. I/O and UTF-8 failures are
/// propagated as they are; parsing failures are reported as `ErrorType::CertificateError`.
///
/// Generate CA by openssl with PEM format files:
/// openssl req -x509 -sha256 -days 36500 -nodes -newkey rsa:4096 -keyout ca.key -out ca.crt
#[instrument(skip_all)]
pub fn generate_ca_cert_from_pem(
    ca_cert_path: &PathBuf,
    ca_key_path: &PathBuf,
) -> ClientResult<Certificate> {
    // Read the certificate and the key as PEM text.
    let cert_bytes = fs::read(ca_cert_path)?;
    let cert_text = std::str::from_utf8(&cert_bytes)?;
    let key_bytes = fs::read(ca_key_path)?;
    let key_text = std::str::from_utf8(&key_bytes)?;

    // Rebuild the CA certificate from its key pair and its original parameters.
    let ca_key_pair = KeyPair::from_pem(key_text).or_err(ErrorType::CertificateError)?;
    let params = CertificateParams::from_ca_cert_pem(cert_text, ca_key_pair)
        .or_err(ErrorType::CertificateError)?;

    Ok(Certificate::from_params(params).or_err(ErrorType::CertificateError)?)
}
/// Generate certificates from PEM format files.
///
/// Every certificate found in the file is returned in order; the first I/O or
/// parsing error aborts the load.
#[instrument(skip_all)]
pub fn generate_cert_from_pem(cert_path: &PathBuf) -> ClientResult<Vec<CertificateDer<'static>>> {
    let mut pem_reader = io::BufReader::new(fs::File::open(cert_path)?);

    let mut certs = Vec::new();
    for cert in rustls_pemfile::certs(&mut pem_reader) {
        certs.push(cert?);
    }
    Ok(certs)
}
/// generate_self_signed_certs_by_ca_cert generates a certificate for the given subject
/// alternative names and signs it with the CA certificate.
///
/// Results are cached per `host` in SELF_SIGNED_CERTS: a cached pair is returned as is,
/// otherwise a new pair is generated, stored under `host` and returned.
#[instrument(skip_all)]
pub fn generate_self_signed_certs_by_ca_cert(
    ca_cert: &Certificate,
    host: &str,
    subject_alt_names: Vec<String>,
) -> ClientResult<(Vec<CertificateDer<'static>>, PrivateKeyDer<'static>)> {
    // Fast path: the lock guard is a temporary, so it is released before generating.
    let cached = SELF_SIGNED_CERTS
        .lock()
        .unwrap()
        .get(host)
        .map(|(certs, key)| (certs.clone(), key.clone_key()));
    if let Some(pair) = cached {
        return Ok(pair);
    }

    // Sign certificate with CA certificate by given subject alternative names.
    let leaf_params = CertificateParams::new(subject_alt_names);
    let leaf = Certificate::from_params(leaf_params).or_err(ErrorType::CertificateError)?;
    let leaf_cert_pem = leaf
        .serialize_pem_with_signer(ca_cert)
        .or_err(ErrorType::CertificateError)?;
    let leaf_key_pem = leaf.serialize_private_key_pem();

    // Convert the PEM certificate chain into DER certificates.
    let certs = rustls_pemfile::certs(&mut io::BufReader::new(leaf_cert_pem.as_bytes()))
        .collect::<Result<Vec<_>, _>>()?;

    // Convert the PEM private key into a DER private key.
    let key = rustls_pemfile::private_key(&mut io::BufReader::new(leaf_key_pem.as_bytes()))?
        .ok_or_else(|| ClientError::Unknown("failed to load private key".to_string()))?;

    // Remember the pair for later requests for the same host.
    SELF_SIGNED_CERTS
        .lock()
        .unwrap()
        .push(host.to_string(), (certs.clone(), key.clone_key()));
    Ok((certs, key))
}
/// generate_simple_self_signed_certs generates a simple self-signed certificates
#[instrument(skip_all)]
pub fn generate_simple_self_signed_certs(
host: &str,
subject_alt_names: impl Into<Vec<String>>,
) -> ClientResult<(Vec<CertificateDer<'static>>, PrivateKeyDer<'static>)> {
let mut cache = SIMPLE_SELF_SIGNED_CERTS.lock().unwrap();
if let Some((certs, key)) = cache.get(host) {
return Ok((certs.clone(), key.clone_key()));
};
drop(cache);
let cert = rcgen::generate_simple_self_signed(subject_alt_names)
.or_err(ErrorType::CertificateError)?;
let key = rustls_pki_types::PrivateKeyDer::Pkcs8(cert.serialize_private_key_der().into());
let certs = vec![cert
.serialize_der()
.or_err(ErrorType::CertificateError)?
.into()];
let mut cache = SIMPLE_SELF_SIGNED_CERTS.lock().unwrap();
cache.push(host.to_string(), (certs.clone(), key.clone_key()));
Ok((certs, key))
}
/// certs_to_raw_certs converts DER format of the certificates to raw certificates,
/// copying the bytes of each certificate into its own vector and keeping the order.
#[instrument(skip_all)]
pub fn certs_to_raw_certs(certs: Vec<CertificateDer<'static>>) -> Vec<Vec<u8>> {
    let mut raw_certs = Vec::with_capacity(certs.len());
    for cert in certs {
        raw_certs.push(cert.as_ref().to_vec());
    }
    raw_certs
}
/// raw_certs_to_certs converts raw certificates to DER format of certificates.
#[instrument(skip_all)]
pub fn raw_certs_to_certs(raw_certs: Vec<Vec<u8>>) -> Vec<CertificateDer<'static>> {
raw_certs.into_iter().map(|cert| cert.into()).collect()
}
/// load_certs_from_pem loads certificates from PEM format string.
///
/// Every certificate found in the string is returned in order; the first parsing
/// error aborts the load.
#[instrument(skip_all)]
pub fn load_certs_from_pem(cert_pem: &str) -> ClientResult<Vec<CertificateDer<'static>>> {
    let mut pem_bytes = cert_pem.as_bytes();

    let mut certs = Vec::new();
    for cert in rustls_pemfile::certs(&mut pem_bytes) {
        certs.push(cert?);
    }
    Ok(certs)
}
/// load_key_from_pem loads private key from PEM format string.
///
/// Parsing errors are propagated; a string without any private key is reported as
/// `ClientError::Unknown`.
#[instrument(skip_all)]
pub fn load_key_from_pem(key_pem: &str) -> ClientResult<PrivateKeyDer<'static>> {
    let mut pem_bytes = key_pem.as_bytes();

    match rustls_pemfile::private_key(&mut pem_bytes)? {
        Some(key) => Ok(key),
        None => Err(ClientError::Unknown(
            "failed to load private key".to_string(),
        )),
    }
}
#[cfg(test)]
mod tests {
use super::*;
use rustls::client::danger::ServerCertVerifier;
use rustls_pki_types::{CertificateDer, ServerName, UnixTime};
use std::io::Write;
use tempfile::NamedTempFile;
// Generate the certificate and private key by script(`scripts/generate_certs.sh`).
const SERVER_CERT: &str = r#"""
-----BEGIN CERTIFICATE-----
MIIDsDCCApigAwIBAgIUWuckNOpaPERz+QMACyqCqFJwYIYwDQYJKoZIhvcNAQEL
BQAwYjELMAkGA1UEBhMCQ04xEDAOBgNVBAgMB0JlaWppbmcxEDAOBgNVBAcMB0Jl
aWppbmcxEDAOBgNVBAoMB1Rlc3QgQ0ExCzAJBgNVBAsMAklUMRAwDgYDVQQDDAdU
ZXN0IENBMB4XDTI0MTAxMTEyMTEwN1oXDTI2MDIyMzEyMTEwN1owaDELMAkGA1UE
BhMCQ04xEDAOBgNVBAgMB0JlaWppbmcxEDAOBgNVBAcMB0JlaWppbmcxFDASBgNV
BAoMC1Rlc3QgU2VydmVyMQswCQYDVQQLDAJJVDESMBAGA1UEAwwJbG9jYWxob3N0
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAiA9wEge3Jq8qw8Ix9z6t
ss7ttK/49TMddhnQuqoYrFKjYliuvfbRZOU1nBP7+5XSAliPDCRNPS17JSwsXJk2
bstc69fruDpYmthualSTsUYSwJJqzJjy5mlwSPtBsombcSHrUasMce5C4iXJX8Wx
1O8ZCwuI5LUKxLujt+ZWnYfp5lzDcDhgD6wIzcMk67jv2edcWhqGkKmQbbmmK3Ve
DJRa56NCh0F2U1SW0KCXTzoC1YU/bbB4UCfvHouMzCRNTr3VcrfL5aBIn/z/f6Xt
atQkqFa/T1/lOQ0miMqNyBW58NxkPsTaJm2kVZ21hF2Dvo8MU/8Ras0J0aL8sc4n
LwIDAQABo1gwVjAUBgNVHREEDTALgglsb2NhbGhvc3QwHQYDVR0OBBYEFJP+jy8a
tCfnu6nekyZugvq8XT2gMB8GA1UdIwQYMBaAFOwXKq7J6STkwLUWC1xKwq1Psy63
MA0GCSqGSIb3DQEBCwUAA4IBAQCu8nqnuzNn3E9dNC8ptV7ga1zb7cGdL3ZT5W3d
10gmPo3YijWoCj4snattX9zxI8ThAY7uX6jrR0/HRXGJIw5JnlBmykdgyrQYEDzU
FUL0GGabJNxZ+zDV77P+3WdgCx3F7wLQk+x+etMPvYuWC8RMse7W6dB1INyMT/l6
k1rV73KTupSNJrYhqw0RnmNHIctkwiZLLpzLFj91BHjK5ero7VV4s7vnx+gtO/zQ
FnIyiyfYYcSpVMhhaNkeCtWOfgVYU/m4XXn5bwEOhMN6q0JcdBPnT6kd2otLhiIo
/WeyWEUeZ4rQhS7C1i31AYtNtVnnvI7BrsI4czYdcJcj3CM+
-----END CERTIFICATE-----
"""#;
const SERVER_KEY: &str = r#"""
-----BEGIN PRIVATE KEY-----
MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCID3ASB7cmryrD
wjH3Pq2yzu20r/j1Mx12GdC6qhisUqNiWK699tFk5TWcE/v7ldICWI8MJE09LXsl
LCxcmTZuy1zr1+u4Olia2G5qVJOxRhLAkmrMmPLmaXBI+0GyiZtxIetRqwxx7kLi
JclfxbHU7xkLC4jktQrEu6O35ladh+nmXMNwOGAPrAjNwyTruO/Z51xaGoaQqZBt
uaYrdV4MlFrno0KHQXZTVJbQoJdPOgLVhT9tsHhQJ+8ei4zMJE1OvdVyt8vloEif
/P9/pe1q1CSoVr9PX+U5DSaIyo3IFbnw3GQ+xNombaRVnbWEXYO+jwxT/xFqzQnR
ovyxzicvAgMBAAECggEABqHVkTfe1p+PBGx34tG/4nQxwIRxLJG31no+jeAdYOLF
AEeulqezbmIroyTMA0uQKWscy0V/gXUi3avHOOktp72Vv9fxy98F/fyBPx3YEvLa
69DMnl0qPl06CvLlTey6km8RKxUrRq9S2NoTydD+m1fC9jCIhvHkrNExIXjtaewU
PvAHJy4ho+hVLo40udmQ4i1gnEWYUtjkr65ujuOAlWrlScHGvOrATbrfcaufPi/S
5A/h8UlfahBstmh3a2tBLZlNl82s5ZKsVM1Oq1Vk9hAX5DP2JBAmuZKgX/xSDdpR
62VUQGqp1WLgble5vR6ZUFo5+Jiw1uxe9jmNUg9mMQKBgQC8giG3DeeU6+rX9LVz
cklF4jioU5LMdYutwXbtuGIWgXeJo8r0fzrgBtBVGRn7anS7YnYA+67h+A8SC6MO
SXvktpHIC3Egge2Q9dRrWA4YCpkIxlOQ5ofCqovvCg9kq9sYqGz6lMr3RrzOWkUW
+0hF1CHCV0+KGFeIvTYVIKSsJwKBgQC4xiTsaShmwJ6HdR59jOmij+ccCPQTt2IO
eGcniY2cHIoX9I7nn7Yah6JbMT0c8j75KA+pfCrK3FpRNrb71cI1iqBHedZXpRaV
eshJztmw3AKtxQPNwRYrKYpY/M0ShAduppELeshZz1kubQU3sD4adrhcGCDXkctb
dP44IpipuQKBgC+W5q4Q65L0ECCe3aQciRUEbGtKVfgaAL5H5h9TeifWXXg5Coa5
DAL8lWG2aZHIKVoZHFNZNqhDeIKEv5BeytFNqfYHtXKQeoorFYpX+47kNgg6EWS2
XjWt2o/pSUOQA0rxUjnckHTmvcmWjnSj0XYXfMJUSndBd+/EXL/ussPnAoGAGE5Q
Wxz2KJYcBHuemCtqLG07nI988/8Ckh66ixPoIeoLLF2KUuPKg7Dl5ZMTk/Q13nar
oMLpqifUZayJ45TZ6EslDGH1lS/tSZqOME9aiY5Xd95bwrwsm17qiQwwOchOZfrZ
R6ZOJqpE8/t5XTr84GRPmiW+ZD0UgCJisqWyaVkCgYEAtupQDst0hmZ0KnJSIZ5U
R6skHABhmwNU5lOPUBIzHVorbAaKDKd4iFbBI5wnBuWxXY0SANl2HYX3gZaPccH4
wzvR3jZ1B4UlEBXl2V+VRbrXyPTN4uUF42AkSGuOsK4O878wW8noX+ZZTk7gydTN
Z+yQ5jhu/fmSBNhqO/8Lp+Y=
-----END PRIVATE KEY-----
"""#;
#[test]
fn test_no_verifier() {
    let verifier = NoVerifier::new();

    // An empty certificate must be accepted for any server name.
    let end_entity = CertificateDer::from(vec![]);
    let server_name = ServerName::DnsName("d7y.io".try_into().unwrap());
    let verified =
        verifier.verify_server_cert(&end_entity, &[], &server_name, &[], UnixTime::now());
    assert!(verified.is_ok());

    // The wrapped crypto provider must advertise at least one signature scheme.
    assert!(!verifier.supported_verify_schemes().is_empty());
}
#[test]
fn test_generate_ca_cert_from_pem() {
    // Write the certificate and the key fixtures into two temporary files.
    let ca_cert_file = NamedTempFile::new().unwrap();
    let ca_key_file = NamedTempFile::new().unwrap();
    for (file, pem) in [(&ca_cert_file, SERVER_CERT), (&ca_key_file, SERVER_KEY)] {
        file.as_file().write_all(pem.as_bytes()).unwrap();
    }

    let ca_cert_path = ca_cert_file.path().to_path_buf();
    let ca_key_path = ca_key_file.path().to_path_buf();
    assert!(generate_ca_cert_from_pem(&ca_cert_path, &ca_key_path).is_ok());
}
#[test]
fn test_generate_cert_from_pem() {
    let cert_file = NamedTempFile::new().unwrap();
    cert_file
        .as_file()
        .write_all(SERVER_CERT.as_bytes())
        .unwrap();

    // Loading must succeed and yield at least one certificate.
    let cert_path = cert_file.path().to_path_buf();
    let loaded = generate_cert_from_pem(&cert_path);
    assert!(loaded.is_ok());
    let certs = loaded.unwrap();
    assert!(!certs.is_empty());
}
#[test]
fn test_generate_self_signed_certs_by_ca_cert() {
    // Write the certificate and the key fixtures into two temporary files.
    let ca_cert_file = NamedTempFile::new().unwrap();
    let ca_key_file = NamedTempFile::new().unwrap();
    for (file, pem) in [(&ca_cert_file, SERVER_CERT), (&ca_key_file, SERVER_KEY)] {
        file.as_file().write_all(pem.as_bytes()).unwrap();
    }

    // Load them back as the signing CA.
    let ca_cert_path = ca_cert_file.path().to_path_buf();
    let ca_key_path = ca_key_file.path().to_path_buf();
    let ca_cert = generate_ca_cert_from_pem(&ca_cert_path, &ca_key_path).unwrap();

    // Sign a certificate whose only subject alternative name is the host itself.
    let host = "example.com";
    let signed = generate_self_signed_certs_by_ca_cert(&ca_cert, host, vec![host.to_string()]);
    assert!(signed.is_ok());

    let (certs, key) = signed.unwrap();
    assert!(!certs.is_empty());
    assert!(matches!(key, PrivateKeyDer::Pkcs8(_)));
}
#[test]
fn test_certs_to_raw_certs() {
    let cert_file = NamedTempFile::new().unwrap();
    cert_file
        .as_file()
        .write_all(SERVER_CERT.as_bytes())
        .unwrap();

    // Converting the loaded certificates must produce at least one raw certificate.
    let cert_path = cert_file.path().to_path_buf();
    let raw_certs = certs_to_raw_certs(generate_cert_from_pem(&cert_path).unwrap());
    assert!(!raw_certs.is_empty());
}
#[test]
fn test_raw_certs_to_certs() {
    let cert_file = NamedTempFile::new().unwrap();
    cert_file
        .as_file()
        .write_all(SERVER_CERT.as_bytes())
        .unwrap();

    // Round-trip: DER certificates -> raw bytes -> DER certificates.
    let cert_path = cert_file.path().to_path_buf();
    let raw_certs = certs_to_raw_certs(generate_cert_from_pem(&cert_path).unwrap());
    let round_tripped = raw_certs_to_certs(raw_certs);
    assert!(!round_tripped.is_empty());
}
#[test]
fn test_load_certs_from_pem() {
    // Loading must succeed and yield at least one certificate.
    let loaded = load_certs_from_pem(SERVER_CERT);
    assert!(loaded.is_ok());
    let certs = loaded.unwrap();
    assert!(!certs.is_empty());
}
#[test]
fn test_load_key_from_pem() {
    // The fixture is a "PRIVATE KEY" PEM block, so a PKCS#8 key is expected.
    let loaded = load_key_from_pem(SERVER_KEY);
    assert!(loaded.is_ok());
    let key = loaded.unwrap();
    assert!(matches!(key, PrivateKeyDer::Pkcs8(_)));
}
}

View File

@ -1,312 +0,0 @@
[package]
name = "dragonfly-client"
description = "Dragonfly client written in Rust"
version.workspace = true
authors.workspace = true
homepage.workspace = true
repository.workspace = true
keywords.workspace = true
license.workspace = true
readme.workspace = true
edition.workspace = true
[[bin]]
name = "dfdaemon"
path = "src/bin/dfdaemon/main.rs"
[[bin]]
name = "dfget"
path = "src/bin/dfget/main.rs"
[[bin]]
name = "dfcache"
path = "src/bin/dfcache/main.rs"
[dependencies]
dragonfly-client-core.workspace = true
dragonfly-client-config.workspace = true
dragonfly-client-storage.workspace = true
dragonfly-client-backend.workspace = true
dragonfly-client-util.workspace = true
dragonfly-api.workspace = true
rcgen.workspace = true
hyper.workspace = true
hyper-util.workspace = true
hyper-rustls.workspace = true
tracing.workspace = true
serde.workspace = true
chrono.workspace = true
prost-wkt-types.workspace = true
tokio.workspace = true
tokio-util.workspace = true
rustls.workspace = true
rustls-pki-types.workspace = true
warp.workspace = true
tonic.workspace = true
tonic-reflection.workspace = true
tokio-stream.workspace = true
reqwest.workspace = true
url.workspace = true
http.workspace = true
openssl.workspace = true
clap.workspace = true
anyhow.workspace = true
bytes.workspace = true
bytesize.workspace = true
humantime.workspace = true
uuid.workspace = true
percent-encoding.workspace = true
tokio-rustls.workspace = true
serde_json.workspace = true
fs2.workspace = true
lazy_static.workspace = true
futures.workspace = true
local-ip-address.workspace = true
sysinfo.workspace = true
tracing-appender = "0.2.3"
tracing-subscriber = { version = "0.3", features = ["env-filter", "time", "chrono"] }
tracing-panic = "0.1.2"
tracing-opentelemetry = "0.30.0"
opentelemetry = { version = "0.29.1", default-features = false, features = ["trace"] }
opentelemetry-otlp = { version = "0.29.0", default-features = false, features = ["trace", "grpc-tonic", "http-proto", "reqwest-blocking-client"] }
opentelemetry_sdk = { version = "0.29.0", default-features = false, features = ["trace", "rt-tokio"] }
opentelemetry-semantic-conventions = { version = "0.30.0", features = ["semconv_experimental"] }
rolling-file = "0.2.0"
pprof = { version = "0.15", features = ["flamegraph", "protobuf-codec"] }
prometheus = { version = "0.13", features = ["process"] }
tonic-health = "0.12.3"
tower = { version = "0.4.13", features = ["limit", "load-shed", "buffer"] }
indicatif = "0.18.0"
hashring = "0.3.6"
leaky-bucket = "1.1.2"
http-body-util = "0.1.3"
termion = "4.0.5"
tabled = "0.20.0"
path-absolutize = "3.1.1"
dashmap = "6.1.0"
fastrand = "2.3.0"
glob = "0.3.3"
console-subscriber = "0.4.1"
[dev-dependencies]
tempfile.workspace = true
[target.'cfg(not(target_env = "msvc"))'.dependencies]
tikv-jemallocator = { version = "0.5.4", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms", "background_threads"] }
[target.'cfg(target_os = "linux")'.dependencies]
jemalloc_pprof = "0.4.2"
[package.metadata.deb.variants.x86_64-unknown-linux-gnu]
maintainer = "Dragonfly <dragonfly-maintainers@googlegroups.com>"
priority = "optional"
section = "rust"
assets = [
[
"../target/x86_64-unknown-linux-gnu/release/dfget",
"usr/bin/dfget",
"755",
],
[
"../target/x86_64-unknown-linux-gnu/release/dfdaemon",
"usr/bin/dfdaemon",
"755",
],
[
"../target/x86_64-unknown-linux-gnu/release/dfcache",
"usr/bin/dfcache",
"755",
],
[
"../ci/dfdaemon.service",
"lib/systemd/system/dfdaemon.service",
"644",
],
[
"../CONTRIBUTING.md",
"usr/share/doc/client/CONTRIBUTING.md",
"644",
],
[
"../LICENSE",
"usr/share/doc/client/LICENSE",
"644",
],
[
"../README.md",
"usr/share/doc/client/README.md",
"644",
],
]
[package.metadata.deb.variants.x86_64-unknown-linux-musl]
maintainer = "Dragonfly <dragonfly-maintainers@googlegroups.com>"
section = "rust"
priority = "optional"
assets = [
[
"../target/x86_64-unknown-linux-musl/release/dfget",
"usr/bin/dfget",
"755",
],
[
"../target/x86_64-unknown-linux-musl/release/dfdaemon",
"usr/bin/dfdaemon",
"755",
],
[
"../target/x86_64-unknown-linux-musl/release/dfcache",
"usr/bin/dfcache",
"755",
],
[
"../ci/dfdaemon.service",
"lib/systemd/system/dfdaemon.service",
"644",
],
[
"../CONTRIBUTING.md",
"usr/share/doc/client/CONTRIBUTING.md",
"644",
],
[
"../LICENSE",
"usr/share/doc/client/LICENSE",
"644",
],
[
"../README.md",
"usr/share/doc/client/README.md",
"644",
],
]
[package.metadata.deb.variants.aarch64-unknown-linux-gnu]
maintainer = "Dragonfly <dragonfly-maintainers@googlegroups.com>"
priority = "optional"
section = "rust"
assets = [
[
"../target/aarch64-unknown-linux-gnu/release/dfget",
"usr/bin/dfget",
"755",
],
[
"../target/aarch64-unknown-linux-gnu/release/dfdaemon",
"usr/bin/dfdaemon",
"755",
],
[
"../target/aarch64-unknown-linux-gnu/release/dfcache",
"usr/bin/dfcache",
"755",
],
[
"../ci/dfdaemon.service",
"lib/systemd/system/dfdaemon.service",
"644",
],
[
"../CONTRIBUTING.md",
"usr/share/doc/client/CONTRIBUTING.md",
"644",
],
[
"../LICENSE",
"usr/share/doc/client/LICENSE",
"644",
],
[
"../README.md",
"usr/share/doc/client/README.md",
"644",
],
]
[package.metadata.deb.variants.aarch64-unknown-linux-musl]
maintainer = "Dragonfly <dragonfly-maintainers@googlegroups.com>"
priority = "optional"
section = "rust"
assets = [
[
"../target/aarch64-unknown-linux-musl/release/dfget",
"usr/bin/dfget",
"755",
],
[
"../target/aarch64-unknown-linux-musl/release/dfdaemon",
"usr/bin/dfdaemon",
"755",
],
[
"../target/aarch64-unknown-linux-musl/release/dfcache",
"usr/bin/dfcache",
"755",
],
[
"../ci/dfdaemon.service",
"lib/systemd/system/dfdaemon.service",
"644",
],
[
"../CONTRIBUTING.md",
"usr/share/doc/client/CONTRIBUTING.md",
"644",
],
[
"../LICENSE",
"usr/share/doc/client/LICENSE",
"644",
],
[
"../README.md",
"usr/share/doc/client/README.md",
"644",
],
]
[package.metadata.generate-rpm.variants.x86_64-unknown-linux-gnu]
assets = [
{ source = "../target/x86_64-unknown-linux-gnu/release/dfget", dest = "/usr/bin/dfget", mode = "755" },
{ source = "../target/x86_64-unknown-linux-gnu/release/dfdaemon", dest = "/usr/bin/dfdaemon", mode = "755" },
{ source = "../target/x86_64-unknown-linux-gnu/release/dfcache", dest = "/usr/bin/dfcache", mode = "755" },
{ source = "../ci/dfdaemon.service", dest = "/lib/systemd/system/dfdaemon.service", config = true, mode = "644" },
{ source = "../CONTRIBUTING.md", dest = "/usr/share/doc/client/CONTRIBUTING.md", mode = "644", doc = true },
{ source = "../LICENSE", dest = "/usr/share/doc/client/LICENSE.md", mode = "644", doc = true },
{ source = "../README.md", dest = "/usr/share/doc/client/README.md", mode = "644", doc = true },
]
[package.metadata.generate-rpm.variants.x86_64-unknown-linux-musl]
assets = [
{ source = "../target/x86_64-unknown-linux-musl/release/dfget", dest = "/usr/bin/dfget", mode = "755" },
{ source = "../target/x86_64-unknown-linux-musl/release/dfdaemon", dest = "/usr/bin/dfdaemon", mode = "755" },
{ source = "../target/x86_64-unknown-linux-musl/release/dfcache", dest = "/usr/bin/dfcache", mode = "755" },
{ source = "../ci/dfdaemon.service", dest = "/lib/systemd/system/dfdaemon.service", config = true, mode = "644" },
{ source = "../CONTRIBUTING.md", dest = "/usr/share/doc/client/CONTRIBUTING.md", mode = "644", doc = true },
{ source = "../LICENSE", dest = "/usr/share/doc/client/LICENSE.md", mode = "644", doc = true },
{ source = "../README.md", dest = "/usr/share/doc/client/README.md", mode = "644", doc = true },
]
auto-req = "no"
[package.metadata.generate-rpm.variants.aarch64-unknown-linux-gnu]
assets = [
{ source = "../target/aarch64-unknown-linux-gnu/release/dfget", dest = "/usr/bin/dfget", mode = "755" },
{ source = "../target/aarch64-unknown-linux-gnu/release/dfdaemon", dest = "/usr/bin/dfdaemon", mode = "755" },
{ source = "../target/aarch64-unknown-linux-gnu/release/dfcache", dest = "/usr/bin/dfcache", mode = "755" },
{ source = "../ci/dfdaemon.service", dest = "/lib/systemd/system/dfdaemon.service", config = true, mode = "644" },
{ source = "../CONTRIBUTING.md", dest = "/usr/share/doc/client/CONTRIBUTING.md", mode = "644", doc = true },
{ source = "../LICENSE", dest = "/usr/share/doc/client/LICENSE.md", mode = "644", doc = true },
{ source = "../README.md", dest = "/usr/share/doc/client/README.md", mode = "644", doc = true },
]
[package.metadata.generate-rpm.variants.aarch64-unknown-linux-musl]
assets = [
{ source = "../target/aarch64-unknown-linux-musl/release/dfget", dest = "/usr/bin/dfget", mode = "755" },
{ source = "../target/aarch64-unknown-linux-musl/release/dfdaemon", dest = "/usr/bin/dfdaemon", mode = "755" },
{ source = "../target/aarch64-unknown-linux-musl/release/dfcache", dest = "/usr/bin/dfcache", mode = "755" },
{ source = "../ci/dfdaemon.service", dest = "/lib/systemd/system/dfdaemon.service", config = true, mode = "644" },
{ source = "../CONTRIBUTING.md", dest = "/usr/share/doc/client/CONTRIBUTING.md", mode = "644", doc = true },
{ source = "../LICENSE", dest = "/usr/share/doc/client/LICENSE.md", mode = "644", doc = true },
{ source = "../README.md", dest = "/usr/share/doc/client/README.md", mode = "644", doc = true },
]
auto-req = "no"

View File

@ -1,251 +0,0 @@
/*
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::grpc::scheduler::SchedulerClient;
use crate::shutdown;
use dragonfly_api::common::v2::{Build, Cpu, Disk, Host, Memory, Network};
use dragonfly_api::scheduler::v2::{AnnounceHostRequest, DeleteHostRequest};
use dragonfly_client_config::{
dfdaemon::{Config, HostType},
CARGO_PKG_RUSTC_VERSION, CARGO_PKG_VERSION, GIT_COMMIT_SHORT_HASH,
};
use dragonfly_client_core::error::{ErrorType, OrErr};
use dragonfly_client_core::Result;
use dragonfly_client_util::net::Interface;
use std::env;
use std::sync::Arc;
use std::time::Duration;
use sysinfo::System;
use tokio::sync::mpsc;
use tracing::{debug, error, info, instrument};
/// SchedulerAnnouncer is used to announce the dfdaemon information to the scheduler.
pub struct SchedulerAnnouncer {
/// config is the configuration of the dfdaemon.
config: Arc<Config>,
/// host_id is the id of the host. It is sent in the delete host request when the
/// announcer shuts down.
host_id: String,
/// scheduler_client is the grpc client of the scheduler.
scheduler_client: Arc<SchedulerClient>,
/// interface is the network interface.
interface: Arc<Interface>,
/// shutdown is used to shutdown the announcer.
shutdown: shutdown::Shutdown,
/// _shutdown_complete is used to notify the announcer is shutdown.
/// NOTE(review): it is never read here; presumably dropping the sender is the
/// completion signal for the receiver side - confirm against the shutdown module.
_shutdown_complete: mpsc::UnboundedSender<()>,
}
/// SchedulerAnnouncer implements the scheduler announcer of the dfdaemon.
impl SchedulerAnnouncer {
/// new builds a scheduler announcer and performs the initial host announcement.
///
/// The announcer is returned only after `init_announce_host` has succeeded; a failure while
/// building the request or while calling the scheduler is propagated to the caller.
pub async fn new(
    config: Arc<Config>,
    host_id: String,
    scheduler_client: Arc<SchedulerClient>,
    interface: Arc<Interface>,
    shutdown: shutdown::Shutdown,
    shutdown_complete_tx: mpsc::UnboundedSender<()>,
) -> Result<Self> {
    let this = Self {
        config,
        host_id,
        scheduler_client,
        interface,
        shutdown,
        _shutdown_complete: shutdown_complete_tx,
    };

    // The very first announcement is built with a zero interval.
    let initial_request = this.make_announce_host_request(Duration::ZERO).await?;
    this.scheduler_client
        .init_announce_host(initial_request)
        .await?;

    Ok(this)
}
/// run re-announces the host to the scheduler on every tick of the configured announce
/// interval, and deletes the host from the scheduler once shutdown is signalled.
///
/// Failures are logged and never abort the loop; the function only returns on shutdown.
pub async fn run(&self) {
    // Listen on a private clone of the shutdown handle.
    let mut shutdown = self.shutdown.clone();

    // Ticker driving the periodic announcements.
    let mut ticker = tokio::time::interval(self.config.scheduler.announce_interval);
    loop {
        tokio::select! {
            _ = ticker.tick() => {
                match self.make_announce_host_request(ticker.period()).await {
                    Ok(request) => {
                        if let Err(err) = self.scheduler_client.announce_host(request).await {
                            error!("announce host to scheduler failed: {}", err);
                        }
                    }
                    Err(err) => {
                        // Skip this round and try again on the next tick.
                        error!("make announce host request failed: {}", err);
                    }
                }
            }
            _ = shutdown.recv() => {
                // Tell the scheduler this host is going away before returning.
                let request = DeleteHostRequest {
                    host_id: self.host_id.clone(),
                };
                if let Err(err) = self.scheduler_client.delete_host(request).await {
                    error!("delete host from scheduler failed: {}", err);
                }

                info!("announce to scheduler shutting down");
                return;
            }
        }
    }
}
/// make_announce_host_request makes the announce host request.
#[instrument(skip_all)]
async fn make_announce_host_request(&self, interval: Duration) -> Result<AnnounceHostRequest> {
// If the seed peer is enabled, we should announce the seed peer to the scheduler.
let host_type = if self.config.seed_peer.enable {
self.config.seed_peer.kind
} else {
HostType::Normal
};
// Refresh the system information.
let mut sys = System::new_all();
sys.refresh_all();
// Get the process information.
let process = sys.process(sysinfo::get_current_pid().unwrap()).unwrap();
// Get the cpu information.
let cpu = Cpu {
logical_count: sys.physical_core_count().unwrap_or_default() as u32,
physical_count: sys.physical_core_count().unwrap_or_default() as u32,
percent: sys.global_cpu_usage() as f64,
process_percent: process.cpu_usage() as f64,
// TODO: Get the cpu times.
times: None,
};
// Get the memory information.
let memory = Memory {
total: sys.total_memory(),
available: sys.available_memory(),
used: sys.used_memory(),
used_percent: (sys.used_memory() / sys.total_memory()) as f64,
process_used_percent: (process.memory() / sys.total_memory()) as f64,
free: sys.free_memory(),
};
// Wait for getting the network data.
let network_data = self.interface.get_network_data().await;
debug!(
"network data: rx bandwidth {}/{} bps, tx bandwidth {}/{} bps",
network_data.rx_bandwidth.unwrap_or(0),
network_data.max_rx_bandwidth,
network_data.tx_bandwidth.unwrap_or(0),
network_data.max_tx_bandwidth
);
// Get the network information.
let network = Network {
idc: self.config.host.idc.clone(),
location: self.config.host.location.clone(),
max_rx_bandwidth: network_data.max_rx_bandwidth,
rx_bandwidth: network_data.rx_bandwidth,
max_tx_bandwidth: network_data.max_tx_bandwidth,
tx_bandwidth: network_data.tx_bandwidth,
..Default::default()
};
// Get the disk information.
let stats = fs2::statvfs(self.config.storage.dir.as_path())?;
let total_space = stats.total_space();
let available_space = stats.available_space();
let used_space = total_space - available_space;
let used_percent = (used_space as f64 / (total_space) as f64) * 100.0;
let mut write_bandwidth = 0;
let mut read_bandwidth = 0;
if interval != Duration::ZERO {
let disk_usage = process.disk_usage();
write_bandwidth = disk_usage.written_bytes / interval.as_secs();
read_bandwidth = disk_usage.read_bytes / interval.as_secs();
};
let disk = Disk {
total: total_space,
free: available_space,
used: used_space,
used_percent,
write_bandwidth,
read_bandwidth,
// TODO: Get the disk inodes information.
inodes_total: 0,
inodes_used: 0,
inodes_free: 0,
inodes_used_percent: 0.0,
};
// Get the build information.
let build = Build {
git_version: CARGO_PKG_VERSION.to_string(),
git_commit: Some(GIT_COMMIT_SHORT_HASH.to_string()),
go_version: None,
rust_version: Some(CARGO_PKG_RUSTC_VERSION.to_string()),
platform: None,
};
// Struct the host information.
let host = Host {
id: self.host_id.to_string(),
r#type: host_type as u32,
hostname: self.config.host.hostname.clone(),
ip: self.config.host.ip.unwrap().to_string(),
port: self.config.upload.server.port as i32,
download_port: self.config.upload.server.port as i32,
os: env::consts::OS.to_string(),
platform: env::consts::OS.to_string(),
platform_family: env::consts::FAMILY.to_string(),
platform_version: System::os_version().unwrap_or_default(),
kernel_version: System::kernel_version().unwrap_or_default(),
cpu: Some(cpu),
memory: Some(memory),
network: Some(network),
disk: Some(disk),
build: Some(build),
// TODO: Get scheduler cluster id from dynconfig.
scheduler_cluster_id: 0,
disable_shared: self.config.upload.disable_shared,
};
Ok(AnnounceHostRequest {
host: Some(host),
interval: Some(
prost_wkt_types::Duration::try_from(self.config.scheduler.announce_interval)
.or_err(ErrorType::ParseError)?,
),
})
}
}

View File

@ -1,614 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use clap::Parser;
use dragonfly_api::dfdaemon::v2::{
download_persistent_cache_task_response, DownloadPersistentCacheTaskRequest,
};
use dragonfly_api::errordetails::v2::Backend;
use dragonfly_client_core::{
error::{ErrorType, OrErr},
Error, Result,
};
use dragonfly_client_util::fs::fallocate;
use indicatif::{ProgressBar, ProgressState, ProgressStyle};
use local_ip_address::local_ip;
use path_absolutize::*;
use std::path::{Path, PathBuf};
use std::time::Duration;
use std::{cmp::min, fmt::Write};
use termion::{color, style};
use tokio::fs::{self, OpenOptions};
use tokio::io::{AsyncSeekExt, AsyncWriteExt, SeekFrom};
use tracing::{debug, error, info};
use super::*;
/// ExportCommand is the subcommand of export.
// The fields below are the command line arguments of `dfcache export`, parsed by clap. The
// order of the fields and of their attributes is significant for clap, so keep it stable.
// Comments on the fields are plain `//` comments on purpose: clap reads `///` doc comments.
#[derive(Debug, Clone, Parser)]
pub struct ExportCommand {
// Positional argument: the persistent cache task ID.
#[arg(help = "Specify the persistent cache task ID to export")]
id: String,
// When true, the piece content is streamed to dfcache, which writes the output file itself.
#[arg(
long = "transfer-from-dfdaemon",
default_value_t = false,
help = "Specify whether to transfer the content of downloading file from dfdaemon's unix domain socket. If it is true, dfcache will call dfdaemon to download the file, and dfdaemon will return the content of downloading file to dfcache via unix domain socket, and dfcache will copy the content to the output path. If it is false, dfdaemon will download the file and hardlink or copy the file to the output path."
)]
transfer_from_dfdaemon: bool,
// Forwarded unchanged to dfdaemon in the download request.
#[arg(
long = "force-hard-link",
default_value_t = false,
help = "Specify whether the download file must be hard linked to the output path. If hard link is failed, download will be failed. If it is false, dfdaemon will copy the file to the output path if hard link is failed."
)]
force_hard_link: bool,
#[arg(
long = "application",
default_value = "",
help = "Caller application which is used for statistics and access control"
)]
application: String,
#[arg(
long = "tag",
default_value = "",
help = "Different tags for the same file will be divided into different persistent cache tasks"
)]
tag: String,
// Required: there is no default output path.
#[arg(
short = 'O',
long = "output",
help = "Specify the output path of exporting file"
)]
output: PathBuf,
// Parsed from a human readable duration such as "2h".
#[arg(
long = "timeout",
value_parser= humantime::parse_duration,
default_value = "2h",
help = "Specify the timeout for exporting a file"
)]
timeout: Duration,
#[arg(
long = "digest",
required = false,
help = "Verify the integrity of the downloaded file using the specified digest, support sha256, sha512, crc32. If the digest is not specified, the downloaded file will not be verified. Format: <algorithm>:<digest>, e.g. sha256:1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef, crc32:12345678"
)]
digest: Option<String>,
// Path of the unix socket used to reach dfdaemon.
#[arg(
short = 'e',
long = "endpoint",
default_value_os_t = dfdaemon::default_download_unix_socket_path(),
help = "Endpoint of dfdaemon's GRPC server"
)]
endpoint: PathBuf,
// Logging options, passed to `init_tracing` when the command is executed.
#[arg(
short = 'l',
long,
default_value = "info",
help = "Specify the logging level [trace, debug, info, warn, error]"
)]
log_level: Level,
#[arg(
long,
default_value_os_t = dfcache::default_dfcache_log_dir(),
help = "Specify the log directory"
)]
log_dir: PathBuf,
#[arg(
long,
default_value_t = 6,
help = "Specify the max number of log files"
)]
log_max_files: usize,
#[arg(long, default_value_t = false, help = "Specify whether to print log")]
console: bool,
}
/// Implement the execute for ExportCommand.
impl ExportCommand {
/// Executes the export command with comprehensive validation and advanced error handling.
///
/// This function serves as the main entry point for the dfcache export command execution.
/// It handles the complete workflow including argument parsing, validation, logging setup,
/// dfdaemon client connection, and export operation execution. The function provides
/// sophisticated error reporting with colored terminal output, including specialized
/// handling for backend errors with HTTP status codes and headers.
pub async fn execute(&self) -> Result<()> {
// Parse command line arguments.
Args::parse();
// Initialize tracing.
let _guards = init_tracing(
dfcache::NAME,
self.log_dir.clone(),
self.log_level,
self.log_max_files,
None,
None,
None,
None,
None,
false,
self.console,
);
// Validate the command line arguments.
if let Err(err) = self.validate_args() {
println!(
"{}{}{}Validating Failed!{}",
color::Fg(color::Red),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}****************************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}Message:{} {}",
color::Fg(color::Cyan),
style::Italic,
style::Bold,
style::Reset,
err,
);
println!(
"{}{}{}****************************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
std::process::exit(1);
}
// Get dfdaemon download client.
let dfdaemon_download_client =
match get_dfdaemon_download_client(self.endpoint.to_path_buf()).await {
Ok(client) => client,
Err(err) => {
println!(
"{}{}{}Connect Dfdaemon Failed!{}",
color::Fg(color::Red),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}****************************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}Message:{}, can not connect {}, please check the unix socket {}",
color::Fg(color::Cyan),
style::Italic,
style::Bold,
style::Reset,
err,
self.endpoint.to_string_lossy(),
);
println!(
"{}{}{}****************************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
std::process::exit(1);
}
};
// Run export command.
if let Err(err) = self.run(dfdaemon_download_client).await {
match err {
Error::TonicStatus(status) => {
let details = status.details();
if let Ok(backend_err) = serde_json::from_slice::<Backend>(details) {
println!(
"{}{}{}Exporting Failed!{}",
color::Fg(color::Red),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}****************************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
if let Some(status_code) = backend_err.status_code {
println!(
"{}{}{}Bad Status Code:{} {}",
color::Fg(color::Red),
style::Italic,
style::Bold,
style::Reset,
status_code
);
}
println!(
"{}{}{}Message:{} {}",
color::Fg(color::Cyan),
style::Italic,
style::Bold,
style::Reset,
backend_err.message
);
if !backend_err.header.is_empty() {
println!(
"{}{}{}Header:{}",
color::Fg(color::Cyan),
style::Italic,
style::Bold,
style::Reset
);
for (key, value) in backend_err.header.iter() {
println!(" [{}]: {}", key.as_str(), value.as_str());
}
}
println!(
"{}{}{}****************************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
} else {
println!(
"{}{}{}Exporting Failed!{}",
color::Fg(color::Red),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}*********************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}Bad Code:{} {}",
color::Fg(color::Red),
style::Italic,
style::Bold,
style::Reset,
status.code()
);
println!(
"{}{}{}Message:{} {}",
color::Fg(color::Cyan),
style::Italic,
style::Bold,
style::Reset,
status.message()
);
if !status.details().is_empty() {
println!(
"{}{}{}Details:{} {}",
color::Fg(color::Cyan),
style::Italic,
style::Bold,
style::Reset,
std::str::from_utf8(status.details()).unwrap()
);
}
println!(
"{}{}{}*********************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
}
}
Error::BackendError(err) => {
println!(
"{}{}{}Exporting Failed!{}",
color::Fg(color::Red),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}****************************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}Message:{} {}",
color::Fg(color::Red),
style::Italic,
style::Bold,
style::Reset,
err.message
);
if err.header.is_some() {
println!(
"{}{}{}Header:{}",
color::Fg(color::Cyan),
style::Italic,
style::Bold,
style::Reset
);
for (key, value) in err.header.unwrap_or_default().iter() {
println!(" [{}]: {}", key.as_str(), value.to_str().unwrap());
}
}
println!(
"{}{}{}****************************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
}
err => {
println!(
"{}{}{}Exporting Failed!{}",
color::Fg(color::Red),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}****************************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}Message:{} {}",
color::Fg(color::Red),
style::Italic,
style::Bold,
style::Reset,
err
);
println!(
"{}{}{}****************************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
}
}
std::process::exit(1);
}
Ok(())
}
/// Executes the export operation to retrieve cached files from the persistent cache system.
///
/// This function handles the core export functionality by downloading a cached file from the
/// dfdaemon persistent cache system. It supports two transfer modes: direct file transfer
/// by dfdaemon (hardlink/copy) or streaming piece content through the client for manual
/// file assembly. The operation provides real-time progress feedback and handles file
/// creation, directory setup, and efficient piece-by-piece writing with sparse file allocation.
async fn run(&self, dfdaemon_download_client: DfdaemonDownloadClient) -> Result<()> {
// Dfcache needs to notify dfdaemon to transfer the piece content of downloading file via unix domain socket
// when the `transfer_from_dfdaemon` is true. Otherwise, dfdaemon will download the file and hardlink or
// copy the file to the output path.
let (output_path, need_piece_content) = if self.transfer_from_dfdaemon {
(None, true)
} else {
let absolute_path = Path::new(&self.output).absolutize()?;
info!("export file to: {}", absolute_path.to_string_lossy());
(Some(absolute_path.to_string_lossy().to_string()), false)
};
// Create dfdaemon client.
let response = dfdaemon_download_client
.download_persistent_cache_task(DownloadPersistentCacheTaskRequest {
task_id: self.id.clone(),
// When scheduler triggers the export task, it will set true. If the export task is
// triggered by the user, it will set false.
persistent: false,
tag: Some(self.tag.clone()),
application: Some(self.application.clone()),
output_path,
timeout: Some(
prost_wkt_types::Duration::try_from(self.timeout)
.or_err(ErrorType::ParseError)?,
),
need_piece_content,
force_hard_link: self.force_hard_link,
digest: self.digest.clone(),
remote_ip: Some(local_ip().unwrap().to_string()),
})
.await
.inspect_err(|err| {
error!("download persistent cache task failed: {}", err);
})?;
// If transfer_from_dfdaemon is true, then dfcache needs to create the output file and write the
// piece content to the output file.
let mut f = if self.transfer_from_dfdaemon {
if let Some(parent) = self.output.parent() {
if !parent.exists() {
fs::create_dir_all(parent).await.inspect_err(|err| {
error!("failed to create directory {:?}: {}", parent, err);
})?;
}
}
let f = OpenOptions::new()
.create_new(true)
.write(true)
.mode(dfcache::DEFAULT_OUTPUT_FILE_MODE)
.open(&self.output)
.await
.inspect_err(|err| {
error!("open file {:?} failed: {}", self.output, err);
})?;
Some(f)
} else {
None
};
// Initialize progress bar.
let progress_bar = ProgressBar::new(0);
progress_bar.set_style(
ProgressStyle::with_template(
"[{elapsed_precise}] [{wide_bar}] {bytes}/{total_bytes} ({bytes_per_sec}, {eta})",
)
.or_err(ErrorType::ParseError)?
.with_key("eta", |state: &ProgressState, w: &mut dyn Write| {
write!(w, "{:.1}s", state.eta().as_secs_f64()).unwrap()
})
.progress_chars("#>-"),
);
// Download file.
let mut downloaded = 0;
let mut out_stream = response.into_inner();
while let Some(message) = out_stream.message().await.inspect_err(|err| {
error!("get message failed: {}", err);
})? {
match message.response {
Some(download_persistent_cache_task_response::Response::DownloadPersistentCacheTaskStartedResponse(
response,
)) => {
if let Some(f) = &f {
fallocate(f, response.content_length)
.await
.inspect_err(|err| {
error!("fallocate {:?} failed: {}", self.output, err);
})?;
}
progress_bar.set_length(response.content_length);
}
Some(download_persistent_cache_task_response::Response::DownloadPieceFinishedResponse(
response,
)) => {
let piece = response.piece.ok_or(Error::InvalidParameter)?;
// Dfcache needs to write the piece content to the output file.
if let Some(f) = &mut f {
f.seek(SeekFrom::Start(piece.offset))
.await
.inspect_err(|err| {
error!("seek {:?} failed: {}", self.output, err);
})?;
let content = piece.content.ok_or(Error::InvalidParameter)?;
f.write_all(&content).await.inspect_err(|err| {
error!("write {:?} failed: {}", self.output, err);
})?;
debug!("copy piece {} to {:?} success", piece.number, self.output);
};
downloaded += piece.length;
let position = min(downloaded + piece.length, progress_bar.length().unwrap_or(0));
progress_bar.set_position(position);
}
None => {}
}
}
progress_bar.finish_with_message("downloaded");
Ok(())
}
/// Checks that the output path can be used as an export target.
///
/// The output path is made absolute, then the following must hold:
/// - the path has a parent,
/// - that parent is an existing directory,
/// - nothing exists at the output path yet.
///
/// Returns `Error::ValidationError` describing the first rule that is violated.
fn validate_args(&self) -> Result<()> {
    let target = Path::new(&self.output).absolutize()?;

    // A path without a parent cannot be written to.
    let Some(parent_dir) = target.parent() else {
        return Err(Error::ValidationError(format!(
            "output path {} is not exist",
            self.output.to_string_lossy()
        )));
    };

    if !parent_dir.is_dir() {
        return Err(Error::ValidationError(format!(
            "output path {} is not a directory",
            parent_dir.to_string_lossy()
        )));
    }

    // Never overwrite an existing file.
    if target.exists() {
        return Err(Error::ValidationError(format!(
            "output path {} is already exist",
            self.output.to_string_lossy()
        )));
    }

    Ok(())
}
}

View File

@ -1,420 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use bytesize::ByteSize;
use clap::Parser;
use dragonfly_api::dfdaemon::v2::UploadPersistentCacheTaskRequest;
use dragonfly_client::resource::piece::MIN_PIECE_LENGTH;
use dragonfly_client_config::dfcache::default_dfcache_persistent_replica_count;
use dragonfly_client_core::{
error::{ErrorType, OrErr},
Error, Result,
};
use indicatif::{ProgressBar, ProgressStyle};
use local_ip_address::local_ip;
use path_absolutize::*;
use std::path::{Path, PathBuf};
use std::time::Duration;
use termion::{color, style};
use tracing::info;
use super::*;
/// DEFAULT_PROGRESS_BAR_STEADY_TICK_INTERVAL is the default steady tick interval of progress bar,
/// i.e. how often the import spinner is redrawn (every 80 milliseconds).
const DEFAULT_PROGRESS_BAR_STEADY_TICK_INTERVAL: Duration = Duration::from_millis(80);
/// ImportCommand is the subcommand of import.
// The fields below are the command line arguments of `dfcache import`, parsed by clap. The
// order of the fields and of their attributes is significant for clap, so keep it stable.
// Comments on the fields are plain `//` comments on purpose: clap reads `///` doc comments.
#[derive(Debug, Clone, Parser)]
pub struct ImportCommand {
    // Positional argument: the local file to import.
    #[arg(help = "Specify the path of the file to import")]
    path: PathBuf,

    #[arg(
        long = "content-for-calculating-task-id",
        help = "Specify the content used to calculate the persistent cache task ID. If it is set, use its value to calculate the task ID, Otherwise, calculate the persistent cache task ID based on url, piece-length, tag, application, and filtered-query-params."
    )]
    content_for_calculating_task_id: Option<String>,

    #[arg(
        long = "persistent-replica-count",
        default_value_t = default_dfcache_persistent_replica_count(),
        help = "Specify the replica count of the persistent cache task"
    )]
    persistent_replica_count: u64,

    // Validated against MIN_PIECE_LENGTH before the import starts.
    #[arg(
        long = "piece-length",
        required = false,
        help = "Specify the piece length for downloading file. If the piece length is not specified, the piece length will be calculated according to the file size. Different piece lengths will be divided into different persistent cache tasks. The value needs to be set with human readable format and needs to be greater than or equal to 4mib, for example: 4mib, 1gib"
    )]
    piece_length: Option<ByteSize>,

    #[arg(
        long = "application",
        required = false,
        help = "Different applications for the same url will be divided into different persistent cache tasks"
    )]
    application: Option<String>,

    #[arg(
        long = "tag",
        required = false,
        help = "Different tags for the same file will be divided into different persistent cache tasks"
    )]
    tag: Option<String>,

    // The help text states the bounds that argument validation actually enforces
    // (5 minutes to 7 days).
    #[arg(
        long = "ttl",
        value_parser= humantime::parse_duration,
        default_value = "1h",
        help = "Specify the ttl of the persistent cache task, maximum is 7d and minimum is 5m"
    )]
    ttl: Duration,

    #[arg(
        long = "timeout",
        value_parser= humantime::parse_duration,
        default_value = "30m",
        help = "Specify the timeout for importing a file"
    )]
    timeout: Duration,

    // Path of the unix socket used to reach dfdaemon.
    #[arg(
        short = 'e',
        long = "endpoint",
        default_value_os_t = dfdaemon::default_download_unix_socket_path(),
        help = "Endpoint of dfdaemon's GRPC server"
    )]
    endpoint: PathBuf,

    // Logging options, passed to `init_tracing` when the command is executed.
    #[arg(
        short = 'l',
        long,
        default_value = "info",
        help = "Specify the logging level [trace, debug, info, warn, error]"
    )]
    log_level: Level,

    #[arg(
        long,
        default_value_os_t = dfcache::default_dfcache_log_dir(),
        help = "Specify the log directory"
    )]
    log_dir: PathBuf,

    #[arg(
        long,
        default_value_t = 6,
        help = "Specify the max number of log files"
    )]
    log_max_files: usize,

    #[arg(long, default_value_t = false, help = "Specify whether to print log")]
    console: bool,
}
/// Implement the execute for ImportCommand.
impl ImportCommand {
/// Executes the import sub command with comprehensive validation and error handling.
///
/// This function serves as the main entry point for the dfcache import command execution.
/// It handles the complete workflow including argument parsing, validation, logging setup,
/// dfdaemon client connection, and import operation execution. The function provides
/// detailed error reporting with colored terminal output and follows a fail-fast approach
/// with immediate process termination on any critical failures.
pub async fn execute(&self) -> Result<()> {
// Parse command line arguments.
Args::parse();
// Initialize tracing.
let _guards = init_tracing(
dfcache::NAME,
self.log_dir.clone(),
self.log_level,
self.log_max_files,
None,
None,
None,
None,
None,
false,
self.console,
);
// Validate the command line arguments.
if let Err(err) = self.validate_args() {
println!(
"{}{}{}Validating Failed!{}",
color::Fg(color::Red),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}****************************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}Message:{} {}",
color::Fg(color::Cyan),
style::Italic,
style::Bold,
style::Reset,
err,
);
println!(
"{}{}{}****************************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
std::process::exit(1);
}
// Get dfdaemon download client.
let dfdaemon_download_client =
match get_dfdaemon_download_client(self.endpoint.to_path_buf()).await {
Ok(client) => client,
Err(err) => {
println!(
"{}{}{}Connect Dfdaemon Failed!{}",
color::Fg(color::Red),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}****************************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}Message:{}, can not connect {}, please check the unix socket {}",
color::Fg(color::Cyan),
style::Italic,
style::Bold,
style::Reset,
err,
self.endpoint.to_string_lossy(),
);
println!(
"{}{}{}****************************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
std::process::exit(1);
}
};
// Run import sub command.
if let Err(err) = self.run(dfdaemon_download_client).await {
match err {
Error::TonicStatus(status) => {
println!(
"{}{}{}Importing Failed!{}",
color::Fg(color::Red),
style::Italic,
style::Bold,
style::Reset,
);
println!(
"{}{}{}*********************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}Bad Code:{} {}",
color::Fg(color::Red),
style::Italic,
style::Bold,
style::Reset,
status.code()
);
println!(
"{}{}{}Message:{} {}",
color::Fg(color::Cyan),
style::Italic,
style::Bold,
style::Reset,
status.message()
);
println!(
"{}{}{}Details:{} {}",
color::Fg(color::Cyan),
style::Italic,
style::Bold,
style::Reset,
std::str::from_utf8(status.details()).unwrap()
);
println!(
"{}{}{}*********************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
}
err => {
println!(
"{}{}{}Importing Failed!{}",
color::Fg(color::Red),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}****************************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
println!(
"{}{}{}Message:{} {}",
color::Fg(color::Red),
style::Italic,
style::Bold,
style::Reset,
err
);
println!(
"{}{}{}****************************************{}",
color::Fg(color::Black),
style::Italic,
style::Bold,
style::Reset
);
}
}
std::process::exit(1);
}
Ok(())
}
/// Imports a local file into the persistent cache by asking dfdaemon to upload it.
///
/// The file path is made absolute before it is sent. A spinner is shown while the request
/// is in flight and is replaced by `Done: <task id>` once dfdaemon returns the task.
///
/// # Errors
/// Returns an error if the path cannot be made absolute, if the ttl or timeout cannot be
/// converted into a protobuf duration, or if the upload request fails.
async fn run(&self, dfdaemon_download_client: DfdaemonDownloadClient) -> Result<()> {
    let absolute_path = Path::new(&self.path).absolutize()?;
    info!("import file: {}", absolute_path.to_string_lossy());

    let progress_bar = ProgressBar::new_spinner();
    progress_bar.enable_steady_tick(DEFAULT_PROGRESS_BAR_STEADY_TICK_INTERVAL);
    progress_bar.set_style(
        ProgressStyle::with_template("{spinner:.blue} {msg}")
            // The template is a constant, so a parse failure would be a programming error.
            .unwrap()
            // Visible spinner frames; empty frames would render no spinner at all.
            .tick_strings(&["⣷", "⣯", "⣟", "⡿", "⢿", "⣻", "⣽", "⣾"]),
    );
    progress_bar.set_message("Importing...");

    let persistent_cache_task = dfdaemon_download_client
        .upload_persistent_cache_task(UploadPersistentCacheTaskRequest {
            content_for_calculating_task_id: self.content_for_calculating_task_id.clone(),
            path: absolute_path.to_string_lossy().to_string(),
            persistent_replica_count: self.persistent_replica_count,
            tag: self.tag.clone(),
            application: self.application.clone(),
            piece_length: self.piece_length.map(|piece_length| piece_length.as_u64()),
            ttl: Some(
                prost_wkt_types::Duration::try_from(self.ttl).or_err(ErrorType::ParseError)?,
            ),
            timeout: Some(
                prost_wkt_types::Duration::try_from(self.timeout)
                    .or_err(ErrorType::ParseError)?,
            ),
            // The remote ip is optional; a host without a detectable local ip must still be
            // able to import.
            remote_ip: local_ip().ok().map(|ip| ip.to_string()),
        })
        .await?;

    progress_bar.finish_with_message(format!("Done: {}", persistent_cache_task.id));
    Ok(())
}
/// Validates command line arguments for the import operation to ensure safe and correct execution.
///
/// This function performs comprehensive validation of import-specific parameters to prevent
/// invalid operations and ensure the import request meets all system requirements. It validates
/// TTL boundaries, file existence and type, and piece length constraints before allowing the
/// import operation to proceed.
fn validate_args(&self) -> Result<()> {
if self.ttl < Duration::from_secs(5 * 60)
|| self.ttl > Duration::from_secs(7 * 24 * 60 * 60)
{
return Err(Error::ValidationError(format!(
"ttl must be between 5 minutes and 7 days, but got {}",
self.ttl.as_secs()
)));
}
if self.path.is_dir() {
return Err(Error::ValidationError(format!(
"path {} is a directory",
self.path.display()
)));
}
if !self.path.exists() {
return Err(Error::ValidationError(format!(
"path {} does not exist",
self.path.display()
)));
}
if let Some(piece_length) = self.piece_length {
if piece_length.as_u64() < MIN_PIECE_LENGTH {
return Err(Error::ValidationError(format!(
"piece length {} bytes is less than the minimum piece length {} bytes",
piece_length.as_u64(),
MIN_PIECE_LENGTH
)));
}
}
Ok(())
}
}

View File

@ -1,123 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use clap::{Parser, Subcommand};
use dragonfly_client::grpc::dfdaemon_download::DfdaemonDownloadClient;
use dragonfly_client::grpc::health::HealthClient;
use dragonfly_client::tracing::init_tracing;
use dragonfly_client_config::VersionValueParser;
use dragonfly_client_config::{dfcache, dfdaemon};
use dragonfly_client_core::Result;
use std::path::PathBuf;
use tracing::Level;
pub mod export;
pub mod import;
pub mod stat;
// Args is the top-level command line definition of dfcache: a version flag plus one
// subcommand. The built-in clap version flag is disabled and replaced by the `version`
// argument below, which is handled by `VersionValueParser`.
#[derive(Debug, Parser)]
#[command(
name = dfcache::NAME,
author,
version,
about = "dfcache is a cache command line based on P2P technology in Dragonfly.",
long_about = "A cache command line based on P2P technology in Dragonfly that can import file and export file in P2P network, \
and it can copy multiple replicas during import. P2P cache is effectively used for fast read and write cache.",
disable_version_flag = true
)]
struct Args {
// -V / --version flag.
#[arg(
short = 'V',
long = "version",
help = "Print version information",
default_value_t = false,
action = clap::ArgAction::SetTrue,
value_parser = VersionValueParser
)]
version: bool,
// The subcommand to run (import, export or stat).
#[command(subcommand)]
command: Command,
}
// Command lists the dfcache subcommands. Each variant wraps the argument struct
// defined in the sibling module of the same name.
#[derive(Debug, Clone, Subcommand)]
#[command(args_conflicts_with_subcommands = true)]
pub enum Command {
// `dfcache import`: arguments are defined by `import::ImportCommand`.
#[command(
name = "import",
author,
version,
about = "Import a file into Dragonfly P2P network",
long_about = "Import a local file into Dragonfly P2P network and copy multiple replicas during import. If import successfully, it will return a task ID."
)]
Import(import::ImportCommand),
// `dfcache export`: arguments are defined by `export::ExportCommand`.
#[command(
name = "export",
author,
version,
about = "Export a file from Dragonfly P2P network",
long_about = "Export a file from Dragonfly P2P network by task ID. If export successfully, it will return the local file path."
)]
Export(export::ExportCommand),
// `dfcache stat`: arguments are defined by `stat::StatCommand`.
#[command(
name = "stat",
author,
version,
about = "Stat a file in Dragonfly P2P network",
long_about = "Stat a file in Dragonfly P2P network by task ID. If stat successfully, it will return the file information."
)]
Stat(stat::StatCommand),
}
/// Implement the execute for Command.
impl Command {
#[allow(unused)]
pub async fn execute(self) -> Result<()> {
match self {
Self::Import(cmd) => cmd.execute().await,
Self::Export(cmd) => cmd.execute().await,
Self::Stat(cmd) => cmd.execute().await,
}
}
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
// Parse command line arguments.
let args = Args::parse();
// Execute the command.
args.command.execute().await?;
Ok(())
}
/// Connects to dfdaemon over a unix domain socket and returns a download client.
///
/// The download service is probed through the health client first; the download
/// client is only built when that probe call succeeds. Any error from creating
/// the health client, from the health check call, or from creating the download
/// client is returned to the caller.
///
/// NOTE(review): only the success of the health check call is considered; the
/// serving status inside the response is not inspected here.
pub async fn get_dfdaemon_download_client(endpoint: PathBuf) -> Result<DfdaemonDownloadClient> {
    // Probe the download service before handing out a client for it.
    let probe = HealthClient::new_unix(endpoint.clone()).await?;
    probe.check_dfdaemon_download().await?;

    // The probe call went through, so build the actual download client.
    let client = DfdaemonDownloadClient::new_unix(endpoint).await?;
    Ok(client)
}

View File

@ -1,325 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use chrono::{DateTime, Local};
use clap::Parser;
use dragonfly_api::dfdaemon::v2::StatPersistentCacheTaskRequest;
use dragonfly_client_core::{
error::{ErrorType, OrErr},
Error, Result,
};
use humantime::format_duration;
use local_ip_address::local_ip;
use std::time::Duration;
use tabled::{
settings::{object::Rows, Alignment, Modify, Style},
Table, Tabled,
};
use termion::{color, style};
use super::*;
/// StatCommand is the subcommand of stat.
// The fields below are the command line arguments of `dfcache stat`.
#[derive(Debug, Clone, Parser)]
pub struct StatCommand {
// Positional argument: ID of the persistent cache task to look up.
#[arg(help = "Specify the persistent cache task ID to stat")]
id: String,
// Unix socket path used to reach dfdaemon; defaults to
// `dfdaemon::default_download_unix_socket_path()`.
#[arg(
short = 'e',
long = "endpoint",
default_value_os_t = dfdaemon::default_download_unix_socket_path(),
help = "Endpoint of dfdaemon's GRPC server"
)]
endpoint: PathBuf,
// Logging level handed to `init_tracing`.
#[arg(
short = 'l',
long,
default_value = "info",
help = "Specify the logging level [trace, debug, info, warn, error]"
)]
log_level: Level,
// Log directory handed to `init_tracing`; defaults to
// `dfcache::default_dfcache_log_dir()`.
#[arg(
long,
default_value_os_t = dfcache::default_dfcache_log_dir(),
help = "Specify the log directory"
)]
log_dir: PathBuf,
// Maximum number of log files handed to `init_tracing`.
#[arg(
long,
default_value_t = 6,
help = "Specify the max number of log files"
)]
log_max_files: usize,
// Console flag handed to `init_tracing` as its last argument.
#[arg(long, default_value_t = false, help = "Specify whether to print log")]
console: bool,
}
/// Implement the execute for StatCommand.
impl StatCommand {
/// Executes the stat command and reports failures on the terminal.
///
/// The function initializes tracing, connects to dfdaemon through the unix
/// socket given by `endpoint`, and then runs the stat request. When the
/// connection or the stat request fails, a colored error report is printed to
/// stdout and the process exits with status code 1, so in practice this
/// function only returns `Ok(())`.
///
/// The gRPC status details are rendered with a lossy UTF-8 conversion: they are
/// raw bytes and are not guaranteed to be valid UTF-8, so decoding them must not
/// be allowed to panic while an error is being reported.
pub async fn execute(&self) -> Result<()> {
    // Parse command line arguments.
    Args::parse();

    // Initialize tracing. The guards are kept alive until the function returns.
    let _guards = init_tracing(
        dfcache::NAME,
        self.log_dir.clone(),
        self.log_level,
        self.log_max_files,
        None,
        None,
        None,
        None,
        None,
        false,
        self.console,
    );

    // Get dfdaemon download client.
    let dfdaemon_download_client =
        match get_dfdaemon_download_client(self.endpoint.to_path_buf()).await {
            Ok(client) => client,
            Err(err) => {
                println!(
                    "{}{}{}Connect Dfdaemon Failed!{}",
                    color::Fg(color::Red),
                    style::Italic,
                    style::Bold,
                    style::Reset
                );

                println!(
                    "{}{}{}****************************************{}",
                    color::Fg(color::Black),
                    style::Italic,
                    style::Bold,
                    style::Reset
                );

                println!(
                    "{}{}{}Message:{}, can not connect {}, please check the unix socket {}",
                    color::Fg(color::Cyan),
                    style::Italic,
                    style::Bold,
                    style::Reset,
                    err,
                    self.endpoint.to_string_lossy(),
                );

                println!(
                    "{}{}{}****************************************{}",
                    color::Fg(color::Black),
                    style::Italic,
                    style::Bold,
                    style::Reset
                );

                // Without a client nothing else can be done.
                std::process::exit(1);
            }
        };

    // Run stat sub command.
    if let Err(err) = self.run(dfdaemon_download_client).await {
        match err {
            // gRPC failures carry a code, a message and optional detail bytes.
            Error::TonicStatus(status) => {
                println!(
                    "{}{}{}Stating Failed!{}",
                    color::Fg(color::Red),
                    style::Italic,
                    style::Bold,
                    style::Reset,
                );

                println!(
                    "{}{}{}*********************************{}",
                    color::Fg(color::Black),
                    style::Italic,
                    style::Bold,
                    style::Reset
                );

                println!(
                    "{}{}{}Bad Code:{} {}",
                    color::Fg(color::Red),
                    style::Italic,
                    style::Bold,
                    style::Reset,
                    status.code()
                );

                println!(
                    "{}{}{}Message:{} {}",
                    color::Fg(color::Cyan),
                    style::Italic,
                    style::Bold,
                    style::Reset,
                    status.message()
                );

                println!(
                    "{}{}{}Details:{} {}",
                    color::Fg(color::Cyan),
                    style::Italic,
                    style::Bold,
                    style::Reset,
                    // Details are arbitrary bytes; never panic on invalid UTF-8.
                    String::from_utf8_lossy(status.details())
                );

                println!(
                    "{}{}{}*********************************{}",
                    color::Fg(color::Black),
                    style::Italic,
                    style::Bold,
                    style::Reset
                );
            }
            // Every other error is reported through its Display output.
            err => {
                println!(
                    "{}{}{}Stating Failed!{}",
                    color::Fg(color::Red),
                    style::Italic,
                    style::Bold,
                    style::Reset
                );

                println!(
                    "{}{}{}****************************************{}",
                    color::Fg(color::Black),
                    style::Italic,
                    style::Bold,
                    style::Reset
                );

                println!(
                    "{}{}{}Message:{} {}",
                    color::Fg(color::Red),
                    style::Italic,
                    style::Bold,
                    style::Reset,
                    err
                );

                println!(
                    "{}{}{}****************************************{}",
                    color::Fg(color::Black),
                    style::Italic,
                    style::Bold,
                    style::Reset
                );
            }
        }

        std::process::exit(1);
    }

    Ok(())
}
/// Queries dfdaemon for one persistent cache task and prints it as a table.
///
/// The task identified by `self.id` is requested through the given download
/// client. Content length and piece length are rendered with `bytesize`, the
/// ttl with `humantime`, and the creation/update timestamps in the local
/// timezone. A timestamp that is missing or cannot be converted is left empty.
///
/// The local IP is sent as `remote_ip` when it can be determined. That field is
/// optional, so a host whose local IP cannot be resolved sends `None` instead of
/// aborting the command with a panic.
///
/// # Errors
///
/// Returns the error of the stat request, `Error::InvalidParameter` when the
/// response carries no ttl, or a parse error when the ttl cannot be converted
/// into a `Duration`.
async fn run(&self, dfdaemon_download_client: DfdaemonDownloadClient) -> Result<()> {
    let task = dfdaemon_download_client
        .stat_persistent_cache_task(StatPersistentCacheTaskRequest {
            task_id: self.id.clone(),
            // Best effort: no detectable local IP means no remote_ip is sent.
            remote_ip: local_ip().ok().map(|ip| ip.to_string()),
        })
        .await?;

    // Define the table struct for printing.
    #[derive(Debug, Default, Tabled)]
    #[tabled(rename_all = "UPPERCASE")]
    struct TableTask {
        id: String,
        state: String,
        #[tabled(rename = "CONTENT LENGTH")]
        content_length: String,
        #[tabled(rename = "PIECE LENGTH")]
        piece_length: String,
        #[tabled(rename = "PERSISTENT REPLICA COUNT")]
        persistent_replica_count: u64,
        ttl: String,
        #[tabled(rename = "CREATED")]
        created_at: String,
        #[tabled(rename = "UPDATED")]
        updated_at: String,
    }

    let mut table_task = TableTask {
        id: task.id,
        state: task.state,
        // Convert content_length to human readable format.
        content_length: bytesize::to_string(task.content_length, true),
        // Convert piece_length to human readable format.
        piece_length: bytesize::to_string(task.piece_length, true),
        persistent_replica_count: task.persistent_replica_count,
        ..Default::default()
    };

    // Convert ttl to human readable format. A response without ttl is rejected.
    let ttl = Duration::try_from(task.ttl.ok_or(Error::InvalidParameter)?)
        .or_err(ErrorType::ParseError)?;
    table_task.ttl = format_duration(ttl).to_string();

    // Convert created_at to human readable format.
    if let Some(created_at) = task.created_at {
        if let Some(date_time) =
            DateTime::from_timestamp(created_at.seconds, created_at.nanos as u32)
        {
            table_task.created_at = date_time
                .with_timezone(&Local)
                .format("%Y-%m-%d %H:%M:%S")
                .to_string();
        }
    }

    // Convert updated_at to human readable format.
    if let Some(updated_at) = task.updated_at {
        if let Some(date_time) =
            DateTime::from_timestamp(updated_at.seconds, updated_at.nanos as u32)
        {
            table_task.updated_at = date_time
                .with_timezone(&Local)
                .format("%Y-%m-%d %H:%M:%S")
                .to_string();
        }
    }

    // Create a table and print it; the header row is centered.
    let mut table = Table::new(vec![table_task]);
    table
        .with(Style::blank())
        .with(Modify::new(Rows::first()).with(Alignment::center()));

    println!("{table}");
    Ok(())
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,329 +0,0 @@
/*
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::grpc::scheduler::SchedulerClient;
use crate::shutdown;
use chrono::Utc;
use dragonfly_api::scheduler::v2::DeleteTaskRequest;
use dragonfly_client_config::dfdaemon::Config;
use dragonfly_client_core::Result;
use dragonfly_client_storage::{metadata, Storage};
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::mpsc;
use tracing::{error, info, instrument};
// DOWNLOAD_TASK_TIMEOUT is the timeout of downloading a task, 2 hours. A task
// that is started but not finished is skipped by the disk usage eviction until
// this much time has passed since its creation; after that it can be evicted.
pub const DOWNLOAD_TASK_TIMEOUT: Duration = Duration::from_secs(2 * 60 * 60);
/// GC is the garbage collector of dfdaemon. On every tick of the configured
/// interval it evicts tasks and persistent cache tasks from the local storage,
/// first by ttl and then by disk usage.
pub struct GC {
/// config is the configuration of the dfdaemon; the `gc` section provides the
/// interval and the eviction policy.
config: Arc<Config>,
/// host_id is the id of the host, sent to the scheduler when a task is deleted.
host_id: String,
/// storage is the local storage that tasks are listed from and deleted in.
storage: Arc<Storage>,
/// scheduler_client is the grpc client of the scheduler.
scheduler_client: Arc<SchedulerClient>,
/// shutdown is used to shutdown the garbage collector.
shutdown: shutdown::Shutdown,
/// _shutdown_complete is used to notify the garbage collector is shutdown.
/// It is never read in this file — presumably dropping the sender is the
/// notification; confirm against the shutdown handling of the caller.
_shutdown_complete: mpsc::UnboundedSender<()>,
}
impl GC {
/// new creates a new GC.
pub fn new(
config: Arc<Config>,
host_id: String,
storage: Arc<Storage>,
scheduler_client: Arc<SchedulerClient>,
shutdown: shutdown::Shutdown,
shutdown_complete_tx: mpsc::UnboundedSender<()>,
) -> Self {
GC {
config,
host_id,
storage,
scheduler_client,
shutdown,
_shutdown_complete: shutdown_complete_tx,
}
}
/// run drives the garbage collector until a shutdown signal is received.
///
/// On every tick of `config.gc.interval` the four eviction passes run in a
/// fixed order: persistent cache tasks by ttl, persistent cache tasks by disk
/// usage, tasks by ttl, tasks by disk usage. A failing pass is logged and does
/// not stop the following passes or the loop.
pub async fn run(&self) {
    // Work on a private copy of the shutdown handle.
    let mut shutdown_signal = self.shutdown.clone();

    // Ticker that paces the collection rounds.
    let mut ticker = tokio::time::interval(self.config.gc.interval);
    loop {
        tokio::select! {
            _ = ticker.tick() => {
                // Pass 1: persistent cache tasks whose ttl has expired.
                if let Some(err) = self.evict_persistent_cache_task_by_ttl().await.err() {
                    info!("failed to evict persistent cache task by ttl: {}", err);
                }

                // Pass 2: persistent cache tasks, to relieve disk pressure.
                if let Some(err) = self.evict_persistent_cache_task_by_disk_usage().await.err() {
                    info!("failed to evict persistent cache task by disk usage: {}", err);
                }

                // Pass 3: tasks whose ttl has expired.
                if let Some(err) = self.evict_task_by_ttl().await.err() {
                    info!("failed to evict task by ttl: {}", err);
                }

                // Pass 4: tasks, to relieve disk pressure.
                if let Some(err) = self.evict_task_by_disk_usage().await.err() {
                    info!("failed to evict task by disk usage: {}", err);
                }
            }
            _ = shutdown_signal.recv() => {
                // Stop collecting once shutdown has been requested.
                info!("garbage collector shutting down");
                return
            }
        }
    }
}
/// evict_task_by_ttl removes every task that has outlived the configured task
/// ttl: the task is deleted from the local storage and then from the scheduler.
#[instrument(skip_all)]
async fn evict_task_by_ttl(&self) -> Result<()> {
    info!("start to evict by task ttl");

    let task_ttl = self.config.gc.policy.task_ttl;
    for task in self.storage.get_tasks()? {
        // Tasks that are still within their ttl are kept.
        if !task.is_expired(task_ttl) {
            continue;
        }

        // Local data first, then the scheduler's record of the task.
        self.storage.delete_task(&task.id).await;
        info!("evict task {}", task.id);

        self.delete_task_from_scheduler(task.clone()).await;
        info!("delete task {} from scheduler", task.id);
    }

    info!("evict by task ttl done");
    Ok(())
}
/// evict_task_by_disk_usage evicts the task by disk usage.
#[instrument(skip_all)]
async fn evict_task_by_disk_usage(&self) -> Result<()> {
let available_space = self.storage.available_space()?;
let total_space = self.storage.total_space()?;
// Calculate the usage percent.
let usage_percent = (100 - available_space * 100 / total_space) as u8;
if usage_percent >= self.config.gc.policy.dist_high_threshold_percent {
info!(
"start to evict task by disk usage, disk usage {}% is higher than high threshold {}%",
usage_percent, self.config.gc.policy.dist_high_threshold_percent
);
// Calculate the need evict space.
let need_evict_space = total_space as f64
* ((usage_percent - self.config.gc.policy.dist_low_threshold_percent) as f64
/ 100.0);
// Evict the task by the need evict space.
if let Err(err) = self.evict_task_space(need_evict_space as u64).await {
info!("failed to evict task by disk usage: {}", err);
}
info!("evict task by disk usage done");
}
Ok(())
}
/// evict_task_space evicts tasks, least recently updated first, until the
/// accumulated content length of the evicted tasks reaches `need_evict_space`
/// or no task is left. Each evicted task is deleted from the local storage and
/// from the scheduler.
#[instrument(skip_all)]
async fn evict_task_space(&self, need_evict_space: u64) -> Result<()> {
    // Oldest `updated_at` first.
    let mut candidates = self.storage.get_tasks()?;
    candidates.sort_by(|lhs, rhs| lhs.updated_at.cmp(&rhs.updated_at));

    let mut evicted_space = 0;
    for task in candidates {
        // Stop as soon as enough space has been reclaimed.
        if evicted_space >= need_evict_space {
            break;
        }

        // Space credited for this task: its content length when known. A failed
        // task without content length counts as 0; any other task without
        // content length is left alone.
        let task_space = if let Some(content_length) = task.content_length() {
            content_length
        } else if task.is_failed() {
            info!("task {} is failed, has no content length", task.id);
            0
        } else {
            error!("task {} has no content length", task.id);
            continue;
        };

        // A download that is still running and younger than the download
        // timeout must not be evicted.
        let download_in_progress = task.is_started()
            && !task.is_finished()
            && !task.is_failed()
            && (task.created_at + DOWNLOAD_TASK_TIMEOUT > Utc::now().naive_utc());
        if download_in_progress {
            info!("task {} is started and not finished, skip it", task.id);
            continue;
        }

        // Remove the local data and account for the reclaimed space.
        self.storage.delete_task(&task.id).await;
        evicted_space += task_space;
        info!("evict task {} size {}", task.id, task_space);

        // Remove the task from the scheduler as well.
        self.delete_task_from_scheduler(task.clone()).await;
        info!("delete task {} from scheduler", task.id);
    }

    info!("evict total size {}", evicted_space);
    Ok(())
}
/// delete_task_from_scheduler deletes the task from the scheduler.
#[instrument(skip_all)]
async fn delete_task_from_scheduler(&self, task: metadata::Task) {
self.scheduler_client
.delete_task(DeleteTaskRequest {
host_id: self.host_id.clone(),
task_id: task.id.clone(),
})
.await
.unwrap_or_else(|err| {
error!("failed to delete peer {}: {}", task.id, err);
});
}
/// evict_persistent_cache_task_by_ttl deletes every expired persistent cache
/// task from the local storage.
#[instrument(skip_all)]
async fn evict_persistent_cache_task_by_ttl(&self) -> Result<()> {
    info!("start to evict by persistent cache task ttl");

    for task in self.storage.get_persistent_cache_tasks()? {
        // Persistent cache tasks that have not expired yet are kept.
        if !task.is_expired() {
            continue;
        }

        self.storage.delete_persistent_cache_task(&task.id).await;
        info!("evict persistent cache task {}", task.id);
    }

    info!("evict by persistent cache task ttl done");
    Ok(())
}
/// evict_persistent_cache_task_by_disk_usage evicts the persistent cache task by disk usage.
#[instrument(skip_all)]
async fn evict_persistent_cache_task_by_disk_usage(&self) -> Result<()> {
let available_space = self.storage.available_space()?;
let total_space = self.storage.total_space()?;
// Calculate the usage percent.
let usage_percent = (100 - available_space * 100 / total_space) as u8;
if usage_percent >= self.config.gc.policy.dist_high_threshold_percent {
info!(
"start to evict persistent cache task by disk usage, disk usage {}% is higher than high threshold {}%",
usage_percent, self.config.gc.policy.dist_high_threshold_percent
);
// Calculate the need evict space.
let need_evict_space = total_space as f64
* ((usage_percent - self.config.gc.policy.dist_low_threshold_percent) as f64
/ 100.0);
// Evict the persistent cache task by the need evict space.
if let Err(err) = self
.evict_persistent_cache_task_space(need_evict_space as u64)
.await
{
info!("failed to evict task by disk usage: {}", err);
}
info!("evict persistent cache task by disk usage done");
}
Ok(())
}
/// evict_persistent_cache_task_space evicts persistent cache tasks, least
/// recently updated first, until the accumulated content length of the evicted
/// tasks reaches `need_evict_space` or no candidate is left. Tasks flagged as
/// persistent and downloads that are still in progress are never evicted.
#[instrument(skip_all)]
async fn evict_persistent_cache_task_space(&self, need_evict_space: u64) -> Result<()> {
    // Oldest `updated_at` first.
    let mut candidates = self.storage.get_persistent_cache_tasks()?;
    candidates.sort_by(|lhs, rhs| lhs.updated_at.cmp(&rhs.updated_at));

    let mut evicted_space = 0;
    for task in candidates {
        // Stop as soon as enough space has been reclaimed.
        if evicted_space >= need_evict_space {
            break;
        }

        // Persistent tasks are exempt from disk usage eviction.
        if task.is_persistent() {
            continue;
        }

        // A download that is still running and younger than the download
        // timeout must not be evicted.
        let download_in_progress = task.is_started()
            && !task.is_finished()
            && !task.is_failed()
            && (task.created_at + DOWNLOAD_TASK_TIMEOUT > Utc::now().naive_utc());
        if download_in_progress {
            info!(
                "persistent cache task {} is started and not finished, skip it",
                task.id
            );
            continue;
        }

        // Remove the local data, then account for the reclaimed space.
        self.storage.delete_persistent_cache_task(&task.id).await;

        let task_space = task.content_length();
        evicted_space += task_space;
        info!(
            "evict persistent cache task {} size {}",
            task.id, task_space
        );
    }

    info!("evict total size {}", evicted_space);
    Ok(())
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,145 +0,0 @@
/*
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_client_core::{
error::{ErrorType, OrErr},
Error, Result,
};
use hyper_util::rt::TokioIo;
use std::path::PathBuf;
use tokio::net::UnixStream;
use tonic::service::interceptor::InterceptedService;
use tonic::transport::ClientTlsConfig;
use tonic::transport::{Channel, Endpoint, Uri};
use tonic_health::pb::{
health_client::HealthClient as HealthGRPCClient, HealthCheckRequest, HealthCheckResponse,
};
use tower::service_fn;
use tracing::{error, instrument};
use super::interceptor::InjectTracingInterceptor;
/// HealthClient is a wrapper of HealthGRPCClient.
#[derive(Clone)]
pub struct HealthClient {
/// client is the grpc client of the health service. Every request passes
/// through `InjectTracingInterceptor` before it reaches the channel.
client: HealthGRPCClient<InterceptedService<Channel, InjectTracingInterceptor>>,
}
/// HealthClient implements the grpc client of the health.
impl HealthClient {
/// new creates a new HealthClient.
pub async fn new(addr: &str, client_tls_config: Option<ClientTlsConfig>) -> Result<Self> {
let channel = match client_tls_config {
Some(client_tls_config) => Channel::from_shared(addr.to_string())
.map_err(|_| Error::InvalidURI(addr.into()))?
.tls_config(client_tls_config)?
.connect_timeout(super::CONNECT_TIMEOUT)
.timeout(super::REQUEST_TIMEOUT)
.tcp_keepalive(Some(super::TCP_KEEPALIVE))
.http2_keep_alive_interval(super::HTTP2_KEEP_ALIVE_INTERVAL)
.keep_alive_timeout(super::HTTP2_KEEP_ALIVE_TIMEOUT)
.connect()
.await
.inspect_err(|err| {
error!("connect to {} failed: {}", addr, err);
})
.or_err(ErrorType::ConnectError)?,
None => Channel::from_shared(addr.to_string())
.map_err(|_| Error::InvalidURI(addr.into()))?
.connect_timeout(super::CONNECT_TIMEOUT)
.timeout(super::REQUEST_TIMEOUT)
.tcp_keepalive(Some(super::TCP_KEEPALIVE))
.http2_keep_alive_interval(super::HTTP2_KEEP_ALIVE_INTERVAL)
.keep_alive_timeout(super::HTTP2_KEEP_ALIVE_TIMEOUT)
.connect()
.await
.inspect_err(|err| {
error!("connect to {} failed: {}", addr, err);
})
.or_err(ErrorType::ConnectError)?,
};
let client = HealthGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX);
Ok(Self { client })
}
/// new_unix creates a new HealthClient with unix domain socket.
pub async fn new_unix(socket_path: PathBuf) -> Result<Self> {
// Ignore the uri because it is not used.
let channel = Endpoint::try_from("http://[::]:50051")
.unwrap()
.connect_with_connector(service_fn(move |_: Uri| {
let socket_path = socket_path.clone();
async move {
Ok::<_, std::io::Error>(TokioIo::new(
UnixStream::connect(socket_path.clone()).await?,
))
}
}))
.await
.inspect_err(|err| {
error!("connect failed: {}", err);
})
.or_err(ErrorType::ConnectError)?;
let client = HealthGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX);
Ok(Self { client })
}
/// check checks the health of the grpc service without service name.
#[instrument(skip_all)]
pub async fn check(&self) -> Result<HealthCheckResponse> {
let request = Self::make_request(HealthCheckRequest {
service: "".to_string(),
});
let response = self.client.clone().check(request).await?;
Ok(response.into_inner())
}
/// check_service checks the health of the grpc service with service name.
#[instrument(skip_all)]
pub async fn check_service(&self, service: String) -> Result<HealthCheckResponse> {
let request = Self::make_request(HealthCheckRequest { service });
let response = self.client.clone().check(request).await?;
Ok(response.into_inner())
}
/// check_dfdaemon_download checks the health of the dfdaemon download service.
#[instrument(skip_all)]
pub async fn check_dfdaemon_download(&self) -> Result<HealthCheckResponse> {
self.check_service("dfdaemon.v2.DfdaemonDownload".to_string())
.await
}
/// check_dfdaemon_upload checks the health of the dfdaemon upload service.
#[instrument(skip_all)]
pub async fn check_dfdaemon_upload(&self) -> Result<HealthCheckResponse> {
self.check_service("dfdaemon.v2.DfdaemonUpload".to_string())
.await
}
/// make_request creates a new request with timeout.
fn make_request<T>(request: T) -> tonic::Request<T> {
let mut request = tonic::Request::new(request);
request.set_timeout(super::REQUEST_TIMEOUT);
request
}
}

View File

@ -1,86 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use tonic::{metadata, service::Interceptor, Request, Status};
use tracing_opentelemetry::OpenTelemetrySpanExt;
/// MetadataMap is a tracing metadata map container for span context. It wraps a
/// mutable borrow of the tonic metadata map of a request.
struct MetadataMap<'a>(&'a mut metadata::MetadataMap);
/// MetadataMap implements the otel tracing Extractor.
impl opentelemetry::propagation::Extractor for MetadataMap<'_> {
    /// Looks up `key` in the `MetadataMap`. Returns None when the key is absent
    /// or its value can't be converted to &str.
    fn get(&self, key: &str) -> Option<&str> {
        self.0.get(key)?.to_str().ok()
    }

    /// Lists the names of all keys in the `MetadataMap`, ascii and binary alike.
    fn keys(&self) -> Vec<&str> {
        let mut names = Vec::new();
        for key in self.0.keys() {
            names.push(match key {
                tonic::metadata::KeyRef::Ascii(ascii) => ascii.as_str(),
                tonic::metadata::KeyRef::Binary(binary) => binary.as_str(),
            });
        }
        names
    }
}
/// MetadataMap implements the otel tracing Injector.
impl opentelemetry::propagation::Injector for MetadataMap<'_> {
    /// Stores a key-value pair in the metadata map. A pair whose key or value
    /// can't be converted into metadata is silently dropped.
    fn set(&mut self, key: &str, value: String) {
        let Ok(header) = metadata::MetadataKey::from_bytes(key.as_bytes()) else {
            return;
        };
        let Ok(header_value) = metadata::MetadataValue::try_from(&value) else {
            return;
        };

        self.0.insert(header, header_value);
    }
}
/// InjectTracingInterceptor is a gRPC interceptor that automatically injects the
/// tracing context of the current span into the metadata of a request.
#[derive(Clone)]
pub struct InjectTracingInterceptor;
/// InjectTracingInterceptor implements the tonic Interceptor interface.
impl Interceptor for InjectTracingInterceptor {
    /// call writes the context of the current tracing span into the request
    /// metadata through the global propagator and passes the request on.
    fn call(&mut self, mut request: Request<()>) -> std::result::Result<Request<()>, Status> {
        let span_context = tracing::Span::current().context();

        // The request metadata acts as the carrier the propagator writes into.
        let mut carrier = MetadataMap(request.metadata_mut());
        opentelemetry::global::get_text_map_propagator(|propagator| {
            propagator.inject_context(&span_context, &mut carrier)
        });

        Ok(request)
    }
}
/// ExtractTracingInterceptor is a gRPC interceptor that automatically extracts
/// the tracing context from the metadata of a request.
#[derive(Clone)]
pub struct ExtractTracingInterceptor;
/// ExtractTracingInterceptor implements the tonic Interceptor interface.
impl Interceptor for ExtractTracingInterceptor {
    /// call extracts the tracing context from the request metadata through the
    /// global propagator and stores it in the request extensions.
    fn call(&mut self, mut request: Request<()>) -> std::result::Result<Request<()>, Status> {
        // The request metadata acts as the carrier the propagator reads from.
        let carrier = MetadataMap(request.metadata_mut());
        let parent_context =
            opentelemetry::global::get_text_map_propagator(|propagator| {
                propagator.extract(&carrier)
            });

        request.extensions_mut().insert(parent_context);
        Ok(request)
    }
}

View File

@ -1,158 +0,0 @@
/*
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::grpc::health::HealthClient;
use dragonfly_api::manager::v2::{
manager_client::ManagerClient as ManagerGRPCClient, DeleteSeedPeerRequest,
ListSchedulersRequest, ListSchedulersResponse, SeedPeer, UpdateSeedPeerRequest,
};
use dragonfly_client_config::dfdaemon::Config;
use dragonfly_client_core::{
error::{ErrorType, OrErr},
Error, Result,
};
use std::sync::Arc;
use tonic::{service::interceptor::InterceptedService, transport::Channel};
use tonic_health::pb::health_check_response::ServingStatus;
use tracing::{error, instrument};
use url::Url;
use super::interceptor::InjectTracingInterceptor;
/// ManagerClient is a wrapper of ManagerGRPCClient.
#[derive(Clone)]
pub struct ManagerClient {
/// client is the grpc client of the manager. Every request passes through
/// `InjectTracingInterceptor` before it reaches the channel.
pub client: ManagerGRPCClient<InterceptedService<Channel, InjectTracingInterceptor>>,
}
/// ManagerClient implements the grpc client of the manager.
impl ManagerClient {
/// new creates a new ManagerClient.
pub async fn new(config: Arc<Config>, addr: String) -> Result<Self> {
let domain_name = Url::parse(addr.as_str())?
.host_str()
.ok_or_else(|| {
error!("invalid address: {}", addr);
Error::InvalidParameter
})?
.to_string();
let client_tls_config = config
.manager
.load_client_tls_config(domain_name.as_str())
.await?;
let health_client = HealthClient::new(addr.as_str(), client_tls_config.clone()).await?;
match health_client.check().await {
Ok(resp) => {
if resp.status != ServingStatus::Serving as i32 {
return Err(Error::AvailableManagerNotFound);
}
}
Err(err) => return Err(err),
}
let channel = match client_tls_config {
Some(client_tls_config) => Channel::from_shared(addr.clone())
.map_err(|_| Error::InvalidURI(addr.clone()))?
.tls_config(client_tls_config)?
.buffer_size(super::BUFFER_SIZE)
.connect_timeout(super::CONNECT_TIMEOUT)
.timeout(super::REQUEST_TIMEOUT)
.tcp_keepalive(Some(super::TCP_KEEPALIVE))
.http2_keep_alive_interval(super::HTTP2_KEEP_ALIVE_INTERVAL)
.keep_alive_timeout(super::HTTP2_KEEP_ALIVE_TIMEOUT)
.connect()
.await
.inspect_err(|err| {
error!("connect to {} failed: {}", addr.to_string(), err);
})
.or_err(ErrorType::ConnectError)?,
None => Channel::from_shared(addr.clone())
.map_err(|_| Error::InvalidURI(addr.clone()))?
.buffer_size(super::BUFFER_SIZE)
.connect_timeout(super::CONNECT_TIMEOUT)
.timeout(super::REQUEST_TIMEOUT)
.tcp_keepalive(Some(super::TCP_KEEPALIVE))
.http2_keep_alive_interval(super::HTTP2_KEEP_ALIVE_INTERVAL)
.keep_alive_timeout(super::HTTP2_KEEP_ALIVE_TIMEOUT)
.connect()
.await
.inspect_err(|err| {
error!("connect to {} failed: {}", addr.to_string(), err);
})
.or_err(ErrorType::ConnectError)?,
};
let client = ManagerGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX);
Ok(Self { client })
}
/// list_schedulers lists all schedulers that best match the client.
#[instrument(skip_all)]
pub async fn list_schedulers(
&self,
request: ListSchedulersRequest,
) -> Result<ListSchedulersResponse> {
let request = Self::make_request(request);
let response = self.client.clone().list_schedulers(request).await?;
Ok(response.into_inner())
}
/// update_seed_peer updates the seed peer information.
#[instrument(skip_all)]
pub async fn update_seed_peer(&self, request: UpdateSeedPeerRequest) -> Result<SeedPeer> {
let request = Self::make_request(request);
let response = self.client.clone().update_seed_peer(request).await?;
Ok(response.into_inner())
}
/// delete_seed_peer deletes the seed peer information.
#[instrument(skip_all)]
pub async fn delete_seed_peer(&self, request: DeleteSeedPeerRequest) -> Result<()> {
let request = Self::make_request(request);
self.client.clone().delete_seed_peer(request).await?;
Ok(())
}
/// make_request creates a new request with timeout.
fn make_request<T>(request: T) -> tonic::Request<T> {
let mut request = tonic::Request::new(request);
request.set_timeout(super::REQUEST_TIMEOUT);
request
}
}
#[cfg(test)]
mod tests {
    use super::ManagerClient;
    use dragonfly_client_config::dfdaemon::Config;
    use std::sync::Arc;

    /// Creating a client for a malformed address must fail, and the failure is
    /// expected to be reported as `invalid parameter`.
    #[tokio::test]
    async fn invalid_uri_should_fail() {
        let config = Arc::new(Config::default());
        let result = ManagerClient::new(config, "htt:/xxx".to_string()).await;
        assert!(result.is_err());

        if let Err(e) = result {
            assert_eq!(e.to_string(), "invalid parameter");
        } else {
            panic!("unexpected error");
        }
    }
}

View File

@ -1,144 +0,0 @@
/*
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::metrics::{
collect_prefetch_task_failure_metrics, collect_prefetch_task_started_metrics,
};
use dragonfly_api::dfdaemon::v2::DownloadTaskRequest;
use dragonfly_client_core::{Error as ClientError, Result as ClientResult};
use std::path::PathBuf;
use std::time::Duration;
use tonic::Request;
use tracing::{error, info, instrument, Instrument};
pub mod dfdaemon_download;
pub mod dfdaemon_upload;
pub mod health;
pub mod interceptor;
pub mod manager;
pub mod scheduler;
/// CONNECT_TIMEOUT is the timeout for GRPC connection, default is 2 seconds.
pub const CONNECT_TIMEOUT: Duration = Duration::from_secs(2);
/// REQUEST_TIMEOUT is the timeout for GRPC requests, default is 15 seconds.
/// Note: This timeout is used for the whole request, including waiting for the scheduler
/// to schedule the task, refer to https://d7y.io/docs/next/reference/configuration/scheduler/.
/// The scheduler's `scheduler.retryInterval`, `scheduler.retryBackToSourceLimit` and
/// `scheduler.retryLimit` settings are used by the scheduler to schedule the task.
pub const REQUEST_TIMEOUT: Duration = Duration::from_secs(15);
/// TCP_KEEPALIVE is the keepalive duration for TCP connection, default is 3600 seconds.
pub const TCP_KEEPALIVE: Duration = Duration::from_secs(3600);
/// HTTP2_KEEP_ALIVE_INTERVAL is the interval for HTTP2 keep alive, default is 300 seconds.
pub const HTTP2_KEEP_ALIVE_INTERVAL: Duration = Duration::from_secs(300);
/// HTTP2_KEEP_ALIVE_TIMEOUT is the timeout for HTTP2 keep alive, default is 20 seconds.
pub const HTTP2_KEEP_ALIVE_TIMEOUT: Duration = Duration::from_secs(20);
/// MAX_FRAME_SIZE is the max frame size for GRPC, default is 4MB.
pub const MAX_FRAME_SIZE: u32 = 4 * 1024 * 1024;
/// INITIAL_WINDOW_SIZE is the initial window size for GRPC, default is 512KB.
pub const INITIAL_WINDOW_SIZE: u32 = 512 * 1024;
/// BUFFER_SIZE is the buffer size for GRPC, default is 64KB.
pub const BUFFER_SIZE: usize = 64 * 1024;
/// prefetch_task starts a download of the full task through the dfdaemon download client.
///
/// The request is rewritten before it is sent: the range and the `Range` request header are
/// removed, the `prefetch` flag is cleared and `is_prefetch` is set. Once the download has
/// been accepted, the response stream is drained in a spawned task, so this function returns
/// without waiting for the prefetch to finish.
///
/// Returns `InvalidParameter` when the request carries no download, and propagates errors
/// from creating the client or starting the download.
#[instrument(skip_all)]
pub async fn prefetch_task(
    socket_path: PathBuf,
    request: Request<DownloadTaskRequest>,
) -> ClientResult<()> {
    // Initialize the dfdaemon download client over the unix socket.
    let dfdaemon_download_client =
        dfdaemon_download::DfdaemonDownloadClient::new_unix(socket_path).await?;

    // Unwrap the message and make sure it carries a download description.
    let mut request = request.into_inner();
    let download = request
        .download
        .as_mut()
        .ok_or(ClientError::InvalidParameter)?;

    // Download the full task: drop both the range field and the range header.
    download.range = None;
    download
        .request_header
        .remove(reqwest::header::RANGE.as_str());

    // Clear the prefetch flag to prevent an infinite loop, and mark the request itself as a
    // prefetch request.
    download.prefetch = false;
    download.is_prefetch = true;

    // Keep the metric labels, since the request is moved into the download call below.
    let task_type = download.r#type;
    let priority = download.priority;
    let tag = download.tag.clone();
    let application = download.application.clone();

    // Download task by dfdaemon download client.
    let response = dfdaemon_download_client
        .download_task(request)
        .await
        .inspect_err(|err| {
            error!("prefetch task failed: {}", err);
        })?;

    // Collect the prefetch task started metrics.
    collect_prefetch_task_started_metrics(
        task_type,
        tag.as_deref().unwrap_or_default(),
        application.as_deref().unwrap_or_default(),
        &priority.to_string(),
    );

    // Drain the response stream in the background, within the current tracing span.
    let drain_stream = async move {
        let mut out_stream = response.into_inner();
        loop {
            let message = out_stream.message().await;
            match message {
                Err(err) => {
                    // Collect the prefetch task failure metrics.
                    collect_prefetch_task_failure_metrics(
                        task_type,
                        tag.as_deref().unwrap_or_default(),
                        application.as_deref().unwrap_or_default(),
                        &priority.to_string(),
                    );
                    error!("prefetch piece failed: {}", err);
                    return;
                }
                Ok(None) => {
                    info!("prefetch task finished");
                    return;
                }
                Ok(Some(_)) => info!("prefetch piece finished"),
            }
        }
    };
    tokio::spawn(drain_stream.in_current_span());

    Ok(())
}

View File

@ -1,629 +0,0 @@
/*
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::dynconfig::Dynconfig;
use dragonfly_api::common::v2::{Peer, PersistentCachePeer, PersistentCacheTask, Task};
use dragonfly_api::manager::v2::Scheduler;
use dragonfly_api::scheduler::v2::{
scheduler_client::SchedulerClient as SchedulerGRPCClient, AnnounceHostRequest,
AnnouncePeerRequest, AnnouncePeerResponse, AnnouncePersistentCachePeerRequest,
AnnouncePersistentCachePeerResponse, DeleteHostRequest, DeletePeerRequest,
DeletePersistentCachePeerRequest, DeletePersistentCacheTaskRequest, DeleteTaskRequest,
StatPeerRequest, StatPersistentCachePeerRequest, StatPersistentCacheTaskRequest,
StatTaskRequest, UploadPersistentCacheTaskFailedRequest,
UploadPersistentCacheTaskFinishedRequest, UploadPersistentCacheTaskStartedRequest,
};
use dragonfly_client_config::dfdaemon::Config;
use dragonfly_client_core::error::{ErrorType, OrErr};
use dragonfly_client_core::{Error, Result};
use hashring::HashRing;
use std::net::{IpAddr, SocketAddr};
use std::str::FromStr;
use std::sync::Arc;
use tokio::sync::RwLock;
use tokio::task::JoinSet;
use tonic::service::interceptor::InterceptedService;
use tonic::transport::Channel;
use tracing::{error, info, instrument, Instrument};
use url::Url;
use super::interceptor::InjectTracingInterceptor;
/// VNode is a virtual node stored in the scheduler hashring.
#[derive(Clone, Copy, Debug, Hash, PartialEq)]
struct VNode {
    /// addr is the socket address this virtual node stands for.
    addr: SocketAddr,
}

/// Display renders a VNode as its socket address.
impl std::fmt::Display for VNode {
    /// fmt writes the address of the virtual node into the formatter.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("{}", self.addr))
    }
}
/// SchedulerClient is a wrapper of SchedulerGRPCClient.
///
/// It keeps the scheduler list, the derived socket addresses and a hashring of those
/// addresses behind `RwLock`s; cloning the client shares the same state through `Arc`.
#[derive(Clone)]
pub struct SchedulerClient {
/// config is the configuration of the dfdaemon.
config: Arc<Config>,
/// dynconfig is the dynamic configuration of the dfdaemon; its `available_schedulers`
/// data is the source the fields below are rebuilt from.
dynconfig: Arc<Dynconfig>,
/// available_schedulers is the available schedulers that were last applied; it is
/// compared with the dynconfig data to detect changes.
available_schedulers: Arc<RwLock<Vec<Scheduler>>>,
/// available_scheduler_addrs is the addresses of available schedulers.
available_scheduler_addrs: Arc<RwLock<Vec<SocketAddr>>>,
/// hashring is the hashring of the scheduler addresses, used to pick a scheduler for a task.
hashring: Arc<RwLock<HashRing<VNode>>>,
}
/// SchedulerClient implements the grpc client of the scheduler.
impl SchedulerClient {
/// new creates a SchedulerClient with empty scheduler state and then refreshes the
/// available scheduler addresses.
///
/// Fails when that initial refresh fails.
pub async fn new(config: Arc<Config>, dynconfig: Arc<Dynconfig>) -> Result<Self> {
    // Start from empty state; the refresh below fills it.
    let available_schedulers = Arc::new(RwLock::new(Vec::new()));
    let available_scheduler_addrs = Arc::new(RwLock::new(Vec::new()));
    let hashring = Arc::new(RwLock::new(HashRing::new()));

    let scheduler_client = Self {
        config,
        dynconfig,
        available_schedulers,
        available_scheduler_addrs,
        hashring,
    };

    scheduler_client.refresh_available_scheduler_addrs().await?;
    Ok(scheduler_client)
}
/// announce_peer opens the announce-peer stream on the scheduler selected for the task
/// and returns the streaming response.
#[instrument(skip_all)]
pub async fn announce_peer(
    &self,
    task_id: &str,
    peer_id: &str,
    request: impl tonic::IntoStreamingRequest<Message = AnnouncePeerRequest>,
) -> Result<tonic::Response<tonic::codec::Streaming<AnnouncePeerResponse>>> {
    let mut scheduler = self.client(task_id, Some(peer_id)).await?;
    Ok(scheduler.announce_peer(request).await?)
}
/// stat_peer queries the scheduler selected by the request's task id for the status of
/// the peer.
#[instrument(skip(self))]
pub async fn stat_peer(&self, request: StatPeerRequest) -> Result<Peer> {
    // The task id is only borrowed while the scheduler client is resolved.
    let mut scheduler = self.client(request.task_id.as_str(), None).await?;
    let response = scheduler.stat_peer(Self::make_request(request)).await?;
    Ok(response.into_inner())
}
/// delete_peer tells the scheduler selected by the request's task id that the peer is
/// deleting.
#[instrument(skip(self))]
pub async fn delete_peer(&self, request: DeletePeerRequest) -> Result<()> {
    // The task id is only borrowed while the scheduler client is resolved.
    let mut scheduler = self.client(request.task_id.as_str(), None).await?;
    scheduler.delete_peer(Self::make_request(request)).await?;
    Ok(())
}
/// stat_task queries the scheduler selected by the request's task id for the status of
/// the task.
#[instrument(skip(self))]
pub async fn stat_task(&self, request: StatTaskRequest) -> Result<Task> {
    // The task id is only borrowed while the scheduler client is resolved.
    let mut scheduler = self.client(request.task_id.as_str(), None).await?;
    let response = scheduler.stat_task(Self::make_request(request)).await?;
    Ok(response.into_inner())
}
/// delete_task tells the scheduler selected by the request's task id that the task is
/// deleting.
#[instrument(skip(self))]
pub async fn delete_task(&self, request: DeleteTaskRequest) -> Result<()> {
    // The task id is only borrowed while the scheduler client is resolved.
    let mut scheduler = self.client(request.task_id.as_str(), None).await?;
    scheduler.delete_task(Self::make_request(request)).await?;
    Ok(())
}
/// announce_host announces the host to the scheduler.
#[instrument(skip(self))]
pub async fn announce_host(&self, request: AnnounceHostRequest) -> Result<()> {
// Update scheduler addresses of the client.
self.update_available_scheduler_addrs().await?;
// Announce the host to the scheduler.
let mut join_set = JoinSet::new();
let available_scheduler_addrs = self.available_scheduler_addrs.read().await;
let available_scheduler_addrs_clone = available_scheduler_addrs.clone();
drop(available_scheduler_addrs);
for available_scheduler_addr in available_scheduler_addrs_clone.iter() {
let request = Self::make_request(request.clone());
async fn announce_host(
addr: SocketAddr,
request: tonic::Request<AnnounceHostRequest>,
) -> Result<()> {
info!("announce host to {}", addr);
// Connect to the scheduler.
let channel = Channel::from_shared(format!("http://{}", addr))
.map_err(|_| Error::InvalidURI(addr.to_string()))?
.buffer_size(super::BUFFER_SIZE)
.connect_timeout(super::CONNECT_TIMEOUT)
.timeout(super::REQUEST_TIMEOUT)
.connect()
.await
.inspect_err(|err| {
error!("connect to {} failed: {}", addr.to_string(), err);
})
.or_err(ErrorType::ConnectError)?;
let mut client =
SchedulerGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX);
client.announce_host(request).await?;
Ok(())
}
join_set.spawn(announce_host(*available_scheduler_addr, request).in_current_span());
}
while let Some(message) = join_set
.join_next()
.await
.transpose()
.or_err(ErrorType::AsyncRuntimeError)?
{
if let Err(err) = message {
error!("failed to announce host: {}", err);
}
}
Ok(())
}
/// init_announce_host announces the host to the scheduler.
#[instrument(skip(self))]
pub async fn init_announce_host(&self, request: AnnounceHostRequest) -> Result<()> {
let mut join_set = JoinSet::new();
let available_scheduler_addrs = self.available_scheduler_addrs.read().await;
let available_scheduler_addrs_clone = available_scheduler_addrs.clone();
drop(available_scheduler_addrs);
for available_scheduler_addr in available_scheduler_addrs_clone.iter() {
let request = Self::make_request(request.clone());
async fn announce_host(
addr: SocketAddr,
request: tonic::Request<AnnounceHostRequest>,
) -> Result<()> {
info!("announce host to {:?}", addr);
// Connect to the scheduler.
let channel = Channel::from_shared(format!("http://{}", addr))
.map_err(|_| Error::InvalidURI(addr.to_string()))?
.buffer_size(super::BUFFER_SIZE)
.connect_timeout(super::CONNECT_TIMEOUT)
.timeout(super::REQUEST_TIMEOUT)
.connect()
.await
.inspect_err(|err| {
error!("connect to {} failed: {}", addr.to_string(), err);
})
.or_err(ErrorType::ConnectError)?;
let mut client =
SchedulerGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX);
client.announce_host(request).await?;
Ok(())
}
join_set.spawn(announce_host(*available_scheduler_addr, request).in_current_span());
}
while let Some(message) = join_set
.join_next()
.await
.transpose()
.or_err(ErrorType::AsyncRuntimeError)?
{
if let Err(err) = message {
error!("failed to init announce host: {}", err);
return Err(err);
}
}
Ok(())
}
/// delete_host tells the scheduler that the host is deleting.
#[instrument(skip(self))]
pub async fn delete_host(&self, request: DeleteHostRequest) -> Result<()> {
// Update scheduler addresses of the client.
self.update_available_scheduler_addrs().await?;
// Delete the host from the scheduler.
let mut join_set = JoinSet::new();
let available_scheduler_addrs = self.available_scheduler_addrs.read().await;
let available_scheduler_addrs_clone = available_scheduler_addrs.clone();
drop(available_scheduler_addrs);
for available_scheduler_addr in available_scheduler_addrs_clone.iter() {
let request = Self::make_request(request.clone());
async fn delete_host(
addr: SocketAddr,
request: tonic::Request<DeleteHostRequest>,
) -> Result<()> {
info!("delete host from {}", addr);
// Connect to the scheduler.
let channel = Channel::from_shared(format!("http://{}", addr))
.map_err(|_| Error::InvalidURI(addr.to_string()))?
.buffer_size(super::BUFFER_SIZE)
.connect_timeout(super::CONNECT_TIMEOUT)
.timeout(super::REQUEST_TIMEOUT)
.connect()
.await
.inspect_err(|err| {
error!("connect to {} failed: {}", addr.to_string(), err);
})
.or_err(ErrorType::ConnectError)?;
let mut client =
SchedulerGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX);
client.delete_host(request).await?;
Ok(())
}
join_set.spawn(delete_host(*available_scheduler_addr, request).in_current_span());
}
while let Some(message) = join_set
.join_next()
.await
.transpose()
.or_err(ErrorType::AsyncRuntimeError)?
{
if let Err(err) = message {
error!("failed to delete host: {}", err);
}
}
Ok(())
}
/// announce_persistent_cache_peer opens the announce stream for a persistent cache peer on
/// the scheduler selected for the task and returns the streaming response.
#[instrument(skip_all)]
pub async fn announce_persistent_cache_peer(
    &self,
    task_id: &str,
    peer_id: &str,
    request: impl tonic::IntoStreamingRequest<Message = AnnouncePersistentCachePeerRequest>,
) -> Result<tonic::Response<tonic::codec::Streaming<AnnouncePersistentCachePeerResponse>>> {
    let mut scheduler = self.client(task_id, Some(peer_id)).await?;
    Ok(scheduler.announce_persistent_cache_peer(request).await?)
}
/// stat_persistent_cache_peer queries the scheduler selected by the request's task id for
/// the status of the persistent cache peer.
#[instrument(skip(self))]
pub async fn stat_persistent_cache_peer(
    &self,
    request: StatPersistentCachePeerRequest,
) -> Result<PersistentCachePeer> {
    // The task id is only borrowed while the scheduler client is resolved.
    let mut scheduler = self.client(request.task_id.as_str(), None).await?;
    let response = scheduler
        .stat_persistent_cache_peer(Self::make_request(request))
        .await?;
    Ok(response.into_inner())
}
/// delete_persistent_cache_peer tells the scheduler selected by the request's task id that
/// the persistent cache peer is deleting.
#[instrument(skip(self))]
pub async fn delete_persistent_cache_peer(
    &self,
    request: DeletePersistentCachePeerRequest,
) -> Result<()> {
    // The task id is only borrowed while the scheduler client is resolved.
    let mut scheduler = self.client(request.task_id.as_str(), None).await?;
    scheduler
        .delete_persistent_cache_peer(Self::make_request(request))
        .await?;
    Ok(())
}
/// upload_persistent_cache_task_started uploads the metadata of the started persistent
/// cache task to the scheduler selected by the request's task id.
#[instrument(skip(self))]
pub async fn upload_persistent_cache_task_started(
    &self,
    request: UploadPersistentCacheTaskStartedRequest,
) -> Result<()> {
    // The task id is only borrowed while the scheduler client is resolved.
    let mut scheduler = self.client(request.task_id.as_str(), None).await?;
    scheduler
        .upload_persistent_cache_task_started(Self::make_request(request))
        .await?;
    Ok(())
}
/// upload_persistent_cache_task_finished uploads the metadata of the finished persistent
/// cache task to the scheduler selected by the request's task id and returns the task from
/// the response.
#[instrument(skip_all)]
pub async fn upload_persistent_cache_task_finished(
    &self,
    request: UploadPersistentCacheTaskFinishedRequest,
) -> Result<PersistentCacheTask> {
    // The task id is only borrowed while the scheduler client is resolved.
    let mut scheduler = self.client(request.task_id.as_str(), None).await?;
    let response = scheduler
        .upload_persistent_cache_task_finished(Self::make_request(request))
        .await?;
    Ok(response.into_inner())
}
/// upload_persistent_cache_task_failed uploads the metadata of the failed persistent cache
/// task to the scheduler selected by the request's task id.
#[instrument(skip_all)]
pub async fn upload_persistent_cache_task_failed(
    &self,
    request: UploadPersistentCacheTaskFailedRequest,
) -> Result<()> {
    // The task id is only borrowed while the scheduler client is resolved.
    let mut scheduler = self.client(request.task_id.as_str(), None).await?;
    scheduler
        .upload_persistent_cache_task_failed(Self::make_request(request))
        .await?;
    Ok(())
}
/// stat_persistent_cache_task queries the scheduler selected by the request's task id for
/// the status of the persistent cache task.
#[instrument(skip(self))]
pub async fn stat_persistent_cache_task(
    &self,
    request: StatPersistentCacheTaskRequest,
) -> Result<PersistentCacheTask> {
    // The task id is only borrowed while the scheduler client is resolved.
    let mut scheduler = self.client(request.task_id.as_str(), None).await?;
    let response = scheduler
        .stat_persistent_cache_task(Self::make_request(request))
        .await?;
    Ok(response.into_inner())
}
/// delete_persistent_cache_task tells the scheduler selected by the request's task id that
/// the persistent cache task is deleting.
#[instrument(skip(self))]
pub async fn delete_persistent_cache_task(
    &self,
    request: DeletePersistentCacheTaskRequest,
) -> Result<()> {
    // The task id is only borrowed while the scheduler client is resolved.
    let mut scheduler = self.client(request.task_id.as_str(), None).await?;
    scheduler
        .delete_persistent_cache_task(Self::make_request(request))
        .await?;
    Ok(())
}
/// client gets the grpc client of the scheduler.
#[instrument(skip(self))]
async fn client(
&self,
task_id: &str,
peer_id: Option<&str>,
) -> Result<SchedulerGRPCClient<InterceptedService<Channel, InjectTracingInterceptor>>> {
// Update scheduler addresses of the client.
self.update_available_scheduler_addrs().await?;
// Get the scheduler address from the hashring.
let addrs = self.hashring.read().await;
let addr = *addrs
.get(&task_id[0..5].to_string())
.ok_or_else(|| Error::HashRing(task_id.to_string()))?;
drop(addrs);
info!("picked {:?}", addr);
let addr = format!("http://{}", addr);
let domain_name = Url::parse(addr.as_str())?
.host_str()
.ok_or_else(|| {
error!("invalid address: {}", addr);
Error::InvalidParameter
})?
.to_string();
let channel = match self
.config
.scheduler
.load_client_tls_config(domain_name.as_str())
.await?
{
Some(client_tls_config) => Channel::from_shared(addr.clone())
.map_err(|_| Error::InvalidURI(addr.clone()))?
.tls_config(client_tls_config)?
.buffer_size(super::BUFFER_SIZE)
.connect_timeout(super::CONNECT_TIMEOUT)
.timeout(super::REQUEST_TIMEOUT)
.tcp_keepalive(Some(super::TCP_KEEPALIVE))
.http2_keep_alive_interval(super::HTTP2_KEEP_ALIVE_INTERVAL)
.keep_alive_timeout(super::HTTP2_KEEP_ALIVE_TIMEOUT)
.connect()
.await
.inspect_err(|err| {
error!("connect to {} failed: {}", addr.to_string(), err);
})
.or_err(ErrorType::ConnectError)?,
None => Channel::from_shared(addr.clone())
.map_err(|_| Error::InvalidURI(addr.clone()))?
.buffer_size(super::BUFFER_SIZE)
.connect_timeout(super::CONNECT_TIMEOUT)
.timeout(super::REQUEST_TIMEOUT)
.tcp_keepalive(Some(super::TCP_KEEPALIVE))
.http2_keep_alive_interval(super::HTTP2_KEEP_ALIVE_INTERVAL)
.keep_alive_timeout(super::HTTP2_KEEP_ALIVE_TIMEOUT)
.connect()
.await
.inspect_err(|err| {
error!("connect to {} failed: {}", addr.to_string(), err);
})
.or_err(ErrorType::ConnectError)?,
};
Ok(
SchedulerGRPCClient::with_interceptor(channel, InjectTracingInterceptor)
.max_decoding_message_size(usize::MAX)
.max_encoding_message_size(usize::MAX),
)
}
/// update_available_scheduler_addrs updates the addresses of available schedulers.
#[instrument(skip(self))]
async fn update_available_scheduler_addrs(&self) -> Result<()> {
// Get the endpoints of available schedulers.
let data_available_schedulers_clone = {
let data = self.dynconfig.data.read().await;
data.available_schedulers.clone()
};
// Check if the available schedulers is empty.
if data_available_schedulers_clone.is_empty() {
return Err(Error::AvailableSchedulersNotFound);
}
// Get the available schedulers.
let available_schedulers_clone = {
let available_schedulers = self.available_schedulers.read().await;
available_schedulers.clone()
};
// Check if the available schedulers is not changed.
if data_available_schedulers_clone.len() == available_schedulers_clone.len()
&& data_available_schedulers_clone
.iter()
.zip(available_schedulers_clone.iter())
.all(|(a, b)| a == b)
{
info!(
"available schedulers is not changed: {:?}",
data_available_schedulers_clone
.iter()
.map(|s| s.ip.clone())
.collect::<Vec<String>>()
);
return Ok(());
}
let mut new_available_schedulers = Vec::new();
let mut new_available_scheduler_addrs = Vec::new();
let mut new_hashring = HashRing::new();
for available_scheduler in data_available_schedulers_clone.iter() {
let ip = match IpAddr::from_str(&available_scheduler.ip) {
Ok(ip) => ip,
Err(err) => {
error!("failed to parse ip: {}", err);
continue;
}
};
// Add the scheduler to the available schedulers.
new_available_schedulers.push(available_scheduler.clone());
// Add the scheduler address to the addresses of available schedulers.
let socket_addr = SocketAddr::new(ip, available_scheduler.port as u16);
new_available_scheduler_addrs.push(socket_addr);
// Add the scheduler to the hashring.
new_hashring.add(VNode { addr: socket_addr });
}
// Update the available schedulers.
let mut available_schedulers = self.available_schedulers.write().await;
*available_schedulers = new_available_schedulers;
drop(available_schedulers);
// Update the addresses of available schedulers.
let mut available_scheduler_addrs = self.available_scheduler_addrs.write().await;
*available_scheduler_addrs = new_available_scheduler_addrs;
drop(available_scheduler_addrs);
// Update the hashring.
let mut hashring = self.hashring.write().await;
*hashring = new_hashring;
drop(hashring);
let available_scheduler_addrs = self.available_scheduler_addrs.read().await;
info!(
"refresh available scheduler addresses: {:?}",
available_scheduler_addrs
.iter()
.map(|s| s.ip().to_string())
.collect::<Vec<String>>(),
);
Ok(())
}
/// refresh_available_scheduler_addrs refreshes the dynamic configuration and then updates
/// the addresses of available schedulers from it.
#[instrument(skip(self))]
async fn refresh_available_scheduler_addrs(&self) -> Result<()> {
    // Refresh the dynamic configuration; stop here when that fails.
    let refreshed = self.dynconfig.refresh().await;
    refreshed?;

    // Update scheduler addresses of the client.
    self.update_available_scheduler_addrs().await
}
/// make_request wraps a message into a `tonic::Request` whose timeout is set to
/// `super::REQUEST_TIMEOUT`.
fn make_request<T>(request: T) -> tonic::Request<T> {
    let mut timed_request = tonic::Request::new(request);
    timed_request.set_timeout(super::REQUEST_TIMEOUT);
    timed_request
}
}

View File

@ -1,81 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::shutdown;
use std::net::SocketAddr;
use tokio::sync::mpsc;
use tracing::{info, instrument};
use warp::{Filter, Rejection, Reply};
/// Health is the health check server.
#[derive(Debug)]
pub struct Health {
    /// addr is the socket address the health server listens on.
    addr: SocketAddr,

    /// shutdown is used to receive the shutdown signal of the health server.
    shutdown: shutdown::Shutdown,

    /// _shutdown_complete is used to notify that the health server is shutdown.
    /// NOTE(review): the sender is never used in this block; presumably dropping it is
    /// the notification — confirm against the receiver side.
    _shutdown_complete: mpsc::UnboundedSender<()>,
}

/// Health implements the health check server.
impl Health {
    /// new creates a Health server for the given address and shutdown handles.
    pub fn new(
        addr: SocketAddr,
        shutdown: shutdown::Shutdown,
        shutdown_complete_tx: mpsc::UnboundedSender<()>,
    ) -> Self {
        let _shutdown_complete = shutdown_complete_tx;
        Self {
            addr,
            shutdown,
            _shutdown_complete,
        }
    }

    /// run serves `GET /healthy` on the configured address until either the server ends or
    /// a shutdown signal is received.
    pub async fn run(&self) {
        // Take a mutable copy of the shutdown handle for receiving the signal.
        let mut shutdown = self.shutdown.clone();

        // Create the health route.
        let healthy_path = warp::path!("healthy");
        let health_route = healthy_path
            .and(warp::get())
            .and(warp::path::end())
            .and_then(Self::health_handler);

        // Start the health server and wait for it to finish.
        info!("health server listening on {}", self.addr);
        tokio::select! {
            _ = warp::serve(health_route).run(self.addr) => {
                // The server future completed on its own.
                info!("health server ended");
            }
            _ = shutdown.recv() => {
                // A shutdown signal arrived first.
                info!("health server shutting down");
            }
        }
    }

    /// health_handler answers the health check request with the default `warp::reply()`.
    #[instrument(skip_all)]
    async fn health_handler() -> Result<impl Reply, Rejection> {
        let reply = warp::reply();
        Ok(reply)
    }
}

View File

@ -1,963 +0,0 @@
/*
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::shutdown;
use dragonfly_api::common::v2::{Range, TrafficType};
use dragonfly_client_config::{
dfdaemon::Config, BUILD_PLATFORM, CARGO_PKG_VERSION, GIT_COMMIT_DATE, GIT_COMMIT_SHORT_HASH,
};
use lazy_static::lazy_static;
use prometheus::{
exponential_buckets, gather, Encoder, HistogramOpts, HistogramVec, IntCounterVec, IntGaugeVec,
Opts, Registry, TextEncoder,
};
use std::net::SocketAddr;
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::mpsc;
use tracing::{error, info, instrument, warn};
use warp::{Filter, Rejection, Reply};
/// DOWNLOAD_TASK_LEVEL1_DURATION_THRESHOLD is the threshold (500 milliseconds) of download
/// task level1 duration for recording slow download task.
const DOWNLOAD_TASK_LEVEL1_DURATION_THRESHOLD: Duration = Duration::from_millis(500);
/// UPLOAD_TASK_LEVEL1_DURATION_THRESHOLD is the threshold (500 milliseconds) of upload
/// task level1 duration for recording slow upload task.
const UPLOAD_TASK_LEVEL1_DURATION_THRESHOLD: Duration = Duration::from_millis(500);
lazy_static! {
/// REGISTRY is used to register all metrics.
pub static ref REGISTRY: Registry = Registry::new();
/// VERSION_GAUGE is used to record the version info of the service.
pub static ref VERSION_GAUGE: IntGaugeVec =
IntGaugeVec::new(
Opts::new("version", "Version info of the service.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["git_version", "git_commit", "platform", "build_time"]
).expect("metric can be created");
/// UPLOAD_TASK_COUNT is used to count the number of upload tasks.
pub static ref UPLOAD_TASK_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("upload_task_total", "Counter of the number of the upload task.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type", "tag", "app"]
).expect("metric can be created");
/// UPLOAD_TASK_FAILURE_COUNT is used to count the failed number of upload tasks.
pub static ref UPLOAD_TASK_FAILURE_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("upload_task_failure_total", "Counter of the number of failed of the upload task.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type", "tag", "app"]
).expect("metric can be created");
/// CONCURRENT_UPLOAD_TASK_GAUGE is used to gauge the number of concurrent upload tasks.
pub static ref CONCURRENT_UPLOAD_TASK_GAUGE: IntGaugeVec =
IntGaugeVec::new(
Opts::new("concurrent_upload_task_total", "Gauge of the number of concurrent of the upload task.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type", "tag", "app"]
).expect("metric can be created");
/// UPLOAD_TASK_DURATION is used to record the upload task duration.
pub static ref UPLOAD_TASK_DURATION: HistogramVec =
HistogramVec::new(
HistogramOpts::new("upload_task_duration_milliseconds", "Histogram of the upload task duration.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME).buckets(exponential_buckets(1.0, 2.0, 24).unwrap()),
&["task_type", "task_size_level"]
).expect("metric can be created");
/// DOWNLOAD_TASK_COUNT is used to count the number of download tasks.
pub static ref DOWNLOAD_TASK_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("download_task_total", "Counter of the number of the download task.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type", "tag", "app", "priority"]
).expect("metric can be created");
/// DOWNLOAD_TASK_FAILURE_COUNT is used to count the failed number of download tasks.
pub static ref DOWNLOAD_TASK_FAILURE_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("download_task_failure_total", "Counter of the number of failed of the download task.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type", "tag", "app", "priority"]
).expect("metric can be created");
/// PREFETCH_TASK_COUNT is used to count the number of prefetch tasks.
pub static ref PREFETCH_TASK_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("prefetch_task_total", "Counter of the number of the prefetch task.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type", "tag", "app", "priority"]
).expect("metric can be created");
/// PREFETCH_TASK_FAILURE_COUNT is used to count the failed number of prefetch tasks.
pub static ref PREFETCH_TASK_FAILURE_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("prefetch_task_failure_total", "Counter of the number of failed of the prefetch task.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type", "tag", "app", "priority"]
).expect("metric can be created");
/// CONCURRENT_DOWNLOAD_TASK_GAUGE is used to gauge the number of concurrent download tasks.
pub static ref CONCURRENT_DOWNLOAD_TASK_GAUGE: IntGaugeVec =
IntGaugeVec::new(
Opts::new("concurrent_download_task_total", "Gauge of the number of concurrent of the download task.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type", "tag", "app", "priority"]
).expect("metric can be created");
/// CONCURRENT_UPLOAD_PIECE_GAUGE is used to gauge the number of concurrent upload pieces.
pub static ref CONCURRENT_UPLOAD_PIECE_GAUGE: IntGaugeVec =
IntGaugeVec::new(
Opts::new("concurrent_upload_piece_total", "Gauge of the number of concurrent of the upload piece.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&[]
).expect("metric can be created");
/// DOWNLOAD_TRAFFIC is used to count the download traffic.
pub static ref DOWNLOAD_TRAFFIC: IntCounterVec =
IntCounterVec::new(
Opts::new("download_traffic", "Counter of the number of the download traffic.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type", "task_type"]
).expect("metric can be created");
/// UPLOAD_TRAFFIC is used to count the upload traffic.
pub static ref UPLOAD_TRAFFIC: IntCounterVec =
IntCounterVec::new(
Opts::new("upload_traffic", "Counter of the number of the upload traffic.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["task_type"]
).expect("metric can be created");
/// DOWNLOAD_TASK_DURATION is used to record the download task duration.
pub static ref DOWNLOAD_TASK_DURATION: HistogramVec =
HistogramVec::new(
HistogramOpts::new("download_task_duration_milliseconds", "Histogram of the download task duration.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME).buckets(exponential_buckets(1.0, 2.0, 24).unwrap()),
&["task_type", "task_size_level"]
).expect("metric can be created");
/// BACKEND_REQUEST_COUNT is used to count the number of backend requests.
pub static ref BACKEND_REQUEST_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("backend_request_total", "Counter of the number of the backend request.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["scheme", "method"]
).expect("metric can be created");
/// BACKEND_REQUEST_FAILURE_COUNT is used to count the failed number of backend request.
pub static ref BACKEND_REQUEST_FAILURE_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("backend_request_failure_total", "Counter of the number of failed of the backend request.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["scheme", "method"]
).expect("metric can be created");
/// BACKEND_REQUEST_DURATION is used to record the backend request duration.
pub static ref BACKEND_REQUEST_DURATION: HistogramVec =
HistogramVec::new(
HistogramOpts::new("backend_request_duration_milliseconds", "Histogram of the backend request duration.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME).buckets(exponential_buckets(1.0, 2.0, 24).unwrap()),
&["scheme", "method"]
).expect("metric can be created");
/// PROXY_REQUEST_COUNT is used to count the number of proxy requests.
pub static ref PROXY_REQUEST_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("proxy_request_total", "Counter of the number of the proxy request.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&[]
).expect("metric can be created");
/// PROXY_REQUEST_FAILURE_COUNT is used to count the failed number of proxy request.
pub static ref PROXY_REQUEST_FAILURE_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("proxy_request_failure_total", "Counter of the number of failed of the proxy request.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&[]
).expect("metric can be created");
/// PROXY_REQUEST_VIA_DFDAEMON_COUNT is used to count the number of proxy requests via dfdaemon.
pub static ref PROXY_REQUEST_VIA_DFDAEMON_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("proxy_request_via_dfdaemon_total", "Counter of the number of the proxy request via dfdaemon.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&[]
).expect("metric can be created");
/// UPDATE_TASK_COUNT is used to count the number of update tasks.
pub static ref UPDATE_TASK_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("update_task_total", "Counter of the number of the update task.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type"]
).expect("metric can be created");
/// UPDATE_TASK_FAILURE_COUNT is used to count the failed number of update tasks.
pub static ref UPDATE_TASK_FAILURE_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("update_task_failure_total", "Counter of the number of failed of the update task.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type"]
).expect("metric can be created");
/// STAT_TASK_COUNT is used to count the number of stat tasks.
pub static ref STAT_TASK_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("stat_task_total", "Counter of the number of the stat task.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type"]
).expect("metric can be created");
/// STAT_TASK_FAILURE_COUNT is used to count the failed number of stat tasks.
pub static ref STAT_TASK_FAILURE_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("stat_task_failure_total", "Counter of the number of failed of the stat task.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type"]
).expect("metric can be created");
/// LIST_TASK_ENTRIES_COUNT is used to count the number of list task entries.
pub static ref LIST_TASK_ENTRIES_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("list_task_entries_total", "Counter of the number of the list task entries.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type"]
).expect("metric can be created");
/// LIST_TASK_ENTRIES_FAILURE_COUNT is used to count the failed number of list task entries.
pub static ref LIST_TASK_ENTRIES_FAILURE_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("list_task_entries_failure_total", "Counter of the number of failed of the list task entries.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type"]
).expect("metric can be created");
/// DELETE_TASK_COUNT is used to count the number of delete tasks.
pub static ref DELETE_TASK_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("delete_task_total", "Counter of the number of the delete task.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type"]
).expect("metric can be created");
/// DELETE_TASK_FAILURE_COUNT is used to count the failed number of delete tasks.
pub static ref DELETE_TASK_FAILURE_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("delete_task_failure_total", "Counter of the number of failed of the delete task.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&["type"]
).expect("metric can be created");
/// DELETE_HOST_COUNT is used to count the number of delete host.
pub static ref DELETE_HOST_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("delete_host_total", "Counter of the number of the delete host.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&[]
).expect("metric can be created");
/// DELETE_HOST_FAILURE_COUNT is used to count the failed number of delete host.
pub static ref DELETE_HOST_FAILURE_COUNT: IntCounterVec =
IntCounterVec::new(
Opts::new("delete_host_failure_total", "Counter of the number of failed of the delete host.").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&[]
).expect("metric can be created");
/// DISK_SPACE is used to count of the disk space.
pub static ref DISK_SPACE: IntGaugeVec =
IntGaugeVec::new(
Opts::new("disk_space_total", "Gauge of the disk space in bytes").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&[]
).expect("metric can be created");
/// DISK_USAGE_SPACE is used to count of the disk usage space.
pub static ref DISK_USAGE_SPACE: IntGaugeVec =
IntGaugeVec::new(
Opts::new("disk_usage_space_total", "Gauge of the disk usage space in bytes").namespace(dragonfly_client_config::SERVICE_NAME).subsystem(dragonfly_client_config::NAME),
&[]
).expect("metric can be created");
}
/// register_custom_metrics registers all custom metrics.
fn register_custom_metrics() {
REGISTRY
.register(Box::new(VERSION_GAUGE.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(DOWNLOAD_TASK_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(DOWNLOAD_TASK_FAILURE_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(PREFETCH_TASK_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(PREFETCH_TASK_FAILURE_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(CONCURRENT_DOWNLOAD_TASK_GAUGE.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(CONCURRENT_UPLOAD_PIECE_GAUGE.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(DOWNLOAD_TRAFFIC.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(UPLOAD_TRAFFIC.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(DOWNLOAD_TASK_DURATION.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(BACKEND_REQUEST_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(BACKEND_REQUEST_FAILURE_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(BACKEND_REQUEST_DURATION.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(PROXY_REQUEST_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(PROXY_REQUEST_FAILURE_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(PROXY_REQUEST_VIA_DFDAEMON_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(UPDATE_TASK_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(UPDATE_TASK_FAILURE_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(STAT_TASK_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(STAT_TASK_FAILURE_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(LIST_TASK_ENTRIES_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(LIST_TASK_ENTRIES_FAILURE_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(DELETE_TASK_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(DELETE_TASK_FAILURE_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(DELETE_HOST_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(DELETE_HOST_FAILURE_COUNT.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(DISK_SPACE.clone()))
.expect("metric can be registered");
REGISTRY
.register(Box::new(DISK_USAGE_SPACE.clone()))
.expect("metric can be registered");
}
/// reset_custom_metrics resets all custom metrics.
///
/// `reset()` is called on every metric listed below, in the order given.
fn reset_custom_metrics() {
    // Expands to one `<metric>.reset();` statement per listed metric.
    macro_rules! reset_all {
        ($($metric:ident),+ $(,)?) => {
            $(
                $metric.reset();
            )+
        };
    }

    reset_all!(
        VERSION_GAUGE,
        DOWNLOAD_TASK_COUNT,
        DOWNLOAD_TASK_FAILURE_COUNT,
        PREFETCH_TASK_COUNT,
        PREFETCH_TASK_FAILURE_COUNT,
        CONCURRENT_DOWNLOAD_TASK_GAUGE,
        CONCURRENT_UPLOAD_PIECE_GAUGE,
        DOWNLOAD_TRAFFIC,
        UPLOAD_TRAFFIC,
        DOWNLOAD_TASK_DURATION,
        BACKEND_REQUEST_COUNT,
        BACKEND_REQUEST_FAILURE_COUNT,
        BACKEND_REQUEST_DURATION,
        PROXY_REQUEST_COUNT,
        PROXY_REQUEST_FAILURE_COUNT,
        PROXY_REQUEST_VIA_DFDAEMON_COUNT,
        UPDATE_TASK_COUNT,
        UPDATE_TASK_FAILURE_COUNT,
        STAT_TASK_COUNT,
        STAT_TASK_FAILURE_COUNT,
        LIST_TASK_ENTRIES_COUNT,
        LIST_TASK_ENTRIES_FAILURE_COUNT,
        DELETE_TASK_COUNT,
        DELETE_TASK_FAILURE_COUNT,
        DELETE_HOST_COUNT,
        DELETE_HOST_FAILURE_COUNT,
        DISK_SPACE,
        DISK_USAGE_SPACE,
    );
}
/// TaskSize represents the size level of the task.
///
/// The ranges below mirror the thresholds used by `TaskSize::calculate_size_level`;
/// each range includes its lower bound and excludes its upper bound.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TaskSize {
/// Level0 represents unknown size (a size of 0 bytes).
Level0,
/// Level1 represents size range is from 0 (exclusive) to 1M.
Level1,
/// Level2 represents size range is from 1M to 4M.
Level2,
/// Level3 represents size range is from 4M to 8M.
Level3,
/// Level4 represents size range is from 8M to 16M.
Level4,
/// Level5 represents size range is from 16M to 32M.
Level5,
/// Level6 represents size range is from 32M to 64M.
Level6,
/// Level7 represents size range is from 64M to 128M.
Level7,
/// Level8 represents size range is from 128M to 256M.
Level8,
/// Level9 represents size range is from 256M to 512M.
Level9,
/// Level10 represents size range is from 512M to 1G.
Level10,
/// Level11 represents size range is from 1G to 4G.
Level11,
/// Level12 represents size range is from 4G to 8G.
Level12,
/// Level13 represents size range is from 8G to 16G.
Level13,
/// Level14 represents size range is from 16G to 32G.
Level14,
/// Level15 represents size range is from 32G to 64G.
Level15,
/// Level16 represents size range is from 64G to 128G.
Level16,
/// Level17 represents size range is from 128G to 256G.
Level17,
/// Level18 represents size range is from 256G to 512G.
Level18,
/// Level19 represents size range is from 512G to 1T.
Level19,
/// Level20 represents size is 1T or greater.
Level20,
}
/// TaskSize implements the Display trait.
impl std::fmt::Display for TaskSize {
    /// fmt writes the decimal level number of the TaskSize ("0" through "20").
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Pick the textual level first, then emit it with a single write.
        let label = match self {
            TaskSize::Level0 => "0",
            TaskSize::Level1 => "1",
            TaskSize::Level2 => "2",
            TaskSize::Level3 => "3",
            TaskSize::Level4 => "4",
            TaskSize::Level5 => "5",
            TaskSize::Level6 => "6",
            TaskSize::Level7 => "7",
            TaskSize::Level8 => "8",
            TaskSize::Level9 => "9",
            TaskSize::Level10 => "10",
            TaskSize::Level11 => "11",
            TaskSize::Level12 => "12",
            TaskSize::Level13 => "13",
            TaskSize::Level14 => "14",
            TaskSize::Level15 => "15",
            TaskSize::Level16 => "16",
            TaskSize::Level17 => "17",
            TaskSize::Level18 => "18",
            TaskSize::Level19 => "19",
            TaskSize::Level20 => "20",
        };

        f.write_str(label)
    }
}
/// TaskSize implements the size level calculation.
impl TaskSize {
    /// calculate_size_level calculates the size level according to the size.
    ///
    /// A size of 0 maps to Level0. Any other size maps to the first level whose
    /// exclusive upper bound is greater than the size, and to Level20 once the
    /// size reaches 1T.
    pub fn calculate_size_level(size: u64) -> Self {
        const MIB: u64 = 1024 * 1024;
        const GIB: u64 = 1024 * MIB;

        // Exclusive upper bound of every bounded level, in ascending order.
        const UPPER_BOUNDS: [(u64, TaskSize); 19] = [
            (MIB, TaskSize::Level1),
            (4 * MIB, TaskSize::Level2),
            (8 * MIB, TaskSize::Level3),
            (16 * MIB, TaskSize::Level4),
            (32 * MIB, TaskSize::Level5),
            (64 * MIB, TaskSize::Level6),
            (128 * MIB, TaskSize::Level7),
            (256 * MIB, TaskSize::Level8),
            (512 * MIB, TaskSize::Level9),
            (GIB, TaskSize::Level10),
            (4 * GIB, TaskSize::Level11),
            (8 * GIB, TaskSize::Level12),
            (16 * GIB, TaskSize::Level13),
            (32 * GIB, TaskSize::Level14),
            (64 * GIB, TaskSize::Level15),
            (128 * GIB, TaskSize::Level16),
            (256 * GIB, TaskSize::Level17),
            (512 * GIB, TaskSize::Level18),
            (1024 * GIB, TaskSize::Level19),
        ];

        // Zero is reserved for the unknown size.
        if size == 0 {
            return TaskSize::Level0;
        }

        UPPER_BOUNDS
            .iter()
            .find(|(bound, _)| size < *bound)
            .map_or(TaskSize::Level20, |(_, level)| *level)
    }
}
/// collect_upload_task_started_metrics collects the upload task started metrics.
///
/// Increments both UPLOAD_TASK_COUNT and CONCURRENT_UPLOAD_TASK_GAUGE for the
/// label values (stringified `typ`, `tag`, `app`).
pub fn collect_upload_task_started_metrics(typ: i32, tag: &str, app: &str) {
    let task_type = typ.to_string();
    let labels = [task_type.as_str(), tag, app];

    UPLOAD_TASK_COUNT.with_label_values(&labels).inc();
    CONCURRENT_UPLOAD_TASK_GAUGE.with_label_values(&labels).inc();
}
/// collect_upload_task_finished_metrics collects the upload task finished metrics.
///
/// Observes `cost` in milliseconds on UPLOAD_TASK_DURATION, labelled with the
/// stringified `typ` and the size level derived from `content_length`, then
/// decrements CONCURRENT_UPLOAD_TASK_GAUGE for (`typ`, `tag`, `app`).
pub fn collect_upload_task_finished_metrics(
    typ: i32,
    tag: &str,
    app: &str,
    content_length: u64,
    cost: Duration,
) {
    let level = TaskSize::calculate_size_level(content_length);
    let cost_ms = cost.as_millis();

    // Slow Level1 uploads are logged so that they can be analyzed.
    if level == TaskSize::Level1 && cost > UPLOAD_TASK_LEVEL1_DURATION_THRESHOLD {
        warn!(
            "upload task cost is too long: {}ms {}bytes",
            cost_ms, content_length,
        );
    }

    let task_type = typ.to_string();
    let level = level.to_string();

    UPLOAD_TASK_DURATION
        .with_label_values(&[task_type.as_str(), level.as_str()])
        .observe(cost_ms as f64);
    CONCURRENT_UPLOAD_TASK_GAUGE
        .with_label_values(&[task_type.as_str(), tag, app])
        .dec();
}
/// collect_upload_task_failure_metrics collects the upload task failure metrics.
///
/// Increments UPLOAD_TASK_FAILURE_COUNT and decrements CONCURRENT_UPLOAD_TASK_GAUGE
/// for the label values (stringified `typ`, `tag`, `app`).
pub fn collect_upload_task_failure_metrics(typ: i32, tag: &str, app: &str) {
    let task_type = typ.to_string();
    let labels = [task_type.as_str(), tag, app];

    UPLOAD_TASK_FAILURE_COUNT.with_label_values(&labels).inc();
    CONCURRENT_UPLOAD_TASK_GAUGE.with_label_values(&labels).dec();
}
/// collect_download_task_started_metrics collects the download task started metrics.
///
/// Increments both DOWNLOAD_TASK_COUNT and CONCURRENT_DOWNLOAD_TASK_GAUGE for the
/// label values (stringified `typ`, `tag`, `app`, `priority`).
pub fn collect_download_task_started_metrics(typ: i32, tag: &str, app: &str, priority: &str) {
    let task_type = typ.to_string();
    let labels = [task_type.as_str(), tag, app, priority];

    DOWNLOAD_TASK_COUNT.with_label_values(&labels).inc();
    CONCURRENT_DOWNLOAD_TASK_GAUGE
        .with_label_values(&labels)
        .inc();
}
/// collect_download_task_finished_metrics collects the download task finished metrics.
///
/// The task size is `range.length` when a range is given and `content_length`
/// otherwise. `cost` is observed in milliseconds on DOWNLOAD_TASK_DURATION,
/// labelled with the stringified `typ` and the size level, and
/// CONCURRENT_DOWNLOAD_TASK_GAUGE is decremented for (`typ`, `tag`, `app`, `priority`).
pub fn collect_download_task_finished_metrics(
    typ: i32,
    tag: &str,
    app: &str,
    priority: &str,
    content_length: u64,
    range: Option<Range>,
    cost: Duration,
) {
    // A ranged download is sized by the requested range, not by the whole content.
    let size = range.map_or(content_length, |range| range.length);
    let level = TaskSize::calculate_size_level(size);
    let cost_ms = cost.as_millis();

    // Nydus will request the small range of the file, so the download task duration
    // should be short. Collect the slow download Level1 task for analysis.
    if level == TaskSize::Level1 && cost > DOWNLOAD_TASK_LEVEL1_DURATION_THRESHOLD {
        warn!(
            "download task cost is too long: {}ms {}bytes",
            cost_ms, size,
        );
    }

    let task_type = typ.to_string();
    let level = level.to_string();

    DOWNLOAD_TASK_DURATION
        .with_label_values(&[task_type.as_str(), level.as_str()])
        .observe(cost_ms as f64);
    CONCURRENT_DOWNLOAD_TASK_GAUGE
        .with_label_values(&[task_type.as_str(), tag, app, priority])
        .dec();
}
/// collect_download_task_failure_metrics collects the download task failure metrics.
///
/// Increments DOWNLOAD_TASK_FAILURE_COUNT and decrements CONCURRENT_DOWNLOAD_TASK_GAUGE
/// for the label values (stringified `typ`, `tag`, `app`, `priority`).
pub fn collect_download_task_failure_metrics(typ: i32, tag: &str, app: &str, priority: &str) {
    let task_type = typ.to_string();
    let labels = [task_type.as_str(), tag, app, priority];

    DOWNLOAD_TASK_FAILURE_COUNT
        .with_label_values(&labels)
        .inc();
    CONCURRENT_DOWNLOAD_TASK_GAUGE
        .with_label_values(&labels)
        .dec();
}
/// collect_prefetch_task_started_metrics collects the prefetch task started metrics.
///
/// Increments PREFETCH_TASK_COUNT for (stringified `typ`, `tag`, `app`, `priority`).
pub fn collect_prefetch_task_started_metrics(typ: i32, tag: &str, app: &str, priority: &str) {
    let task_type = typ.to_string();
    let labels = [task_type.as_str(), tag, app, priority];

    PREFETCH_TASK_COUNT.with_label_values(&labels).inc();
}
/// collect_prefetch_task_failure_metrics collects the prefetch task failure metrics.
///
/// Increments PREFETCH_TASK_FAILURE_COUNT for (stringified `typ`, `tag`, `app`, `priority`).
pub fn collect_prefetch_task_failure_metrics(typ: i32, tag: &str, app: &str, priority: &str) {
    let task_type = typ.to_string();
    let labels = [task_type.as_str(), tag, app, priority];

    PREFETCH_TASK_FAILURE_COUNT.with_label_values(&labels).inc();
}
/// collect_download_piece_traffic_metrics collects the download piece traffic metrics.
///
/// Adds `length` to DOWNLOAD_TRAFFIC for the traffic type name of `typ` and the
/// stringified `task_type`.
pub fn collect_download_piece_traffic_metrics(typ: &TrafficType, task_type: i32, length: u64) {
    let task_type = task_type.to_string();
    let labels = [typ.as_str_name(), task_type.as_str()];

    DOWNLOAD_TRAFFIC.with_label_values(&labels).inc_by(length);
}
/// collect_upload_piece_started_metrics collects the upload piece started metrics.
///
/// Increments the label-less CONCURRENT_UPLOAD_PIECE_GAUGE.
pub fn collect_upload_piece_started_metrics() {
    let concurrent_pieces = CONCURRENT_UPLOAD_PIECE_GAUGE.with_label_values(&[]);
    concurrent_pieces.inc();
}
/// collect_upload_piece_finished_metrics collects the upload piece finished metrics.
///
/// Decrements the label-less CONCURRENT_UPLOAD_PIECE_GAUGE.
pub fn collect_upload_piece_finished_metrics() {
    let concurrent_pieces = CONCURRENT_UPLOAD_PIECE_GAUGE.with_label_values(&[]);
    concurrent_pieces.dec();
}
/// collect_upload_piece_traffic_metrics collects the upload piece traffic metrics.
///
/// Adds `length` to UPLOAD_TRAFFIC for the stringified `task_type`.
pub fn collect_upload_piece_traffic_metrics(task_type: i32, length: u64) {
    let task_type = task_type.to_string();
    let labels = [task_type.as_str()];

    UPLOAD_TRAFFIC.with_label_values(&labels).inc_by(length);
}
/// collect_upload_piece_failure_metrics collects the upload piece failure metrics.
///
/// Decrements the label-less CONCURRENT_UPLOAD_PIECE_GAUGE.
pub fn collect_upload_piece_failure_metrics() {
    let concurrent_pieces = CONCURRENT_UPLOAD_PIECE_GAUGE.with_label_values(&[]);
    concurrent_pieces.dec();
}
/// collect_backend_request_started_metrics collects the backend request started metrics.
///
/// Increments BACKEND_REQUEST_COUNT for (`scheme`, `method`).
pub fn collect_backend_request_started_metrics(scheme: &str, method: &str) {
    let labels = [scheme, method];
    BACKEND_REQUEST_COUNT.with_label_values(&labels).inc();
}
/// collect_backend_request_failure_metrics collects the backend request failure metrics.
///
/// Increments BACKEND_REQUEST_FAILURE_COUNT for (`scheme`, `method`).
pub fn collect_backend_request_failure_metrics(scheme: &str, method: &str) {
    let labels = [scheme, method];
    BACKEND_REQUEST_FAILURE_COUNT
        .with_label_values(&labels)
        .inc();
}
/// collect_backend_request_finished_metrics collects the backend request finished metrics.
///
/// Observes `cost` in milliseconds on BACKEND_REQUEST_DURATION for (`scheme`, `method`).
pub fn collect_backend_request_finished_metrics(scheme: &str, method: &str, cost: Duration) {
    let cost_ms = cost.as_millis() as f64;
    let labels = [scheme, method];

    BACKEND_REQUEST_DURATION
        .with_label_values(&labels)
        .observe(cost_ms);
}
/// collect_proxy_request_started_metrics collects the proxy request started metrics.
///
/// Increments the label-less PROXY_REQUEST_COUNT.
pub fn collect_proxy_request_started_metrics() {
    let requests = PROXY_REQUEST_COUNT.with_label_values(&[]);
    requests.inc();
}
/// collect_proxy_request_failure_metrics collects the proxy request failure metrics.
///
/// Increments the label-less PROXY_REQUEST_FAILURE_COUNT.
pub fn collect_proxy_request_failure_metrics() {
    let failures = PROXY_REQUEST_FAILURE_COUNT.with_label_values(&[]);
    failures.inc();
}
/// collect_proxy_request_via_dfdaemon_metrics collects the proxy request via dfdaemon metrics.
///
/// Increments the label-less PROXY_REQUEST_VIA_DFDAEMON_COUNT.
pub fn collect_proxy_request_via_dfdaemon_metrics() {
    let requests = PROXY_REQUEST_VIA_DFDAEMON_COUNT.with_label_values(&[]);
    requests.inc();
}
/// collect_update_task_started_metrics collects the update task started metrics.
///
/// Increments UPDATE_TASK_COUNT for the stringified `typ`.
pub fn collect_update_task_started_metrics(typ: i32) {
    let task_type = typ.to_string();
    let labels = [task_type.as_str()];

    UPDATE_TASK_COUNT.with_label_values(&labels).inc();
}
/// collect_update_task_failure_metrics collects the update task failure metrics.
///
/// Increments UPDATE_TASK_FAILURE_COUNT for the stringified `typ`.
pub fn collect_update_task_failure_metrics(typ: i32) {
    let task_type = typ.to_string();
    let labels = [task_type.as_str()];

    UPDATE_TASK_FAILURE_COUNT.with_label_values(&labels).inc();
}
/// collect_stat_task_started_metrics collects the stat task started metrics.
///
/// Increments STAT_TASK_COUNT for the stringified `typ`.
pub fn collect_stat_task_started_metrics(typ: i32) {
    let task_type = typ.to_string();
    let labels = [task_type.as_str()];

    STAT_TASK_COUNT.with_label_values(&labels).inc();
}
/// collect_stat_task_failure_metrics collects the stat task failure metrics.
///
/// Increments STAT_TASK_FAILURE_COUNT for the stringified `typ`.
pub fn collect_stat_task_failure_metrics(typ: i32) {
    let task_type = typ.to_string();
    let labels = [task_type.as_str()];

    STAT_TASK_FAILURE_COUNT.with_label_values(&labels).inc();
}
/// collect_list_task_entries_started_metrics collects the list task entries started metrics.
///
/// Increments LIST_TASK_ENTRIES_COUNT for the stringified `typ`.
pub fn collect_list_task_entries_started_metrics(typ: i32) {
    let task_type = typ.to_string();
    let labels = [task_type.as_str()];

    LIST_TASK_ENTRIES_COUNT.with_label_values(&labels).inc();
}
/// collect_list_task_entries_failure_metrics collects the list task entries failure metrics.
///
/// Increments LIST_TASK_ENTRIES_FAILURE_COUNT for the stringified `typ`.
pub fn collect_list_task_entries_failure_metrics(typ: i32) {
    let task_type = typ.to_string();
    let labels = [task_type.as_str()];

    LIST_TASK_ENTRIES_FAILURE_COUNT
        .with_label_values(&labels)
        .inc();
}
/// collect_delete_task_started_metrics collects the delete task started metrics.
///
/// Increments DELETE_TASK_COUNT for the stringified `typ`.
pub fn collect_delete_task_started_metrics(typ: i32) {
    let task_type = typ.to_string();
    let labels = [task_type.as_str()];

    DELETE_TASK_COUNT.with_label_values(&labels).inc();
}
/// collect_delete_task_failure_metrics collects the delete task failure metrics.
///
/// Increments DELETE_TASK_FAILURE_COUNT for the stringified `typ`.
pub fn collect_delete_task_failure_metrics(typ: i32) {
    let task_type = typ.to_string();
    let labels = [task_type.as_str()];

    DELETE_TASK_FAILURE_COUNT.with_label_values(&labels).inc();
}
/// collect_delete_host_started_metrics collects the delete host started metrics.
///
/// Increments the label-less DELETE_HOST_COUNT.
pub fn collect_delete_host_started_metrics() {
    let deletions = DELETE_HOST_COUNT.with_label_values(&[]);
    deletions.inc();
}
/// collect_delete_host_failure_metrics collects the delete host failure metrics.
///
/// Increments the label-less DELETE_HOST_FAILURE_COUNT.
pub fn collect_delete_host_failure_metrics() {
    let failures = DELETE_HOST_FAILURE_COUNT.with_label_values(&[]);
    failures.inc();
}
/// collect_disk_metrics collects the disk metrics.
///
/// Queries the filesystem containing `path` and stores its total space in
/// DISK_SPACE and its used space (total minus available) in DISK_USAGE_SPACE.
/// When the filesystem statistics cannot be read, the error is logged and both
/// gauges are left untouched.
pub fn collect_disk_metrics(path: &Path) {
    // Collect disk space metrics.
    let stats = match fs2::statvfs(path) {
        Ok(stats) => stats,
        Err(err) => {
            error!("failed to get disk space: {}", err);
            return;
        }
    };

    // The gauges hold i64 values; clamp instead of letting the cast wrap negative.
    let to_gauge_value = |bytes: u64| bytes.min(i64::MAX as u64) as i64;

    let total_space = stats.total_space();

    // Saturate so that a filesystem reporting more available than total space
    // yields 0 rather than an overflow panic (debug) or a wrapped value (release).
    let usage_space = total_space.saturating_sub(stats.available_space());

    DISK_SPACE
        .with_label_values(&[])
        .set(to_gauge_value(total_space));
    DISK_USAGE_SPACE
        .with_label_values(&[])
        .set(to_gauge_value(usage_space));
}
/// Metrics is the metrics server.
///
/// It is built with `Metrics::new` and started with `Metrics::run`.
#[derive(Debug)]
pub struct Metrics {
/// config is the configuration of the dfdaemon.
config: Arc<Config>,
/// shutdown is used to shutdown the metrics server.
shutdown: shutdown::Shutdown,
/// _shutdown_complete is used to notify the metrics server is shutdown.
/// NOTE(review): the sender is never used directly in this impl; presumably
/// dropping it is what signals completion to the receiver — confirm.
_shutdown_complete: mpsc::UnboundedSender<()>,
}
/// Metrics implements the metrics server.
impl Metrics {
    /// new creates a new Metrics.
    pub fn new(
        config: Arc<Config>,
        shutdown: shutdown::Shutdown,
        shutdown_complete_tx: mpsc::UnboundedSender<()>,
    ) -> Self {
        Self {
            config,
            shutdown,
            _shutdown_complete: shutdown_complete_tx,
        }
    }

    /// run starts the metrics server.
    ///
    /// Registers the custom metrics, publishes the version gauge and serves
    /// `GET /metrics` and `DELETE /metrics` until either the server ends or a
    /// shutdown signal is received. The configured server ip is unwrapped, so
    /// run panics when it is absent.
    pub async fn run(&self) {
        // Clone the shutdown channel.
        let mut shutdown = self.shutdown.clone();

        // Register custom metrics.
        register_custom_metrics();

        // VERSION_GAUGE sets the version info of the service.
        VERSION_GAUGE
            .get_metric_with_label_values(&[
                CARGO_PKG_VERSION,
                GIT_COMMIT_SHORT_HASH,
                BUILD_PLATFORM,
                GIT_COMMIT_DATE,
            ])
            .unwrap()
            .set(1);

        // Clone the config, it is moved into the GET route below.
        let config = self.config.clone();

        // Create the metrics server address.
        let addr = SocketAddr::new(
            self.config.metrics.server.ip.unwrap(),
            self.config.metrics.server.port,
        );

        // Get the metrics route.
        let get_metrics_route = warp::path!("metrics")
            .and(warp::get())
            .and(warp::path::end())
            .and_then(move || Self::get_metrics_handler(config.clone()));

        // Delete the metrics route.
        let delete_metrics_route = warp::path!("metrics")
            .and(warp::delete())
            .and(warp::path::end())
            .and_then(Self::delete_metrics_handler);

        let metrics_routes = get_metrics_route.or(delete_metrics_route);

        // Start the metrics server and wait for it to finish.
        info!("metrics server listening on {}", addr);
        tokio::select! {
            _ = warp::serve(metrics_routes).run(addr) => {
                // Metrics server ended.
                info!("metrics server ended");
            }
            _ = shutdown.recv() => {
                // Metrics server shutting down with signals.
                info!("metrics server shutting down");
            }
        }
    }

    /// get_metrics_handler handles the metrics request of getting.
    ///
    /// Refreshes the disk metrics, then returns the text encoding of the metrics
    /// gathered from REGISTRY followed by those returned by `gather()`
    /// (presumably the default prometheus registry — confirm). Encoding and
    /// UTF-8 failures are logged and do not fail the request.
    #[instrument(skip_all)]
    async fn get_metrics_handler(config: Arc<Config>) -> Result<impl Reply, Rejection> {
        // Collect the disk space metrics.
        collect_disk_metrics(config.storage.dir.as_path());

        let encoder = TextEncoder::new();

        // Encode custom metrics.
        let mut buf = Vec::new();
        if let Err(err) = encoder.encode(&REGISTRY.gather(), &mut buf) {
            error!("could not encode custom metrics: {}", err);
        };

        // The buffer is moved into the String, no copy is needed.
        let mut res = match String::from_utf8(buf) {
            Ok(v) => v,
            Err(err) => {
                error!("custom metrics could not be from_utf8'd: {}", err);
                String::default()
            }
        };

        // Encode prometheus metrics.
        let mut buf = Vec::new();
        if let Err(err) = encoder.encode(&gather(), &mut buf) {
            error!("could not encode prometheus metrics: {}", err);
        };

        let res_prometheus = match String::from_utf8(buf) {
            Ok(v) => v,
            Err(err) => {
                error!("prometheus metrics could not be from_utf8'd: {}", err);
                String::default()
            }
        };

        res.push_str(&res_prometheus);
        Ok(res)
    }

    /// delete_metrics_handler handles the metrics request of deleting.
    ///
    /// Resets every custom metric and replies with an empty body.
    #[instrument(skip_all)]
    async fn delete_metrics_handler() -> Result<impl Reply, Rejection> {
        reset_custom_metrics();
        Ok(Vec::new())
    }
}

View File

@ -1,405 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use bytesize::ByteSize;
use dragonfly_api::common::v2::Priority;
use reqwest::header::HeaderMap;
use tracing::error;
/// DRAGONFLY_TAG_HEADER is the header key of tag in http request.
pub const DRAGONFLY_TAG_HEADER: &str = "X-Dragonfly-Tag";
/// DRAGONFLY_APPLICATION_HEADER is the header key of application in http request.
pub const DRAGONFLY_APPLICATION_HEADER: &str = "X-Dragonfly-Application";
/// DRAGONFLY_PRIORITY_HEADER is the header key of priority in http request,
/// refer to https://github.com/dragonflyoss/api/blob/main/proto/common.proto#L67.
pub const DRAGONFLY_PRIORITY_HEADER: &str = "X-Dragonfly-Priority";
/// DRAGONFLY_REGISTRY_HEADER is the header key of custom address of container registry.
pub const DRAGONFLY_REGISTRY_HEADER: &str = "X-Dragonfly-Registry";
/// DRAGONFLY_FILTERED_QUERY_PARAMS_HEADER is the header key of filtered query params in http request,
/// it is the filtered query params to generate the task id.
/// When filter is "X-Dragonfly-Filtered-Query-Params: Signature,Expires,ns" for example:
/// http://example.com/xyz?Expires=e1&Signature=s1&ns=docker.io and http://example.com/xyz?Expires=e2&Signature=s2&ns=docker.io
/// will generate the same task id.
/// Default value includes the filtered query params of s3, gcs, oss, obs, cos.
pub const DRAGONFLY_FILTERED_QUERY_PARAMS_HEADER: &str = "X-Dragonfly-Filtered-Query-Params";
/// DRAGONFLY_USE_P2P_HEADER is the header key of use p2p in http request.
/// If the value is "true", the request will use P2P technology to distribute
/// the content. If the value is "false" but the url matches the regular expression in proxy config,
/// the request will also use P2P technology to distribute the content.
pub const DRAGONFLY_USE_P2P_HEADER: &str = "X-Dragonfly-Use-P2P";
/// DRAGONFLY_PREFETCH_HEADER is the header key of prefetch in http request.
/// X-Dragonfly-Prefetch priority is higher than prefetch in config.
/// If the value is "true", the range request will prefetch the entire file.
/// If the value is "false", the range request will fetch the range content.
pub const DRAGONFLY_PREFETCH_HEADER: &str = "X-Dragonfly-Prefetch";
/// DRAGONFLY_OUTPUT_PATH_HEADER is the header key of absolute output path in http request.
///
/// If `X-Dragonfly-Output-Path` is set, the downloaded file will be saved to the specified path.
/// Dfdaemon will try to create hard link to the output path before starting the download. If hard link creation fails,
/// it will copy the file to the output path after the download is completed.
/// For more details refer to https://github.com/dragonflyoss/design/blob/main/systems-analysis/file-download-workflow-with-hard-link/README.md.
pub const DRAGONFLY_OUTPUT_PATH_HEADER: &str = "X-Dragonfly-Output-Path";
/// DRAGONFLY_FORCE_HARD_LINK_HEADER is the header key of force hard link in http request.
///
/// `X-Dragonfly-Force-Hard-Link` is the flag to indicate whether the download file must be hard linked to the output path.
/// For more details refer to https://github.com/dragonflyoss/design/blob/main/systems-analysis/file-download-workflow-with-hard-link/README.md.
pub const DRAGONFLY_FORCE_HARD_LINK_HEADER: &str = "X-Dragonfly-Force-Hard-Link";
/// DRAGONFLY_PIECE_LENGTH_HEADER is the header key of piece length in http request.
/// If the value is set, the piece length will be used to download the file.
/// Different piece length will generate different task id. The value needs to
/// be set with human readable format and needs to be greater than or equal
/// to 4mib, for example: 4mib, 1gib.
pub const DRAGONFLY_PIECE_LENGTH_HEADER: &str = "X-Dragonfly-Piece-Length";
/// DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID_HEADER is the header key of content for calculating task id.
/// If DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID_HEADER is set, use its value to calculate the task ID.
/// Otherwise, calculate the task ID based on `url`, `piece_length`, `tag`, `application`, and `filtered_query_params`.
pub const DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID_HEADER: &str =
"X-Dragonfly-Content-For-Calculating-Task-ID";
/// DRAGONFLY_TASK_DOWNLOAD_FINISHED_HEADER is the response header key to indicate whether the task download finished.
/// When the task download is finished, the response will include this header with the value `"true"`,
/// indicating that the download hit the local cache.
pub const DRAGONFLY_TASK_DOWNLOAD_FINISHED_HEADER: &str = "X-Dragonfly-Task-Download-Finished";
/// DRAGONFLY_TASK_ID_HEADER is the response header key of task id. Client will calculate the task ID
/// based on `url`, `piece_length`, `tag`, `application`, and `filtered_query_params`.
pub const DRAGONFLY_TASK_ID_HEADER: &str = "X-Dragonfly-Task-ID";
/// get_tag reads the `DRAGONFLY_TAG_HEADER` value from the http header.
///
/// Returns `None` when the header is absent or its value is not visible ASCII text.
pub fn get_tag(header: &HeaderMap) -> Option<String> {
    let raw = header.get(DRAGONFLY_TAG_HEADER)?;
    let text = raw.to_str().ok()?;
    Some(text.to_owned())
}
/// get_application reads the `DRAGONFLY_APPLICATION_HEADER` value from the http header.
///
/// Returns `None` when the header is absent or its value is not visible ASCII text.
pub fn get_application(header: &HeaderMap) -> Option<String> {
    let raw = header.get(DRAGONFLY_APPLICATION_HEADER)?;
    let text = raw.to_str().ok()?;
    Some(text.to_owned())
}
/// get_priority reads the download priority from the http header.
///
/// Falls back to `Priority::Level6` when the header is absent, is not visible ASCII text,
/// or does not parse as an `i32`; the latter two cases are logged as errors.
pub fn get_priority(header: &HeaderMap) -> i32 {
    let fallback = Priority::Level6 as i32;
    let Some(raw) = header.get(DRAGONFLY_PRIORITY_HEADER) else {
        return fallback;
    };

    let text = match raw.to_str() {
        Ok(text) => text,
        Err(err) => {
            error!("get priority from header failed: {}", err);
            return fallback;
        }
    };

    text.parse::<i32>().unwrap_or_else(|err| {
        error!("parse priority from header failed: {}", err);
        fallback
    })
}
/// get_registry reads the custom container registry address from the http header.
///
/// Returns `None` when the header is absent or its value is not visible ASCII text.
pub fn get_registry(header: &HeaderMap) -> Option<String> {
    let raw = header.get(DRAGONFLY_REGISTRY_HEADER)?;
    let text = raw.to_str().ok()?;
    Some(text.to_owned())
}
/// get_filtered_query_params reads the filtered query params from the http header.
///
/// The header value is split on `,` and every entry is trimmed. When the header is absent,
/// or its value is not visible ASCII text (which is logged as an error),
/// `default_filtered_query_params` is returned unchanged.
pub fn get_filtered_query_params(
    header: &HeaderMap,
    default_filtered_query_params: Vec<String>,
) -> Vec<String> {
    let Some(raw) = header.get(DRAGONFLY_FILTERED_QUERY_PARAMS_HEADER) else {
        return default_filtered_query_params;
    };

    match raw.to_str() {
        Err(err) => {
            error!("get filters from header failed: {}", err);
            default_filtered_query_params
        }
        Ok(list) => list.split(',').map(str::trim).map(String::from).collect(),
    }
}
/// get_use_p2p reads the use-p2p flag from the http header.
///
/// Only the value `true` (compared ASCII case-insensitively) enables the flag. A missing
/// header or a value that is not visible ASCII text yields `false`; the latter is logged.
pub fn get_use_p2p(header: &HeaderMap) -> bool {
    header
        .get(DRAGONFLY_USE_P2P_HEADER)
        .is_some_and(|raw| match raw.to_str() {
            Ok(flag) => flag.eq_ignore_ascii_case("true"),
            Err(err) => {
                error!("get use p2p from header failed: {}", err);
                false
            }
        })
}
/// get_prefetch gets the prefetch from http header.
pub fn get_prefetch(header: &HeaderMap) -> Option<bool> {
match header.get(DRAGONFLY_PREFETCH_HEADER) {
Some(value) => match value.to_str() {
Ok(value) => Some(value.eq_ignore_ascii_case("true")),
Err(err) => {
error!("get use p2p from header failed: {}", err);
None
}
},
None => None,
}
}
/// get_output_path reads the `DRAGONFLY_OUTPUT_PATH_HEADER` value from the http header.
///
/// Returns `None` when the header is absent or its value is not visible ASCII text.
pub fn get_output_path(header: &HeaderMap) -> Option<String> {
    let raw = header.get(DRAGONFLY_OUTPUT_PATH_HEADER)?;
    let text = raw.to_str().ok()?;
    Some(text.to_owned())
}
/// get_force_hard_link reads the force-hard-link flag from the http header.
///
/// Only the value `true` (compared ASCII case-insensitively) enables the flag. A missing
/// header or a value that is not visible ASCII text yields `false`; the latter is logged.
pub fn get_force_hard_link(header: &HeaderMap) -> bool {
    let Some(raw) = header.get(DRAGONFLY_FORCE_HARD_LINK_HEADER) else {
        return false;
    };

    match raw.to_str() {
        Ok(flag) => flag.eq_ignore_ascii_case("true"),
        Err(err) => {
            error!("get force hard link from header failed: {}", err);
            false
        }
    }
}
/// get_piece_length reads the piece length from the http header.
///
/// The value is parsed as a human-readable `ByteSize` (for example `4mib`). Returns `None`
/// when the header is absent, is not visible ASCII text, or cannot be parsed; the latter
/// two cases are logged as errors.
pub fn get_piece_length(header: &HeaderMap) -> Option<ByteSize> {
    let raw = header.get(DRAGONFLY_PIECE_LENGTH_HEADER)?;

    let text = match raw.to_str() {
        Ok(text) => text,
        Err(err) => {
            error!("get piece length from header failed: {}", err);
            return None;
        }
    };

    match text.parse::<ByteSize>() {
        Ok(size) => Some(size),
        Err(err) => {
            error!("parse piece length from header failed: {}", err);
            None
        }
    }
}
/// get_content_for_calculating_task_id reads the content used for calculating the task id
/// from the http header.
///
/// Returns `None` when the header is absent or its value is not visible ASCII text.
pub fn get_content_for_calculating_task_id(header: &HeaderMap) -> Option<String> {
    let raw = header.get(DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID_HEADER)?;
    let text = raw.to_str().ok()?;
    Some(text.to_owned())
}
/// Unit tests for the Dragonfly header helpers. Each test covers the "header present"
/// case and the "header absent" fallback; parsing helpers also cover invalid values.
#[cfg(test)]
mod tests {
    use super::*;
    use reqwest::header::{HeaderMap, HeaderValue};

    #[test]
    fn test_get_tag() {
        let mut headers = HeaderMap::new();
        headers.insert(DRAGONFLY_TAG_HEADER, HeaderValue::from_static("test-tag"));
        assert_eq!(get_tag(&headers), Some("test-tag".to_string()));

        let empty_headers = HeaderMap::new();
        assert_eq!(get_tag(&empty_headers), None);
    }

    #[test]
    fn test_get_application() {
        let mut headers = HeaderMap::new();
        headers.insert(
            DRAGONFLY_APPLICATION_HEADER,
            HeaderValue::from_static("test-app"),
        );
        assert_eq!(get_application(&headers), Some("test-app".to_string()));

        let empty_headers = HeaderMap::new();
        assert_eq!(get_application(&empty_headers), None);
    }

    #[test]
    fn test_get_priority() {
        let mut headers = HeaderMap::new();
        headers.insert(DRAGONFLY_PRIORITY_HEADER, HeaderValue::from_static("5"));
        assert_eq!(get_priority(&headers), 5);

        let empty_headers = HeaderMap::new();
        assert_eq!(get_priority(&empty_headers), Priority::Level6 as i32);

        // An unparsable value falls back to the default priority.
        headers.insert(
            DRAGONFLY_PRIORITY_HEADER,
            HeaderValue::from_static("invalid"),
        );
        assert_eq!(get_priority(&headers), Priority::Level6 as i32);
    }

    #[test]
    fn test_get_registry() {
        let mut headers = HeaderMap::new();
        headers.insert(
            DRAGONFLY_REGISTRY_HEADER,
            HeaderValue::from_static("test-registry"),
        );
        assert_eq!(get_registry(&headers), Some("test-registry".to_string()));

        let empty_headers = HeaderMap::new();
        assert_eq!(get_registry(&empty_headers), None);
    }

    #[test]
    fn test_get_filtered_query_params() {
        let mut headers = HeaderMap::new();
        headers.insert(
            DRAGONFLY_FILTERED_QUERY_PARAMS_HEADER,
            HeaderValue::from_static("param1,param2"),
        );
        assert_eq!(
            get_filtered_query_params(&headers, vec!["default".to_string()]),
            vec!["param1".to_string(), "param2".to_string()]
        );

        let empty_headers = HeaderMap::new();
        assert_eq!(
            get_filtered_query_params(&empty_headers, vec!["default".to_string()]),
            vec!["default".to_string()]
        );
    }

    #[test]
    fn test_get_use_p2p() {
        let mut headers = HeaderMap::new();
        headers.insert(DRAGONFLY_USE_P2P_HEADER, HeaderValue::from_static("true"));
        assert!(get_use_p2p(&headers));

        headers.insert(DRAGONFLY_USE_P2P_HEADER, HeaderValue::from_static("false"));
        assert!(!get_use_p2p(&headers));

        let empty_headers = HeaderMap::new();
        assert!(!get_use_p2p(&empty_headers));
    }

    #[test]
    fn test_get_prefetch() {
        let mut headers = HeaderMap::new();
        headers.insert(DRAGONFLY_PREFETCH_HEADER, HeaderValue::from_static("true"));
        assert_eq!(get_prefetch(&headers), Some(true));

        headers.insert(DRAGONFLY_PREFETCH_HEADER, HeaderValue::from_static("false"));
        assert_eq!(get_prefetch(&headers), Some(false));

        let empty_headers = HeaderMap::new();
        assert_eq!(get_prefetch(&empty_headers), None);
    }

    #[test]
    fn test_get_output_path() {
        let mut headers = HeaderMap::new();
        headers.insert(
            DRAGONFLY_OUTPUT_PATH_HEADER,
            HeaderValue::from_static("/path/to/output"),
        );
        assert_eq!(
            get_output_path(&headers),
            Some("/path/to/output".to_string())
        );

        let empty_headers = HeaderMap::new();
        assert_eq!(get_output_path(&empty_headers), None);
    }

    #[test]
    fn test_get_force_hard_link() {
        let mut headers = HeaderMap::new();
        headers.insert(
            DRAGONFLY_FORCE_HARD_LINK_HEADER,
            HeaderValue::from_static("true"),
        );
        assert!(get_force_hard_link(&headers));

        headers.insert(
            DRAGONFLY_FORCE_HARD_LINK_HEADER,
            HeaderValue::from_static("false"),
        );
        assert!(!get_force_hard_link(&headers));

        let empty_headers = HeaderMap::new();
        assert!(!get_force_hard_link(&empty_headers));
    }

    #[test]
    fn test_get_piece_length() {
        let mut headers = HeaderMap::new();
        headers.insert(
            DRAGONFLY_PIECE_LENGTH_HEADER,
            HeaderValue::from_static("4mib"),
        );
        assert_eq!(get_piece_length(&headers), Some(ByteSize::mib(4)));

        let empty_headers = HeaderMap::new();
        assert_eq!(get_piece_length(&empty_headers), None);

        headers.insert(
            DRAGONFLY_PIECE_LENGTH_HEADER,
            HeaderValue::from_static("invalid"),
        );
        assert_eq!(get_piece_length(&headers), None);

        // The helper only parses; it does not reject values below the documented minimum.
        headers.insert(DRAGONFLY_PIECE_LENGTH_HEADER, HeaderValue::from_static("0"));
        assert_eq!(get_piece_length(&headers), Some(ByteSize::b(0)));
    }

    #[test]
    fn test_get_content_for_calculating_task_id() {
        let mut headers = HeaderMap::new();
        headers.insert(
            DRAGONFLY_CONTENT_FOR_CALCULATING_TASK_ID_HEADER,
            HeaderValue::from_static("test-content"),
        );
        assert_eq!(
            get_content_for_calculating_task_id(&headers),
            Some("test-content".to_string())
        );

        // Exercise the function under test; this case previously asserted on get_registry.
        let empty_headers = HeaderMap::new();
        assert_eq!(get_content_for_calculating_task_id(&empty_headers), None);
    }
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,978 +0,0 @@
/*
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use super::*;
use crate::metrics::{
collect_backend_request_failure_metrics, collect_backend_request_finished_metrics,
collect_backend_request_started_metrics, collect_download_piece_traffic_metrics,
collect_upload_piece_traffic_metrics,
};
use chrono::Utc;
use dragonfly_api::common::v2::{Hdfs, ObjectStorage, Range, TrafficType};
use dragonfly_client_backend::{BackendFactory, GetRequest};
use dragonfly_client_config::dfdaemon::Config;
use dragonfly_client_core::{error::BackendError, Error, Result};
use dragonfly_client_storage::{metadata, Storage};
use dragonfly_client_util::id_generator::IDGenerator;
use leaky_bucket::RateLimiter;
use reqwest::header::{self, HeaderMap};
use std::collections::HashMap;
use std::io::Cursor;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::io::{AsyncRead, AsyncReadExt};
use tracing::{error, info, instrument, Span};
/// MAX_PIECE_COUNT is the target maximum piece count. When the piece length is optimized
/// by the file length, the content length is divided by MAX_PIECE_COUNT to pick a piece
/// length. Because the piece length is capped at MAX_PIECE_LENGTH, the piece count of a
/// large file can still be greater than MAX_PIECE_COUNT.
pub const MAX_PIECE_COUNT: u64 = 500;
/// MIN_PIECE_LENGTH is the minimum piece length in bytes (4 MiB).
pub const MIN_PIECE_LENGTH: u64 = 4 * 1024 * 1024;
/// MAX_PIECE_LENGTH is the maximum piece length in bytes (64 MiB).
pub const MAX_PIECE_LENGTH: u64 = 64 * 1024 * 1024;
/// PieceLengthStrategy sets the optimization strategy of piece length.
///
/// The strategy only carries plain integers, so it derives the cheap standard traits
/// to make it loggable, copyable and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PieceLengthStrategy {
    /// OptimizeByFileLength optimizes the piece length by the file length.
    /// The payload is the content length of the file in bytes.
    OptimizeByFileLength(u64),

    /// FixedPieceLength sets the fixed piece length.
    /// The payload is the piece length in bytes and is used as is.
    FixedPieceLength(u64),
}
/// Piece represents a piece manager. It calculates which pieces of a task are needed and
/// moves piece content between the local storage, parent peers and the source backend,
/// applying the configured rate limits.
pub struct Piece {
/// config is the configuration of the dfdaemon. The piece timeouts and the rate limits
/// are read from it.
config: Arc<Config>,
/// id_generator is the id generator. It is used to resolve the task type reported with
/// the traffic metrics.
id_generator: Arc<IDGenerator>,
/// storage is the local storage holding piece metadata and content.
storage: Arc<Storage>,
/// downloader is the piece downloader used to fetch pieces from parent peers.
downloader: Arc<dyn piece_downloader::Downloader>,
/// backend_factory is the backend factory used to build the source backend for a url.
backend_factory: Arc<BackendFactory>,
/// download_rate_limiter is the rate limiter of the download speed in bps(bytes per second).
download_rate_limiter: Arc<RateLimiter>,
/// upload_rate_limiter is the rate limiter of the upload speed in bps(bytes per second).
upload_rate_limiter: Arc<RateLimiter>,
/// prefetch_rate_limiter is the rate limiter of the prefetch speed in bps(bytes per second).
prefetch_rate_limiter: Arc<RateLimiter>,
}
/// Piece implements the piece manager.
impl Piece {
/// new returns a new Piece.
pub fn new(
config: Arc<Config>,
id_generator: Arc<IDGenerator>,
storage: Arc<Storage>,
backend_factory: Arc<BackendFactory>,
) -> Result<Self> {
Ok(Self {
config: config.clone(),
id_generator,
storage,
downloader: piece_downloader::DownloaderFactory::new(
config.storage.server.protocol.as_str(),
config.clone(),
)?
.build(),
backend_factory,
download_rate_limiter: Arc::new(
RateLimiter::builder()
.initial(config.download.rate_limit.as_u64() as usize)
.refill(config.download.rate_limit.as_u64() as usize)
.max(config.download.rate_limit.as_u64() as usize)
.interval(Duration::from_secs(1))
.fair(false)
.build(),
),
upload_rate_limiter: Arc::new(
RateLimiter::builder()
.initial(config.upload.rate_limit.as_u64() as usize)
.refill(config.upload.rate_limit.as_u64() as usize)
.max(config.upload.rate_limit.as_u64() as usize)
.interval(Duration::from_secs(1))
.fair(false)
.build(),
),
prefetch_rate_limiter: Arc::new(
RateLimiter::builder()
.initial(config.proxy.prefetch_rate_limit.as_u64() as usize)
.refill(config.proxy.prefetch_rate_limit.as_u64() as usize)
.max(config.proxy.prefetch_rate_limit.as_u64() as usize)
.interval(Duration::from_secs(1))
.fair(false)
.build(),
),
})
}
/// id returns the piece id for the given task id and piece number.
///
/// The id is produced by the storage layer; this method only delegates to it.
#[inline]
pub fn id(&self, task_id: &str, number: u32) -> String {
self.storage.piece_id(task_id, number)
}
/// get gets the metadata of a piece from the local storage by its piece id.
///
/// The result of the storage lookup is returned unchanged.
/// NOTE(review): presumably `Ok(None)` means the piece is unknown to the storage - confirm.
pub fn get(&self, piece_id: &str) -> Result<Option<metadata::Piece>> {
self.storage.get_piece(piece_id)
}
/// get_all gets the metadata of all pieces of a task from the local storage.
///
/// The result of the storage lookup is returned unchanged.
pub fn get_all(&self, task_id: &str) -> Result<Vec<metadata::Piece>> {
self.storage.get_pieces(task_id)
}
/// calculate_interested calculates the interested pieces by content_length and range.
pub fn calculate_interested(
&self,
piece_length: u64,
content_length: u64,
range: Option<Range>,
) -> Result<Vec<metadata::Piece>> {
// If content_length is 0, return empty piece.
if content_length == 0 {
return Ok(Vec::new());
}
// If range is not None, calculate the pieces by range.
if let Some(range) = range {
if range.length == 0 {
return Err(Error::InvalidParameter);
}
let mut number = 0;
let mut offset = 0;
let mut pieces: Vec<metadata::Piece> = Vec::new();
loop {
// If offset is greater than content_length, break the loop.
if offset >= content_length {
let mut piece = pieces.pop().ok_or_else(|| {
error!("piece not found");
Error::InvalidParameter
})?;
piece.length = piece_length + content_length - offset;
pieces.push(piece);
break;
}
// If offset is greater than range.start + range.length, break the loop.
if offset >= range.start + range.length {
break;
}
if offset + piece_length > range.start {
pieces.push(metadata::Piece {
number: number as u32,
offset,
length: piece_length,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: Utc::now().naive_utc(),
created_at: Utc::now().naive_utc(),
finished_at: None,
});
}
offset = (number + 1) * piece_length;
number += 1;
}
info!(
"calculate interested pieces by range: {:?}, piece length: {:?}. pieces: {:?}",
range,
piece_length,
pieces
.iter()
.map(|piece| piece.number)
.collect::<Vec<u32>>()
);
return Ok(pieces);
}
// Calculate the pieces by content_length without range.
let mut number = 0;
let mut offset = 0;
let mut pieces: Vec<metadata::Piece> = Vec::new();
loop {
// If offset is greater than content_length, break the loop.
if offset >= content_length {
let mut piece = pieces.pop().ok_or_else(|| {
error!("piece not found");
Error::InvalidParameter
})?;
piece.length = piece_length + content_length - offset;
pieces.push(piece);
break;
}
pieces.push(metadata::Piece {
number: number as u32,
offset,
length: piece_length,
digest: "".to_string(),
parent_id: None,
uploading_count: 0,
uploaded_count: 0,
updated_at: Utc::now().naive_utc(),
created_at: Utc::now().naive_utc(),
finished_at: None,
});
offset = (number + 1) * piece_length;
number += 1;
}
info!(
"calculate interested pieces by content length, piece length: {:?}, pieces: {:?}",
piece_length,
pieces
.iter()
.map(|piece| piece.number)
.collect::<Vec<u32>>()
);
Ok(pieces)
}
/// remove_finished_from_interested removes the finished pieces from interested pieces.
///
/// Pieces are matched by their piece number. The relative order of the remaining
/// interested pieces is preserved.
#[instrument(skip_all)]
pub fn remove_finished_from_interested(
    &self,
    finished_pieces: Vec<metadata::Piece>,
    interested_pieces: Vec<metadata::Piece>,
) -> Vec<metadata::Piece> {
    // Index the finished piece numbers once instead of rescanning the finished list for
    // every interested piece.
    let finished_numbers: std::collections::HashSet<u32> = finished_pieces
        .iter()
        .map(|finished_piece| finished_piece.number)
        .collect();

    // The interested pieces are owned, so move the survivors instead of cloning them.
    interested_pieces
        .into_iter()
        .filter(|piece| !finished_numbers.contains(&piece.number))
        .collect()
}
/// merge_finished_pieces merges the newly finished pieces with the previously finished
/// pieces.
///
/// Pieces are keyed by piece number. A newly finished piece wins over an old one with the
/// same number. The order of the returned pieces is unspecified.
#[instrument(skip_all)]
pub fn merge_finished_pieces(
    &self,
    finished_pieces: Vec<metadata::Piece>,
    old_finished_pieces: Vec<metadata::Piece>,
) -> Vec<metadata::Piece> {
    // Seed the map with the new pieces; a later duplicate replaces an earlier one.
    let mut merged: HashMap<u32, metadata::Piece> = finished_pieces
        .into_iter()
        .map(|piece| (piece.number, piece))
        .collect();

    // Old pieces only fill the numbers that are still missing.
    for stale in old_finished_pieces {
        merged.entry(stale.number).or_insert(stale);
    }

    merged.into_values().collect()
}
/// calculate_piece_length calculates the piece length by the given strategy.
///
/// A fixed piece length is returned as is. When optimizing by file length, the content
/// length is divided by MAX_PIECE_COUNT, rounded up to the next power of two and then
/// limited to the range [MIN_PIECE_LENGTH, MAX_PIECE_LENGTH].
pub fn calculate_piece_length(&self, strategy: PieceLengthStrategy) -> u64 {
    let content_length = match strategy {
        PieceLengthStrategy::FixedPieceLength(piece_length) => return piece_length,
        PieceLengthStrategy::OptimizeByFileLength(content_length) => content_length,
    };

    let ideal_length = (content_length as f64 / MAX_PIECE_COUNT as f64) as u64;
    let rounded_length = ideal_length.next_power_of_two();

    if rounded_length >= MAX_PIECE_LENGTH {
        MAX_PIECE_LENGTH
    } else if rounded_length <= MIN_PIECE_LENGTH {
        MIN_PIECE_LENGTH
    } else {
        rounded_length
    }
}
/// calculate_piece_count calculates the piece count by piece_length and content_length.
///
/// The count is the content length divided by the piece length, rounded up, computed in
/// floating point and then converted to `u32`.
pub fn calculate_piece_count(&self, piece_length: u64, content_length: u64) -> u32 {
    let pieces_needed = content_length as f64 / piece_length as f64;
    pieces_needed.ceil() as u32
}
/// upload_from_local_into_async_read uploads a single piece from local cache.
///
/// Unless `disable_rate_limit` is set, `length` bytes are acquired from the upload rate
/// limiter before the piece is read. On success the upload traffic metric is collected
/// for `length` bytes and a reader over the piece content (optionally restricted to
/// `range`) is returned. Errors from the storage are returned unchanged.
// `piece_length` must be declared in `fields`, otherwise recording it on the span is a
// silent no-op.
#[instrument(skip_all, fields(piece_id, piece_length))]
pub async fn upload_from_local_into_async_read(
    &self,
    piece_id: &str,
    task_id: &str,
    length: u64,
    range: Option<Range>,
    disable_rate_limit: bool,
) -> Result<impl AsyncRead> {
    // Span record the piece_id and the piece_length.
    Span::current().record("piece_id", piece_id);
    Span::current().record("piece_length", length);

    // Acquire the upload rate limiter.
    if !disable_rate_limit {
        self.upload_rate_limiter.acquire(length as usize).await;
    }

    // Upload the piece content.
    self.storage
        .upload_piece(piece_id, task_id, range)
        .await
        .inspect(|_| {
            collect_upload_piece_traffic_metrics(
                self.id_generator.task_type(task_id) as i32,
                length,
            );
        })
}
/// download_from_local_into_async_read downloads a single piece from local cache.
///
/// Unless `disable_rate_limit` is set, `length` bytes are acquired from the prefetch rate
/// limiter (when `is_prefetch` is set) or from the download rate limiter before the piece
/// is read. Returns a reader over the piece content, optionally restricted to `range`.
/// Errors from the storage are returned unchanged.
// `piece_length` must be declared in `fields`, otherwise recording it on the span is a
// silent no-op.
#[instrument(skip_all, fields(piece_id, piece_length))]
pub async fn download_from_local_into_async_read(
    &self,
    piece_id: &str,
    task_id: &str,
    length: u64,
    range: Option<Range>,
    disable_rate_limit: bool,
    is_prefetch: bool,
) -> Result<impl AsyncRead> {
    // Span record the piece_id and the piece_length.
    Span::current().record("piece_id", piece_id);
    Span::current().record("piece_length", length);

    // Acquire the rate limiter matching the kind of download.
    if !disable_rate_limit {
        if is_prefetch {
            // Acquire the prefetch rate limiter.
            self.prefetch_rate_limiter.acquire(length as usize).await;
        } else {
            // Acquire the download rate limiter.
            self.download_rate_limiter.acquire(length as usize).await;
        }
    }

    // Read the piece content from the local storage. The storage exposes this read
    // through its upload_piece method.
    self.storage.upload_piece(piece_id, task_id, range).await
}
/// download_from_local accounts for a single piece served from the local cache.
///
/// No content is read or transferred here; the method only collects the download traffic
/// metric of type `TrafficType::LocalPeer` for `length` bytes, labelled with the task
/// type resolved from `task_id`.
#[instrument(skip_all)]
pub fn download_from_local(&self, task_id: &str, length: u64) {
collect_download_piece_traffic_metrics(
&TrafficType::LocalPeer,
self.id_generator.task_type(task_id) as i32,
length,
);
}
/// download_from_parent downloads a single piece from a parent.
///
/// The piece is first marked as started in the storage; if it turns out to be finished
/// already, its metadata is returned directly. Otherwise `length` bytes are acquired from
/// the prefetch or download rate limiter, the content is fetched from the parent's host
/// and written to the storage. On success the remote-peer traffic metric is collected.
///
/// # Errors
///
/// Returns `Error::InvalidPeer` when the parent carries no host, and otherwise the error
/// of the downloader or of the storage. On every failure after the start was recorded the
/// piece is marked as failed in the storage; a failure to do so is only logged.
#[allow(clippy::too_many_arguments)]
// `piece_length` must be declared in `fields`, otherwise recording it on the span is a
// silent no-op.
#[instrument(skip_all, fields(piece_id, piece_length))]
pub async fn download_from_parent(
    &self,
    piece_id: &str,
    host_id: &str,
    task_id: &str,
    number: u32,
    length: u64,
    parent: piece_collector::CollectedParent,
    is_prefetch: bool,
) -> Result<metadata::Piece> {
    // Span record the piece_id and the piece_length.
    Span::current().record("piece_id", piece_id);
    Span::current().record("piece_length", length);

    // Record the start of downloading piece.
    let piece = self
        .storage
        .download_piece_started(piece_id, number)
        .await?;

    // If the piece is downloaded by the other thread,
    // return the piece directly.
    if piece.is_finished() {
        info!("finished piece {} from local", piece_id);
        return Ok(piece);
    }

    if is_prefetch {
        // Acquire the prefetch rate limiter.
        self.prefetch_rate_limiter.acquire(length as usize).await;
    } else {
        // Acquire the download rate limiter.
        self.download_rate_limiter.acquire(length as usize).await;
    }

    // Resolve the host of the parent; only its address is needed, so borrow it.
    let host = parent.host.as_ref().ok_or_else(|| {
        error!("peer host is empty");
        if let Err(err) = self.storage.download_piece_failed(piece_id) {
            error!("set piece metadata failed: {}", err)
        };

        Error::InvalidPeer(parent.id.clone())
    })?;

    let (content, offset, digest) = self
        .downloader
        .download_piece(
            format!("{}:{}", host.ip, host.port).as_str(),
            number,
            host_id,
            task_id,
        )
        .await
        .inspect_err(|err| {
            error!("download piece failed: {}", err);
            if let Err(err) = self.storage.download_piece_failed(piece_id) {
                error!("set piece metadata failed: {}", err)
            };
        })?;
    let mut reader = Cursor::new(content);

    // Record the finish of downloading piece.
    match self
        .storage
        .download_piece_from_parent_finished(
            piece_id,
            task_id,
            offset,
            length,
            digest.as_str(),
            parent.id.as_str(),
            &mut reader,
            self.config.storage.write_piece_timeout,
        )
        .await
    {
        Ok(piece) => {
            collect_download_piece_traffic_metrics(
                &TrafficType::RemotePeer,
                self.id_generator.task_type(task_id) as i32,
                length,
            );

            Ok(piece)
        }
        Err(err) => {
            error!("download piece finished: {}", err);
            if let Err(err) = self.storage.download_piece_failed(piece_id) {
                error!("set piece metadata failed: {}", err)
            };

            Err(err)
        }
    }
}
/// download_from_source downloads a single piece from the source.
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all, fields(piece_id))]
pub async fn download_from_source(
&self,
piece_id: &str,
task_id: &str,
number: u32,
url: &str,
offset: u64,
length: u64,
request_header: HeaderMap,
is_prefetch: bool,
object_storage: Option<ObjectStorage>,
hdfs: Option<Hdfs>,
) -> Result<metadata::Piece> {
// Span record the piece_id.
Span::current().record("piece_id", piece_id);
Span::current().record("piece_length", length);
// Record the start of downloading piece.
let piece = self
.storage
.download_piece_started(piece_id, number)
.await?;
// If the piece is downloaded by the other thread,
// return the piece directly.
if piece.is_finished() {
info!("finished piece {} from local", piece_id);
return Ok(piece);
}
if is_prefetch {
// Acquire the prefetch rate limiter.
self.prefetch_rate_limiter.acquire(length as usize).await;
} else {
// Acquire the download rate limiter.
self.download_rate_limiter.acquire(length as usize).await;
}
// Add range header to the request by offset and length.
let mut request_header = request_header.clone();
request_header.insert(
header::RANGE,
format!("bytes={}-{}", offset, offset + length - 1)
.parse()
.unwrap(),
);
// Download the piece from the source.
let backend = self.backend_factory.build(url).inspect_err(|err| {
error!("build backend failed: {}", err);
if let Some(err) = self.storage.download_piece_failed(piece_id).err() {
error!("set piece metadata failed: {}", err)
};
})?;
// Record the start time.
let start_time = Instant::now();
// Collect the backend request started metrics.
collect_backend_request_started_metrics(
backend.scheme().as_str(),
http::Method::GET.as_str(),
);
let mut response = backend
.get(GetRequest {
task_id: task_id.to_string(),
piece_id: piece_id.to_string(),
url: url.to_string(),
range: Some(Range {
start: offset,
length,
}),
http_header: Some(request_header),
timeout: self.config.download.piece_timeout,
client_cert: None,
object_storage,
hdfs,
})
.await
.inspect_err(|err| {
// Collect the backend request failure metrics.
collect_backend_request_failure_metrics(
backend.scheme().as_str(),
http::Method::GET.as_str(),
);
// if the request is failed.
error!("backend get failed: {}", err);
if let Some(err) = self.storage.download_piece_failed(piece_id).err() {
error!("set piece metadata failed: {}", err)
};
})?;
if !response.success {
// Collect the backend request failure metrics.
collect_backend_request_failure_metrics(
backend.scheme().as_str(),
http::Method::GET.as_str(),
);
// if the status code is not OK.
let mut buffer = String::new();
response
.reader
.read_to_string(&mut buffer)
.await
.unwrap_or_default();
let error_message = response.error_message.unwrap_or_default();
error!("backend get failed: {} {}", error_message, buffer.as_str());
self.storage.download_piece_failed(piece_id)?;
return Err(Error::BackendError(Box::new(BackendError {
message: error_message,
status_code: Some(response.http_status_code.unwrap_or_default()),
header: Some(response.http_header.unwrap_or_default()),
})));
}
// Collect the backend request finished metrics.
collect_backend_request_finished_metrics(
backend.scheme().as_str(),
http::Method::GET.as_str(),
start_time.elapsed(),
);
// Record the finish of downloading piece.
match self
.storage
.download_piece_from_source_finished(
piece_id,
task_id,
offset,
length,
&mut response.reader,
self.config.storage.write_piece_timeout,
)
.await
{
Ok(piece) => {
collect_download_piece_traffic_metrics(
&TrafficType::BackToSource,
self.id_generator.task_type(task_id) as i32,
length,
);
Ok(piece)
}
Err(err) => {
error!("download piece finished: {}", err);
if let Some(err) = self.storage.download_piece_failed(piece_id).err() {
error!("set piece metadata failed: {}", err)
};
Err(err)
}
}
}
/// persistent_cache_id returns the persistent cache piece id for the given task id and
/// piece number.
///
/// The id is produced by the storage layer; this method only delegates to it.
#[inline]
pub fn persistent_cache_id(&self, task_id: &str, number: u32) -> String {
self.storage.persistent_cache_piece_id(task_id, number)
}
/// get_persistent_cache gets the metadata of a persistent cache piece from the local
/// storage by its piece id.
///
/// The result of the storage lookup is returned unchanged.
/// NOTE(review): presumably `Ok(None)` means the piece is unknown to the storage - confirm.
#[instrument(skip_all)]
pub fn get_persistent_cache(&self, piece_id: &str) -> Result<Option<metadata::Piece>> {
self.storage.get_persistent_cache_piece(piece_id)
}
/// create_persistent_cache creates a new persistent cache piece.
///
/// The piece identified by `piece_id` with the given `number`, `offset` and `length` is
/// created in the local storage from the content provided by `reader`. The call is
/// forwarded to the storage and its result is returned unchanged.
#[instrument(skip_all)]
pub async fn create_persistent_cache<R: AsyncRead + Unpin + ?Sized>(
&self,
piece_id: &str,
task_id: &str,
number: u32,
offset: u64,
length: u64,
reader: &mut R,
) -> Result<metadata::Piece> {
self.storage
.create_persistent_cache_piece(piece_id, task_id, number, offset, length, reader)
.await
}
/// upload_persistent_cache_from_local_into_async_read uploads a persistent cache piece from local cache.
///
/// `length` bytes are always acquired from the upload rate limiter before the piece is
/// read. On success the upload traffic metric is collected for `length` bytes and a
/// reader over the piece content (optionally restricted to `range`) is returned. Errors
/// from the storage are returned unchanged.
// `piece_length` must be declared in `fields`, otherwise recording it on the span is a
// silent no-op.
#[instrument(skip_all, fields(piece_id, piece_length))]
pub async fn upload_persistent_cache_from_local_into_async_read(
    &self,
    piece_id: &str,
    task_id: &str,
    length: u64,
    range: Option<Range>,
) -> Result<impl AsyncRead> {
    // Span record the piece_id and the piece_length.
    Span::current().record("piece_id", piece_id);
    Span::current().record("piece_length", length);

    // Acquire the upload rate limiter.
    self.upload_rate_limiter.acquire(length as usize).await;

    // Upload the persistent cache piece content.
    self.storage
        .upload_persistent_cache_piece(piece_id, task_id, range)
        .await
        .inspect(|_| {
            collect_upload_piece_traffic_metrics(
                self.id_generator.task_type(task_id) as i32,
                length,
            );
        })
}
/// download_persistent_cache_from_local_into_async_read downloads a persistent cache piece from local cache.
///
/// Unless `disable_rate_limit` is set, `length` bytes are acquired from the prefetch rate
/// limiter (when `is_prefetch` is set) or from the download rate limiter before the piece
/// is read. Returns a reader over the piece content, optionally restricted to `range`.
/// Errors from the storage are returned unchanged.
// `piece_length` must be declared in `fields`, otherwise recording it on the span is a
// silent no-op.
#[instrument(skip_all, fields(piece_id, piece_length))]
pub async fn download_persistent_cache_from_local_into_async_read(
    &self,
    piece_id: &str,
    task_id: &str,
    length: u64,
    range: Option<Range>,
    disable_rate_limit: bool,
    is_prefetch: bool,
) -> Result<impl AsyncRead> {
    // Span record the piece_id and the piece_length.
    Span::current().record("piece_id", piece_id);
    Span::current().record("piece_length", length);

    // Acquire the rate limiter matching the kind of download.
    if !disable_rate_limit {
        if is_prefetch {
            // Acquire the prefetch rate limiter.
            self.prefetch_rate_limiter.acquire(length as usize).await;
        } else {
            // Acquire the download rate limiter.
            self.download_rate_limiter.acquire(length as usize).await;
        }
    }

    // Read the persistent cache piece content from the local storage. The storage
    // exposes this read through its upload_persistent_cache_piece method.
    self.storage
        .upload_persistent_cache_piece(piece_id, task_id, range)
        .await
}
/// download_persistent_cache_from_local accounts for a persistent cache piece served from
/// the local cache.
///
/// No content is read or transferred here; the method only collects the download traffic
/// metric of type `TrafficType::LocalPeer` for `length` bytes, labelled with the task
/// type resolved from `task_id`.
#[instrument(skip_all)]
pub fn download_persistent_cache_from_local(&self, task_id: &str, length: u64) {
collect_download_piece_traffic_metrics(
&TrafficType::LocalPeer,
self.id_generator.task_type(task_id) as i32,
length,
);
}
/// download_persistent_cache_from_parent downloads a persistent cache piece from a parent.
///
/// `length` bytes are acquired from the prefetch or download rate limiter first. The
/// piece is then marked as started in the storage; if it turns out to be finished
/// already, its metadata is returned directly. Otherwise the content is fetched from the
/// parent's host and written to the storage. On success the remote-peer traffic metric is
/// collected.
///
/// # Errors
///
/// Returns `Error::InvalidPeer` when the parent carries no host, and otherwise the error
/// of the downloader or of the storage. On every failure after the start was recorded the
/// piece is marked as failed in the storage; a failure to do so is only logged.
#[allow(clippy::too_many_arguments)]
// `piece_length` must be declared in `fields`, otherwise recording it on the span is a
// silent no-op.
#[instrument(skip_all, fields(piece_id, piece_length))]
pub async fn download_persistent_cache_from_parent(
    &self,
    piece_id: &str,
    host_id: &str,
    task_id: &str,
    number: u32,
    length: u64,
    parent: piece_collector::CollectedParent,
    is_prefetch: bool,
) -> Result<metadata::Piece> {
    // Span record the piece_id and the piece_length.
    Span::current().record("piece_id", piece_id);
    Span::current().record("piece_length", length);

    if is_prefetch {
        // Acquire the prefetch rate limiter.
        self.prefetch_rate_limiter.acquire(length as usize).await;
    } else {
        // Acquire the download rate limiter.
        self.download_rate_limiter.acquire(length as usize).await;
    }

    // Record the start of downloading piece.
    let piece = self
        .storage
        .download_persistent_cache_piece_started(piece_id, number)
        .await?;

    // If the piece is downloaded by the other thread,
    // return the piece directly.
    if piece.is_finished() {
        info!("finished persistent cache piece {} from local", piece_id);
        return Ok(piece);
    }

    // Resolve the host of the parent; only its address is needed, so borrow it.
    let host = parent.host.as_ref().ok_or_else(|| {
        error!("peer host is empty");
        if let Err(err) = self
            .storage
            .download_persistent_cache_piece_failed(piece_id)
        {
            error!("set persistent cache piece metadata failed: {}", err)
        };

        Error::InvalidPeer(parent.id.clone())
    })?;

    let (content, offset, digest) = self
        .downloader
        .download_persistent_cache_piece(
            format!("{}:{}", host.ip, host.port).as_str(),
            number,
            host_id,
            task_id,
        )
        .await
        .inspect_err(|err| {
            error!("download persistent cache piece failed: {}", err);
            if let Err(err) = self
                .storage
                .download_persistent_cache_piece_failed(piece_id)
            {
                error!("set persistent cache piece metadata failed: {}", err)
            };
        })?;
    let mut reader = Cursor::new(content);

    // Record the finish of downloading piece.
    match self
        .storage
        .download_persistent_cache_piece_from_parent_finished(
            piece_id,
            task_id,
            offset,
            length,
            digest.as_str(),
            parent.id.as_str(),
            &mut reader,
        )
        .await
    {
        Ok(piece) => {
            collect_download_piece_traffic_metrics(
                &TrafficType::RemotePeer,
                self.id_generator.task_type(task_id) as i32,
                length,
            );

            Ok(piece)
        }
        Err(err) => {
            error!("download persistent cache piece finished: {}", err);
            if let Err(err) = self
                .storage
                .download_persistent_cache_piece_failed(piece_id)
            {
                error!("set persistent cache piece metadata failed: {}", err)
            };

            Err(err)
        }
    }
}
}
/// Unit tests for the piece manager.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Checks the piece numbers returned by `calculate_interested`, plus the offset and
    /// the length of the last returned piece, with and without a range.
    #[tokio::test]
    async fn test_calculate_interested() {
        let temp_dir = tempdir().unwrap();

        let config = Arc::new(Config::default());
        let id_generator = Arc::new(IDGenerator::new(
            "127.0.0.1".to_string(),
            "localhost".to_string(),
            false,
        ));
        let storage = Arc::new(
            Storage::new(
                config.clone(),
                temp_dir.path(),
                temp_dir.path().to_path_buf(),
            )
            .await
            .unwrap(),
        );
        let backend_factory = Arc::new(BackendFactory::new(None).unwrap());

        let piece = Piece::new(config, id_generator, storage, backend_factory).unwrap();

        // Each case is: piece length, content length, range, expected piece count,
        // expected piece numbers, expected offset and length of the last piece.
        let test_cases = vec![
            (1000, 1, None, 1, vec![0], 0, 1),
            (1000, 5000, None, 5, vec![0, 1, 2, 3, 4], 4000, 1000),
            (5000, 1000, None, 1, vec![0], 0, 1000),
            (
                10,
                101,
                None,
                11,
                vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                100,
                1,
            ),
            (
                1000,
                5000,
                Some(Range {
                    start: 1500,
                    length: 2000,
                }),
                3,
                vec![1, 2, 3],
                3000,
                1000,
            ),
            (
                1000,
                5000,
                Some(Range {
                    start: 0,
                    length: 1,
                }),
                1,
                vec![0],
                0,
                1000,
            ),
        ];

        for (
            piece_length,
            content_length,
            range,
            expected_len,
            expected_numbers,
            expected_last_piece_offset,
            expected_last_piece_length,
        ) in test_cases
        {
            let pieces = piece
                .calculate_interested(piece_length, content_length, range)
                .unwrap();
            let numbers: Vec<u32> = pieces.iter().map(|piece| piece.number).collect();

            assert_eq!(pieces.len(), expected_len);
            assert_eq!(numbers, expected_numbers);

            let last_piece = pieces.last().unwrap();
            assert_eq!(last_piece.offset, expected_last_piece_offset);
            assert_eq!(last_piece.length, expected_last_piece_length);
        }
    }
}

View File

@ -1,567 +0,0 @@
/*
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::grpc::dfdaemon_upload::DfdaemonUploadClient;
use dashmap::DashMap;
use dragonfly_api::common::v2::Host;
use dragonfly_api::dfdaemon::v2::{SyncPersistentCachePiecesRequest, SyncPiecesRequest};
use dragonfly_client_config::dfdaemon::Config;
use dragonfly_client_core::{Error, Result};
use dragonfly_client_storage::metadata;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::mpsc::{self, Receiver, Sender};
use tokio::task::JoinSet;
use tokio_stream::StreamExt;
use tracing::{error, info, instrument, Instrument};
/// DEFAULT_WAIT_FOR_PIECE_FROM_DIFFERENT_PARENTS is how long a sync task sleeps after it has
/// recorded a parent for a piece, so that other parents reporting the same piece can be
/// recorded before one of them is picked.
const DEFAULT_WAIT_FOR_PIECE_FROM_DIFFERENT_PARENTS: Duration = Duration::from_millis(5);
/// CollectedParent identifies a parent peer that pieces can be collected from.
#[derive(Clone, Debug)]
pub struct CollectedParent {
/// id is the id of the parent.
pub id: String,
/// host is the host of the parent. The collectors in this file reject a parent whose
/// host is `None` with `Error::InvalidPeer`.
pub host: Option<Host>,
}
/// CollectedPiece is the piece collected from a peer. It is the item sent over the channel
/// returned by the collectors' `run` methods.
pub struct CollectedPiece {
/// number is the piece number.
pub number: u32,
/// length is the piece length.
pub length: u64,
/// parent is the parent peer that was picked for this piece.
pub parent: CollectedParent,
}
/// PieceCollector is used to collect pieces from peers.
pub struct PieceCollector {
/// config is the configuration of the dfdaemon.
config: Arc<Config>,
/// host_id is the id of the host.
host_id: String,
/// task_id is the id of the task.
task_id: String,
/// parents is the parent peers.
parents: Vec<CollectedParent>,
/// interested_pieces is the pieces interested by the collector.
interested_pieces: Vec<metadata::Piece>,
/// collected_pieces maps each interested piece number to the parents that have reported
/// the piece so far. An entry is removed once a parent has been picked for the piece.
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>,
}
/// PieceCollector is used to collect pieces from peers.
impl PieceCollector {
/// new creates a new PieceCollector and registers every interested piece number in
/// `collected_pieces` with an empty list of candidate parents.
pub async fn new(
    config: Arc<Config>,
    host_id: &str,
    task_id: &str,
    interested_pieces: Vec<metadata::Piece>,
    parents: Vec<CollectedParent>,
) -> Self {
    // Seed the map up front so that only the requested piece numbers are tracked.
    let collected_pieces = Arc::new(DashMap::with_capacity(interested_pieces.len()));
    interested_pieces.iter().for_each(|piece| {
        collected_pieces.insert(piece.number, Vec::new());
    });

    Self {
        config,
        host_id: host_id.to_string(),
        task_id: task_id.to_string(),
        parents,
        interested_pieces,
        collected_pieces,
    }
}
/// run starts the piece collector in a spawned task and returns the receiving half of the
/// channel on which the collected pieces are delivered. A failure of the collection is
/// logged inside the spawned task and is not reported to the caller.
#[instrument(skip_all)]
pub async fn run(&self) -> Receiver<CollectedPiece> {
    let (tx, rx) = mpsc::channel(128 * 1024);

    // The spawned task takes ownership of its inputs, so clone them out of `self`.
    let config = self.config.clone();
    let host_id = self.host_id.clone();
    let task_id = self.task_id.clone();
    let parents = self.parents.clone();
    let interested_pieces = self.interested_pieces.clone();
    let collected_pieces = self.collected_pieces.clone();
    let timeout = self.config.download.collected_piece_timeout;

    tokio::spawn(
        async move {
            if let Err(err) = Self::collect_from_parents(
                config,
                &host_id,
                &task_id,
                parents,
                interested_pieces,
                collected_pieces,
                tx,
                timeout,
            )
            .await
            {
                error!("collect pieces failed: {}", err);
            }
        }
        .in_current_span(),
    );

    rx
}
/// collect_from_parents collects pieces from multiple parents with load balancing strategy.
///
/// One `sync_pieces` task is spawned per parent. For every piece message a task receives,
/// the collection process works in two phases:
/// 1. **Synchronization Phase**: Records the parent as a candidate for the piece, then waits
/// for a fixed duration (DEFAULT_WAIT_FOR_PIECE_FROM_DIFFERENT_PARENTS) so that other parents
/// reporting the same piece can be recorded as candidates too.
///
/// 2. **Selection Phase**: After the wait period, removes the piece from `collected_pieces`,
/// randomly selects one parent from the recorded candidates and sends the piece on
/// `collected_piece_tx`. A task that finds the piece already removed skips it.
///
/// **Load Balancing Strategy**:
/// The random parent selection is designed to distribute download load across multiple parents
/// during concurrent piece downloads. This approach ensures:
/// - Optimal utilization of bandwidth from multiple parent nodes
/// - Prevention of overwhelming any single parent with too many requests
/// - Better overall download performance through parallel connections
///
/// This strategy is particularly effective when downloading multiple pieces simultaneously,
/// as it naturally spreads the workload across the available parent pool.
///
/// Errors of individual tasks are logged; this function itself always returns `Ok(())` once
/// every task has finished or been aborted.
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all)]
async fn collect_from_parents(
config: Arc<Config>,
host_id: &str,
task_id: &str,
parents: Vec<CollectedParent>,
interested_pieces: Vec<metadata::Piece>,
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>,
collected_piece_tx: Sender<CollectedPiece>,
collected_piece_timeout: Duration,
) -> Result<()> {
// Spawn one task per parent to collect pieces from it.
let mut join_set = JoinSet::new();
for parent in parents.iter() {
// sync_pieces streams piece metadata from a single parent and returns that parent
// when its stream ends.
#[allow(clippy::too_many_arguments)]
async fn sync_pieces(
config: Arc<Config>,
host_id: String,
task_id: String,
parent: CollectedParent,
interested_pieces: Vec<metadata::Piece>,
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>,
collected_piece_tx: Sender<CollectedPiece>,
collected_piece_timeout: Duration,
) -> Result<CollectedParent> {
info!("sync pieces from parent {}", parent.id);
// A parent without host information cannot be contacted, so fail this task.
let host = parent.host.clone().ok_or_else(|| {
error!("peer {:?} host is empty", parent);
Error::InvalidPeer(parent.id.clone())
})?;
// Create a dfdaemon client.
let dfdaemon_upload_client = DfdaemonUploadClient::new(
config,
format!("http://{}:{}", host.ip, host.port),
false,
)
.await
.inspect_err(|err| {
error!(
"create dfdaemon upload client from parent {} failed: {}",
parent.id, err
);
})?;
// Ask the parent for the interested piece numbers only.
let response = dfdaemon_upload_client
.sync_pieces(SyncPiecesRequest {
host_id: host_id.to_string(),
task_id: task_id.to_string(),
interested_piece_numbers: interested_pieces
.iter()
.map(|piece| piece.number)
.collect(),
})
.await
.inspect_err(|err| {
error!("sync pieces from parent {} failed: {}", parent.id, err);
})?;
// Apply collected_piece_timeout to the response stream; presumably a gap between two
// messages longer than the timeout surfaces as an error from try_next — TODO confirm.
let out_stream = response.into_inner().timeout(collected_piece_timeout);
tokio::pin!(out_stream);
while let Some(message) = out_stream.try_next().await.inspect_err(|err| {
error!("sync pieces from parent {} failed: {}", parent.id, err);
})? {
// The item is itself a Result; propagate the inner error as well.
let message = message?;
// Record this parent as a candidate. A missing entry means the piece is not tracked
// (not interested, or already picked up), so the message is skipped.
if let Some(mut parents) = collected_pieces.get_mut(&message.number) {
parents.push(parent.clone());
} else {
continue;
}
// Wait for collecting the piece from different parents when the first
// piece is collected.
tokio::time::sleep(DEFAULT_WAIT_FOR_PIECE_FROM_DIFFERENT_PARENTS).await;
// Whichever task removes the entry first picks the parent; the others skip the piece.
let parents = match collected_pieces.remove(&message.number) {
Some((_, parents)) => parents,
None => continue,
};
// Pick one of the recorded candidates at random.
let parent = match parents.get(fastrand::usize(..parents.len())) {
Some(parent) => parent,
None => {
error!(
"collected_pieces does not contain parent for piece {}",
message.number
);
continue;
}
};
info!(
"picked up piece {}-{} metadata from parent {}",
task_id, message.number, parent.id
);
collected_piece_tx
.send(CollectedPiece {
number: message.number,
length: message.length,
parent: parent.clone(),
})
.await
.inspect_err(|err| {
error!("send CollectedPiece failed: {}", err);
})?;
}
Ok(parent)
}
join_set.spawn(
sync_pieces(
config.clone(),
host_id.to_string(),
task_id.to_string(),
parent.clone(),
interested_pieces.clone(),
collected_pieces.clone(),
collected_piece_tx.clone(),
collected_piece_timeout,
)
.in_current_span(),
);
}
// Wait for all tasks to finish.
while let Some(message) = join_set.join_next().await {
match message {
Ok(Ok(peer)) => {
info!("peer {} sync pieces finished", peer.id);
// If all pieces are collected, abort all tasks.
// NOTE(review): the aborted tasks presumably come back through join_next as join
// errors and are logged by the Err arm below — confirm this is intended.
if collected_pieces.is_empty() {
info!("all pieces are collected, abort all tasks");
join_set.abort_all();
}
}
Ok(Err(err)) => {
error!("sync pieces failed: {}", err);
}
Err(err) => {
error!("sync pieces failed: {}", err);
}
}
}
Ok(())
}
}
/// PersistentCachePieceCollector is used to collect persistent cache pieces from peers.
pub struct PersistentCachePieceCollector {
/// config is the configuration of the dfdaemon.
config: Arc<Config>,
/// host_id is the id of the host.
host_id: String,
/// task_id is the id of the persistent cache task.
task_id: String,
/// parents is the parent peers.
parents: Vec<CollectedParent>,
/// interested_pieces is the pieces interested by the collector.
interested_pieces: Vec<metadata::Piece>,
/// collected_pieces maps each interested piece number to the parents that have reported
/// the piece so far. An entry is removed once a parent has been picked for the piece.
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>,
}
/// PersistentCachePieceCollector is used to collect persistent cache pieces from peers.
impl PersistentCachePieceCollector {
/// new creates a new PersistentCachePieceCollector and registers every interested piece
/// number in `collected_pieces` with an empty list of candidate parents.
pub async fn new(
    config: Arc<Config>,
    host_id: &str,
    task_id: &str,
    interested_pieces: Vec<metadata::Piece>,
    parents: Vec<CollectedParent>,
) -> Self {
    // Seed the map up front so that only the requested piece numbers are tracked.
    let collected_pieces = Arc::new(DashMap::with_capacity(interested_pieces.len()));
    interested_pieces.iter().for_each(|piece| {
        collected_pieces.insert(piece.number, Vec::new());
    });

    Self {
        config,
        host_id: host_id.to_string(),
        task_id: task_id.to_string(),
        parents,
        interested_pieces,
        collected_pieces,
    }
}
/// run starts the persistent cache piece collector in a spawned task and returns the
/// receiving half of the channel on which the collected pieces are delivered. A failure of
/// the collection is logged inside the spawned task and is not reported to the caller.
#[instrument(skip_all)]
pub async fn run(&self) -> Receiver<CollectedPiece> {
    let (tx, rx) = mpsc::channel(10 * 1024);

    // The spawned task takes ownership of its inputs, so clone them out of `self`.
    let config = self.config.clone();
    let host_id = self.host_id.clone();
    let task_id = self.task_id.clone();
    let parents = self.parents.clone();
    let interested_pieces = self.interested_pieces.clone();
    let collected_pieces = self.collected_pieces.clone();
    let timeout = self.config.download.piece_timeout;

    tokio::spawn(
        async move {
            if let Err(err) = Self::collect_from_parents(
                config,
                &host_id,
                &task_id,
                parents,
                interested_pieces,
                collected_pieces,
                tx,
                timeout,
            )
            .await
            {
                error!("collect persistent cache pieces failed: {}", err);
            }
        }
        .in_current_span(),
    );

    rx
}
/// collect_from_parents collects persistent cache pieces from multiple parents with load
/// balancing strategy.
///
/// One `sync_pieces` task is spawned per parent. For every piece message a task receives,
/// the collection process works in two phases:
/// 1. **Synchronization Phase**: Records the parent as a candidate for the piece, then waits
/// for a fixed duration (DEFAULT_WAIT_FOR_PIECE_FROM_DIFFERENT_PARENTS) so that other parents
/// reporting the same piece can be recorded as candidates too.
///
/// 2. **Selection Phase**: After the wait period, removes the piece from `collected_pieces`,
/// randomly selects one parent from the recorded candidates and sends the piece on
/// `collected_piece_tx`. A task that finds the piece already removed skips it.
///
/// **Load Balancing Strategy**:
/// The random parent selection is designed to distribute download load across multiple parents
/// during concurrent piece downloads. This approach ensures:
/// - Optimal utilization of bandwidth from multiple parent nodes
/// - Prevention of overwhelming any single parent with too many requests
/// - Better overall download performance through parallel connections
///
/// This strategy is particularly effective when downloading multiple pieces simultaneously,
/// as it naturally spreads the workload across the available parent pool.
///
/// Errors of individual tasks are logged; this function itself always returns `Ok(())` once
/// every task has finished or been aborted.
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all)]
async fn collect_from_parents(
config: Arc<Config>,
host_id: &str,
task_id: &str,
parents: Vec<CollectedParent>,
interested_pieces: Vec<metadata::Piece>,
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>,
collected_piece_tx: Sender<CollectedPiece>,
collected_piece_timeout: Duration,
) -> Result<()> {
// Spawn one task per parent to collect pieces from it.
let mut join_set = JoinSet::new();
for parent in parents.iter() {
// sync_pieces streams persistent cache piece metadata from a single parent and returns
// that parent when its stream ends.
#[allow(clippy::too_many_arguments)]
async fn sync_pieces(
config: Arc<Config>,
host_id: String,
task_id: String,
parent: CollectedParent,
interested_pieces: Vec<metadata::Piece>,
collected_pieces: Arc<DashMap<u32, Vec<CollectedParent>>>,
collected_piece_tx: Sender<CollectedPiece>,
collected_piece_timeout: Duration,
) -> Result<CollectedParent> {
info!("sync persistent cache pieces from parent {}", parent.id);
// A parent without host information cannot be contacted, so fail this task.
let host = parent.host.clone().ok_or_else(|| {
error!("persistent cache peer {:?} host is empty", parent);
Error::InvalidPeer(parent.id.clone())
})?;
// Create a dfdaemon client.
let dfdaemon_upload_client = DfdaemonUploadClient::new(
config,
format!("http://{}:{}", host.ip, host.port),
false,
)
.await
.inspect_err(|err| {
error!(
"create dfdaemon upload client from parent {} failed: {}",
parent.id, err
);
})?;
// Ask the parent for the interested piece numbers only.
let response = dfdaemon_upload_client
.sync_persistent_cache_pieces(SyncPersistentCachePiecesRequest {
host_id: host_id.to_string(),
task_id: task_id.to_string(),
interested_piece_numbers: interested_pieces
.iter()
.map(|piece| piece.number)
.collect(),
})
.await
.inspect_err(|err| {
error!(
"sync persistent cache pieces from parent {} failed: {}",
parent.id, err
);
})?;
// Apply collected_piece_timeout to the response stream; presumably a gap between two
// messages longer than the timeout surfaces as an error from try_next — TODO confirm.
let out_stream = response.into_inner().timeout(collected_piece_timeout);
tokio::pin!(out_stream);
while let Some(message) = out_stream.try_next().await.inspect_err(|err| {
error!(
"sync persistent cache pieces from parent {} failed: {}",
parent.id, err
);
})? {
// The item is itself a Result; propagate the inner error as well.
let message = message?;
// Record this parent as a candidate. A missing entry means the piece is not tracked
// (not interested, or already picked up), so the message is skipped.
if let Some(mut parents) = collected_pieces.get_mut(&message.number) {
parents.push(parent.clone());
} else {
continue;
}
// Wait for collecting the piece from different parents when the first
// piece is collected.
tokio::time::sleep(DEFAULT_WAIT_FOR_PIECE_FROM_DIFFERENT_PARENTS).await;
// Whichever task removes the entry first picks the parent; the others skip the piece.
let parents = match collected_pieces.remove(&message.number) {
Some((_, parents)) => parents,
None => continue,
};
// Pick one of the recorded candidates at random.
let parent = match parents.get(fastrand::usize(..parents.len())) {
Some(parent) => parent,
None => {
error!(
"collected_pieces does not contain parent for piece {}",
message.number
);
continue;
}
};
info!(
"picked up piece {}-{} metadata from parent {}",
task_id, message.number, parent.id
);
collected_piece_tx
.send(CollectedPiece {
number: message.number,
length: message.length,
parent: parent.clone(),
})
.await
.inspect_err(|err| {
error!("send CollectedPiece failed: {}", err);
})?;
}
Ok(parent)
}
join_set.spawn(
sync_pieces(
config.clone(),
host_id.to_string(),
task_id.to_string(),
parent.clone(),
interested_pieces.clone(),
collected_pieces.clone(),
collected_piece_tx.clone(),
collected_piece_timeout,
)
.in_current_span(),
);
}
// Wait for all tasks to finish.
while let Some(message) = join_set.join_next().await {
match message {
Ok(Ok(peer)) => {
info!("peer {} sync persistent cache pieces finished", peer.id);
// If all pieces are collected, abort all tasks.
// NOTE(review): the aborted tasks presumably come back through join_next as join
// errors and are logged by the Err arm below — confirm this is intended.
if collected_pieces.is_empty() {
info!("all persistent cache pieces are collected, abort all tasks");
join_set.abort_all();
}
}
Ok(Err(err)) => {
error!("sync persistent cache pieces failed: {}", err);
}
Err(err) => {
error!("sync persistent cache pieces failed: {}", err);
}
}
}
Ok(())
}
}

View File

@ -1,405 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::grpc::dfdaemon_upload::DfdaemonUploadClient;
use dragonfly_api::dfdaemon::v2::{DownloadPersistentCachePieceRequest, DownloadPieceRequest};
use dragonfly_client_config::dfdaemon::Config;
use dragonfly_client_core::{Error, Result};
use dragonfly_client_storage::metadata;
use std::collections::HashMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::Mutex;
use tracing::{debug, error, instrument};
/// DEFAULT_DOWNLOADER_CAPACITY is the default capacity of the downloader to store the clients.
/// While the number of stored clients exceeds the capacity, cleanup keeps only the clients
/// that have active requests.
const DEFAULT_DOWNLOADER_CAPACITY: usize = 2000;
/// DEFAULT_DOWNLOADER_IDLE_TIMEOUT is the default idle timeout for the downloader. Cleanup
/// removes clients without active requests that have been idle for longer than this, and
/// runs at most once per half of this timeout.
const DEFAULT_DOWNLOADER_IDLE_TIMEOUT: Duration = Duration::from_secs(30);
/// Downloader is the interface for downloading pieces, which is implemented by different
/// protocols. The downloader is used to download pieces from the other peers.
#[tonic::async_trait]
pub trait Downloader: Send + Sync {
/// download_piece downloads piece `number` of task `task_id` from the peer at `addr`,
/// passing `host_id` along in the request.
///
/// The gRPC implementation in this file returns the piece content, its offset and its
/// digest, in that order; other implementations are presumably expected to do the same.
async fn download_piece(
&self,
addr: &str,
number: u32,
host_id: &str,
task_id: &str,
) -> Result<(Vec<u8>, u64, String)>;
/// download_persistent_cache_piece downloads persistent cache piece `number` of task
/// `task_id` from the peer at `addr`, passing `host_id` along in the request.
///
/// The gRPC implementation in this file returns the piece content, its offset and its
/// digest, in that order; other implementations are presumably expected to do the same.
async fn download_persistent_cache_piece(
&self,
addr: &str,
number: u32,
host_id: &str,
task_id: &str,
) -> Result<(Vec<u8>, u64, String)>;
}
/// DownloaderFactory is the factory for creating different downloaders by different protocols.
pub struct DownloaderFactory {
/// downloader is the downloader for downloading pieces, which is implemented by different
/// protocols. It is created once by `new` and shared by every call to `build`.
downloader: Arc<dyn Downloader + Send + Sync>,
}
/// DownloaderFactory creates the downloader that matches the requested protocol.
impl DownloaderFactory {
    /// new returns a new DownloaderFactory for the given protocol. Only "grpc" is supported;
    /// any other protocol is logged and rejected with `Error::InvalidParameter`.
    pub fn new(protocol: &str, config: Arc<Config>) -> Result<Self> {
        if protocol != "grpc" {
            error!("downloader unsupported protocol: {}", protocol);
            return Err(Error::InvalidParameter);
        }

        let downloader = Arc::new(GRPCDownloader::new(
            config,
            DEFAULT_DOWNLOADER_CAPACITY,
            DEFAULT_DOWNLOADER_IDLE_TIMEOUT,
        ));
        Ok(Self { downloader })
    }

    /// build returns a shared handle to the downloader created by `new`.
    pub fn build(&self) -> Arc<dyn Downloader> {
        self.downloader.clone()
    }
}
/// RequestGuard marks one request as active for as long as the guard is alive.
struct RequestGuard {
    /// active_requests is the shared counter of the active requests.
    active_requests: Arc<AtomicUsize>,
}

/// RequestGuard increments the shared counter on creation.
impl RequestGuard {
    /// new registers one more active request on the counter and returns the guard that
    /// will unregister it when dropped.
    fn new(active_requests: Arc<AtomicUsize>) -> Self {
        let guard = Self { active_requests };
        guard.active_requests.fetch_add(1, Ordering::SeqCst);
        guard
    }
}

/// RequestGuard decrements the shared counter when it goes out of scope.
impl Drop for RequestGuard {
    /// drop unregisters the active request that `new` registered.
    fn drop(&mut self) {
        self.active_requests.fetch_sub(1, Ordering::SeqCst);
    }
}
/// DfdaemonUploadClientEntry is the entry of the dfdaemon upload client stored per address.
#[derive(Clone)]
struct DfdaemonUploadClientEntry {
/// client is the dfdaemon upload client.
client: DfdaemonUploadClient,
/// active_requests is the number of the active requests. An entry with active requests
/// is never removed.
active_requests: Arc<AtomicUsize>,
/// actived_at is the last time the client was created or reused; cleanup measures the
/// idle duration from it.
actived_at: Arc<std::sync::Mutex<Instant>>,
}
/// GRPCDownloader is the downloader for downloading pieces by the gRPC protocol.
/// It will reuse the dfdaemon upload clients to download pieces from the other peers by
/// peer's address.
pub struct GRPCDownloader {
/// config is the configuration of the dfdaemon.
config: Arc<Config>,
/// clients is the map of the dfdaemon upload clients, keyed by the peer's address.
clients: Arc<Mutex<HashMap<String, DfdaemonUploadClientEntry>>>,
/// capacity is the capacity of the dfdaemon upload clients. If the number of the
/// clients exceeds the capacity, it will clean up the idle clients.
capacity: usize,
/// idle_timeout is the idle timeout for the client. If the client is idle for a long
/// time, it will be removed when cleaning up the idle clients.
idle_timeout: Duration,
/// cleanup_at is the time of the last cleanup of the idle clients.
cleanup_at: Arc<Mutex<Instant>>,
}
/// GRPCDownloader implements the downloader with the gRPC protocol.
impl GRPCDownloader {
/// new returns a new GRPCDownloader with an empty client map. The last cleanup time starts
/// at the moment of construction.
pub fn new(config: Arc<Config>, capacity: usize, idle_timeout: Duration) -> Self {
    let clients = Arc::new(Mutex::new(HashMap::new()));
    let cleanup_at = Arc::new(Mutex::new(Instant::now()));

    Self {
        config,
        clients,
        capacity,
        idle_timeout,
        cleanup_at,
    }
}
/// client returns the dfdaemon upload client by the address.
///
/// Operations:
/// 1. Idle client entries are cleaned up first.
/// 2. If a client entry exists for the address, its last active time is refreshed and the
///    client is returned for reuse.
/// 3. Otherwise a new client is created and inserted into the clients map, unless a
///    concurrent request has inserted one in the meantime, in which case that one is used.
async fn client(&self, addr: &str) -> Result<DfdaemonUploadClient> {
    let now = Instant::now();

    // Clean up first so that stale clients are not reused and the map does not keep
    // growing beyond the capacity.
    self.cleanup_idle_client_entries().await;

    // Fast path: reuse the existing client. The lock is released at the end of the scope,
    // before the potentially slow client creation below.
    {
        let clients = self.clients.lock().await;
        if let Some(entry) = clients.get(addr) {
            debug!("reusing client: {}", addr);
            *entry.actived_at.lock().unwrap() = now;
            return Ok(entry.client.clone());
        }
    }

    // Concurrent requests may each create a client for the same address here; only the
    // first one to reach the map is stored and the others reuse it.
    debug!("creating client: {}", addr);
    let client =
        DfdaemonUploadClient::new(self.config.clone(), format!("http://{}", addr), true)
            .await?;

    let mut clients = self.clients.lock().await;
    let entry = clients
        .entry(addr.to_string())
        .or_insert_with(|| DfdaemonUploadClientEntry {
            client,
            active_requests: Arc::new(AtomicUsize::new(0)),
            actived_at: Arc::new(std::sync::Mutex::new(now)),
        });

    // The entry may have been inserted by a concurrent request, so refresh its last
    // active time in either case.
    *entry.actived_at.lock().unwrap() = now;
    Ok(entry.client.clone())
}
/// get_client_entry returns a clone of the client entry stored for the address, or `None`
/// when there is no entry for it.
async fn get_client_entry(&self, addr: &str) -> Option<DfdaemonUploadClientEntry> {
    self.clients.lock().await.get(addr).cloned()
}
/// remove_client_entry removes the client entry for the address, but only when it exists
/// and has no active requests.
async fn remove_client_entry(&self, addr: &str) {
    let mut clients = self.clients.lock().await;
    let is_idle = clients
        .get(addr)
        .is_some_and(|entry| entry.active_requests.load(Ordering::SeqCst) == 0);
    if is_idle {
        clients.remove(addr);
    }
}
/// cleanup_idle_client_entries removes the client entries that have no active requests and
/// are either idle for longer than the idle timeout or stored while the map exceeds the
/// capacity. It does nothing when the last cleanup ran less than half of the idle timeout
/// ago.
async fn cleanup_idle_client_entries(&self) {
    let now = Instant::now();

    // Avoid hot cleanup for the clients. The lock on the cleanup time is released at the
    // end of this scope.
    {
        let last_cleanup = self.cleanup_at.lock().await;
        if now.duration_since(*last_cleanup) < self.idle_timeout / 2 {
            debug!("avoid hot cleanup");
            return;
        }
    }

    let mut clients = self.clients.lock().await;
    let exceeds_capacity = clients.len() > self.capacity;
    clients.retain(|addr, entry| {
        let has_active_requests = entry.active_requests.load(Ordering::SeqCst) > 0;
        let idle_duration = now.duration_since(*entry.actived_at.lock().unwrap());
        let is_recent = idle_duration <= self.idle_timeout;

        // Keep the client when it is in use, or when it is recent and the map is within
        // the capacity.
        let should_retain = has_active_requests || (!exceeds_capacity && is_recent);
        if !should_retain {
            debug!(
                "removing idle client: {}, exceeds_capacity: {}, idle_duration: {}s",
                addr,
                exceeds_capacity,
                idle_duration.as_secs(),
            );
        }

        should_retain
    });

    // Remember when this cleanup ran.
    *self.cleanup_at.lock().await = now;
}
}
/// GRPCDownloader implements the Downloader trait.
#[tonic::async_trait]
impl Downloader for GRPCDownloader {
/// download_piece downloads a piece from the other peer by the gRPC protocol.
#[instrument(skip_all)]
async fn download_piece(
&self,
addr: &str,
number: u32,
host_id: &str,
task_id: &str,
) -> Result<(Vec<u8>, u64, String)> {
let client = self.client(addr).await?;
let entry = self
.get_client_entry(addr)
.await
.ok_or(Error::UnexpectedResponse)?;
let request_guard = RequestGuard::new(entry.active_requests.clone());
let response = match client
.download_piece(
DownloadPieceRequest {
host_id: host_id.to_string(),
task_id: task_id.to_string(),
piece_number: number,
},
self.config.download.piece_timeout,
)
.await
{
Ok(response) => response,
Err(err) => {
// If the request fails, it will drop the request guard and remove the client
// entry to avoid using the invalid client.
drop(request_guard);
self.remove_client_entry(addr).await;
return Err(err);
}
};
let Some(piece) = response.piece else {
return Err(Error::InvalidParameter);
};
let Some(content) = piece.content else {
return Err(Error::InvalidParameter);
};
// Calculate the digest of the piece metadata and compare it with the expected digest,
// it verifies the integrity of the piece metadata.
let piece_metadata = metadata::Piece {
number,
length: piece.length,
offset: piece.offset,
digest: piece.digest.clone(),
..Default::default()
};
if let Some(expected_digest) = response.digest {
let digest = piece_metadata.calculate_digest();
if expected_digest != digest {
return Err(Error::DigestMismatch(
expected_digest.to_string(),
digest.to_string(),
));
}
}
Ok((content, piece.offset, piece.digest))
}
/// download_persistent_cache_piece downloads a persistent cache piece from the other peer by
/// the gRPC protocol.
#[instrument(skip_all)]
async fn download_persistent_cache_piece(
&self,
addr: &str,
number: u32,
host_id: &str,
task_id: &str,
) -> Result<(Vec<u8>, u64, String)> {
let client = self.client(addr).await?;
let entry = self
.get_client_entry(addr)
.await
.ok_or(Error::UnexpectedResponse)?;
let request_guard = RequestGuard::new(entry.active_requests.clone());
let response = match client
.download_persistent_cache_piece(
DownloadPersistentCachePieceRequest {
host_id: host_id.to_string(),
task_id: task_id.to_string(),
piece_number: number,
},
self.config.download.piece_timeout,
)
.await
{
Ok(response) => response,
Err(err) => {
// If the request fails, it will drop the request guard and remove the client
// entry to avoid using the invalid client.
drop(request_guard);
self.remove_client_entry(addr).await;
return Err(err);
}
};
let Some(piece) = response.piece else {
return Err(Error::InvalidParameter);
};
let Some(content) = piece.content else {
return Err(Error::InvalidParameter);
};
// Calculate the digest of the piece metadata and compare it with the expected digest,
// it verifies the integrity of the piece metadata.
let piece_metadata = metadata::Piece {
number,
length: piece.length,
offset: piece.offset,
digest: piece.digest.clone(),
..Default::default()
};
if let Some(expected_digest) = response.digest {
let digest = piece_metadata.calculate_digest();
if expected_digest != digest {
return Err(Error::DigestMismatch(
expected_digest.to_string(),
digest.to_string(),
));
}
}
Ok((content, piece.offset, piece.digest))
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,208 +0,0 @@
/*
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use tokio::signal::unix::{signal, SignalKind};
use tokio::sync::broadcast;
use tracing::info;
/// Shutdown is a handle for triggering and waiting for a shutdown signal. Cloned handles
/// share the same sender, so a trigger on one is received by all of them.
#[derive(Debug)]
pub struct Shutdown {
/// is_shutdown is true once `recv` has returned on this handle.
is_shutdown: bool,
/// sender is used to send the shutdown signal.
sender: broadcast::Sender<()>,
/// receiver is used to receive the shutdown signal.
receiver: broadcast::Receiver<()>,
}
/// Shutdown implements the shutdown signal.
impl Shutdown {
/// new creates a new Shutdown.
pub fn new() -> Shutdown {
let (sender, receiver) = broadcast::channel(1);
Self {
is_shutdown: false,
sender,
receiver,
}
}
/// is_shutdown returns true if the shutdown signal has been received.
pub fn is_shutdown(&self) -> bool {
self.is_shutdown
}
/// trigger triggers the shutdown signal.
pub fn trigger(&self) {
let _ = self.sender.send(());
}
/// recv waits for the shutdown signal.
pub async fn recv(&mut self) {
// Return immediately if the shutdown signal has already been received.
if self.is_shutdown {
return;
}
// Wait for the shutdown signal.
let _ = self.receiver.recv().await;
// Set the shutdown flag.
self.is_shutdown = true;
}
}
// Default implements the Default trait.
impl Default for Shutdown {
// default returns a new default Shutdown.
fn default() -> Self {
Self::new()
}
}
/// Clone creates another handle onto the same shutdown signal.
impl Clone for Shutdown {
    /// clone returns a new Shutdown that shares the sender, owns a fresh receiver subscribed
    /// to it, and copies the current `is_shutdown` flag.
    fn clone(&self) -> Self {
        Self {
            is_shutdown: self.is_shutdown,
            sender: self.sender.clone(),
            // A new subscription is taken from the sender; the receiver itself is not cloned.
            receiver: self.sender.subscribe(),
        }
    }
}
/// shutdown_signal resolves as soon as the process receives SIGINT, SIGTERM or SIGQUIT,
/// logging which of them arrived.
///
/// Panics if any of the three signal handlers cannot be registered.
pub async fn shutdown_signal() {
    let mut interrupt = signal(SignalKind::interrupt()).unwrap();
    let mut terminate = signal(SignalKind::terminate()).unwrap();
    let mut quit = signal(SignalKind::quit()).unwrap();

    // Whichever signal arrives first completes the future.
    tokio::select! {
        _ = interrupt.recv() => {
            info!("received SIGINT, shutting down");
        }
        _ = terminate.recv() => {
            info!("received SIGTERM, shutting down");
        }
        _ = quit.recv() => {
            info!("received SIGQUIT, shutting down");
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tokio::time::{sleep, Duration};

    /// A trigger from a cloned handle wakes up a waiting `recv`.
    #[tokio::test]
    async fn test_shutdown_trigger_and_recv() {
        let mut waiter = Shutdown::new();

        // Trigger from a separate task, after a short delay so the receiver is waiting.
        let trigger_handle = waiter.clone();
        tokio::spawn(async move {
            sleep(Duration::from_millis(10)).await;
            trigger_handle.trigger();
        });

        waiter.recv().await;
        assert!(waiter.is_shutdown());
    }

    /// A single trigger is observed by the original handle and by every clone.
    #[tokio::test]
    async fn test_shutdown_multiple_receivers() {
        let mut first = Shutdown::new();
        let mut second = first.clone();
        let mut third = first.clone();

        first.trigger();

        first.recv().await;
        second.recv().await;
        third.recv().await;

        assert!(first.is_shutdown());
        assert!(second.is_shutdown());
        assert!(third.is_shutdown());
    }

    /// A clone copies the `is_shutdown` flag, and clones made before a trigger still
    /// receive it.
    #[tokio::test]
    async fn test_shutdown_clone_behavior() {
        // Clone after the signal has been received: the flag is carried over.
        let mut received = Shutdown::new();
        received.trigger();
        received.recv().await;
        assert!(received.is_shutdown());

        let copy = received.clone();
        assert_eq!(received.is_shutdown(), copy.is_shutdown());

        // Clone before the signal is triggered: both handles receive it.
        let mut origin = Shutdown::new();
        let mut early_clone = origin.clone();

        origin.trigger();

        origin.recv().await;
        early_clone.recv().await;
        assert!(origin.is_shutdown());
        assert!(early_clone.is_shutdown());
    }

    /// Once the signal has been received, a further `recv` returns without waiting.
    #[tokio::test]
    async fn test_shutdown_already_triggered() {
        let mut handle = Shutdown::new();

        handle.trigger();
        handle.recv().await;
        assert!(handle.is_shutdown());

        // The second call must return immediately (less than 5ms).
        let started = std::time::Instant::now();
        handle.recv().await;
        let elapsed = started.elapsed();
        assert!(elapsed < Duration::from_millis(5));
    }
}

View File

@ -1,172 +0,0 @@
/*
* Copyright 2024 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::shutdown;
use pprof::protos::Message;
use pprof::ProfilerGuard;
use serde::{Deserialize, Serialize};
use std::net::SocketAddr;
use std::time::Duration;
use tokio::sync::mpsc;
use tracing::{error, info, instrument};
use warp::{Filter, Rejection, Reply};
/// DEFAULT_PROFILER_SECONDS is the profiling duration, in seconds, used when the request
/// does not specify `seconds`.
const DEFAULT_PROFILER_SECONDS: u64 = 10;
/// DEFAULT_PROFILER_FREQUENCY is the profiler frequency used when the request does not
/// specify `frequency`.
const DEFAULT_PROFILER_FREQUENCY: i32 = 1000;
/// PProfProfileQueryParams is the query params to start profiling.
///
/// Because of `#[serde(default)]`, fields that are missing from the query take the values
/// of this type's `Default` implementation.
#[derive(Deserialize, Serialize)]
#[serde(default)]
pub struct PProfProfileQueryParams {
/// seconds is how long, in seconds, the profiler runs before the report is built.
pub seconds: u64,
/// frequency is the frequency handed to the profiler guard.
pub frequency: i32,
}
/// Default for PProfProfileQueryParams uses the module-level profiler defaults.
impl Default for PProfProfileQueryParams {
    /// default returns params set to DEFAULT_PROFILER_SECONDS and DEFAULT_PROFILER_FREQUENCY.
    fn default() -> Self {
        let (seconds, frequency) = (DEFAULT_PROFILER_SECONDS, DEFAULT_PROFILER_FREQUENCY);
        Self { seconds, frequency }
    }
}
/// Stats is the stats server. It serves the pprof profiling endpoints over HTTP.
#[derive(Debug)]
pub struct Stats {
/// addr is the address the stats server listens on.
addr: SocketAddr,
/// shutdown is used to shutdown the stats server.
shutdown: shutdown::Shutdown,
/// _shutdown_complete is used to notify the stats server is shutdown.
/// NOTE(review): nothing is ever sent on it in this file; presumably dropping the sender
/// is what signals completion to the receiving side — confirm against the caller.
_shutdown_complete: mpsc::UnboundedSender<()>,
}
/// Stats implements the stats server.
impl Stats {
/// new creates a new Stats.
pub fn new(
addr: SocketAddr,
shutdown: shutdown::Shutdown,
shutdown_complete_tx: mpsc::UnboundedSender<()>,
) -> Self {
Self {
addr,
shutdown,
_shutdown_complete: shutdown_complete_tx,
}
}
/// run starts the stats server.
pub async fn run(&self) {
// Clone the shutdown channel.
let mut shutdown = self.shutdown.clone();
// Create the pprof profile route.
let pprof_profile_route = warp::path!("debug" / "pprof" / "profile")
.and(warp::get())
.and(warp::query::<PProfProfileQueryParams>())
.and_then(Self::pprof_profile_handler);
// Create the pprof heap route.
let pprof_heap_route = warp::path!("debug" / "pprof" / "heap")
.and(warp::get())
.and_then(Self::pprof_heap_handler);
// Create the pprof routes.
let pprof_routes = pprof_profile_route.or(pprof_heap_route);
// Start the stats server and wait for it to finish.
info!("stats server listening on {}", self.addr);
tokio::select! {
_ = warp::serve(pprof_routes).run(self.addr) => {
// Stats server ended.
info!("stats server ended");
}
_ = shutdown.recv() => {
// Stats server shutting down with signals.
info!("stats server shutting down");
}
}
}
/// stats_handler handles the stats request.
#[instrument(skip_all)]
async fn pprof_profile_handler(
query_params: PProfProfileQueryParams,
) -> Result<impl Reply, Rejection> {
info!(
"start profiling for {} seconds with {} frequency",
query_params.seconds, query_params.frequency
);
let guard = ProfilerGuard::new(query_params.frequency).map_err(|err| {
error!("failed to create profiler guard: {}", err);
warp::reject::reject()
})?;
tokio::time::sleep(Duration::from_secs(query_params.seconds)).await;
let report = guard.report().build().map_err(|err| {
error!("failed to build profiler report: {}", err);
warp::reject::reject()
})?;
let profile = report.pprof().map_err(|err| {
error!("failed to get pprof profile: {}", err);
warp::reject::reject()
})?;
let mut body: Vec<u8> = Vec::new();
profile.write_to_vec(&mut body).map_err(|err| {
error!("failed to write pprof profile: {}", err);
warp::reject::reject()
})?;
Ok(body)
}
/// pprof_heap_handler handles the pprof heap request.
#[instrument(skip_all)]
async fn pprof_heap_handler() -> Result<impl Reply, Rejection> {
info!("start heap profiling");
#[cfg(target_os = "linux")]
{
let mut prof_ctl = jemalloc_pprof::PROF_CTL.as_ref().unwrap().lock().await;
if !prof_ctl.activated() {
return Err(warp::reject::reject());
}
let pprof = prof_ctl.dump_pprof().map_err(|err| {
error!("failed to dump pprof: {}", err);
warp::reject::reject()
})?;
Ok(pprof)
}
#[cfg(not(target_os = "linux"))]
Err::<warp::http::Error, Rejection>(warp::reject::reject())
}
}

View File

@ -1,222 +0,0 @@
/*
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use dragonfly_client_config::dfdaemon::Host;
use opentelemetry::{global, trace::TracerProvider, KeyValue};
use opentelemetry_otlp::{WithExportConfig, WithTonicConfig};
use opentelemetry_sdk::{propagation::TraceContextPropagator, Resource};
use rolling_file::*;
use std::fs;
use std::path::PathBuf;
use std::str::FromStr;
use std::time::Duration;
use tonic::metadata::{MetadataKey, MetadataMap, MetadataValue};
use tracing::{info, Level};
use tracing_appender::non_blocking::WorkerGuard;
use tracing_opentelemetry::OpenTelemetryLayer;
use tracing_subscriber::{
filter::LevelFilter,
fmt::{time::ChronoLocal, Layer},
prelude::*,
EnvFilter, Registry,
};
/// SPAN_EXPORTER_TIMEOUT is the timeout (10 seconds) configured on the OTLP span exporter,
/// for both the gRPC and the HTTP variants.
const SPAN_EXPORTER_TIMEOUT: Duration = Duration::from_secs(10);
/// init_tracing initializes the tracing system.
///
/// It installs a global subscriber made of:
/// - an env filter read from the default environment variable, falling back to `log_level`,
/// - a console-subscriber layer, only when `log_level` is TRACE,
/// - a compact, ANSI-free layer writing to an hourly rolling file at `log_dir.join(name)`
///   with the extension set to `log`, keeping at most `log_max_files` files,
/// - a pretty stdout layer, filtered at DEBUG when `console` is true and OFF otherwise,
/// - an OpenTelemetry layer exporting spans over OTLP, only when both `otel_protocol` and
///   `otel_endpoint` are provided.
///
/// Finally `tracing_panic::panic_hook` is installed as the process panic hook.
///
/// # Arguments
///
/// * `name` - service name; used for the log file name, the tracer name and the OTLP
///   service name (`<name>-<host ip>`).
/// * `log_dir` - directory of the log files; created if missing.
/// * `otel_protocol` - `"grpc"`, `"http"` or `"https"`.
/// * `otel_endpoint` - endpoint without scheme; the scheme is derived from the protocol
///   (`http://` is used for `"grpc"`).
/// * `otel_path` - optional path joined onto the endpoint URL (HTTP protocols only).
/// * `otel_headers` - optional headers sent as gRPC metadata (gRPC protocol only).
/// * `host` - host description attached to the OTLP resource; required when OTLP is on.
/// * `is_seed_peer` - reported as the `host.seed_peer` resource attribute.
///
/// # Returns
///
/// The worker guards of the stdout and file non-blocking writers.
/// NOTE(review): callers presumably have to keep these alive for buffered logs to be
/// flushed — confirm against the tracing_appender documentation.
///
/// # Panics
///
/// Panics when the log directory or the rolling appender cannot be created, when the OTLP
/// configuration is invalid (unsupported protocol, unparsable endpoint/path/headers,
/// missing `host` or host ip), or when the exporter cannot be built.
// The signature mirrors the flat list of CLI/config values passed by the binaries.
#[allow(clippy::too_many_arguments)]
pub fn init_tracing(
    name: &str,
    log_dir: PathBuf,
    log_level: Level,
    log_max_files: usize,
    otel_protocol: Option<String>,
    otel_endpoint: Option<String>,
    otel_path: Option<PathBuf>,
    otel_headers: Option<reqwest::header::HeaderMap>,
    host: Option<Host>,
    is_seed_peer: bool,
    console: bool,
) -> Vec<WorkerGuard> {
    let mut guards = vec![];

    // Setup stdout layer.
    let (stdout_writer, stdout_guard) = tracing_appender::non_blocking(std::io::stdout());
    guards.push(stdout_guard);

    // Initialize stdout layer.
    let stdout_filter = if console {
        LevelFilter::DEBUG
    } else {
        LevelFilter::OFF
    };

    let stdout_logging_layer = Layer::new()
        .with_writer(stdout_writer)
        .with_file(true)
        .with_line_number(true)
        .with_target(false)
        .with_thread_names(false)
        .with_thread_ids(false)
        .with_timer(ChronoLocal::rfc_3339())
        .pretty()
        .with_filter(stdout_filter);

    // Setup file layer.
    fs::create_dir_all(&log_dir).expect("failed to create log directory");
    let rolling_appender = BasicRollingFileAppender::new(
        log_dir.join(name).with_extension("log"),
        RollingConditionBasic::new().hourly(),
        log_max_files,
    )
    .expect("failed to create rolling file appender");

    let (rolling_writer, rolling_writer_guard) = tracing_appender::non_blocking(rolling_appender);
    guards.push(rolling_writer_guard);

    let file_logging_layer = Layer::new()
        .with_writer(rolling_writer)
        .with_ansi(false)
        .with_file(true)
        .with_line_number(true)
        .with_target(false)
        .with_thread_names(false)
        .with_thread_ids(false)
        .with_timer(ChronoLocal::rfc_3339())
        .compact();

    // Setup env filter for log level.
    let env_filter = EnvFilter::try_from_default_env()
        .unwrap_or_else(|_| EnvFilter::default().add_directive(log_level.into()));

    // Enable console subscriber layer for tracing spawn tasks on `127.0.0.1:6669` when log level is TRACE.
    let console_subscriber_layer = if log_level == Level::TRACE {
        Some(console_subscriber::spawn())
    } else {
        None
    };

    let subscriber = Registry::default()
        .with(env_filter)
        .with(console_subscriber_layer)
        .with(file_logging_layer)
        .with(stdout_logging_layer);

    // If OTLP protocol and endpoint are provided, set up OpenTelemetry tracing.
    if let (Some(protocol), Some(endpoint)) = (otel_protocol, otel_endpoint) {
        let otlp_exporter = match protocol.as_str() {
            "grpc" => {
                // Forward the configured headers as gRPC metadata.
                let mut metadata = MetadataMap::new();
                if let Some(headers) = otel_headers {
                    for (key, value) in headers.iter() {
                        metadata.insert(
                            MetadataKey::from_str(key.as_str())
                                .expect("failed to create metadata key"),
                            MetadataValue::from_str(
                                value
                                    .to_str()
                                    .expect("failed to convert OTLP header value to string"),
                            )
                            .expect("failed to create metadata value"),
                        );
                    }
                }

                let endpoint_url = url::Url::parse(&format!("http://{}", endpoint))
                    .expect("failed to parse OTLP endpoint URL");

                opentelemetry_otlp::SpanExporter::builder()
                    .with_tonic()
                    .with_endpoint(endpoint_url)
                    .with_timeout(SPAN_EXPORTER_TIMEOUT)
                    .with_metadata(metadata)
                    .build()
                    .expect("failed to create OTLP exporter")
            }
            "http" | "https" => {
                let mut endpoint_url = url::Url::parse(&format!("{}://{}", protocol, endpoint))
                    .expect("failed to parse OTLP endpoint URL");

                if let Some(path) = otel_path {
                    endpoint_url = endpoint_url
                        .join(path.to_str().expect("OTLP path is not valid UTF-8"))
                        .expect("failed to join OTLP endpoint path");
                }

                opentelemetry_otlp::SpanExporter::builder()
                    .with_http()
                    .with_endpoint(endpoint_url.as_str())
                    .with_protocol(opentelemetry_otlp::Protocol::HttpJson)
                    .with_timeout(SPAN_EXPORTER_TIMEOUT)
                    .build()
                    .expect("failed to create OTLP exporter")
            }
            _ => {
                panic!("unsupported OTLP protocol: {}", protocol);
            }
        };

        // The resource attributes below are derived from the host, so it must be present.
        let host = host.expect("host is required when OTLP tracing is enabled");
        let host_ip = host
            .ip
            .expect("host ip is required when OTLP tracing is enabled");

        let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder()
            .with_batch_exporter(otlp_exporter)
            .with_resource(
                Resource::builder()
                    .with_service_name(format!("{}-{}", name, host_ip))
                    .with_schema_url(
                        [
                            KeyValue::new(
                                opentelemetry_semantic_conventions::attribute::SERVICE_NAMESPACE,
                                "dragonfly",
                            ),
                            KeyValue::new(
                                opentelemetry_semantic_conventions::attribute::HOST_NAME,
                                host.hostname,
                            ),
                            KeyValue::new(
                                opentelemetry_semantic_conventions::attribute::HOST_IP,
                                host_ip.to_string(),
                            ),
                        ],
                        opentelemetry_semantic_conventions::SCHEMA_URL,
                    )
                    .with_attribute(opentelemetry::KeyValue::new(
                        "host.idc",
                        host.idc.unwrap_or_default(),
                    ))
                    .with_attribute(opentelemetry::KeyValue::new(
                        "host.location",
                        host.location.unwrap_or_default(),
                    ))
                    .with_attribute(opentelemetry::KeyValue::new("host.seed_peer", is_seed_peer))
                    .build(),
            )
            .build();

        let tracer = provider.tracer(name.to_string());
        global::set_tracer_provider(provider.clone());
        global::set_text_map_propagator(TraceContextPropagator::new());

        let otel_layer = OpenTelemetryLayer::new(tracer);
        subscriber.with(otel_layer).init();
    } else {
        subscriber.init();
    }

    // Route panics through tracing so that they end up in the configured log outputs.
    std::panic::set_hook(Box::new(tracing_panic::panic_hook));
    info!(
        "tracing initialized directory: {}, level: {}",
        log_dir.as_path().display(),
        log_level
    );

    guards
}

View File

@ -1,2 +0,0 @@
[toolchain]
channel = "1.85.0"

View File

@ -1,45 +0,0 @@
#!/bin/bash
# Generates the test certificates into ./certs: a CA, a second unrelated ("wrong") CA,
# and a server and a client certificate signed by the first CA with a SAN for localhost.

# Stop at the first failing command, unset variable or failed pipeline stage, so that a
# broken step is never followed by steps that depend on its output.
set -euo pipefail

# Create the directory used for storing certs.
mkdir -p certs
cd certs

# Generate CA private key and self-signed cert.
openssl genrsa -out ca.key 2048
openssl req -x509 -new -nodes -key ca.key -sha256 -days 1024 -out ca.crt -subj "/C=CN/ST=Beijing/L=Beijing/O=Test CA/OU=IT/CN=Test CA"

# Generate another CA private key and self-signed cert.
openssl genrsa -out wrong-ca.key 2048
openssl req -x509 -new -nodes -key wrong-ca.key -sha256 -days 1024 -out wrong-ca.crt -subj "/C=CN/ST=Beijing/L=Beijing/O=Wrong CA/OU=IT/CN=Wrong CA"

# Generate OpenSSL config file with SAN extension.
cat >san.cnf <<EOL
[req]
distinguished_name = req_distinguished_name
req_extensions = v3_req
[req_distinguished_name]
[ v3_req ]
subjectAltName = @alt_names
[ alt_names ]
DNS.1 = localhost
EOL

# Generate server private key and CSR.
openssl genrsa -out server.key 2048
openssl req -new -key server.key -out server.csr -subj "/C=CN/ST=Beijing/L=Beijing/O=Test Server/OU=IT/CN=localhost" -config san.cnf

# Sign server CSR by using CA with SAN extension.
openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out server.crt -days 365 -sha256 -extensions v3_req -extfile san.cnf

# Convert server.crt to pem.
openssl x509 -in server.crt -out server.pem -outform PEM

# Generate client private key and CSR.
openssl genrsa -out client.key 2048
openssl req -new -key client.key -out client.csr -subj "/C=CN/ST=Beijing/L=Beijing/O=Test Server/OU=IT/CN=localhost" -config san.cnf

# Sign client CSR by using CA with SAN extension.
openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 365 -sha256 -extensions v3_req -extfile san.cnf

# Convert client.crt to pem.
openssl x509 -in client.crt -out client.pem -outform PEM

271
src/announcer/mod.rs Normal file
View File

@ -0,0 +1,271 @@
/*
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::config::{
dfdaemon::{Config, HostType},
CARGO_PKG_RUSTC_VERSION, CARGO_PKG_VERSION, GIT_HASH,
};
use crate::grpc::{manager::ManagerClient, scheduler::SchedulerClient};
use crate::shutdown;
use crate::Result;
use dragonfly_api::common::v2::{Build, Cpu, Host, Memory, Network};
use dragonfly_api::manager::v2::{DeleteSeedPeerRequest, SourceType, UpdateSeedPeerRequest};
use dragonfly_api::scheduler::v2::AnnounceHostRequest;
use std::env;
use std::sync::Arc;
use sysinfo::{CpuExt, ProcessExt, System, SystemExt};
use tokio::sync::mpsc;
use tracing::{error, info};
// ManagerAnnouncer is used to announce the dfdaemon information to the manager.
pub struct ManagerAnnouncer {
// config is the configuration of the dfdaemon.
config: Arc<Config>,
// manager_client is the grpc client of the manager.
manager_client: Arc<ManagerClient>,
// shutdown is used to shutdown the announcer.
shutdown: shutdown::Shutdown,
// _shutdown_complete is used to notify the announcer is shutdown.
// NOTE(review): it is only stored, never sent on, in this file; presumably dropping it
// is what notifies the receiver — confirm against the caller.
_shutdown_complete: mpsc::UnboundedSender<()>,
}
// ManagerAnnouncer implements the manager announcer of the dfdaemon.
impl ManagerAnnouncer {
    // new builds a manager announcer from its collaborators; it performs no I/O.
    pub fn new(
        config: Arc<Config>,
        manager_client: Arc<ManagerClient>,
        shutdown: shutdown::Shutdown,
        shutdown_complete_tx: mpsc::UnboundedSender<()>,
    ) -> Self {
        Self {
            _shutdown_complete: shutdown_complete_tx,
            shutdown,
            manager_client,
            config,
        }
    }

    // run announces the dfdaemon information to the manager.
    //
    // When the seed peer is enabled, it registers the seed peer, waits for the shutdown
    // signal and then deletes the seed peer again. Otherwise it only waits for the
    // shutdown signal. Errors from the manager client are returned to the caller.
    pub async fn run(&self) -> Result<()> {
        // Work on a private copy of the shutdown handle.
        let mut shutdown = self.shutdown.clone();
        let config = &self.config;

        // Nothing to announce for a normal peer: just wait for the shutdown.
        if !config.seed_peer.enable {
            shutdown.recv().await;
            info!("announce to manager shutting down");
            return Ok(());
        }

        // Register the seed peer to the manager.
        let register_request = UpdateSeedPeerRequest {
            source_type: SourceType::SeedPeerSource.into(),
            hostname: config.host.hostname.clone(),
            r#type: config.seed_peer.kind.to_string(),
            idc: config.host.idc.clone(),
            location: config.host.location.clone(),
            ip: config.host.ip.unwrap().to_string(),
            port: config.upload.server.port as i32,
            download_port: config.upload.server.port as i32,
            seed_peer_cluster_id: config.seed_peer.cluster_id,
        };
        self.manager_client
            .update_seed_peer(register_request)
            .await?;

        // Stay registered until the shutdown signal arrives.
        shutdown.recv().await;

        // Delete the seed peer from the manager.
        let unregister_request = DeleteSeedPeerRequest {
            source_type: SourceType::SeedPeerSource.into(),
            hostname: config.host.hostname.clone(),
            ip: config.host.ip.unwrap().to_string(),
            seed_peer_cluster_id: config.seed_peer.cluster_id,
        };
        self.manager_client
            .delete_seed_peer(unregister_request)
            .await?;

        info!("announce to manager shutting down");
        Ok(())
    }
}
// SchedulerAnnouncer is used to announce the dfdaemon host information to the scheduler.
pub struct SchedulerAnnouncer {
// config is the configuration of the dfdaemon.
config: Arc<Config>,
// host_id is the id of the host.
host_id: String,
// scheduler_client is the grpc client of the scheduler.
scheduler_client: Arc<SchedulerClient>,
// shutdown is used to shutdown the announcer.
shutdown: shutdown::Shutdown,
// _shutdown_complete is used to notify the announcer is shutdown.
// NOTE(review): it is only stored, never sent on, in this file; presumably dropping it
// is what notifies the receiver — confirm against the caller.
_shutdown_complete: mpsc::UnboundedSender<()>,
}
// SchedulerAnnouncer implements the scheduler announcer of the dfdaemon.
impl SchedulerAnnouncer {
    // new creates a new scheduler announcer and immediately sends the initial host
    // announcement to the scheduler. It fails when the announce request cannot be built
    // or when the scheduler client returns an error.
    pub async fn new(
        config: Arc<Config>,
        host_id: String,
        scheduler_client: Arc<SchedulerClient>,
        shutdown: shutdown::Shutdown,
        shutdown_complete_tx: mpsc::UnboundedSender<()>,
    ) -> Result<Self> {
        let announcer = Self {
            config,
            host_id,
            scheduler_client,
            shutdown,
            _shutdown_complete: shutdown_complete_tx,
        };

        // Initialize the scheduler announcer.
        announcer
            .scheduler_client
            .init_announce_host(announcer.make_announce_host_request()?)
            .await?;

        Ok(announcer)
    }

    // run announces the dfdaemon information to the scheduler every
    // `config.scheduler.announce_interval` until the shutdown signal is received.
    // Failures of a single announcement are logged and do not stop the loop.
    pub async fn run(&self) {
        // Clone the shutdown channel.
        let mut shutdown = self.shutdown.clone();

        // Start the scheduler announcer.
        let mut interval = tokio::time::interval(self.config.scheduler.announce_interval);
        loop {
            tokio::select! {
                _ = interval.tick() => {
                    let request = match self.make_announce_host_request() {
                        Ok(request) => request,
                        Err(err) => {
                            error!("make announce host request failed: {}", err);
                            continue;
                        }
                    };

                    // NOTE(review): the periodic announcement goes through the same
                    // `init_announce_host` call as the initial one in `new` — confirm
                    // that this is the intended client method for repeated announces.
                    if let Err(err) = self.scheduler_client.init_announce_host(request).await {
                        error!("announce host to scheduler failed: {}", err);
                    };
                }
                _ = shutdown.recv() => {
                    // Announce to scheduler shutting down with signals.
                    info!("announce to scheduler shutting down");
                    return
                }
            }
        }
    }

    // make_announce_host_request makes the announce host request from the configuration
    // and a fresh snapshot of the system information.
    //
    // Panics if the current process cannot be found in the snapshot or if the host ip is
    // not set in the configuration.
    fn make_announce_host_request(&self) -> Result<AnnounceHostRequest> {
        // If the seed peer is enabled, we should announce the seed peer to the scheduler.
        let host_type = if self.config.seed_peer.enable {
            self.config.seed_peer.kind
        } else {
            HostType::Normal
        };

        // Get the system information.
        let mut sys = System::new_all();
        sys.refresh_all();

        // Get the process information.
        let process = sys.process(sysinfo::get_current_pid().unwrap()).unwrap();

        // Get the cpu information.
        let cpu = Cpu {
            // The logical count is the number of CPUs reported by the system, which is
            // not the same thing as the physical core count.
            logical_count: sys.cpus().len() as u32,
            physical_count: sys.physical_core_count().unwrap_or_default() as u32,
            percent: sys.global_cpu_info().cpu_usage() as f64,
            process_percent: process.cpu_usage() as f64,

            // TODO Get the cpu times.
            times: None,
        };

        // Get the memory information.
        let total_memory = sys.total_memory();
        let used_memory = sys.used_memory();

        // Compute the ratio in floating point: the integer division `used / total` is 0
        // unless the memory is fully used. Reported as a percentage, like the cpu usage.
        let used_percent = if total_memory == 0 {
            0.0
        } else {
            used_memory as f64 / total_memory as f64 * 100.0
        };

        let memory = Memory {
            total: total_memory,
            available: sys.available_memory(),
            used: used_memory,
            used_percent,

            // TODO Get the process used memory.
            process_used_percent: 0 as f64,
            free: sys.free_memory(),
        };

        // Get the network information.
        let network = Network {
            // TODO Get the count of the tcp connection.
            tcp_connection_count: 0,

            // TODO Get the count of the upload tcp connection.
            upload_tcp_connection_count: 0,
            idc: self.config.host.idc.clone(),
            location: self.config.host.location.clone(),
        };

        // Get the build information.
        let build = Build {
            git_version: CARGO_PKG_VERSION.to_string(),
            git_commit: Some(GIT_HASH.unwrap_or_default().to_string()),
            go_version: None,
            rust_version: Some(CARGO_PKG_RUSTC_VERSION.to_string()),
            platform: None,
        };

        // Struct the host information.
        let host = Host {
            id: self.host_id.to_string(),
            r#type: host_type as u32,
            hostname: self.config.host.hostname.clone(),
            ip: self.config.host.ip.unwrap().to_string(),
            port: self.config.upload.server.port as i32,
            download_port: self.config.upload.server.port as i32,
            os: env::consts::OS.to_string(),
            platform: env::consts::OS.to_string(),
            platform_family: env::consts::FAMILY.to_string(),
            platform_version: sys.os_version().unwrap_or_default(),
            kernel_version: sys.kernel_version().unwrap_or_default(),
            cpu: Some(cpu),
            memory: Some(memory),
            network: Some(network),

            // TODO Get the disk information.
            disk: None,
            build: Some(build),

            // TODO Get scheduler cluster id from dynconfig.
            scheduler_cluster_id: 0,
        };

        Ok(AnnounceHostRequest { host: Some(host) })
    }
}

115
src/backend/http.rs Normal file
View File

@ -0,0 +1,115 @@
/*
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::Result;
use futures::TryStreamExt;
use reqwest::header::HeaderMap;
use std::time::Duration;
use tokio::io::AsyncRead;
use tokio_util::compat::FuturesAsyncReadCompatExt;
// Request is the request for HTTP backend. The same type is used for HEAD and GET.
pub struct Request {
// url is the url of the request.
pub url: String,
// header is the headers of the request.
pub header: HeaderMap,
// timeout is the timeout of the request.
pub timeout: Duration,
}
// HeadResponse is the head response for HTTP backend.
pub struct HeadResponse {
// header is the headers of the response.
pub header: HeaderMap,
// status_code is the status code of the response. It is reported as received.
pub status_code: reqwest::StatusCode,
}
// GetResponse is the get response for HTTP backend.
pub struct GetResponse<R: AsyncRead> {
// header is the headers of the response.
pub header: HeaderMap,
// status_code is the status code of the response. It is reported as received.
pub status_code: reqwest::StatusCode,
// reader is the async reader over the content (body) of the response.
pub reader: R,
}
// HTTP is the HTTP backend. It issues HEAD and GET requests through a reqwest client.
pub struct HTTP {
// client is the http client shared by all requests of this backend.
client: reqwest::Client,
}
// HTTP implements the http interface.
impl HTTP {
// new returns a new HTTP.
pub fn new() -> Self {
Self {
client: reqwest::Client::new(),
}
}
// Head gets the header of the request.
pub async fn head(&self, request: Request) -> Result<HeadResponse> {
let mut request_builder = self.client.head(&request.url).headers(request.header);
request_builder = request_builder.timeout(request.timeout);
let response = request_builder.send().await?;
let header = response.headers().clone();
let status_code = response.status();
Ok(HeadResponse {
header,
status_code,
})
}
// Get gets the content of the request.
pub async fn get(&self, request: Request) -> Result<GetResponse<impl AsyncRead>> {
let mut request_builder = self.client.get(&request.url).headers(request.header);
request_builder = request_builder.timeout(request.timeout);
let response = request_builder.send().await?;
let header = response.headers().clone();
let status_code = response.status();
let reader = response
.bytes_stream()
.map_err(|e| futures::io::Error::new(futures::io::ErrorKind::Other, e))
.into_async_read()
.compat();
Ok(GetResponse {
header,
status_code,
reader,
})
}
}
// Default implements the Default trait.
impl Default for HTTP {
// default returns a new default HTTP.
fn default() -> Self {
Self::new()
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright 2024 The Dragonfly Authors
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -14,4 +14,4 @@
* limitations under the License.
*/
pub mod container_runtime;
pub mod http;

View File

@ -15,40 +15,27 @@
*/
use clap::Parser;
use dragonfly_client::announcer::SchedulerAnnouncer;
use dragonfly_client::announcer::{ManagerAnnouncer, SchedulerAnnouncer};
use dragonfly_client::backend::http::HTTP;
use dragonfly_client::config::dfdaemon;
use dragonfly_client::dynconfig::Dynconfig;
use dragonfly_client::gc::GC;
use dragonfly_client::grpc::{
dfdaemon_download::DfdaemonDownloadServer, dfdaemon_upload::DfdaemonUploadServer,
manager::ManagerClient, scheduler::SchedulerClient,
};
use dragonfly_client::health::Health;
use dragonfly_client::metrics::Metrics;
use dragonfly_client::proxy::Proxy;
use dragonfly_client::resource::{persistent_cache_task::PersistentCacheTask, task::Task};
use dragonfly_client::shutdown;
use dragonfly_client::stats::Stats;
use dragonfly_client::storage::Storage;
use dragonfly_client::task::Task;
use dragonfly_client::tracing::init_tracing;
use dragonfly_client_backend::BackendFactory;
use dragonfly_client_config::{dfdaemon, VersionValueParser};
use dragonfly_client_storage::Storage;
use dragonfly_client_util::{id_generator::IDGenerator, net::Interface};
use dragonfly_client::utils::id_generator::IDGenerator;
use std::net::SocketAddr;
use std::path::PathBuf;
use std::sync::Arc;
use termion::{color, style};
use tokio::sync::mpsc;
use tokio::sync::Barrier;
use tracing::{error, info, Level};
#[cfg(not(target_env = "msvc"))]
#[global_allocator]
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
#[allow(non_upper_case_globals)]
#[export_name = "malloc_conf"]
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:19\0";
#[derive(Debug, Parser)]
#[command(
name = dfdaemon::NAME,
@ -57,8 +44,7 @@ pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:19\0
about = "dfdaemon is a high performance P2P download daemon",
long_about = "A high performance P2P download daemon in Dragonfly that can download resources of different protocols. \
When user triggers a file downloading task, dfdaemon will download the pieces of file from other peers. \
Meanwhile, it will act as an uploader to support other peers to download pieces from it if it owns them.",
disable_version_flag = true
Meanwhile, it will act as an uploader to support other peers to download pieces from it if it owns them."
)]
struct Args {
#[arg(
@ -69,6 +55,13 @@ struct Args {
]
config: PathBuf,
#[arg(
long = "lock-path",
default_value_os_t = dfdaemon::default_dfdaemon_lock_path(),
help = "Specify the lock file path"
)]
lock_path: PathBuf,
#[arg(
short = 'l',
long,
@ -86,23 +79,10 @@ struct Args {
#[arg(
long,
default_value_t = 6,
default_value_t = 24,
help = "Specify the max number of log files"
)]
log_max_files: usize,
#[arg(long, default_value_t = true, help = "Specify whether to print log")]
console: bool,
#[arg(
short = 'V',
long = "version",
help = "Print version information",
default_value_t = false,
action = clap::ArgAction::SetTrue,
value_parser = VersionValueParser
)]
version: bool,
}
#[tokio::main]
@ -111,55 +91,26 @@ async fn main() -> Result<(), anyhow::Error> {
let args = Args::parse();
// Load config.
let config = match dfdaemon::Config::load(&args.config).await {
Ok(config) => config,
Err(err) => {
println!(
"{}{}Load config {} error: {}{}\n",
color::Fg(color::Red),
style::Bold,
args.config.display(),
err,
style::Reset
);
println!(
"{}{}If the file does not exist, you need to new a default config file refer to: {}{}{}{}https://d7y.io/docs/next/reference/configuration/client/dfdaemon/{}",
color::Fg(color::Yellow),
style::Bold,
style::Reset,
color::Fg(color::Cyan),
style::Underline,
style::Italic,
style::Reset,
);
std::process::exit(1);
}
};
let config = dfdaemon::Config::load(&args.config).map_err(|err| {
error!("load config failed: {}", err);
err
})?;
let config = Arc::new(config);
// Initialize tracing.
let _guards = init_tracing(
dfdaemon::NAME,
args.log_dir.clone(),
&args.log_dir,
args.log_level,
args.log_max_files,
config.tracing.protocol.clone(),
config.tracing.endpoint.clone(),
config.tracing.path.clone(),
Some(config.tracing.headers.clone()),
Some(config.host.clone()),
config.seed_peer.enable,
args.console,
config.tracing.addr.to_owned(),
);
// Initialize storage.
let storage = Storage::new(config.clone(), config.storage.dir.as_path(), args.log_dir)
.await
.inspect_err(|err| {
error!("initialize storage failed: {}", err);
})?;
let storage = Storage::new(config.clone(), config.storage.dir.as_path()).map_err(|err| {
error!("initialize storage failed: {}", err);
err
})?;
let storage = Arc::new(storage);
// Initialize id generator.
@ -170,11 +121,16 @@ async fn main() -> Result<(), anyhow::Error> {
);
let id_generator = Arc::new(id_generator);
// Initialize http client.
let http_client = HTTP::new();
let http_client = Arc::new(http_client);
// Initialize manager client.
let manager_client = ManagerClient::new(config.clone(), config.manager.addr.clone())
let manager_client = ManagerClient::new(config.manager.addrs.clone())
.await
.inspect_err(|err| {
.map_err(|err| {
error!("initialize manager client failed: {}", err);
err
})?;
let manager_client = Arc::new(manager_client);
@ -190,73 +146,42 @@ async fn main() -> Result<(), anyhow::Error> {
shutdown_complete_tx.clone(),
)
.await
.inspect_err(|err| {
.map_err(|err| {
error!("initialize dynconfig server failed: {}", err);
err
})?;
let dynconfig = Arc::new(dynconfig);
// Initialize scheduler client.
let scheduler_client = SchedulerClient::new(config.clone(), dynconfig.clone())
let scheduler_client = SchedulerClient::new(dynconfig.clone())
.await
.inspect_err(|err| {
.map_err(|err| {
error!("initialize scheduler client failed: {}", err);
err
})?;
let scheduler_client = Arc::new(scheduler_client);
let backend_factory = BackendFactory::new(Some(config.server.plugin_dir.as_path()))
.inspect_err(|err| {
error!("initialize backend factory failed: {}", err);
})?;
let backend_factory = Arc::new(backend_factory);
// Initialize task manager.
let task = Task::new(
config.clone(),
id_generator.clone(),
storage.clone(),
scheduler_client.clone(),
backend_factory.clone(),
)?;
let task = Arc::new(task);
// Initialize persistent cache task manager.
let persistent_cache_task = PersistentCacheTask::new(
config.clone(),
id_generator.clone(),
storage.clone(),
scheduler_client.clone(),
backend_factory.clone(),
)?;
let persistent_cache_task = Arc::new(persistent_cache_task);
let interface = Interface::new(config.host.ip.unwrap(), config.upload.rate_limit);
let interface = Arc::new(interface);
// Initialize health server.
let health = Health::new(
SocketAddr::new(config.health.server.ip.unwrap(), config.health.server.port),
shutdown.clone(),
shutdown_complete_tx.clone(),
http_client.clone(),
);
let task = Arc::new(task);
// Initialize metrics server.
let metrics = Metrics::new(
config.clone(),
SocketAddr::new(config.metrics.ip.unwrap(), config.metrics.port),
shutdown.clone(),
shutdown_complete_tx.clone(),
);
// Initialize stats server.
let stats = Stats::new(
SocketAddr::new(config.stats.server.ip.unwrap(), config.stats.server.port),
shutdown.clone(),
shutdown_complete_tx.clone(),
);
// Initialize proxy server.
let proxy = Proxy::new(
// Initialize manager announcer.
let manager_announcer = ManagerAnnouncer::new(
config.clone(),
task.clone(),
manager_client.clone(),
shutdown.clone(),
shutdown_complete_tx.clone(),
);
@ -266,13 +191,13 @@ async fn main() -> Result<(), anyhow::Error> {
config.clone(),
id_generator.host_id(),
scheduler_client.clone(),
interface.clone(),
shutdown.clone(),
shutdown_complete_tx.clone(),
)
.await
.inspect_err(|err| {
.map_err(|err| {
error!("initialize scheduler announcer failed: {}", err);
err
})?;
// Initialize upload grpc server.
@ -280,18 +205,14 @@ async fn main() -> Result<(), anyhow::Error> {
config.clone(),
SocketAddr::new(config.upload.server.ip.unwrap(), config.upload.server.port),
task.clone(),
persistent_cache_task.clone(),
interface.clone(),
shutdown.clone(),
shutdown_complete_tx.clone(),
);
// Initialize download grpc server.
let mut dfdaemon_download_grpc = DfdaemonDownloadServer::new(
config.clone(),
config.download.server.socket_path.clone(),
task.clone(),
persistent_cache_task.clone(),
shutdown.clone(),
shutdown_complete_tx.clone(),
);
@ -299,9 +220,7 @@ async fn main() -> Result<(), anyhow::Error> {
// Initialize garbage collector.
let gc = GC::new(
config.clone(),
id_generator.host_id(),
storage.clone(),
scheduler_client.clone(),
shutdown.clone(),
shutdown_complete_tx.clone(),
);
@ -309,60 +228,34 @@ async fn main() -> Result<(), anyhow::Error> {
// Log dfdaemon started pid.
info!("dfdaemon started at pid {}", std::process::id());
// grpc server started barrier.
let grpc_server_started_barrier = Arc::new(Barrier::new(3));
// Wait for servers to exit or shutdown signal.
tokio::select! {
_ = tokio::spawn(async move { dynconfig.run().await }) => {
info!("dynconfig manager exited");
},
_ = tokio::spawn(async move { health.run().await }) => {
info!("health server exited");
},
_ = tokio::spawn(async move { metrics.run().await }) => {
info!("metrics server exited");
},
_ = tokio::spawn(async move { stats.run().await }) => {
info!("stats server exited");
_ = tokio::spawn(async move { manager_announcer.run().await }) => {
info!("announcer manager exited");
},
_ = tokio::spawn(async move { scheduler_announcer.run().await }) => {
info!("announcer scheduler exited");
},
_ = tokio::spawn(async move { gc.run().await }) => {
info!("garbage collector exited");
},
_ = {
let barrier = grpc_server_started_barrier.clone();
tokio::spawn(async move {
dfdaemon_upload_grpc.run(barrier).await.unwrap_or_else(|err| error!("dfdaemon upload grpc server failed: {}", err));
})
} => {
_ = tokio::spawn(async move { dfdaemon_upload_grpc.run().await }) => {
info!("dfdaemon upload grpc server exited");
},
_ = {
let barrier = grpc_server_started_barrier.clone();
tokio::spawn(async move {
dfdaemon_download_grpc.run(barrier).await.unwrap_or_else(|err| error!("dfdaemon download grpc server failed: {}", err));
})
} => {
_ = tokio::spawn(async move { dfdaemon_download_grpc.run().await }) => {
info!("dfdaemon download grpc unix server exited");
},
_ = {
let barrier = grpc_server_started_barrier.clone();
tokio::spawn(async move {
proxy.run(barrier).await.unwrap_or_else(|err| error!("proxy server failed: {}", err));
})
} => {
info!("proxy server exited");
_ = tokio::spawn(async move { gc.run().await }) => {
info!("garbage collector exited");
},
_ = shutdown::shutdown_signal() => {},
@ -375,10 +268,6 @@ async fn main() -> Result<(), anyhow::Error> {
// of scheduler_client, so scheduler_client can be released normally.
drop(task);
// Drop persistent cache task to release scheduler_client. when drop the persistent cache task, it will release the Arc reference
// of scheduler_client, so scheduler_client can be released normally.
drop(persistent_cache_task);
// Drop scheduler_client to release dynconfig. when drop the scheduler_client, it will release the
// Arc reference of dynconfig, so dynconfig can be released normally.
drop(scheduler_client);

401
src/bin/dfget/main.rs Normal file
View File

@ -0,0 +1,401 @@
/*
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use clap::Parser;
use dragonfly_api::common::v2::Download;
use dragonfly_api::common::v2::TaskType;
use dragonfly_api::dfdaemon::v2::DownloadTaskRequest;
use dragonfly_client::config::dfdaemon;
use dragonfly_client::config::dfget;
use dragonfly_client::grpc::dfdaemon_download::DfdaemonDownloadClient;
use dragonfly_client::grpc::health::HealthClient;
use dragonfly_client::tracing::init_tracing;
use dragonfly_client::Error;
use fslock::LockFile;
use indicatif::{ProgressBar, ProgressState, ProgressStyle};
use std::collections::HashMap;
use std::path::PathBuf;
use std::process::Stdio;
use std::time::Duration;
use std::{cmp::min, fmt::Write};
use tokio::process::{Child, Command};
use tracing::{debug, error, info, Level};
use url::Url;
// DEFAULT_DFDAEMON_CHECK_HEALTH_INTERVAL is the default interval of checking dfdaemon's health.
// It is the tick period used while waiting for a freshly spawned dfdaemon to become healthy.
const DEFAULT_DFDAEMON_CHECK_HEALTH_INTERVAL: Duration = Duration::from_millis(200);
// DEFAULT_DFDAEMON_CHECK_HEALTH_TIMEOUT is the default timeout of checking dfdaemon's health.
// When it elapses before a health check succeeds, getting the download client fails.
const DEFAULT_DFDAEMON_CHECK_HEALTH_TIMEOUT: Duration = Duration::from_secs(10);
// Args defines the command line arguments of dfget. The options prefixed with `dfdaemon`
// are not used by dfget itself; main forwards them to the code that spawns dfdaemon when
// no healthy dfdaemon is found.
#[derive(Debug, Parser)]
#[command(
name = dfget::NAME,
author,
version,
about = "dfget is a download command line based on P2P technology",
long_about = "A download command line based on P2P technology in Dragonfly that can download resources of different protocols."
)]
struct Args {
// URL of the resource to download. It has no short or long flag.
#[arg(help = "Specify the URL to download")]
url: Url,
// Path the downloaded file is written to. main sends it to dfdaemon as a string.
#[arg(
short = 'o',
long = "output",
help = "Specify the output path of downloading file"
)]
output: PathBuf,
// Endpoint used for both the health check and the download client.
#[arg(
short = 'e',
long = "endpoint",
default_value_os_t = dfdaemon::default_download_unix_socket_path(),
help = "Endpoint of dfdaemon's GRPC server"
)]
endpoint: PathBuf,
// Download timeout, parsed by humantime (e.g. "2h", "30m").
#[arg(
long = "timeout",
value_parser= humantime::parse_duration,
default_value = "2h",
help = "Specify the timeout for downloading a file"
)]
timeout: Duration,
// Piece length in bytes. The default 4194304 is 4 MiB.
#[arg(
long = "piece-length",
default_value_t = 4194304,
help = "Specify the byte length of the piece"
)]
piece_length: u64,
// Download rate limit in bytes per second. The default 2147483648 is 2 GiB/s.
#[arg(
long = "download-rate-limit",
default_value_t = 2147483648,
help = "Specify the rate limit of the downloading in bytes per second"
)]
download_rate_limit: u64,
// Expected digest of the file. Defaults to an empty string, which main still sends as Some("").
#[arg(
short = 'd',
long = "digest",
default_value = "",
help = "Verify the integrity of the downloaded file using the specified digest, e.g. md5:86d3f3a95c324c9479bd8986968f4327"
)]
digest: String,
// Scheduling priority forwarded unchanged in the download request.
#[arg(
short = 'p',
long = "priority",
default_value_t = 6,
help = "Specify the priority for scheduling task"
)]
priority: i32,
// Caller application name forwarded in the download request.
#[arg(
long = "application",
default_value = "",
help = "Caller application which is used for statistics and access control"
)]
application: String,
// Task tag forwarded in the download request.
#[arg(
long = "tag",
default_value = "",
help = "Different tags for the same url will be divided into different tasks"
)]
tag: String,
// Raw "Key: Value" header strings. main converts them with parse_header.
#[arg(
short = 'H',
long = "header",
required = false,
help = "Specify the header for downloading file, e.g. --header='Content-Type: application/json' --header='Accept: application/json'"
)]
header: Option<Vec<String>>,
// Query parameter names to filter. A missing option is sent as an empty list.
#[arg(
long = "filter",
required = false,
help = "Filter the query parameters of the downloaded URL. If the download URL is the same, it will be scheduled as the same task, e.g. --filter='signature' --filter='timeout'"
)]
filters: Option<Vec<String>>,
// NOTE(review): this flag is parsed but main never reads it; confirm whether it should
// influence the download request.
#[arg(
long = "disable-back-to-source",
default_value_t = false,
help = "Disable back-to-source download when dfget download failed"
)]
disable_back_to_source: bool,
// Logging options of dfget itself, passed to init_tracing.
#[arg(
short = 'l',
long,
default_value = "info",
help = "Specify the logging level [trace, debug, info, warn, error]"
)]
log_level: Level,
#[arg(
long,
default_value_os_t = dfget::default_dfget_log_dir(),
help = "Specify the log directory"
)]
log_dir: PathBuf,
#[arg(
long,
default_value_t = 24,
help = "Specify the max number of log files"
)]
log_max_files: usize,
// Config file handed to a spawned dfdaemon via its --config flag.
#[arg(
short = 'c',
long = "dfdaemon-config",
default_value_os_t = dfdaemon::default_dfdaemon_config_path(),
help = "Specify dfdaemon's config file to use")
]
dfdaemon_config: PathBuf,
// Lock file taken before spawning dfdaemon, to prevent multiple dfdaemon processes
// from being created.
#[arg(
long = "dfdaemon-lock-path",
default_value_os_t = dfdaemon::default_dfdaemon_lock_path(),
help = "Specify the dfdaemon's lock file path"
)]
dfdaemon_lock_path: PathBuf,
// Logging options handed to a spawned dfdaemon via --log-level, --log-dir and --log-max-files.
#[arg(
long = "dfdaemon-log-level",
default_value = "info",
help = "Specify the dfdaemon's logging level [trace, debug, info, warn, error]"
)]
dfdaemon_log_level: Level,
#[arg(
long = "dfdaemon-log-dir",
default_value_os_t = dfdaemon::default_dfdaemon_log_dir(),
help = "Specify the dfdaemon's log directory"
)]
dfdaemon_log_dir: PathBuf,
#[arg(
long,
default_value_t = 24,
help = "Specify the dfdaemon's max number of log files"
)]
dfdaemon_log_max_files: usize,
}
// main parses the command line arguments, gets (or spawns) a healthy dfdaemon, asks it to
// download the task and renders a progress bar from the streamed responses.
//
// It returns an error when dfdaemon cannot be reached or spawned, when the output path or a
// header is invalid, when the download request fails, or when a streamed message has no piece.
#[tokio::main]
async fn main() -> Result<(), anyhow::Error> {
    // Parse command line arguments.
    let args = Args::parse();

    // Initialize tracing. The guards are kept bound until main returns.
    let _guards = init_tracing(
        dfget::NAME,
        &args.log_dir,
        args.log_level,
        args.log_max_files,
        None,
    );

    // Get or create dfdaemon download client.
    let dfdaemon_download_client = get_or_create_dfdaemon_download_client(
        args.dfdaemon_config,
        args.endpoint.clone(),
        args.dfdaemon_log_dir,
        args.dfdaemon_log_level,
        args.dfdaemon_log_max_files,
        args.dfdaemon_lock_path,
    )
    .await
    .map_err(|err| {
        error!("initialize dfdaemon download client failed: {}", err);
        err
    })?;

    // The output path is sent to dfdaemon as a string, so a path that is not valid UTF-8 is
    // reported as an error instead of panicking.
    let output_path = args
        .output
        .into_os_string()
        .into_string()
        .map_err(|path| anyhow::anyhow!("output path {:?} is not valid UTF-8", path))?;

    // Request dfdaemon to download the task.
    //
    // NOTE(review): args.disable_back_to_source is parsed but not forwarded here; confirm
    // whether it should influence the request.
    let response = dfdaemon_download_client
        .download_task(DownloadTaskRequest {
            download: Some(Download {
                url: args.url.to_string(),
                digest: Some(args.digest),
                range: None,
                r#type: TaskType::Dfdaemon as i32,
                tag: Some(args.tag),
                application: Some(args.application),
                priority: args.priority,
                filters: args.filters.unwrap_or_default(),
                header: parse_header(args.header.unwrap_or_default())?,
                piece_length: args.piece_length,
                output_path,
                timeout: Some(prost_wkt_types::Duration::try_from(args.timeout)?),
                download_rate_limit: Some(args.download_rate_limit),
                need_back_to_source: false,
            }),
        })
        .await
        .map_err(|err| {
            error!("download task failed: {}", err);
            err
        })?;

    // Initialize progress bar.
    let pb = ProgressBar::new(0);
    pb.set_style(
        ProgressStyle::with_template(
            "[{elapsed_precise}] [{wide_bar}] {bytes}/{total_bytes} ({bytes_per_sec}, {eta})",
        )?
        .with_key("eta", |state: &ProgressState, w: &mut dyn Write| {
            write!(w, "{:.1}s", state.eta().as_secs_f64()).unwrap()
        })
        .progress_chars("#>-"),
    );

    // Download file.
    let mut downloaded = 0;
    let mut out_stream = response.into_inner();
    while let Some(message) = out_stream.message().await? {
        let piece = message.piece.ok_or(Error::InvalidParameter())?;
        pb.set_length(message.content_length);

        // `downloaded` already includes the current piece after this addition, so it must not
        // be added a second time when computing the position.
        downloaded += piece.length;
        pb.set_position(min(downloaded, message.content_length));
    }

    pb.finish_with_message("downloaded");
    Ok(())
}
// get_or_create_dfdaemon_download_client returns a download client of a healthy dfdaemon.
//
// If no healthy dfdaemon answers on the endpoint, it takes the lock file, checks again,
// spawns a dfdaemon process and polls its health every DEFAULT_DFDAEMON_CHECK_HEALTH_INTERVAL
// until it succeeds or DEFAULT_DFDAEMON_CHECK_HEALTH_TIMEOUT elapses.
async fn get_or_create_dfdaemon_download_client(
    config_path: PathBuf,
    endpoint: PathBuf,
    log_dir: PathBuf,
    log_level: Level,
    log_max_files: usize,
    lock_path: PathBuf,
) -> Result<DfdaemonDownloadClient, anyhow::Error> {
    // Fast path: a healthy dfdaemon is already reachable.
    if let Some(client) = try_get_dfdaemon_download_client(&endpoint).await {
        return Ok(client);
    }

    // Create a lock file to prevent multiple dfdaemon processes from being created.
    let mut lock_file = LockFile::open(lock_path.as_path())?;
    lock_file.lock()?;

    // Another process may have started dfdaemon while this one waited for the lock.
    if let Some(client) = try_get_dfdaemon_download_client(&endpoint).await {
        return Ok(client);
    }

    // Spawn a dfdaemon process.
    let child = spawn_dfdaemon(config_path, log_dir, log_level, log_max_files)?;
    info!("spawn dfdaemon process: {:?}", child);

    // Deadline for the spawned dfdaemon to become healthy.
    let deadline = tokio::time::sleep(DEFAULT_DFDAEMON_CHECK_HEALTH_TIMEOUT);
    tokio::pin!(deadline);

    // Poll dfdaemon's health until it is ready or the deadline is reached.
    let mut ticker = tokio::time::interval(DEFAULT_DFDAEMON_CHECK_HEALTH_INTERVAL);
    loop {
        tokio::select! {
            _ = ticker.tick() => {
                if let Some(client) = try_get_dfdaemon_download_client(&endpoint).await {
                    lock_file.unlock()?;
                    return Ok(client);
                }
            }
            _ = &mut deadline => {
                return Err(anyhow::anyhow!("get dfdaemon download client timeout"));
            }
        }
    }
}

// try_get_dfdaemon_download_client makes one attempt to get a download client and logs the
// failure at debug level instead of returning it.
async fn try_get_dfdaemon_download_client(
    endpoint: &std::path::Path,
) -> Option<DfdaemonDownloadClient> {
    match get_dfdaemon_download_client(endpoint.to_path_buf()).await {
        Ok(client) => Some(client),
        Err(err) => {
            debug!("get dfdaemon download client failed: {}", err);
            None
        }
    }
}
// get_dfdaemon_download_client checks the health of dfdaemon's download service on the given
// endpoint and, only when that check succeeds, creates a download client for the same endpoint.
async fn get_dfdaemon_download_client(
    endpoint: PathBuf,
) -> Result<DfdaemonDownloadClient, anyhow::Error> {
    // A failure to create the health client or a failed check returns before any download
    // client is created.
    let health = HealthClient::new_unix(endpoint.clone()).await?;
    health.check_dfdaemon_download().await?;

    // The service is healthy, so create the download client.
    Ok(DfdaemonDownloadClient::new_unix(endpoint).await?)
}
// spawn_dfdaemon starts a `dfdaemon` process with the given config and logging options.
//
// The process gets null stdin, stdout and stderr and calls setsid before exec. The returned
// Child is the handle of the spawned process; an error is returned when spawning fails.
fn spawn_dfdaemon(
    config_path: PathBuf,
    log_dir: PathBuf,
    log_level: Level,
    log_max_files: usize,
) -> Result<Child, anyhow::Error> {
    let mut daemon = Command::new("dfdaemon");

    // Command line arguments, one flag and its value per statement.
    daemon.arg("--config").arg(config_path);
    daemon.arg("--log-dir").arg(log_dir);
    daemon.arg("--log-level").arg(log_level.to_string());
    daemon.arg("--log-max-files").arg(log_max_files.to_string());

    // Redirect stdin, stdout, stderr to /dev/null.
    daemon.stdin(Stdio::null());
    daemon.stdout(Stdio::null());
    daemon.stderr(Stdio::null());

    // Create a new session for dfdaemon by calling setsid.
    //
    // SAFETY: the closure only calls setsid(2), which POSIX lists as async-signal-safe,
    // and touches no state of this process. The return value of setsid is ignored.
    unsafe {
        daemon.pre_exec(|| {
            libc::setsid();
            Ok(())
        });
    }

    Ok(daemon.spawn()?)
}
// parse_header parses the header strings to a hash map.
fn parse_header(raw_header: Vec<String>) -> Result<HashMap<String, String>, Error> {
let mut header = HashMap::new();
for h in raw_header {
let mut parts = h.splitn(2, ':');
let key = parts.next().unwrap().trim();
let value = parts.next().unwrap().trim();
header.insert(key.to_string(), value.to_string());
}
Ok(header)
}

112
src/bin/dfstore/main.rs Normal file
View File

@ -0,0 +1,112 @@
/*
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use clap::{Parser, Subcommand};
use dragonfly_client::config::dfdaemon;
use dragonfly_client::config::dfstore;
use dragonfly_client::tracing::init_tracing;
use std::path::PathBuf;
use tracing::Level;
// Args defines the command line arguments of dfstore: the global options followed by a
// subcommand.
#[derive(Debug, Parser)]
#[command(
name = dfstore::NAME,
author,
version,
about = "dfstore is a storage command line based on P2P technology in Dragonfly.",
long_about = "A storage command line based on P2P technology in Dragonfly that can rely on different types of object storage, \
such as S3 or OSS, to provide stable object storage capabilities. It uses the entire P2P network as a cache when storing objects. \
Rely on S3 or OSS as the backend to ensure storage reliability. In the process of object storage, \
P2P cache is effectively used for fast read and write storage."
)]
struct Args {
// NOTE(review): parsed but not read by main in this file.
#[arg(
short = 'e',
long = "endpoint",
default_value_os_t = dfdaemon::default_download_unix_socket_path(),
help = "Endpoint of dfdaemon's GRPC server"
)]
endpoint: PathBuf,
// Logging options passed to init_tracing by main.
#[arg(
short = 'l',
long,
default_value = "info",
help = "Specify the logging level [trace, debug, info, warn, error]"
)]
log_level: Level,
#[arg(
long,
default_value_os_t = dfstore::default_dfstore_log_dir(),
help = "Specify the log directory"
)]
log_dir: PathBuf,
#[arg(
long,
default_value_t = 24,
help = "Specify the max number of log files"
)]
log_max_files: usize,
// Subcommand to run. NOTE(review): parsed but not dispatched by main in this file.
#[command(subcommand)]
command: Command,
}
// Command lists the subcommands of dfstore: `cp` maps to Copy and `rm` maps to Remove.
#[derive(Debug, Clone, Subcommand)]
#[command()]
pub enum Command {
// `cp` subcommand; its arguments are defined by CopyCommand.
#[command(
name = "cp",
author,
version,
about = "Download or upload files using object storage in Dragonfly",
long_about = "Download a file from object storage in Dragonfly or upload a local file to object storage in Dragonfly"
)]
Copy(CopyCommand),
// `rm` subcommand; its arguments are defined by RemoveCommand.
#[command(
name = "rm",
author,
version,
about = "Remove a file from Dragonfly object storage",
long_about = "Remove the P2P cache in Dragonfly and remove the file stored in the object storage."
)]
Remove(RemoveCommand),
}
// CopyCommand holds the arguments of the `cp` subcommand, which downloads or uploads files
// using object storage in Dragonfly. It declares no arguments yet.
#[derive(Debug, Clone, Parser)]
pub struct CopyCommand {}
// RemoveCommand holds the arguments of the `rm` subcommand, which removes a file from
// Dragonfly object storage. It declares no arguments yet.
#[derive(Debug, Clone, Parser)]
pub struct RemoveCommand {}
fn main() {
// Parse command line arguments.
let args = Args::parse();
// Initialize tracing.
let _guards = init_tracing(
dfstore::NAME,
&args.log_dir,
args.log_level,
args.log_max_files,
None,
);
}

Some files were not shown because too many files have changed in this diff Show More