Compare commits

..

No commits in common. "main" and "v0.6.0" have entirely different histories.
main ... v0.6.0

216 changed files with 7836 additions and 27630 deletions

View File

@ -1 +1,2 @@
/.git
/out /out

View File

@ -1,54 +0,0 @@
version: 2
updates:
# Automatic upgrade for go modules.
- package-ecosystem: "gomod"
directories:
- "/estargz"
- "/ipfs"
- "/"
- "/cmd"
schedule:
interval: "daily"
ignore:
# We upgrade this manually on each release
- dependency-name: "github.com/containerd/stargz-snapshotter/estargz"
groups:
golang-x:
patterns:
- "golang.org/x/*"
google-golang:
patterns:
- "google.golang.org/*"
containerd:
patterns:
- "github.com/containerd/*"
opencontainers:
patterns:
- "github.com/opencontainers/*"
k8s:
patterns:
- "k8s.io/*"
gomod:
# this pattern covers all go dependencies that are not in
# the above groups. dependabot doesn't seem to update sub-modules if
# a dependency doesn't belong to a group, so we define this group
# explicitly.
exclude-patterns:
- "golang.org/x/*"
- "google.golang.org/*"
- "github.com/containerd/*"
- "github.com/opencontainers/*"
- "k8s.io/*"
# Automatic upgrade for base images used in the Dockerfile
- package-ecosystem: "docker"
directory: "/"
schedule:
interval: "daily"
# Automatic upgrade for Github Actions
- package-ecosystem: "github-actions"
directory: "/" # means ".github/workflows"
schedule:
interval: "daily"

View File

@ -1,23 +1,19 @@
name: Benchmark name: Benchmark
on: on: [push, pull_request]
push:
branches:
- main
pull_request:
env: env:
DOCKER_BUILDKIT: 1 DOCKER_BUILDKIT: 1
jobs: jobs:
hello-bench: hello-bench:
runs-on: ubuntu-24.04 runs-on: ubuntu-20.04
name: HelloBench name: HelloBench
env: env:
BENCHMARK_LOG_DIR: ${{ github.workspace }}/log/ BENCHMARK_LOG_DIR: ${{ github.workspace }}/log/
BENCHMARK_RESULT_DIR: ${{ github.workspace }}/benchmark/ BENCHMARK_RESULT_DIR: ${{ github.workspace }}/benchmark/
BENCHMARK_REGISTRY: ghcr.io BENCHMARK_REGISTRY: ghcr.io
BENCHMARK_USER: stargz-containers BENCHMARK_USER: stargz-containers
BENCHMARK_TARGETS: python:3.10 gcc:11.2.0 postgres:14.2 tomcat:10.1.0-jdk17-openjdk-bullseye BENCHMARK_TARGETS: python:3.9 gcc:10.2.0 postgres:13.1 tomcat:10.0.0-jdk15-openjdk-buster
BENCHMARK_SAMPLES_NUM: 5 BENCHMARK_SAMPLES_NUM: 5
BENCHMARK_PERCENTILE: 95 BENCHMARK_PERCENTILE: 95
BENCHMARK_PERCENTILES_GRANULARITY: 25 BENCHMARK_PERCENTILES_GRANULARITY: 25
@ -29,9 +25,9 @@ jobs:
steps: steps:
- name: Install tools - name: Install tools
run: | run: |
sudo apt-get update && \ sudo apt-get update && sudo apt-get --no-install-recommends install -y gnuplot
sudo apt-get install -y gnuplot python3-numpy pip install numpy
- uses: actions/checkout@v4 - uses: actions/checkout@v2
- name: Prepare directories - name: Prepare directories
run: mkdir "${BENCHMARK_RESULT_DIR}" "${BENCHMARK_LOG_DIR}" run: mkdir "${BENCHMARK_RESULT_DIR}" "${BENCHMARK_LOG_DIR}"
- name: Get instance information - name: Get instance information
@ -43,7 +39,7 @@ jobs:
env: env:
BENCHMARK_RUNTIME_MODE: ${{ matrix.runtime }} BENCHMARK_RUNTIME_MODE: ${{ matrix.runtime }}
run: make benchmark run: make benchmark
- uses: actions/upload-artifact@v4 - uses: actions/upload-artifact@v1
if: ${{ always() }} if: ${{ always() }}
with: with:
name: benchmarking-result-${{ matrix.runtime }} name: benchmarking-result-${{ matrix.runtime }}

View File

@ -1,51 +0,0 @@
name: Kind image
on:
push:
tags:
- 'v*'
pull_request:
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
jobs:
kind-image:
runs-on: ubuntu-24.04
name: Kind image
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=semver,pattern={{version}}-kind
- name: Login to GHCR
if: github.event_name != 'pull_request'
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:qemu-v7.0.0-28
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build and push
uses: docker/build-push-action@v6.18.0
with:
context: .
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
platforms: linux/amd64,linux/arm64

View File

@ -17,91 +17,43 @@ on:
env: env:
DOCKER_BUILDKIT: 1 DOCKER_BUILDKIT: 1
DOCKER_BUILD_ARGS: --build-arg=CONTAINERD_VERSION=main # do tests with the latest containerd DOCKER_BUILD_ARGS: --build-arg=CONTAINERD_VERSION=master # do tests with the latest containerd
jobs: jobs:
integration: integration:
runs-on: ubuntu-24.04 runs-on: ubuntu-20.04
name: Integration name: Integration
steps: steps:
- name: Install htpasswd for setting up private registry - name: Install htpasswd for setting up private registry
run: sudo apt-get update -y && sudo apt-get --no-install-recommends install -y apache2-utils run: sudo apt-get update -y && sudo apt-get --no-install-recommends install -y apache2-utils
- uses: actions/checkout@v4 - uses: actions/checkout@v2
- name: Run integration test - name: Run integration test
run: make integration run: make integration
test-optimize: test-optimize:
runs-on: ubuntu-24.04 runs-on: ubuntu-20.04
name: Optimize name: Optimize
steps: steps:
- name: Install htpasswd for setting up private registry - name: Install htpasswd for setting up private registry
run: sudo apt-get update -y && sudo apt-get --no-install-recommends install -y apache2-utils run: sudo apt-get update -y && sudo apt-get --no-install-recommends install -y apache2-utils
- uses: actions/checkout@v4 - uses: actions/checkout@v2
- name: Run test for optimize subcommand of ctr-remote - name: Run test for optimize subcommand of ctr-remote
run: make test-optimize run: make test-optimize
test-kind: test-pullsecrets:
runs-on: ubuntu-24.04 runs-on: ubuntu-20.04
name: Kind name: PullSecrets
steps: steps:
- name: Install htpasswd for setting up private registry - name: Install htpasswd for setting up private registry
run: sudo apt-get update -y && sudo apt-get --no-install-recommends install -y apache2-utils run: sudo apt-get update -y && sudo apt-get --no-install-recommends install -y apache2-utils
- uses: actions/checkout@v4 - uses: actions/checkout@v2
- name: Run test for pulling image from private registry on Kubernetes - name: Run test for pulling image from private registry on Kubernetes
run: make test-kind run: make test-pullsecrets
test-criauth: test-cri:
runs-on: ubuntu-24.04 runs-on: ubuntu-20.04
name: CRIAuth name: CRIValidation
steps: steps:
- name: Install htpasswd for setting up private registry - uses: actions/checkout@v2
run: sudo apt-get update -y && sudo apt-get --no-install-recommends install -y apache2-utils - name: Varidate the runtime through CRI
- uses: actions/checkout@v4 run: make test-cri
- name: Run test for pulling image from private registry on Kubernetes
run: make test-criauth
test-cri-containerd:
runs-on: ubuntu-24.04
name: CRIValidationContainerd
steps:
- uses: actions/checkout@v4
- name: Varidate the runtime through CRI with containerd
run: make test-cri-containerd
test-cri-o:
runs-on: ubuntu-24.04
name: CRIValidationCRIO
steps:
- name: Install the latest docker
run: |
sudo apt-get remove moby-cli moby-engine
wget -O get-docker.sh https://get.docker.com
sh get-docker.sh
- uses: actions/checkout@v4
- name: Varidate the runtime through CRI with CRI-O
env:
DOCKER_BUILD_ARGS: "--build-arg=RUNC_VERSION=v1.0.3"
run: |
# needed to pass "runtime should output OOMKilled reason" test
sudo swapoff -a
make test-cri-o
test-k3s:
runs-on: ubuntu-24.04
name: K3S
steps:
- uses: actions/setup-go@v5
with:
go-version: '1.24.x'
- name: Install k3d
run: |
wget -q -O - https://raw.githubusercontent.com/rancher/k3d/v5.6.3/install.sh | bash
- name: Install htpasswd for setting up private registry
run: sudo apt-get update -y && sudo apt-get --no-install-recommends install -y apache2-utils
- name: Install yq
run: |
sudo wget -O /usr/local/bin/yq https://github.com/mikefarah/yq/releases/download/v4.9.3/yq_linux_amd64
sudo chmod +x /usr/local/bin/yq
- uses: actions/checkout@v4
- name: Run test with k3s
run: make test-k3s

View File

@ -9,7 +9,7 @@ env:
jobs: jobs:
build: build:
runs-on: ubuntu-24.04 runs-on: ubuntu-20.04
name: Build name: Build
strategy: strategy:
matrix: matrix:
@ -17,7 +17,7 @@ jobs:
env: env:
OUTPUT_DIR: ${{ github.workspace }}/out OUTPUT_DIR: ${{ github.workspace }}/out
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v2
- name: Build Binary - name: Build Binary
env: env:
DOCKER_BUILDKIT: 1 DOCKER_BUILDKIT: 1
@ -29,28 +29,26 @@ jobs:
if [ "${ARCH_ID}" == "arm-v7" ] ; then if [ "${ARCH_ID}" == "arm-v7" ] ; then
BUILD_ARGS="--build-arg=TARGETARCH=arm --build-arg=GOARM=7" BUILD_ARGS="--build-arg=TARGETARCH=arm --build-arg=GOARM=7"
fi fi
# make binaries static
BUILD_ARGS="$BUILD_ARGS --build-arg=CGO_ENABLED=0"
TAR_FILE_NAME="stargz-snapshotter-${RELEASE_TAG}-linux-${ARCH_ID}.tar.gz" TAR_FILE_NAME="stargz-snapshotter-${RELEASE_TAG}-linux-${ARCH_ID}.tar.gz"
SHA256SUM_FILE_NAME="${TAR_FILE_NAME}.sha256sum" SHA256SUM_FILE_NAME="${TAR_FILE_NAME}.sha256sum"
docker build ${BUILD_ARGS} --target release-binaries -o - . | gzip > "${OUTPUT_DIR}/${TAR_FILE_NAME}" docker build ${BUILD_ARGS} --target release-binaries -o - . | gzip > "${OUTPUT_DIR}/${TAR_FILE_NAME}"
( cd ${OUTPUT_DIR}; sha256sum ${TAR_FILE_NAME} ) > "${OUTPUT_DIR}/${SHA256SUM_FILE_NAME}" ( cd ${OUTPUT_DIR}; sha256sum ${TAR_FILE_NAME} ) > "${OUTPUT_DIR}/${SHA256SUM_FILE_NAME}"
- name: Save Binary - name: Save Binary
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v2
with: with:
name: builds-${{ matrix.arch }} name: builds-${{ matrix.arch }}
path: ${{ env.OUTPUT_DIR }}/* path: ${{ env.OUTPUT_DIR }}/*
release: release:
runs-on: ubuntu-24.04 runs-on: ubuntu-20.04
name: Release name: Release
needs: [build] needs: [build]
env: env:
OUTPUT_DIR: ${{ github.workspace }}/builds OUTPUT_DIR: ${{ github.workspace }}/builds
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v2
- name: Download Builds - name: Download Builds
uses: actions/download-artifact@v5 uses: actions/download-artifact@v2
with: with:
path: ${{ env.OUTPUT_DIR }} path: ${{ env.OUTPUT_DIR }}
- name: Create Release - name: Create Release
@ -59,11 +57,15 @@ jobs:
run: | run: |
RELEASE_TAG="${GITHUB_REF##*/}" RELEASE_TAG="${GITHUB_REF##*/}"
cat <<EOF > ${GITHUB_WORKSPACE}/release-note.txt cat <<EOF > ${GITHUB_WORKSPACE}/release-note.txt
${RELEASE_TAG}
(TBD) (TBD)
EOF EOF
ASSET_ARGS=() ASSET_FLAGS=()
ls -al ${OUTPUT_DIR}/ ls -al ${OUTPUT_DIR}/
for A in "amd64" "arm-v7" "arm64" "ppc64le" "s390x" ; do for A in "amd64" "arm-v7" "arm64" "ppc64le" "s390x" ; do
ASSET_ARGS+=("${OUTPUT_DIR}/builds-${A}/*") for F in ${OUTPUT_DIR}/builds-${A}/* ; do
ASSET_FLAGS+=("-a" "$F")
done
done done
gh release create -F ${GITHUB_WORKSPACE}/release-note.txt --draft --title "${RELEASE_TAG}" "${RELEASE_TAG}" ${ASSET_ARGS[@]} hub release create "${ASSET_FLAGS[@]}" -F ${GITHUB_WORKSPACE}/release-note.txt --draft "${RELEASE_TAG}"

View File

@ -1,125 +1,80 @@
name: Tests name: Tests
on: on: [push, pull_request]
push:
branches:
- main
pull_request:
env: env:
DOCKER_BUILDKIT: 1 DOCKER_BUILDKIT: 1
jobs: jobs:
build: build:
runs-on: ubuntu-24.04 runs-on: ubuntu-20.04
name: Build name: Build
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v2
- name: Build all - name: Build all
run: ./script/util/make.sh build -j2 run: ./script/util/make.sh build -j2
test: test:
runs-on: ubuntu-24.04 runs-on: ubuntu-20.04
name: Test name: Test
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v2
- name: Test all - name: Test all
run: ./script/util/make.sh test-all -j2 run: ./script/util/make.sh test-all -j2
linter: linter:
runs-on: ubuntu-24.04 runs-on: ubuntu-20.04
name: Linter name: Linter
strategy:
fail-fast: false
matrix:
targetdir: [".", "./estargz", "./cmd", "./ipfs"]
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v2
with: with:
fetch-depth: '0' fetch-depth: '0'
- uses: actions/setup-go@v5 - name: Run Linter
with: run: ./script/util/make.sh install-check-tools check
go-version: '1.24.x'
- name: golangci-lint
uses: golangci/golangci-lint-action@v8.0.0
with:
version: v2.1
args: --verbose --timeout=10m
working-directory: ${{ matrix.targetdir }}
integration: integration:
runs-on: ubuntu-24.04 runs-on: ubuntu-20.04
name: Integration name: Integration
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
buildargs: ["", "--build-arg=CONTAINERD_VERSION=main"] # released version & main version buildargs: ["", "--build-arg=CONTAINERD_VERSION=master"] # released version & master version
builtin: ["true", "false"] builtin: ["true", "false"]
metadata-store: ["memory", "db"]
fuse-passthrough: ["true", "false"]
fuse-manager: ["true", "false"]
transfer-service: ["true", "false"]
exclude: exclude:
- buildargs: "" - buildargs: ""
builtin: "true" builtin: "true"
- metadata-store: "db"
builtin: "true"
- metadata-store: "db"
buildargs: "--build-arg=CONTAINERD_VERSION=main"
- fuse-passthrough: "true"
builtin: "true"
- fuse-passthrough: "true"
buildargs: "--build-arg=CONTAINERD_VERSION=main"
- fuse-passthrough: "true"
metadata-store: "db"
- fuse-manager: "true"
builtin: "true"
- fuse-manager: "true"
buildargs: "--build-arg=CONTAINERD_VERSION=main"
- transfer-service: "true"
buildargs: "--build-arg=CONTAINERD_VERSION=main"
- transfer-service: "true"
builtin: "true"
- transfer-service: "true"
metadata-store: "db"
- transfer-service: "true"
fuse-passthrough: "true"
steps: steps:
- name: Install htpasswd for setting up private registry - name: Install htpasswd for setting up private registry
run: sudo apt-get update -y && sudo apt-get --no-install-recommends install -y apache2-utils run: sudo apt-get update -y && sudo apt-get --no-install-recommends install -y apache2-utils
- uses: actions/checkout@v4 - uses: actions/checkout@v2
- name: Run integration test - name: Run integration test
env: env:
DOCKER_BUILD_ARGS: ${{ matrix.buildargs }} DOCKER_BUILD_ARGS: ${{ matrix.buildargs }}
BUILTIN_SNAPSHOTTER: ${{ matrix.builtin }} BUILTIN_SNAPSHOTTER: ${{ matrix.builtin }}
METADATA_STORE: ${{ matrix.metadata-store }}
FUSE_PASSTHROUGH: ${{ matrix.fuse-passthrough }}
FUSE_MANAGER: ${{ matrix.fuse-manager }}
TRANSFER_SERVICE: ${{ matrix.transfer-service }}
run: make integration run: make integration
test-optimize: test-optimize:
runs-on: ubuntu-24.04 runs-on: ubuntu-20.04
name: Optimize name: Optimize
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
buildargs: ["", "--build-arg=CONTAINERD_VERSION=main"] # released version & main version buildargs: ["", "--build-arg=CONTAINERD_VERSION=master"] # released version & master version
steps: steps:
- name: Install htpasswd for setting up private registry - name: Install htpasswd for setting up private registry
run: sudo apt-get update -y && sudo apt-get --no-install-recommends install -y apache2-utils run: sudo apt-get update -y && sudo apt-get --no-install-recommends install -y apache2-utils
- uses: actions/checkout@v4 - uses: actions/checkout@v2
- name: Run test for optimize subcommand of ctr-remote - name: Run test for optimize subcommand of ctr-remote
env: env:
DOCKER_BUILD_ARGS: ${{ matrix.buildargs }} DOCKER_BUILD_ARGS: ${{ matrix.buildargs }}
run: make test-optimize run: make test-optimize
test-kind: test-pullsecrets:
runs-on: ubuntu-24.04 runs-on: ubuntu-20.04
name: Kind name: PullSecrets
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
buildargs: ["", "--build-arg=CONTAINERD_VERSION=main"] # released version & main version buildargs: ["", "--build-arg=CONTAINERD_VERSION=master"] # released version & master version
builtin: ["true", "false"] builtin: ["true", "false"]
exclude: exclude:
- buildargs: "" - buildargs: ""
@ -127,181 +82,39 @@ jobs:
steps: steps:
- name: Install htpasswd for setting up private registry - name: Install htpasswd for setting up private registry
run: sudo apt-get update -y && sudo apt-get --no-install-recommends install -y apache2-utils run: sudo apt-get update -y && sudo apt-get --no-install-recommends install -y apache2-utils
- uses: actions/checkout@v4 - uses: actions/checkout@v2
- name: Run test for pulling image from private registry on Kubernetes - name: Run test for pulling image from private registry on Kubernetes
env: env:
DOCKER_BUILD_ARGS: ${{ matrix.buildargs }} DOCKER_BUILD_ARGS: ${{ matrix.buildargs }}
BUILTIN_SNAPSHOTTER: ${{ matrix.builtin }} BUILTIN_SNAPSHOTTER: ${{ matrix.builtin }}
run: make test-kind run: make test-pullsecrets
test-criauth:
runs-on: ubuntu-24.04
name: CRIAuth
strategy:
fail-fast: false
matrix:
buildargs: ["", "--build-arg=CONTAINERD_VERSION=main"] # released version & main version
builtin: ["true", "false"]
exclude:
- buildargs: ""
builtin: "true"
steps:
- name: Install htpasswd for setting up private registry
run: sudo apt-get update -y && sudo apt-get --no-install-recommends install -y apache2-utils
- uses: actions/checkout@v4
- name: Run test for pulling image from private registry on Kubernetes with CRI keychain mode
env:
DOCKER_BUILD_ARGS: ${{ matrix.buildargs }}
BUILTIN_SNAPSHOTTER: ${{ matrix.builtin }}
run: make test-criauth
test-cri-containerd: test-cri-containerd:
runs-on: ubuntu-24.04 runs-on: ubuntu-20.04
name: CRIValidationContainerd name: CRIValidationContainerd
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
buildargs: ["", "--build-arg=CONTAINERD_VERSION=main"] # released version & main version buildargs: ["", "--build-arg=CONTAINERD_VERSION=master"] # released version & master version
builtin: ["true", "false"] builtin: ["true", "false"]
metadata-store: ["memory", "db"]
fuse-passthrough: ["true", "false"]
fuse-manager: ["true", "false"]
transfer-service: ["true", "false"]
exclude: exclude:
- buildargs: "" - buildargs: ""
builtin: "true" builtin: "true"
- metadata-store: "db"
builtin: "true"
- metadata-store: "db"
buildargs: "--build-arg=CONTAINERD_VERSION=main"
- fuse-passthrough: "true"
builtin: "true"
- fuse-passthrough: "true"
buildargs: "--build-arg=CONTAINERD_VERSION=main"
- fuse-passthrough: "true"
metadata-store: "db"
- fuse-manager: "true"
builtin: "true"
- fuse-manager: "true"
buildargs: "--build-arg=CONTAINERD_VERSION=main"
- transfer-service: "true"
buildargs: "--build-arg=CONTAINERD_VERSION=main"
- transfer-service: "true"
builtin: "true"
- transfer-service: "true"
metadata-store: "db"
- transfer-service: "true"
fuse-passthrough: "true"
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v2
- name: Validate containerd through CRI - name: Validate containerd through CRI
env: env:
DOCKER_BUILD_ARGS: ${{ matrix.buildargs }} DOCKER_BUILD_ARGS: ${{ matrix.buildargs }}
BUILTIN_SNAPSHOTTER: ${{ matrix.builtin }} BUILTIN_SNAPSHOTTER: ${{ matrix.builtin }}
METADATA_STORE: ${{ matrix.metadata-store }}
FUSE_PASSTHROUGH: ${{ matrix.fuse-passthrough }}
FUSE_MANAGER: ${{ matrix.fuse-manager }}
TRANSFER_SERVICE: ${{ matrix.transfer-service }}
run: make test-cri-containerd run: make test-cri-containerd
test-cri-cri-o: test-cri-cri-o:
runs-on: ubuntu-24.04 runs-on: ubuntu-20.04
name: CRIValidationCRIO name: CRIValidationCRIO
strategy:
fail-fast: false
matrix:
metadata-store: ["memory", "db"]
steps: steps:
- name: Install the latest docker - uses: actions/checkout@v2
run: |
sudo apt-get remove moby-cli moby-engine
wget -O get-docker.sh https://get.docker.com
sh get-docker.sh
- uses: actions/checkout@v4
- name: Validate CRI-O through CRI - name: Validate CRI-O through CRI
env: run: make test-cri-o
DOCKER_BUILD_ARGS: "--build-arg=RUNC_VERSION=v1.0.3"
METADATA_STORE: ${{ matrix.metadata-store }}
run: |
# needed to pass "runtime should output OOMKilled reason" test
sudo swapoff -a
make test-cri-o
test-podman:
runs-on: ubuntu-24.04
name: PodmanRootless
steps:
- uses: actions/checkout@v4
- name: Test Podman (rootless)
run: make test-podman
test-k3s:
runs-on: ubuntu-24.04
name: K3S
steps:
- uses: actions/setup-go@v5
with:
go-version: '1.24.x'
- name: Install k3d
run: |
wget -q -O - https://raw.githubusercontent.com/rancher/k3d/v5.6.3/install.sh | bash
- name: Install htpasswd for setting up private registry
run: sudo apt-get update -y && sudo apt-get --no-install-recommends install -y apache2-utils
- name: Install yq
run: |
sudo wget -O /usr/local/bin/yq https://github.com/mikefarah/yq/releases/download/v4.9.3/yq_linux_amd64
sudo chmod +x /usr/local/bin/yq
- uses: actions/checkout@v4
- name: Run test with k3s
run: make test-k3s
test-ipfs:
runs-on: ubuntu-24.04
name: IPFS
steps:
- uses: actions/checkout@v4
- name: Run test
run: make test-ipfs
test-k3s-argo-workflow:
runs-on: ubuntu-24.04
name: K3SArgoWorkflow
env:
RESULT_DIR: ${{ github.workspace }}/argo-workflow/
steps:
- uses: actions/setup-go@v5
with:
go-version: '1.24.x'
- name: Install k3d
run: |
wget -q -O - https://raw.githubusercontent.com/rancher/k3d/v5.6.3/install.sh | bash
- name: Install argo worklflow
run: |
wget -q https://github.com/argoproj/argo-workflows/releases/download/v3.0.10/argo-linux-amd64.gz
gunzip argo-linux-amd64.gz
sudo mv argo-linux-amd64 /usr/local/bin/argo
sudo chmod +x /usr/local/bin/argo
- name: Workaround for freeing up more disk space
# https://github.com/actions/runner-images/issues/2606
run: |
sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android
sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET
- uses: actions/checkout@v4
- name: Prepare directories
run: mkdir "${RESULT_DIR}"
- name: Get instance information
run: |
curl -H "Metadata:true" "http://169.254.169.254/metadata/instance?api-version=2019-11-01" | \
jq '{ location : .compute.location, vmSize : .compute.vmSize }' | \
tee ${{ env.RESULT_DIR }}/instance.json
- name: Run argo workflow
env:
RESULT: ${{ env.RESULT_DIR }}/result.json
run: make test-k3s-argo-workflow
- uses: actions/upload-artifact@v4
with:
name: k3s-argo-workflow
path: ${{ env.RESULT_DIR }}
# #
# Project checks # Project checks
@ -311,43 +124,13 @@ jobs:
project: project:
name: Project Checks name: Project Checks
runs-on: ubuntu-24.04 runs-on: ubuntu-20.04
timeout-minutes: 10 timeout-minutes: 5
steps: steps:
- uses: actions/setup-go@v5 - uses: actions/checkout@v2
with:
go-version: '1.24.x'
- uses: actions/checkout@v4
with: with:
path: src/github.com/containerd/stargz-snapshotter path: src/github.com/containerd/stargz-snapshotter
fetch-depth: 25 fetch-depth: 25
- uses: containerd/project-checks@v1.2.2 - uses: containerd/project-checks@v1
with: with:
working-directory: src/github.com/containerd/stargz-snapshotter working-directory: src/github.com/containerd/stargz-snapshotter
# go-licenses-ignore is set because go-licenses cannot correctly detect the license of the following packages:
# * estargz packages: Apache-2.0 and BSD-3-Clause dual license
# (https://github.com/containerd/stargz-snapshotter/blob/main/NOTICE.md)
#
# The list of the CNCF-approved licenses can be found here:
# https://github.com/cncf/foundation/blob/main/allowed-third-party-license-policy.md
#
# hashicorp packages: MPL-2.0
# (https://github.com/hashicorp/go-cleanhttp/blob/master/LICENSE,
# https://github.com/hashicorp/go-retryablehttp/blob/master/LICENSE)
# Note: MPL-2.0 is not in the CNCF-approved licenses list, but these packages are allowed as exceptions.
# See CNCF licensing exceptions:
# https://github.com/cncf/foundation/blob/main/license-exceptions/CNCF-licensing-exceptions.csv
go-licenses-ignore: |
github.com/containerd/stargz-snapshotter/estargz
github.com/containerd/stargz-snapshotter/estargz/errorutil
github.com/containerd/stargz-snapshotter/estargz/externaltoc
github.com/containerd/stargz-snapshotter/estargz/zstdchunked
github.com/hashicorp/go-cleanhttp
github.com/hashicorp/go-retryablehttp
- name: Check proto generated code
run: make validate-generated
working-directory: src/github.com/containerd/stargz-snapshotter
- run: ./script/util/verify-no-patent.sh
working-directory: src/github.com/containerd/stargz-snapshotter
- run: make validate-vendor
working-directory: src/github.com/containerd/stargz-snapshotter

View File

@ -1,54 +1,26 @@
version: "2" # This is applied to `estargz` submodule as well.
# https://golangci-lint.run/usage/configuration#config-file
linters: linters:
enable: enable:
- depguard - structcheck
- misspell - varcheck
- revive - staticcheck
- unconvert - unconvert
disable:
- errcheck
settings:
depguard:
rules:
main:
deny:
- pkg: github.com/containerd/containerd/errdefs
desc: The containerd errdefs package was migrated to a separate module. Use github.com/containerd/errdefs instead.
- pkg: github.com/containerd/containerd/log
desc: The containerd log package was migrated to a separate module. Use github.com/containerd/log instead.
exclusions:
generated: lax
presets:
- comments
- common-false-positives
- legacy
- std-error-handling
rules:
- linters:
- revive
text: unused-parameter
- linters:
- revive
text: redefines-builtin-id
paths:
- docs
- images
- out
- script
- third_party$
- builtin$
- examples$
formatters:
enable:
- gofmt - gofmt
- goimports - goimports
exclusions: - golint
generated: lax - ineffassign
paths: - vet
- docs - unused
- images - misspell
- out disable:
- script - errcheck
- third_party$
- builtin$ run:
- examples$ deadline: 4m
skip-dirs:
- docs
- images
- out
- script

View File

@ -12,83 +12,64 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
ARG CONTAINERD_VERSION=v2.1.3 ARG CONTAINERD_VERSION=v1.5.0
ARG RUNC_VERSION=v1.3.0 ARG RUNC_VERSION=v1.0.0-rc93
ARG CNI_PLUGINS_VERSION=v1.7.1 ARG CNI_PLUGINS_VERSION=v0.9.1
ARG NERDCTL_VERSION=2.1.3 ARG NERDCTL_VERSION=0.8.1
ARG PODMAN_VERSION=v5.5.2 ARG PODMAN_VERSION=ed6f399770946bb2e88f8b94e1d2f279208648d4
ARG CRIO_VERSION=v1.33.2 ARG CRIO_VERSION=7a7df87127ede57c26d666de9e926d6200637f95
ARG CONMON_VERSION=v2.1.13 ARG CONMON_VERSION=v2.0.26
ARG COMMON_VERSION=v0.63.0 ARG COMMON_VERSION=v0.37.1
ARG CRIO_TEST_PAUSE_IMAGE_NAME=registry.k8s.io/pause:3.6 ARG CRIO_TEST_PAUSE_IMAGE_NAME=k8s.gcr.io/pause:3.5
ARG NETAVARK_VERSION=v1.15.2
ARG CONTAINERIZED_SYSTEMD_VERSION=v0.1.1
ARG SLIRP4NETNS_VERSION=v1.3.3
ARG PAUSE_IMAGE_NAME_TEST=registry.k8s.io/pause:3.10.1
# Used in CI
ARG CRI_TOOLS_VERSION=v1.30.1
# Legacy builder that doesn't support TARGETARCH should set this explicitly using --build-arg. # Legacy builder that doesn't support TARGETARCH should set this explicitly using --build-arg.
# If TARGETARCH isn't supported by the builder, the default value is "amd64". # If TARGETARCH isn't supported by the builder, the default value is "amd64".
FROM golang:1.24-bullseye AS golang-base FROM golang:1.16-buster AS golang-base
# Build containerd # Build containerd
FROM --platform=$BUILDPLATFORM golang:1.24-bullseye AS containerd-dev FROM golang-base AS containerd-dev
ARG CONTAINERD_VERSION ARG CONTAINERD_VERSION
ARG TARGETARCH RUN apt-get update -y && apt-get install -y libbtrfs-dev libseccomp-dev && \
RUN git clone -b ${CONTAINERD_VERSION} --depth 1 \ git clone -b ${CONTAINERD_VERSION} --depth 1 \
https://github.com/containerd/containerd $GOPATH/src/github.com/containerd/containerd && \ https://github.com/containerd/containerd $GOPATH/src/github.com/containerd/containerd && \
cd $GOPATH/src/github.com/containerd/containerd && \ cd $GOPATH/src/github.com/containerd/containerd && \
GOARCH=$TARGETARCH make && DESTDIR=/out/ PREFIX= make install GO111MODULE=off make && DESTDIR=/out/ make install
# Build containerd with builtin stargz snapshotter # Build containerd with builtin stargz snapshotter
FROM --platform=$BUILDPLATFORM golang:1.24-bullseye AS containerd-snapshotter-dev FROM golang-base AS containerd-snapshotter-dev
ARG CONTAINERD_VERSION ARG CONTAINERD_VERSION
ARG TARGETARCH
COPY . $GOPATH/src/github.com/containerd/stargz-snapshotter COPY . $GOPATH/src/github.com/containerd/stargz-snapshotter
RUN git clone -b ${CONTAINERD_VERSION} --depth 1 \ RUN apt-get update -y && apt-get install -y libbtrfs-dev libseccomp-dev && \
https://github.com/containerd/containerd $GOPATH/src/github.com/containerd/containerd && \ git clone -b ${CONTAINERD_VERSION} --depth 1 \
https://github.com/containerd/containerd $GOPATH/src/github.com/containerd/containerd && \
echo 'require github.com/containerd/stargz-snapshotter v0.0.0\nreplace github.com/containerd/stargz-snapshotter => '$GOPATH'/src/github.com/containerd/stargz-snapshotter\nreplace github.com/containerd/stargz-snapshotter/estargz => '$GOPATH'/src/github.com/containerd/stargz-snapshotter/estargz' \
>> $GOPATH/src/github.com/containerd/containerd/go.mod && \
echo 'package main \nimport _ "github.com/containerd/stargz-snapshotter/service/plugin"' \
> $GOPATH/src/github.com/containerd/containerd/cmd/containerd/builtins_stargz_snapshotter.go && \
cd $GOPATH/src/github.com/containerd/containerd && \ cd $GOPATH/src/github.com/containerd/containerd && \
echo 'require github.com/containerd/stargz-snapshotter v0.0.0' >> go.mod && \ make vendor && make && DESTDIR=/out/ make install
echo 'replace github.com/containerd/stargz-snapshotter => '$GOPATH'/src/github.com/containerd/stargz-snapshotter' >> go.mod && \
echo 'replace github.com/containerd/stargz-snapshotter/estargz => '$GOPATH'/src/github.com/containerd/stargz-snapshotter/estargz' >> go.mod && \
# recent containerd requires to update api/go.mod and integration/client/go.mod as well.
if [ -f api/go.mod ] ; then \
echo 'replace github.com/containerd/stargz-snapshotter => '$GOPATH'/src/github.com/containerd/stargz-snapshotter' >> api/go.mod && \
echo 'replace github.com/containerd/stargz-snapshotter/estargz => '$GOPATH'/src/github.com/containerd/stargz-snapshotter/estargz' >> api/go.mod ; \
fi && \
if [ -f integration/client/go.mod ] ; then \
echo 'replace github.com/containerd/stargz-snapshotter => '$GOPATH'/src/github.com/containerd/stargz-snapshotter' >> integration/client/go.mod && \
echo 'replace github.com/containerd/stargz-snapshotter/estargz => '$GOPATH'/src/github.com/containerd/stargz-snapshotter/estargz' >> integration/client/go.mod ; \
fi && \
echo 'package main \nimport _ "github.com/containerd/stargz-snapshotter/service/plugin"' > cmd/containerd/builtins_stargz_snapshotter.go && \
make vendor && GOARCH=$TARGETARCH make && DESTDIR=/out/ PREFIX= make install
# Build runc # Build runc
FROM golang:1.24-bullseye AS runc-dev FROM golang-base AS runc-dev
ARG RUNC_VERSION ARG RUNC_VERSION
RUN apt-get update -y && apt-get install -y libseccomp-dev && \ RUN apt-get update -y && apt-get install -y libseccomp-dev && \
git clone -b ${RUNC_VERSION} --depth 1 \ git clone -b ${RUNC_VERSION} --depth 1 \
https://github.com/opencontainers/runc $GOPATH/src/github.com/opencontainers/runc && \ https://github.com/opencontainers/runc $GOPATH/src/github.com/opencontainers/runc && \
cd $GOPATH/src/github.com/opencontainers/runc && \ cd $GOPATH/src/github.com/opencontainers/runc && \
make && make install PREFIX=/out/ GO111MODULE=off make && make install PREFIX=/out/
# Build stargz snapshotter # Build stargz snapshotter
FROM --platform=$BUILDPLATFORM golang:1.24-bullseye AS snapshotter-dev FROM golang-base AS snapshotter-dev
ARG TARGETARCH ARG TARGETARCH
ARG GOARM ARG GOARM
ARG SNAPSHOTTER_BUILD_FLAGS ARG SNAPSHOTTER_BUILD_FLAGS
ARG CTR_REMOTE_BUILD_FLAGS ARG CTR_REMOTE_BUILD_FLAGS
COPY . $GOPATH/src/github.com/containerd/stargz-snapshotter COPY . $GOPATH/src/github.com/containerd/stargz-snapshotter
ARG CGO_ENABLED
RUN cd $GOPATH/src/github.com/containerd/stargz-snapshotter && \ RUN cd $GOPATH/src/github.com/containerd/stargz-snapshotter && \
PREFIX=/out/ GOARCH=${TARGETARCH:-amd64} GO_BUILD_FLAGS=${SNAPSHOTTER_BUILD_FLAGS} make containerd-stargz-grpc && \ PREFIX=/out/ GOARCH=${TARGETARCH:-amd64} GO_BUILD_FLAGS=${SNAPSHOTTER_BUILD_FLAGS} make containerd-stargz-grpc && \
PREFIX=/out/ GOARCH=${TARGETARCH:-amd64} GO_BUILD_FLAGS=${CTR_REMOTE_BUILD_FLAGS} make ctr-remote && \ PREFIX=/out/ GOARCH=${TARGETARCH:-amd64} GO_BUILD_FLAGS=${CTR_REMOTE_BUILD_FLAGS} make ctr-remote
PREFIX=/out/ GOARCH=${TARGETARCH:-amd64} GO_BUILD_FLAGS=${CTR_REMOTE_BUILD_FLAGS} make stargz-fuse-manager
# Build stargz store # Build stargz store
FROM golang-base AS stargz-store-dev FROM golang-base AS stargz-store-dev
@ -97,9 +78,8 @@ ARG GOARM
ARG SNAPSHOTTER_BUILD_FLAGS ARG SNAPSHOTTER_BUILD_FLAGS
ARG CTR_REMOTE_BUILD_FLAGS ARG CTR_REMOTE_BUILD_FLAGS
COPY . $GOPATH/src/github.com/containerd/stargz-snapshotter COPY . $GOPATH/src/github.com/containerd/stargz-snapshotter
ARG CGO_ENABLED
RUN cd $GOPATH/src/github.com/containerd/stargz-snapshotter && \ RUN cd $GOPATH/src/github.com/containerd/stargz-snapshotter && \
PREFIX=/out/ GOARCH=${TARGETARCH:-amd64} GO_BUILD_FLAGS=${SNAPSHOTTER_BUILD_FLAGS} make stargz-store stargz-store-helper PREFIX=/out/ GOARCH=${TARGETARCH:-amd64} GO_BUILD_FLAGS=${SNAPSHOTTER_BUILD_FLAGS} make stargz-store
# Build podman # Build podman
FROM golang-base AS podman-dev FROM golang-base AS podman-dev
@ -111,8 +91,7 @@ RUN apt-get update -y && apt-get install -y libseccomp-dev libgpgme-dev && \
make && make install PREFIX=/out/ make && make install PREFIX=/out/
# Build CRI-O # Build CRI-O
# FROM golang-base AS cri-o-dev FROM golang-base AS cri-o-dev
FROM golang:1.24-bullseye AS cri-o-dev
ARG CRIO_VERSION ARG CRIO_VERSION
RUN apt-get update -y && apt-get install -y libseccomp-dev libgpgme-dev && \ RUN apt-get update -y && apt-get install -y libseccomp-dev libgpgme-dev && \
git clone https://github.com/cri-o/cri-o $GOPATH/src/github.com/cri-o/cri-o && \ git clone https://github.com/cri-o/cri-o $GOPATH/src/github.com/cri-o/cri-o && \
@ -124,7 +103,7 @@ RUN apt-get update -y && apt-get install -y libseccomp-dev libgpgme-dev && \
# Build conmon # Build conmon
FROM golang-base AS conmon-dev FROM golang-base AS conmon-dev
ARG CONMON_VERSION ARG CONMON_VERSION
RUN apt-get update -y && apt-get install -y gcc git libc6-dev libglib2.0-dev pkg-config make libseccomp-dev && \ RUN apt-get update -y && apt-get install -y gcc git libc6-dev libglib2.0-dev pkg-config make && \
git clone -b ${CONMON_VERSION} --depth 1 \ git clone -b ${CONMON_VERSION} --depth 1 \
https://github.com/containers/conmon $GOPATH/src/github.com/containers/conmon && \ https://github.com/containers/conmon $GOPATH/src/github.com/containers/conmon && \
cd $GOPATH/src/github.com/containers/conmon && \ cd $GOPATH/src/github.com/containers/conmon && \
@ -146,7 +125,7 @@ COPY --from=stargz-store-dev /out/* /
FROM golang-base AS containerd-base FROM golang-base AS containerd-base
ARG TARGETARCH ARG TARGETARCH
ARG NERDCTL_VERSION ARG NERDCTL_VERSION
RUN apt-get update -y && apt-get --no-install-recommends install -y fuse3 && \ RUN apt-get update -y && apt-get --no-install-recommends install -y fuse && \
curl -sSL --output /tmp/nerdctl.tgz https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/nerdctl-${NERDCTL_VERSION}-linux-${TARGETARCH:-amd64}.tar.gz && \ curl -sSL --output /tmp/nerdctl.tgz https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/nerdctl-${NERDCTL_VERSION}-linux-${TARGETARCH:-amd64}.tar.gz && \
tar zxvf /tmp/nerdctl.tgz -C /usr/local/bin && \ tar zxvf /tmp/nerdctl.tgz -C /usr/local/bin && \
rm -f /tmp/nerdctl.tgz rm -f /tmp/nerdctl.tgz
@ -162,7 +141,7 @@ RUN ln -s /usr/local/bin/ctr-remote /usr/local/bin/ctr
FROM golang-base AS containerd-snapshotter-base FROM golang-base AS containerd-snapshotter-base
ARG TARGETARCH ARG TARGETARCH
ARG NERDCTL_VERSION ARG NERDCTL_VERSION
RUN apt-get update -y && apt-get --no-install-recommends install -y fuse3 && \ RUN apt-get update -y && apt-get --no-install-recommends install -y fuse && \
curl -sSL --output /tmp/nerdctl.tgz https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/nerdctl-${NERDCTL_VERSION}-linux-${TARGETARCH:-amd64}.tar.gz && \ curl -sSL --output /tmp/nerdctl.tgz https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/nerdctl-${NERDCTL_VERSION}-linux-${TARGETARCH:-amd64}.tar.gz && \
tar zxvf /tmp/nerdctl.tgz -C /usr/local/bin && \ tar zxvf /tmp/nerdctl.tgz -C /usr/local/bin && \
rm -f /tmp/nerdctl.tgz rm -f /tmp/nerdctl.tgz
@ -172,13 +151,12 @@ COPY --from=snapshotter-dev /out/ctr-remote /usr/local/bin/
RUN ln -s /usr/local/bin/ctr-remote /usr/local/bin/ctr RUN ln -s /usr/local/bin/ctr-remote /usr/local/bin/ctr
# Base image which contains podman with stargz-store # Base image which contains podman with stargz-store
FROM ubuntu:24.04 AS podman-base FROM golang-base AS podman-base
ARG TARGETARCH ARG TARGETARCH
ARG CNI_PLUGINS_VERSION ARG CNI_PLUGINS_VERSION
ARG PODMAN_VERSION ARG PODMAN_VERSION
ARG NETAVARK_VERSION RUN apt-get update -y && apt-get --no-install-recommends install -y fuse libgpgme-dev \
RUN apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y fuse3 libgpgme-dev \ iptables libyajl-dev && \
iptables libyajl-dev curl ca-certificates libglib2.0 libseccomp-dev wget && \
# Make CNI plugins manipulate iptables instead of nftables # Make CNI plugins manipulate iptables instead of nftables
# as this test runs in a Docker container that network is configured with iptables. # as this test runs in a Docker container that network is configured with iptables.
# c.f. https://github.com/moby/moby/issues/26824 # c.f. https://github.com/moby/moby/issues/26824
@ -187,51 +165,21 @@ RUN apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y fuse3
curl -qsSL https://raw.githubusercontent.com/containers/podman/${PODMAN_VERSION}/cni/87-podman-bridge.conflist | tee /etc/cni/net.d/87-podman-bridge.conflist && \ curl -qsSL https://raw.githubusercontent.com/containers/podman/${PODMAN_VERSION}/cni/87-podman-bridge.conflist | tee /etc/cni/net.d/87-podman-bridge.conflist && \
curl -Ls https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/cni-plugins-linux-${TARGETARCH:-amd64}-${CNI_PLUGINS_VERSION}.tgz | tar xzv -C /opt/cni/bin curl -Ls https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/cni-plugins-linux-${TARGETARCH:-amd64}-${CNI_PLUGINS_VERSION}.tgz | tar xzv -C /opt/cni/bin
RUN mkdir /tmp/netavark ; \
wget -O /tmp/netavark/netavark.gz https://github.com/containers/netavark/releases/download/${NETAVARK_VERSION}/netavark.gz ; \
gunzip /tmp/netavark/netavark.gz ; \
mkdir -p /usr/local/libexec/podman ; \
mv /tmp/netavark/netavark /usr/local/libexec/podman/ ; \
chmod 0755 /usr/local/libexec/podman/netavark
COPY --from=podman-dev /out/bin/* /usr/local/bin/ COPY --from=podman-dev /out/bin/* /usr/local/bin/
COPY --from=runc-dev /out/sbin/* /usr/local/sbin/ COPY --from=runc-dev /out/sbin/* /usr/local/sbin/
COPY --from=conmon-dev /out/bin/* /usr/local/bin/ COPY --from=conmon-dev /out/bin/* /usr/local/bin/
COPY --from=containers-common-dev /out/seccomp.json /usr/share/containers/ COPY --from=containers-common-dev /out/seccomp.json /usr/share/containers/
COPY --from=stargz-store-dev /out/* /usr/local/bin/ COPY --from=stargz-store-dev /out/* /usr/local/bin/
# Image for testing rootless Podman with Stargz Store. # Image which can be used as all-in-one single node demo environment
# This takes the same approach as nerdctl CI: https://github.com/containerd/nerdctl/blob/6341c8320984f7148b92dd33472d8eaca6dba756/Dockerfile#L302-L326 FROM snapshotter-base AS cind
FROM podman-base AS podman-rootless COPY ./script/config/ /
ARG CONTAINERIZED_SYSTEMD_VERSION COPY ./script/cind/ /
ARG SLIRP4NETNS_VERSION VOLUME /var/lib/containerd
RUN apt-get update -y && apt-get install -y \ VOLUME /var/lib/containerd-stargz-grpc
systemd systemd-sysv dbus dbus-user-session \ VOLUME /run/containerd-stargz-grpc
openssh-server openssh-client uidmap ENV CONTAINERD_SNAPSHOTTER=stargz
RUN curl -o /usr/local/bin/slirp4netns --fail -L https://github.com/rootless-containers/slirp4netns/releases/download/${SLIRP4NETNS_VERSION}/slirp4netns-$(uname -m) && \ ENTRYPOINT [ "/entrypoint.sh" ]
chmod +x /usr/local/bin/slirp4netns && \
curl -L -o /docker-entrypoint.sh https://raw.githubusercontent.com/AkihiroSuda/containerized-systemd/${CONTAINERIZED_SYSTEMD_VERSION}/docker-entrypoint.sh && \
chmod +x /docker-entrypoint.sh && \
curl -L -o /etc/containers/policy.json https://raw.githubusercontent.com/containers/skopeo/master/default-policy.json
# storage.conf plugs Stargz Store into Podman as an Additional Layer Store
COPY ./script/podman/config/storage.conf /home/rootless/.config/containers/storage.conf
# Stargz Store systemd service for rootless Podman
COPY ./script/podman/config/podman-rootless-stargz-store.service /home/rootless/.config/systemd/user/
COPY ./script/podman/config/containers.conf /home/rootless/.config/containers/containers.conf
# test-podman-rootless.sh logins to the user via SSH
COPY ./script/podman/config/test-podman-rootless.sh /test-podman-rootless.sh
RUN ssh-keygen -q -t rsa -f /root/.ssh/id_rsa -N '' && \
useradd -m -s /bin/bash rootless && \
mkdir -p -m 0700 /home/rootless/.ssh && \
cp -a /root/.ssh/id_rsa.pub /home/rootless/.ssh/authorized_keys && \
mkdir -p /home/rootless/.local/share /home/rootless/.local/share/stargz-store/store && \
chown -R rootless:rootless /home/rootless
VOLUME /home/rootless/.local/share
ENTRYPOINT ["/docker-entrypoint.sh", "/test-podman-rootless.sh"]
CMD ["/bin/bash", "--login", "-i"]
# Image which can be used for interactive demo environment # Image which can be used for interactive demo environment
FROM containerd-base AS demo FROM containerd-base AS demo
@ -246,21 +194,21 @@ RUN apt-get update && apt-get install -y iptables && \
curl -Ls https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/cni-plugins-linux-${TARGETARCH:-amd64}-${CNI_PLUGINS_VERSION}.tgz | tar xzv -C /opt/cni/bin curl -Ls https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/cni-plugins-linux-${TARGETARCH:-amd64}-${CNI_PLUGINS_VERSION}.tgz | tar xzv -C /opt/cni/bin
# Image which can be used as a node image for KinD (containerd with builtin snapshotter) # Image which can be used as a node image for KinD (containerd with builtin snapshotter)
FROM kindest/node:v1.33.2 AS kind-builtin-snapshotter FROM kindest/node:v1.20.0 AS kind-builtin-snapshotter
COPY --from=containerd-snapshotter-dev /out/bin/containerd /out/bin/containerd-shim-runc-v2 /usr/local/bin/ COPY --from=containerd-snapshotter-dev /out/bin/containerd /out/bin/containerd-shim-runc-v2 /usr/local/bin/
COPY --from=snapshotter-dev /out/ctr-remote /usr/local/bin/ COPY --from=snapshotter-dev /out/ctr-remote /usr/local/bin/
COPY ./script/config/ / COPY ./script/config/ /
RUN apt-get update -y && apt-get install --no-install-recommends -y fuse3 RUN apt-get update -y && apt-get install --no-install-recommends -y fuse
ENTRYPOINT [ "/usr/local/bin/kind-entrypoint.sh", "/usr/local/bin/entrypoint", "/sbin/init" ] ENTRYPOINT [ "/usr/local/bin/entrypoint", "/sbin/init" ]
# Image for testing CRI-O with Stargz Store. # Image for testing CRI-O with Stargz Store.
# NOTE: This cannot be used for the node image of KinD. # NOTE: This cannot be used for the node image of KinD.
FROM ubuntu:24.04 AS crio-stargz-store FROM ubuntu:20.04 AS crio-stargz-store
ARG CNI_PLUGINS_VERSION ARG CNI_PLUGINS_VERSION
ARG CRIO_TEST_PAUSE_IMAGE_NAME ARG CRIO_TEST_PAUSE_IMAGE_NAME
ENV container docker ENV container docker
RUN apt-get update -y && apt-get install --no-install-recommends -y \ RUN apt-get update -y && apt-get install --no-install-recommends -y \
ca-certificates fuse3 libgpgme-dev libglib2.0-dev curl \ ca-certificates fuse libgpgme-dev libglib2.0-dev curl \
iptables conntrack systemd systemd-sysv && \ iptables conntrack systemd systemd-sysv && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y tzdata && \ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y tzdata && \
# Make CNI plugins manipulate iptables instead of nftables # Make CNI plugins manipulate iptables instead of nftables
@ -271,10 +219,7 @@ RUN apt-get update -y && apt-get install --no-install-recommends -y \
curl -sSL https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/cni-plugins-linux-${TARGETARCH:-amd64}-${CNI_PLUGINS_VERSION}.tgz | tar xzv -C /opt/cni/bin && \ curl -sSL https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/cni-plugins-linux-${TARGETARCH:-amd64}-${CNI_PLUGINS_VERSION}.tgz | tar xzv -C /opt/cni/bin && \
echo ${CRIO_TEST_PAUSE_IMAGE_NAME} > /pause_name && \ echo ${CRIO_TEST_PAUSE_IMAGE_NAME} > /pause_name && \
mkdir -p /etc/sysconfig && \ mkdir -p /etc/sysconfig && \
echo CRIO_RUNTIME_OPTIONS=--pause-image=${CRIO_TEST_PAUSE_IMAGE_NAME} > /etc/sysconfig/crio && \ echo CRIO_RUNTIME_OPTIONS=--pause-image=${CRIO_TEST_PAUSE_IMAGE_NAME} > /etc/sysconfig/crio
# Necessary to pass CRI tests: https://github.com/kubernetes-sigs/cri-tools/pull/905
mkdir -p /etc/crio/crio.conf.d && \
printf '[crio.runtime]\nseccomp_use_default_when_empty = false\n' > /etc/crio/crio.conf.d/02-seccomp.conf
COPY --from=stargz-store-dev /out/* /usr/local/bin/ COPY --from=stargz-store-dev /out/* /usr/local/bin/
COPY --from=cri-o-dev /out/bin/* /usr/local/bin/ COPY --from=cri-o-dev /out/bin/* /usr/local/bin/
@ -287,10 +232,10 @@ COPY ./script/config-cri-o/ /
ENTRYPOINT [ "/usr/local/bin/entrypoint" ] ENTRYPOINT [ "/usr/local/bin/entrypoint" ]
# Image which can be used as a node image for KinD # Image which can be used as a node image for KinD
FROM kindest/node:v1.33.2 FROM kindest/node:v1.20.0
COPY --from=containerd-dev /out/bin/containerd /out/bin/containerd-shim-runc-v2 /usr/local/bin/ COPY --from=containerd-dev /out/bin/containerd /out/bin/containerd-shim-runc-v2 /usr/local/bin/
COPY --from=snapshotter-dev /out/* /usr/local/bin/ COPY --from=snapshotter-dev /out/* /usr/local/bin/
COPY ./script/config/ / COPY ./script/config/ /
RUN apt-get update -y && apt-get install --no-install-recommends -y fuse3 && \ RUN apt-get update -y && apt-get install --no-install-recommends -y fuse && \
systemctl enable stargz-snapshotter systemctl enable stargz-snapshotter
ENTRYPOINT [ "/usr/local/bin/kind-entrypoint.sh", "/usr/local/bin/entrypoint", "/sbin/init" ] ENTRYPOINT [ "/usr/local/bin/entrypoint", "/sbin/init" ]

View File

@ -1,8 +1,8 @@
# stargz-snapshotter maintainers # stargz-snapshotter maintainers
# #
# As a containerd sub-project, containerd maintainers are also included from https://github.com/containerd/project/blob/main/MAINTAINERS. # As a containerd sub-project, containerd maintainers are also included from https://github.com/containerd/project/blob/master/MAINTAINERS.
# See https://github.com/containerd/project/blob/main/GOVERNANCE.md for description of maintainer role # See https://github.com/containerd/project/blob/master/GOVERNANCE.md for description of maintainer role
# #
# COMMITTERS # MAINTAINERS
# GitHub ID, Name, Email address # GitHub ID, Name, Email address
ktock, Kohei Tokunaga, ktokunaga.mail@gmail.com ktock, Kohei Tokunaga, ktokunaga.mail@gmail.com

View File

@ -16,19 +16,18 @@
# Base path used to install. # Base path used to install.
CMD_DESTDIR ?= /usr/local CMD_DESTDIR ?= /usr/local
GO111MODULE_VALUE=auto GO111MODULE_VALUE=auto
PREFIX ?= $(CURDIR)/out/ PREFIX ?= out/
PKG=github.com/containerd/stargz-snapshotter PKG=github.com/containerd/stargz-snapshotter
VERSION=$(shell git describe --match 'v[0-9]*' --dirty='.m' --always --tags) VERSION=$(shell git describe --match 'v[0-9]*' --dirty='.m' --always --tags)
REVISION=$(shell git rev-parse HEAD)$(shell if ! git diff --no-ext-diff --quiet --exit-code; then echo .m; fi) REVISION=$(shell git rev-parse HEAD)$(shell if ! git diff --no-ext-diff --quiet --exit-code; then echo .m; fi)
GO_BUILD_LDFLAGS ?= -s -w GO_LD_FLAGS=-ldflags '-s -w -X $(PKG)/version.Version=$(VERSION) -X $(PKG)/version.Revision=$(REVISION) $(GO_EXTRA_LDFLAGS)'
GO_LD_FLAGS=-ldflags '$(GO_BUILD_LDFLAGS) -X $(PKG)/version.Version=$(VERSION) -X $(PKG)/version.Revision=$(REVISION) $(GO_EXTRA_LDFLAGS)'
CMD=containerd-stargz-grpc ctr-remote stargz-store stargz-fuse-manager CMD=containerd-stargz-grpc ctr-remote stargz-store
CMD_BINARIES=$(addprefix $(PREFIX),$(CMD)) CMD_BINARIES=$(addprefix $(PREFIX),$(CMD))
.PHONY: all build check install uninstall clean test test-root test-all integration test-optimize benchmark test-kind test-cri-containerd test-cri-o test-criauth generate validate-generated test-k3s test-k3s-argo-workflow vendor .PHONY: all build check install-check-tools install uninstall clean test test-root test-all integration test-optimize benchmark test-pullsecrets test-cri
all: build all: build
@ -37,26 +36,21 @@ build: $(CMD)
FORCE: FORCE:
containerd-stargz-grpc: FORCE containerd-stargz-grpc: FORCE
cd cmd/ ; GO111MODULE=$(GO111MODULE_VALUE) go build -o $(PREFIX)$@ $(GO_BUILD_FLAGS) $(GO_LD_FLAGS) -v ./containerd-stargz-grpc GO111MODULE=$(GO111MODULE_VALUE) go build -o $(PREFIX)$@ $(GO_BUILD_FLAGS) $(GO_LD_FLAGS) -v ./cmd/containerd-stargz-grpc
ctr-remote: FORCE ctr-remote: FORCE
cd cmd/ ; GO111MODULE=$(GO111MODULE_VALUE) go build -o $(PREFIX)$@ $(GO_BUILD_FLAGS) $(GO_LD_FLAGS) -v ./ctr-remote GO111MODULE=$(GO111MODULE_VALUE) go build -o $(PREFIX)$@ $(GO_BUILD_FLAGS) $(GO_LD_FLAGS) -v ./cmd/ctr-remote
stargz-store: FORCE stargz-store: FORCE
cd cmd/ ; GO111MODULE=$(GO111MODULE_VALUE) go build -o $(PREFIX)$@ $(GO_BUILD_FLAGS) $(GO_LD_FLAGS) -v ./stargz-store GO111MODULE=$(GO111MODULE_VALUE) go build -o $(PREFIX)$@ $(GO_BUILD_FLAGS) $(GO_LD_FLAGS) -v ./cmd/stargz-store
stargz-store-helper: FORCE
cd cmd/ ; GO111MODULE=$(GO111MODULE_VALUE) go build -o $(PREFIX)$@ $(GO_BUILD_FLAGS) $(GO_LD_FLAGS) -v ./stargz-store/helper
stargz-fuse-manager: FORCE
cd cmd/ ; GO111MODULE=$(GO111MODULE_VALUE) go build -o $(PREFIX)$@ $(GO_BUILD_FLAGS) $(GO_LD_FLAGS) -v ./stargz-fuse-manager
check: check:
@echo "$@" @echo "$@"
@GO111MODULE=$(GO111MODULE_VALUE) $(shell go env GOPATH)/bin/golangci-lint run @GO111MODULE=$(GO111MODULE_VALUE) golangci-lint run
@cd ./estargz ; GO111MODULE=$(GO111MODULE_VALUE) $(shell go env GOPATH)/bin/golangci-lint run @cd ./estargz ; GO111MODULE=$(GO111MODULE_VALUE) golangci-lint run
@cd ./cmd ; GO111MODULE=$(GO111MODULE_VALUE) $(shell go env GOPATH)/bin/golangci-lint run
@cd ./ipfs ; GO111MODULE=$(GO111MODULE_VALUE) $(shell go env GOPATH)/bin/golangci-lint run install-check-tools:
@curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $(go env GOPATH)/bin v1.39.0
install: install:
@echo "$@" @echo "$@"
@ -71,24 +65,10 @@ clean:
@echo "$@" @echo "$@"
@rm -f $(CMD_BINARIES) @rm -f $(CMD_BINARIES)
generate:
@./script/generated-files/generate.sh update
validate-generated:
@./script/generated-files/generate.sh validate
vendor:
@cd ./estargz ; GO111MODULE=$(GO111MODULE_VALUE) go mod tidy
@cd ./ipfs ; GO111MODULE=$(GO111MODULE_VALUE) go mod tidy
@GO111MODULE=$(GO111MODULE_VALUE) go mod tidy
@cd ./cmd ; GO111MODULE=$(GO111MODULE_VALUE) go mod tidy
test: test:
@echo "$@" @echo "$@"
@GO111MODULE=$(GO111MODULE_VALUE) go test -race ./... @GO111MODULE=$(GO111MODULE_VALUE) go test -race ./...
@cd ./estargz ; GO111MODULE=$(GO111MODULE_VALUE) go test -timeout 30m -race ./... @cd ./estargz ; GO111MODULE=$(GO111MODULE_VALUE) go test -race ./...
@cd ./cmd ; GO111MODULE=$(GO111MODULE_VALUE) go test -timeout 20m -race ./...
@cd ./ipfs ; GO111MODULE=$(GO111MODULE_VALUE) go test -timeout 20m -race ./...
test-root: test-root:
@echo "$@" @echo "$@"
@ -105,33 +85,11 @@ test-optimize:
benchmark: benchmark:
@./script/benchmark/test.sh @./script/benchmark/test.sh
test-kind: test-pullsecrets:
@./script/kind/test.sh @./script/pullsecrets/test.sh
test-cri-containerd: test-cri-containerd:
@./script/cri-containerd/test.sh @./script/cri-containerd/test.sh
test-cri-o: test-cri-o:
@./script/cri-o/test.sh @./script/cri-o/test.sh
test-podman:
@./script/podman/test.sh
test-criauth:
@./script/criauth/test.sh
test-k3s:
@./script/k3s/test.sh
test-k3s-argo-workflow:
@./script/k3s-argo-workflow/run.sh
test-ipfs:
@./script/ipfs/test.sh
validate-vendor:
$(eval TMPDIR := $(shell mktemp -d))
@cp -R $(CURDIR) ${TMPDIR}
@(cd ${TMPDIR}/stargz-snapshotter && make vendor)
@diff -r -u -q $(CURDIR) ${TMPDIR}/stargz-snapshotter
@rm -rf ${TMPDIR}

126
README.md
View File

@ -6,9 +6,9 @@
# Stargz Snapshotter # Stargz Snapshotter
[![Tests Status](https://github.com/containerd/stargz-snapshotter/workflows/Tests/badge.svg)](https://github.com/containerd/stargz-snapshotter/actions?query=workflow%3ATests+branch%3Amain) [![Tests Status](https://github.com/containerd/stargz-snapshotter/workflows/Tests/badge.svg)](https://github.com/containerd/stargz-snapshotter/actions?query=workflow%3ATests+branch%3Amaster)
[![Benchmarking](https://github.com/containerd/stargz-snapshotter/workflows/Benchmark/badge.svg)](https://github.com/containerd/stargz-snapshotter/actions?query=workflow%3ABenchmark+branch%3Amain) [![Benchmarking](https://github.com/containerd/stargz-snapshotter/workflows/Benchmark/badge.svg)](https://github.com/containerd/stargz-snapshotter/actions?query=workflow%3ABenchmark+branch%3Amaster)
[![Nightly](https://github.com/containerd/stargz-snapshotter/workflows/Nightly/badge.svg)](https://github.com/containerd/stargz-snapshotter/actions?query=workflow%3ANightly+branch%3Amain) [![Nightly](https://github.com/containerd/stargz-snapshotter/workflows/Nightly/badge.svg)](https://github.com/containerd/stargz-snapshotter/actions?query=workflow%3ANightly+branch%3Amaster)
Read also introductory blog: [Startup Containers in Lightning Speed with Lazy Image Distribution on Containerd](https://medium.com/nttlabs/startup-containers-in-lightning-speed-with-lazy-image-distribution-on-containerd-243d94522361) Read also introductory blog: [Startup Containers in Lightning Speed with Lazy Image Distribution on Containerd](https://medium.com/nttlabs/startup-containers-in-lightning-speed-with-lazy-image-distribution-on-containerd-243d94522361)
@ -39,15 +39,12 @@ We are constantly measuring the performance of this snapshotter so you can get t
Please note that we sometimes see variation in the results because of network conditions on the internet and the location of the instance in GitHub Actions, etc. Please note that we sometimes see variation in the results because of network conditions on the internet and the location of the instance in GitHub Actions, etc.
Our benchmarking method is based on [HelloBench](https://github.com/Tintri/hello-bench). Our benchmarking method is based on [HelloBench](https://github.com/Tintri/hello-bench).
:nerd_face: You can also run containers on IPFS with lazy pulling. This is an experimental feature. See [`./docs/ipfs.md`](./docs/ipfs.md) for more details.
Stargz Snapshotter is a **non-core** sub-project of containerd. Stargz Snapshotter is a **non-core** sub-project of containerd.
## Quick Start with Kubernetes ## Quick Start with Kubernetes
- For more details about stargz snapshotter plugin and its configuration, refer to [Containerd Stargz Snapshotter Plugin Overview](/docs/overview.md). - For more details about stargz snapshotter plugin and its configuration, refer to [Containerd Stargz Snapshotter Plugin Overview](/docs/overview.md).
- For more details about setup lazy pulling of eStargz with containerd, CRI-O, Podman, systemd, etc., refer to [Install Stargz Snapshotter and Stargz Store](./docs/INSTALL.md). - For more details about setup lazy pulling of eStargz with containerd, CRI-O, Podman, systemd, etc., refer to [Install Stargz Snapshotter and Stargz Store](./docs/INSTALL.md).
- For more details about the integration status of eStargz with tools in the community, refer to [Integration of eStargz with other tools](./docs/integration.md)
For using stargz snapshotter on kubernetes nodes, you need the following configuration to containerd as well as run stargz snapshotter daemon on the node. For using stargz snapshotter on kubernetes nodes, you need the following configuration to containerd as well as run stargz snapshotter daemon on the node.
We assume that you are using containerd (> v1.4.2) as a CRI runtime. We assume that you are using containerd (> v1.4.2) as a CRI runtime.
@ -62,8 +59,6 @@ version = 2
[proxy_plugins.stargz] [proxy_plugins.stargz]
type = "snapshot" type = "snapshot"
address = "/run/containerd-stargz-grpc/containerd-stargz-grpc.sock" address = "/run/containerd-stargz-grpc/containerd-stargz-grpc.sock"
[proxy_plugins.stargz.exports]
root = "/var/lib/containerd-stargz-grpc/"
# Use stargz snapshotter through CRI # Use stargz snapshotter through CRI
[plugins."io.containerd.grpc.v1.cri".containerd] [plugins."io.containerd.grpc.v1.cri".containerd]
@ -71,18 +66,18 @@ version = 2
disable_snapshot_annotations = false disable_snapshot_annotations = false
``` ```
You can try our [prebuilt](/Dockerfile) [KinD](https://github.com/kubernetes-sigs/kind) node image that contains the above configuration. **Note that `disable_snapshot_annotations = false` is required since containerd > v1.4.2**
This repo contains [a Dockerfile as a KinD node image](/Dockerfile) which includes the above configuration.
You can use it with [KinD](https://github.com/kubernetes-sigs/kind) like the following,
```console ```console
$ kind create cluster --name stargz-demo --image ghcr.io/containerd/stargz-snapshotter:0.12.1-kind $ docker build -t stargz-kind-node https://github.com/containerd/stargz-snapshotter.git
$ kind create cluster --name stargz-demo --image stargz-kind-node
``` ```
:information_source: kind binary v0.16.x or newer is recommended for `ghcr.io/containerd/stargz-snapshotter:0.12.1-kind`.
:information_source: You can get the latest node images from the [`ghcr.io/containerd/stargz-snapshotter:${VERSION}-kind`](https://github.com/orgs/containerd/packages/container/package/stargz-snapshotter) namespace.
Then you can create eStargz pods on the cluster. Then you can create eStargz pods on the cluster.
In this example, we create a stargz-converted Node.js pod (`ghcr.io/stargz-containers/node:17.8.0-esgz`) as a demo. In this example, we create a stargz-converted Node.js pod (`ghcr.io/stargz-containers/node:13.13.0-esgz`) as a demo.
```yaml ```yaml
apiVersion: v1 apiVersion: v1
@ -92,7 +87,7 @@ metadata:
spec: spec:
containers: containers:
- name: nodejs-stargz - name: nodejs-stargz
image: ghcr.io/stargz-containers/node:17.8.0-esgz image: ghcr.io/stargz-containers/node:13.13.0-esgz
command: ["node"] command: ["node"]
args: args:
- -e - -e
@ -105,10 +100,10 @@ spec:
- containerPort: 80 - containerPort: 80
``` ```
The following command lazily pulls `ghcr.io/stargz-containers/node:17.8.0-esgz` from GitHub Container Registry and creates the pod, so this takes less time than with the original image `library/node:13.13`. The following command lazily pulls `ghcr.io/stargz-containers/node:13.13.0-esgz` from GitHub Container Registry and creates the pod, so this takes less time than with the original image `library/node:13.13`.
```console ```console
$ kubectl --context kind-stargz-demo apply -f stargz-pod.yaml && kubectl --context kind-stargz-demo get po nodejs -w $ kubectl --context kind-stargz-demo apply -f stargz-pod.yaml && kubectl get po nodejs -w
$ kubectl --context kind-stargz-demo port-forward nodejs 8080:80 & $ kubectl --context kind-stargz-demo port-forward nodejs 8080:80 &
$ curl 127.0.0.1:8080 $ curl 127.0.0.1:8080
Hello World! Hello World!
@ -116,74 +111,16 @@ Hello World!
Stargz snapshotter also supports [further configuration](/docs/overview.md) including private registry authentication, mirror registries, etc. Stargz snapshotter also supports [further configuration](/docs/overview.md) including private registry authentication, mirror registries, etc.
## Getting eStargz images ## Creating eStargz images with optimization
- For more examples and details about the image converter `ctr-remote`, refer to [Optimize Images with `ctr-remote image optimize`](/docs/ctr-remote.md). - For more examples and details about the image converter `ctr-remote`, refer to [Optimize Images with `ctr-remote image optimize`](/docs/ctr-remote.md).
- For more details about eStargz format, refer to [eStargz: Standard-Compatible Extensions to Tar.gz Layers for Lazy Pulling Container Images](/docs/stargz-estargz.md) - For more details about eStargz format, refer to [eStargz: Standard-Compatible Extensions to Tar.gz Layers for Lazy Pulling Container Images](/docs/stargz-estargz.md)
For lazy pulling images, you need to prepare eStargz images first. For lazy pulling images, you need to prepare eStargz images first.
There are several ways to achieve that. You can use the [`ctr-remote`](/docs/ctr-remote.md) command to do this.
This section describes some of them. You can also try our pre-converted images listed in [Trying pre-converted images](/docs/pre-converted-images.md).
### Trying pre-built eStargz images In this section, we introduce `ctr-remote` command for converting images into eStargz with optimization for reading files.
You can try our pre-converted eStargz images on ghcr.io listed in [Trying pre-converted images](/docs/pre-converted-images.md).
### Building eStargz images using BuildKit
BuildKit supports building eStargz image since v0.10.
You can try it using [Docker Buildx](https://docs.docker.com/buildx/working-with-buildx/).
The following command builds an eStargz image and push it to `ghcr.io/ktock/hello:esgz`.
Flags `oci-mediatypes=true,compression=estargz` enable to build eStargz.
```
$ docker buildx build -t ghcr.io/ktock/hello:esgz \
-o type=registry,oci-mediatypes=true,compression=estargz,force-compression=true \
/tmp/buildctx/
```
> NOTE1: `force-compression=true` isn't needed if the base image is already eStargz.
> NOTE2: Docker still does not support lazy pulling of eStargz.
eStargz-enabled BuildKit (v0.10) will be [included to Docker v22.XX](https://github.com/moby/moby/blob/v22.06.0-beta.0/vendor.mod#L51) however you can build eStargz images with the prior version using Buildx [driver](https://github.com/docker/buildx/blob/master/docs/reference/buildx_create.md#-set-the-builder-driver-to-use---driver) feature.
You can enable the specific version of BuildKit using [`docker buildx create`](https://docs.docker.com/engine/reference/commandline/buildx_create/) (this example specifies `v0.10.3`).
```
$ docker buildx create --use --name v0.10.3 --driver docker-container --driver-opt image=moby/buildkit:v0.10.3
$ docker buildx inspect --bootstrap v0.10.3
```
### Building eStargz images using Kaniko
[Kaniko](https://github.com/GoogleContainerTools/kaniko) is an image builder runnable in containers and Kubernetes.
Since v1.5.0, it experimentally supports building eStargz.
`GGCR_EXPERIMENT_ESTARGZ=1` is needed.
```console
$ docker run --rm -e GGCR_EXPERIMENT_ESTARGZ=1 \
-v /tmp/buildctx:/workspace -v ~/.docker/config.json:/kaniko/.docker/config.json:ro \
gcr.io/kaniko-project/executor:v1.8.1 --destination ghcr.io/ktock/hello:esgz
```
### Building eStargz images using nerdctl
[nerdctl](https://github.com/containerd/nerdctl), Docker-compatible CLI of containerd, supports building eStargz images.
```console
$ nerdctl build -t ghcr.io/ktock/hello:1 /tmp/buildctx
$ nerdctl image convert --estargz --oci ghcr.io/ktock/hello:1 ghcr.io/ktock/hello:esgz
$ nerdctl push ghcr.io/ktock/hello:esgz
```
> NOTE: `--estargz` should be specified in conjunction with `--oci`
Please refer to nerdctl document for details for further information (e.g. lazy pulling): https://github.com/containerd/nerdctl/blob/master/docs/stargz.md
### Creating eStargz images using `ctr-remote`
[`ctr-remote`](/docs/ctr-remote.md) allows converting an image into eStargz with optimizing it.
As shown in the above benchmarking result, on-demand lazy pulling improves pull performance but incurs a runtime performance penalty, because reading files induces remote downloading of their contents. As shown in the above benchmarking result, on-demand lazy pulling improves pull performance but incurs a runtime performance penalty, because reading files induces remote downloading of their contents.
For solving this, `ctr-remote` has *workload-based* optimization for images. For solving this, `ctr-remote` has *workload-based* optimization for images.
@ -203,33 +140,30 @@ Generally, container images are built with purpose and the *workloads* are defin
By default, `ctr-remote` optimizes the performance of reading files that are most likely accessed in the workload defined in the Dockerfile.
[You can also specify the custom workload using options if needed](/docs/ctr-remote.md).
The following example converts the legacy `library/ubuntu:20.04` image into eStargz.
The command also optimizes the image for the workload of executing `ls` on `/bin/bash`.
The thing actually done is it runs the specified workload in a temporary container and profiles all file accesses with marking them as *likely accessed* also during runtime.
The converted image is still **docker-compatible** so you can run it with eStargz-agnostic runtimes (e.g. Docker).
```console
# ctr-remote image pull docker.io/library/ubuntu:20.04
# ctr-remote image optimize --oci --entrypoint='[ "/bin/bash", "-c" ]' --args='[ "ls" ]' docker.io/library/ubuntu:20.04 registry2:5000/ubuntu:20.04
# ctr-remote image push --plain-http registry2:5000/ubuntu:20.04
```
Finally, the following commands clear the local cache then pull the eStargz image lazily.
Stargz snapshotter prefetches files that are most likely accessed in the optimized workload, which hopefully increases the cache hit rate for that workload and mitigates runtime overheads as shown in the benchmarking result shown top of this doc.
```console
# ctr-remote image rm --sync registry2:5000/ubuntu:20.04
# ctr-remote images rpull --plain-http registry2:5000/ubuntu:20.04
fetching sha256:610399d1... application/vnd.oci.image.index.v1+json
fetching sha256:0b4a26b4... application/vnd.oci.image.manifest.v1+json
fetching sha256:8d8d9dbe... application/vnd.oci.image.config.v1+json
# ctr-remote run --rm -t --snapshotter=stargz registry2:5000/ubuntu:20.04 test /bin/bash
root@8eabb871a9bd:/# ls
bin boot dev etc home lib lib32 lib64 libx32 media mnt opt proc root run sbin srv sys tmp usr var
```
> NOTE: You can perform lazy pulling from any OCI-compatible registries (e.g. docker.io, ghcr.io, etc) as long as the image is formatted as eStargz.
## Importing Stargz Snapshotter as go module
Currently, Stargz Snapshotter repository contains two Go modules as the following and both of them need to be imported.
@ -243,8 +177,8 @@ Please make sure you import the both of them and they point to *the same commit
Stargz Snapshotter is a containerd **non-core** sub-project, licensed under the [Apache 2.0 license](./LICENSE).
As a containerd non-core sub-project, you will find the:
* [Project governance](https://github.com/containerd/project/blob/main/GOVERNANCE.md),
* [Maintainers](./MAINTAINERS),
* and [Contributing guidelines](https://github.com/containerd/project/blob/main/CONTRIBUTING.md)
information in our [`containerd/project`](https://github.com/containerd/project) repository.

View File

@ -17,34 +17,34 @@
package analyzer package analyzer
import ( import (
"bufio"
"context" "context"
"fmt" "fmt"
"io" "io"
"io/ioutil"
"os" "os"
"os/signal" "os/signal"
"strings"
"sync" "sync"
"sync/atomic" "sync/atomic"
"syscall" "syscall"
"time" "time"
"github.com/containerd/console" "github.com/containerd/console"
containerd "github.com/containerd/containerd/v2/client" "github.com/containerd/containerd"
"github.com/containerd/containerd/v2/cmd/ctr/commands" "github.com/containerd/containerd/cio"
"github.com/containerd/containerd/v2/cmd/ctr/commands/tasks" "github.com/containerd/containerd/cmd/ctr/commands"
"github.com/containerd/containerd/v2/core/mount" "github.com/containerd/containerd/cmd/ctr/commands/tasks"
"github.com/containerd/containerd/v2/core/snapshots" "github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/v2/pkg/cio" "github.com/containerd/containerd/log"
"github.com/containerd/containerd/v2/pkg/oci" "github.com/containerd/containerd/mount"
"github.com/containerd/errdefs" "github.com/containerd/containerd/oci"
"github.com/containerd/log" "github.com/containerd/containerd/platforms"
"github.com/containerd/platforms" "github.com/containerd/containerd/snapshots"
"github.com/containerd/stargz-snapshotter/analyzer/fanotify" "github.com/containerd/stargz-snapshotter/analyzer/fanotify"
"github.com/containerd/stargz-snapshotter/analyzer/recorder" "github.com/containerd/stargz-snapshotter/analyzer/recorder"
"github.com/opencontainers/go-digest" "github.com/opencontainers/go-digest"
"github.com/opencontainers/image-spec/identity" "github.com/opencontainers/image-spec/identity"
runtimespec "github.com/opencontainers/runtime-spec/specs-go" runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/rs/xid" "github.com/rs/xid"
) )
@ -62,7 +62,7 @@ func Analyze(ctx context.Context, client *containerd.Client, ref string, opts ..
return "", fmt.Errorf("wait-on-signal option cannot be used with terminal option") return "", fmt.Errorf("wait-on-signal option cannot be used with terminal option")
} }
target, err := os.MkdirTemp("", "target") target, err := ioutil.TempDir("", "target")
if err != nil { if err != nil {
return "", err return "", err
} }
@ -99,7 +99,7 @@ func Analyze(ctx context.Context, client *containerd.Client, ref string, opts ..
// Spawn a fanotifier process in a new mount namespace and setup recorder. // Spawn a fanotifier process in a new mount namespace and setup recorder.
fanotifier, err := fanotify.SpawnFanotifier("/proc/self/exe") fanotifier, err := fanotify.SpawnFanotifier("/proc/self/exe")
if err != nil { if err != nil {
return "", fmt.Errorf("failed to spawn fanotifier: %w", err) return "", errors.Wrapf(err, "failed to spawn fanotifier")
} }
defer func() { defer func() {
if err := fanotifier.Close(); err != nil { if err := fanotifier.Close(); err != nil {
@ -163,7 +163,6 @@ func Analyze(ctx context.Context, client *containerd.Client, ref string, opts ..
defer container.Delete(ctx, containerd.WithSnapshotCleanup) defer container.Delete(ctx, containerd.WithSnapshotCleanup)
var ioCreator cio.Creator var ioCreator cio.Creator
var con console.Console var con console.Console
waitLine := newLineWaiter(aOpts.waitLineOut)
stdinC := newLazyReadCloser(os.Stdin) stdinC := newLazyReadCloser(os.Stdin)
if aOpts.terminal { if aOpts.terminal {
if !aOpts.stdin { if !aOpts.stdin {
@ -175,11 +174,11 @@ func Analyze(ctx context.Context, client *containerd.Client, ref string, opts ..
return "", err return "", err
} }
// On terminal mode, the "stderr" field is unused. // On terminal mode, the "stderr" field is unused.
ioCreator = cio.NewCreator(cio.WithStreams(con, waitLine.registerWriter(con), nil), cio.WithTerminal) ioCreator = cio.NewCreator(cio.WithStreams(con, con, nil), cio.WithTerminal)
} else if aOpts.stdin { } else if aOpts.stdin {
ioCreator = cio.NewCreator(cio.WithStreams(stdinC, waitLine.registerWriter(os.Stdout), os.Stderr)) ioCreator = cio.NewCreator(cio.WithStreams(stdinC, os.Stdout, os.Stderr))
} else { } else {
ioCreator = cio.NewCreator(cio.WithStreams(nil, waitLine.registerWriter(os.Stdout), os.Stderr)) ioCreator = cio.NewCreator(cio.WithStreams(nil, os.Stdout, os.Stderr))
} }
task, err := container.NewTask(ctx, ioCreator) task, err := container.NewTask(ctx, ioCreator)
if err != nil { if err != nil {
@ -196,15 +195,11 @@ func Analyze(ctx context.Context, client *containerd.Client, ref string, opts ..
} }
defer rc.Close() defer rc.Close()
if err := fanotifier.Start(); err != nil { if err := fanotifier.Start(); err != nil {
return "", fmt.Errorf("failed to start fanotifier: %w", err) return "", errors.Wrapf(err, "failed to start fanotifier")
} }
var fanotifierClosed bool var fanotifierClosed bool
var fanotifierClosedMu sync.Mutex var fanotifierClosedMu sync.Mutex
go func() { go func() {
var successCount int
defer func() {
log.G(ctx).Debugf("success record %d path", successCount)
}()
for { for {
path, err := fanotifier.GetPath() path, err := fanotifier.GetPath()
if err != nil { if err != nil {
@ -222,12 +217,8 @@ func Analyze(ctx context.Context, client *containerd.Client, ref string, opts ..
if err := rc.Record(path); err != nil { if err := rc.Record(path); err != nil {
log.G(ctx).WithError(err).Debugf("failed to record %q", path) log.G(ctx).WithError(err).Debugf("failed to record %q", path)
} }
successCount++
} }
}() }()
if err := task.Start(ctx); err != nil {
return "", err
}
if aOpts.terminal { if aOpts.terminal {
if err := tasks.HandleConsoleResize(ctx, task, con); err != nil { if err := tasks.HandleConsoleResize(ctx, task, con); err != nil {
log.G(ctx).WithError(err).Error("failed to resize console") log.G(ctx).WithError(err).Error("failed to resize console")
@ -236,6 +227,9 @@ func Analyze(ctx context.Context, client *containerd.Client, ref string, opts ..
sigc := commands.ForwardAllSignals(ctx, task) sigc := commands.ForwardAllSignals(ctx, task)
defer commands.StopCatch(sigc) defer commands.StopCatch(sigc)
} }
if err := task.Start(ctx); err != nil {
return "", err
}
// Wait until the task exit // Wait until the task exit
var status containerd.ExitStatus var status containerd.ExitStatus
@ -251,7 +245,7 @@ func Analyze(ctx context.Context, client *containerd.Client, ref string, opts ..
aOpts.period = defaultPeriod aOpts.period = defaultPeriod
} }
log.G(ctx).Infof("waiting for %v ...", aOpts.period) log.G(ctx).Infof("waiting for %v ...", aOpts.period)
status, killOk, err = waitOnTimeout(ctx, container, task, aOpts.period, waitLine) status, killOk, err = waitOnTimeout(ctx, container, task, aOpts.period)
if err != nil { if err != nil {
return "", err return "", err
} }
@ -294,7 +288,7 @@ func mountImage(ctx context.Context, ss snapshots.Snapshotter, image containerd.
if err := ss.Remove(ctx, mountpoint); err != nil && !errdefs.IsNotFound(err) { if err := ss.Remove(ctx, mountpoint); err != nil && !errdefs.IsNotFound(err) {
log.G(ctx).WithError(err).Warnf("failed to cleanup snapshot after mount error") log.G(ctx).WithError(err).Warnf("failed to cleanup snapshot after mount error")
} }
return nil, fmt.Errorf("failed to mount rootfs at %q: %w", mountpoint, err) return nil, errors.Wrapf(err, "failed to mount rootfs at %q", mountpoint)
} }
return func() { return func() {
if err := mount.UnmountAll(mountpoint, 0); err != nil { if err := mount.UnmountAll(mountpoint, 0); err != nil {
@ -328,7 +322,7 @@ func waitOnSignal(ctx context.Context, container containerd.Container, task cont
} }
} }
func waitOnTimeout(ctx context.Context, container containerd.Container, task containerd.Task, period time.Duration, line *lineWaiter) (containerd.ExitStatus, bool, error) { func waitOnTimeout(ctx context.Context, container containerd.Container, task containerd.Task, period time.Duration) (containerd.ExitStatus, bool, error) {
statusC, err := task.Wait(ctx) statusC, err := task.Wait(ctx)
if err != nil { if err != nil {
return containerd.ExitStatus{}, false, err return containerd.ExitStatus{}, false, err
@ -336,17 +330,14 @@ func waitOnTimeout(ctx context.Context, container containerd.Container, task con
select { select {
case status := <-statusC: case status := <-statusC:
return status, true, nil return status, true, nil
case l := <-line.waitCh:
log.G(ctx).Infof("Waiting line detected %q; killing task", l)
case <-time.After(period): case <-time.After(period):
log.G(ctx).Warnf("killing task. the time period to monitor access log (%s) has timed out", period.String()) status, err := killTask(ctx, container, task, statusC)
if err != nil {
log.G(ctx).WithError(err).Warnf("failed to kill container")
return containerd.ExitStatus{}, false, nil
}
return status, true, nil
} }
status, err := killTask(ctx, container, task, statusC)
if err != nil {
log.G(ctx).WithError(err).Warnf("failed to kill container")
return containerd.ExitStatus{}, false, nil
}
return status, true, nil
} }
func killTask(ctx context.Context, container containerd.Container, task containerd.Task, statusC <-chan containerd.ExitStatus) (containerd.ExitStatus, error) { func killTask(ctx context.Context, container containerd.Container, task containerd.Task, statusC <-chan containerd.ExitStatus) (containerd.ExitStatus, error) {
@ -355,7 +346,7 @@ func killTask(ctx context.Context, container containerd.Container, task containe
return containerd.ExitStatus{}, err return containerd.ExitStatus{}, err
} }
if err := task.Kill(ctx, sig, containerd.WithKillAll); err != nil && !errdefs.IsNotFound(err) { if err := task.Kill(ctx, sig, containerd.WithKillAll); err != nil && !errdefs.IsNotFound(err) {
return containerd.ExitStatus{}, fmt.Errorf("forward SIGKILL: %w", err) return containerd.ExitStatus{}, errors.Wrapf(err, "forward SIGKILL")
} }
select { select {
case status := <-statusC: case status := <-statusC:
@ -404,37 +395,3 @@ func (s *lazyReadCloser) Read(p []byte) (int, error) {
} }
return n, err return n, err
} }
func newLineWaiter(s string) *lineWaiter {
return &lineWaiter{
waitCh: make(chan string),
waitLine: s,
}
}
type lineWaiter struct {
waitCh chan string
waitLine string
}
func (lw *lineWaiter) registerWriter(w io.Writer) io.Writer {
if lw.waitLine == "" {
return w
}
pr, pw := io.Pipe()
go func() {
scanner := bufio.NewScanner(pr)
for scanner.Scan() {
if strings.Contains(scanner.Text(), lw.waitLine) {
lw.waitCh <- lw.waitLine
}
}
if _, err := io.Copy(io.Discard, pr); err != nil {
pr.CloseWithError(err)
return
}
}()
return io.MultiWriter(w, pw)
}

View File

@ -24,6 +24,8 @@ import (
"strconv" "strconv"
"strings" "strings"
"time" "time"
"github.com/pkg/errors"
) )
const ( const (
@ -72,11 +74,11 @@ func (nc *Client) GetPath() (string, error) {
} }
fd, err := strconv.ParseInt(mes[len(mesFdPrefix):], 10, 32) fd, err := strconv.ParseInt(mes[len(mesFdPrefix):], 10, 32)
if err != nil { if err != nil {
return "", fmt.Errorf("invalid fd %q: %w", mes, err) return "", errors.Wrapf(err, "invalid fd %q", mes)
} }
path, err := os.Readlink(fmt.Sprintf("/proc/%d/fd/%d", nc.servicePid, fd)) path, err := os.Readlink(fmt.Sprintf("/proc/%d/fd/%d", nc.servicePid, fd))
if err != nil { if err != nil {
return "", fmt.Errorf("failed to get link from fd %q: %w", mes, err) return "", errors.Wrapf(err, "failed to get link from fd %q", mes)
} }
return path, writeMessage(nc.w, mesAck) return path, writeMessage(nc.w, mesAck)
} }

View File

@ -17,7 +17,6 @@
package fanotify package fanotify
import ( import (
"errors"
"fmt" "fmt"
"os/exec" "os/exec"
"sync" "sync"
@ -25,6 +24,7 @@ import (
"time" "time"
"github.com/containerd/stargz-snapshotter/analyzer/fanotify/conn" "github.com/containerd/stargz-snapshotter/analyzer/fanotify/conn"
"github.com/hashicorp/go-multierror"
) )
// Fanotifier monitors "/" mountpoint of a new mount namespace and notifies all // Fanotifier monitors "/" mountpoint of a new mount namespace and notifies all
@ -59,15 +59,14 @@ func SpawnFanotifier(fanotifierBin string) (*Fanotifier, error) {
// Connect to the spawned fanotifier over stdio // Connect to the spawned fanotifier over stdio
conn: conn.NewClient(notifyR, notifyW, cmd.Process.Pid, 5*time.Second), conn: conn.NewClient(notifyR, notifyW, cmd.Process.Pid, 5*time.Second),
closeFunc: func() error { closeFunc: func() (allErr error) {
var errs []error
if err := notifyR.Close(); err != nil { if err := notifyR.Close(); err != nil {
errs = append(errs, err) allErr = multierror.Append(allErr, err)
} }
if err := notifyW.Close(); err != nil { if err := notifyW.Close(); err != nil {
errs = append(errs, err) allErr = multierror.Append(allErr, err)
} }
return errors.Join(errs...) return
}, },
}, nil }, nil
} }

View File

@ -25,6 +25,7 @@ import (
"time" "time"
"github.com/containerd/stargz-snapshotter/analyzer/fanotify/conn" "github.com/containerd/stargz-snapshotter/analyzer/fanotify/conn"
"github.com/pkg/errors"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
) )
@ -35,12 +36,12 @@ func Serve(target string, r io.Reader, w io.Writer) error {
fd, err := unix.FanotifyInit(unix.FAN_CLASS_NOTIF, unix.O_RDONLY) fd, err := unix.FanotifyInit(unix.FAN_CLASS_NOTIF, unix.O_RDONLY)
if err != nil { if err != nil {
return fmt.Errorf("fanotify_init: %w", err) return errors.Wrapf(err, "fanotify_init")
} }
// This blocks until the client tells us to start monitoring the target mountpoint. // This blocks until the client tells us to start monitoring the target mountpoint.
if err := sConn.WaitStart(); err != nil { if err := sConn.WaitStart(); err != nil {
return fmt.Errorf("waiting for start inst: %w", err) return errors.Wrapf(err, "waiting for start inst")
} }
// Start monitoring the target mountpoint. // Start monitoring the target mountpoint.
@ -50,12 +51,12 @@ func Serve(target string, r io.Reader, w io.Writer) error {
unix.AT_FDCWD, unix.AT_FDCWD,
target, target,
); err != nil { ); err != nil {
return fmt.Errorf("fanotify_mark: %w", err) return errors.Wrapf(err, "fanotify_mark")
} }
// Notify "started" state to the client. // Notify "started" state to the client.
if err := sConn.SendStarted(); err != nil { if err := sConn.SendStarted(); err != nil {
return fmt.Errorf("failed to send started message: %w", err) return errors.Wrapf(err, "failed to send started message")
} }
nr := bufio.NewReader(os.NewFile(uintptr(fd), "")) nr := bufio.NewReader(os.NewFile(uintptr(fd), ""))
@ -65,10 +66,10 @@ func Serve(target string, r io.Reader, w io.Writer) error {
if err == io.EOF { if err == io.EOF {
break break
} }
return fmt.Errorf("read fanotify fd: %w", err) return errors.Wrapf(err, "read fanotify fd")
} }
if event.Vers != unix.FANOTIFY_METADATA_VERSION { if event.Vers != unix.FANOTIFY_METADATA_VERSION {
return fmt.Errorf("fanotify version mismatch %d(got) != %d(want)", return fmt.Errorf("Fanotify version mismatch %d(got) != %d(want)",
event.Vers, unix.FANOTIFY_METADATA_VERSION) event.Vers, unix.FANOTIFY_METADATA_VERSION)
} }
if event.Fd < 0 { if event.Fd < 0 {
@ -84,10 +85,10 @@ func Serve(target string, r io.Reader, w io.Writer) error {
// descriptor and let the client resolve the path of this file using /proc of // descriptor and let the client resolve the path of this file using /proc of
// this process. // this process.
if err := sConn.SendFd(int(event.Fd)); err != nil { if err := sConn.SendFd(int(event.Fd)); err != nil {
return fmt.Errorf("failed to send fd %d to client: %w", fd, err) return errors.Wrapf(err, "failed to send fd %d to client", fd)
} }
if err := unix.Close(int(event.Fd)); err != nil { if err := unix.Close(int(event.Fd)); err != nil {
return fmt.Errorf("Close(fd): %w", err) return errors.Wrapf(err, "Close(fd)")
} }
continue continue

View File

@ -19,8 +19,8 @@ package analyzer
import ( import (
"time" "time"
containerd "github.com/containerd/containerd/v2/client" "github.com/containerd/containerd"
"github.com/containerd/containerd/v2/pkg/oci" "github.com/containerd/containerd/oci"
) )
type analyzerOpts struct { type analyzerOpts struct {
@ -30,7 +30,6 @@ type analyzerOpts struct {
specOpts SpecOpts specOpts SpecOpts
terminal bool terminal bool
stdin bool stdin bool
waitLineOut string
} }
// Option is runtime configuration of analyzer container // Option is runtime configuration of analyzer container
@ -80,11 +79,3 @@ func WithSnapshotter(snapshotter string) Option {
opts.snapshotter = snapshotter opts.snapshotter = snapshotter
} }
} }
// WithWaitLineOut specifies a substring of a stdout line to be waited.
// When this line is detected, the container will be killed.
func WithWaitLineOut(s string) Option {
return func(opts *analyzerOpts) {
opts.waitLineOut = s
}
}

View File

@ -18,6 +18,7 @@ package recorder
import ( import (
"archive/tar" "archive/tar"
"compress/gzip"
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
@ -26,17 +27,17 @@ import (
"strings" "strings"
"sync" "sync"
"github.com/containerd/containerd/v2/core/content" "github.com/containerd/containerd/content"
"github.com/containerd/containerd/v2/core/images" "github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/v2/core/images/converter/uncompress" "github.com/containerd/containerd/images"
"github.com/containerd/containerd/v2/pkg/archive/compression" "github.com/containerd/containerd/images/converter/uncompress"
"github.com/containerd/errdefs" "github.com/containerd/containerd/log"
"github.com/containerd/log" "github.com/containerd/containerd/platforms"
"github.com/containerd/platforms"
"github.com/containerd/stargz-snapshotter/recorder" "github.com/containerd/stargz-snapshotter/recorder"
"github.com/containerd/stargz-snapshotter/util/containerdutil" "github.com/containerd/stargz-snapshotter/util/containerdutil"
"github.com/opencontainers/go-digest" "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1" ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
"github.com/rs/xid" "github.com/rs/xid"
"golang.org/x/sync/errgroup" "golang.org/x/sync/errgroup"
) )
@ -87,14 +88,14 @@ func imageRecorderFromManifest(ctx context.Context, cs content.Store, manifestDe
log.G(ctx).Infof("analyzing blob %q", desc.Digest) log.G(ctx).Infof("analyzing blob %q", desc.Digest)
readerAt, err := cs.ReaderAt(ctx, desc) readerAt, err := cs.ReaderAt(ctx, desc)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get reader blob %v: %w", desc.Digest, err) return nil, errors.Wrapf(err, "failed to get reader blob %v", desc.Digest)
} }
defer readerAt.Close() defer readerAt.Close()
r := io.Reader(io.NewSectionReader(readerAt, 0, desc.Size)) r := io.Reader(io.NewSectionReader(readerAt, 0, desc.Size))
if !uncompress.IsUncompressedType(desc.MediaType) { if !uncompress.IsUncompressedType(desc.MediaType) {
r, err = compression.DecompressStream(r) r, err = gzip.NewReader(r)
if err != nil { if err != nil {
return nil, fmt.Errorf("cannot decompress layer %v: %w", desc.Digest, err) return nil, errors.Wrapf(err, "cannot decompress layer %v", desc.Digest)
} }
} }
eg.Go(func() error { eg.Go(func() error {
@ -119,7 +120,7 @@ func imageRecorderFromManifest(ctx context.Context, cs content.Store, manifestDe
recordW, err := content.OpenWriter(ctx, cs, recordW, err := content.OpenWriter(ctx, cs,
content.WithRef(fmt.Sprintf("recorder-%v", xid.New().String()))) content.WithRef(fmt.Sprintf("recorder-%v", xid.New().String())))
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to open writer for recorder: %w", err) return nil, errors.Wrapf(err, "faeild to open writer for recorder")
} }
return &ImageRecorder{ return &ImageRecorder{
r: recorder.New(recordW), r: recorder.New(recordW),
@ -151,7 +152,7 @@ func (r *ImageRecorder) Record(name string) error {
} }
whDir := cleanEntryName(path.Join(path.Dir("/"+name), whiteoutOpaqueDir)) whDir := cleanEntryName(path.Join(path.Dir("/"+name), whiteoutOpaqueDir))
if _, ok := r.index[i][whDir]; ok { if _, ok := r.index[i][whDir]; ok {
return fmt.Errorf("parent dir of %q is a deleted directory", name) return fmt.Errorf("Parent dir of %q is a deleted directory", name)
} }
} }
if index < 0 { if index < 0 {

View File

@ -22,13 +22,14 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
"io/ioutil"
"os" "os"
"path" "path"
"testing" "testing"
"github.com/containerd/containerd/v2/core/content" "github.com/containerd/containerd/content"
"github.com/containerd/containerd/v2/plugins/content/local" "github.com/containerd/containerd/content/local"
"github.com/containerd/errdefs" "github.com/containerd/containerd/errdefs"
"github.com/containerd/stargz-snapshotter/recorder" "github.com/containerd/stargz-snapshotter/recorder"
"github.com/containerd/stargz-snapshotter/util/testutil" "github.com/containerd/stargz-snapshotter/util/testutil"
ocispec "github.com/opencontainers/image-spec/specs-go/v1" ocispec "github.com/opencontainers/image-spec/specs-go/v1"
@ -201,7 +202,7 @@ func TestNodeIndex(t *testing.T) {
}, },
} }
tempDir, err := os.MkdirTemp("", "test-recorder") tempDir, err := ioutil.TempDir("", "test-recorder")
if err != nil { if err != nil {
t.Fatalf("failed to prepare content store dir: %v", err) t.Fatalf("failed to prepare content store dir: %v", err)
} }

133
cache/cache.go vendored
View File

@ -18,16 +18,17 @@ package cache
import ( import (
"bytes" "bytes"
"errors"
"fmt" "fmt"
"io" "io"
"io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
"sync" "sync"
"github.com/containerd/stargz-snapshotter/util/cacheutil" "github.com/containerd/stargz-snapshotter/util/lrucache"
"github.com/containerd/stargz-snapshotter/util/namedmutex" "github.com/containerd/stargz-snapshotter/util/namedmutex"
"golang.org/x/sys/unix" "github.com/hashicorp/go-multierror"
"github.com/pkg/errors"
) )
const ( const (
@ -50,21 +51,14 @@ type DirectoryCacheConfig struct {
// DataCache is an on-memory cache of the data. // DataCache is an on-memory cache of the data.
// OnEvicted will be overridden and replaced for internal use. // OnEvicted will be overridden and replaced for internal use.
DataCache *cacheutil.LRUCache DataCache *lrucache.Cache
// FdCache is a cache for opened file descriptors. // FdCache is a cache for opened file descriptors.
// OnEvicted will be overridden and replaced for internal use. // OnEvicted will be overridden and replaced for internal use.
FdCache *cacheutil.LRUCache FdCache *lrucache.Cache
// BufPool will be used for pooling bytes.Buffer. // BufPool will be used for pooling bytes.Buffer.
BufPool *sync.Pool BufPool *sync.Pool
// Direct forcefully enables direct mode for all operation in cache.
// Thus operation won't use on-memory caches.
Direct bool
// FadvDontNeed forcefully clean fscache pagecache for saving memory.
FadvDontNeed bool
} }
// TODO: contents validation. // TODO: contents validation.
@ -86,9 +80,6 @@ type BlobCache interface {
type Reader interface { type Reader interface {
io.ReaderAt io.ReaderAt
Close() error Close() error
// If a blob is backed by a file, it should return *os.File so that it can be used for FUSE passthrough
GetReaderAt() io.ReaderAt
} }
// Writer enables the client to cache byte data. Commit() must be // Writer enables the client to cache byte data. Commit() must be
@ -101,16 +92,15 @@ type Writer interface {
} }
type cacheOpt struct { type cacheOpt struct {
direct bool direct bool
passThrough bool
} }
type Option func(o *cacheOpt) *cacheOpt type Option func(o *cacheOpt) *cacheOpt
// Direct option lets FetchAt and Add methods not to use on-memory caches. When // When Direct option is specified for FetchAt and Add methods, these operation
// you know that the targeting value won't be used immediately, you can prevent // won't use on-memory caches. When you know that the targeting value won't be
// the limited space of on-memory caches from being polluted by these unimportant // used immediately, you can prevent the limited space of on-memory caches from
// values. // being polluted by these unimportant values.
func Direct() Option { func Direct() Option {
return func(o *cacheOpt) *cacheOpt { return func(o *cacheOpt) *cacheOpt {
o.direct = true o.direct = true
@ -118,15 +108,6 @@ func Direct() Option {
} }
} }
// PassThrough option indicates whether to enable FUSE passthrough mode
// to improve local file read performance.
func PassThrough() Option {
return func(o *cacheOpt) *cacheOpt {
o.passThrough = true
return o
}
}
func NewDirectoryCache(directory string, config DirectoryCacheConfig) (BlobCache, error) { func NewDirectoryCache(directory string, config DirectoryCacheConfig) (BlobCache, error) {
if !filepath.IsAbs(directory) { if !filepath.IsAbs(directory) {
return nil, fmt.Errorf("dir cache path must be an absolute path; got %q", directory) return nil, fmt.Errorf("dir cache path must be an absolute path; got %q", directory)
@ -145,9 +126,8 @@ func NewDirectoryCache(directory string, config DirectoryCacheConfig) (BlobCache
if maxEntry == 0 { if maxEntry == 0 {
maxEntry = defaultMaxLRUCacheEntry maxEntry = defaultMaxLRUCacheEntry
} }
dataCache = cacheutil.NewLRUCache(maxEntry) dataCache = lrucache.New(maxEntry)
dataCache.OnEvicted = func(key string, value interface{}) { dataCache.OnEvicted = func(key string, value interface{}) {
value.(*bytes.Buffer).Reset()
bufPool.Put(value) bufPool.Put(value)
} }
} }
@ -157,7 +137,7 @@ func NewDirectoryCache(directory string, config DirectoryCacheConfig) (BlobCache
if maxEntry == 0 { if maxEntry == 0 {
maxEntry = defaultMaxCacheFds maxEntry = defaultMaxCacheFds
} }
fdCache = cacheutil.NewLRUCache(maxEntry) fdCache = lrucache.New(maxEntry)
fdCache.OnEvicted = func(key string, value interface{}) { fdCache.OnEvicted = func(key string, value interface{}) {
value.(*os.File).Close() value.(*os.File).Close()
} }
@ -176,8 +156,6 @@ func NewDirectoryCache(directory string, config DirectoryCacheConfig) (BlobCache
directory: directory, directory: directory,
wipDirectory: wipdir, wipDirectory: wipdir,
bufPool: bufPool, bufPool: bufPool,
direct: config.Direct,
fadvDontNeed: config.FadvDontNeed,
} }
dc.syncAdd = config.SyncAdd dc.syncAdd = config.SyncAdd
return dc, nil return dc, nil
@ -185,17 +163,15 @@ func NewDirectoryCache(directory string, config DirectoryCacheConfig) (BlobCache
// directoryCache is a cache implementation which backend is a directory. // directoryCache is a cache implementation which backend is a directory.
type directoryCache struct { type directoryCache struct {
cache *cacheutil.LRUCache cache *lrucache.Cache
fileCache *cacheutil.LRUCache fileCache *lrucache.Cache
wipDirectory string wipDirectory string
directory string directory string
wipLock *namedmutex.NamedMutex wipLock *namedmutex.NamedMutex
bufPool *sync.Pool bufPool *sync.Pool
syncAdd bool syncAdd bool
direct bool
fadvDontNeed bool
closed bool closed bool
closedMu sync.Mutex closedMu sync.Mutex
@ -211,7 +187,7 @@ func (dc *directoryCache) Get(key string, opts ...Option) (Reader, error) {
opt = o(opt) opt = o(opt)
} }
if !dc.direct && !opt.direct { if !opt.direct {
// Get data from memory // Get data from memory
if b, done, ok := dc.cache.Get(key); ok { if b, done, ok := dc.cache.Get(key); ok {
return &reader{ return &reader{
@ -240,30 +216,16 @@ func (dc *directoryCache) Get(key string, opts ...Option) (Reader, error) {
// or simply report the cache miss? // or simply report the cache miss?
file, err := os.Open(dc.cachePath(key)) file, err := os.Open(dc.cachePath(key))
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to open blob file for %q: %w", key, err) return nil, errors.Wrapf(err, "failed to open blob file for %q", key)
} }
// If "direct" option is specified, do not cache the file on memory. // If "direct" option is specified, do not cache the file on memory.
// This option is useful for preventing memory cache from being polluted by data // This option is useful for preventing memory cache from being polluted by data
// that won't be accessed immediately. // that won't be accessed immediately.
if dc.direct || opt.direct { if opt.direct {
return &reader{ return &reader{
ReaderAt: file, ReaderAt: file,
closeFunc: func() error { closeFunc: func() error { return file.Close() },
if dc.fadvDontNeed {
if err := dropFilePageCache(file); err != nil {
fmt.Printf("Warning: failed to drop page cache: %v\n", err)
}
}
// In passthough model, close will be toke over by go-fuse
// If "passThrough" option is specified, "direct" option also will
// be specified, so adding this branch here is enough
if opt.passThrough {
return nil
}
return file.Close()
},
}, nil }, nil
} }
@ -306,20 +268,13 @@ func (dc *directoryCache) Add(key string, opts ...Option) (Writer, error) {
// Commit the cache contents // Commit the cache contents
c := dc.cachePath(key) c := dc.cachePath(key)
if err := os.MkdirAll(filepath.Dir(c), os.ModePerm); err != nil { if err := os.MkdirAll(filepath.Dir(c), os.ModePerm); err != nil {
var errs []error var allErr error
if err := os.Remove(wip.Name()); err != nil { if err := os.Remove(wip.Name()); err != nil {
errs = append(errs, err) allErr = multierror.Append(allErr, err)
} }
errs = append(errs, fmt.Errorf("failed to create cache directory %q: %w", c, err)) return multierror.Append(allErr,
return errors.Join(errs...) errors.Wrapf(err, "failed to create cache directory %q", c))
} }
if dc.fadvDontNeed {
if err := dropFilePageCache(wip); err != nil {
fmt.Printf("Warning: failed to drop page cache: %v\n", err)
}
}
return os.Rename(wip.Name(), c) return os.Rename(wip.Name(), c)
}, },
abortFunc: func() error { abortFunc: func() error {
@ -330,11 +285,12 @@ func (dc *directoryCache) Add(key string, opts ...Option) (Writer, error) {
// If "direct" option is specified, do not cache the passed data on memory. // If "direct" option is specified, do not cache the passed data on memory.
// This option is useful for preventing memory cache from being polluted by data // This option is useful for preventing memory cache from being polluted by data
// that won't be accessed immediately. // that won't be accessed immediately.
if dc.direct || opt.direct { if opt.direct {
return w, nil return w, nil
} }
b := dc.bufPool.Get().(*bytes.Buffer) b := dc.bufPool.Get().(*bytes.Buffer)
b.Reset()
memW := &writer{ memW := &writer{
WriteCloser: nopWriteCloser(io.Writer(b)), WriteCloser: nopWriteCloser(io.Writer(b)),
commitFunc: func() error { commitFunc: func() error {
@ -344,7 +300,7 @@ func (dc *directoryCache) Add(key string, opts ...Option) (Writer, error) {
} }
cached, done, added := dc.cache.Add(key, b) cached, done, added := dc.cache.Add(key, b)
if !added { if !added {
dc.putBuffer(b) // already exists in the cache. abort it. dc.bufPool.Put(b) // already exists in the cache. abort it.
} }
commit := func() error { commit := func() error {
defer done() defer done()
@ -369,7 +325,7 @@ func (dc *directoryCache) Add(key string, opts ...Option) (Writer, error) {
abortFunc: func() error { abortFunc: func() error {
defer w.Close() defer w.Close()
defer w.Abort() defer w.Abort()
dc.putBuffer(b) // abort it. dc.bufPool.Put(b) // abort it.
return nil return nil
}, },
} }
@ -377,11 +333,6 @@ func (dc *directoryCache) Add(key string, opts ...Option) (Writer, error) {
return memW, nil return memW, nil
} }
func (dc *directoryCache) putBuffer(b *bytes.Buffer) {
b.Reset()
dc.bufPool.Put(b)
}
func (dc *directoryCache) Close() error { func (dc *directoryCache) Close() error {
dc.closedMu.Lock() dc.closedMu.Lock()
defer dc.closedMu.Unlock() defer dc.closedMu.Unlock()
@ -389,7 +340,10 @@ func (dc *directoryCache) Close() error {
return nil return nil
} }
dc.closed = true dc.closed = true
return os.RemoveAll(dc.directory) if err := os.RemoveAll(dc.directory); err != nil {
return err
}
return nil
} }
func (dc *directoryCache) isClosed() bool { func (dc *directoryCache) isClosed() bool {
@ -404,7 +358,7 @@ func (dc *directoryCache) cachePath(key string) string {
} }
func (dc *directoryCache) wipFile(key string) (*os.File, error) { func (dc *directoryCache) wipFile(key string) (*os.File, error) {
return os.CreateTemp(dc.wipDirectory, key+"-*") return ioutil.TempFile(dc.wipDirectory, key+"-*")
} }
func NewMemoryCache() BlobCache { func NewMemoryCache() BlobCache {
@ -424,7 +378,7 @@ func (mc *MemoryCache) Get(key string, opts ...Option) (Reader, error) {
defer mc.mu.Unlock() defer mc.mu.Unlock()
b, ok := mc.Membuf[key] b, ok := mc.Membuf[key]
if !ok { if !ok {
return nil, fmt.Errorf("missed cache: %q", key) return nil, fmt.Errorf("Missed cache: %q", key)
} }
return &reader{bytes.NewReader(b.Bytes()), func() error { return nil }}, nil return &reader{bytes.NewReader(b.Bytes()), func() error { return nil }}, nil
} }
@ -454,10 +408,6 @@ type reader struct {
func (r *reader) Close() error { return r.closeFunc() } func (r *reader) Close() error { return r.closeFunc() }
func (r *reader) GetReaderAt() io.ReaderAt {
return r.ReaderAt
}
type writer struct { type writer struct {
io.WriteCloser io.WriteCloser
commitFunc func() error commitFunc func() error
@ -482,16 +432,3 @@ func (w *writeCloser) Close() error { return w.closeFunc() }
func nopWriteCloser(w io.Writer) io.WriteCloser { func nopWriteCloser(w io.Writer) io.WriteCloser {
return &writeCloser{w, func() error { return nil }} return &writeCloser{w, func() error { return nil }}
} }
func dropFilePageCache(file *os.File) error {
if file == nil {
return nil
}
fd := file.Fd()
err := unix.Fadvise(int(fd), 0, 0, unix.FADV_DONTNEED)
if err != nil {
return fmt.Errorf("posix_fadvise failed, ret=%d", err)
}
return nil
}

5
cache/cache_test.go vendored
View File

@ -26,6 +26,7 @@ import (
"crypto/sha256" "crypto/sha256"
"fmt" "fmt"
"io" "io"
"io/ioutil"
"os" "os"
"testing" "testing"
) )
@ -38,7 +39,7 @@ func TestDirectoryCache(t *testing.T) {
// with enough memory cache // with enough memory cache
newCache := func() (BlobCache, cleanFunc) { newCache := func() (BlobCache, cleanFunc) {
tmp, err := os.MkdirTemp("", "testcache") tmp, err := ioutil.TempDir("", "testcache")
if err != nil { if err != nil {
t.Fatalf("failed to make tempdir: %v", err) t.Fatalf("failed to make tempdir: %v", err)
} }
@ -55,7 +56,7 @@ func TestDirectoryCache(t *testing.T) {
// with smaller memory cache // with smaller memory cache
newCache = func() (BlobCache, cleanFunc) { newCache = func() (BlobCache, cleanFunc) {
tmp, err := os.MkdirTemp("", "testcache") tmp, err := ioutil.TempDir("", "testcache")
if err != nil { if err != nil {
t.Fatalf("failed to make tempdir: %v", err) t.Fatalf("failed to make tempdir: %v", err)
} }

View File

@ -1,553 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package db
import (
"encoding/binary"
"fmt"
"os"
"sort"
"github.com/containerd/stargz-snapshotter/metadata"
bolt "go.etcd.io/bbolt"
)
// Metadata package stores filesystem metadata in the following schema.
//
// - filesystems
// - *filesystem id* : bucket for each filesystem keyed by a unique string.
// - nodes
// - *node id* : bucket for each node keyed by a unique uint64.
// - size : <varint> : size of the regular node.
// - modtime : <varint> : modification time of the node.
// - linkName : <string> : link target of symlink
// - mode : <uvarint> : permission and mode bits (os.FileMode).
// - uid : <varint> : uid of the owner.
// - gid : <varint> : gid of the owner.
// - devMajor : <varint> : the major device number for device
// - devMinor : <varint> : the minor device number for device
// - xattrKey : <string> : key of the first extended attribute.
// - xattrValue : <string> : value of the first extended attribute
// - xattrsExtra : 2nd and the following extended attribute.
// - *key* : <string> : map of key to value string
// - numLink : <varint> : the number of links pointing to this node.
// - metadata
// - *node id* : bucket for each node keyed by a unique uint64.
// - childName : <string> : base name of the first child
// - childID : <node id> : id of the first child
// - childrenExtra : 2nd and following child nodes of directory.
// - *basename* : <node id> : map of basename string to the child node id
// - chunk : <encoded> : information of the first chunk
// - chunksExtra : 2nd and following chunks (this is rarely used so we can avoid the cost of creating the bucket)
// - *chunk offset* : <encoded> : keyed by chunk offset (varint) in the estargz file to the chunk.
// - nextOffset : <varint> : the offset of the next node with a non-zero offset.
// - stream
// - *offset* : bucket for each chunk stream that have multiple inner chunks.
// - *innerOffset* : node id : node id that has the contents at the keyed innerOffset.
// Bucket and key names used by the on-disk schema described in the
// package comment above.
var (
	bucketKeyFilesystems = []byte("filesystems")

	bucketKeyNodes       = []byte("nodes")
	bucketKeySize        = []byte("size")
	bucketKeyModTime     = []byte("modtime")
	bucketKeyLinkName    = []byte("linkName")
	bucketKeyMode        = []byte("mode")
	bucketKeyUID         = []byte("uid")
	bucketKeyGID         = []byte("gid")
	bucketKeyDevMajor    = []byte("devMajor")
	bucketKeyDevMinor    = []byte("devMinor")
	bucketKeyXattrKey    = []byte("xattrKey")
	bucketKeyXattrValue  = []byte("xattrValue")
	bucketKeyXattrsExtra = []byte("xattrsExtra")
	bucketKeyNumLink     = []byte("numLink")

	bucketKeyMetadata      = []byte("metadata")
	bucketKeyChildName     = []byte("childName")
	bucketKeyChildID       = []byte("childID")
	bucketKeyChildrenExtra = []byte("childrenExtra")
	bucketKeyChunk         = []byte("chunk")
	bucketKeyChunksExtra   = []byte("chunksExtra")
	bucketKeyNextOffset    = []byte("nextOffset")

	bucketKeyStream = []byte("stream")
)
// childEntry is the in-memory form of one directory child: its base name
// and the id of the node it points to.
type childEntry struct {
	base string
	id   uint32
}

// chunkEntry is the in-memory form of one chunk of a regular file.
// chunkOffset/chunkSize describe the chunk's position within the file
// contents; offset is where the chunk data lives in the blob (chunkSize
// is derived, not stored — see readChunks).
type chunkEntry struct {
	offset      int64
	chunkOffset int64
	chunkSize   int64
	chunkDigest string
	innerOffset int64 // -1 indicates that no following chunks in the stream.
}

// metadataEntry aggregates the per-node data written to the "metadata"
// bucket: directory children, file chunks and the offset of the next
// node in the blob.
type metadataEntry struct {
	children   map[string]childEntry
	chunks     []chunkEntry
	nextOffset int64
}
// getNodes returns the "nodes" bucket of the filesystem identified by
// fsID, or an error when any bucket on the path is missing.
func getNodes(tx *bolt.Tx, fsID string) (*bolt.Bucket, error) {
	root := tx.Bucket(bucketKeyFilesystems)
	if root == nil {
		return nil, fmt.Errorf("fs %q not found: no fs is registered", fsID)
	}
	fsBucket := root.Bucket([]byte(fsID))
	if fsBucket == nil {
		return nil, fmt.Errorf("fs bucket for %q not found", fsID)
	}
	if nodes := fsBucket.Bucket(bucketKeyNodes); nodes != nil {
		return nodes, nil
	}
	return nil, fmt.Errorf("nodes bucket for %q not found", fsID)
}
// getMetadata returns the "metadata" bucket of the filesystem identified
// by fsID, or an error when any bucket on the path is missing.
func getMetadata(tx *bolt.Tx, fsID string) (*bolt.Bucket, error) {
	root := tx.Bucket(bucketKeyFilesystems)
	if root == nil {
		return nil, fmt.Errorf("fs %q not found: no fs is registered", fsID)
	}
	fsBucket := root.Bucket([]byte(fsID))
	if fsBucket == nil {
		return nil, fmt.Errorf("fs bucket for %q not found", fsID)
	}
	if md := fsBucket.Bucket(bucketKeyMetadata); md != nil {
		return md, nil
	}
	return nil, fmt.Errorf("metadata bucket for fs %q not found", fsID)
}
// getStream returns the "stream" bucket of the filesystem identified by
// fsID, or an error when any bucket on the path is missing.
func getStream(tx *bolt.Tx, fsID string) (*bolt.Bucket, error) {
	root := tx.Bucket(bucketKeyFilesystems)
	if root == nil {
		return nil, fmt.Errorf("fs %q not found: no fs is registered", fsID)
	}
	fsBucket := root.Bucket([]byte(fsID))
	if fsBucket == nil {
		return nil, fmt.Errorf("fs bucket for %q not found", fsID)
	}
	if st := fsBucket.Bucket(bucketKeyStream); st != nil {
		return st, nil
	}
	return nil, fmt.Errorf("stream bucket for fs %q not found", fsID)
}
// getNodeBucketByID returns the per-node bucket for the given node id.
func getNodeBucketByID(nodes *bolt.Bucket, id uint32) (*bolt.Bucket, error) {
	if nb := nodes.Bucket(encodeID(id)); nb != nil {
		return nb, nil
	}
	return nil, fmt.Errorf("node bucket for %d not found", id)
}
// getMetadataBucketByID returns the per-node metadata bucket for the
// given node id.
func getMetadataBucketByID(md *bolt.Bucket, id uint32) (*bolt.Bucket, error) {
	if mb := md.Bucket(encodeID(id)); mb != nil {
		return mb, nil
	}
	return nil, fmt.Errorf("metadata bucket for %d not found", id)
}
// writeAttr persists the node attributes in attr into the node bucket b.
// Zero-valued fields are not written so the DB only stores non-default
// data; readAttr restores absent keys to their zero values (and numLink
// to 1, since numLink is stored off-by-one).
func writeAttr(b *bolt.Bucket, attr *metadata.Attr) error {
	// Integer attributes: written as varints, and only when non-zero.
	for _, v := range []struct {
		key []byte
		val int64
	}{
		{bucketKeySize, attr.Size},
		{bucketKeyUID, int64(attr.UID)},
		{bucketKeyGID, int64(attr.GID)},
		{bucketKeyDevMajor, int64(attr.DevMajor)},
		{bucketKeyDevMinor, int64(attr.DevMinor)},
		{bucketKeyNumLink, int64(attr.NumLink - 1)}, // numLink = 0 means num link = 1 in DB
	} {
		if v.val != 0 {
			val, err := encodeInt(v.val)
			if err != nil {
				return err
			}
			if err := b.Put(v.key, val); err != nil {
				return err
			}
		}
	}
	if !attr.ModTime.IsZero() {
		te, err := attr.ModTime.GobEncode()
		if err != nil {
			return err
		}
		if err := b.Put(bucketKeyModTime, te); err != nil {
			return err
		}
	}
	if len(attr.LinkName) > 0 {
		if err := b.Put(bucketKeyLinkName, []byte(attr.LinkName)); err != nil {
			return err
		}
	}
	if attr.Mode != 0 {
		val, err := encodeUint(uint64(attr.Mode))
		if err != nil {
			return err
		}
		if err := b.Put(bucketKeyMode, val); err != nil {
			return err
		}
	}
	if len(attr.Xattrs) > 0 {
		// One xattr (arbitrary, since map iteration order is random) is
		// stored inline under dedicated keys; the rest go into the
		// "xattrsExtra" sub-bucket, created lazily because most nodes
		// have at most one xattr.
		var firstK string
		var firstV []byte
		for k, v := range attr.Xattrs {
			firstK, firstV = k, v
			break
		}
		if err := b.Put(bucketKeyXattrKey, []byte(firstK)); err != nil {
			return err
		}
		if err := b.Put(bucketKeyXattrValue, firstV); err != nil {
			return err
		}
		var xbkt *bolt.Bucket
		for k, v := range attr.Xattrs {
			// Skip the inline xattr and xattrs with empty values.
			if k == firstK || len(v) == 0 {
				continue
			}
			if xbkt == nil {
				// NOTE: the inner xbkt deliberately shadows the outer
				// one; it is only used to test for a stale sub-bucket.
				if xbkt := b.Bucket(bucketKeyXattrsExtra); xbkt != nil {
					// Reset
					if err := b.DeleteBucket(bucketKeyXattrsExtra); err != nil {
						return err
					}
				}
				var err error
				xbkt, err = b.CreateBucket(bucketKeyXattrsExtra)
				if err != nil {
					return err
				}
			}
			if err := xbkt.Put([]byte(k), v); err != nil {
				return fmt.Errorf("failed to set xattr %q=%q: %w", k, string(v), err)
			}
		}
	}
	return nil
}
// readAttr populates attr from the node bucket b, reversing writeAttr.
// Keys absent from the bucket leave the corresponding field at its zero
// value; numLink is stored off-by-one in the DB (0 means 1 link).
func readAttr(b *bolt.Bucket, attr *metadata.Attr) error {
	return b.ForEach(func(k, v []byte) error {
		switch string(k) {
		case string(bucketKeySize):
			attr.Size, _ = binary.Varint(v)
		case string(bucketKeyModTime):
			if err := (&attr.ModTime).GobDecode(v); err != nil {
				return err
			}
		case string(bucketKeyLinkName):
			attr.LinkName = string(v)
		case string(bucketKeyMode):
			mode, _ := binary.Uvarint(v)
			attr.Mode = os.FileMode(uint32(mode))
		case string(bucketKeyUID):
			i, _ := binary.Varint(v)
			attr.UID = int(i)
		case string(bucketKeyGID):
			i, _ := binary.Varint(v)
			attr.GID = int(i)
		case string(bucketKeyDevMajor):
			i, _ := binary.Varint(v)
			attr.DevMajor = int(i)
		case string(bucketKeyDevMinor):
			i, _ := binary.Varint(v)
			attr.DevMinor = int(i)
		case string(bucketKeyNumLink):
			i, _ := binary.Varint(v)
			attr.NumLink = int(i) + 1 // numLink = 0 means num link = 1 in DB
		case string(bucketKeyXattrKey):
			// The inline first xattr: key stored here, value under
			// bucketKeyXattrValue.
			if attr.Xattrs == nil {
				attr.Xattrs = make(map[string][]byte)
			}
			attr.Xattrs[string(v)] = b.Get(bucketKeyXattrValue)
		case string(bucketKeyXattrsExtra):
			// Remaining xattrs live in a sub-bucket keyed by xattr name.
			if err := b.Bucket(k).ForEach(func(k, v []byte) error {
				if attr.Xattrs == nil {
					attr.Xattrs = make(map[string][]byte)
				}
				attr.Xattrs[string(k)] = v
				return nil
			}); err != nil {
				return err
			}
		}
		return nil
	})
}
// readNumLink returns the number of links of the node stored in b.
// A missing or zero DB value means one link (see writeAttr).
func readNumLink(b *bolt.Bucket) int {
	stored, _ := binary.Varint(b.Get(bucketKeyNumLink))
	return int(stored) + 1
}
// readChunks loads all chunks of the node stored in metadata bucket b,
// sorted by chunkOffset. size is the full size of the node contents and
// is used to derive each chunk's chunkSize, which is not stored in the
// DB (each chunk ends where the next begins; the last ends at size).
func readChunks(b *bolt.Bucket, size int64) (chunks []chunkEntry, err error) {
	// The first chunk is stored inline; any further chunks (rare) live
	// in the "chunksExtra" sub-bucket keyed by chunk offset.
	if chunk := b.Get(bucketKeyChunk); len(chunk) > 0 {
		e, err := decodeChunkEntry(chunk)
		if err != nil {
			return nil, err
		}
		chunks = append(chunks, e)
	}
	if chbkt := b.Bucket(bucketKeyChunksExtra); chbkt != nil {
		if err := chbkt.ForEach(func(_, v []byte) error {
			e, err := decodeChunkEntry(v)
			if err != nil {
				return err
			}
			chunks = append(chunks, e)
			return nil
		}); err != nil {
			return nil, err
		}
		sort.Slice(chunks, func(i, j int) bool {
			return chunks[i].chunkOffset < chunks[j].chunkOffset
		})
	}
	// Fill in chunkSize back-to-front from the offset of the following
	// chunk.
	nextOffset := size
	for i := len(chunks) - 1; i >= 0; i-- {
		chunks[i].chunkSize = nextOffset - chunks[i].chunkOffset
		nextOffset = chunks[i].chunkOffset
	}
	return
}
// chunkEntryWithID pairs a chunkEntry with the id of the node it
// belongs to.
type chunkEntryWithID struct {
	chunkEntry
	id uint32
}
// readInnerChunks returns every chunk, across all nodes of filesystem
// fsID, whose data is stored at offset off in the blob (i.e. chunks
// sharing one stream), sorted by innerOffset within that stream.
func readInnerChunks(tx *bolt.Tx, fsID string, off int64) (chunks []chunkEntryWithID, err error) {
	sb, err := getStream(tx, fsID)
	if err != nil {
		return nil, err
	}
	offEncoded, err := encodeInt(off)
	if err != nil {
		return nil, err
	}
	ob := sb.Bucket(offEncoded)
	if ob == nil {
		return nil, fmt.Errorf("inner chunk bucket for %d not found", off)
	}
	nodes, err := getNodes(tx, fsID)
	if err != nil {
		return nil, fmt.Errorf("nodes bucket of %q not found: %w", fsID, err)
	}
	metadataEntries, err := getMetadata(tx, fsID)
	if err != nil {
		return nil, fmt.Errorf("metadata bucket of %q not found: %w", fsID, err)
	}
	// The stream bucket maps innerOffset -> node id (see the schema
	// comment); collect, from each referenced node, the chunks that are
	// stored at the requested blob offset.
	if err := ob.ForEach(func(_, v []byte) error {
		nodeid := decodeID(v)
		b, err := getNodeBucketByID(nodes, nodeid)
		if err != nil {
			return fmt.Errorf("failed to get file bucket %d: %w", nodeid, err)
		}
		size, _ := binary.Varint(b.Get(bucketKeySize))
		// Nodes without a metadata bucket are silently skipped.
		if md, err := getMetadataBucketByID(metadataEntries, nodeid); err == nil {
			nodeChunks, err := readChunks(md, size)
			if err != nil {
				return fmt.Errorf("failed to get chunks: %w", err)
			}
			for _, e := range nodeChunks {
				if e.offset == off {
					chunks = append(chunks, chunkEntryWithID{e, nodeid})
				}
			}
		}
		return nil
	}); err != nil {
		return nil, err
	}
	sort.Slice(chunks, func(i, j int) bool {
		return chunks[i].innerOffset < chunks[j].innerOffset
	})
	return chunks, nil
}
// readChild returns the node id of the child named base of the directory
// whose metadata bucket is md. It first checks the inline child slot,
// then falls back to the "childrenExtra" sub-bucket.
func readChild(md *bolt.Bucket, base string) (uint32, error) {
	if string(md.Get(bucketKeyChildName)) == base {
		return decodeID(md.Get(bucketKeyChildID)), nil
	}
	extra := md.Bucket(bucketKeyChildrenExtra)
	if extra == nil {
		return 0, fmt.Errorf("extra children not found")
	}
	id := extra.Get([]byte(base))
	if len(id) == 0 {
		return 0, fmt.Errorf("children %q not found", base)
	}
	return decodeID(id), nil
}
// writeMetadataEntry persists m into the metadata bucket md. The first
// child and the first chunk are stored inline under dedicated keys; the
// remaining children/chunks go into lazily-created sub-buckets
// ("childrenExtra" / "chunksExtra") since most nodes only need the
// inline slot.
func writeMetadataEntry(md *bolt.Bucket, m *metadataEntry) error {
	if len(m.children) > 0 {
		// Pick an arbitrary child (map iteration order) as the inline one.
		var firstChildName string
		var firstChild childEntry
		for name, child := range m.children {
			firstChildName, firstChild = name, child
			break
		}
		if err := md.Put(bucketKeyChildID, encodeID(firstChild.id)); err != nil {
			return fmt.Errorf("failed to put id of first child %q: %w", firstChildName, err)
		}
		if err := md.Put(bucketKeyChildName, []byte(firstChildName)); err != nil {
			return fmt.Errorf("failed to put name of first child %q: %w", firstChildName, err)
		}
		if len(m.children) > 1 {
			var cbkt *bolt.Bucket
			for k, c := range m.children {
				if k == firstChildName {
					continue
				}
				if cbkt == nil {
					// Drop any stale sub-bucket before recreating it.
					if old := md.Bucket(bucketKeyChildrenExtra); old != nil {
						if err := md.DeleteBucket(bucketKeyChildrenExtra); err != nil {
							return err
						}
					}
					var err error
					cbkt, err = md.CreateBucket(bucketKeyChildrenExtra)
					if err != nil {
						return err
					}
				}
				// %d, not %q: c.id is a numeric node id (with %q it was
				// formatted as a quoted rune).
				if err := cbkt.Put([]byte(c.base), encodeID(c.id)); err != nil {
					return fmt.Errorf("failed to add child ID %d: %w", c.id, err)
				}
			}
		}
	}
	if len(m.chunks) > 0 {
		first := m.chunks[0]
		// %d, not %q: first.offset is a numeric blob offset.
		if err := md.Put(bucketKeyChunk, encodeChunkEntry(first)); err != nil {
			return fmt.Errorf("failed to set chunk %d: %w", first.offset, err)
		}
		var cbkt *bolt.Bucket
		for _, e := range m.chunks[1:] {
			if cbkt == nil {
				// Drop any stale sub-bucket before recreating it.
				if old := md.Bucket(bucketKeyChunksExtra); old != nil {
					if err := md.DeleteBucket(bucketKeyChunksExtra); err != nil {
						return err
					}
				}
				var err error
				cbkt, err = md.CreateBucket(bucketKeyChunksExtra)
				if err != nil {
					return err
				}
			}
			ecoff, err := encodeInt(e.chunkOffset)
			if err != nil {
				return err
			}
			if err := cbkt.Put(ecoff, encodeChunkEntry(e)); err != nil {
				return err
			}
		}
	}
	if m.nextOffset > 0 {
		if err := putInt(md, bucketKeyNextOffset, m.nextOffset); err != nil {
			return fmt.Errorf("failed to set next offset value %d: %w", m.nextOffset, err)
		}
	}
	return nil
}
// encodeChunkEntry serializes e as three big-endian uint64s
// (chunkOffset, offset, innerOffset) followed by the digest string.
// chunkSize is not stored; it is re-derived at read time.
func encodeChunkEntry(e chunkEntry) []byte {
	digest := []byte(e.chunkDigest)
	buf := make([]byte, 24+len(digest))
	binary.BigEndian.PutUint64(buf[0:8], uint64(e.chunkOffset))
	binary.BigEndian.PutUint64(buf[8:16], uint64(e.offset))
	binary.BigEndian.PutUint64(buf[16:24], uint64(e.innerOffset))
	copy(buf[24:], digest)
	return buf
}
// decodeChunkEntry decodes a chunk entry produced by encodeChunkEntry:
// three big-endian uint64s (chunkOffset, offset, innerOffset) followed
// by an optional chunk digest string.
func decodeChunkEntry(d []byte) (e chunkEntry, _ error) {
	if len(d) < 24 {
		// Fixed typo in the error message ("mulformed" -> "malformed").
		return e, fmt.Errorf("malformed chunk entry (len:%d)", len(d))
	}
	e.chunkOffset = int64(binary.BigEndian.Uint64(d[0:8]))
	e.offset = int64(binary.BigEndian.Uint64(d[8:16]))
	e.innerOffset = int64(binary.BigEndian.Uint64(d[16:24]))
	if len(d) > 24 {
		e.chunkDigest = string(d[24:])
	}
	return e, nil
}
// putInt stores v under key k in bucket b, encoded as a signed varint.
func putInt(b *bolt.Bucket, k []byte, v int64) error {
	encoded, err := encodeInt(v)
	if err != nil {
		return err
	}
	return b.Put(k, encoded)
}
// encodeID serializes a node id as 4 big-endian bytes.
func encodeID(id uint32) []byte {
	encoded := make([]byte, 4)
	binary.BigEndian.PutUint32(encoded, id)
	return encoded
}
func decodeID(b []byte) uint32 {
return binary.BigEndian.Uint32(b)
}
// encodeInt encodes i as a signed (zig-zag) varint.
func encodeInt(i int64) ([]byte, error) {
	var scratch [binary.MaxVarintLen64]byte
	n := binary.PutVarint(scratch[:], i)
	if n == 0 {
		return nil, fmt.Errorf("failed encoding integer = %v", i)
	}
	return scratch[:n], nil
}
// encodeUint encodes i as an unsigned varint.
func encodeUint(i uint64) ([]byte, error) {
	var scratch [binary.MaxVarintLen64]byte
	n := binary.PutUvarint(scratch[:], i)
	if n == 0 {
		return nil, fmt.Errorf("failed encoding integer = %v", i)
	}
	return scratch[:n], nil
}

File diff suppressed because it is too large Load Diff

View File

@ -1,153 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package db
import (
"io"
"os"
"testing"
"github.com/containerd/stargz-snapshotter/fs/layer"
fsreader "github.com/containerd/stargz-snapshotter/fs/reader"
"github.com/containerd/stargz-snapshotter/metadata"
"github.com/containerd/stargz-snapshotter/metadata/testutil"
bolt "go.etcd.io/bbolt"
)
// TestReader runs the shared metadata reader test suite against the
// bolt-backed implementation.
func TestReader(t *testing.T) {
	runner := &testutil.TestRunner{
		TestingT: t,
		Runner: func(testingT testutil.TestingT, name string, run func(t testutil.TestingT)) {
			realT, ok := testingT.(*testing.T)
			if !ok {
				testingT.Fatal("TestingT is not a *testing.T")
				return
			}
			realT.Run(name, func(t *testing.T) { run(t) })
		},
	}
	testutil.TestReader(runner, newTestableReader)
}
// TestFSReader runs the fs/reader test suite against the bolt-backed
// metadata store.
func TestFSReader(t *testing.T) {
	runner := &fsreader.TestRunner{
		TestingT: t,
		Runner: func(testingT fsreader.TestingT, name string, run func(t fsreader.TestingT)) {
			realT, ok := testingT.(*testing.T)
			if !ok {
				testingT.Fatal("TestingT is not a *testing.T")
				return
			}
			realT.Run(name, func(t *testing.T) { run(t) })
		},
	}
	fsreader.TestSuiteReader(runner, newStore)
}
// TestFSLayer runs the fs/layer test suite against the bolt-backed
// metadata store.
func TestFSLayer(t *testing.T) {
	runner := &layer.TestRunner{
		TestingT: t,
		Runner: func(testingT layer.TestingT, name string, run func(t layer.TestingT)) {
			realT, ok := testingT.(*testing.T)
			if !ok {
				testingT.Fatal("TestingT is not a *testing.T")
				return
			}
			realT.Run(name, func(t *testing.T) { run(t) })
		},
	}
	layer.TestSuiteLayer(runner, newStore)
}
// newTestableReader builds a bolt-backed TestableReader over a temporary
// DB file. The file is removed on error and by the returned closeFn.
//
// Fix: the original `defer os.Remove(f.Name())` unlinked the DB file as
// soon as this function returned, so closeFn's own os.Remove always
// failed (and the behavior diverged from newStore). Cleanup now happens
// only on the error paths and in closeFn.
func newTestableReader(sr *io.SectionReader, opts ...metadata.Option) (testutil.TestableReader, error) {
	f, err := os.CreateTemp("", "readertestdb")
	if err != nil {
		return nil, err
	}
	// Only the file name is needed; bolt reopens the file itself.
	f.Close()
	db, err := bolt.Open(f.Name(), 0600, nil)
	if err != nil {
		os.Remove(f.Name())
		return nil, err
	}
	r, err := NewReader(db, sr, opts...)
	if err != nil {
		db.Close()
		os.Remove(f.Name())
		return nil, err
	}
	return &testableReadCloser{
		TestableReader: r.(*reader),
		closeFn: func() error {
			db.Close()
			return os.Remove(f.Name())
		},
	}, nil
}
// newStore builds a bolt-backed metadata.Reader over a temporary DB
// file. The file is removed by the returned closeFn.
//
// Fix: the original leaked the temporary file when bolt.Open or
// NewReader failed; it is now removed on those error paths.
func newStore(sr *io.SectionReader, opts ...metadata.Option) (metadata.Reader, error) {
	f, err := os.CreateTemp("", "readertestdb")
	if err != nil {
		return nil, err
	}
	// Only the file name is needed; bolt reopens the file itself.
	f.Close()
	db, err := bolt.Open(f.Name(), 0600, nil)
	if err != nil {
		os.Remove(f.Name())
		return nil, err
	}
	r, err := NewReader(db, sr, opts...)
	if err != nil {
		db.Close()
		os.Remove(f.Name())
		return nil, err
	}
	return &readCloser{
		Reader: r,
		closeFn: func() error {
			db.Close()
			return os.Remove(f.Name())
		},
	}, nil
}
// readCloser wraps a metadata.Reader with an extra cleanup function that
// runs on Close (e.g. closing the backing DB and removing its file).
type readCloser struct {
	metadata.Reader
	closeFn func() error
}

// Close runs the cleanup function and closes the underlying reader.
// Fix: the cleanup error was previously dropped; it is now returned when
// the reader itself closes cleanly.
func (r *readCloser) Close() error {
	cleanupErr := r.closeFn()
	if err := r.Reader.Close(); err != nil {
		return err
	}
	return cleanupErr
}
// testableReadCloser wraps a testutil.TestableReader with an extra
// cleanup function that runs on Close.
type testableReadCloser struct {
	testutil.TestableReader
	closeFn func() error
}

// Close runs the cleanup function and closes the underlying reader.
// Fix: the cleanup error was previously dropped; it is now returned when
// the reader itself closes cleanly.
func (r *testableReadCloser) Close() error {
	cleanupErr := r.closeFn()
	if err := r.TestableReader.Close(); err != nil {
		return err
	}
	return cleanupErr
}

View File

@ -1,80 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fsopts
import (
"context"
"fmt"
"io"
"path/filepath"
"github.com/containerd/log"
dbmetadata "github.com/containerd/stargz-snapshotter/cmd/containerd-stargz-grpc/db"
ipfs "github.com/containerd/stargz-snapshotter/cmd/containerd-stargz-grpc/ipfs"
"github.com/containerd/stargz-snapshotter/fs"
"github.com/containerd/stargz-snapshotter/metadata"
memorymetadata "github.com/containerd/stargz-snapshotter/metadata/memory"
bolt "go.etcd.io/bbolt"
)
// Config holds the options ConfigFsOpts uses to assemble filesystem
// options.
type Config struct {
	// EnableIpfs registers the IPFS resolve handler so blobs can be
	// pulled via IPFS.
	EnableIpfs bool
	// MetadataStore selects the metadata store implementation; see
	// memoryMetadataType and dbMetadataType ("" defaults to memory).
	MetadataStore string
	// OpenBoltDB opens the bolt database at the given path; required
	// when MetadataStore is dbMetadataType.
	OpenBoltDB func(string) (*bolt.DB, error)
}

// Supported values of Config.MetadataStore.
const (
	memoryMetadataType = "memory"
	dbMetadataType     = "db"
)
// ConfigFsOpts builds the stargz filesystem options described by config:
// metrics log level, optional IPFS resolve handler and the selected
// metadata store (whose DB, if any, lives under rootDir).
func ConfigFsOpts(ctx context.Context, rootDir string, config *Config) ([]fs.Option, error) {
	opts := []fs.Option{fs.WithMetricsLogLevel(log.InfoLevel)}
	if config.EnableIpfs {
		opts = append(opts, fs.WithResolveHandler("ipfs", new(ipfs.ResolveHandler)))
	}
	store, err := getMetadataStore(rootDir, config)
	if err != nil {
		return nil, fmt.Errorf("failed to configure metadata store: %w", err)
	}
	return append(opts, fs.WithMetadataStore(store)), nil
}
// getMetadataStore returns the metadata store selected by
// config.MetadataStore: in-memory by default, or bolt-backed ("db")
// using a database file under rootDir.
func getMetadataStore(rootDir string, config *Config) (metadata.Store, error) {
	switch config.MetadataStore {
	case "", memoryMetadataType:
		return memorymetadata.NewReader, nil
	case dbMetadataType:
		if config.OpenBoltDB == nil {
			return nil, fmt.Errorf("bolt DB is not configured")
		}
		db, err := config.OpenBoltDB(filepath.Join(rootDir, "metadata.db"))
		if err != nil {
			return nil, err
		}
		store := func(sr *io.SectionReader, opts ...metadata.Option) (metadata.Reader, error) {
			return dbmetadata.NewReader(db, sr, opts...)
		}
		return store, nil
	default:
		return nil, fmt.Errorf("unknown metadata store type: %v; must be %v or %v",
			config.MetadataStore, memoryMetadataType, dbMetadataType)
	}
}

View File

@ -1,79 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ipfs
import (
"context"
"crypto/sha256"
"fmt"
"io"
"os"
"github.com/containerd/stargz-snapshotter/fs/remote"
"github.com/containerd/stargz-snapshotter/ipfs"
ipfsclient "github.com/containerd/stargz-snapshotter/ipfs/client"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)
// ResolveHandler resolves layer descriptors whose contents are stored in
// IPFS.
type ResolveHandler struct{}

// Handle extracts the IPFS CID from desc, connects to the local IPFS
// API and returns a fetcher for the addressed blob along with its size.
func (r *ResolveHandler) Handle(ctx context.Context, desc ocispec.Descriptor) (remote.Fetcher, int64, error) {
	cid, err := ipfs.GetCID(desc)
	if err != nil {
		return nil, 0, err
	}
	ipath := os.Getenv("IPFS_PATH")
	// HTTP is only supported as of now. We can add https support here if needed (e.g. for connecting to it via proxy, etc)
	apiAddr, err := ipfsclient.GetIPFSAPIAddress(ipath, "http")
	if err != nil {
		return nil, 0, err
	}
	c := ipfsclient.New(apiAddr)
	stat, err := c.StatCID(cid)
	if err != nil {
		return nil, 0, err
	}
	size := int64(stat.Size)
	return &fetcher{cid: cid, size: size, client: c}, size, nil
}
// fetcher fetches ranges of a single IPFS-addressed blob.
type fetcher struct {
	cid    string
	size   int64
	client *ipfsclient.Client
}

// Fetch returns a reader over the requested range of the blob.
func (f *fetcher) Fetch(ctx context.Context, off int64, size int64) (io.ReadCloser, error) {
	if off > f.size {
		return nil, fmt.Errorf("offset is larger than the size of the blob %d(offset) > %d(blob size)", off, f.size)
	}
	offset, length := int(off), int(size)
	return f.client.Get("/ipfs/"+f.cid, &offset, &length)
}

// Check verifies the blob is still reachable on the IPFS node.
func (f *fetcher) Check() error {
	_, err := f.client.StatCID(f.cid)
	return err
}

// GenID returns a deterministic identifier for the given range of this
// blob (SHA-256 over "cid-off-size").
func (f *fetcher) GenID(off int64, size int64) string {
	sum := sha256.Sum256([]byte(fmt.Sprintf("%s-%d-%d", f.cid, off, size)))
	return fmt.Sprintf("%x", sum)
}

View File

@ -21,42 +21,32 @@ import (
"flag" "flag"
"fmt" "fmt"
golog "log" golog "log"
"math/rand"
"net" "net"
"net/http" "net/http"
"os" "os"
"os/exec"
"os/signal" "os/signal"
"path/filepath" "path/filepath"
"time"
"github.com/BurntSushi/toml"
snapshotsapi "github.com/containerd/containerd/api/services/snapshots/v1" snapshotsapi "github.com/containerd/containerd/api/services/snapshots/v1"
"github.com/containerd/containerd/v2/contrib/snapshotservice" "github.com/containerd/containerd/contrib/snapshotservice"
"github.com/containerd/containerd/v2/core/snapshots" "github.com/containerd/containerd/log"
"github.com/containerd/containerd/v2/pkg/sys" "github.com/containerd/containerd/snapshots"
"github.com/containerd/log"
"github.com/containerd/stargz-snapshotter/cmd/containerd-stargz-grpc/fsopts"
"github.com/containerd/stargz-snapshotter/fusemanager"
"github.com/containerd/stargz-snapshotter/service" "github.com/containerd/stargz-snapshotter/service"
"github.com/containerd/stargz-snapshotter/service/keychain/keychainconfig"
snbase "github.com/containerd/stargz-snapshotter/snapshot"
"github.com/containerd/stargz-snapshotter/version" "github.com/containerd/stargz-snapshotter/version"
sddaemon "github.com/coreos/go-systemd/v22/daemon" sddaemon "github.com/coreos/go-systemd/v22/daemon"
metrics "github.com/docker/go-metrics" metrics "github.com/docker/go-metrics"
"github.com/pelletier/go-toml" "github.com/pkg/errors"
bolt "go.etcd.io/bbolt" "github.com/sirupsen/logrus"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
"google.golang.org/grpc" "google.golang.org/grpc"
) )
const ( const (
defaultAddress = "/run/containerd-stargz-grpc/containerd-stargz-grpc.sock" defaultAddress = "/run/containerd-stargz-grpc/containerd-stargz-grpc.sock"
defaultConfigPath = "/etc/containerd-stargz-grpc/config.toml" defaultConfigPath = "/etc/containerd-stargz-grpc/config.toml"
defaultLogLevel = log.InfoLevel defaultLogLevel = logrus.InfoLevel
defaultRootDir = "/var/lib/containerd-stargz-grpc" defaultRootDir = "/var/lib/containerd-stargz-grpc"
defaultImageServiceAddress = "/run/containerd/containerd.sock"
defaultFuseManagerAddress = "/run/containerd-stargz-grpc/fuse-manager.sock"
fuseManagerBin = "stargz-fuse-manager"
) )
var ( var (
@ -71,40 +61,12 @@ type snapshotterConfig struct {
service.Config service.Config
// MetricsAddress is address for the metrics API // MetricsAddress is address for the metrics API
MetricsAddress string `toml:"metrics_address" json:"metrics_address"` MetricsAddress string `toml:"metrics_address"`
// NoPrometheus is a flag to disable the emission of the metrics
NoPrometheus bool `toml:"no_prometheus" json:"no_prometheus"`
// DebugAddress is a Unix domain socket address where the snapshotter exposes /debug/ endpoints.
DebugAddress string `toml:"debug_address" json:"debug_address"`
	// IPFS is a flag to enable lazy pulling from IPFS.
IPFS bool `toml:"ipfs" json:"ipfs"`
// MetadataStore is the type of the metadata store to use.
MetadataStore string `toml:"metadata_store" default:"memory" json:"metadata_store"`
// FuseManagerConfig is configuration for fusemanager
FuseManagerConfig `toml:"fuse_manager" json:"fuse_manager"`
}
type FuseManagerConfig struct {
// Enable is whether detach fusemanager or not
Enable bool `toml:"enable" default:"false" json:"enable"`
// Address is address for the fusemanager's GRPC server (default: "/run/containerd-stargz-grpc/fuse-manager.sock")
Address string `toml:"address" json:"address"`
// Path is path to the fusemanager's executable (default: looking for a binary "stargz-fuse-manager")
Path string `toml:"path" json:"path"`
} }
func main() { func main() {
rand.Seed(time.Now().UnixNano()) //nolint:staticcheck // Global math/rand seed is deprecated, but still used by external dependencies
flag.Parse() flag.Parse()
log.SetFormat(log.JSONFormat) lvl, err := logrus.ParseLevel(*logLevel)
err := log.SetLevel(*logLevel)
if err != nil { if err != nil {
log.L.WithError(err).Fatal("failed to prepare logger") log.L.WithError(err).Fatal("failed to prepare logger")
} }
@ -112,178 +74,50 @@ func main() {
fmt.Println("containerd-stargz-grpc", version.Version, version.Revision) fmt.Println("containerd-stargz-grpc", version.Version, version.Revision)
return return
} }
logrus.SetLevel(lvl)
logrus.SetFormatter(&logrus.JSONFormatter{
TimestampFormat: log.RFC3339NanoFixed,
})
var ( var (
ctx = log.WithLogger(context.Background(), log.L) ctx = log.WithLogger(context.Background(), log.L)
config snapshotterConfig config snapshotterConfig
) )
// Streams log of standard lib (go-fuse uses this) into debug log // Streams log of standard lib (go-fuse uses this) into debug log
	// Snapshotter should use "github.com/containerd/log" otherwise	// Snapshotter should use "github.com/containerd/containerd/log" otherwise
// logs are always printed as "debug" mode. // logs are always printed as "debug" mode.
golog.SetOutput(log.G(ctx).WriterLevel(log.DebugLevel)) golog.SetOutput(log.G(ctx).WriterLevel(logrus.DebugLevel))
// Get configuration from specified file // Get configuration from specified file
tree, err := toml.LoadFile(*configPath) if _, err := toml.DecodeFile(*configPath, &config); err != nil && !(os.IsNotExist(err) && *configPath == defaultConfigPath) {
if err != nil && (!os.IsNotExist(err) || *configPath != defaultConfigPath) {
log.G(ctx).WithError(err).Fatalf("failed to load config file %q", *configPath) log.G(ctx).WithError(err).Fatalf("failed to load config file %q", *configPath)
} }
if err := tree.Unmarshal(&config); err != nil {
log.G(ctx).WithError(err).Fatalf("failed to unmarshal config file %q", *configPath)
}
if err := service.Supported(*rootDir); err != nil { if err := service.Supported(*rootDir); err != nil {
log.G(ctx).WithError(err).Fatalf("snapshotter is not supported") log.G(ctx).WithError(err).Fatalf("snapshotter is not supported")
} }
// Create a gRPC server rs, err := service.NewStargzSnapshotterService(ctx, *rootDir, &config.Config)
rpc := grpc.NewServer() if err != nil {
log.G(ctx).WithError(err).Fatalf("failed to configure snapshotter")
// Configure FUSE passthrough
// Always set Direct to true to ensure that
// *directoryCache.Get always return *os.File instead of buffer
if config.PassThrough {
config.Direct = true
} }
// Configure keychain cleanup, err := serve(ctx, *address, rs, config)
keyChainConfig := keychainconfig.Config{
EnableKubeKeychain: config.KubeconfigKeychainConfig.EnableKeychain,
EnableCRIKeychain: config.CRIKeychainConfig.EnableKeychain,
KubeconfigPath: config.KubeconfigPath,
DefaultImageServiceAddress: defaultImageServiceAddress,
ImageServicePath: config.ImageServicePath,
}
var rs snapshots.Snapshotter
fuseManagerConfig := config.FuseManagerConfig
if fuseManagerConfig.Enable {
fmPath := fuseManagerConfig.Path
if fmPath == "" {
var err error
fmPath, err = exec.LookPath(fuseManagerBin)
if err != nil {
log.G(ctx).WithError(err).Fatalf("failed to find fusemanager bin")
}
}
fmAddr := fuseManagerConfig.Address
if fmAddr == "" {
fmAddr = defaultFuseManagerAddress
}
if !filepath.IsAbs(fmAddr) {
log.G(ctx).WithError(err).Fatalf("fuse manager address must be an absolute path: %s", fmAddr)
}
managerNewlyStarted, err := fusemanager.StartFuseManager(ctx, fmPath, fmAddr, filepath.Join(*rootDir, "fusestore.db"), *logLevel, filepath.Join(*rootDir, "stargz-fuse-manager.log"))
if err != nil {
log.G(ctx).WithError(err).Fatalf("failed to start fusemanager")
}
fuseManagerConfig := fusemanager.Config{
Config: config.Config,
IPFS: config.IPFS,
MetadataStore: config.MetadataStore,
DefaultImageServiceAddress: defaultImageServiceAddress,
}
fs, err := fusemanager.NewManagerClient(ctx, *rootDir, fmAddr, &fuseManagerConfig)
if err != nil {
log.G(ctx).WithError(err).Fatalf("failed to configure fusemanager")
}
flags := []snbase.Opt{snbase.AsynchronousRemove}
// "managerNewlyStarted" being true indicates that the FUSE manager is newly started. To
// fully recover the snapshotter and the FUSE manager's state, we need to restore
// all snapshot mounts. If managerNewlyStarted is false, the existing FUSE manager maintains
// snapshot mounts so we don't need to restore them.
if !managerNewlyStarted {
flags = append(flags, snbase.NoRestore)
}
rs, err = snbase.NewSnapshotter(ctx, filepath.Join(*rootDir, "snapshotter"), fs, flags...)
if err != nil {
log.G(ctx).WithError(err).Fatalf("failed to configure snapshotter")
}
log.G(ctx).Infof("Start snapshotter with fusemanager mode")
} else {
crirpc := rpc
// For CRI keychain, if listening path is different from stargz-snapshotter's socket, prepare for the dedicated grpc server and the socket.
serveCRISocket := config.CRIKeychainConfig.EnableKeychain && config.ListenPath != "" && config.ListenPath != *address
if serveCRISocket {
crirpc = grpc.NewServer()
}
credsFuncs, err := keychainconfig.ConfigKeychain(ctx, crirpc, &keyChainConfig)
if err != nil {
log.G(ctx).WithError(err).Fatalf("failed to configure keychain")
}
if serveCRISocket {
addr := config.ListenPath
// Prepare the directory for the socket
if err := os.MkdirAll(filepath.Dir(addr), 0700); err != nil {
log.G(ctx).WithError(err).Fatalf("failed to create directory %q", filepath.Dir(addr))
}
// Try to remove the socket file to avoid EADDRINUSE
if err := os.RemoveAll(addr); err != nil {
log.G(ctx).WithError(err).Fatalf("failed to remove %q", addr)
}
// Listen and serve
l, err := net.Listen("unix", addr)
if err != nil {
log.G(ctx).WithError(err).Fatalf("error on listen socket %q", addr)
}
go func() {
if err := crirpc.Serve(l); err != nil {
log.G(ctx).WithError(err).Errorf("error on serving CRI via socket %q", addr)
}
}()
}
fsConfig := fsopts.Config{
EnableIpfs: config.IPFS,
MetadataStore: config.MetadataStore,
OpenBoltDB: func(p string) (*bolt.DB, error) {
return bolt.Open(p, 0600, &bolt.Options{
NoFreelistSync: true,
InitialMmapSize: 64 * 1024 * 1024,
FreelistType: bolt.FreelistMapType,
})
},
}
fsOpts, err := fsopts.ConfigFsOpts(ctx, *rootDir, &fsConfig)
if err != nil {
log.G(ctx).WithError(err).Fatalf("failed to configure fs config")
}
rs, err = service.NewStargzSnapshotterService(ctx, *rootDir, &config.Config,
service.WithCredsFuncs(credsFuncs...), service.WithFilesystemOptions(fsOpts...))
if err != nil {
log.G(ctx).WithError(err).Fatalf("failed to configure snapshotter")
}
}
cleanup, err := serve(ctx, rpc, *address, rs, config)
if err != nil { if err != nil {
log.G(ctx).WithError(err).Fatalf("failed to serve snapshotter") log.G(ctx).WithError(err).Fatalf("failed to serve snapshotter")
} }
// When FUSE manager is disabled, FUSE servers are goroutines in the if cleanup {
	// containerd-stargz-grpc process. So killing containerd-stargz-grpc will
// result in all FUSE mount becoming unavailable with leaving all resources
// (e.g. temporary cache) on the node. To ensure graceful shutdown, we
// should always cleanup mounts and associated resources here.
//
// When FUSE manager is enabled, those mounts are still under the control by
// the FUSE manager so we need to avoid cleaning them up unless explicitly
// commanded via SIGINT. The user can use SIGINT to gracefully killing the FUSE
// manager before rebooting the node for ensuring that the all snapshots are
// unmounted with cleaning up associated temporary resources.
if cleanup || !fuseManagerConfig.Enable {
log.G(ctx).Debug("Closing the snapshotter") log.G(ctx).Debug("Closing the snapshotter")
rs.Close() rs.Close()
} }
log.G(ctx).Info("Exiting") log.G(ctx).Info("Exiting")
} }
func serve(ctx context.Context, rpc *grpc.Server, addr string, rs snapshots.Snapshotter, config snapshotterConfig) (bool, error) { func serve(ctx context.Context, addr string, rs snapshots.Snapshotter, config snapshotterConfig) (bool, error) {
// Create a gRPC server
rpc := grpc.NewServer()
// Convert the snapshotter to a gRPC service, // Convert the snapshotter to a gRPC service,
snsvc := snapshotservice.FromSnapshotter(rs) snsvc := snapshotservice.FromSnapshotter(rs)
@ -292,40 +126,26 @@ func serve(ctx context.Context, rpc *grpc.Server, addr string, rs snapshots.Snap
// Prepare the directory for the socket // Prepare the directory for the socket
if err := os.MkdirAll(filepath.Dir(addr), 0700); err != nil { if err := os.MkdirAll(filepath.Dir(addr), 0700); err != nil {
return false, fmt.Errorf("failed to create directory %q: %w", filepath.Dir(addr), err) return false, errors.Wrapf(err, "failed to create directory %q", filepath.Dir(addr))
} }
// Try to remove the socket file to avoid EADDRINUSE // Try to remove the socket file to avoid EADDRINUSE
if err := os.RemoveAll(addr); err != nil { if err := os.RemoveAll(addr); err != nil {
return false, fmt.Errorf("failed to remove %q: %w", addr, err) return false, errors.Wrapf(err, "failed to remove %q", addr)
} }
errCh := make(chan error, 1) errCh := make(chan error, 1)
// We need to consider both the existence of MetricsAddress as well as NoPrometheus flag not set if config.MetricsAddress != "" {
if config.MetricsAddress != "" && !config.NoPrometheus {
l, err := net.Listen("tcp", config.MetricsAddress) l, err := net.Listen("tcp", config.MetricsAddress)
if err != nil { if err != nil {
return false, fmt.Errorf("failed to get listener for metrics endpoint: %w", err) return false, errors.Wrapf(err, "failed to get listener for metrics endpoint")
} }
m := http.NewServeMux() m := http.NewServeMux()
m.Handle("/metrics", metrics.Handler()) m.Handle("/metrics", metrics.Handler())
go func() { go func() {
if err := http.Serve(l, m); err != nil { if err := http.Serve(l, m); err != nil {
errCh <- fmt.Errorf("error on serving metrics via socket %q: %w", addr, err) errCh <- errors.Wrapf(err, "error on serving metrics via socket %q", addr)
}
}()
}
if config.DebugAddress != "" {
log.G(ctx).Infof("listen %q for debugging", config.DebugAddress)
l, err := sys.GetLocalListener(config.DebugAddress, 0, 0)
if err != nil {
return false, fmt.Errorf("failed to listen %q: %w", config.DebugAddress, err)
}
go func() {
if err := http.Serve(l, debugServerMux()); err != nil {
errCh <- fmt.Errorf("error on serving a debug endpoint via socket %q: %w", addr, err)
} }
}() }()
} }
@ -333,11 +153,11 @@ func serve(ctx context.Context, rpc *grpc.Server, addr string, rs snapshots.Snap
// Listen and serve // Listen and serve
l, err := net.Listen("unix", addr) l, err := net.Listen("unix", addr)
if err != nil { if err != nil {
return false, fmt.Errorf("error on listen socket %q: %w", addr, err) return false, errors.Wrapf(err, "error on listen socket %q", addr)
} }
go func() { go func() {
if err := rpc.Serve(l); err != nil { if err := rpc.Serve(l); err != nil {
errCh <- fmt.Errorf("error on serving via socket %q: %w", addr, err) errCh <- errors.Wrapf(err, "error on serving via socket %q", addr)
} }
}() }()

View File

@ -1,34 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"expvar"
"net/http"
"net/http/pprof"
)
func debugServerMux() *http.ServeMux {
m := http.NewServeMux()
m.Handle("/debug/vars", expvar.Handler())
m.Handle("/debug/pprof/", http.HandlerFunc(pprof.Index))
m.Handle("/debug/pprof/cmdline", http.HandlerFunc(pprof.Cmdline))
m.Handle("/debug/pprof/profile", http.HandlerFunc(pprof.Profile))
m.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol))
m.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace))
return m
}

View File

@ -18,32 +18,24 @@ package commands
import ( import (
"compress/gzip" "compress/gzip"
gocontext "context"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"os" "os"
"os/signal"
"github.com/containerd/containerd/v2/cmd/ctr/commands" "github.com/containerd/containerd/cmd/ctr/commands"
"github.com/containerd/containerd/v2/core/content" "github.com/containerd/containerd/images/converter"
"github.com/containerd/containerd/v2/core/images" "github.com/containerd/containerd/images/converter/uncompress"
"github.com/containerd/containerd/v2/core/images/converter" "github.com/containerd/containerd/platforms"
"github.com/containerd/containerd/v2/core/images/converter/uncompress"
"github.com/containerd/log"
"github.com/containerd/platforms"
"github.com/containerd/stargz-snapshotter/estargz" "github.com/containerd/stargz-snapshotter/estargz"
estargzconvert "github.com/containerd/stargz-snapshotter/nativeconverter/estargz" estargzconvert "github.com/containerd/stargz-snapshotter/nativeconverter/estargz"
esgzexternaltocconvert "github.com/containerd/stargz-snapshotter/nativeconverter/estargz/externaltoc"
zstdchunkedconvert "github.com/containerd/stargz-snapshotter/nativeconverter/zstdchunked"
"github.com/containerd/stargz-snapshotter/recorder" "github.com/containerd/stargz-snapshotter/recorder"
"github.com/klauspost/compress/zstd"
ocispec "github.com/opencontainers/image-spec/specs-go/v1" ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/urfave/cli/v2" "github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
) )
// ConvertCommand converts an image var ConvertCommand = cli.Command{
var ConvertCommand = &cli.Command{
Name: "convert", Name: "convert",
Usage: "convert an image", Usage: "convert an image",
ArgsUsage: "[flags] <source_ref> <target_ref>...", ArgsUsage: "[flags] <source_ref> <target_ref>...",
@ -56,72 +48,40 @@ When '--all-platforms' is given all images in a manifest list must be available.
`, `,
Flags: []cli.Flag{ Flags: []cli.Flag{
// estargz flags // estargz flags
&cli.BoolFlag{ cli.BoolFlag{
Name: "estargz", Name: "estargz",
Usage: "convert legacy tar(.gz) layers to eStargz for lazy pulling. Should be used in conjunction with '--oci'", Usage: "convert legacy tar(.gz) layers to eStargz for lazy pulling. Should be used in conjunction with '--oci'",
}, },
&cli.StringFlag{ cli.StringFlag{
Name: "estargz-record-in", Name: "estargz-record-in",
Usage: "Read 'ctr-remote optimize --record-out=<FILE>' record file", Usage: "Read 'ctr-remote optimize --record-out=<FILE>' record file",
}, },
&cli.IntFlag{ cli.IntFlag{
Name: "estargz-compression-level", Name: "estargz-compression-level",
Usage: "eStargz compression level", Usage: "eStargz compression level",
Value: gzip.BestCompression, Value: gzip.BestCompression,
}, },
&cli.IntFlag{ cli.IntFlag{
Name: "estargz-chunk-size", Name: "estargz-chunk-size",
Usage: "eStargz chunk size", Usage: "eStargz chunk size",
Value: 0, Value: 0,
}, },
&cli.IntFlag{
Name: "estargz-min-chunk-size",
Usage: "The minimal number of bytes of data must be written in one gzip stream. Note that this adds a TOC property that old reader doesn't understand.",
Value: 0,
},
&cli.BoolFlag{
Name: "estargz-external-toc",
Usage: "Separate TOC JSON into another image (called \"TOC image\"). The name of TOC image is the original + \"-esgztoc\" suffix. Both eStargz and the TOC image should be pushed to the same registry. stargz-snapshotter refers to the TOC image when it pulls the result eStargz image.",
},
&cli.BoolFlag{
Name: "estargz-keep-diff-id",
Usage: "convert to esgz without changing diffID (cannot be used in conjunction with '--estargz-record-in'. must be specified with '--estargz-external-toc')",
},
// zstd:chunked flags
&cli.BoolFlag{
Name: "zstdchunked",
Usage: "use zstd compression instead of gzip (a.k.a zstd:chunked). Must be used in conjunction with '--oci'.",
},
&cli.StringFlag{
Name: "zstdchunked-record-in",
Usage: "Read 'ctr-remote optimize --record-out=<FILE>' record file",
},
&cli.IntFlag{
Name: "zstdchunked-compression-level",
Usage: "zstd:chunked compression level",
Value: 3, // SpeedDefault; see also https://pkg.go.dev/github.com/klauspost/compress/zstd#EncoderLevel
},
&cli.IntFlag{
Name: "zstdchunked-chunk-size",
Usage: "zstd:chunked chunk size",
Value: 0,
},
// generic flags // generic flags
&cli.BoolFlag{ cli.BoolFlag{
Name: "uncompress", Name: "uncompress",
Usage: "convert tar.gz layers to uncompressed tar layers", Usage: "convert tar.gz layers to uncompressed tar layers",
}, },
&cli.BoolFlag{ cli.BoolFlag{
Name: "oci", Name: "oci",
Usage: "convert Docker media types to OCI media types", Usage: "convert Docker media types to OCI media types",
}, },
// platform flags // platform flags
&cli.StringSliceFlag{ cli.StringSliceFlag{
Name: "platform", Name: "platform",
Usage: "Convert content for a specific platform", Usage: "Convert content for a specific platform",
Value: &cli.StringSlice{}, Value: &cli.StringSlice{},
}, },
&cli.BoolFlag{ cli.BoolFlag{
Name: "all-platforms", Name: "all-platforms",
Usage: "Convert content for all platforms", Usage: "Convert content for all platforms",
}, },
@ -136,87 +96,40 @@ When '--all-platforms' is given all images in a manifest list must be available.
return errors.New("src and target image need to be specified") return errors.New("src and target image need to be specified")
} }
var platformMC platforms.MatchComparer if !context.Bool("all-platforms") {
if context.Bool("all-platforms") {
platformMC = platforms.All
} else {
if pss := context.StringSlice("platform"); len(pss) > 0 { if pss := context.StringSlice("platform"); len(pss) > 0 {
var all []ocispec.Platform var all []ocispec.Platform
for _, ps := range pss { for _, ps := range pss {
p, err := platforms.Parse(ps) p, err := platforms.Parse(ps)
if err != nil { if err != nil {
return fmt.Errorf("invalid platform %q: %w", ps, err) return errors.Wrapf(err, "invalid platform %q", ps)
} }
all = append(all, p) all = append(all, p)
} }
platformMC = platforms.Ordered(all...) convertOpts = append(convertOpts, converter.WithPlatform(platforms.Ordered(all...)))
} else { } else {
platformMC = platforms.DefaultStrict() convertOpts = append(convertOpts, converter.WithPlatform(platforms.DefaultStrict()))
} }
} }
convertOpts = append(convertOpts, converter.WithPlatform(platformMC))
var layerConvertFunc converter.ConvertFunc
var finalize func(ctx gocontext.Context, cs content.Store, ref string, desc *ocispec.Descriptor) (*images.Image, error)
if context.Bool("estargz") { if context.Bool("estargz") {
esgzOpts, err := getESGZConvertOpts(context) esgzOpts, err := getESGZConvertOpts(context)
if err != nil { if err != nil {
return err return err
} }
if context.Bool("estargz-external-toc") { convertOpts = append(convertOpts, converter.WithLayerConvertFunc(estargzconvert.LayerConvertFunc(esgzOpts...)))
if !context.Bool("estargz-keep-diff-id") {
layerConvertFunc, finalize = esgzexternaltocconvert.LayerConvertFunc(esgzOpts, context.Int("estargz-compression-level"))
} else {
if context.String("estargz-record-in") != "" {
return fmt.Errorf("option --estargz-keep-diff-id conflicts with --estargz-record-in")
}
layerConvertFunc, finalize = esgzexternaltocconvert.LayerConvertLossLessFunc(esgzexternaltocconvert.LayerConvertLossLessConfig{
CompressionLevel: context.Int("estargz-compression-level"),
ChunkSize: context.Int("estargz-chunk-size"),
MinChunkSize: context.Int("estargz-min-chunk-size"),
})
}
} else {
if context.Bool("estargz-keep-diff-id") {
return fmt.Errorf("option --estargz-keep-diff-id must be used with --estargz-external-toc")
}
layerConvertFunc = estargzconvert.LayerConvertFunc(esgzOpts...)
}
if !context.Bool("oci") { if !context.Bool("oci") {
log.L.Warn("option --estargz should be used in conjunction with --oci") logrus.Warn("option --estargz should be used in conjunction with --oci")
} }
if context.Bool("uncompress") { if context.Bool("uncompress") {
return errors.New("option --estargz conflicts with --uncompress") return errors.New("option --estargz conflicts with --uncompress")
} }
if context.Bool("zstdchunked") {
return errors.New("option --estargz conflicts with --zstdchunked")
}
}
if context.Bool("zstdchunked") {
esgzOpts, err := getZstdchunkedConvertOpts(context)
if err != nil {
return err
}
layerConvertFunc = zstdchunkedconvert.LayerConvertFuncWithCompressionLevel(
zstd.EncoderLevelFromZstd(context.Int("zstdchunked-compression-level")), esgzOpts...)
if !context.Bool("oci") {
return errors.New("option --zstdchunked must be used in conjunction with --oci")
}
if context.Bool("uncompress") {
return errors.New("option --zstdchunked conflicts with --uncompress")
}
} }
if context.Bool("uncompress") { if context.Bool("uncompress") {
layerConvertFunc = uncompress.LayerConvertFunc convertOpts = append(convertOpts, converter.WithLayerConvertFunc(uncompress.LayerConvertFunc))
} }
if layerConvertFunc == nil {
return errors.New("specify layer converter")
}
convertOpts = append(convertOpts, converter.WithLayerConvertFunc(layerConvertFunc))
if context.Bool("oci") { if context.Bool("oci") {
convertOpts = append(convertOpts, converter.WithDockerToOCI(true)) convertOpts = append(convertOpts, converter.WithDockerToOCI(true))
} }
@ -227,40 +140,10 @@ When '--all-platforms' is given all images in a manifest list must be available.
} }
defer cancel() defer cancel()
ctx, done, err := client.WithLease(ctx)
if err != nil {
return err
}
defer done(ctx)
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, os.Interrupt)
go func() {
// Cleanly cancel conversion
select {
case s := <-sigCh:
log.G(ctx).Infof("Got %v", s)
cancel()
case <-ctx.Done():
}
}()
newImg, err := converter.Convert(ctx, client, targetRef, srcRef, convertOpts...) newImg, err := converter.Convert(ctx, client, targetRef, srcRef, convertOpts...)
if err != nil { if err != nil {
return err return err
} }
if finalize != nil {
newI, err := finalize(ctx, client.ContentStore(), targetRef, &newImg.Target)
if err != nil {
return err
}
is := client.ImageService()
_ = is.Delete(ctx, newI.Name)
finimg, err := is.Create(ctx, *newI)
if err != nil {
return err
}
fmt.Fprintln(context.App.Writer, "extra image:", finimg.Name)
}
fmt.Fprintln(context.App.Writer, newImg.Target.Digest.String()) fmt.Fprintln(context.App.Writer, newImg.Target.Digest.String())
return nil return nil
}, },
@ -270,7 +153,6 @@ func getESGZConvertOpts(context *cli.Context) ([]estargz.Option, error) {
esgzOpts := []estargz.Option{ esgzOpts := []estargz.Option{
estargz.WithCompressionLevel(context.Int("estargz-compression-level")), estargz.WithCompressionLevel(context.Int("estargz-compression-level")),
estargz.WithChunkSize(context.Int("estargz-chunk-size")), estargz.WithChunkSize(context.Int("estargz-chunk-size")),
estargz.WithMinChunkSize(context.Int("estargz-min-chunk-size")),
} }
if estargzRecordIn := context.String("estargz-record-in"); estargzRecordIn != "" { if estargzRecordIn := context.String("estargz-record-in"); estargzRecordIn != "" {
paths, err := readPathsFromRecordFile(estargzRecordIn) paths, err := readPathsFromRecordFile(estargzRecordIn)
@ -284,22 +166,6 @@ func getESGZConvertOpts(context *cli.Context) ([]estargz.Option, error) {
return esgzOpts, nil return esgzOpts, nil
} }
// getZstdchunkedConvertOpts assembles the estargz conversion options for the
// zstd:chunked flags of the convert command: the chunk size, and, when a
// record file is supplied, the list of files to prioritize in the layer.
func getZstdchunkedConvertOpts(context *cli.Context) ([]estargz.Option, error) {
	opts := []estargz.Option{
		estargz.WithChunkSize(context.Int("zstdchunked-chunk-size")),
	}
	recordIn := context.String("zstdchunked-record-in")
	if recordIn == "" {
		return opts, nil
	}
	paths, err := readPathsFromRecordFile(recordIn)
	if err != nil {
		return nil, err
	}
	// Prioritized entries that are missing from the layer are collected into
	// notFound and deliberately discarded (best-effort prioritization).
	var notFound []string
	opts = append(opts,
		estargz.WithPrioritizedFiles(paths),
		estargz.WithAllowPrioritizeNotFound(&notFound),
	)
	return opts, nil
}
func readPathsFromRecordFile(filename string) ([]string, error) { func readPathsFromRecordFile(filename string) ([]string, error) {
r, err := os.Open(filename) r, err := os.Open(filename)
if err != nil { if err != nil {

View File

@ -21,158 +21,125 @@ import (
gocontext "context" gocontext "context"
"encoding/csv" "encoding/csv"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
"runtime"
"strconv"
"strings" "strings"
containerd "github.com/containerd/containerd/v2/client" "github.com/containerd/containerd"
"github.com/containerd/containerd/v2/contrib/nvidia" "github.com/containerd/containerd/containers"
"github.com/containerd/containerd/v2/core/containers" "github.com/containerd/containerd/content"
"github.com/containerd/containerd/v2/core/content" "github.com/containerd/containerd/images"
"github.com/containerd/containerd/v2/core/images" "github.com/containerd/containerd/oci"
"github.com/containerd/containerd/v2/pkg/netns" "github.com/containerd/containerd/pkg/netns"
"github.com/containerd/containerd/v2/pkg/oci"
gocni "github.com/containerd/go-cni" gocni "github.com/containerd/go-cni"
"github.com/containerd/log" "github.com/hashicorp/go-multierror"
imagespec "github.com/opencontainers/image-spec/specs-go/v1" imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go" runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/rs/xid" "github.com/rs/xid"
"github.com/urfave/cli/v2" "github.com/urfave/cli"
) )
const netnsMountDir = "/var/run/netns" const netnsMountDir = "/var/run/netns"
func parseGPUs(gpuStr string) ([]int, bool) {
if gpuStr == "" {
return nil, false
}
if gpuStr == "all" {
return nil, true
}
parts := strings.Split(gpuStr, ",")
var devices []int
for _, part := range parts {
part = strings.TrimSpace(part)
if device, err := strconv.Atoi(part); err == nil {
devices = append(devices, device)
}
}
return devices, false
}
var samplerFlags = []cli.Flag{ var samplerFlags = []cli.Flag{
&cli.BoolFlag{ cli.BoolFlag{
Name: "terminal,t", Name: "terminal,t",
Usage: "enable terminal for sample container. must be specified with i option", Usage: "enable terminal for sample container. must be specified with i option",
}, },
&cli.BoolFlag{ cli.BoolFlag{
Name: "i", Name: "i",
Usage: "attach stdin to the container", Usage: "attach stdin to the container",
}, },
&cli.IntFlag{ cli.IntFlag{
Name: "period", Name: "period",
Usage: "time period to monitor access log", Usage: "time period to monitor access log",
Value: defaultPeriod, Value: defaultPeriod,
}, },
&cli.StringFlag{ cli.StringFlag{
Name: "user", Name: "user",
Usage: "user/group name to override image's default config(user[:group])", Usage: "user/group name to override image's default config(user[:group])",
}, },
&cli.StringFlag{ cli.StringFlag{
Name: "cwd", Name: "cwd",
Usage: "working dir to override image's default config", Usage: "working dir to override image's default config",
}, },
&cli.StringFlag{ cli.StringFlag{
Name: "args", Name: "args",
Usage: "command arguments to override image's default config(in JSON array)", Usage: "command arguments to override image's default config(in JSON array)",
}, },
&cli.StringFlag{ cli.StringFlag{
Name: "entrypoint", Name: "entrypoint",
Usage: "entrypoint to override image's default config(in JSON array)", Usage: "entrypoint to override image's default config(in JSON array)",
}, },
&cli.StringSliceFlag{ cli.StringSliceFlag{
Name: "env", Name: "env",
Usage: "environment valulable to add or override to the image's default config", Usage: "environment valulable to add or override to the image's default config",
}, },
&cli.StringFlag{ cli.StringSliceFlag{
Name: "env-file",
Usage: "specify additional container environment variables in a file(i.e. FOO=bar, one per line)",
},
&cli.StringSliceFlag{
Name: "mount", Name: "mount",
Usage: "additional mounts for the container (e.g. type=foo,source=/path,destination=/target,options=bind)", Usage: "additional mounts for the container (e.g. type=foo,source=/path,destination=/target,options=bind)",
}, },
&cli.StringFlag{ cli.StringFlag{
Name: "dns-nameservers", Name: "dns-nameservers",
Usage: "comma-separated nameservers added to the container's /etc/resolv.conf", Usage: "comma-separated nameservers added to the container's /etc/resolv.conf",
Value: "8.8.8.8", Value: "8.8.8.8",
}, },
&cli.StringFlag{ cli.StringFlag{
Name: "dns-search-domains", Name: "dns-search-domains",
Usage: "comma-separated search domains added to the container's /etc/resolv.conf", Usage: "comma-separated search domains added to the container's /etc/resolv.conf",
}, },
&cli.StringFlag{ cli.StringFlag{
Name: "dns-options", Name: "dns-options",
Usage: "comma-separated options added to the container's /etc/resolv.conf", Usage: "comma-separated options added to the container's /etc/resolv.conf",
}, },
&cli.StringFlag{ cli.StringFlag{
Name: "add-hosts", Name: "add-hosts",
Usage: "comma-separated hosts configuration (host:IP) added to container's /etc/hosts", Usage: "comma-separated hosts configuration (host:IP) added to container's /etc/hosts",
}, },
&cli.BoolFlag{ cli.BoolFlag{
Name: "cni", Name: "cni",
Usage: "enable CNI-based networking", Usage: "enable CNI-based networking",
}, },
&cli.StringFlag{ cli.StringFlag{
Name: "cni-plugin-conf-dir", Name: "cni-plugin-conf-dir",
Usage: "path to the CNI plugins configuration directory", Usage: "path to the CNI plugins configuration directory",
}, },
&cli.StringFlag{ cli.StringFlag{
Name: "cni-plugin-dir", Name: "cni-plugin-dir",
Usage: "path to the CNI plugins binary directory", Usage: "path to the CNI plugins binary directory",
}, },
&cli.StringFlag{
Name: "gpus",
Usage: "add gpus to the container (comma-separated list of indices or 'all')",
},
&cli.BoolFlag{
Name: "net-host",
Usage: "enable host networking in the container",
},
} }
func getSpecOpts(clicontext *cli.Context) func(image containerd.Image, rootfs string) (opts []oci.SpecOpts, done func() error, rErr error) { func getSpecOpts(clicontext *cli.Context) func(image containerd.Image, rootfs string) (opts []oci.SpecOpts, done func() error, rErr error) {
return func(image containerd.Image, rootfs string) (opts []oci.SpecOpts, done func() error, rErr error) { return func(image containerd.Image, rootfs string) (opts []oci.SpecOpts, done func() error, rErr error) {
var cleanups []func() error var cleanups []func() error
done = func() error { done = func() (allErr error) {
var errs []error
for i := len(cleanups) - 1; i >= 0; i-- { for i := len(cleanups) - 1; i >= 0; i-- {
if err := cleanups[i](); err != nil { if err := cleanups[i](); err != nil {
errs = append(errs, err) allErr = multierror.Append(allErr, err)
} }
} }
return errors.Join(errs...) return
} }
defer func() { defer func() {
if rErr != nil { if rErr != nil {
if err := done(); err != nil { if err := done(); err != nil {
rErr = fmt.Errorf("failed to cleanup: %w", rErr) rErr = errors.Wrap(rErr, "failed to cleanup")
} }
} }
}() }()
entrypointOpt, err := withEntrypointArgs(clicontext, image) entrypointOpt, err := withEntrypointArgs(clicontext, image)
if err != nil { if err != nil {
rErr = fmt.Errorf("failed to parse entrypoint and arg flags: %w", err) rErr = errors.Wrapf(err, "failed to parse entrypoint and arg flags")
return return
} }
resolverOpt, cleanup, err := withResolveConfig(clicontext) resolverOpt, cleanup, err := withResolveConfig(clicontext)
if err != nil { if err != nil {
rErr = fmt.Errorf("failed to parse DNS-related flags: %w", err) rErr = errors.Wrapf(err, "failed to parse DNS-related flags")
return return
} }
cleanups = append(cleanups, cleanup) cleanups = append(cleanups, cleanup)
@ -180,7 +147,7 @@ func getSpecOpts(clicontext *cli.Context) func(image containerd.Image, rootfs st
for _, mount := range clicontext.StringSlice("mount") { for _, mount := range clicontext.StringSlice("mount") {
m, err := parseMountFlag(mount) m, err := parseMountFlag(mount)
if err != nil { if err != nil {
rErr = fmt.Errorf("failed to parse mount flag %q: %w", mount, err) rErr = errors.Wrapf(err, "failed to parse mount flag %q", mount)
return return
} }
mounts = append(mounts, m) mounts = append(mounts, m)
@ -195,9 +162,6 @@ func getSpecOpts(clicontext *cli.Context) func(image containerd.Image, rootfs st
resolverOpt, resolverOpt,
entrypointOpt, entrypointOpt,
) )
if envFile := clicontext.String("env-file"); envFile != "" {
opts = append(opts, oci.WithEnvFile(envFile))
}
if username := clicontext.String("user"); username != "" { if username := clicontext.String("user"); username != "" {
opts = append(opts, oci.WithUser(username)) opts = append(opts, oci.WithUser(username))
} }
@ -215,31 +179,12 @@ func getSpecOpts(clicontext *cli.Context) func(image containerd.Image, rootfs st
var nOpt oci.SpecOpts var nOpt oci.SpecOpts
nOpt, cleanup, err = withCNI(clicontext) nOpt, cleanup, err = withCNI(clicontext)
if err != nil { if err != nil {
rErr = fmt.Errorf("failed to parse CNI-related flags: %w", err) rErr = errors.Wrapf(err, "failed to parse CNI-related flags")
return return
} }
cleanups = append(cleanups, cleanup) cleanups = append(cleanups, cleanup)
opts = append(opts, nOpt) opts = append(opts, nOpt)
} }
if clicontext.Bool("net-host") {
if runtime.GOOS == "windows" {
log.L.Warn("option --net-host is not supported on Windows")
} else {
opts = append(opts, oci.WithHostNamespace(runtimespec.NetworkNamespace), oci.WithHostHostsFile, oci.WithHostResolvconf)
}
}
if clicontext.IsSet("gpus") {
if runtime.GOOS == "windows" {
log.L.Warn("option --gpus is not supported on Windows")
} else {
devices, useAll := parseGPUs(clicontext.String("gpus"))
if useAll {
opts = append(opts, nvidia.WithGPUs(nvidia.WithAllCapabilities))
} else if len(devices) > 0 {
opts = append(opts, nvidia.WithGPUs(nvidia.WithDevices(devices...), nvidia.WithAllCapabilities))
}
}
}
return return
} }
@ -249,13 +194,13 @@ func withEntrypointArgs(clicontext *cli.Context, image containerd.Image) (oci.Sp
var eFlag []string var eFlag []string
if eStr := clicontext.String("entrypoint"); eStr != "" { if eStr := clicontext.String("entrypoint"); eStr != "" {
if err := json.Unmarshal([]byte(eStr), &eFlag); err != nil { if err := json.Unmarshal([]byte(eStr), &eFlag); err != nil {
return nil, fmt.Errorf("invalid option \"entrypoint\": %w", err) return nil, errors.Wrapf(err, "invalid option \"entrypoint\"")
} }
} }
var aFlag []string var aFlag []string
if aStr := clicontext.String("args"); aStr != "" { if aStr := clicontext.String("args"); aStr != "" {
if err := json.Unmarshal([]byte(aStr), &aFlag); err != nil { if err := json.Unmarshal([]byte(aStr), &aFlag); err != nil {
return nil, fmt.Errorf("invalid option \"args\": %w", err) return nil, errors.Wrapf(err, "invalid option \"args\"")
} }
} }
return func(ctx gocontext.Context, client oci.Client, container *containers.Container, s *runtimespec.Spec) error { return func(ctx gocontext.Context, client oci.Client, container *containers.Container, s *runtimespec.Spec) error {
@ -290,19 +235,18 @@ func withEntrypointArgs(clicontext *cli.Context, image containerd.Image) (oci.Sp
func withCNI(clicontext *cli.Context) (specOpt oci.SpecOpts, done func() error, rErr error) { func withCNI(clicontext *cli.Context) (specOpt oci.SpecOpts, done func() error, rErr error) {
var cleanups []func() error var cleanups []func() error
done = func() error { done = func() (allErr error) {
var errs []error
for i := len(cleanups) - 1; i >= 0; i-- { for i := len(cleanups) - 1; i >= 0; i-- {
if err := cleanups[i](); err != nil { if err := cleanups[i](); err != nil {
errs = append(errs, err) allErr = multierror.Append(allErr, err)
} }
} }
return errors.Join(errs...) return
} }
defer func() { defer func() {
if rErr != nil { if rErr != nil {
if err := done(); err != nil { if err := done(); err != nil {
rErr = fmt.Errorf("failed to cleanup: %w", rErr) rErr = errors.Wrap(rErr, "failed to cleanup")
} }
} }
}() }()
@ -310,7 +254,7 @@ func withCNI(clicontext *cli.Context) (specOpt oci.SpecOpts, done func() error,
// Create a new network namespace for configuring it with CNI plugins // Create a new network namespace for configuring it with CNI plugins
ns, err := netns.NewNetNS(netnsMountDir) ns, err := netns.NewNetNS(netnsMountDir)
if err != nil { if err != nil {
rErr = fmt.Errorf("failed to prepare netns: %w", err) rErr = errors.Wrapf(err, "failed to prepare netns")
return return
} }
cleanups = append(cleanups, ns.Remove) cleanups = append(cleanups, ns.Remove)
@ -328,13 +272,13 @@ func withCNI(clicontext *cli.Context) (specOpt oci.SpecOpts, done func() error,
cniopts = append(cniopts, gocni.WithDefaultConf) cniopts = append(cniopts, gocni.WithDefaultConf)
network, err := gocni.New(cniopts...) network, err := gocni.New(cniopts...)
if err != nil { if err != nil {
rErr = fmt.Errorf("failed to prepare CNI plugins: %w", err) rErr = errors.Wrap(err, "failed to prepare CNI plugins")
return return
} }
id := xid.New().String() id := xid.New().String()
ctx := gocontext.Background() ctx := gocontext.Background()
if _, err := network.Setup(ctx, id, ns.GetPath()); err != nil { if _, err := network.Setup(ctx, id, ns.GetPath()); err != nil {
rErr = fmt.Errorf("failed to setup netns with CNI plugins: %w", err) rErr = errors.Wrap(err, "failed to setup netns with CNI plugins")
return return
} }
cleanups = append(cleanups, func() error { cleanups = append(cleanups, func() error {
@ -352,7 +296,7 @@ func withResolveConfig(clicontext *cli.Context) (specOpt oci.SpecOpts, cleanup f
defer func() { defer func() {
if rErr != nil { if rErr != nil {
if err := cleanup(); err != nil { if err := cleanup(); err != nil {
rErr = fmt.Errorf("failed to cleanup: %w", rErr) rErr = errors.Wrap(rErr, "failed to cleanup")
} }
} }
}() }()
@ -363,7 +307,7 @@ func withResolveConfig(clicontext *cli.Context) (specOpt oci.SpecOpts, cleanup f
} }
// Generate /etc/hosts and /etc/resolv.conf // Generate /etc/hosts and /etc/resolv.conf
resolvDir, err := os.MkdirTemp("", "tmpetc") resolvDir, err := ioutil.TempDir("", "tmpetc")
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
@ -375,7 +319,7 @@ func withResolveConfig(clicontext *cli.Context) (specOpt oci.SpecOpts, cleanup f
) )
for _, n := range nameservers { for _, n := range nameservers {
if _, err := fmt.Fprintf(buf, "nameserver %s\n", n); err != nil { if _, err := fmt.Fprintf(buf, "nameserver %s\n", n); err != nil {
rErr = fmt.Errorf("failed to prepare nameserver of /etc/resolv.conf: %w", err) rErr = errors.Wrap(err, "failed to prepare nameserver of /etc/resolv.conf")
return return
} }
} }
@ -383,19 +327,19 @@ func withResolveConfig(clicontext *cli.Context) (specOpt oci.SpecOpts, cleanup f
if len(searches) > 0 { if len(searches) > 0 {
_, err := fmt.Fprintf(buf, "search %s\n", strings.Join(searches, " ")) _, err := fmt.Fprintf(buf, "search %s\n", strings.Join(searches, " "))
if err != nil { if err != nil {
rErr = fmt.Errorf("failed to prepare search contents of /etc/resolv.conf: %w", err) rErr = errors.Wrap(err, "failed to prepare search contents of /etc/resolv.conf")
return return
} }
} }
if len(dnsopts) > 0 { if len(dnsopts) > 0 {
_, err := fmt.Fprintf(buf, "options %s\n", strings.Join(dnsopts, " ")) _, err := fmt.Fprintf(buf, "options %s\n", strings.Join(dnsopts, " "))
if err != nil { if err != nil {
rErr = fmt.Errorf("failed to prepare options contents of /etc/resolv.conf: %w", err) rErr = errors.Wrap(err, "failed to prepare options contents of /etc/resolv.conf")
return return
} }
} }
if err := os.WriteFile(etcResolvConfPath, buf.Bytes(), 0644); err != nil { if err := ioutil.WriteFile(etcResolvConfPath, buf.Bytes(), 0644); err != nil {
rErr = fmt.Errorf("failed to write contents to /etc/resolv.conf: %w", err) rErr = errors.Wrap(err, "failed to write contents to /etc/resolv.conf")
return return
} }
buf.Reset() // Reusing for /etc/hosts buf.Reset() // Reusing for /etc/hosts
@ -413,7 +357,7 @@ func withResolveConfig(clicontext *cli.Context) (specOpt oci.SpecOpts, cleanup f
{"ip6-allrouters", "ff02::2"}, {"ip6-allrouters", "ff02::2"},
} { } {
if _, err := fmt.Fprintf(buf, "%s\t%s\n", h.ip, h.host); err != nil { if _, err := fmt.Fprintf(buf, "%s\t%s\n", h.ip, h.host); err != nil {
rErr = fmt.Errorf("failed to write default hosts to /etc/hosts: %w", err) rErr = errors.Wrap(err, "failed to write default hosts to /etc/hosts")
return return
} }
} }
@ -425,12 +369,12 @@ func withResolveConfig(clicontext *cli.Context) (specOpt oci.SpecOpts, cleanup f
} }
// TODO: Validate them // TODO: Validate them
if _, err := fmt.Fprintf(buf, "%s\t%s\n", parts[1], parts[0]); err != nil { if _, err := fmt.Fprintf(buf, "%s\t%s\n", parts[1], parts[0]); err != nil {
rErr = fmt.Errorf("failed to write extra hosts to /etc/hosts: %w", err) rErr = errors.Wrap(err, "failed to write extra hosts to /etc/hosts")
return return
} }
} }
if err := os.WriteFile(etcHostsPath, buf.Bytes(), 0644); err != nil { if err := ioutil.WriteFile(etcHostsPath, buf.Bytes(), 0644); err != nil {
rErr = fmt.Errorf("failed to write contents to /etc/hosts: %w", err) rErr = errors.Wrap(err, "failed to write contents to /etc/hosts")
return return
} }

View File

@ -1,110 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package commands
import (
"encoding/json"
"errors"
"fmt"
"io"
"github.com/containerd/containerd/v2/cmd/ctr/commands"
"github.com/containerd/stargz-snapshotter/estargz"
"github.com/containerd/stargz-snapshotter/estargz/zstdchunked"
digest "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/urfave/cli/v2"
)
// GetTOCDigestCommand outputs the digest of the TOC (table of contents) JSON
// stored in an eStargz (gzip) or zstd:chunked layer blob, or, with
// --dump-toc, the (re-indented) TOC JSON itself.
var GetTOCDigestCommand = &cli.Command{
	Name:      "get-toc-digest",
	Usage:     "get the digest of TOC of a layer",
	ArgsUsage: "<layer digest>",
	Flags: []cli.Flag{
		// zstd:chunked flags
		&cli.BoolFlag{
			Name:  "zstdchunked",
			Usage: "parse layer as zstd:chunked",
		},
		// other flags for debugging
		&cli.BoolFlag{
			Name:  "dump-toc",
			Usage: "dump TOC instead of digest. Note that the dumped TOC might be formatted with indents so may have different digest against the original in the layer",
		},
	},
	Action: func(clicontext *cli.Context) error {
		layerDgstStr := clicontext.Args().Get(0)
		if layerDgstStr == "" {
			return errors.New("layer digest need to be specified")
		}

		client, ctx, cancel, err := commands.NewClient(clicontext)
		if err != nil {
			return err
		}
		defer cancel()

		layerDgst, err := digest.Parse(layerDgstStr)
		if err != nil {
			return err
		}
		// Read the layer blob directly from containerd's content store.
		ra, err := client.ContentStore().ReaderAt(ctx, ocispec.Descriptor{Digest: layerDgst})
		if err != nil {
			return err
		}
		defer ra.Close()

		// Footer size and decompressor both depend on whether the layer is
		// gzip-based eStargz or zstd:chunked; select them in one place
		// instead of checking the flag twice.
		footerSize := estargz.FooterSize
		var decompressor estargz.Decompressor = new(estargz.GzipDecompressor)
		if clicontext.Bool("zstdchunked") {
			footerSize = zstdchunked.FooterSize
			decompressor = new(zstdchunked.Decompressor)
		}

		// The footer sits at the very end of the blob and records where the
		// TOC starts (and, for some formats, its size).
		footer := make([]byte, footerSize)
		if _, err := ra.ReadAt(footer, ra.Size()-int64(footerSize)); err != nil {
			return fmt.Errorf("error reading footer: %w", err)
		}
		_, tocOff, tocSize, err := decompressor.ParseFooter(footer)
		if err != nil {
			return fmt.Errorf("error parsing footer: %w", err)
		}
		if tocSize <= 0 {
			// The footer format didn't record the TOC size: the TOC extends
			// from the recorded offset up to the footer.
			tocSize = ra.Size() - tocOff - int64(footerSize)
		}
		toc, tocDgst, err := decompressor.ParseTOC(io.NewSectionReader(ra, tocOff, tocSize))
		if err != nil {
			return fmt.Errorf("error parsing TOC: %w", err)
		}

		if clicontext.Bool("dump-toc") {
			tocJSON, err := json.MarshalIndent(toc, "", "\t")
			if err != nil {
				return fmt.Errorf("failed to marshal toc: %w", err)
			}
			fmt.Println(string(tocJSON))
			return nil
		}
		fmt.Println(tocDgst.String())
		return nil
	},
}

View File

@ -1,99 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package commands
import (
"errors"
"fmt"
"github.com/containerd/containerd/v2/cmd/ctr/commands"
"github.com/containerd/containerd/v2/core/images/converter"
"github.com/containerd/log"
"github.com/containerd/platforms"
"github.com/containerd/stargz-snapshotter/ipfs"
estargzconvert "github.com/containerd/stargz-snapshotter/nativeconverter/estargz"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/urfave/cli/v2"
)
// IPFSPushCommand pushes an image to IPFS
var IPFSPushCommand = &cli.Command{
	Name:      "ipfs-push",
	Usage:     "push an image to IPFS (experimental)",
	ArgsUsage: "[flags] <image_ref>",
	Flags: []cli.Flag{
		// platform flags
		&cli.StringSliceFlag{
			Name:  "platform",
			Usage: "Add content for a specific platform",
			Value: &cli.StringSlice{},
		},
		&cli.BoolFlag{
			Name:  "all-platforms",
			Usage: "Add content for all platforms",
		},
		&cli.BoolFlag{
			Name:  "estargz",
			Value: true,
			Usage: "Convert the image into eStargz",
		},
	},
	Action: func(clicontext *cli.Context) error {
		srcRef := clicontext.Args().Get(0)
		if srcRef == "" {
			return errors.New("image need to be specified")
		}

		// Decide which platforms of the source image get pushed:
		// everything, an explicit list, or the strict default.
		var platformMC platforms.MatchComparer
		switch pss := clicontext.StringSlice("platform"); {
		case clicontext.Bool("all-platforms"):
			platformMC = platforms.All
		case len(pss) > 0:
			parsed := make([]ocispec.Platform, 0, len(pss))
			for _, ps := range pss {
				p, err := platforms.Parse(ps)
				if err != nil {
					return fmt.Errorf("invalid platform %q: %w", ps, err)
				}
				parsed = append(parsed, p)
			}
			platformMC = platforms.Ordered(parsed...)
		default:
			platformMC = platforms.DefaultStrict()
		}

		client, ctx, cancel, err := commands.NewClient(clicontext)
		if err != nil {
			return err
		}
		defer cancel()

		// Optionally convert layers into eStargz while pushing.
		var layerConvert converter.ConvertFunc
		if clicontext.Bool("estargz") {
			layerConvert = estargzconvert.LayerConvertFunc()
		}

		p, err := ipfs.Push(ctx, client, srcRef, layerConvert, platformMC)
		if err != nil {
			return err
		}
		log.L.WithField("CID", p).Infof("Pushed")
		fmt.Println(p)
		return nil
	},
}

View File

@ -21,11 +21,10 @@ import (
"os" "os"
"github.com/containerd/stargz-snapshotter/analyzer/fanotify/service" "github.com/containerd/stargz-snapshotter/analyzer/fanotify/service"
"github.com/urfave/cli/v2" "github.com/urfave/cli"
) )
// FanotifyCommand notifies filesystem event under the specified directory. var FanotifyCommand = cli.Command{
var FanotifyCommand = &cli.Command{
Name: "fanotify", Name: "fanotify",
Hidden: true, Hidden: true,
Action: func(context *cli.Context) error { Action: func(context *cli.Context) error {

View File

@ -17,106 +17,66 @@
package commands package commands
import ( import (
"compress/gzip"
"context" "context"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"io" "io"
"os" "os"
"os/signal"
"time" "time"
containerd "github.com/containerd/containerd/v2/client" "github.com/containerd/containerd"
"github.com/containerd/containerd/v2/cmd/ctr/commands" "github.com/containerd/containerd/cmd/ctr/commands"
"github.com/containerd/containerd/v2/core/content" "github.com/containerd/containerd/content"
"github.com/containerd/containerd/v2/core/images" "github.com/containerd/containerd/images/converter"
"github.com/containerd/containerd/v2/core/images/converter" "github.com/containerd/containerd/platforms"
"github.com/containerd/log"
"github.com/containerd/platforms"
"github.com/containerd/stargz-snapshotter/analyzer" "github.com/containerd/stargz-snapshotter/analyzer"
"github.com/containerd/stargz-snapshotter/estargz" "github.com/containerd/stargz-snapshotter/estargz"
"github.com/containerd/stargz-snapshotter/estargz/zstdchunked"
estargzconvert "github.com/containerd/stargz-snapshotter/nativeconverter/estargz" estargzconvert "github.com/containerd/stargz-snapshotter/nativeconverter/estargz"
esgzexternaltocconvert "github.com/containerd/stargz-snapshotter/nativeconverter/estargz/externaltoc"
zstdchunkedconvert "github.com/containerd/stargz-snapshotter/nativeconverter/zstdchunked"
"github.com/containerd/stargz-snapshotter/recorder" "github.com/containerd/stargz-snapshotter/recorder"
"github.com/containerd/stargz-snapshotter/util/containerdutil" "github.com/containerd/stargz-snapshotter/util/containerdutil"
"github.com/klauspost/compress/zstd"
"github.com/opencontainers/go-digest" "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1" ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/urfave/cli/v2" "github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
) )
const defaultPeriod = 10 const defaultPeriod = 10
// OptimizeCommand converts and optimizes an image var OptimizeCommand = cli.Command{
var OptimizeCommand = &cli.Command{
Name: "optimize", Name: "optimize",
Usage: "optimize an image with user-specified workload", Usage: "optimize an image with user-specified workload",
ArgsUsage: "[flags] <source_ref> <target_ref>...", ArgsUsage: "[flags] <source_ref> <target_ref>...",
Flags: append([]cli.Flag{ Flags: append([]cli.Flag{
&cli.BoolFlag{ cli.BoolFlag{
Name: "reuse", Name: "reuse",
Usage: "reuse eStargz (already optimized) layers without further conversion", Usage: "reuse eStargz (already optimized) layers without further conversion",
}, },
&cli.StringSliceFlag{ cli.StringSliceFlag{
Name: "platform", Name: "platform",
Usage: "Pull content from a specific platform", Usage: "Pull content from a specific platform",
Value: &cli.StringSlice{}, Value: &cli.StringSlice{},
}, },
&cli.BoolFlag{ cli.BoolFlag{
Name: "all-platforms", Name: "all-platforms",
Usage: "targeting all platform of the source image", Usage: "targeting all platform of the source image",
}, },
&cli.BoolFlag{ cli.BoolFlag{
Name: "wait-on-signal", Name: "wait-on-signal",
Usage: "ignore context cancel and keep the container running until it receives SIGINT (Ctrl + C) sent manually", Usage: "ignore context cancel and keep the container running until it receives SIGINT (Ctrl + C) sent manually",
}, },
&cli.StringFlag{ cli.BoolFlag{
Name: "wait-on-line",
Usage: "Substring of a stdout line to be waited. When this string is detected, the container will be killed.",
},
&cli.BoolFlag{
Name: "no-optimize", Name: "no-optimize",
Usage: "convert image without optimization", Usage: "convert image without optimization",
}, },
&cli.StringFlag{ cli.StringFlag{
Name: "record-out", Name: "record-out",
Usage: "record the monitor log to the specified file", Usage: "record the monitor log to the specified file",
}, },
&cli.BoolFlag{ cli.BoolFlag{
Name: "oci", Name: "oci",
Usage: "convert Docker media types to OCI media types", Usage: "convert Docker media types to OCI media types",
}, },
&cli.IntFlag{
Name: "estargz-compression-level",
Usage: "eStargz compression level",
Value: gzip.BestCompression,
},
&cli.BoolFlag{
Name: "estargz-external-toc",
Usage: "Separate TOC JSON into another image (called \"TOC image\"). The name of TOC image is the original + \"-esgztoc\" suffix. Both eStargz and the TOC image should be pushed to the same registry. stargz-snapshotter refers to the TOC image when it pulls the result eStargz image.",
},
&cli.IntFlag{
Name: "estargz-chunk-size",
Usage: "eStargz chunk size (not applied to zstd:chunked)",
Value: 0,
},
&cli.IntFlag{
Name: "estargz-min-chunk-size",
Usage: "The minimal number of bytes of data must be written in one gzip stream. Note that this adds a TOC property that old reader doesn't understand (not applied to zstd:chunked)",
Value: 0,
},
&cli.BoolFlag{
Name: "zstdchunked",
Usage: "use zstd compression instead of gzip (a.k.a zstd:chunked)",
},
&cli.IntFlag{
Name: "zstdchunked-compression-level",
Usage: "zstd:chunked compression level",
Value: 3, // SpeedDefault; see also https://pkg.go.dev/github.com/klauspost/compress/zstd#EncoderLevel
},
}, samplerFlags...), }, samplerFlags...),
Action: func(clicontext *cli.Context) error { Action: func(clicontext *cli.Context) error {
convertOpts := []converter.Opt{} convertOpts := []converter.Opt{}
@ -126,30 +86,26 @@ var OptimizeCommand = &cli.Command{
return errors.New("src and target image need to be specified") return errors.New("src and target image need to be specified")
} }
var platformMC platforms.MatchComparer if !clicontext.Bool("all-platforms") {
if clicontext.Bool("all-platforms") {
platformMC = platforms.All
} else {
if pss := clicontext.StringSlice("platform"); len(pss) > 0 { if pss := clicontext.StringSlice("platform"); len(pss) > 0 {
var all []ocispec.Platform var all []ocispec.Platform
for _, ps := range pss { for _, ps := range pss {
p, err := platforms.Parse(ps) p, err := platforms.Parse(ps)
if err != nil { if err != nil {
return fmt.Errorf("invalid platform %q: %w", ps, err) return errors.Wrapf(err, "invalid platform %q", ps)
} }
all = append(all, p) all = append(all, p)
} }
platformMC = platforms.Ordered(all...) convertOpts = append(convertOpts, converter.WithPlatform(platforms.Ordered(all...)))
} else { } else {
platformMC = platforms.DefaultStrict() convertOpts = append(convertOpts, converter.WithPlatform(platforms.DefaultStrict()))
} }
} }
convertOpts = append(convertOpts, converter.WithPlatform(platformMC))
if clicontext.Bool("oci") { if clicontext.Bool("oci") {
convertOpts = append(convertOpts, converter.WithDockerToOCI(true)) convertOpts = append(convertOpts, converter.WithDockerToOCI(true))
} else if clicontext.Bool("zstdchunked") { } else {
return errors.New("option --zstdchunked must be used in conjunction with --oci") logrus.Warn("option --oci should be used as well")
} }
client, ctx, cancel, err := commands.NewClient(clicontext) client, ctx, cancel, err := commands.NewClient(clicontext)
@ -170,64 +126,19 @@ var OptimizeCommand = &cli.Command{
} }
if recordOutFile := clicontext.String("record-out"); recordOutFile != "" { if recordOutFile := clicontext.String("record-out"); recordOutFile != "" {
if err := writeContentFile(ctx, client, recordOut, recordOutFile); err != nil { if err := writeContentFile(ctx, client, recordOut, recordOutFile); err != nil {
return fmt.Errorf("failed output record file: %w", err) return errors.Wrapf(err, "failed output record file")
} }
} }
var f converter.ConvertFunc f := estargzconvert.LayerConvertWithLayerOptsFunc(esgzOptsPerLayer)
var finalize func(ctx context.Context, cs content.Store, ref string, desc *ocispec.Descriptor) (*images.Image, error)
if clicontext.Bool("zstdchunked") {
f = zstdchunkedconvert.LayerConvertWithLayerOptsFuncWithCompressionLevel(
zstd.EncoderLevelFromZstd(clicontext.Int("zstdchunked-compression-level")), esgzOptsPerLayer)
} else if !clicontext.Bool("estargz-external-toc") {
f = estargzconvert.LayerConvertWithLayerAndCommonOptsFunc(esgzOptsPerLayer,
estargz.WithCompressionLevel(clicontext.Int("estargz-compression-level")),
estargz.WithChunkSize(clicontext.Int("estargz-chunk-size")),
estargz.WithMinChunkSize(clicontext.Int("estargz-min-chunk-size")))
} else {
if clicontext.Bool("reuse") {
// We require that the layer conversion is triggerd for each layer
// to make sure that "finalize" function has the information of all layers.
return fmt.Errorf("\"estargz-external-toc\" can't be used with \"reuse\" flag")
}
f, finalize = esgzexternaltocconvert.LayerConvertWithLayerAndCommonOptsFunc(esgzOptsPerLayer, []estargz.Option{
estargz.WithChunkSize(clicontext.Int("estargz-chunk-size")),
estargz.WithMinChunkSize(clicontext.Int("estargz-min-chunk-size")),
}, clicontext.Int("estargz-compression-level"))
}
if wrapper != nil { if wrapper != nil {
f = wrapper(f) f = wrapper(f)
} }
layerConvertFunc := logWrapper(f) convertOpts = append(convertOpts, converter.WithLayerConvertFunc(logWrapper(f)))
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, os.Interrupt)
go func() {
// Cleanly cancel conversion
select {
case s := <-sigCh:
log.G(ctx).Infof("Got %v", s)
cancel()
case <-ctx.Done():
}
}()
convertOpts = append(convertOpts, converter.WithLayerConvertFunc(layerConvertFunc))
newImg, err := converter.Convert(ctx, client, targetRef, srcRef, convertOpts...) newImg, err := converter.Convert(ctx, client, targetRef, srcRef, convertOpts...)
if err != nil { if err != nil {
return err return err
} }
if finalize != nil {
newI, err := finalize(ctx, client.ContentStore(), targetRef, &newImg.Target)
if err != nil {
return err
}
is := client.ImageService()
_ = is.Delete(ctx, newI.Name)
finimg, err := is.Create(ctx, *newI)
if err != nil {
return err
}
fmt.Fprintln(clicontext.App.Writer, "extra image:", finimg.Name)
}
fmt.Fprintln(clicontext.App.Writer, newImg.Target.Digest.String()) fmt.Fprintln(clicontext.App.Writer, newImg.Target.Digest.String())
return nil return nil
}, },
@ -260,7 +171,7 @@ func analyze(ctx context.Context, clicontext *cli.Context, client *containerd.Cl
for _, ps := range pss { for _, ps := range pss {
p, err := platforms.Parse(ps) p, err := platforms.Parse(ps)
if err != nil { if err != nil {
return "", nil, nil, fmt.Errorf("invalid platform %q: %w", ps, err) return "", nil, nil, errors.Wrapf(err, "invalid platform %q", ps)
} }
if platforms.DefaultStrict().Match(p) { if platforms.DefaultStrict().Match(p) {
containsDefault = true containsDefault = true
@ -284,8 +195,7 @@ func analyze(ctx context.Context, clicontext *cli.Context, client *containerd.Cl
aOpts = append(aOpts, analyzer.WithWaitOnSignal()) aOpts = append(aOpts, analyzer.WithWaitOnSignal())
} else { } else {
aOpts = append(aOpts, aOpts = append(aOpts,
analyzer.WithPeriod(time.Duration(clicontext.Int("period"))*time.Second), analyzer.WithPeriod(time.Duration(clicontext.Int("period"))*time.Second))
analyzer.WithWaitLineOut(clicontext.String("wait-on-line")))
} }
if clicontext.Bool("terminal") { if clicontext.Bool("terminal") {
if !clicontext.Bool("i") { if !clicontext.Bool("i") {
@ -374,7 +284,7 @@ func isReusableESGZLayer(ctx context.Context, desc ocispec.Descriptor, cs conten
return false return false
} }
defer ra.Close() defer ra.Close()
r, err := estargz.Open(io.NewSectionReader(ra, 0, desc.Size), estargz.WithDecompressors(new(zstdchunked.Decompressor))) r, err := estargz.Open(io.NewSectionReader(ra, 0, desc.Size))
if err != nil { if err != nil {
return false return false
} }
@ -389,7 +299,7 @@ func excludeWrapper(excludes []digest.Digest) func(converter.ConvertFunc) conver
return func(ctx context.Context, cs content.Store, desc ocispec.Descriptor) (*ocispec.Descriptor, error) { return func(ctx context.Context, cs content.Store, desc ocispec.Descriptor) (*ocispec.Descriptor, error) {
for _, e := range excludes { for _, e := range excludes {
if e == desc.Digest { if e == desc.Digest {
log.G(ctx).Warnf("reusing %q without conversion", e) logrus.Warnf("reusing %q without conversion", e)
return nil, nil return nil, nil
} }
} }
@ -400,7 +310,7 @@ func excludeWrapper(excludes []digest.Digest) func(converter.ConvertFunc) conver
func logWrapper(convertFunc converter.ConvertFunc) converter.ConvertFunc { func logWrapper(convertFunc converter.ConvertFunc) converter.ConvertFunc {
return func(ctx context.Context, cs content.Store, desc ocispec.Descriptor) (*ocispec.Descriptor, error) { return func(ctx context.Context, cs content.Store, desc ocispec.Descriptor) (*ocispec.Descriptor, error) {
log.G(ctx).WithField("digest", desc.Digest).Infof("converting...") logrus.WithField("digest", desc.Digest).Infof("converting...")
return convertFunc(ctx, cs, desc) return convertFunc(ctx, cs, desc)
} }
} }

View File

@ -20,18 +20,16 @@ import (
"context" "context"
"fmt" "fmt"
containerd "github.com/containerd/containerd/v2/client" "github.com/containerd/containerd"
"github.com/containerd/containerd/v2/cmd/ctr/commands" "github.com/containerd/containerd/cmd/ctr/commands"
"github.com/containerd/containerd/v2/cmd/ctr/commands/content" "github.com/containerd/containerd/cmd/ctr/commands/content"
"github.com/containerd/containerd/v2/core/images" "github.com/containerd/containerd/images"
"github.com/containerd/containerd/v2/core/snapshots" "github.com/containerd/containerd/log"
ctdsnapshotters "github.com/containerd/containerd/v2/pkg/snapshotters" "github.com/containerd/containerd/snapshots"
"github.com/containerd/log"
fsconfig "github.com/containerd/stargz-snapshotter/fs/config" fsconfig "github.com/containerd/stargz-snapshotter/fs/config"
"github.com/containerd/stargz-snapshotter/fs/source" "github.com/containerd/stargz-snapshotter/fs/source"
"github.com/containerd/stargz-snapshotter/ipfs"
ocispec "github.com/opencontainers/image-spec/specs-go/v1" ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/urfave/cli/v2" "github.com/urfave/cli"
) )
const ( const (
@ -39,30 +37,21 @@ const (
skipContentVerifyOpt = "skip-content-verify" skipContentVerifyOpt = "skip-content-verify"
) )
// RpullCommand is a subcommand to pull an image from a registry leveraging stargz snapshotter var RpullCommand = cli.Command{
var RpullCommand = &cli.Command{
Name: "rpull", Name: "rpull",
Usage: "pull an image from a registry leveraging stargz snapshotter", Usage: "pull an image from a registry levaraging stargz snapshotter",
ArgsUsage: "[flags] <ref>", ArgsUsage: "[flags] <ref>",
Description: `Fetch and prepare an image for use in containerd leveraging stargz snapshotter. Description: `Fetch and prepare an image for use in containerd levaraging stargz snapshotter.
After pulling an image, it should be ready to use the same reference in a run After pulling an image, it should be ready to use the same reference in a run
command. command.
`, `,
Flags: append(append(commands.RegistryFlags, commands.LabelFlag, Flags: append(commands.RegistryFlags, commands.LabelFlag,
&cli.BoolFlag{ cli.BoolFlag{
Name: skipContentVerifyOpt, Name: skipContentVerifyOpt,
Usage: "Skip content verification for layers contained in this image.", Usage: "Skip content verification for layers contained in this image.",
}, },
&cli.BoolFlag{ ),
Name: "ipfs",
Usage: "Pull image from IPFS. Specify an IPFS CID as a reference. (experimental)",
},
&cli.BoolFlag{
Name: "use-containerd-labels",
Usage: "Use labels defined in containerd project",
},
), commands.SnapshotterFlags...),
Action: func(context *cli.Context) error { Action: func(context *cli.Context) error {
var ( var (
ref = context.Args().First() ref = context.Args().First()
@ -89,35 +78,21 @@ command.
return err return err
} }
config.FetchConfig = fc config.FetchConfig = fc
config.containerdLabels = context.Bool("use-containerd-labels")
if context.Bool(skipContentVerifyOpt) { if context.Bool(skipContentVerifyOpt) {
config.skipVerify = true config.skipVerify = true
} }
if context.Bool("ipfs") { if err := pull(ctx, client, ref, config); err != nil {
r, err := ipfs.NewResolver(ipfs.ResolverOptions{ return err
Scheme: "ipfs",
})
if err != nil {
return err
}
config.Resolver = r
} }
config.snapshotter = remoteSnapshotterName return nil
if sn := context.String("snapshotter"); sn != "" {
config.snapshotter = sn
}
return pull(ctx, client, ref, config)
}, },
} }
type rPullConfig struct { type rPullConfig struct {
*content.FetchConfig *content.FetchConfig
skipVerify bool skipVerify bool
snapshotter string
containerdLabels bool
} }
func pull(ctx context.Context, client *containerd.Client, ref string, config *rPullConfig) error { func pull(ctx context.Context, client *containerd.Client, ref string, config *rPullConfig) error {
@ -137,23 +112,16 @@ func pull(ctx context.Context, client *containerd.Client, ref string, config *rP
})) }))
} }
var labelHandler func(h images.Handler) images.Handler
prefetchSize := int64(10 * 1024 * 1024)
if config.containerdLabels {
labelHandler = source.AppendExtraLabelsHandler(prefetchSize, ctdsnapshotters.AppendInfoHandlerWrapper(ref))
} else {
labelHandler = source.AppendDefaultLabelsHandlerWrapper(ref, prefetchSize)
}
log.G(pCtx).WithField("image", ref).Debug("fetching") log.G(pCtx).WithField("image", ref).Debug("fetching")
labels := commands.LabelArgs(config.Labels) labels := commands.LabelArgs(config.Labels)
if _, err := client.Pull(pCtx, ref, []containerd.RemoteOpt{ if _, err := client.Pull(pCtx, ref, []containerd.RemoteOpt{
containerd.WithPullLabels(labels), containerd.WithPullLabels(labels),
containerd.WithResolver(config.Resolver), containerd.WithResolver(config.Resolver),
containerd.WithImageHandler(h), containerd.WithImageHandler(h),
containerd.WithSchema1Conversion,
containerd.WithPullUnpack, containerd.WithPullUnpack,
containerd.WithPullSnapshotter(config.snapshotter, snOpts...), containerd.WithPullSnapshotter(remoteSnapshotterName, snOpts...),
containerd.WithImageHandlerWrapper(labelHandler), containerd.WithImageHandlerWrapper(source.AppendDefaultLabelsHandlerWrapper(ref, 10*1024*1024)),
}...); err != nil { }...); err != nil {
return err return err
} }

View File

@ -20,23 +20,22 @@ import (
"fmt" "fmt"
"os" "os"
"github.com/containerd/containerd/v2/cmd/ctr/app" "github.com/containerd/containerd/cmd/ctr/app"
"github.com/containerd/containerd/pkg/seed"
"github.com/containerd/stargz-snapshotter/cmd/ctr-remote/commands" "github.com/containerd/stargz-snapshotter/cmd/ctr-remote/commands"
"github.com/urfave/cli/v2" "github.com/urfave/cli"
) )
func init() {
seed.WithTimeAndRand()
}
func main() { func main() {
customCommands := []*cli.Command{ customCommands := []cli.Command{commands.RpullCommand, commands.OptimizeCommand, commands.ConvertCommand}
commands.RpullCommand,
commands.OptimizeCommand,
commands.ConvertCommand,
commands.GetTOCDigestCommand,
commands.IPFSPushCommand,
}
app := app.New() app := app.New()
for i := range app.Commands { for i := range app.Commands {
if app.Commands[i].Name == "images" { if app.Commands[i].Name == "images" {
sc := map[string]*cli.Command{} sc := map[string]cli.Command{}
for _, subcmd := range customCommands { for _, subcmd := range customCommands {
sc[subcmd.Name] = subcmd sc[subcmd.Name] = subcmd
} }
@ -60,7 +59,7 @@ func main() {
} }
app.Commands = append(app.Commands, commands.FanotifyCommand) app.Commands = append(app.Commands, commands.FanotifyCommand)
if err := app.Run(os.Args); err != nil { if err := app.Run(os.Args); err != nil {
fmt.Fprintf(os.Stderr, "ctr-remote: %v\n", err) fmt.Fprintf(os.Stderr, "ctr: %v\n", err)
os.Exit(1) os.Exit(1)
} }
} }

View File

@ -1,159 +0,0 @@
module github.com/containerd/stargz-snapshotter/cmd
go 1.24.0
toolchain go1.24.2
require (
github.com/containerd/containerd/api v1.9.0
github.com/containerd/containerd/v2 v2.1.4
github.com/containerd/go-cni v1.1.13
github.com/containerd/log v0.1.0
github.com/containerd/platforms v1.0.0-rc.1
github.com/containerd/stargz-snapshotter v0.15.2-0.20240622031358-6405f362966d
github.com/containerd/stargz-snapshotter/estargz v0.17.0
github.com/containerd/stargz-snapshotter/ipfs v0.15.2-0.20240622031358-6405f362966d
github.com/coreos/go-systemd/v22 v22.5.0
github.com/docker/go-metrics v0.0.1
github.com/goccy/go-json v0.10.5
github.com/klauspost/compress v1.18.0
github.com/opencontainers/go-digest v1.0.0
github.com/opencontainers/image-spec v1.1.1
github.com/opencontainers/runtime-spec v1.2.1
github.com/pelletier/go-toml v1.9.5
github.com/rs/xid v1.6.0
github.com/urfave/cli/v2 v2.27.7
go.etcd.io/bbolt v1.4.2
golang.org/x/sync v0.16.0
golang.org/x/sys v0.34.0
google.golang.org/grpc v1.74.2
)
require (
github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/Microsoft/hcsshim v0.13.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cilium/ebpf v0.16.0 // indirect
github.com/containerd/cgroups/v3 v3.0.5 // indirect
github.com/containerd/console v1.0.5 // indirect
github.com/containerd/continuity v0.4.5 // indirect
github.com/containerd/errdefs v1.0.0 // indirect
github.com/containerd/errdefs/pkg v0.3.0 // indirect
github.com/containerd/fifo v1.1.0 // indirect
github.com/containerd/go-runc v1.1.0 // indirect
github.com/containerd/plugin v1.0.0 // indirect
github.com/containerd/ttrpc v1.2.7 // indirect
github.com/containerd/typeurl/v2 v2.2.3 // indirect
github.com/containernetworking/cni v1.3.0 // indirect
github.com/containernetworking/plugins v1.7.1 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/distribution/reference v0.6.0 // indirect
github.com/docker/cli v28.3.3+incompatible // indirect
github.com/docker/docker-credential-helpers v0.7.0 // indirect
github.com/docker/go-units v0.5.0 // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.9.0 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/swag v0.23.0 // indirect
github.com/godbus/dbus/v5 v5.1.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/google/gnostic-models v0.6.9 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/hanwen/go-fuse/v2 v2.8.0 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/hashicorp/go-retryablehttp v0.7.8 // indirect
github.com/intel/goresctrl v0.8.0 // indirect
github.com/ipfs/go-cid v0.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/cpuid/v2 v2.2.6 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mdlayher/socket v0.5.1 // indirect
github.com/mdlayher/vsock v1.2.1 // indirect
github.com/minio/sha256-simd v1.0.1 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/moby/locker v1.0.1 // indirect
github.com/moby/sys/mountinfo v0.7.2 // indirect
github.com/moby/sys/sequential v0.6.0 // indirect
github.com/moby/sys/signal v0.7.1 // indirect
github.com/moby/sys/symlink v0.3.0 // indirect
github.com/moby/sys/user v0.4.0 // indirect
github.com/moby/sys/userns v0.1.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/mr-tron/base58 v1.2.0 // indirect
github.com/multiformats/go-base32 v0.1.0 // indirect
github.com/multiformats/go-base36 v0.2.0 // indirect
github.com/multiformats/go-multiaddr v0.16.1 // indirect
github.com/multiformats/go-multibase v0.2.0 // indirect
github.com/multiformats/go-multihash v0.2.3 // indirect
github.com/multiformats/go-varint v0.0.7 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 // indirect
github.com/opencontainers/selinux v1.12.0 // indirect
github.com/pelletier/go-toml/v2 v2.2.4 // indirect
github.com/petermattis/goid v0.0.0-20240813172612-4fcff4a6cae7 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_golang v1.23.0 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.65.0 // indirect
github.com/prometheus/procfs v0.16.1 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/sasha-s/go-deadlock v0.3.5 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/spf13/pflag v1.0.6 // indirect
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
github.com/vbatts/tar-split v0.12.1 // indirect
github.com/x448/float16 v0.8.4 // indirect
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 // indirect
go.opentelemetry.io/otel v1.36.0 // indirect
go.opentelemetry.io/otel/metric v1.36.0 // indirect
go.opentelemetry.io/otel/trace v1.36.0 // indirect
golang.org/x/crypto v0.38.0 // indirect
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f // indirect
golang.org/x/mod v0.24.0 // indirect
golang.org/x/net v0.40.0 // indirect
golang.org/x/oauth2 v0.30.0 // indirect
golang.org/x/term v0.32.0 // indirect
golang.org/x/text v0.25.0 // indirect
golang.org/x/time v0.9.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a // indirect
google.golang.org/protobuf v1.36.6 // indirect
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/api v0.33.3 // indirect
k8s.io/apimachinery v0.33.3 // indirect
k8s.io/client-go v0.33.3 // indirect
k8s.io/cri-api v0.33.3 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect
lukechampine.com/blake3 v1.2.1 // indirect
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
sigs.k8s.io/randfill v1.0.0 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
tags.cncf.io/container-device-interface v1.0.1 // indirect
tags.cncf.io/container-device-interface/specs-go v1.0.0 // indirect
)
replace (
// Import local packages.
github.com/containerd/stargz-snapshotter => ../
github.com/containerd/stargz-snapshotter/estargz => ../estargz
github.com/containerd/stargz-snapshotter/ipfs => ../ipfs
)

View File

@ -1,539 +0,0 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8afgbRMd7mFxO99hRNu+6tazq8nFF9lIwo9JFroBk=
github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/Microsoft/hcsshim v0.13.0 h1:/BcXOiS6Qi7N9XqUcv27vkIuVOkBEcWstd2pMlWSeaA=
github.com/Microsoft/hcsshim v0.13.0/go.mod h1:9KWJ/8DgU+QzYGupX4tzMhRQE8h6w90lH6HAaclpEok=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cilium/ebpf v0.16.0 h1:+BiEnHL6Z7lXnlGUsXQPPAE7+kenAd4ES8MQ5min0Ok=
github.com/cilium/ebpf v0.16.0/go.mod h1:L7u2Blt2jMM/vLAVgjxluxtBKlz3/GWjB0dMOEngfwE=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/containerd/cgroups/v3 v3.0.5 h1:44na7Ud+VwyE7LIoJ8JTNQOa549a8543BmzaJHo6Bzo=
github.com/containerd/cgroups/v3 v3.0.5/go.mod h1:SA5DLYnXO8pTGYiAHXz94qvLQTKfVM5GEVisn4jpins=
github.com/containerd/console v1.0.5 h1:R0ymNeydRqH2DmakFNdmjR2k0t7UPuiOV/N/27/qqsc=
github.com/containerd/console v1.0.5/go.mod h1:YynlIjWYF8myEu6sdkwKIvGQq+cOckRm6So2avqoYAk=
github.com/containerd/containerd/api v1.9.0 h1:HZ/licowTRazus+wt9fM6r/9BQO7S0vD5lMcWspGIg0=
github.com/containerd/containerd/api v1.9.0/go.mod h1:GhghKFmTR3hNtyznBoQ0EMWr9ju5AqHjcZPsSpTKutI=
github.com/containerd/containerd/v2 v2.1.4 h1:/hXWjiSFd6ftrBOBGfAZ6T30LJcx1dBjdKEeI8xucKQ=
github.com/containerd/containerd/v2 v2.1.4/go.mod h1:8C5QV9djwsYDNhxfTCFjWtTBZrqjditQ4/ghHSYjnHM=
github.com/containerd/continuity v0.4.5 h1:ZRoN1sXq9u7V6QoHMcVWGhOwDFqZ4B9i5H6un1Wh0x4=
github.com/containerd/continuity v0.4.5/go.mod h1:/lNJvtJKUQStBzpVQ1+rasXO1LAWtUQssk28EZvJ3nE=
github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk=
github.com/containerd/fifo v1.1.0 h1:4I2mbh5stb1u6ycIABlBw9zgtlK8viPI9QkQNRQEEmY=
github.com/containerd/fifo v1.1.0/go.mod h1:bmC4NWMbXlt2EZ0Hc7Fx7QzTFxgPID13eH0Qu+MAb2o=
github.com/containerd/go-cni v1.1.13 h1:eFSGOKlhoYNxpJ51KRIMHZNlg5UgocXEIEBGkY7Hnis=
github.com/containerd/go-cni v1.1.13/go.mod h1:nTieub0XDRmvCZ9VI/SBG6PyqT95N4FIhxsauF1vSBI=
github.com/containerd/go-runc v1.1.0 h1:OX4f+/i2y5sUT7LhmcJH7GYrjjhHa1QI4e8yO0gGleA=
github.com/containerd/go-runc v1.1.0/go.mod h1:xJv2hFF7GvHtTJd9JqTS2UVxMkULUYw4JN5XAUZqH5U=
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
github.com/containerd/platforms v1.0.0-rc.1 h1:83KIq4yy1erSRgOVHNk1HYdPvzdJ5CnsWaRoJX4C41E=
github.com/containerd/platforms v1.0.0-rc.1/go.mod h1:J71L7B+aiM5SdIEqmd9wp6THLVRzJGXfNuWCZCllLA4=
github.com/containerd/plugin v1.0.0 h1:c8Kf1TNl6+e2TtMHZt+39yAPDbouRH9WAToRjex483Y=
github.com/containerd/plugin v1.0.0/go.mod h1:hQfJe5nmWfImiqT1q8Si3jLv3ynMUIBB47bQ+KexvO8=
github.com/containerd/ttrpc v1.2.7 h1:qIrroQvuOL9HQ1X6KHe2ohc7p+HP/0VE6XPU7elJRqQ=
github.com/containerd/ttrpc v1.2.7/go.mod h1:YCXHsb32f+Sq5/72xHubdiJRQY9inL4a4ZQrAbN1q9o=
github.com/containerd/typeurl/v2 v2.2.3 h1:yNA/94zxWdvYACdYO8zofhrTVuQY73fFU1y++dYSw40=
github.com/containerd/typeurl/v2 v2.2.3/go.mod h1:95ljDnPfD3bAbDJRugOiShd/DlAAsxGtUBhJxIn7SCk=
github.com/containernetworking/cni v1.3.0 h1:v6EpN8RznAZj9765HhXQrtXgX+ECGebEYEmnuFjskwo=
github.com/containernetworking/cni v1.3.0/go.mod h1:Bs8glZjjFfGPHMw6hQu82RUgEPNGEaBb9KS5KtNMnJ4=
github.com/containernetworking/plugins v1.7.1 h1:CNAR0jviDj6FS5Vg85NTgKWLDzZPfi/lj+VJfhMDTIs=
github.com/containernetworking/plugins v1.7.1/go.mod h1:xuMdjuio+a1oVQsHKjr/mgzuZ24leAsqUYRnzGoXHy0=
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo=
github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
github.com/docker/cli v28.3.3+incompatible h1:fp9ZHAr1WWPGdIWBM1b3zLtgCF+83gRdVMTJsUeiyAo=
github.com/docker/cli v28.3.3+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
github.com/docker/docker-credential-helpers v0.7.0 h1:xtCHsjxogADNZcdv1pKUHXryefjlVRqWqIhk/uXJp0A=
github.com/docker/docker-credential-helpers v0.7.0/go.mod h1:rETQfLdHNT3foU5kuNkFR1R1V12OJRRO5lzt2D1b5X0=
github.com/docker/go-metrics v0.0.1 h1:AgB/0SvBxihN0X8OR4SjsblXkbMvalQ8cjmtKQ2rQV8=
github.com/docker/go-metrics v0.0.1/go.mod h1:cG1hvH2utMXtqgqqYE9plW6lDxS3/5ayHzueweSI3Vw=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs=
github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE=
github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k=
github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
github.com/go-quicktest/qt v1.101.0 h1:O1K29Txy5P2OK0dGo59b7b0LR6wKfIhttaAhHUyn7eI=
github.com/go-quicktest/qt v1.101.0/go.mod h1:14Bz/f7NwaXPtdYEgzsx46kqSxVwTbzVZsDC26tQJow=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4=
github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk=
github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw=
github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hanwen/go-fuse/v2 v2.8.0 h1:wV8rG7rmCz8XHSOwBZhG5YcVqcYjkzivjmbaMafPlAs=
github.com/hanwen/go-fuse/v2 v2.8.0/go.mod h1:yE6D2PqWwm3CbYRxFXV9xUd8Md5d6NG0WBs5spCswmI=
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/hashicorp/go-retryablehttp v0.7.8 h1:ylXZWnqa7Lhqpk0L1P1LzDtGcCR0rPVUrx/c8Unxc48=
github.com/hashicorp/go-retryablehttp v0.7.8/go.mod h1:rjiScheydd+CxvumBsIrFKlx3iS0jrZ7LvzFGFmuKbw=
github.com/intel/goresctrl v0.8.0 h1:N3shVbS3kA1Hk2AmcbHv8805Hjbv+zqsCIZCGktxx50=
github.com/intel/goresctrl v0.8.0/go.mod h1:T3ZZnuHSNouwELB5wvOoUJaB7l/4Rm23rJy/wuWJlr0=
github.com/ipfs/go-cid v0.1.0 h1:YN33LQulcRHjfom/i25yoOZR4Telp1Hr/2RU3d0PnC0=
github.com/ipfs/go-cid v0.1.0/go.mod h1:rH5/Xv83Rfy8Rw6xG+id3DYAMUVmem1MowoKwdXmN2o=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/josharian/native v1.1.0 h1:uuaP0hAbW7Y4l0ZRQ6C9zfb7Mg1mbFKry/xzDAfmtLA=
github.com/josharian/native v1.1.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w=
github.com/jsimonetti/rtnetlink/v2 v2.0.1 h1:xda7qaHDSVOsADNouv7ukSuicKZO7GgVUCXxpaIEIlM=
github.com/jsimonetti/rtnetlink/v2 v2.0.1/go.mod h1:7MoNYNbb3UaDHtF8udiJo/RH6VsTKP1pqKLUTVCvToE=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc=
github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/mdlayher/netlink v1.7.2 h1:/UtM3ofJap7Vl4QWCPDGXY8d3GIY2UGSDbK+QWmY8/g=
github.com/mdlayher/netlink v1.7.2/go.mod h1:xraEF7uJbxLhc5fpHL4cPe221LI2bdttWlU+ZGLfQSw=
github.com/mdlayher/socket v0.5.1 h1:VZaqt6RkGkt2OE9l3GcC6nZkqD3xKeQLyfleW/uBcos=
github.com/mdlayher/socket v0.5.1/go.mod h1:TjPLHI1UgwEv5J1B5q0zTZq12A/6H7nKmtTanQE37IQ=
github.com/mdlayher/vsock v1.2.1 h1:pC1mTJTvjo1r9n9fbm7S1j04rCgCzhCOS5DY0zqHlnQ=
github.com/mdlayher/vsock v1.2.1/go.mod h1:NRfCibel++DgeMD8z/hP+PPTjlNJsdPOmxcnENvE+SE=
github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1/go.mod h1:pD8RvIylQ358TN4wwqatJ8rNavkEINozVn9DtGI3dfQ=
github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM=
github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dzMM=
github.com/minio/sha256-simd v1.0.1/go.mod h1:Pz6AKMiUdngCLpeTL/RJY1M9rUuPMYujV5xJjtbRSN8=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mndrix/tap-go v0.0.0-20171203230836-629fa407e90b/go.mod h1:pzzDgJWZ34fGzaAZGFW22KVZDfyrYW+QABMrWnJBnSs=
github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg=
github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc=
github.com/moby/sys/mountinfo v0.7.2 h1:1shs6aH5s4o5H2zQLn796ADW1wMrIwHsyJ2v9KouLrg=
github.com/moby/sys/mountinfo v0.7.2/go.mod h1:1YOa8w8Ih7uW0wALDUgT1dTTSBrZ+HiBLGws92L2RU4=
github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU=
github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko=
github.com/moby/sys/signal v0.7.1 h1:PrQxdvxcGijdo6UXXo/lU/TvHUWyPhj7UOpSo8tuvk0=
github.com/moby/sys/signal v0.7.1/go.mod h1:Se1VGehYokAkrSQwL4tDzHvETwUZlnY7S5XtQ50mQp8=
github.com/moby/sys/symlink v0.3.0 h1:GZX89mEZ9u53f97npBy4Rc3vJKj7JBDj/PN2I22GrNU=
github.com/moby/sys/symlink v0.3.0/go.mod h1:3eNdhduHmYPcgsJtZXW1W4XUJdZGBIkttZ8xKqPUJq0=
github.com/moby/sys/user v0.4.0 h1:jhcMKit7SA80hivmFJcbB1vqmw//wU61Zdui2eQXuMs=
github.com/moby/sys/user v0.4.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs=
github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g=
github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/mr-tron/base58 v1.1.0/go.mod h1:xcD2VGqlgYjBdcBLw+TuYLr8afG+Hj8g2eTVqeSzSU8=
github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
github.com/multiformats/go-base32 v0.0.3/go.mod h1:pLiuGC8y0QR3Ue4Zug5UzK9LjgbkL8NSQj0zQ5Nz/AA=
github.com/multiformats/go-base32 v0.1.0 h1:pVx9xoSPqEIQG8o+UbAe7DNi51oej1NtK+aGkbLYxPE=
github.com/multiformats/go-base32 v0.1.0/go.mod h1:Kj3tFY6zNr+ABYMqeUNeGvkIC/UYgtWibDcT0rExnbI=
github.com/multiformats/go-base36 v0.1.0/go.mod h1:kFGE83c6s80PklsHO9sRn2NCoffoRdUUOENyW/Vv6sM=
github.com/multiformats/go-base36 v0.2.0 h1:lFsAbNOGeKtuKozrtBsAkSVhv1p9D0/qedU9rQyccr0=
github.com/multiformats/go-base36 v0.2.0/go.mod h1:qvnKE++v+2MWCfePClUEjE78Z7P2a1UV0xHgWc0hkp4=
github.com/multiformats/go-multiaddr v0.16.1 h1:fgJ0Pitow+wWXzN9do+1b8Pyjmo8m5WhGfzpL82MpCw=
github.com/multiformats/go-multiaddr v0.16.1/go.mod h1:JSVUmXDjsVFiW7RjIFMP7+Ev+h1DTbiJgVeTV/tcmP0=
github.com/multiformats/go-multibase v0.0.3/go.mod h1:5+1R4eQrT3PkYZ24C3W2Ue2tPwIdYQD509ZjSb5y9Oc=
github.com/multiformats/go-multibase v0.2.0 h1:isdYCVLvksgWlMW9OZRYJEa9pZETFivncJHmHnnd87g=
github.com/multiformats/go-multibase v0.2.0/go.mod h1:bFBZX4lKCA/2lyOFSAoKH5SS6oPyjtnzK/XTFDPkNuk=
github.com/multiformats/go-multihash v0.0.15/go.mod h1:D6aZrWNLFTV/ynMpKsNtB40mJzmCl4jb1alC0OvHiHg=
github.com/multiformats/go-multihash v0.2.3 h1:7Lyc8XfX/IY2jWb/gI7JP+o7JEq9hOa7BFvVU9RSh+U=
github.com/multiformats/go-multihash v0.2.3/go.mod h1:dXgKXCXjBzdscBLk9JkjINiEsCKRVch90MdaGiKsvSM=
github.com/multiformats/go-varint v0.0.6/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE=
github.com/multiformats/go-varint v0.0.7 h1:sWSGR+f/eu5ABZA2ZpYKBILXTTs9JWpdEM/nEGOHFS8=
github.com/multiformats/go-varint v0.0.7/go.mod h1:r8PUYw/fD/SjBCiKOoDlGF6QawOELpZAu9eioSos/OU=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus=
github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8=
github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y=
github.com/onsi/gomega v1.37.0/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
github.com/opencontainers/runtime-spec v1.0.3-0.20220825212826-86290f6a00fb/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.2.1 h1:S4k4ryNgEpxW1dzyqffOmhI1BHYcjzU8lpJfSlR0xww=
github.com/opencontainers/runtime-spec v1.2.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626 h1:DmNGcqH3WDbV5k8OJ+esPWbqUOX5rMLR2PMvziDMJi0=
github.com/opencontainers/runtime-tools v0.9.1-0.20221107090550-2e043c6bd626/go.mod h1:BRHJJd0E+cx42OybVYSgUvZmU0B8P9gZuRXlZUP7TKI=
github.com/opencontainers/selinux v1.9.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
github.com/opencontainers/selinux v1.12.0 h1:6n5JV4Cf+4y0KNXW48TLj5DwfXpvWlxXplUkdTrmPb8=
github.com/opencontainers/selinux v1.12.0/go.mod h1:BTPX+bjVbWGXw7ZZWUbdENt8w0htPSrlgOOysQaU62U=
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4=
github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
github.com/petermattis/goid v0.0.0-20240813172612-4fcff4a6cae7 h1:Dx7Ovyv/SFnMFw3fD4oEoeorXc6saIiQ23LrGLth0Gw=
github.com/petermattis/goid v0.0.0-20240813172612-4fcff4a6cae7/go.mod h1:pxMtw7cyUw6B2bRH0ZBANSPg+AoSud1I1iyJHI69jH4=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
github.com/prometheus/client_golang v1.1.0/go.mod h1:I1FGZT9+L76gKKOs5djB6ezCbFQP1xR9D75/vuwEF3g=
github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc=
github.com/prometheus/client_golang v1.23.0/go.mod h1:i/o0R9ByOnHX0McrTMTyhYvKE4haaf2mW08I+jGAjEE=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
github.com/prometheus/common v0.6.0/go.mod h1:eBmuwkDJBwy6iBfxCBob6t6dR6ENT/y+J+Zk0j9GMYc=
github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE=
github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.0.3/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ=
github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU=
github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sasha-s/go-deadlock v0.3.5 h1:tNCOEEDG6tBqrNDOX35j/7hL5FcFViG6awUGROb2NsU=
github.com/sasha-s/go-deadlock v0.3.5/go.mod h1:bugP6EGbdGYObIlx7pUZtWqlvo8k9H6vCBBsiChJQ5U=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU=
github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4=
github.com/vbatts/tar-split v0.12.1 h1:CqKoORW7BUWBe7UL/iqTVvkTBOF8UvOMKOIZykxnnbo=
github.com/vbatts/tar-split v0.12.1/go.mod h1:eF6B6i6ftWQcDqEn3/iGFRFRo8cBIMSJVOpnNdfTMFA=
github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY=
github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74=
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.etcd.io/bbolt v1.4.2 h1:IrUHp260R8c+zYx/Tm8QZr04CX+qWS5PGfPdevhdm1I=
go.etcd.io/bbolt v1.4.2/go.mod h1:Is8rSHO/b4f3XigBC0lL0+4FwAQv3HXEEIgFMuKHceM=
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ=
go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg=
go.opentelemetry.io/otel v1.36.0/go.mod h1:/TcFMXYjyRNh8khOAO9ybYkqaDBb/70aVwkNML4pP8E=
go.opentelemetry.io/otel/metric v1.36.0 h1:MoWPKVhQvJ+eeXWHFBOPoBOi20jh6Iq2CcCREuTYufE=
go.opentelemetry.io/otel/metric v1.36.0/go.mod h1:zC7Ks+yeyJt4xig9DEw9kuUFe5C3zLbVjV2PzT6qzbs=
go.opentelemetry.io/otel/sdk v1.36.0 h1:b6SYIuLRs88ztox4EyrvRti80uXIFy+Sqzoh9kFULbs=
go.opentelemetry.io/otel/sdk v1.36.0/go.mod h1:+lC+mTgD+MUWfjJubi2vvXWcVxyr9rmlshZni72pXeY=
go.opentelemetry.io/otel/sdk/metric v1.36.0 h1:r0ntwwGosWGaa0CrSt8cuNuTcccMXERFwHX4dThiPis=
go.opentelemetry.io/otel/sdk/metric v1.36.0/go.mod h1:qTNOhFDfKRwX0yXOqJYegL5WRaW376QbB7P4Pb0qva4=
go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKrsNd4w=
go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA=
go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I=
golang.org/x/crypto v0.0.0-20210506145944-38f3c27a63bf/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8=
golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8=
golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo=
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.24.0 h1:ZfthKaKaT4NrhGVZHO1/WDTwGES4De8KtWO0SIbNJMU=
golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY=
golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210309074719-68d13333faf2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.34.0 h1:H5Y5sJ2L2JRdyv7ROF1he/lPdvFsd0mJHFw2ThKHxLA=
golang.org/x/sys v0.34.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg=
golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY=
golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU=
golang.org/x/tools v0.31.0/go.mod h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a h1:v2PbRU4K3llS09c7zodFpNePeamkAwG3mPrAery9VeE=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc=
google.golang.org/grpc v1.74.2 h1:WoosgB65DlWVC9FqI82dGsZhWFNBSLjQ84bjROOpMu4=
google.golang.org/grpc v1.74.2/go.mod h1:CtQ+BGjaAIXHs/5YS3i473GqwBBa1zGQNevxdeBEXrM=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4=
gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.5.0 h1:Ljk6PdHdOhAb5aDMWXjDLMMhph+BpztA4v1QdqEW2eY=
gotest.tools/v3 v3.5.0/go.mod h1:isy3WKz7GK6uNw/sbHzfKBLvlvXwUyV06n6brMxxopU=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
k8s.io/api v0.33.3 h1:SRd5t//hhkI1buzxb288fy2xvjubstenEKL9K51KBI8=
k8s.io/api v0.33.3/go.mod h1:01Y/iLUjNBM3TAvypct7DIj0M0NIZc+PzAHCIo0CYGE=
k8s.io/apimachinery v0.33.3 h1:4ZSrmNa0c/ZpZJhAgRdcsFcZOw1PQU1bALVQ0B3I5LA=
k8s.io/apimachinery v0.33.3/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM=
k8s.io/client-go v0.33.3 h1:M5AfDnKfYmVJif92ngN532gFqakcGi6RvaOF16efrpA=
k8s.io/client-go v0.33.3/go.mod h1:luqKBQggEf3shbxHY4uVENAxrDISLOarxpTKMiUuujg=
k8s.io/cri-api v0.33.3 h1:aQvK3UxsaVMul4z71lOiblMHdhw9ROaw3Cgg15xDrD4=
k8s.io/cri-api v0.33.3/go.mod h1:OLQvT45OpIA+tv91ZrpuFIGY+Y2Ho23poS7n115Aocs=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4=
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8=
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro=
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
lukechampine.com/blake3 v1.2.1 h1:YuqqRuaqsGV71BV/nm9xlI0MKUv4QC54jQnBChWbGnI=
lukechampine.com/blake3 v1.2.1/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1LM6k=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo=
sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc=
sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
tags.cncf.io/container-device-interface v1.0.1 h1:KqQDr4vIlxwfYh0Ed/uJGVgX+CHAkahrgabg6Q8GYxc=
tags.cncf.io/container-device-interface v1.0.1/go.mod h1:JojJIOeW3hNbcnOH2q0NrWNha/JuHoDZcmYxAZwb2i0=
tags.cncf.io/container-device-interface/specs-go v1.0.0 h1:8gLw29hH1ZQP9K1YtAzpvkHCjjyIxHZYzBAvlQ+0vD8=
tags.cncf.io/container-device-interface/specs-go v1.0.0/go.mod h1:u86hoFWqnh3hWz3esofRFKbI261bUlvUfLKGrDhJkgQ=

View File

@ -1,97 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"fmt"
"net"
"os"
"path/filepath"
"github.com/containerd/log"
"github.com/containerd/stargz-snapshotter/cmd/containerd-stargz-grpc/fsopts"
fusemanager "github.com/containerd/stargz-snapshotter/fusemanager"
"github.com/containerd/stargz-snapshotter/service"
"github.com/containerd/stargz-snapshotter/service/keychain/keychainconfig"
"google.golang.org/grpc"
)
// init registers two configuration hooks with the FUSE manager.
//
// The first hook translates the FUSE manager configuration into filesystem
// service options (metadata store selection, optional IPFS support).
// The second hook configures registry credential keychains (kubeconfig
// and/or CRI) and, when the CRI keychain is enabled, serves it over a
// dedicated unix socket separate from the FUSE manager's own socket.
func init() {
	// Hook 1: filesystem options.
	fusemanager.RegisterConfigFunc(func(cc *fusemanager.ConfigContext) ([]service.Option, error) {
		fsConfig := fsopts.Config{
			EnableIpfs:    cc.Config.IPFS,
			MetadataStore: cc.Config.MetadataStore,
			OpenBoltDB:    cc.OpenBoltDB,
		}
		fsOpts, err := fsopts.ConfigFsOpts(cc.Ctx, cc.RootDir, &fsConfig)
		if err != nil {
			return nil, err
		}
		return []service.Option{service.WithFilesystemOptions(fsOpts...)}, nil
	})

	// Hook 2: credential keychains.
	fusemanager.RegisterConfigFunc(func(cc *fusemanager.ConfigContext) ([]service.Option, error) {
		keyChainConfig := keychainconfig.Config{
			EnableKubeKeychain:         cc.Config.Config.KubeconfigKeychainConfig.EnableKeychain,
			EnableCRIKeychain:          cc.Config.Config.CRIKeychainConfig.EnableKeychain,
			KubeconfigPath:             cc.Config.Config.KubeconfigPath,
			DefaultImageServiceAddress: cc.Config.DefaultImageServiceAddress,
			ImageServicePath:           cc.Config.Config.ImageServicePath,
		}
		// BUG FIX: parentheses added around the ListenPath checks. Without
		// them Go's precedence (&& before ||) parsed this as
		//   (EnableKeychain && ListenPath == "") || ListenPath == cc.Address
		// which (a) rejected ListenPath == cc.Address even when the CRI
		// keychain was disabled and (b) skipped the error when the keychain
		// was enabled with an empty path only by accident. The intended rule
		// — mirrored by serveCRISocket below — is: if the CRI keychain is
		// enabled, its listen path must be set and must differ from the
		// FUSE manager address.
		if cc.Config.Config.CRIKeychainConfig.EnableKeychain &&
			(cc.Config.Config.ListenPath == "" || cc.Config.Config.ListenPath == cc.Address) {
			return nil, fmt.Errorf("listen path of CRI server must be specified as a separated socket from FUSE manager server")
		}
		// For CRI keychain, if listening path is different from stargz-snapshotter's socket, prepare for the dedicated grpc server and the socket.
		serveCRISocket := cc.Config.Config.CRIKeychainConfig.EnableKeychain && cc.Config.Config.ListenPath != "" && cc.Config.Config.ListenPath != cc.Address
		if serveCRISocket {
			cc.CRIServer = grpc.NewServer()
		}
		credsFuncs, err := keychainconfig.ConfigKeychain(cc.Ctx, cc.CRIServer, &keyChainConfig)
		if err != nil {
			return nil, err
		}
		if serveCRISocket {
			addr := cc.Config.Config.ListenPath
			// Prepare the directory for the socket.
			if err := os.MkdirAll(filepath.Dir(addr), 0700); err != nil {
				return nil, fmt.Errorf("failed to create directory %q: %w", filepath.Dir(addr), err)
			}
			// Try to remove the socket file to avoid EADDRINUSE.
			if err := os.RemoveAll(addr); err != nil {
				return nil, fmt.Errorf("failed to remove %q: %w", addr, err)
			}
			// Listen and serve the CRI keychain gRPC server in the
			// background for the lifetime of the process.
			l, err := net.Listen("unix", addr)
			if err != nil {
				return nil, fmt.Errorf("error on listen socket %q: %w", addr, err)
			}
			go func() {
				if err := cc.CRIServer.Serve(l); err != nil {
					log.G(cc.Ctx).WithError(err).Errorf("error on serving CRI via socket %q", addr)
				}
			}()
		}
		return []service.Option{service.WithCredsFuncs(credsFuncs...)}, nil
	})
}
// main delegates to the FUSE manager entry point; behavior is customized
// through the RegisterConfigFunc hooks installed in init.
func main() {
	fusemanager.Run()
}

341
cmd/stargz-store/fs.go Normal file
View File

@ -0,0 +1,341 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"context"
"encoding/base64"
"syscall"
"time"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/reference"
fusefs "github.com/hanwen/go-fuse/v2/fs"
"github.com/hanwen/go-fuse/v2/fuse"
digest "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
)
const (
	defaultLinkMode = syscall.S_IFLNK | 0400 // -r--------
	defaultDirMode  = syscall.S_IFDIR | 0500 // dr-x------

	// Well-known entry names served by the store filesystem.
	poolLink          = "pool"     // symlink to the pool root directory
	layerLink         = "diff"     // mountpoint exposing the layer contents
	debugManifestLink = "manifest" // symlink to the cached image manifest (debug)
	debugConfigLink   = "config"   // symlink to the cached image config (debug)
	layerInfoLink     = "info"     // JSON file describing the layer
	layerUseFile      = "use"      // sentinel file driving refcount management
)
// mount exposes pool as a FUSE filesystem at mountpoint and blocks until the
// mount is established; serving then continues in a background goroutine.
func mount(mountpoint string, pool *pool, debug bool) error {
	cacheTimeout := time.Second
	fsOpts := &fusefs.Options{
		AttrTimeout:     &cacheTimeout,
		EntryTimeout:    &cacheTimeout,
		NullPermissions: true,
	}
	mountOpts := &fuse.MountOptions{
		AllowOther: true,             // allow users other than root&mounter to access fs
		Options:    []string{"suid"}, // allow setuid inside container
		Debug:      debug,
	}
	server, err := fuse.NewServer(
		fusefs.NewNodeFS(&rootnode{pool: pool}, fsOpts), mountpoint, mountOpts)
	if err != nil {
		return err
	}
	go server.Serve()
	return server.WaitMount()
}
// rootnode is the mountpoint node of stargz-store.
type rootnode struct {
	fusefs.Inode
	pool *pool // layer/manifest pool shared by every node in this mount
}

// Compile-time interface assertions.
var _ = (fusefs.InodeEmbedder)((*rootnode)(nil))
var _ = (fusefs.NodeLookuper)((*rootnode)(nil))
// Lookup loads manifest and config of specified name (image reference)
// and returns refnode of the specified name.
// The name is expected to be a base64 (std encoding) image reference,
// except for the special poolLink entry which points at the pool root.
func (n *rootnode) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fusefs.Inode, syscall.Errno) {
	switch name {
	case poolLink:
		// "pool" resolves to a symlink to the pool root directory.
		return n.NewInode(ctx,
			&linknode{linkname: n.pool.root()}, defaultLinkAttr(&out.Attr)), 0
	}
	refBytes, err := base64.StdEncoding.DecodeString(name)
	if err != nil {
		log.G(ctx).WithError(err).Debugf("failed to decode ref base64 %q", name)
		return nil, syscall.EINVAL
	}
	ref := string(refBytes)
	refspec, err := reference.Parse(ref)
	if err != nil {
		log.G(ctx).WithError(err).Warnf("invalid reference %q for %q", ref, name)
		return nil, syscall.EINVAL
	}
	// Fetch (or reuse cached) manifest and config for this image reference.
	manifest, mPath, config, cPath, err := n.pool.loadManifestAndConfig(ctx, refspec)
	if err != nil {
		log.G(ctx).WithError(err).
			Warnf("failed to fetch manifest and config of %q(%q)", ref, name)
		return nil, syscall.EIO
	}
	return n.NewInode(ctx, &refnode{
		pool:         n.pool,
		ref:          refspec,
		manifest:     manifest,
		manifestPath: mPath,
		config:       config,
		configPath:   cPath,
	}, defaultDirAttr(&out.Attr)), 0
}
// refnode is the node at <mountpoint>/<imageref>.
// It caches the image's manifest and config (both parsed and as file paths
// for the debug symlink entries).
type refnode struct {
	fusefs.Inode
	pool *pool

	ref          reference.Spec   // parsed image reference this node represents
	manifest     ocispec.Manifest // image manifest fetched for ref
	manifestPath string           // on-disk path of the cached manifest
	config       ocispec.Image    // image config fetched for ref
	configPath   string           // on-disk path of the cached config
}

// Compile-time interface assertions.
var _ = (fusefs.InodeEmbedder)((*refnode)(nil))
var _ = (fusefs.NodeLookuper)((*refnode)(nil))
// Lookup returns layernode of the specified name.
// The name is a layer digest contained in this image's manifest, or one of
// the debug entries exposing the raw manifest/config files.
func (n *refnode) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fusefs.Inode, syscall.Errno) {
	switch name {
	case debugManifestLink:
		return n.NewInode(ctx,
			&linknode{linkname: n.manifestPath}, defaultLinkAttr(&out.Attr)), 0
	case debugConfigLink:
		return n.NewInode(ctx,
			&linknode{linkname: n.configPath}, defaultLinkAttr(&out.Attr)), 0
	}
	targetDigest, err := digest.Parse(name)
	if err != nil {
		log.G(ctx).WithError(err).Warnf("invalid digest for %q", name)
		return nil, syscall.EINVAL
	}
	// Locate the layer descriptor in the manifest. Index into the slice
	// instead of taking the address of the range variable (&l) so the pointer
	// can't alias the shared loop variable (pre-Go 1.22 semantics).
	var layer *ocispec.Descriptor
	for i := range n.manifest.Layers {
		if n.manifest.Layers[i].Digest == targetDigest {
			layer = &n.manifest.Layers[i]
			break
		}
	}
	if layer == nil {
		// err is guaranteed nil here (the digest parsed fine; it simply isn't
		// part of this manifest), so don't attach it to the log entry.
		log.G(ctx).Warnf("invalid digest for %q: %q", name, targetDigest.String())
		return nil, syscall.EINVAL
	}
	return n.NewInode(ctx, &layernode{
		pool:    n.pool,
		layer:   *layer,
		layers:  n.manifest.Layers,
		refnode: n,
	}, defaultDirAttr(&out.Attr)), 0
}
var _ = (fusefs.NodeRmdirer)((*refnode)(nil))

// Rmdir marks this layer as "release".
// We don't use layernode.Unlink because Unlink event doesn't reach here when "use" file isn't visible
// to the filesystem client.
//
// Returns ENOENT on success so the client's view stays consistent (the layer
// directory is synthetic and was never really created).
func (n *refnode) Rmdir(ctx context.Context, name string) syscall.Errno {
	// Debug entries are read-only; treat removal as a no-op.
	if name == debugManifestLink || name == debugConfigLink {
		return syscall.EROFS // nop
	}
	targetDigest, err := digest.Parse(name)
	if err != nil {
		log.G(ctx).WithError(err).Warnf("invalid digest for %q during release", name)
		return syscall.EINVAL
	}
	// Decrement the refcount for this (image, layer) pair.
	current, err := n.pool.release(n.ref, targetDigest)
	if err != nil {
		log.G(ctx).WithError(err).Warnf("failed to release layer %v / %v", n.ref, targetDigest)
		return syscall.EIO
	}
	log.G(ctx).WithField("refcounter", current).Warnf("layer %v / %v is marked as RELEASE", n.ref, targetDigest)
	return syscall.ENOENT
}
// layernode is the node at <mountpoint>/<imageref>/<layerdigest>.
type layernode struct {
	fusefs.Inode
	pool *pool

	layer  ocispec.Descriptor   // descriptor of the layer this node serves
	layers []ocispec.Descriptor // all layers of the image (for pre-resolving)

	refnode *refnode // parent image node
}

// Compile-time interface assertions.
var _ = (fusefs.InodeEmbedder)((*layernode)(nil))
var _ = (fusefs.NodeCreater)((*layernode)(nil))
// Create marks this layer as "using".
// We don't use refnode.Mkdir because Mkdir event doesn't reach here if layernode already exists.
//
// Always returns ENOENT: the "use" file is a write-only signal for refcount
// management and is never materialized in the filesystem.
func (n *layernode) Create(ctx context.Context, name string, flags uint32, mode uint32, out *fuse.EntryOut) (node *fusefs.Inode, fh fusefs.FileHandle, fuseFlags uint32, errno syscall.Errno) {
	if name == layerUseFile {
		current := n.pool.use(n.refnode.ref, n.layer.Digest)
		log.G(ctx).WithField("refcounter", current).Warnf("layer %v / %v is marked as USING",
			n.refnode.ref, n.layer.Digest)
	}
	// TODO: implement cleanup
	return nil, nil, 0, syscall.ENOENT
}
var _ = (fusefs.NodeLookuper)((*layernode)(nil))

// Lookup routes to the target file stored in the pool, based on the specified file name.
// Recognized names: layerInfoLink ("info"), layerLink ("diff") and
// layerUseFile ("use"); anything else yields ENOENT.
func (n *layernode) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fusefs.Inode, syscall.Errno) {
	switch name {
	case layerInfoLink:
		// "info": symlink to a JSON file describing this layer.
		var err error
		infopath, err := n.pool.loadLayerInfo(ctx, n.refnode.ref, n.layer.Digest)
		if err != nil {
			log.G(ctx).WithError(err).
				Warnf("failed to get layer info for %q: %q", name, n.layer.Digest)
			return nil, syscall.EIO
		}
		return n.NewInode(ctx, &linknode{linkname: infopath}, defaultLinkAttr(&out.Attr)), 0
	case layerLink:
		// "diff": resolve the layer (fetch + verify) and expose its root node.
		l, err := n.pool.loadLayer(ctx, n.refnode.ref, n.layer, n.layers)
		if err != nil {
			cErr := ctx.Err()
			if errors.Is(cErr, context.Canceled) || errors.Is(err, context.Canceled) {
				// When filesystem client canceled to lookup this layer,
				// do not log this as "preparation failure" because it's
				// intentional.
				log.G(ctx).WithError(err).
					Debugf("error resolving layer (context error: %v)", cErr)
				return nil, syscall.EIO
			}
			log.G(ctx).WithField(remoteSnapshotLogKey, prepareFailed).
				WithField("layerdigest", n.layer.Digest).
				WithError(err).
				Debugf("error resolving layer (context error: %v)", cErr)
			log.G(ctx).WithError(err).Warnf("failed to mount layer %q: %q",
				name, n.layer.Digest)
			return nil, syscall.EIO
		}
		root, err := l.RootNode()
		if err != nil {
			log.G(ctx).WithField(remoteSnapshotLogKey, prepareFailed).
				WithField("layerdigest", n.layer.Digest).
				WithError(err).
				Debugf("failed to get root node")
			return nil, syscall.EIO
		}
		// Fill the entry attributes from the layer root's own Getattr so the
		// kernel sees consistent mode/ino for the mounted subtree.
		var ao fuse.AttrOut
		if errno := root.(fusefs.NodeGetattrer).Getattr(ctx, nil, &ao); errno != 0 {
			log.G(ctx).WithField(remoteSnapshotLogKey, prepareFailed).
				WithField("layerdigest", n.layer.Digest).
				WithError(err).
				Debugf("failed to get root node")
			return nil, errno
		}
		copyAttr(&out.Attr, &ao.Attr)
		return n.NewInode(ctx, root, fusefs.StableAttr{
			Mode: out.Attr.Mode,
			Ino:  out.Attr.Ino,
		}), 0
	case layerUseFile:
		// "use" is write-only (see Create); never visible through Lookup.
		log.G(ctx).Debugf("\"use\" file is referred but return ENOENT for reference management")
		return nil, syscall.ENOENT
	default:
		log.G(ctx).Warnf("unknown filename %q", name)
		return nil, syscall.ENOENT
	}
}
// linknode is a symlink node whose target is fixed at creation time.
type linknode struct {
	fusefs.Inode
	linkname string // symlink target returned by Readlink
}

// Compile-time interface assertions.
var _ = (fusefs.InodeEmbedder)((*linknode)(nil))
var _ = (fusefs.NodeReadlinker)((*linknode)(nil))
// Readlink returns the statically embedded symlink target.
func (n *linknode) Readlink(ctx context.Context) ([]byte, syscall.Errno) {
	target := n.linkname // TODO: should the target really be embedded statically?
	return []byte(target), 0
}
// copyAttr copies every field of src into dest.
func copyAttr(dest, src *fuse.Attr) {
	// Identity and sizing.
	dest.Ino = src.Ino
	dest.Size = src.Size
	dest.Blocks = src.Blocks
	dest.Blksize = src.Blksize
	// Timestamps (seconds + nanoseconds parts).
	dest.Atime = src.Atime
	dest.Atimensec = src.Atimensec
	dest.Mtime = src.Mtime
	dest.Mtimensec = src.Mtimensec
	dest.Ctime = src.Ctime
	dest.Ctimensec = src.Ctimensec
	// Permissions and ownership.
	dest.Mode = src.Mode
	dest.Nlink = src.Nlink
	dest.Owner = src.Owner
	dest.Rdev = src.Rdev
	dest.Padding = src.Padding
}
// defaultDirAttr fills out with the fixed attributes used for synthetic
// directories (root-owned, dr-x------) and returns the matching StableAttr.
// Fields not set here (Ino, Blksize, timestamps, ...) keep their zero values.
func defaultDirAttr(out *fuse.Attr) fusefs.StableAttr {
	out.Mode = defaultDirMode
	out.Size = 0
	out.Owner = fuse.Owner{Uid: 0, Gid: 0}
	return fusefs.StableAttr{Mode: out.Mode}
}
// defaultLinkAttr fills out with the fixed attributes used for synthetic
// symlinks (root-owned, -r--------) and returns the matching StableAttr.
// Fields not set here (Ino, Blksize, timestamps, ...) keep their zero values.
func defaultLinkAttr(out *fuse.Attr) fusefs.StableAttr {
	out.Mode = defaultLinkMode
	out.Size = 0
	out.Owner = fuse.Owner{Uid: 0, Gid: 0}
	return fusefs.StableAttr{Mode: out.Mode}
}

View File

@ -1,66 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"context"
"io"
"os"
"time"
"github.com/containerd/containerd/v2/defaults"
"github.com/containerd/containerd/v2/pkg/dialer"
"github.com/containerd/stargz-snapshotter/store/pb"
grpc "google.golang.org/grpc"
"google.golang.org/grpc/backoff"
"google.golang.org/grpc/credentials/insecure"
)
// main reads a JSON credential blob from stdin and forwards it to the
// stargz-store controller over its unix socket. The socket path may be
// overridden via the first command-line argument.
func main() {
	var addr = "/var/lib/stargz-store/store.sock" // default
	if len(os.Args) >= 2 {
		addr = os.Args[1]
	}
	data, err := io.ReadAll(os.Stdin)
	if err != nil {
		panic(err)
	}
	// Bound reconnection backoff so a missing store fails reasonably fast.
	backoffConfig := backoff.DefaultConfig
	backoffConfig.MaxDelay = 3 * time.Second
	connParams := grpc.ConnectParams{
		Backoff: backoffConfig,
	}
	gopts := []grpc.DialOption{
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithConnectParams(connParams),
		grpc.WithContextDialer(dialer.ContextDialer),
		grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(defaults.DefaultMaxRecvMsgSize)),
		grpc.WithDefaultCallOptions(grpc.MaxCallSendMsgSize(defaults.DefaultMaxSendMsgSize)),
	}
	conn, err := grpc.NewClient(dialer.DialAddress(addr), gopts...)
	if err != nil {
		panic(err)
	}
	// Release the connection on exit (previously leaked; harmless at process
	// exit but incorrect as a pattern and flagged by linters).
	defer conn.Close()
	c := pb.NewControllerClient(conn)
	_, err = c.AddCredential(context.Background(), &pb.AddCredentialRequest{
		Data: data,
	})
	if err != nil {
		panic(err)
	}
}

View File

@ -17,50 +17,33 @@
package main package main
import ( import (
"bytes"
"context" "context"
"encoding/json"
"errors"
"flag" "flag"
"fmt"
"io"
golog "log" golog "log"
"math/rand"
"net"
"os" "os"
"os/signal" "os/signal"
"path/filepath"
"sync"
"syscall" "syscall"
"time"
"github.com/containerd/containerd/v2/pkg/reference" "github.com/BurntSushi/toml"
"github.com/containerd/log" "github.com/containerd/containerd/log"
dbmetadata "github.com/containerd/stargz-snapshotter/cmd/containerd-stargz-grpc/db"
"github.com/containerd/stargz-snapshotter/fs/config" "github.com/containerd/stargz-snapshotter/fs/config"
"github.com/containerd/stargz-snapshotter/metadata" "github.com/containerd/stargz-snapshotter/service/keychain"
memorymetadata "github.com/containerd/stargz-snapshotter/metadata/memory"
"github.com/containerd/stargz-snapshotter/service/keychain/kubeconfig"
"github.com/containerd/stargz-snapshotter/service/resolver" "github.com/containerd/stargz-snapshotter/service/resolver"
"github.com/containerd/stargz-snapshotter/store"
"github.com/containerd/stargz-snapshotter/store/pb"
sddaemon "github.com/coreos/go-systemd/v22/daemon" sddaemon "github.com/coreos/go-systemd/v22/daemon"
"github.com/pelletier/go-toml" "github.com/sirupsen/logrus"
bolt "go.etcd.io/bbolt"
grpc "google.golang.org/grpc"
) )
const ( const (
defaultLogLevel = log.InfoLevel defaultLogLevel = logrus.InfoLevel
defaultConfigPath = "/etc/stargz-store/config.toml" defaultConfigPath = "/etc/stargz-store/config.toml"
defaultRootDir = "/var/lib/stargz-store" defaultRootDir = "/var/lib/stargz-store"
defaultMaxConcurrency = 2
) )
var ( var (
configPath = flag.String("config", defaultConfigPath, "path to the configuration file") configPath = flag.String("config", defaultConfigPath, "path to the configuration file")
logLevel = flag.String("log-level", defaultLogLevel.String(), "set the logging level [trace, debug, info, warn, error, fatal, panic]") logLevel = flag.String("log-level", defaultLogLevel.String(), "set the logging level [trace, debug, info, warn, error, fatal, panic]")
rootDir = flag.String("root", defaultRootDir, "path to the root directory for this snapshotter") rootDir = flag.String("root", defaultRootDir, "path to the root directory for this snapshotter")
listenaddr = flag.String("addr", filepath.Join(defaultRootDir, "store.sock"), "path to the socket listened by this snapshotter")
) )
type Config struct { type Config struct {
@ -71,9 +54,6 @@ type Config struct {
// ResolverConfig is config for resolving registries. // ResolverConfig is config for resolving registries.
ResolverConfig `toml:"resolver"` ResolverConfig `toml:"resolver"`
// MetadataStore is the type of the metadata store to use.
MetadataStore string `toml:"metadata_store" default:"memory"`
} }
type KubeconfigKeychainConfig struct { type KubeconfigKeychainConfig struct {
@ -84,22 +64,24 @@ type KubeconfigKeychainConfig struct {
type ResolverConfig resolver.Config type ResolverConfig resolver.Config
func main() { func main() {
rand.Seed(time.Now().UnixNano()) //nolint:staticcheck // Global math/rand seed is deprecated, but still used by external dependencies
flag.Parse() flag.Parse()
mountPoint := flag.Arg(0) mountPoint := flag.Arg(0)
err := log.SetLevel(*logLevel) lvl, err := logrus.ParseLevel(*logLevel)
if err != nil { if err != nil {
log.L.WithError(err).Fatal("failed to prepare logger") log.L.WithError(err).Fatal("failed to prepare logger")
} }
log.SetFormat(log.JSONFormat) logrus.SetLevel(lvl)
logrus.SetFormatter(&logrus.JSONFormatter{
TimestampFormat: log.RFC3339NanoFixed,
})
var ( var (
ctx = log.WithLogger(context.Background(), log.L) ctx = log.WithLogger(context.Background(), log.L)
config Config config Config
) )
// Streams log of standard lib (go-fuse uses this) into debug log // Streams log of standard lib (go-fuse uses this) into debug log
// Snapshotter should use "github.com/containerd/log" otherwise // Snapshotter should use "github.com/containerd/containerd/log" otherwize
// logs are always printed as "debug" mode. // logs are always printed as "debug" mode.
golog.SetOutput(log.G(ctx).WriterLevel(log.DebugLevel)) golog.SetOutput(log.G(ctx).WriterLevel(logrus.DebugLevel))
if mountPoint == "" { if mountPoint == "" {
log.G(ctx).Fatalf("mount point must be specified") log.G(ctx).Fatalf("mount point must be specified")
@ -107,27 +89,19 @@ func main() {
// Get configuration from specified file // Get configuration from specified file
if *configPath != "" { if *configPath != "" {
tree, err := toml.LoadFile(*configPath) if _, err := toml.DecodeFile(*configPath, &config); err != nil && !(os.IsNotExist(err) && *configPath == defaultConfigPath) {
if err != nil && (!os.IsNotExist(err) || *configPath != defaultConfigPath) {
log.G(ctx).WithError(err).Fatalf("failed to load config file %q", *configPath) log.G(ctx).WithError(err).Fatalf("failed to load config file %q", *configPath)
} }
if err := tree.Unmarshal(&config); err != nil {
log.G(ctx).WithError(err).Fatalf("failed to unmarshal config file %q", *configPath)
}
} }
sk := new(storeKeychain)
errCh := serveController(*listenaddr, sk)
// Prepare kubeconfig-based keychain if required // Prepare kubeconfig-based keychain if required
credsFuncs := []resolver.Credential{sk.credentials} credsFuncs := []func(string) (string, string, error){keychain.NewDockerconfigKeychain(ctx)}
if config.EnableKeychain { if config.KubeconfigKeychainConfig.EnableKeychain {
var opts []kubeconfig.Option var opts []keychain.KubeconfigOption
if kcp := config.KubeconfigPath; kcp != "" { if kcp := config.KubeconfigKeychainConfig.KubeconfigPath; kcp != "" {
opts = append(opts, kubeconfig.WithKubeconfigPath(kcp)) opts = append(opts, keychain.WithKubeconfigPath(kcp))
} }
credsFuncs = append(credsFuncs, kubeconfig.NewKubeconfigKeychain(ctx, opts...)) credsFuncs = append(credsFuncs, keychain.NewKubeconfigKeychain(ctx, opts...))
} }
// Use RegistryHosts based on ResolverConfig and keychain // Use RegistryHosts based on ResolverConfig and keychain
@ -140,18 +114,11 @@ func main() {
Fatalf("failed to prepare mountpoint %q", mountPoint) Fatalf("failed to prepare mountpoint %q", mountPoint)
} }
} }
if config.DisableVerification { pool, err := newPool(*rootDir, hosts, config.Config)
log.G(ctx).Fatalf("content verification can't be disabled")
}
mt, err := getMetadataStore(*rootDir, config)
if err != nil {
log.G(ctx).WithError(err).Fatalf("failed to configure metadata store")
}
layerManager, err := store.NewLayerManager(ctx, *rootDir, hosts, mt, config.Config)
if err != nil { if err != nil {
log.G(ctx).WithError(err).Fatalf("failed to prepare pool") log.G(ctx).WithError(err).Fatalf("failed to prepare pool")
} }
if err := store.Mount(ctx, mountPoint, layerManager, config.Debug); err != nil { if err := mount(mountPoint, pool, config.Config.Debug); err != nil {
log.G(ctx).WithError(err).Fatalf("failed to mount fs at %q", mountPoint) log.G(ctx).WithError(err).Fatalf("failed to mount fs at %q", mountPoint)
} }
defer func() { defer func() {
@ -170,125 +137,12 @@ func main() {
} }
}() }()
if err := waitForSignal(ctx, errCh); err != nil { waitForSIGINT()
log.G(ctx).Errorf("error: %v", err) log.G(ctx).Info("Got SIGINT")
os.Exit(1)
}
} }
func waitForSignal(ctx context.Context, errCh <-chan error) error { func waitForSIGINT() {
c := make(chan os.Signal, 1) c := make(chan os.Signal, 1)
signal.Notify(c, os.Interrupt) signal.Notify(c, os.Interrupt)
select { <-c
case s := <-c:
log.G(ctx).Infof("Got %v", s)
case err := <-errCh:
return err
}
return nil
}
const (
memoryMetadataType = "memory"
dbMetadataType = "db"
)
func getMetadataStore(rootDir string, config Config) (metadata.Store, error) {
switch config.MetadataStore {
case "", memoryMetadataType:
return memorymetadata.NewReader, nil
case dbMetadataType:
bOpts := bolt.Options{
NoFreelistSync: true,
InitialMmapSize: 64 * 1024 * 1024,
FreelistType: bolt.FreelistMapType,
}
db, err := bolt.Open(filepath.Join(rootDir, "metadata.db"), 0600, &bOpts)
if err != nil {
return nil, err
}
return func(sr *io.SectionReader, opts ...metadata.Option) (metadata.Reader, error) {
return dbmetadata.NewReader(db, sr, opts...)
}, nil
default:
return nil, fmt.Errorf("unknown metadata store type: %v; must be %v or %v",
config.MetadataStore, memoryMetadataType, dbMetadataType)
}
}
func newController(addCredentialFunc func(data []byte) error) *controller {
return &controller{
addCredentialFunc: addCredentialFunc,
}
}
type controller struct {
addCredentialFunc func(data []byte) error
}
func (c *controller) AddCredential(ctx context.Context, req *pb.AddCredentialRequest) (resp *pb.AddCredentialResponse, _ error) {
return &pb.AddCredentialResponse{}, c.addCredentialFunc(req.Data)
}
type authConfig struct {
Username string `json:"username,omitempty"`
Password string `json:"password,omitempty"`
IdentityToken string `json:"identityToken,omitempty"`
}
type storeKeychain struct {
config map[string]authConfig
configMu sync.Mutex
}
func (sk *storeKeychain) add(data []byte) error {
conf := make(map[string]authConfig)
if err := json.NewDecoder(bytes.NewReader(data)).Decode(&conf); err != nil && !errors.Is(err, io.EOF) {
return err
}
sk.configMu.Lock()
if sk.config == nil {
sk.config = make(map[string]authConfig)
}
for k, c := range conf {
sk.config[k] = c
}
sk.configMu.Unlock()
return nil
}
func (sk *storeKeychain) credentials(host string, refspec reference.Spec) (string, string, error) {
if host != refspec.Hostname() {
return "", "", nil // Do not use creds for mirrors
}
sk.configMu.Lock()
defer sk.configMu.Unlock()
if acfg, ok := sk.config[refspec.String()]; ok {
if acfg.IdentityToken != "" {
return "", acfg.IdentityToken, nil
} else if acfg.Username != "" || acfg.Password != "" {
return acfg.Username, acfg.Password, nil
}
}
return "", "", nil
}
func serveController(addr string, sk *storeKeychain) <-chan error {
// Try to remove the socket file to avoid EADDRINUSE
os.Remove(addr)
rpc := grpc.NewServer()
c := newController(sk.add)
pb.RegisterControllerServer(rpc, c)
errCh := make(chan error, 1)
go func() {
l, err := net.Listen("unix", addr)
if err != nil {
errCh <- fmt.Errorf("error on listen socket %q: %w", addr, err)
return
}
if err := rpc.Serve(l); err != nil {
errCh <- fmt.Errorf("error on serving via socket %q: %w", addr, err)
}
}()
return errCh
} }

530
cmd/stargz-store/pool.go Normal file
View File

@ -0,0 +1,530 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/containerd/containerd/images"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/platforms"
"github.com/containerd/containerd/reference"
"github.com/containerd/containerd/remotes"
"github.com/containerd/containerd/remotes/docker"
"github.com/containerd/stargz-snapshotter/estargz"
"github.com/containerd/stargz-snapshotter/fs/config"
"github.com/containerd/stargz-snapshotter/fs/layer"
fsmetrics "github.com/containerd/stargz-snapshotter/fs/metrics"
"github.com/containerd/stargz-snapshotter/task"
"github.com/containerd/stargz-snapshotter/util/namedmutex"
"github.com/docker/go-metrics"
digest "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
)
const (
	// remoteSnapshotLogKey is a key for log line, which indicates whether
	// `Prepare` method successfully prepared targeting remote snapshot or not, as
	// defined in the following:
	//
	//   - "true"  : indicates the snapshot has been successfully prepared as a
	//               remote snapshot
	//   - "false" : indicates the snapshot failed to be prepared as a remote
	//               snapshot
	//   - null    : undetermined
	remoteSnapshotLogKey = "remote-snapshot-prepared"
	prepareSucceeded     = "true"  // log value for successful preparation
	prepareFailed        = "false" // log value for failed preparation
)
// newPool creates a pool rooted at <root>/pool. It wires up the background
// task manager (bounded by cfg.MaxConcurrency), the layer resolver, and —
// unless disabled — Prometheus layer metrics.
func newPool(root string, hosts docker.RegistryHosts, cfg config.Config) (*pool, error) {
	var poolroot = filepath.Join(root, "pool")
	if err := os.MkdirAll(poolroot, 0700); err != nil {
		return nil, err
	}
	// Fall back to the package default when concurrency is unset (0).
	maxConcurrency := cfg.MaxConcurrency
	if maxConcurrency == 0 {
		maxConcurrency = defaultMaxConcurrency
	}
	tm := task.NewBackgroundTaskManager(maxConcurrency, 5*time.Second)
	r, err := layer.NewResolver(root, tm, cfg)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to setup resolver")
	}
	// Metrics namespace is nil when Prometheus is disabled; NewLayerMetrics
	// tolerates a nil namespace.
	var ns *metrics.Namespace
	if !cfg.NoPrometheus {
		ns = metrics.NewNamespace("stargz", "fs", nil)
	}
	c := fsmetrics.NewLayerMetrics(ns)
	if ns != nil {
		metrics.Register(ns)
	}
	return &pool{
		path:                  poolroot,
		layer:                 make(map[string]layer.Layer),
		hosts:                 hosts,
		refcounter:            make(map[string]map[string]int),
		resolver:              r,
		prefetchSize:          cfg.PrefetchSize,
		noprefetch:            cfg.NoPrefetch,
		noBackgroundFetch:     cfg.NoBackgroundFetch,
		backgroundTaskManager: tm,
		allowNoVerification:   cfg.AllowNoVerification,
		disableVerification:   cfg.DisableVerification,
		metricsController:     c,
		resolveLock:           new(namedmutex.NamedMutex),
	}, nil
}
// pool provides manifests, configs and layers of images.
// This also manages caches for these resources.
type pool struct {
	path    string                 // on-disk root of this pool
	layer   map[string]layer.Layer // resolved layers keyed by "<ref>/<digest>"
	layerMu sync.Mutex

	hosts docker.RegistryHosts // registry host configuration for fetching

	refcounter   map[string]map[string]int // in-use counters: ref -> digest -> count
	refcounterMu sync.Mutex

	resolver              *layer.Resolver
	prefetchSize          int64 // bytes to prefetch eagerly per layer
	noprefetch            bool  // disable eager prefetch
	noBackgroundFetch     bool  // disable whole-layer background fetch
	backgroundTaskManager *task.BackgroundTaskManager
	allowNoVerification   bool
	disableVerification   bool
	metricsController     *fsmetrics.Controller
	resolveLock           *namedmutex.NamedMutex // serializes resolution per layer key
}
// root reports the on-disk root directory of this pool.
func (p *pool) root() string { return p.path }
// metadataDir returns the per-image metadata directory, derived from a digest
// of the image reference so it is filesystem-safe and collision-free.
func (p *pool) metadataDir(refspec reference.Spec) string {
	refDigest := digest.FromString(refspec.String()).String()
	return filepath.Join(p.path, "metadata--"+colon2dash(refDigest))
}
// manifestFile returns the path of the cached manifest for refspec.
func (p *pool) manifestFile(refspec reference.Spec) string {
	dir := p.metadataDir(refspec)
	return filepath.Join(dir, "manifest")
}
// configFile returns the path of the cached image config for refspec.
func (p *pool) configFile(refspec reference.Spec) string {
	dir := p.metadataDir(refspec)
	return filepath.Join(dir, "config")
}
// layerInfoFile returns the path of the cached layer-info JSON for the
// given image reference and layer digest.
func (p *pool) layerInfoFile(refspec reference.Spec, dgst digest.Digest) string {
	dir := p.metadataDir(refspec)
	return filepath.Join(dir, colon2dash(dgst.String()))
}
// loadManifestAndConfig returns the manifest and config of refspec together
// with their on-disk cache paths. The on-disk cache is tried first; on a
// cache miss both documents are fetched from the registry and written back.
func (p *pool) loadManifestAndConfig(ctx context.Context, refspec reference.Spec) (manifest ocispec.Manifest, mPath string, config ocispec.Image, cPath string, err error) {
	manifest, mPath, config, cPath, err = p.readManifestAndConfig(refspec)
	if err == nil {
		log.G(ctx).Debugf("reusing manifest and config of %q", refspec.String())
		return
	}
	log.G(ctx).WithError(err).Debugf("fetching manifest and config of %q", refspec.String())
	manifest, config, err = fetchManifestAndConfig(ctx, p.hosts, refspec)
	if err != nil {
		return ocispec.Manifest{}, "", ocispec.Image{}, "", err
	}
	// Persist for subsequent lookups.
	mPath, cPath, err = p.writeManifestAndConfig(refspec, manifest, config)
	if err != nil {
		return ocispec.Manifest{}, "", ocispec.Image{}, "", err
	}
	return manifest, mPath, config, cPath, err
}
// loadLayerInfo returns the path of the cached layer-info JSON for the given
// layer, generating and persisting it first if it doesn't exist yet.
func (p *pool) loadLayerInfo(ctx context.Context, refspec reference.Spec, dgst digest.Digest) (layerInfoPath string, err error) {
	layerInfoPath = p.layerInfoFile(refspec, dgst)
	if _, err := os.Stat(layerInfoPath); err == nil {
		log.G(ctx).Debugf("reusing layer info of %q/%q: %q",
			refspec.String(), dgst.String(), layerInfoPath)
		return layerInfoPath, nil
	}
	manifest, _, config, _, err := p.loadManifestAndConfig(ctx, refspec)
	if err != nil {
		return "", errors.Wrapf(err, "failed to get manifest and config")
	}
	info, err := genLayerInfo(dgst, manifest, config)
	if err != nil {
		return "", errors.Wrapf(err, "failed to generate layer info")
	}
	if err := os.MkdirAll(filepath.Dir(layerInfoPath), 0700); err != nil {
		return "", err
	}
	infoF, err := os.Create(layerInfoPath) // TODO: file mode
	if err != nil {
		return "", err
	}
	if err := json.NewEncoder(infoF).Encode(&info); err != nil {
		infoF.Close()
		return "", err
	}
	// Close errors matter on the write path: a failed close can mean the
	// cached info wasn't fully persisted (was silently dropped by the old
	// `defer infoF.Close()`).
	return layerInfoPath, infoF.Close()
}
// loadLayer resolves and returns the layer described by target. Layers in
// preResolve are resolved concurrently as a warm-up; only the target layer's
// result is awaited (bounded by a 30s timeout). Even on timeout, resolution
// keeps running in the background and registers into p.layer, so a retried
// lookup can succeed later.
func (p *pool) loadLayer(ctx context.Context, refspec reference.Spec, target ocispec.Descriptor, preResolve []ocispec.Descriptor) (layer.Layer, error) {
	var (
		result layer.Layer
		// Buffered so senders never block after we've returned on the timeout
		// branch. With the previous unbuffered channels, the resolving
		// goroutine leaked forever on a timed-out send.
		resultChan = make(chan layer.Layer, len(preResolve)+1)
		errChan    = make(chan error, len(preResolve)+1)
	)
	for _, l := range append([]ocispec.Descriptor{target}, preResolve...) {
		l := l // capture per-iteration copy (pre-Go 1.22 loop semantics)

		// Check if layer is already resolved before creating goroutine.
		key := refspec.String() + "/" + l.Digest.String()
		p.layerMu.Lock()
		gotL, ok := p.layer[key]
		p.layerMu.Unlock()
		if ok {
			// Layer already resolved
			if l.Digest.String() != target.Digest.String() {
				continue // This is not the target layer; nop
			}
			result = gotL
			continue
		}

		// Resolve the layer
		go func() {
			// Avoids to get canceled by client.
			ctx := context.Background()
			gotL, err := p.resolveLayer(ctx, refspec, l)
			if l.Digest.String() != target.Digest.String() {
				return // This is not target layer
			}
			if err != nil {
				errChan <- errors.Wrapf(err, "failed to resolve layer %q / %q",
					refspec, l.Digest)
				return
			}
			// Log this as preparation success
			log.G(ctx).WithField(remoteSnapshotLogKey, prepareSucceeded).
				Debugf("successfully resolved layer")
			resultChan <- gotL
		}()
	}
	if result != nil {
		return result, nil
	}

	// Wait for resolving completion
	var l layer.Layer
	select {
	case l = <-resultChan:
	case err := <-errChan:
		log.G(ctx).WithError(err).Debug("failed to resolve layer")
		return nil, errors.Wrapf(err, "failed to resolve layer")
	case <-time.After(30 * time.Second):
		log.G(ctx).Debug("failed to resolve layer (timeout)")
		return nil, fmt.Errorf("failed to resolve layer (timeout)")
	}
	return l, nil
}
// resolveLayer resolves, verifies, and registers a single layer, returning
// the cached entry if one exists. Resolution of the same layer key is
// serialized via resolveLock. On success it also kicks off prefetch and
// background-fetch goroutines (unless disabled by configuration).
func (p *pool) resolveLayer(ctx context.Context, refspec reference.Spec, target ocispec.Descriptor) (layer.Layer, error) {
	key := refspec.String() + "/" + target.Digest.String()

	// Wait if resolving this layer is already running.
	p.resolveLock.Lock(key)
	defer p.resolveLock.Unlock(key)

	p.layerMu.Lock()
	gotL, ok := p.layer[key]
	p.layerMu.Unlock()
	if ok {
		// layer already resolved
		return gotL, nil
	}

	// Resolve this layer.
	l, err := p.resolver.Resolve(ctx, p.hosts, refspec, target)
	if err != nil {
		return nil, err
	}

	// Verify layer's content
	labels := target.Annotations
	if labels == nil {
		labels = make(map[string]string)
	}
	if p.disableVerification {
		// Skip if verification is disabled completely
		l.SkipVerify()
		log.G(ctx).Debugf("Verification forcefully skipped")
	} else if tocDigest, ok := labels[estargz.TOCJSONDigestAnnotation]; ok {
		// Verify this layer using the TOC JSON digest passed through label.
		dgst, err := digest.Parse(tocDigest)
		if err != nil {
			log.G(ctx).WithError(err).Debugf("failed to parse passed TOC digest %q", dgst)
			return nil, errors.Wrapf(err, "invalid TOC digest: %v", tocDigest)
		}
		if err := l.Verify(dgst); err != nil {
			log.G(ctx).WithError(err).Debugf("invalid layer")
			return nil, errors.Wrapf(err, "invalid stargz layer")
		}
		log.G(ctx).Debugf("verified")
	} else {
		// Verification must be done. Don't mount this layer.
		return nil, fmt.Errorf("digest of TOC JSON must be passed")
	}

	// Prefetch this layer. We prefetch several layers in parallel. The first
	// Check() for this layer waits for the prefetch completion.
	if !p.noprefetch {
		go func() {
			p.backgroundTaskManager.DoPrioritizedTask()
			defer p.backgroundTaskManager.DonePrioritizedTask()
			if err := l.Prefetch(p.prefetchSize); err != nil {
				log.G(ctx).WithError(err).Debug("failed to prefetched layer")
				return
			}
			log.G(ctx).Debug("completed to prefetch")
		}()
	}

	// Fetch whole layer aggressively in background. We use background
	// reader for this so prioritized tasks(Mount, Check, etc...) can
	// interrupt the reading. This can avoid disturbing prioritized tasks
	// about NW traffic.
	if !p.noBackgroundFetch {
		go func() {
			if err := l.BackgroundFetch(); err != nil {
				log.G(ctx).WithError(err).Debug("failed to fetch whole layer")
				return
			}
			log.G(ctx).Debug("completed to fetch all layer data in background")
		}()
	}

	// Register this layer.
	p.layerMu.Lock()
	p.layer[key] = l
	p.layerMu.Unlock()
	p.metricsController.Add(key, l)

	return l, nil
}
// release decrements the use counter of the layer identified by ref and dgst
// and returns the resulting count. It returns an error when the layer is
// unknown to the pool or its counter is already zero.
func (p *pool) release(ref reference.Spec, dgst digest.Digest) (int, error) {
	// TODO: implement GC
	refKey, dgstKey := ref.String(), dgst.String()
	p.refcounterMu.Lock()
	defer p.refcounterMu.Unlock()
	layers, ok := p.refcounter[refKey]
	if !ok {
		return 0, fmt.Errorf("ref %q not found during release", refKey)
	}
	count, ok := layers[dgstKey]
	if !ok {
		return 0, fmt.Errorf("layer %q/%q not found during release", refKey, dgstKey)
	}
	if count <= 0 {
		return 0, fmt.Errorf("layer %q/%q isn't used", refKey, dgstKey)
	}
	count--
	layers[dgstKey] = count
	return count, nil
}
// use increments the use counter of the layer identified by ref and dgst and
// returns the resulting count, creating the per-ref counter map on first use.
func (p *pool) use(ref reference.Spec, dgst digest.Digest) int {
	// TODO: implement GC
	refKey, dgstKey := ref.String(), dgst.String()
	p.refcounterMu.Lock()
	defer p.refcounterMu.Unlock()
	layers := p.refcounter[refKey]
	if layers == nil {
		layers = make(map[string]int)
		p.refcounter[refKey] = layers
	}
	layers[dgstKey]++
	return layers[dgstKey]
}
// readManifestAndConfig loads the locally cached manifest and image config of
// refspec from disk, returning the decoded documents together with the paths
// they were read from.
func (p *pool) readManifestAndConfig(refspec reference.Spec) (manifest ocispec.Manifest, mPath string, config ocispec.Image, cPath string, _ error) {
	mPath, cPath = p.manifestFile(refspec), p.configFile(refspec)
	for _, target := range []struct {
		path string
		out  interface{}
	}{
		{mPath, &manifest},
		{cPath, &config},
	} {
		f, err := os.Open(target.path)
		if err != nil {
			return ocispec.Manifest{}, "", ocispec.Image{}, "", err
		}
		err = json.NewDecoder(f).Decode(target.out)
		f.Close()
		if err != nil {
			return ocispec.Manifest{}, "", ocispec.Image{}, "", err
		}
	}
	return manifest, mPath, config, cPath, nil
}
// writeManifestAndConfig persists the manifest and image config of refspec to
// their on-disk cache locations, creating parent directories as needed, and
// returns the paths written to.
func (p *pool) writeManifestAndConfig(refspec reference.Spec, manifest ocispec.Manifest, config ocispec.Image) (mPath string, cPath string, _ error) {
	mPath, cPath = p.manifestFile(refspec), p.configFile(refspec)
	// writeJSON encodes v as JSON into a freshly created file at path.
	writeJSON := func(path string, v interface{}) error {
		if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil {
			return err
		}
		f, err := os.Create(path) // TODO: file mode
		if err != nil {
			return err
		}
		defer f.Close()
		return json.NewEncoder(f).Encode(v)
	}
	if err := writeJSON(mPath, &manifest); err != nil {
		return "", "", err
	}
	if err := writeJSON(cPath, &config); err != nil {
		return "", "", err
	}
	return mPath, cPath, nil
}
// fetchManifestAndConfig pulls the manifest and the image config of refspec
// from the registry described by hosts, resolving multi-platform images to
// the default platform of the current host.
func fetchManifestAndConfig(ctx context.Context, hosts docker.RegistryHosts, refspec reference.Spec) (ocispec.Manifest, ocispec.Image, error) {
	var config ocispec.Image
	resolver := docker.NewResolver(docker.ResolverOptions{Hosts: hosts})
	name := refspec.String()
	_, desc, err := resolver.Resolve(ctx, name)
	if err != nil {
		return ocispec.Manifest{}, ocispec.Image{}, err
	}
	fetcher, err := resolver.Fetcher(ctx, name)
	if err != nil {
		return ocispec.Manifest{}, ocispec.Image{}, err
	}
	// TODO: should we make the platform configurable?
	manifest, err := fetchManifestPlatform(ctx, fetcher, desc, platforms.DefaultSpec())
	if err != nil {
		return ocispec.Manifest{}, ocispec.Image{}, err
	}
	rc, err := fetcher.Fetch(ctx, manifest.Config)
	if err != nil {
		return ocispec.Manifest{}, ocispec.Image{}, err
	}
	defer rc.Close()
	if err := json.NewDecoder(rc).Decode(&config); err != nil {
		return ocispec.Manifest{}, ocispec.Image{}, err
	}
	return manifest, config, nil
}
// fetchManifestPlatform fetches the manifest pointed to by desc. When desc is
// a manifest list / image index, it recursively resolves to the manifest
// matching the given platform. The whole operation is bounded by a one-minute
// timeout.
func fetchManifestPlatform(ctx context.Context, fetcher remotes.Fetcher, desc ocispec.Descriptor, platform ocispec.Platform) (ocispec.Manifest, error) {
	ctx, cancel := context.WithTimeout(ctx, time.Minute)
	defer cancel()

	r, err := fetcher.Fetch(ctx, desc)
	if err != nil {
		return ocispec.Manifest{}, err
	}
	defer r.Close()

	var manifest ocispec.Manifest
	switch desc.MediaType {
	case images.MediaTypeDockerSchema2Manifest, ocispec.MediaTypeImageManifest:
		err = json.NewDecoder(r).Decode(&manifest)
	case images.MediaTypeDockerSchema2ManifestList, ocispec.MediaTypeImageIndex:
		var index ocispec.Index
		if err = json.NewDecoder(r).Decode(&index); err != nil {
			return ocispec.Manifest{}, err
		}
		// Pick the first manifest in the index matching the requested
		// platform. The matcher is loop-invariant, so build it once instead
		// of on every iteration.
		matcher := platforms.NewMatcher(platform)
		var target ocispec.Descriptor
		found := false
		for _, m := range index.Manifests {
			p := platforms.DefaultSpec()
			if m.Platform != nil {
				p = *m.Platform
			}
			if !matcher.Match(p) {
				continue
			}
			target = m
			found = true
			break
		}
		if !found {
			return ocispec.Manifest{}, fmt.Errorf("no manifest found for platform")
		}
		manifest, err = fetchManifestPlatform(ctx, fetcher, target, platform)
	default:
		err = fmt.Errorf("unknown mediatype %q", desc.MediaType)
	}
	return manifest, err
}
// colon2dash returns a copy of s with every ":" replaced by "-".
func colon2dash(s string) string {
	var b strings.Builder
	b.Grow(len(s))
	for _, r := range s {
		if r == ':' {
			b.WriteByte('-')
		} else {
			b.WriteRune(r)
		}
	}
	return b.String()
}
// Layer represents the layer information. Format is compatible to the one required by
// "additional layer store" of github.com/containers/storage.
type Layer struct {
	// CompressedDigest is the digest of the compressed layer blob.
	// NOTE(review): the JSON key is "compressed-diff-digest" — this naming
	// comes from containers/storage; confirm against that project before
	// changing it.
	CompressedDigest digest.Digest `json:"compressed-diff-digest,omitempty"`
	// CompressedSize is the size in bytes of the compressed layer blob.
	CompressedSize int64 `json:"compressed-size,omitempty"`
	// UncompressedDigest is the digest of the uncompressed (diff) contents.
	UncompressedDigest digest.Digest `json:"diff-digest,omitempty"`
	// UncompressedSize is the size in bytes of the uncompressed contents.
	UncompressedSize int64 `json:"diff-size,omitempty"`
	// CompressionType identifies the compression algorithm, using the magic
	// numbers defined by containers/storage (see gzipTypeMagicNum).
	CompressionType int `json:"compression,omitempty"`
	// ReadOnly is excluded from the JSON representation.
	ReadOnly bool `json:"-"`
}
const (
	// gzipTypeMagicNum is the compression-type value denoting gzip.
	// Defined in https://github.com/containers/storage/blob/b64e13a1afdb0bfed25601090ce4bbbb1bc183fc/pkg/archive/archive.go#L108-L119
	gzipTypeMagicNum = 2
)
// genLayerInfo builds the Layer information of the layer identified by dgst,
// combining the compressed-side metadata from the image manifest with the
// uncompressed diff ID from the image config. The manifest and config must
// describe the same image, i.e. have matching layer/diffID counts.
func genLayerInfo(dgst digest.Digest, manifest ocispec.Manifest, config ocispec.Image) (Layer, error) {
	if len(manifest.Layers) != len(config.RootFS.DiffIDs) {
		// Fix: the message previously referenced a nonexistent field
		// ("config.Rootfs"); report the fields actually compared.
		return Layer{}, fmt.Errorf(
			"len(manifest.Layers) != len(config.RootFS.DiffIDs): %d != %d",
			len(manifest.Layers), len(config.RootFS.DiffIDs))
	}
	// Scan all layers; as in the original behavior, the last matching entry
	// wins if the digest appears more than once.
	layerIndex := -1
	for i, l := range manifest.Layers {
		if l.Digest == dgst {
			layerIndex = i
		}
	}
	if layerIndex == -1 {
		return Layer{}, fmt.Errorf("layer %q not found in the manifest", dgst.String())
	}
	return Layer{
		CompressedDigest:   manifest.Layers[layerIndex].Digest,
		CompressedSize:     manifest.Layers[layerIndex].Size,
		UncompressedDigest: config.RootFS.DiffIDs[layerIndex],
		UncompressedSize:   0, // TODO: obtain the real uncompressed size
		CompressionType:    gzipTypeMagicNum,
		ReadOnly:           true,
	}, nil
}

View File

@ -28,7 +28,7 @@ To enable lazy pulling of eStargz on containerd, you need to install *Stargz Sna
This section shows the step to install Stargz Snapshotter with systemd. This section shows the step to install Stargz Snapshotter with systemd.
We assume that you are using containerd (> v1.4.2) as a CRI runtime. We assume that you are using containerd (> v1.4.2) as a CRI runtime.
- Download release tarball from [the release page](https://github.com/containerd/stargz-snapshotter/releases). - Download release tarball from [the release page](https://github.com/containerd/stargz-snapshotter/releases). For example, amd64 binary of v0.6.0 is available from https://github.com/containerd/stargz-snapshotter/releases/download/v0.6.0/stargz-snapshotter-v0.6.0-linux-amd64.tar.gz.
- Add the following configuration to containerd's configuration file (typically: /etc/containerd/config.toml). Please see also [an example configuration file](../script/config/etc/containerd/config.toml). - Add the following configuration to containerd's configuration file (typically: /etc/containerd/config.toml). Please see also [an example configuration file](../script/config/etc/containerd/config.toml).
```toml ```toml
@ -44,8 +44,6 @@ We assume that you are using containerd (> v1.4.2) as a CRI runtime.
[proxy_plugins.stargz] [proxy_plugins.stargz]
type = "snapshot" type = "snapshot"
address = "/run/containerd-stargz-grpc/containerd-stargz-grpc.sock" address = "/run/containerd-stargz-grpc/containerd-stargz-grpc.sock"
[proxy_plugins.stargz.exports]
root = "/var/lib/containerd-stargz-grpc/"
``` ```
@ -70,8 +68,8 @@ We assume that you are using containerd (> v1.4.2) as a CRI runtime.
- Start stargz-snapshotter and restart containerd - Start stargz-snapshotter and restart containerd
``` ```
tar -C /usr/local/bin -xvf stargz-snapshotter-${version}-linux-${arch}.tar.gz containerd-stargz-grpc ctr-remote tar -xvf stargz-snapshotter-${version}-linux-${arch}.tar.gz containerd-stargz-grpc ctr-remote -C /usr/local/bin
wget -O /etc/systemd/system/stargz-snapshotter.service https://raw.githubusercontent.com/containerd/stargz-snapshotter/main/script/config/etc/systemd/system/stargz-snapshotter.service wget -O /etc/systemd/system/stargz-snapshotter.service https://raw.githubusercontent.com/containerd/stargz-snapshotter/master/script/config/etc/systemd/system/stargz-snapshotter.service
systemctl enable --now stargz-snapshotter systemctl enable --now stargz-snapshotter
systemctl restart containerd systemctl restart containerd
``` ```
@ -82,7 +80,7 @@ To enable lazy pulling of eStargz on CRI-O/Podman, you need to install *Stargz S
This section shows the step to install Stargz Store with systemd. This section shows the step to install Stargz Store with systemd.
We assume that you are using CRI-O newer than https://github.com/cri-o/cri-o/pull/4850 or Podman newer than https://github.com/containers/podman/pull/10214 . We assume that you are using CRI-O newer than https://github.com/cri-o/cri-o/pull/4850 or Podman newer than https://github.com/containers/podman/pull/10214 .
- Download release tarball from [the release page](https://github.com/containerd/stargz-snapshotter/releases). - Download release tarball from [the release page](https://github.com/containerd/stargz-snapshotter/releases). For example, amd64 binary of v0.6.0 is available from https://github.com/containerd/stargz-snapshotter/releases/download/v0.6.0/stargz-snapshotter-v0.6.0-linux-amd64.tar.gz.
- Add the following configuration to the storage configuration file of CRI-O/Podman (typically: /etc/containers/storage.conf). Please see also [an example configuration file](../script/config-cri-o/etc/containers/storage.conf). - Add the following configuration to the storage configuration file of CRI-O/Podman (typically: /etc/containers/storage.conf). Please see also [an example configuration file](../script/config-cri-o/etc/containers/storage.conf).
```toml ```toml
@ -116,69 +114,8 @@ We assume that you are using CRI-O newer than https://github.com/cri-o/cri-o/pul
- Start stargz-store (CRI-O also needs to be restarted if you are using) - Start stargz-store (CRI-O also needs to be restarted if you are using)
``` ```
tar -C /usr/local/bin -xvf stargz-snapshotter-${version}-linux-${arch}.tar.gz stargz-store tar -xvf stargz-snapshotter-${version}-linux-${arch}.tar.gz stargz-store -C /usr/local/bin
wget -O /etc/systemd/system/stargz-store.service https://raw.githubusercontent.com/containerd/stargz-snapshotter/main/script/config-cri-o/etc/systemd/system/stargz-store.service wget -O /etc/systemd/system/stargz-store.service https://raw.githubusercontent.com/containerd/stargz-snapshotter/master/script/config-cri-o/etc/systemd/system/stargz-store.service
systemctl enable --now stargz-store systemctl enable --now stargz-store
systemctl restart cri-o # if you are using CRI-O systemctl restart cri-o # if you are using CRI-O
``` ```
## Install Stargz Snapshotter for Docker(Moby) with Systemd
- Docker(Moby) newer than [`5c1d6c957b97321c8577e10ddbffe6e01981617a`](https://github.com/moby/moby/commit/5c1d6c957b97321c8577e10ddbffe6e01981617a) is needed on your host. The commit is expected to be included in Docker v24.
- Download stargz-snapshotter release tarball from [the release page](https://github.com/containerd/stargz-snapshotter/releases).
- Enable `containerd-snapshotter` feature and `stargz` snapshotter in Docker. Add the following to docker's configuration file (typically: /etc/docker/daemon.json).
```json
{
"features": {
"containerd-snapshotter": true
},
"storage-driver": "stargz"
}
```
- Enable stargz snapshotter in containerd. Add the following configuration to containerd's configuration file (typically: /etc/containerd/config.toml).
```toml
version = 2
# Plug stargz snapshotter into containerd
[proxy_plugins]
[proxy_plugins.stargz]
type = "snapshot"
address = "/run/containerd-stargz-grpc/containerd-stargz-grpc.sock"
[proxy_plugins.stargz.exports]
root = "/var/lib/containerd-stargz-grpc/"
```
- Install fuse
###### centos
```
# centos 7
yum install fuse
# centos 8
dnf install fuse
modprobe fuse
```
###### ubuntu
```
apt-get install fuse
modprobe fuse
```
- Start stargz-snapshotter and restart containerd and docker
```
tar -C /usr/local/bin -xvf stargz-snapshotter-${version}-linux-${arch}.tar.gz containerd-stargz-grpc ctr-remote
wget -O /etc/systemd/system/stargz-snapshotter.service https://raw.githubusercontent.com/containerd/stargz-snapshotter/${version}/script/config/etc/systemd/system/stargz-snapshotter.service
systemctl enable --now stargz-snapshotter
systemctl restart containerd
systemctl restart docker
```
## Using stargz-snapshotter on Lima
See [`./lima.md`](./lima.md)

View File

@ -20,9 +20,7 @@ This optimization is done by baking the information about files that are likely
On runtime, Stargz Snapshotter prefetches these prioritized files before mounting the layer for making sure these files are locally accessible. On runtime, Stargz Snapshotter prefetches these prioritized files before mounting the layer for making sure these files are locally accessible.
This can avoid downloading chunks on every file read and mitigate the runtime performance drawbacks. This can avoid downloading chunks on every file read and mitigate the runtime performance drawbacks.
:information_source: For more details about eStargz and its optimization, refer also to [eStargz: Standard-Compatible Extensions to Tar.gz Layers for Lazy Pulling Container Images](/docs/stargz-estargz.md). For more details about eStargz and its optimization, refer also to [eStargz: Standard-Compatible Extensions to Tar.gz Layers for Lazy Pulling Container Images](/docs/stargz-estargz.md).
:information_source: Please see also [Creating smaller eStargz images](/docs/smaller-estargz.md) if you're interested in creating a smaller size of eStargz images.
## Requirements ## Requirements
@ -58,29 +56,6 @@ ctr-remote image push --plain-http registry2:5000/golang:1.15.3-esgz
When you run `ctr-remote image optimize`, this runs the source image (`ghcr.io/stargz-containers/golang:1.15.3-buster-org`) as a container and profiles all file accesses during the execution. When you run `ctr-remote image optimize`, this runs the source image (`ghcr.io/stargz-containers/golang:1.15.3-buster-org`) as a container and profiles all file accesses during the execution.
Then these accessed files are marked as "prioritized" files and will be prefetched on runtime. Then these accessed files are marked as "prioritized" files and will be prefetched on runtime.
You can specify the GZIP compression level the converter should use using the `--estargz-compression-level` flag. The values range from 1-9. If the flag isn't provided, the compression level will default to 9.
A value of 9 indicates the archive will be gzipped with max compression. This will reduce the bytes transferred over the network but increase the CPU cycles required to decompress the payload. Whereas gzip compression value 1 indicates archive will be gzipped with least compression. This will increase the bytes transferred over the network but decreases the CPU cycles required to decompress the payload. This value should be chosen based on the workload and host characteristics.
The following example optimizes an image with a compression level of 1.
```console
# ctr-remote image optimize --oci --estargz-compression-level 1 ghcr.io/stargz-containers/golang:1.15.3-buster-org registry2:5000/golang:1.15.3-esgz
```
You can enable host networking for the container using the `net-host` flag.
```console
# ctr-remote i optimize -t -i --oci --entrypoint='[ "/bin/bash", "-c" ]' --net-host --args='[ "ip a && curl example.com" ]' ghcr.io/stargz-containers/centos:8-test registry2:5000/centos:8-test-esgz
```
You can optimize GPU-based images using the `gpu` flag. The flag expects a comma separated list of integers or 'all'.
```console
# ctr-remote i optimize --oci --gpus "0" <src> <target>
# ctr-remote i optimize --oci --gpus "all" <src> <target>
```
`--oci` option is highly recommended to add when you create eStargz image. `--oci` option is highly recommended to add when you create eStargz image.
If the source image is [Docker image](https://github.com/moby/moby/blob/master/image/spec/v1.2.md) that doesn't allow us [content verification of eStargz](/docs/verification.md), `ctr-remote` converts this image into the [OCI standard compliant image](https://github.com/opencontainers/image-spec/). If the source image is [Docker image](https://github.com/moby/moby/blob/master/image/spec/v1.2.md) that doesn't allow us [content verification of eStargz](/docs/verification.md), `ctr-remote` converts this image into the [OCI standard compliant image](https://github.com/opencontainers/image-spec/).
OCI image also can run on most of modern container runtimes. OCI image also can run on most of modern container runtimes.
@ -269,38 +244,3 @@ ctr-remote image optimize --oci \
By default, when the source image is a multi-platform image, `ctr-remote` converts the image corresponding to the platform where `ctr-remote` runs. By default, when the source image is a multi-platform image, `ctr-remote` converts the image corresponding to the platform where `ctr-remote` runs.
Note that though the images specified by `--all-platform` and `--platform` are converted to eStargz, images that don't correspond to the current platform aren't *optimized*. That is, these images are lazily pulled but without prefetch. Note that though the images specified by `--all-platform` and `--platform` are converted to eStargz, images that don't correspond to the current platform aren't *optimized*. That is, these images are lazily pulled but without prefetch.
### Dump log of accessed files during optimization (`--record-out`)
You can dump the information of which files are accessed during optimization, using `--record-out` flag.
For example, the following dumps logs of files accessed during running `ls` in `ubuntu:24.04`.
```
ctr-remote image pull docker.io/library/ubuntu:24.04
ctr-remote image optimize --record-out=/tmp/log.json \
--entrypoint='[ "/bin/bash", "-c" ]' --args='[ "ls" ]' \
docker.io/library/ubuntu:24.04 registry2:5000/ubuntu:24.04
```
The following is the contents of the log (`/tmp/log.json`):
```
{"path":"usr/bin/bash","manifestDigest":"sha256:5d070ad5f7fe63623cbb99b4fc0fd997f5591303d4b03ccce50f403957d0ddc4","layerIndex":0}
{"path":"usr/bin/bash","manifestDigest":"sha256:5d070ad5f7fe63623cbb99b4fc0fd997f5591303d4b03ccce50f403957d0ddc4","layerIndex":0}
{"path":"usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2","manifestDigest":"sha256:5d070ad5f7fe63623cbb99b4fc0fd997f5591303d4b03ccce50f403957d0ddc4","layerIndex":0}
{"path":"etc/ld.so.cache","manifestDigest":"sha256:5d070ad5f7fe63623cbb99b4fc0fd997f5591303d4b03ccce50f403957d0ddc4","layerIndex":0}
{"path":"usr/lib/x86_64-linux-gnu/libtinfo.so.6.4","manifestDigest":"sha256:5d070ad5f7fe63623cbb99b4fc0fd997f5591303d4b03ccce50f403957d0ddc4","layerIndex":0}
{"path":"usr/lib/x86_64-linux-gnu/libc.so.6","manifestDigest":"sha256:5d070ad5f7fe63623cbb99b4fc0fd997f5591303d4b03ccce50f403957d0ddc4","layerIndex":0}
{"path":"etc/nsswitch.conf","manifestDigest":"sha256:5d070ad5f7fe63623cbb99b4fc0fd997f5591303d4b03ccce50f403957d0ddc4","layerIndex":0}
{"path":"etc/nsswitch.conf","manifestDigest":"sha256:5d070ad5f7fe63623cbb99b4fc0fd997f5591303d4b03ccce50f403957d0ddc4","layerIndex":0}
{"path":"etc/passwd","manifestDigest":"sha256:5d070ad5f7fe63623cbb99b4fc0fd997f5591303d4b03ccce50f403957d0ddc4","layerIndex":0}
{"path":"usr/bin/ls","manifestDigest":"sha256:5d070ad5f7fe63623cbb99b4fc0fd997f5591303d4b03ccce50f403957d0ddc4","layerIndex":0}
{"path":"usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2","manifestDigest":"sha256:5d070ad5f7fe63623cbb99b4fc0fd997f5591303d4b03ccce50f403957d0ddc4","layerIndex":0}
{"path":"etc/ld.so.cache","manifestDigest":"sha256:5d070ad5f7fe63623cbb99b4fc0fd997f5591303d4b03ccce50f403957d0ddc4","layerIndex":0}
{"path":"usr/lib/x86_64-linux-gnu/libselinux.so.1","manifestDigest":"sha256:5d070ad5f7fe63623cbb99b4fc0fd997f5591303d4b03ccce50f403957d0ddc4","layerIndex":0}
{"path":"usr/lib/x86_64-linux-gnu/libc.so.6","manifestDigest":"sha256:5d070ad5f7fe63623cbb99b4fc0fd997f5591303d4b03ccce50f403957d0ddc4","layerIndex":0}
{"path":"usr/lib/x86_64-linux-gnu/libpcre2-8.so.0.11.2","manifestDigest":"sha256:5d070ad5f7fe63623cbb99b4fc0fd997f5591303d4b03ccce50f403957d0ddc4","layerIndex":0}
```
For creating an optimized eStargz using this log, you can input this log into [`--estargz-record-in` or `--zstdchunked-record-in` of `nerdctl image convert`](https://github.com/containerd/nerdctl/blob/8b814ca7fe29cb505a02a3d85ba22860e63d15bf/docs/command-reference.md#nerd_face-nerdctl-image-convert) or the same flags for `ctr-remote image convert` .

View File

@ -1,422 +0,0 @@
# eStargz: Standard-Compatible Extension to Container Image Layers for Lazy Pulling
This doc describes the extension to gzip layers of container images (`application/vnd.oci.image.layer.v1.tar+gzip` of [OCI Image Specification](https://github.com/opencontainers/image-spec/) and `application/vnd.docker.image.rootfs.diff.tar.gzip` of [Docker Image Specification](https://github.com/moby/moby/blob/master/image/spec/v1.2.md)) for *lazy pulling*.
The extension is called *eStargz*.
eStargz is a *backward-compatible extension* which means that images can be pushed to the extension-agnostic registry and can run on extension-agnostic runtimes.
This extension is based on stargz (stands for *seekable tar.gz*) proposed by [Google CRFS](https://github.com/google/crfs) project (initially [discussed in Go community](https://github.com/golang/go/issues/30829)).
eStargz extends stargz for chunk-level verification and runtime performance optimization.
Notational convention follows [OCI Image Specification](https://github.com/opencontainers/image-spec/blob/v1.0.1/spec.md#notational-conventions).
## Overview
Lazy pulling is a technique of pulling container images aiming at the faster cold start.
This allows a container to startup without waiting for the entire image layer contents to be locally available.
Instead, necessary files (or chunks for large files) in the layer are fetched *on-demand* during running the container.
For achieving this, runtimes need to fetch and extract each file in a layer independently.
However, layer without eStargz extension doesn't allow this because of the following reasons,
1. The entire layer blob needs to be extracted even for getting a single file entry.
2. Digests aren't provided for each file so it cannot be verified independently.
eStargz solves these issues and enables lazy pulling.
Additionally, it supports prefetching of files.
This can be used to mitigate runtime performance drawbacks caused by the on-demand fetching of each file.
This extension is a backward-compatible so the eStargz-formatted image can be pushed to the registry and can run even on eStargz-agnostic runtimes.
## The structure
![The structure of eStargz](/docs/images/estargz-structure.png)
eStargz is a gzip-compressed tar archive of files and a metadata component called *TOC* (described in the later section).
In an eStargz-formatted blob, each non-empty regular file and each metadata component MUST be separately compressed as gzip.
This structure is inherited from [stargz](https://github.com/google/crfs).
Therefore, the gzip headers MUST locate at the following locations.
- The top of the blob
- The top of the payload of each non-empty regular file tar entry except *TOC*
- The top of *TOC* tar header
- The top of *footer* (described in the later section)
Large regular files in an eStargz blob MAY be chunked into several smaller gzip members.
Each chunked member is called *chunk* in this doc.
Therefore, gzip headers MAY locate at the following locations.
- Arbitrary location within the payload of non-empty regular file entry
An eStargz-formatted blob is the concatenation of these gzip members, which is a still valid gzip blob.
## TOC, TOCEntries and Footer
### TOC and TOCEntries
eStargz contains a regular file called *TOC* which records metadata (e.g. name, file type, owners, offset etc) of all file entries in eStargz, except TOC itself.
Container runtimes MAY use TOC to mount the container's filesystem without downloading the entire layer contents.
TOC MUST be a JSON file contained as the last tar entry and MUST be named `stargz.index.json`.
The following fields contain the primary properties that constitute a TOC.
- **`version`** *int*
This REQUIRED property contains the version of the TOC. This value MUST be `1`.
- **`entries`** *array of objects*
This property MUST contain an array of *TOCEntry* of all tar entries and chunks in the blob, except `stargz.index.json`.
*TOCEntry* consists of metadata of a file or chunk in eStargz.
If metadata in a TOCEntry of a file differs from the corresponding tar entry, TOCEntry SHOULD be respected.
The following fields contain the primary properties that constitute a TOCEntry.
Properties other than `chunkDigest` are inherited from [stargz](https://github.com/google/crfs).
- **`name`** *string*
This REQUIRED property contains the name of the tar entry.
This MUST be the complete path stored in the tar file.
- **`type`** *string*
This REQUIRED property contains the type of tar entry.
This MUST be either of the following.
- `dir`: directory
- `reg`: regular file
- `symlink`: symbolic link
- `hardlink`: hard link
- `char`: character device
- `block`: block device
- `fifo`: fifo
- `chunk`: a chunk of regular file data
As described in the above section, a regular file can be divided into several chunks.
TOCEntry MUST be created for each chunk.
TOCEntry of the first chunk of that file MUST be typed as `reg`.
TOCEntry of each chunk after 2nd MUST be typed as `chunk`.
`chunk` TOCEntry MUST set *offset*, *chunkOffset* and *chunkSize* properties.
- **`size`** *uint64*
This OPTIONAL property contains the uncompressed size of the regular file.
Non-empty `reg` file MUST set this property.
- **`modtime`** *string*
This OPTIONAL property contains the modification time of the tar entry.
Empty means zero or unknown.
Otherwise, the value is in UTC RFC3339 format.
- **`linkName`** *string*
This OPTIONAL property contains the link target.
`symlink` and `hardlink` MUST set this property.
- **`mode`** *int64*
This REQUIRED property contains the permission and mode bits.
- **`uid`** *uint*
This REQUIRED property contains the user ID of the owner of this file.
- **`gid`** *uint*
This REQUIRED property contains the group ID of the owner of this file.
- **`userName`** *string*
This OPTIONAL property contains the username of the owner.
- **`groupName`** *string*
This OPTIONAL property contains the groupname of the owner.
- **`devMajor`** *int*
This OPTIONAL property contains the major device number of device files.
`char` and `block` files MUST set this property.
- **`devMinor`** *int*
This OPTIONAL property contains the minor device number of device files.
`char` and `block` files MUST set this property.
- **`xattrs`** *string-bytes map*
This OPTIONAL property contains the extended attribute for the tar entry.
- **`digest`** *string*
This OPTIONAL property contains the digest of the regular file contents.
- **`offset`** *int64*
This OPTIONAL property contains the offset of the gzip header of the regular file or chunk in the blob.
TOCEntries of non-empty `reg` and `chunk` MUST set this property.
- **`chunkOffset`** *int64*
This OPTIONAL property contains the offset of this chunk in the decompressed regular file payload.
TOCEntries of `chunk` type MUST set this property.
- **`chunkSize`** *int64*
This OPTIONAL property contains the decompressed size of this chunk.
The last `chunk` in a `reg` file or `reg` file that isn't chunked MUST set this property to zero.
Other `reg` and `chunk` MUST set this property.
- **`chunkDigest`** *string*
This OPTIONAL property contains a digest of this chunk.
TOCEntries of non-empty `reg` and `chunk` MUST set this property.
This MAY be used for verifying the data of the chunk.
- **`innerOffset`** *int64*
This OPTIONAL property indicates the uncompressed offset of the "reg" or "chunk" entry payload in a stream starts from `offset` field.
#### Details about `innerOffset`
`innerOffset` enables to put multiple "reg" or "chunk" payloads in one gzip stream starts from `offset`.
This field allows the following structure.
![The structure of eStargz with innerOffset](/docs/images/estargz-inneroffset.png)
Use case of this field is `--estargz-min-chunk-size` flag of `ctr-remote`.
The value of this flag is the minimal number of bytes of data must be written in one gzip stream.
If it's > 0, multiple files and chunks can be written into one gzip stream.
Smaller number of gzip header and smaller size of the result blob can be expected.
### Footer
At the end of the blob, a *footer* MUST be appended.
This MUST be an empty gzip member whose [Extra field](https://tools.ietf.org/html/rfc1952#section-2.3.1.1) contains the offset of TOC in the blob.
The footer MUST be the following 51 bytes (1 byte = 8 bits in gzip).
```
- 10 bytes gzip header
- 2 bytes XLEN (length of Extra field) = 26 (4 bytes header + 16 hex digits + len("STARGZ"))
- 2 bytes Extra: SI1 = 'S', SI2 = 'G'
- 2 bytes Extra: LEN = 22 (16 hex digits + len("STARGZ"))
- 22 bytes Extra: subfield = fmt.Sprintf("%016xSTARGZ", offsetOfTOC)
- 5 bytes flate header: BFINAL = 1(last block), BTYPE = 0(non-compressed block), LEN = 0
- 8 bytes gzip footer
(End of eStargz)
```
Runtimes MAY first read and parse the footer to get the offset of TOC.
Each file's metadata is recorded in the TOC so runtimes don't need to extract other parts of the archive as long as it only uses file metadata.
If runtime needs to get a regular file's content, it can get the size and offset of that content from the TOC and extract that range without scanning the entire blob.
By combining this with HTTP Range Request supported by [OCI Distribution Spec](https://github.com/opencontainers/distribution-spec/blob/ef28f81727c3b5e98ab941ae050098ea664c0960/detail.md#fetch-blob-part), runtimes can selectively download file entries from the registry.
### Notes on compatibility with stargz
eStargz is designed to be compatible with gzip layers.
For achieving this, eStargz's footer structure is incompatible with [stargz's one](https://github.com/google/crfs/blob/71d77da419c90be7b05d12e59945ac7a8c94a543/stargz/stargz.go#L36-L49).
eStargz adds SI1, SI2 and LEN fields to the footer to make it compliant to [Extra field definition in RFC1952](https://tools.ietf.org/html/rfc1952#section-2.3.1.1).
TOC, TOCEntry and the position of gzip headers are still compatible with stargz.
## Prioritized Files and Landmark Files
![Prioritized files and landmark files](/docs/images/estargz-landmark.png)
Lazy pulling can cause runtime performance overhead by on-demand fetching of each file.
eStargz mitigates this by supporting prefetching of important files called *prioritized files*.
eStargz encodes the information about prioritized files to the *order* of file entries with some *landmark* file entries.
File entries in eStargz are grouped into the following groups,
- A. *prioritized files*
- B. non *prioritized files*
If no files belong to A, a landmark file *no-prefetch landmark* MUST be contained in the archive.
If one or more files belong to A, eStargz MUST consist of two separated areas corresponding to these groups and a landmark file *prefetch landmark* MUST be contained at the boundary between these two areas.
The landmark file MUST be a regular file entry whose content is the 4-bit value 0xf in eStargz.
It MUST be recorded to TOC as a TOCEntry. Prefetch landmark MUST be named `.prefetch.landmark`. No-prefetch landmark MUST be named `.no.prefetch.landmark`.
### Example use-case of prioritized files: workload-based image optimization in Stargz Snapshotter
Stargz Snapshotter makes use of eStargz's prioritized files for *workload-based* optimization to mitigate the overhead of reading files.
The *workload* of the image is the runtime configuration defined in the Dockerfile, including entrypoint command, environment variables and user.
Stargz snapshotter provides an image converter command `ctr-remote images optimize` to create optimized eStargz images.
When converting the image, this command runs the specified workload in a sandboxed environment and profiles all file accesses.
This command treats all accessed files as prioritized files.
Then it constructs eStargz by
- putting prioritized files from the top of the archive, sorting them by the accessed order,
- putting *prefetch landmark* file entry at the end of this range, and
- putting all other files (non-prioritized files) after the prefetch landmark.
Before running the container, stargz snapshotter prefetches and pre-caches the range where prioritized files are contained, by a single HTTP Range Request supported by the registry.
This can increase the cache hit rate for the specified workload and can mitigate runtime overheads.
## Content Verification in eStargz
The goal of the content verification in eStargz is to ensure the downloaded metadata and contents of all files are the expected ones, based on the calculated digests.
The verification of other components in the image including image manifests is out-of-scope of eStargz.
On the verification step of an eStargz layer, we assume that the manifest that references this eStargz layer is already verified (using digest tag, etc).
![the overview of the verification](/docs/images/estargz-verification.png)
A non-eStargz layer can be verified by recalculating the digest and comparing it with the one written in the layer descriptor referencing that layer in the verified manifest.
However, an eStargz layer is *lazily* pulled from the registry in file (or chunk if that file is large) granularity so each one needs to be independently verified every time fetched.
The following describes how the verification of eStargz is done using the verified manifest.
eStargz consists of the following components to be verified:
- TOC (a set of metadata of all files contained in the layer)
- chunks of contents of each regular file
TOC contains metadata (name, type, mode, etc.) of all files and chunks in the blob.
On mounting eStargz, filesystem fetches the TOC from the registry.
For making the TOC verifiable using the verified manifest, we define an annotation `containerd.io/snapshot/stargz/toc.digest`.
The value of this annotation is the digest of the TOC and this MUST be contained in the descriptor that references this eStargz layer.
Using this annotation, filesystem can verify the TOC by recalculating the digest and comparing it to the annotation value.
Each file's metadata is encoded to a TOCEntry in the TOC.
TOCEntry is created also for each chunk of regular files.
For making the contents of each file and chunk verifiable using the verified manifest, TOCEntry has a property *chunkDigest*.
*chunkDigest* contains the digest of the content of the `reg` or `chunk` entry.
As mentioned above, the TOC is verifiable using the special annotation.
Using *chunkDigest* fields written in the verified TOC, each file and chunk can be independently verified by recalculating the digest and comparing it to the property.
As the conclusion, eStargz MUST contain the following metadata:
- `containerd.io/snapshot/stargz/toc.digest` annotation in the descriptor that references eStargz layer: The value is the digest of the TOC.
- *chunkDigest* properties of non-empty `reg` or `chunk` TOCEntry: The value is the digest of the contents of the file or chunk.
### Example use-case: Content verification in Stargz Snapshotter
Stargz Snapshotter verifies eStargz layers leveraging the above metadata.
As mentioned above, the verification of other image components including the manifests is out-of-scope of the snapshotter.
When this snapshotter mounts an eStargz layer, the manifest that references this layer must be verified in advance and the TOC digest annotation written in the verified manifest must be passed down to this snapshotter.
On mounting a layer, stargz snapshotter fetches the TOC from the registry.
Then it verifies the TOC by recalculating the digest and comparing it with the one written in the manifest.
After the TOC is verified, the snapshotter mounts this layer using the metadata recorded in the TOC.
During runtime of the container, this snapshotter fetches chunks of regular file contents lazily.
Before providing a chunk to the filesystem user, snapshotter recalculates the digest and checks it matches the one recorded in the corresponding TOCEntry.
## eStargz image with an external TOC (OPTIONAL)
This OPTIONAL feature allows separating TOC into another image called *TOC image*.
This type of eStargz is the same as the normal eStargz but doesn't contain TOC JSON file (`stargz.index.json`) in the layer blob and has a special footer.
This feature enables creating a smaller eStargz blob by avoiding including TOC JSON file in that blob.
Footer has the following structure:
```
// The footer is an empty gzip stream with no compression and an Extra header.
//
// 46 comes from:
//
// 10 bytes gzip header
// 2 bytes XLEN (length of Extra field) = 21 (4 bytes header + len("STARGZEXTERNALTOC"))
// 2 bytes Extra: SI1 = 'S', SI2 = 'G'
// 2 bytes Extra: LEN = 17 (len("STARGZEXTERNALTOC"))
// 17 bytes Extra: subfield = "STARGZEXTERNALTOC"
// 5 bytes flate header
// 8 bytes gzip footer
// (End of the eStargz blob)
```
TOC image is an OCI image containing TOC.
Each layer contains a TOC JSON file (`stargz.index.json`) in the root directory.
Layer descriptors in the manifest must contain an annotation `containerd.io/snapshot/stargz/layer.digest`.
The value of this annotation is the digest of the eStargz layer blob corresponding to that TOC.
The following is an example layer descriptor in the TOC image.
This layer (`sha256:64dedefd539280a5578c8b94bae6f7b4ebdbd12cb7a7df0770c4887a53d9af70`) contains the TOC JSON file (`stargz.index.json`) in the root directory and can be used for eStargz layer blob that has the digest `sha256:5da5601c1f2024c07f580c11b2eccf490cd499473883a113c376d64b9b10558f`.
```json
{
"mediaType": "application/vnd.oci.image.layer.v1.tar+gzip",
"digest": "sha256:64dedefd539280a5578c8b94bae6f7b4ebdbd12cb7a7df0770c4887a53d9af70",
"size": 154425,
"annotations": {
"containerd.io/snapshot/stargz/layer.digest": "sha256:5da5601c1f2024c07f580c11b2eccf490cd499473883a113c376d64b9b10558f"
}
}
```
### Example use-case: lazy pulling with Stargz Snapshotter
Stargz Snapshotter supports eStargz with external TOC.
If an eStargz blob's footer indicates that it requires the TOC image, stargz snapshotter also pulls it from the registry.
Stargz snapshotter assumes the TOC image has the reference name same as the eStargz with `-esgztoc` suffix.
For example, if an eStargz image is named `ghcr.io/stargz-containers/ubuntu:22.04-esgz`, stargz snapshotter acquires the TOC image from `ghcr.io/stargz-containers/ubuntu:22.04-esgz-esgztoc`.
Note that future versions of stargz snapshotter will support more ways to search the TOC image (e.g. allowing a custom suffix, using OCI Reference Types, etc.).
Once stargz snapshotter acquires TOC image, it tries to find the TOC corresponding to the mounting eStargz blob, by looking `containerd.io/snapshot/stargz/layer.digest` annotations.
As described above, the acquired TOC JSON is validated using the `containerd.io/snapshot/stargz/toc.digest` annotation.
## Example of TOC
Here is an example TOC JSON:
```json
{
"version": 1,
"entries": [
{
"name": "bin/",
"type": "dir",
"modtime": "2019-08-20T10:30:43Z",
"mode": 16877,
"NumLink": 0
},
{
"name": "bin/busybox",
"type": "reg",
"size": 833104,
"modtime": "2019-06-12T17:52:45Z",
"mode": 33261,
"offset": 126,
"NumLink": 0,
"digest": "sha256:8b7c559b8cccca0d30d01bc4b5dc944766208a53d18a03aa8afe97252207521f",
"chunkDigest": "sha256:8b7c559b8cccca0d30d01bc4b5dc944766208a53d18a03aa8afe97252207521f"
},
{
"name": "lib/",
"type": "dir",
"modtime": "2019-08-20T10:30:43Z",
"mode": 16877,
"NumLink": 0
},
{
"name": "lib/ld-musl-x86_64.so.1",
"type": "reg",
"size": 580144,
"modtime": "2019-08-07T07:15:30Z",
"mode": 33261,
"offset": 512427,
"NumLink": 0,
"digest": "sha256:45c6ee3bd1862697eab8058ec0e462f5a760927331c709d7d233da8ffee40e9e",
"chunkDigest": "sha256:45c6ee3bd1862697eab8058ec0e462f5a760927331c709d7d233da8ffee40e9e"
},
{
"name": ".prefetch.landmark",
"type": "reg",
"size": 1,
"offset": 886633,
"NumLink": 0,
"digest": "sha256:dc0e9c3658a1a3ed1ec94274d8b19925c93e1abb7ddba294923ad9bde30f8cb8",
"chunkDigest": "sha256:dc0e9c3658a1a3ed1ec94274d8b19925c93e1abb7ddba294923ad9bde30f8cb8"
},
... (omit) ...
```

Binary file not shown.

Before

Width:  |  Height:  |  Size: 133 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 59 KiB

View File

Before

Width:  |  Height:  |  Size: 182 KiB

After

Width:  |  Height:  |  Size: 182 KiB

View File

@ -1,205 +0,0 @@
# Integration of eStargz with other tools
This document lists links and information about integrations of stargz-snapshotter with tools in the community.
You can refer to [issue #258 "Tracker issue for adoption status"](https://github.com/containerd/stargz-snapshotter/issues/258) for the list of the latest status of these integrations.
## Kubernetes
To use stargz snapshotter on Kubernetes nodes, you need to use containerd as the CRI runtime.
You also need to run stargz snapshotter on the node.
### Kind
See [`/README.md#quick-start-with-kubernetes`](/README.md#quick-start-with-kubernetes).
### k3s
k3s >= v1.22 supports stargz-snapshotter as an experimental feature.
`--snapshotter=stargz` for k3s server and agent enables this feature.
```
k3s server --snapshotter=stargz
```
Refer to [k3s docs](https://docs.k3s.io/advanced#enabling-lazy-pulling-of-estargz-experimental) for more details.
The following is a quick demo using [k3d](https://github.com/k3d-io/k3d) (k3s in Docker).
```console
$ k3d cluster create mycluster --k3s-arg='--snapshotter=stargz@server:*;agent:*'
$ cat <<'EOF' | kubectl --context=k3d-mycluster apply -f -
apiVersion: v1
kind: Pod
metadata:
name: nodejs
spec:
containers:
- name: nodejs-stargz
image: ghcr.io/stargz-containers/node:17.8.0-esgz
command: ["node"]
args:
- -e
- var http = require('http');
http.createServer(function(req, res) {
res.writeHead(200);
res.end('Hello World!\n');
}).listen(80);
ports:
- containerPort: 80
EOF
$ kubectl --context=k3d-mycluster get po nodejs -w
$ kubectl --context=k3d-mycluster port-forward nodejs 8080:80 &
$ curl 127.0.0.1:8080
Hello World!
$ k3d cluster delete mycluster
```
### Google Kubernetes Engine
There is no node image that includes stargz snapshotter by default as of now, so you need to manually customize the nodes.
A brief instruction for enabling stargz snapshotter is the following:
- Create a Kubernetes cluster using containerd-supported Linux node images like `ubuntu_containerd`. containerd must be >= v1.4.2.
- SSH into each node and install stargz snapshotter following [`./INSTALL.md`](./INSTALL.md#install-stargz-snapshotter-for-containerd-with-systemd). You need this installation on all worker nodes.
- Optionally apply configuration to allow stargz-snapshotter to access private registries following [`./overview.md`](./overview.md#authentication).
### Amazon Elastic Kubernetes Service
There is no AMI that includes stargz snapshotter by default as of now, so you need to manually customize the nodes.
A brief instruction for enabling stargz snapshotter is the following:
- Create a Kubernetes cluster using containerd-supported Linux AMIs. containerd must be >= v1.4.2. e.g. Amazon EKS optimized Amazon Linux AMIs with [containerd runtime bootstrap flag](https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-ami.html).
- SSH into each node and install stargz snapshotter following [`./INSTALL.md`](./INSTALL.md#install-stargz-snapshotter-for-containerd-with-systemd). You need this installation on all worker nodes.
- Optionally apply configuration to allow stargz-snapshotter to access private registries following [`./overview.md`](./overview.md#authentication).
## CRI runtimes
### containerd
See [`./INSTALL.md`](./INSTALL.md#install-stargz-snapshotter-for-containerd-with-systemd)
> :information_source: There is also a doc for [integration with firecracker-containerd](https://github.com/firecracker-microvm/firecracker-containerd/blob/24f1fcf99ebf6edcb94edd71a2affbcdae6b08e7/docs/remote-snapshotter-getting-started.md).
### CRI-O
See [`./INSTALL.md`](./INSTALL.md#install-stargz-store-for-cri-opodman-with-systemd).
## High-level container engines
### Docker
#### Moby
Moby supports lazy pulling of eStargz since [`5c1d6c957b97321c8577e10ddbffe6e01981617a`](https://github.com/moby/moby/commit/5c1d6c957b97321c8577e10ddbffe6e01981617a) .
See [`./INSTALL.md`](./INSTALL.md#install-stargz-snapshotter-for-dockermoby-with-systemd) for details.
#### Docker Desktop
Docker Desktop 4.12.0 "Containerd Image Store (Beta)" uses stargz-snapshotter.
Refer to [Docker documentation](https://docs.docker.com/desktop/containerd/).
### nerdctl
See the [docs in nerdctl](https://github.com/containerd/nerdctl/blob/main/docs/stargz.md).
### Podman
See [`./INSTALL.md`](./INSTALL.md#install-stargz-store-for-cri-opodman-with-systemd).
## Image builders
### BuildKit
#### Building eStargz
BuildKit >= v0.10 supports creating eStargz images.
See [`README.md`](/README.md#building-estargz-images-using-buildkit) for details.
#### Lazy pulling of eStargz
BuildKit >= v0.8 supports stargz-snapshotter and can perform lazy pulling of eStargz-formatted base images during build.
`--oci-worker-snapshotter=stargz` flag enables this feature.
You can try this feature using Docker Buildx as the following.
```
$ docker buildx create --use --name lazy-builder --buildkitd-flags '--oci-worker-snapshotter=stargz'
$ docker buildx inspect --bootstrap lazy-builder
```
The following is a sample Dockerfile that uses eStargz-formatted golang image (`ghcr.io/stargz-containers/golang:1.18-esgz`) as the base image.
```Dockerfile
FROM ghcr.io/stargz-containers/golang:1.18-esgz AS dev
COPY ./hello.go /hello.go
RUN go build -o /hello /hello.go
FROM scratch
COPY --from=dev /hello /
ENTRYPOINT [ "/hello" ]
```
Put the following Go source code in the context directory with naming it `hello.go`.
```golang
package main
import "fmt"
func main() {
fmt.Println("Hello, world!")
}
```
The following build performs lazy pulling of the eStargz-formatted golang base image.
```console
$ docker buildx build --load -t hello /tmp/ctx/
$ docker run --rm hello
Hello, world!
```
### Kaniko
#### Building eStargz
Kaniko >= v1.5.0 creates eStargz images when `GGCR_EXPERIMENT_ESTARGZ=1` is specified.
See [`README.md`](/README.md#building-estargz-images-using-kaniko) for details.
### ko
ko >= v0.7.0 creates eStargz images when `GGCR_EXPERIMENT_ESTARGZ=1` is specified.
Please see also [the docs in ko](https://github.com/ko-build/ko/blob/f70e3cad38c3bbd232f51604d922b8baff31144e/docs/advanced/faq.md#can-i-optimize-images-for-estargz-support).
## P2P image distribution
### IPFS
See [`./ipfs.md`](./ipfs.md)
### Dragonfly
Change the `/etc/containerd-stargz-grpc/config.toml` configuration to make dragonfly as registry mirror.
`127.0.0.1:65001` is the proxy address of dragonfly peer,
and the `X-Dragonfly-Registry` header is the address of origin registry,
which is provided for dragonfly to download the images.
```toml
[[resolver.host."docker.io".mirrors]]
host = "127.0.0.1:65001"
insecure = true
[resolver.host."docker.io".mirrors.header]
X-Dragonfly-Registry = ["https://index.docker.io"]
```
For more details about dragonfly as registry mirror,
refer to [How to use Dragonfly With eStargz](https://d7y.io/docs/setup/integration/stargz/).
## Registry-side conversion of eStargz
### Harbor
See the docs in Harbor: https://github.com/goharbor/acceleration-service

View File

@ -1,177 +0,0 @@
# Running containers on IPFS (experimental)
:information_source: This document isn't for Kubernetes environments. For information about node-to-node image sharing on Kubernetes, please refer to [the docs in nerdctl project](https://github.com/containerd/nerdctl/tree/main/examples/nerdctl-ipfs-registry-kubernetes).
You can run OCI-compatible container images on IPFS with lazy pulling.
To enable this feature, add the following configuration to `config.toml` of Stargz Snapshotter (typically located at `/etc/containerd-stargz-grpc/config.toml`).
```toml
ipfs = true
```
> NOTE: containerd-stargz-grpc tries to connect to IPFS API written in `~/.ipfs/api` (or the file under `$IPFS_PATH` if configured) via HTTP (not HTTPS).
## IPFS-enabled OCI Image
For obtaining IPFS-enabled OCI Image, each descriptor in an OCI image must contain the following [IPFS URL](https://docs.ipfs.io/how-to/address-ipfs-on-web/#native-urls) in `urls` field.
```
ipfs://<CID>
```
`<CID>` is the Base32 case-insensitive CIDv1 of the blob that the descriptor points to.
An image is represented as a CID pointing to the OCI descriptor of the top-level blob of the image (i.e. image index).
The following is an example OCI descriptor pointing to the image index of an IPFS-enabled image:
```console
# ipfs cat bafkreie7754qk7fl56ebauawdgfuqqa3kdd7sotvuhsm6wbz3qin6ssw3a | jq
{
"mediaType": "application/vnd.oci.image.index.v1+json",
"digest": "sha256:80d6aec48c0a74635a5f3dc106328c1673afaa21ed6e1270a9a44de66e8ffa55",
"size": 314,
"urls": [
"ipfs://bafkreiea22xmjdakorrvuxz5yeddfdawoox2uipnnyjhbknejxtg5d72ku"
]
}
```
## Lazy pulling with Stargz Snapshotter
If layer descriptors of an image contain the URLs described above and these blobs are formatted as eStargz, Stargz Snapshotter mounts them from IPFS to the container's rootfs using FUSE with lazy pulling support.
Thus container can startup without waiting for the entire image contents being locally available.
Necessary chunks of contents (e.g. each file in the rootfs) are fetched from IPFS on-demand.
If the container image isn't eStargz or the snapshotter isn't Stargz Snapshotter (e.g. overlayfs snapshotter), containerd fetches the entire image contents from IPFS and unpacks it to the local directory before starting the container.
Thus possibly you'll see slow container cold-start.
## Examples
This section describes some examples of storing images to IPFS and running them as containers.
Make sure IPFS daemon runs on your node.
For example, you can run an IPFS daemon using the following command.
```
ipfs daemon
```
:information_source: If you don't want IPFS to communicate with nodes on the internet, you can run IPFS daemon in offline mode using `--offline` flag or you can create a private IPFS network as described in Appendix 1.
### Running a container with lazy pulling
`ctr-remote image ipfs-push` command converts an image to IPFS-enabled eStargz and stores it to IPFS.
```console
# ctr-remote i pull ghcr.io/stargz-containers/python:3.9-org
# ctr-remote i ipfs-push ghcr.io/stargz-containers/python:3.9-org
bafkreie7754qk7fl56ebauawdgfuqqa3kdd7sotvuhsm6wbz3qin6ssw3a
```
The printed IPFS CID (`bafkreie7754qk7fl56ebauawdgfuqqa3kdd7sotvuhsm6wbz3qin6ssw3a`) points to an OCI descriptor which points to the image index of the added image.
```console
# ipfs cat bafkreie7754qk7fl56ebauawdgfuqqa3kdd7sotvuhsm6wbz3qin6ssw3a | jq
{
"mediaType": "application/vnd.oci.image.index.v1+json",
"digest": "sha256:80d6aec48c0a74635a5f3dc106328c1673afaa21ed6e1270a9a44de66e8ffa55",
"size": 314,
"urls": [
"ipfs://bafkreiea22xmjdakorrvuxz5yeddfdawoox2uipnnyjhbknejxtg5d72ku"
]
}
```
You can run this image from IPFS using that CID as an image reference for `ctr-remote image rpull`.
`--ipfs` option is needed for enabling this.
Note that `ctr-remote` accepts an IPFS CID as the image reference but doesn't support `/ipfs`-prefixed path as of now.
We're working on eliminating this limitation.
```console
# time ( ctr-remote i rpull --ipfs bafkreie7754qk7fl56ebauawdgfuqqa3kdd7sotvuhsm6wbz3qin6ssw3a && \
ctr-remote run --snapshotter=stargz --rm -t bafkreie7754qk7fl56ebauawdgfuqqa3kdd7sotvuhsm6wbz3qin6ssw3a foo python -c 'print("Hello, World!")' )
fetching sha256:80d6aec4... application/vnd.oci.image.index.v1+json
fetching sha256:16d36f86... application/vnd.oci.image.manifest.v1+json
fetching sha256:236b4bd7... application/vnd.oci.image.config.v1+json
Hello, World!
real 0m1.099s
user 0m0.047s
sys 0m0.037s
```
### Running a container without lazy pulling
Though eStargz-based lazy pulling is highly recommended for speeding up the container startup time, you can store and run non-eStargz images with IPFS as well.
In this case, containerd fetches the entire image contents from IPFS and unpacks it to the local directory before starting the container.
You can add a non-eStargz image to IPFS using `--estargz=false` option.
```console
# ctr-remote i pull ghcr.io/stargz-containers/python:3.9-org
# ctr-remote i ipfs-push --estargz=false ghcr.io/stargz-containers/python:3.9-org
bafkreienbir4knaofs3o5f57kqw2the2v7zdhdlzpkq346mipuopwvqhty
```
You don't need FUSE nor stargz snapshotter for running this image but will see slow container cold-start.
This example uses overlayfs snapshotter of containerd.
```console
# time ( ctr-remote i rpull --snapshotter=overlayfs --ipfs bafkreienbir4knaofs3o5f57kqw2the2v7zdhdlzpkq346mipuopwvqhty && \
ctr-remote run --snapshotter=overlayfs --rm -t bafkreienbir4knaofs3o5f57kqw2the2v7zdhdlzpkq346mipuopwvqhty foo python -c 'print("Hello, World!")' )
fetching sha256:7240ac9f... application/vnd.oci.image.index.v1+json
fetching sha256:17dc54f4... application/vnd.oci.image.manifest.v1+json
fetching sha256:6f1289b1... application/vnd.oci.image.config.v1+json
fetching sha256:9476e460... application/vnd.oci.image.layer.v1.tar+gzip
fetching sha256:64c0f10e... application/vnd.oci.image.layer.v1.tar+gzip
fetching sha256:4c25b309... application/vnd.oci.image.layer.v1.tar+gzip
fetching sha256:942374d5... application/vnd.oci.image.layer.v1.tar+gzip
fetching sha256:3fff52a3... application/vnd.oci.image.layer.v1.tar+gzip
fetching sha256:5cf06daf... application/vnd.oci.image.layer.v1.tar+gzip
fetching sha256:419e258e... application/vnd.oci.image.layer.v1.tar+gzip
fetching sha256:1acf5650... application/vnd.oci.image.layer.v1.tar+gzip
fetching sha256:b95c0dd0... application/vnd.oci.image.layer.v1.tar+gzip
Hello, World!
real 0m11.320s
user 0m0.556s
sys 0m0.280s
```
## Appendix 1: Creating IPFS private network
You can create a private IPFS network as described in the official docs.
- https://github.com/ipfs/go-ipfs/blob/v0.10.0/docs/experimental-features.md#private-networks
The following is the summary.
First, generate a key and save it to `~/.ipfs/swarm.key` (or under `$IPFS_PATH` if configured) of nodes you want to have in the network.
IPFS only connects to peers having this key.
```
go install github.com/Kubuxu/go-ipfs-swarm-key-gen/ipfs-swarm-key-gen@latest
~/go/bin/ipfs-swarm-key-gen > ~/.ipfs/swarm.key
```
Select nodes as a bootstrap nodes.
IPFS daemons learn about the peers on the private network from them.
Configure all non-bootstrap nodes to recognize only our bootstrap nodes instead of public ones like the following example.
```
ipfs bootstrap rm --all
ipfs bootstrap add /ip4/<ip address of bootstrap node>/tcp/4001/ipfs/<Peer ID of the bootstrap node>
```
:information_source: You can get Peer ID of a node by `ipfs config show | grep "PeerID"`.
Finally, start all nodes in the private network.
```
export LIBP2P_FORCE_PNET=1
ipfs daemon
```
`LIBP2P_FORCE_PNET=1` makes sure that the daemon uses the private network and fails if the private network isn't configured.

View File

@ -1,52 +0,0 @@
# Getting started with Stargz Snapshotter on Lima
[Lima](https://github.com/lima-vm/lima) is a tool to manage Linux virtual machines on various hosts, including MacOS and Linux.
Lima can be used as an easy way to get started with Stargz Snapshotter as Lima provides a default VM image bundling [containerd](https://github.com/containerd/containerd), [nerdctl](https://github.com/containerd/nerdctl)(Docker-compatible CLI of containerd) and Stargz Snapshotter.
This document describes how to get started with Stargz Snapshotter on Lima.
## Enable Stargz Snapshotter using `--snapshotter=stargz` flag
nerdctl's `--snapshotter=stargz` flag enables stargz-snapshotter.
```
$ nerdctl.lima --snapshotter=stargz system info | grep stargz
Storage Driver: stargz
```
Using this flag, you can perform lazy pulling of a python eStargz image and run it.
```
$ nerdctl.lima --snapshotter=stargz run --rm -it --name python ghcr.io/stargz-containers/python:3.13-esgz
Python 3.13.2 (main, Feb 6 2025, 22:37:13) [GCC 12.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>>
```
## Use Stargz Snapshotter as the default snapshotter
nerdctl recognizes an environment variable `CONTAINERD_SNAPSHOTTER` for the snapshotter to use.
You can add this environment variable to the VM by configuring Lima config as shown in the following:
```
$ cat <<EOF >> ~/.lima/_config/override.yaml
env:
CONTAINERD_SNAPSHOTTER: stargz
EOF
$ limactl stop
$ limactl start
$ nerdctl.lima system info | grep Storage
Storage Driver: stargz
```
> NOTE: `override.yaml` applies to all the instances of Lima
You can perform lazy pulling of eStargz using nerdctl, without any extra flags.
```
$ nerdctl.lima run --rm -it --name python ghcr.io/stargz-containers/python:3.13-esgz
Python 3.13.2 (main, Feb 6 2025, 22:37:13) [GCC 12.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>>
```

View File

@ -1,22 +1,22 @@
# Containerd Stargz Snapshotter Plugin Overview # Containerd Stargz Snapshotter Plugin Overview
__Before reading this overview document, we recommend you read [README](../README.md).__ __Before get through this overview document, we recommend you to read [README](README.md).__
Pulling images is one of the most time-consuming steps in the container startup process. Pulling image is one of the time-consuming steps in the container startup process.
In the containerd community, we have had a lot of discussions to address this issue at the following: In containerd community, we have had a lot of discussions to address this issue as the following,
- [#3731 Support remote snapshotter to speed up image pulling](https://github.com/containerd/containerd/issues/3731) - [#3731 Support remote snapshotter to speed up image pulling](https://github.com/containerd/containerd/issues/3731)
- [#2968 Support `Prepare` for existing snapshots in Snapshotter interface](https://github.com/containerd/containerd/issues/2968) - [#2968 Support `Prepare` for existing snapshots in Snapshotter interface](https://github.com/containerd/containerd/issues/2968)
- [#2943 remote filesystem snapshotter](https://github.com/containerd/containerd/issues/2943) - [#2943 remote filesystem snapshotter](https://github.com/containerd/containerd/issues/2943)
The solution for fast image distribution is called *Remote Snapshotter* plugin. The solution for the fast image distribution is called *Remote Snapshotter* plugin.
This prepares the container's rootfs layers by directly mounting from remote stores instead of downloading and unpacking the entire image contents. This prepares container's rootfs layers by directly mounting from remote stores instead of downloading and unpacking the entire image contents.
The actual image contents can be fetched *lazily* so runtimes can start containers before the entire image contents are locally available. The actual image contents can be fetched *lazily* so runtimes can startup containers before the entire image contents to be locally available.
We call these remotely mounted layers *remote snapshots*. We call these remotely mounted layers as *remote snapshots*.
*Stargz Snapshotter* is a remote snapshotter plugin implementation which supports standard compatible remote snapshots functionality. *Stargz Snapshotter* is a remote snapshotter plugin implementation which supports standard compatible remote snapshots functionality.
This snapshotter leverages [eStargz](/docs/stargz-estargz.md) image, which is lazily-pullable and still standard-compatible. This snapshotter leverages [eStargz](/docs/stargz-estargz.md) image, which is lazily-pullable and still standard-compatible.
Because of this compatibility, eStargz images can be pushed to and lazily pulled from [OCI](https://github.com/opencontainers/distribution-spec)/[Docker](https://docs.docker.com/registry/spec/api/) registries (e.g. ghcr.io). Because of this compatibility, eStargz image can be pushed to and lazily pulled from [OCI](https://github.com/opencontainers/distribution-spec)/[Docker](https://docs.docker.com/registry/spec/api/) registries (e.g. ghcr.io).
Furthermore, images can run even on eStargz-agnostic runtimes (e.g. Docker). Furthermore, images can run even on eStargz-agnostic runtimes (e.g. Docker).
When you run a container image and it is formatted by eStargz, stargz snapshotter prepares container's rootfs layers as remote snapshots by mounting layers from the registry to the node, instead of pulling the entire image contents. When you run a container image and it is formatted by eStargz, stargz snapshotter prepares container's rootfs layers as remote snapshots by mounting layers from the registry to the node, instead of pulling the entire image contents.
@ -27,10 +27,10 @@ This document gives you a high-level overview of stargz snapshotter.
## Stargz Snapshotter proxy plugin ## Stargz Snapshotter proxy plugin
Stargz snapshotter is implemented as a [proxy plugin](https://github.com/containerd/containerd/blob/04985039cede6aafbb7dfb3206c9c4d04e2f924d/PLUGINS.md#proxy-plugins) daemon (`containerd-stargz-grpc`) for containerd. Stargz snapshotter is implemented as a [proxy plugin](https://github.com/containerd/containerd/blob/04985039cede6aafbb7dfb3206c9c4d04e2f924d/PLUGINS.md#proxy-plugins) daemon (`containerd-stargz-grpc`) for containerd.
When containerd starts a container, it queries the rootfs snapshots to stargz snapshotter daemon through a unix socket. When containerd starts a container, it queries the rootfs snapshots to stargz snapshotter daemon through an unix socket.
This snapshotter remotely mounts queried eStargz layers from registries to the node and provides these mount points as remote snapshots to containerd. This snapshotter remotely mounts queried eStargz layers from registries to the node and provides these mount points as remote snapshots to containerd.
Containerd recognizes this plugin through a unix socket specified in the configuration file (e.g. `/etc/containerd/config.toml`). Containerd recognizes this plugin through an unix socket specified in the configuration file (e.g. `/etc/containerd/config.toml`).
Stargz snapshotter can also be used through Kubernetes CRI by specifying the snapshotter name in the CRI plugin configuration. Stargz snapshotter can also be used through Kubernetes CRI by specifying the snapshotter name in the CRI plugin configuration.
We assume that you are using containerd (> v1.4.2). We assume that you are using containerd (> v1.4.2).
@ -44,8 +44,6 @@ version = 2
[proxy_plugins.stargz] [proxy_plugins.stargz]
type = "snapshot" type = "snapshot"
address = "/run/containerd-stargz-grpc/containerd-stargz-grpc.sock" address = "/run/containerd-stargz-grpc/containerd-stargz-grpc.sock"
[proxy_plugins.stargz.exports]
root = "/var/lib/containerd-stargz-grpc/"
# Use stargz snapshotter through CRI # Use stargz snapshotter through CRI
[plugins."io.containerd.grpc.v1.cri".containerd] [plugins."io.containerd.grpc.v1.cri".containerd]
@ -53,26 +51,24 @@ version = 2
disable_snapshot_annotations = false disable_snapshot_annotations = false
``` ```
> NOTE: `root` field of `proxy_plugins` is needed for the CRI plugin to recognize stargz snapshotter's root directory.
This repo contains [a Dockerfile as a KinD node image](/Dockerfile) which includes the above configuration. This repo contains [a Dockerfile as a KinD node image](/Dockerfile) which includes the above configuration.
## State directory ## State directory
Stargz snapshotter mounts eStargz layers from registries to the node using FUSE. Stargz snapshotter mounts eStargz layers from registries to the node using FUSE.
Metadata for all files in the image are preserved on the container filesystem and the file contents are fetched from registries on demand. The all files metadata in the image are preserved on the filesystem and files contents are fetched from registries on demand.
At the root of the container filesystem, there is a *state directory* (`/.stargz-snapshotter`) for status monitoring for the filesystem. At the root of the filesystem, there is a *state directory* (`/.stargz-snapshotter`) for status monitoring for the filesystem.
This directory is hidden from `getdents(2)` so you can't see this with `ls -a /`. This directory is hidden from `getdents(2)` so you can't see this with `ls -a /`.
Instead, you can directly access the directory by specifying the path (`/.stargz-snapshotter`). Instead, you can directly access the directory by specifying the path (`/.stargz-snapshotter`).
The state directory contains JSON-formatted metadata files for each layer. State directory contains JSON-formatted metadata files for each layer.
In the following example, metadata JSON files for overlayed 7 layers are visible. In the following example, metadata JSON files for overlayed 7 layers are visible.
In each metadata JSON file, the following fields are contained: In each metadata JSON file, the following fields are contained,
- `digest` contains the layer digest. This is the same value as that in the image's manifest. - `digest` contains the layer digest. This is the same value as that in the image's manifest.
- `size` is the size bytes of the layer. - `size` is the size bytes of the layer.
- `fetchedSize` and `fetchedPercent` indicate how many bytes have been fetched for this layer. Stargz snapshotter aggressively downloads this layer in the background - unless configured otherwise - so these values gradually increase. When `fetchedPercent` reaches `100` percent, this layer has been fully downloaded on the node and no further access will occur for reading files. - `fetchedSize` and `fetchedPercent` indicate how many bytes have been fetched for this layer. Stargz snapshotter aggressively downloads this layer in the background - unless configured otherwise - so these values gradually increase. When `fetchedPercent` reaches to `100` percents, this layer has been fully downloaded on the node and no further access will occur for reading files.
Note that the state directory layout and the metadata JSON structure are subject to change. Note that the state directory layout and the metadata JSON structure are subject to change.
@ -99,59 +95,6 @@ root@1d43741b8d29:/go# cat /.stargz-snapshotter/*
{"digest":"sha256:f077511be7d385c17ba88980379c5cd0aab7068844dffa7a1cefbf68cc3daea3","size":580,"fetchedSize":580,"fetchedPercent":100} {"digest":"sha256:f077511be7d385c17ba88980379c5cd0aab7068844dffa7a1cefbf68cc3daea3","size":580,"fetchedSize":580,"fetchedPercent":100}
``` ```
## Fuse Manager
The fuse manager is designed to maintain the availability of running containers by managing the lifecycle of FUSE mountpoints independently from the stargz snapshotter.
### Fuse Manager Overview
Remote snapshots are mounted using FUSE, and its filesystem processes are attached to the stargz snapshotter. If the stargz snapshotter restarts (due to configuration changes or crashes), all filesystem processes will be killed and restarted, which causes the remount of FUSE mountpoints, making running containers unavailable.
To avoid this, we use a fuse daemon called the fuse manager to handle filesystem processes. The fuse manager is responsible for mounting and unmounting remote snapshots. Its process is detached from the stargz snapshotter main process to an independent one in a shim-like way during the snapshotter's startup. This design ensures that the restart of the snapshotter won't affect the filesystem processes it manages, keeping mountpoints and running containers available during the restart. However, it is important to note that the restart of the fuse manager itself triggers a remount, so it is recommended to keep the fuse manager running in a good state.
You can enable the fuse manager by adding the following configuration.
```toml
[fuse_manager]
enable = true
```
## Killing and restarting Stargz Snapshotter
Stargz Snapshotter works as a FUSE server for the snapshots.
When you stop Stargz Snapshotter on the node, it takes the following behaviour depending on the configuration.
### FUSE manager mode is disabled
Killing containerd-stargz-grpc will result in unmounting all snapshot mounts managed by Stargz Snapshotter.
When containerd-stargz-grpc is restarted, all those snapshots are mounted again by lazy pulling all layers.
If the snapshotter fails to mount one of the snapshots (e.g. because of lazy pulling failure) during this step, the behaviour differs depending on `allow_invalid_mounts_on_restart` flag in the config TOML.
- `allow_invalid_mounts_on_restart = true`: containerd-stargz-grpc leaves the failed snapshots as empty directories. The user needs to manually remove those snapshots via containerd (e.g. using `ctr snapshot rm` command). The name of those snapshots can be seen in the log with `failed to restore remote snapshot` message.
- `allow_invalid_mounts_on_restart = false`: containerd-stargz-grpc doesn't start. The user needs to manually recover this (e.g. by wiping snapshotter and containerd state).
### FUSE manager mode is enabled
Killing containerd-stargz-grpc using non-SIGINT signal (e.g. using SIGTERM) doesn't affect the snapshot mounts because the FUSE manager process detached from containerd-stargz-grpc keeps on serving FUSE mounts to the kernel.
This is useful when you reload the updated config TOML to Stargz Snapshotter without unmounting existing snapshots.
FUSE manager serves FUSE mounts of the snapshots so if you kill this process, all snapshot mounts will be unavailable.
When stopping FUSE manager for upgrading the binary or restarting the node, you can use SIGINT signal to trigger the graceful exit as shown in the following steps.
1. Stop containers that use Stargz Snapshotter. Stopping FUSE manager makes all snapshot mounts unavailable so containers can't keep working.
2. Stop containerd-stargz-grpc process using SIGINT. This signal triggers unmounting of all snapshots and cleaning up of the associated resources.
3. Kill the FUSE manager process (`stargz-fuse-manager`)
4. Restart the containerd-stargz-grpc process. This restores all snapshot mounts by lazy pulling them. `allow_invalid_mounts_on_restart` (described above) can still be used for controlling the behaviour of the error cases.
5. Restart the containers.
### Unexpected restart handling
When Stargz Snapshotter is killed unexpectedly (e.g., by OOM killer or system crash), the process doesn't get a chance to perform graceful cleanup. In such cases, the snapshotter can successfully restart and restore remote snapshots, but this may lead to fscache duplicating cached data.
**Recommended handling:**
Since this scenario is caused by abnormal exit, users are expected to manually clean up the cache directory after an unexpected restart to avoid cache duplication issues. The cache cleanup should be performed before restarting the snapshotter service.
## Registry-related configuration ## Registry-related configuration
You can configure stargz snapshotter for accessing registries with custom configurations. You can configure stargz snapshotter for accessing registries with custom configurations.
@ -163,11 +106,8 @@ Stargz snapshotter doesn't share private registries creds with containerd.
Instead, this supports authentication in the following methods, Instead, this supports authentication in the following methods,
- Using `$DOCKER_CONFIG` or `~/.docker/config.json` - Using `$DOCKER_CONFIG` or `~/.docker/config.json`
- Proxying and scanning CRI Image Service API
- Using Kubernetes secrets (type = `kubernetes.io/dockerconfigjson`) - Using Kubernetes secrets (type = `kubernetes.io/dockerconfigjson`)
#### dockerconfig-based authentication
By default, This snapshotter tries to get creds from `$DOCKER_CONFIG` or `~/.docker/config.json`. By default, This snapshotter tries to get creds from `$DOCKER_CONFIG` or `~/.docker/config.json`.
Following example enables stargz snapshotter to access to private registries using `docker login` command. [`nerdctl login`](https://github.com/containerd/nerdctl) can also be used for this. Following example enables stargz snapshotter to access to private registries using `docker login` command. [`nerdctl login`](https://github.com/containerd/nerdctl) can also be used for this.
Stargz snapshotter doesn't share credentials with containerd so credentials specified by `ctr-remote`'s `--user` option in the example is just for containerd. Stargz snapshotter doesn't share credentials with containerd so credentials specified by `ctr-remote`'s `--user` option in the example is just for containerd.
@ -178,36 +118,7 @@ Stargz snapshotter doesn't share credentials with containerd so credentials spec
# ctr-remote image rpull --user <username>:<password> docker.io/<your-repository>/ubuntu:18.04 # ctr-remote image rpull --user <username>:<password> docker.io/<your-repository>/ubuntu:18.04
``` ```
#### CRI-based authentication Following configuration enables stargz snapshotter to access to private registries using kubernetes secrets (type = `kubernetes.io/dockerconfigjson`) in the cluster using kubeconfig files.
Following configuration (typically located at `/etc/containerd-stargz-grpc/config.toml`) enables stargz snapshotter to pull private images on Kubernetes.
The snapshotter works as a proxy of CRI Image Service and exposes CRI Image Service API on the snapshotter's unix socket (i.e. `/run/containerd-stargz-grpc/containerd-stargz-grpc.sock`).
The snapshotter acquires registry creds by scanning requests.
You must specify `--image-service-endpoint=unix:///run/containerd-stargz-grpc/containerd-stargz-grpc.sock` option to kubelet.
You can specify the backing image service's socket using `image_service_path`.
The default is the containerd's socket (`/run/containerd/containerd.sock`).
```toml
# Stargz Snapshotter proxies CRI Image Service into containerd socket.
[cri_keychain]
enable_keychain = true
image_service_path = "/run/containerd/containerd.sock"
```
The default path where containerd-stargz-grpc serves the CRI Image Service API is `unix:///run/containerd-stargz-grpc/containerd-stargz-grpc.sock`.
You can also change this path using `listen_path` field.
> Note that if you enabled the FUSE manager and CRI-based authentication together, `listen_path` is a mandatory field with some caveats:
> - This path must be different from the FUSE manager's socket path (`/run/containerd-stargz-grpc/fuse-manager.sock`) because they have different lifecycles. Specifically, the CRI socket is recreated on each reload of the configuration to the FUSE manager.
> - containerd-stargz-grpc's socket path (`/run/containerd-stargz-grpc/containerd-stargz-grpc.sock`) can't be used as `listen_path` because the CRI socket is served by the FUSE manager process (not containerd-stargz-grpc process).
#### kubeconfig-based authentication
This is another way to enable lazy pulling of private images on Kubernetes.
Following configuration (typically located at `/etc/containerd-stargz-grpc/config.toml`) enables stargz snapshotter to access to private registries using kubernetes secrets (type = `kubernetes.io/dockerconfigjson`) in the cluster using kubeconfig files.
You can specify the path of kubeconfig file using `kubeconfig_path` option. You can specify the path of kubeconfig file using `kubeconfig_path` option.
It's no problem that the specified file doesn't exist when this snapshotter starts. It's no problem that the specified file doesn't exist when this snapshotter starts.
In this case, snapshotter polls the file until actually provided. In this case, snapshotter polls the file until actually provided.
@ -221,8 +132,7 @@ enable_keychain = true
kubeconfig_path = "/etc/kubernetes/snapshotter/config.conf" kubeconfig_path = "/etc/kubernetes/snapshotter/config.conf"
``` ```
Please note that kubeconfig-based authentication requires additional privilege (i.e. kubeconfig to list/watch secrets) to the node. The config file can be passed to stargz snapshotter using `containerd-stargz-grpc`'s `--config` option.
And this doesn't work if kubelet retrieves creds from somewhere other than the API server (e.g. [credential provider](https://kubernetes.io/docs/tasks/kubelet-credential-provider/kubelet-credential-provider/)).
### Registry mirrors and insecure connection ### Registry mirrors and insecure connection
@ -243,17 +153,6 @@ host = "exampleregistry.io"
insecure = true insecure = true
``` ```
The `header` field allows setting headers to send to the server.
```toml
[[resolver.host."registry2:5000".mirrors]]
host = "registry2:5000"
[resolver.host."registry2:5000".mirrors.header]
x-custom-2 = ["value3", "value4"]
```
> NOTE: Headers aren't passed to the redirected location.
The config file can be passed to stargz snapshotter using `containerd-stargz-grpc`'s `--config` option. The config file can be passed to stargz snapshotter using `containerd-stargz-grpc`'s `--config` option.
## Make your remote snapshotter ## Make your remote snapshotter

View File

@ -1,60 +0,0 @@
# Introduction
FUSE Passthrough has been introduced in the Linux kernel version 6.9 ([Linux Kernel Commit](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=6ce8b2ce0d7e3a621cdc9eb66d74436ca7d0e66e)). This feature has shown significant performance improvements, as detailed in the following articles:
[Phoronix Article on FUSE Passthrough](https://www.phoronix.com/news/FUSE-Passthrough-In-6.9-Next)<br>
FUSE Passthrough allows performing read and write (also via memory maps) on a backing file without incurring the overhead of roundtrips to userspace.
![passhthrough feature](/docs/images/passthrough01.png)
Additionally, the `go-fuse` package, which Stargz-Snapshotter depends on, has also added support for this passthrough feature:
[go-fuse Commit 1](https://github.com/hanwen/go-fuse/commit/e0641a46c6cca7e5370fc135f78caf7cb7fc3aa8#diff-f830ac3db25844bf71102b09e4e02f7213e9cdb577b32745979d61d775462bd3R157)<br>
[go-fuse Commit 2](https://github.com/hanwen/go-fuse/commit/e0a0b09ae8287249c38033a27fd69a3593c7e235#diff-1521152f1fc3600273bda897c669523dc1e9fc9cbe24046838f043a8040f0d67R749)<br>
[go-fuse Commit 3](https://github.com/hanwen/go-fuse/commit/1a7d98b0360f945fca50ac79905332b7106c049f)
When a user-defined file implements the `FilePassthroughFder` interface, `go-fuse` will attempt to register the file's `fd` with the kernel.
# Configuration
## Basic Configuration
To enable FUSE passthrough mode, first verify that your host's kernel supports this feature. You can check this by running the following command:
```bash
$ cat /boot/config-$(uname -r) | grep "CONFIG_FUSE_PASSTHROUGH=y"
CONFIG_FUSE_PASSTHROUGH=y
```
Once you have confirmed kernel support, you need to enable passthrough mode in your `config.toml` file with the following configuration:
```toml
[fuse]
passthrough = true
```
After updating the configuration, specify the `config.toml` file when starting `containerd-stargz-grpc` and restart the service:
```bash
$ containerd-stargz-grpc -config config.toml
```
## Advanced Configuration
In passthrough mode, the initial pull of an image requires merging chunks into a file. This process can be time-consuming, especially for large files.
To optimize the time taken for the initial image pull, you can use the `merge_buffer_size` and `merge_worker_count` configuration options. The `merge_buffer_size` specifies the size of the buffer used for reading the image, with a default value of 400MB. The `merge_worker_count` determines the level of concurrency for reading the image, with a default value of 10.
By concurrently reading chunks and caching them for batch writing, you can significantly enhance the performance of the initial image pull in passthrough mode.
# Important Considerations
When passthrough mode is enabled, the following configuration is applied by default, even if it is set to false in the configuration file:
```toml
[directory_cache]
direct = true
```
This is because, in passthrough mode, read operations after opening a file are handled directly by the kernel.

View File

@ -1,19 +1,16 @@
# Trying pre-converted images # Trying pre-converted images
We have several pre-converted stargz images on Github Container Registry (`ghcr.io/stargz-containers`), mainly for benchmarking purpose. We have several pre-converted stargz images on Github Container Registry, mainly for benchmarking purpose. This doc lists these images in a table format. You can try them on your machine with our snapshotter. Please refer to README for the procedure.
This document lists them.
:information_source: You can build eStargz from Dockerfile using BuildKit, [using Docker Buildx](../README.md#building-estargz-images-using-buildkit) or [Kaniko](../README.md#building-estargz-images-using-kaniko). Please do not use them in production. You always can build your eStargz images optimized for your workload, using [`ctr-remote` command](/docs/ctr-remote.md).
:information_source: You can convert arbitrary images into eStargz optimized for your workload, using [`ctr-remote` command](/docs/ctr-remote.md).
:information_source: You can convert arbitrary images into eStargz on the registry-side, using [`estargz.kontain.me`](https://estargz.kontain.me).
## Pre-converted images ## Pre-converted images
:information_source: You can request new pre-converted images from our CI repository ([`github.com/stargz-containers/image-ci`](https://github.com/stargz-containers/image-ci)). This section contains a table of pre-converted images which can be used for benchmarking, testing, etc.
In the following table, image names listed in `Image Name` contain the following suffixes based on the type of the image. We have pre-converted images on GitHub Container Registry. Images are stored under the repository `ghcr.io/stargz-containers`.
Additionally, image names listed in `Image Name` contain the following suffixes based on the type of the image.
- `org`: Legacy image copied from `docker.io/library` without optimization. Layers are normal tarballs. - `org`: Legacy image copied from `docker.io/library` without optimization. Layers are normal tarballs.
- `esgz`: eStargz-formatted version of the `org` images. `ctr-remote images optimize` command is used for the optimization. - `esgz`: eStargz-formatted version of the `org` images. `ctr-remote images optimize` command is used for the optimization.
@ -22,61 +19,41 @@ In the following table, image names listed in `Image Name` contain the following
|Image Name|Optimized Workload| |Image Name|Optimized Workload|
---|--- ---|---
|`ghcr.io/stargz-containers/alpine:3.15.3-org`|Executing `echo hello` on the shell| |`ghcr.io/stargz-containers/alpine:3.10.2-org`|Executing `echo hello` on the shell|
|`ghcr.io/stargz-containers/alpine:3.15.3-esgz`|Executing `echo hello` on the shell| |`ghcr.io/stargz-containers/alpine:3.10.2-esgz`|Executing `echo hello` on the shell|
|`ghcr.io/stargz-containers/drupal:9.3.9-org`|Code execution until up and ready message (`apache2 -D FOREGROUND`) is printed| |`ghcr.io/stargz-containers/drupal:8.7.6-org`|Code execution until up and ready message (`apache2 -D FOREGROUND`) is printed|
|`ghcr.io/stargz-containers/drupal:9.3.9-esgz`|Code execution until up and ready message (`apache2 -D FOREGROUND`) is printed| |`ghcr.io/stargz-containers/drupal:8.7.6-esgz`|Code execution until up and ready message (`apache2 -D FOREGROUND`) is printed|
|`ghcr.io/stargz-containers/fedora:35-org`|Executing `echo hello` on the shell| |`ghcr.io/stargz-containers/fedora:30-org`|Executing `echo hello` on the shell|
|`ghcr.io/stargz-containers/fedora:35-esgz`|Executing `echo hello` on the shell| |`ghcr.io/stargz-containers/fedora:30-esgz`|Executing `echo hello` on the shell|
|`ghcr.io/stargz-containers/gcc:11.2.0-org`|Compiling and executing a program which prints `hello`| |`ghcr.io/stargz-containers/gcc:10.2.0-org`|Compiling and executing a program which prints `hello`|
|`ghcr.io/stargz-containers/gcc:11.2.0-esgz`|Compiling and executing a program which prints `hello`| |`ghcr.io/stargz-containers/gcc:10.2.0-esgz`|Compiling and executing a program which prints `hello`|
|`ghcr.io/stargz-containers/golang:1.18-org`|Compiling and executing a program which prints `hello`| |`ghcr.io/stargz-containers/golang:1.12.9-org`|Compiling and executing a program which prints `hello`|
|`ghcr.io/stargz-containers/golang:1.18-esgz`|Compiling and executing a program which prints `hello`| |`ghcr.io/stargz-containers/golang:1.12.9-esgz`|Compiling and executing a program which prints `hello`|
|`ghcr.io/stargz-containers/jenkins:2.60.3-org`|Code execution until up and ready message (`Jenkins is fully up and running`) is printed| |`ghcr.io/stargz-containers/jenkins:2.60.3-org`|Code execution until up and ready message (`Jenkins is fully up and running`) is printed|
|`ghcr.io/stargz-containers/jenkins:2.60.3-esgz`|Code execution until up and ready message (`Jenkins is fully up and running`) is printed| |`ghcr.io/stargz-containers/jenkins:2.60.3-esgz`|Code execution until up and ready message (`Jenkins is fully up and running`) is printed|
|`ghcr.io/stargz-containers/jruby:9.3.4-org`|Printing `hello`| |`ghcr.io/stargz-containers/jruby:9.2.8.0-org`|Printing `hello`|
|`ghcr.io/stargz-containers/jruby:9.3.4-esgz`|Printing `hello`| |`ghcr.io/stargz-containers/jruby:9.2.8.0-esgz`|Printing `hello`|
|`ghcr.io/stargz-containers/node:17.8.0-org`|Printing `hello`| |`ghcr.io/stargz-containers/node:13.13.0-org`|Printing `hello`|
|`ghcr.io/stargz-containers/node:17.8.0-esgz`|Printing `hello`| |`ghcr.io/stargz-containers/node:13.13.0-esgz`|Printing `hello`|
|`ghcr.io/stargz-containers/perl:5.34.1-org`|Printing `hello`| |`ghcr.io/stargz-containers/perl:5.30-org`|Printing `hello`|
|`ghcr.io/stargz-containers/perl:5.34.1-esgz`|Printing `hello`| |`ghcr.io/stargz-containers/perl:5.30-esgz`|Printing `hello`|
|`ghcr.io/stargz-containers/php:8.1.4-org`|Printing `hello`| |`ghcr.io/stargz-containers/php:7.3.8-org`|Printing `hello`|
|`ghcr.io/stargz-containers/php:8.1.4-esgz`|Printing `hello`| |`ghcr.io/stargz-containers/php:7.3.8-esgz`|Printing `hello`|
|`ghcr.io/stargz-containers/pypy:3.9-org`|Printing `hello`| |`ghcr.io/stargz-containers/pypy:3.5-org`|Printing `hello`|
|`ghcr.io/stargz-containers/pypy:3.9-esgz`|Printing `hello`| |`ghcr.io/stargz-containers/pypy:3.5-esgz`|Printing `hello`|
|`ghcr.io/stargz-containers/python:3.10-org`|Printing `hello`| |`ghcr.io/stargz-containers/python:3.9-org`|Printing `hello`|
|`ghcr.io/stargz-containers/python:3.10-esgz`|Printing `hello`| |`ghcr.io/stargz-containers/python:3.9-esgz`|Printing `hello`|
|`ghcr.io/stargz-containers/r-base:4.1.3-org`|Printing `hello`| |`ghcr.io/stargz-containers/r-base:3.6.1-org`|Printing `hello`|
|`ghcr.io/stargz-containers/r-base:4.1.3-esgz`|Printing `hello`| |`ghcr.io/stargz-containers/r-base:3.6.1-esgz`|Printing `hello`|
|`ghcr.io/stargz-containers/redis:6.2.6-org`|Code execution until up and ready message (`Ready to accept connections`) is printed| |`ghcr.io/stargz-containers/redis:5.0.5-org`|Code execution until up and ready message (`Ready to accept connections`) is printed|
|`ghcr.io/stargz-containers/redis:6.2.6-esgz`|Code execution until up and ready message (`Ready to accept connections`) is printed| |`ghcr.io/stargz-containers/redis:5.0.5-esgz`|Code execution until up and ready message (`Ready to accept connections`) is printed|
|`ghcr.io/stargz-containers/rethinkdb:2.4.1-org`|Code execution until up and ready message (`Server ready`) is printed| |`ghcr.io/stargz-containers/rethinkdb:2.3.6-org`|Code execution until up and ready message (`Server ready`) is printed|
|`ghcr.io/stargz-containers/rethinkdb:2.4.1-esgz`|Code execution until up and ready message (`Server ready`) is printed| |`ghcr.io/stargz-containers/rethinkdb:2.3.6-esgz`|Code execution until up and ready message (`Server ready`) is printed|
|`ghcr.io/stargz-containers/tomcat:10.1.0-jdk17-openjdk-bullseye-org`|Code execution until up and ready message (`Server startup`) is printed| |`ghcr.io/stargz-containers/tomcat:10.0.0-jdk15-openjdk-buster-org`|Code execution until up and ready message (`Server startup`) is printed|
|`ghcr.io/stargz-containers/tomcat:10.1.0-jdk17-openjdk-bullseye-esgz`|Code execution until up and ready message (`Server startup`) is printed| |`ghcr.io/stargz-containers/tomcat:10.0.0-jdk15-openjdk-buster-esgz`|Code execution until up and ready message (`Server startup`) is printed|
|`ghcr.io/stargz-containers/postgres:14.2-org`|Code execution until up and ready message (`database system is ready to accept connections`) is printed| |`ghcr.io/stargz-containers/postgres:13.1-org`|Code execution until up and ready message (`database system is ready to accept connections`) is printed|
|`ghcr.io/stargz-containers/postgres:14.2-esgz`|Code execution until up and ready message (`database system is ready to accept connections`) is printed| |`ghcr.io/stargz-containers/postgres:13.1-esgz`|Code execution until up and ready message (`database system is ready to accept connections`) is printed|
|`ghcr.io/stargz-containers/wordpress:5.9.2-org`|Code execution until up and ready message (`apache2 -D FOREGROUND`) is printed| |`ghcr.io/stargz-containers/wordpress:5.7-org`|Code execution until up and ready message (`apache2 -D FOREGROUND`) is printed|
|`ghcr.io/stargz-containers/wordpress:5.9.2-esgz`|Code execution until up and ready message (`apache2 -D FOREGROUND`) is printed| |`ghcr.io/stargz-containers/wordpress:5.7-esgz`|Code execution until up and ready message (`apache2 -D FOREGROUND`) is printed|
|`ghcr.io/stargz-containers/mariadb:10.7.3-org`|Code execution until up and ready message (`mysqld: ready for connections`) is printed| |`ghcr.io/stargz-containers/mariadb:10.5-org`|Code execution until up and ready message (`mysqld: ready for connections`) is printed|
|`ghcr.io/stargz-containers/mariadb:10.7.3-esgz`|Code execution until up and ready message (`mysqld: ready for connections`) is printed| |`ghcr.io/stargz-containers/mariadb:10.5-esgz`|Code execution until up and ready message (`mysqld: ready for connections`) is printed|
|`ghcr.io/stargz-containers/php:8.1.4-apache-bullseye-org`|Code execution until up and ready message (`apache2 -D FOREGROUND`) is printed|
|`ghcr.io/stargz-containers/php:8.1.4-apache-bullseye-esgz`|Code execution until up and ready message (`apache2 -D FOREGROUND`) is printed|
|`ghcr.io/stargz-containers/rabbitmq:3.9.14-org`|Code execution until up and ready message (`Server startup complete`) is printed|
|`ghcr.io/stargz-containers/rabbitmq:3.9.14-esgz`|Code execution until up and ready message (`Server startup complete`) is printed|
|`ghcr.io/stargz-containers/elasticsearch:8.1.1-org`|Code execution until up and ready message (`started`) is printed|
|`ghcr.io/stargz-containers/elasticsearch:8.1.1-esgz`|Code execution until up and ready message (`started`) is printed|
|`ghcr.io/stargz-containers/nixos/nix:2.3.12-org`|Executing `echo hello` on the shell|
|`ghcr.io/stargz-containers/nixos/nix:2.3.12-esgz`|Executing `echo hello` on the shell|
## lazy-pulling-enabled KinD node image
You can enable lazy pulling of eStargz on [KinD](https://github.com/kubernetes-sigs/kind) using our prebuilt node image [`ghcr.io/containerd/stargz-snapshotter:${VERSION}-kind`](https://github.com/orgs/containerd/packages/container/package/stargz-snapshotter) namespace.
Example:
```console
$ kind create cluster --name stargz-demo --image ghcr.io/containerd/stargz-snapshotter:0.12.1-kind
```
Please refer to README for more details.

View File

@ -1,56 +0,0 @@
# Rootless execution of stargz snapshotter
This document lists links and information about how to run Stargz Snapshotter and Stargz Store from the non-root user.
## nerdctl (Stargz Snapshotter)
Rootless Stargz Snapshotter for nerdctl can be installed via `containerd-rootless-setuptool.sh install-stargz` command.
Please see [the doc in nerdctl repo](https://github.com/containerd/nerdctl/blob/v1.1.0/docs/rootless.md#stargz-snapshotter) for details.
## Podman (Stargz Store)
> NOTE: This is an experimental configuration leveraging [`podman unshare`](https://docs.podman.io/en/latest/markdown/podman-unshare.1.html). Limitation: `--uidmap` of `podman run` doesn't work.
First, allow podman using Stargz Store by adding the following store configuration.
Put the configuration file to [`/etc/containers/storage.conf` or `$HOME/.config/containers/storage.conf`](https://github.com/containers/podman/blob/v4.3.1/docs/tutorials/rootless_tutorial.md#storageconf).
> NOTE: Replace `/path/to/homedir` with the actual home directory.
```
[storage]
driver = "overlay"
[storage.options]
additionallayerstores = ["/path/to/homedir/.local/share/stargz-store/store:ref"]
```
Start Stargz Store in the namespace managed by podman via [`podman unshare`](https://docs.podman.io/en/latest/markdown/podman-unshare.1.html) command.
```
$ podman unshare stargz-store --root $HOME/.local/share/stargz-store/data $HOME/.local/share/stargz-store/store &
```
Podman performs lazy pulling when it pulls eStargz images.
```
$ podman pull ghcr.io/stargz-containers/python:3.9-esgz
```
<details>
<summary>Creating systemd unit file for Stargz Store</summary>
It's possible to create systemd unit file of Stargz Store for easily managing it.
An example systemd unit file can be found [here](../script/podman/config/podman-rootless-stargz-store.service)
After installing that file (e.g. to `$HOME/.config/systemd/user/`), start the service using `systemctl`.
```
$ systemctl --user start podman-rootless-stargz-store
```
</details>
## BuildKit (Stargz Snapshotter)
BuildKit supports running Stargz Snapshotter from the non-root user.
Please see [the doc in BuildKit repo](https://github.com/moby/buildkit/blob/8b132188aa7af944c813d02da63c93308d83cf75/docs/stargz-estargz.md) (unmerged 2023/1/18) for details.

View File

@ -1,79 +0,0 @@
# Creating smaller eStargz images
The following flags of `ctr-remote i convert` and `ctr-remote i optimize` allow users optionally creating smaller eStargz images.
- `--estargz-external-toc`: Separate TOC JSON into another image (called "TOC image"). The result eStargz doesn't contain TOC so we can expect a smaller size than normal eStargz.
- `--estargz-min-chunk-size`: The minimal number of bytes of data must be written in one gzip stream. If it's > 0, multiple files and chunks can be written into one gzip stream. Smaller number of gzip header and smaller size of the result blob can be expected. `--estargz-min-chunk-size=0` produces normal eStargz.
## `--estargz-external-toc` usage
convert:
```console
# ctr-remote i pull ghcr.io/stargz-containers/ubuntu:22.04
# ctr-remote i convert --oci --estargz --estargz-external-toc ghcr.io/stargz-containers/ubuntu:22.04 registry2:5000/ubuntu:22.04-ex
```
Layers in eStargz (`registry2:5000/ubuntu:22.04-ex`) don't contain TOC JSON.
TOC image (`registry2:5000/ubuntu:22.04-ex-esgztoc`) contains TOC of all layers of the eStargz image.
Suffix `-esgztoc` is automatically added to the image name by `ctr-remote`.
Then push eStargz(`registry2:5000/ubuntu:22.04-ex`) and TOC image(`registry2:5000/ubuntu:22.04-ex-esgztoc`) to the same registry:
```console
# ctr-remote i push --plain-http registry2:5000/ubuntu:22.04-ex
# ctr-remote i push --plain-http registry2:5000/ubuntu:22.04-ex-esgztoc
```
Pull it lazily:
```console
# ctr-remote i rpull --plain-http registry2:5000/ubuntu:22.04-ex
fetching sha256:14fb0ea2... application/vnd.oci.image.index.v1+json
fetching sha256:24471b45... application/vnd.oci.image.manifest.v1+json
fetching sha256:d2e4737e... application/vnd.oci.image.config.v1+json
# mount | grep "stargz on"
stargz on /var/lib/containerd-stargz-grpc/snapshotter/snapshots/1/fs type fuse.rawBridge (rw,nodev,relatime,user_id=0,group_id=0,allow_other)
```
Stargz Snapshotter automatically refers to the TOC image on the same registry.
### optional `--estargz-keep-diff-id` flag for conversion without changing layer diffID
`ctr-remote i convert` supports optional flag `--estargz-keep-diff-id` specified with `--estargz-external-toc`.
This converts an image to eStargz without changing the diffID (uncompressed digest) so even eStargz-agnostic gzip decompressor (e.g. gunzip) can restore the original tar blob.
```console
# ctr-remote i pull ghcr.io/stargz-containers/ubuntu:22.04
# ctr-remote i convert --oci --estargz --estargz-external-toc --estargz-keep-diff-id ghcr.io/stargz-containers/ubuntu:22.04 registry2:5000/ubuntu:22.04-ex-keepdiff
# ctr-remote i push --plain-http registry2:5000/ubuntu:22.04-ex-keepdiff
# ctr-remote i push --plain-http registry2:5000/ubuntu:22.04-ex-keepdiff-esgztoc
# crane --insecure blob registry2:5000/ubuntu:22.04-ex-keepdiff@sha256:2dc39ba059dcd42ade30aae30147b5692777ba9ff0779a62ad93a74de02e3e1f | jq -r '.rootfs.diff_ids[]'
sha256:7f5cbd8cc787c8d628630756bcc7240e6c96b876c2882e6fc980a8b60cdfa274
# crane blob ghcr.io/stargz-containers/ubuntu:22.04@sha256:2dc39ba059dcd42ade30aae30147b5692777ba9ff0779a62ad93a74de02e3e1f | jq -r '.rootfs.diff_ids[]'
sha256:7f5cbd8cc787c8d628630756bcc7240e6c96b876c2882e6fc980a8b60cdfa274
```
## `--estargz-min-chunk-size` usage
conversion:
```console
# ctr-remote i pull ghcr.io/stargz-containers/ubuntu:22.04
# ctr-remote i convert --oci --estargz --estargz-min-chunk-size=50000 ghcr.io/stargz-containers/ubuntu:22.04 registry2:5000/ubuntu:22.04-chunk50000
# ctr-remote i push --plain-http registry2:5000/ubuntu:22.04-chunk50000
```
Pull it lazily:
```console
# ctr-remote i rpull --plain-http registry2:5000/ubuntu:22.04-chunk50000
fetching sha256:5d1409a2... application/vnd.oci.image.index.v1+json
fetching sha256:859e2b50... application/vnd.oci.image.manifest.v1+json
fetching sha256:c07a44b9... application/vnd.oci.image.config.v1+json
# mount | grep "stargz on"
stargz on /var/lib/containerd-stargz-grpc/snapshotter/snapshots/1/fs type fuse.rawBridge (rw,nodev,relatime,user_id=0,group_id=0,allow_other)
```
> NOTE: This flag creates an eStargz image with newly-added `innerOffset` functionality of eStargz. Stargz Snapshotter < v0.13.0 cannot perform lazy pulling for the images created with this flag.

View File

@ -1,3 +1,310 @@
# eStargz: Standard-Compatible Extensions to Tar.gz Layers for Lazy Pulling Container Images # eStargz: Standard-Compatible Extensions to Tar.gz Layers for Lazy Pulling Container Images
Moved to [`/docs/estargz.md`](/docs/estargz.md). This doc describes the extension to image layers for enabling *lazy image pulling*.
The extended layer format is called *eStargz* in this project.
eStargz is backward-compatible to tar.gz layers used in the current [OCI](https://github.com/opencontainers/image-spec/)/[Docker](https://github.com/moby/moby/blob/master/image/spec/v1.2.md) Image Specs so eStargz-formatted images can be pushed to and lazily pulled from standard registries.
Furthermore, they can run even on extension-agnostic runtimes (e.g. Docker).
This extension is based on stargz (stands for *seekable tar.gz*) proposed by [Google CRFS](https://github.com/google/crfs) project (initially [discussed in Go community](https://github.com/golang/go/issues/30829)).
eStargz is an extended-version of stargz and comes with additional features including chunk-level verification and runtime performance optimization.
Notational convention follows [OCI Image Spec](https://github.com/opencontainers/image-spec/blob/v1.0.1/spec.md#notational-conventions).
## Overview
When lazily pulling an image from the registry, necessary chunks of its layers are fetched *on-demand* during running the container, instead of downloading the entire contents of that image at once.
For achieving this, runtimes need to *selectively* fetch and extract files contents in the layer.
However, current OCI/Docker Image Spec uses tar (optionally with compression) for archiving layers, which doesn't suit this use-case because of the following reasons,
1. The entire archive needs to be scanned even for finding and extracting a single file.
2. If the archive is compressed by gzip, this is no longer seekable.
3. File entries in the archive cannot be verified separately (In Docker/OCI specs, verification is done for *the entire contents of the layer*, not per entry).
eStargz is a tar.gz-compatible archive format which solves these issues and enables lazy pulling.
Each file (or chunk for large files) in eStargz can be extracted selectively and verified separately.
Additionally, eStargz has a feature called *prioritized files* for mitigating runtime performance drawbacks caused by on-demand fetching of each file/chunk.
This format is compatible to tar.gz so eStargz layers are storable to container registries, lazily-pullable from container registries and still runnable even on eStargz-agnostic runtimes.
This doc defines the basic structure of eStargz layer that has the above features.
For details about content verification in eStargz, please refer to [Content Verification in eStargz](/docs/verification.md).
## The structure
![The structure of eStargz](/docs/images/estargz-structure.png)
In eStargz archive, each non-empty regular file is separately compressed by gzip.
This structure is inherited from [stargz](https://github.com/google/crfs).
The gzip headers MUST locate at the following locations.
- The top of the tar archive
- The top of the payload of each non-empty regular file entry except *TOC*
- The top of *TOC* tar header
- The top of *footer* (described in the later section)
The gzip headers MAY locate at the following locations.
- The end of the payload of each non-empty regular file entry
- Arbitrary location within the payload of non-empty regular file entry
The gzip header locations described in the second item MAY be used for chunking large regular files into several gzip members.
Each chunked member is called *chunk* in this doc.
An eStargz archive is the concatenation of these gzip members, which is a still valid gzip.
## TOC, TOCEntries and Footer
### TOC and TOCEntries
A regular file entry called *TOC* MUST be contained as the last tar entry in the archive.
TOC MUST be a JSON file and MUST be named `stargz.index.json`.
TOC records all file's metadata (e.g. name, file type, owners, offset etc) in the tar archive, except TOC itself.
The TOC is defined as the following.
- **`version`** *int*
This REQUIRED property contains the version of the TOC. This value MUST be `1`.
- **`entries`** *array of objects*
Each item in the array MUST be a TOCEntry.
This property MUST contain TOCEntries that reflect all tar entries and chunks, except `stargz.index.json`.
The TOCEntry is defined as the following.
If the information written in TOCEntry differs from the corresponding tar entry, TOCEntry SHOULD be respected.
TOCEntries fields other than `chunkDigest` are inherited from [stargz](https://github.com/google/crfs).
- **`name`** *string*
This REQUIRED property contains the name of the tar entry.
This MUST be the complete path stored in the tar file.
- **`type`** *string*
This REQUIRED property contains the type of the tar entry.
This MUST be either of the following.
- `dir`: directory
- `reg`: regular file
- `symlink`: symbolic link
- `hardlink`: hard link
- `char`: character device
- `block`: block device
- `fifo`: fifo
- `chunk`: a chunk of regular file data
As described in the above section, a regular file can be divided into several chunks.
Corresponding to the first chunk of that file, TOCEntry typed `reg` MUST be contained.
Corresponding to the chunks after 2nd, TOCEntries typed `chunk` MUST be contained.
`chunk`-typed TOCEntry must set offset, chunkOffset and chunkSize properties.
- **`size`** *uint64*
This OPTIONAL property contains the uncompressed size of the regular file tar entry.
- **`modtime`** *string*
This OPTIONAL property contains the modification time of the tar entry.
Empty means zero or unknown.
Otherwise, the value is in UTC RFC3339 format.
- **`linkName`** *string*
This OPTIONAL property contains the link target of `symlink` and `hardlink`.
- **`mode`** *int64*
This OPTIONAL property contains the permission and mode bits.
- **`uid`** *uint*
This OPTIONAL property contains the user ID of the owner of this file.
- **`gid`** *uint*
This OPTIONAL property contains the group ID of the owner of this file.
- **`userName`** *string*
This OPTIONAL property contains the username of the owner.
- **`groupName`** *string*
This OPTIONAL property contains the groupname of the owner.
- **`offset`** *int64*
This OPTIONAL property contains the offset of the gzip header of the regular file or chunk in the archive.
- **`devMajor`** *int*
This OPTIONAL property contains the major device number for character and block device files.
- **`devMinor`** *int*
This OPTIONAL property contains the minor device number for character and block device files.
- **`xattrs`** *string-bytes map*
This OPTIONAL property contains the extended attribute for the tar entry.
- **`digest`** *string*
This OPTIONAL property contains the OCI [Digest](https://github.com/opencontainers/image-spec/blob/v1.0.1/descriptor.md#digests) of the regular file contents.
TOCEntries of non-empty `reg` file MUST set this property.
- **`chunkOffset`** *int64*
This OPTIONAL property contains the offset of this chunk in the regular file payload.
Note that this is the offset of this chunk in the decompressed file content.
TOCEntries of `chunk` type MUST set this property.
- **`chunkSize`** *int64*
This OPTIONAL property contains the decompressed size of this chunk.
The last `chunk` in a `reg` file or `reg` file that isn't chunked MUST set this property to zero.
Other `reg` and `chunk` MUST set this property.
- **`chunkDigest`** *string*
This OPTIONAL property contains an OCI [Digest](https://github.com/opencontainers/image-spec/blob/v1.0.1/descriptor.md#digests) of this chunk.
TOCEntries of non-empty `reg` and `chunk` MUST set this property.
This MAY be used for verifying the data of this entry in the way described in [Content Verification in eStargz](/docs/verification.md).
### Footer
At the end of the archive, a *footer* MUST be appended.
This MUST be an empty gzip member ([RFC1952](https://tools.ietf.org/html/rfc1952)) whose [Extra field](https://tools.ietf.org/html/rfc1952#section-2.3.1.1) contains the offset of TOC in the archive.
The footer MUST be the following 51 bytes (1 byte = 8 bits in gzip).
```
- 10 bytes gzip header
- 2 bytes XLEN (length of Extra field) = 26 (4 bytes header + 16 hex digits + len("STARGZ"))
- 2 bytes Extra: SI1 = 'S', SI2 = 'G'
- 2 bytes Extra: LEN = 22 (16 hex digits + len("STARGZ"))
- 22 bytes Extra: subfield = fmt.Sprintf("%016xSTARGZ", offsetOfTOC)
- 5 bytes flate header: BFINAL = 1(last block), BTYPE = 0(non-compressed block), LEN = 0
- 8 bytes gzip footer
(End of eStargz)
```
Runtimes MAY first read and parse the footer of the archive to get the offset of TOC.
Each file's metadata is recorded in the TOC so runtimes don't need to extract other parts of the archive as long as it only uses file metadata.
If runtime needs to get a regular file's content, it MAY get size and offset information of that content from the TOC and MAY extract that range without scanning the whole archive.
By combining this with HTTP Range Request supported by [OCI Distribution Spec](https://github.com/opencontainers/distribution-spec/blob/master/spec.md#fetch-blob-part) and [Docker Registry API](https://docs.docker.com/registry/spec/api/#fetch-blob-part), runtimes can selectively download file entries from registries.
### Notes on compatibility with stargz
eStargz is designed aiming to the compatibility with tar.gz.
For achieving this, eStargz's footer structure is incompatible to [stargz's one](https://github.com/google/crfs/blob/71d77da419c90be7b05d12e59945ac7a8c94a543/stargz/stargz.go#L36-L49).
eStargz adds SI1, SI2 and LEN fields to the footer for making it compliant to [Extra field definition in RFC1952](https://tools.ietf.org/html/rfc1952#section-2.3.1.1).
TOC, TOCEntry and the position of gzip headers are still compatible with stargz.
## Prioritized Files and Landmark Files
![Prioritized files and landmark files](/docs/images/estargz-landmark.png)
Lazy pulling costs extra time for reading files which induces remotely fetching file contents.
The eStargz archive mitigates this problem with the ability to indicate the likely accessed files called *prioritized files*.
Runtimes can leverage this information (e.g. for prefetching prioritized files) for increasing cache hit ratio and mitigating the read overhead (example usage of this information in Stargz Snapshotter is described in the later section).
eStargz indicates the information about prioritized files as the *order* of file entries, with some [*landmark* file entries](https://github.com/containerd/stargz-snapshotter/blob/28af649b55ac39efc547b2e7f14f81a33a8212e1/stargz/fs.go#L93-L99).
File entries in eStargz are grouped into the following groups,
- A. files *likely accessed* by containers during runtime (i.e. prioritized files), and
- B. files not likely accessed
If there are no files belonging to A, a landmark file *no-prefetch landmark* MUST be contained in the archive.
If there are files belonging to A, an eStargz archive MUST be made with two separated areas corresponding to these groups and a landmark file *prefetch landmark* MUST be contained at the border between these two areas.
That is, entries stored in the range between the top and the prefetch landmark are likely accessed during runtime.
Both of landmark files MUST be regular file entries with 4 bits contents 0xf.
Prefetch landmark MUST be registered to TOC as a TOCEntry named `.prefetch.landmark` and no-prefetch landmark MUST be registered as a TOCEntry named `.no.prefetch.landmark`.
On container startup, the runtime SHOULD prefetch the range where prioritized files are contained.
When the runtime finds no-prefetch landmark, it SHOULD NOT prefetch anything.
## Example use-case of prioritized files: workload-based image optimization in Stargz Snapshotter
Stargz Snapshotter makes use of eStargz's prioritized files for *workload-based* optimization for mitigating overhead of reading files.
Generally, container images are built with purpose and the workloads are determined at the build.
In many cases, a workload is defined in the Dockerfile using some parameters including entrypoint command, environment variables and user.
Stargz snapshotter provides an image converter command `ctr-remote images optimize`.
This leverages eStargz archive format and mitigates reading performance for files that are *likely accessed* in the workload defined in the Dockerfile.
When converting the image, this command runs the specified workload in a sandboxed environment and profiles all file accesses.
This command regards all accessed files as likely accessed also in production (i.e. prioritized files).
Then it constructs eStargz archive by
- locating accessed files from top of the archive, with sorting them by the accessed order,
- putting prefetch landmark file entry at the end of this range, and
- locating all other files (not accessed files) after the prefetch landmark.
Before running the container, stargz snapshotter prefetches and pre-caches the range where prioritized files are contained, by a single HTTP Range Request.
This can increase the cache hit rate for the specified workload and can mitigate runtime overheads.
## Example of TOC
You can inspect TOC JSON generated by `ctr-remote` converter like the following:
```
ctr-remote image pull ghcr.io/stargz-containers/alpine:3.10.2-org
ctr-remote image optimize ghcr.io/stargz-containers/alpine:3.10.2-org alpine:3.10.2-esgz
ctr-remote content get sha256:42d069d45aac902b9ad47365613f517bbcfb567674bd78a36fbfe7c2e1ca4d75 \
| tar xzOf - stargz.index.json | jq
```
Then you will get the TOC JSON something like:
```json
{
"version": 1,
"entries": [
{
"name": "bin/",
"type": "dir",
"modtime": "2019-08-20T10:30:43Z",
"mode": 16877,
"NumLink": 0
},
{
"name": "bin/busybox",
"type": "reg",
"size": 833104,
"modtime": "2019-06-12T17:52:45Z",
"mode": 33261,
"offset": 126,
"NumLink": 0,
"digest": "sha256:8b7c559b8cccca0d30d01bc4b5dc944766208a53d18a03aa8afe97252207521f",
"chunkDigest": "sha256:8b7c559b8cccca0d30d01bc4b5dc944766208a53d18a03aa8afe97252207521f"
},
{
"name": "lib/",
"type": "dir",
"modtime": "2019-08-20T10:30:43Z",
"mode": 16877,
"NumLink": 0
},
{
"name": "lib/ld-musl-x86_64.so.1",
"type": "reg",
"size": 580144,
"modtime": "2019-08-07T07:15:30Z",
"mode": 33261,
"offset": 512427,
"NumLink": 0,
"digest": "sha256:45c6ee3bd1862697eab8058ec0e462f5a760927331c709d7d233da8ffee40e9e",
"chunkDigest": "sha256:45c6ee3bd1862697eab8058ec0e462f5a760927331c709d7d233da8ffee40e9e"
},
{
"name": ".prefetch.landmark",
"type": "reg",
"size": 1,
"offset": 886633,
"NumLink": 0,
"digest": "sha256:dc0e9c3658a1a3ed1ec94274d8b19925c93e1abb7ddba294923ad9bde30f8cb8",
"chunkDigest": "sha256:dc0e9c3658a1a3ed1ec94274d8b19925c93e1abb7ddba294923ad9bde30f8cb8"
},
... (omit) ...
```

View File

@ -1,99 +0,0 @@
# Enabling Stargz Snapshotter With Transfer Service
Transfer Service is a containerd component which is used for image management in containerd (e.g. pulling and pushing images).
For details about Transfer Service, refer to [the official document in the containerd repo](https://github.com/containerd/containerd/blob/6af7c07905a317d4c343a49255e2392f4c8569f9/docs/transfer.md).
To use Stargz Snapshotter on containerd with Transfer Service enabled, additional configuration is needed.
## Availability of Transfer Service
Transfer Service is available since v1.7.
And this is enabled in different settings depending on the containerd version.
|containerd version|`ctr`|CRI|
---|---|---
|containerd >= v1.7 and < v2.0|Disabled by default. Enabled by `--local=false`|Disabled|
|containerd >= v2.0 and < v2.1|Enabled by default. Disabled by `--local`|Disabled|
|containerd >= v2.1|Enabled by default. Disabled by `--local`|Enabled by default. Disabled when conditions described in [containerd's CRI document](https://github.com/containerd/containerd/blob/v2.1.0/docs/cri/config.md#image-pull-configuration-since-containerd-v21) are met|
### Note about containerd v2.1
Before containerd v2.1, `disable_snapshot_annotations = false` in containerd's config TOML was a mandatory field to enable Stargz Snapshotter in CRI.
In containerd v2.1, `disable_snapshot_annotations = false` field can still be used to enable Stargz Snapshotter and containerd disables Transfer Service when this field is detected.
If you want to enable Transfer Service, you need to remove the `disable_snapshot_annotations = false` field and apply the configuration explained in this document.
## How to enable Stargz Snapshotter when Transfer Service is enabled?
In containerd v2.1, Transfer Service added support for remote snapshotters like Stargz Snapshotter.
### For ctr and other non-CRI clients
To enable Stargz Snapshotter with Transfer Service, you need to start containerd-stargz-grpc on the node and add the following configuration to containerd's config TOML file.
Note that you need to add a field `enable_remote_snapshot_annotations = "true"` in `proxy_plugins.stargz.exports` so that containerd can correctly pass image-related information to Stargz Snapshotter.
```toml
version = 2
# Enable Stargz Snapshotter in Transfer Service
[[plugins."io.containerd.transfer.v1.local".unpack_config]]
platform = "linux"
snapshotter = "stargz"
# Plugin Stargz Snapshotter
[proxy_plugins]
[proxy_plugins.stargz]
type = "snapshot"
address = "/run/containerd-stargz-grpc/containerd-stargz-grpc.sock"
[proxy_plugins.stargz.exports]
root = "/var/lib/containerd-stargz-grpc/"
enable_remote_snapshot_annotations = "true"
```
#### Example client command
When you enable Transfer Service with Stargz Snapshotter, you can perform lazy pulling using the normal `ctr` command. (of course, `ctr-remote` can still be used)
```
# ctr image pull --snapshotter=stargz ghcr.io/stargz-containers/ubuntu:24.04-esgz
```
Then `mount | grep stargz` prints stargz mounts on the node.
### For CRI
To enable Stargz Snapshotter with Transfer Service, you need to start containerd-stargz-grpc on the node and add the following configuration to containerd's config TOML file.
```toml
version = 2
# Basic CRI configuration with enabling Stargz Snapshotter
[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "runc"
snapshotter = "stargz"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"
# Enable Stargz Snapshotter in Transfer Service
[[plugins."io.containerd.transfer.v1.local".unpack_config]]
platform = "linux"
snapshotter = "stargz"
# Plugin Stargz Snapshotter
[proxy_plugins]
[proxy_plugins.stargz]
type = "snapshot"
address = "/run/containerd-stargz-grpc/containerd-stargz-grpc.sock"
[proxy_plugins.stargz.exports]
root = "/var/lib/containerd-stargz-grpc/"
enable_remote_snapshot_annotations = "true"
```
#### Example client command
You can quickly check the behaviour using `crictl` command.
```
# crictl image pull ghcr.io/stargz-containers/ubuntu:24.04-esgz
```
Then `mount | grep stargz` prints stargz mounts on the node.

View File

@ -1,3 +1,61 @@
# Content Verification in eStargz # Content Verification in eStargz
Moved to [`/docs/estargz.md`](/docs/estargz.md). The goal of the content verification in eStargz is to ensure the downloaded metadata and contents of all files are the expected ones, based on the calculated OCI [_digests_](https://github.com/opencontainers/image-spec/blob/v1.0.1/descriptor.md#digests).
The verification of other components in the image including image manifests is out-of-scope.
On the verification step of an eStargz layer, we assume that the [_manifest_](https://github.com/opencontainers/image-spec/blob/v1.0.1/manifest.md) that references this eStargz layer is verified in containerd in advance (using digest tag or [`Docker-Content-Digest` header](https://docs.docker.com/registry/spec/api/#digest-header), etc).
![the overview of the verification](/docs/images/verification.png)
## Verifiable eStargz
For a layer that isn't lazily pulled (i.e. traditional tar.gz layer), it can be verified by recalculating the digest and compare it with the one written in the layer [_descriptor_](https://github.com/opencontainers/image-spec/blob/v1.0.1/descriptor.md) referencing that layer in the verified manifest.
However, an eStargz layer is **lazily** pulled from the registry in file (or chunk if that file is large) granularity.
So it's not possible to recalculate and verify the digest of the entire layer on mount.
Assuming that the manifest referencing the eStargz layer has already been verified, we verify that eStargz layer as the following.
When stargz snapshotter lazily pulls an eStargz layer, the following components will be fetched from the registry.
- TOC (a set of metadata of all files contained in the layer)
- chunks of regular file contents
As mentioned in [eStargz documentation](/docs/stargz-estargz.md), eStargz contains an index file called _TOC_.
Not only offset information of file entries, it [contains metadata (name, type, mode, etc.) of all files contained in the layer blob](https://github.com/google/crfs/blob/71d77da419c90be7b05d12e59945ac7a8c94a543/stargz/stargz.go#L214-L218).
On mount the layer, filesystem fetches the TOC from the registry.
For making the TOC verifiable using the manifest, we define an [_annotation_](https://github.com/opencontainers/image-spec/blob/v1.0.1/descriptor.md#properties) `containerd.io/snapshot/stargz/toc.digest`.
The value of this annotation is the digest of the TOC and this annotation must be contained in descriptors that references this eStargz layer in the manifest.
Using this annotation, filesystem can verify the TOC by recalculating the digest and compare it to the one written in the verified manifest.
Each file's metadata (name, type, mode, etc.) is formed as a [_TOCEntry_](https://github.com/google/crfs/blob/71d77da419c90be7b05d12e59945ac7a8c94a543/stargz/stargz.go#L109-L191) in the TOC.
TOCEntry is also created for each chunk of regular file content.
For making each chunk verifiable using the manifest, eStargz extends the TOCEntry definition with [an optional field `chunkDigest`](https://github.com/containerd/stargz-snapshotter/blob/b53e8fe8d37751753bc623b037729b6a6d9c1122/stargz/verify/verify.go#L56-L64).
`chunkDigest` is a field to contain the digest of each chunk.
As mentioned in the above, the TOC is verifiable using the manifest with the special annotation.
So using `chunkDigest` fields, filesystem can verify each chunk by recalculating the digest and compare it to the one written in the verified TOC.
In conclusion, the following conditions must be met for eStargz.
- the digest of the TOC is contained in the annotation(`containerd.io/snapshot/stargz/toc.digest`) of descriptors that references this layer, and
- `chunkDigest` fields of all chunks in the TOC are filled with the digests of their contents.
`ctr-remote images optimize` command in this project creates the verifiable eStargz image by default.
## Example use case: Content verification in Stargz Snapshotter
Stargz Snapshotter verifies eStargz layers leveraging the above extensions.
However, as mentioned in the above, the verification of other image components including the manifests is out-of-scope of the snapshotter.
So when this snapshotter mounts an eStargz layer, the manifest that references this layer must be verified in the containerd in advance and the TOC's digest written in the manifest (as a layer annotation `containerd.io/snapshot/stargz/toc.digest`) must be passed down to this snapshotter.
This annotation is valid only when it is specified in `.[]layers.annotations` of [an image manifest](https://github.com/opencontainers/image-spec/blob/v1.0.1/manifest.md#image-manifest-property-descriptions).
If the layer doesn't contain `containerd.io/snapshot/stargz/toc.digest` annotation, verification can't be done for that layer so stargz snapshotter reports an error and doesn't mount it.
You can bypass this check only if both of the following conditions are met.
- `allow_no_verification = true` is specified in `config.toml` of stargz snapshotter, and
- the content descriptor of this layer has an annotation `containerd.io/snapshot/remote/stargz.skipverify` (the value will be ignored).
The other way is to disable verification completely by setting `disable_verification = true` in `config.toml` of stargz snapshotter.
On mounting a layer, stargz snapshotter fetches this layer's TOC from the registry.
Then it verifies the TOC by recalculating the digest and comparing it with the one passed from containerd (written in the manifest).
If the TOC is successfully verified, then the snapshotter mounts this layer using the metadata stored in the TOC.
During runtime of the container, this snapshotter fetches chunks of regular files lazily.
Before providing a chunk to the filesystem user, snapshotter recalculates the digest and checks it matches the one contained in the corresponding TOCEntry in the TOC.

View File

@ -26,10 +26,10 @@ import (
"archive/tar" "archive/tar"
"bytes" "bytes"
"compress/gzip" "compress/gzip"
"context" "encoding/json"
"errors"
"fmt" "fmt"
"io" "io"
"io/ioutil"
"os" "os"
"path" "path"
"runtime" "runtime"
@ -39,6 +39,7 @@ import (
"github.com/containerd/stargz-snapshotter/estargz/errorutil" "github.com/containerd/stargz-snapshotter/estargz/errorutil"
"github.com/klauspost/compress/zstd" "github.com/klauspost/compress/zstd"
digest "github.com/opencontainers/go-digest" digest "github.com/opencontainers/go-digest"
"github.com/pkg/errors"
"golang.org/x/sync/errgroup" "golang.org/x/sync/errgroup"
) )
@ -47,9 +48,6 @@ type options struct {
compressionLevel int compressionLevel int
prioritizedFiles []string prioritizedFiles []string
missedPrioritizedFiles *[]string missedPrioritizedFiles *[]string
compression Compression
ctx context.Context
minChunkSize int
} }
type Option func(o *options) error type Option func(o *options) error
@ -64,7 +62,6 @@ func WithChunkSize(chunkSize int) Option {
// WithCompressionLevel option specifies the gzip compression level. // WithCompressionLevel option specifies the gzip compression level.
// The default is gzip.BestCompression. // The default is gzip.BestCompression.
// This option will be ignored if WithCompression option is used.
// See also: https://godoc.org/compress/gzip#pkg-constants // See also: https://godoc.org/compress/gzip#pkg-constants
func WithCompressionLevel(level int) Option { func WithCompressionLevel(level int) Option {
return func(o *options) error { return func(o *options) error {
@ -98,35 +95,6 @@ func WithAllowPrioritizeNotFound(missedFiles *[]string) Option {
} }
} }
// WithCompression specifies compression algorithm to be used.
// Default is gzip.
func WithCompression(compression Compression) Option {
return func(o *options) error {
o.compression = compression
return nil
}
}
// WithContext specifies a context that can be used for clean canceleration.
func WithContext(ctx context.Context) Option {
return func(o *options) error {
o.ctx = ctx
return nil
}
}
// WithMinChunkSize option specifies the minimal number of bytes of data
// must be written in one gzip stream.
// By increasing this number, one gzip stream can contain multiple files
// and it hopefully leads to smaller result blob.
// NOTE: This adds a TOC property that old reader doesn't understand.
func WithMinChunkSize(minChunkSize int) Option {
return func(o *options) error {
o.minChunkSize = minChunkSize
return nil
}
}
// Blob is an eStargz blob. // Blob is an eStargz blob.
type Blob struct { type Blob struct {
io.ReadCloser io.ReadCloser
@ -158,33 +126,13 @@ func Build(tarBlob *io.SectionReader, opt ...Option) (_ *Blob, rErr error) {
return nil, err return nil, err
} }
} }
if opts.compression == nil {
opts.compression = newGzipCompressionWithLevel(opts.compressionLevel)
}
layerFiles := newTempFiles() layerFiles := newTempFiles()
ctx := opts.ctx
if ctx == nil {
ctx = context.Background()
}
done := make(chan struct{})
defer close(done)
go func() {
select {
case <-done:
// nop
case <-ctx.Done():
layerFiles.CleanupAll()
}
}()
defer func() { defer func() {
if rErr != nil { if rErr != nil {
if err := layerFiles.CleanupAll(); err != nil { if err := layerFiles.CleanupAll(); err != nil {
rErr = fmt.Errorf("failed to cleanup tmp files: %v: %w", err, rErr) rErr = errors.Wrapf(rErr, "failed to cleanup tmp files: %v", err)
} }
} }
if cErr := ctx.Err(); cErr != nil {
rErr = fmt.Errorf("error from context %q: %w", cErr, rErr)
}
}() }()
tarBlob, err := decompressBlob(tarBlob, layerFiles) tarBlob, err := decompressBlob(tarBlob, layerFiles)
if err != nil { if err != nil {
@ -194,14 +142,7 @@ func Build(tarBlob *io.SectionReader, opt ...Option) (_ *Blob, rErr error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
var tarParts [][]*entry tarParts := divideEntries(entries, runtime.GOMAXPROCS(0))
if opts.minChunkSize > 0 {
// Each entry needs to know the size of the current gzip stream so they
// cannot be processed in parallel.
tarParts = [][]*entry{entries}
} else {
tarParts = divideEntries(entries, runtime.GOMAXPROCS(0))
}
writers := make([]*Writer, len(tarParts)) writers := make([]*Writer, len(tarParts))
payloads := make([]*os.File, len(tarParts)) payloads := make([]*os.File, len(tarParts))
var mu sync.Mutex var mu sync.Mutex
@ -214,15 +155,8 @@ func Build(tarBlob *io.SectionReader, opt ...Option) (_ *Blob, rErr error) {
if err != nil { if err != nil {
return err return err
} }
sw := NewWriterWithCompressor(esgzFile, opts.compression) sw := NewWriterLevel(esgzFile, opts.compressionLevel)
sw.ChunkSize = opts.chunkSize sw.ChunkSize = opts.chunkSize
sw.MinChunkSize = opts.minChunkSize
if sw.needsOpenGzEntries == nil {
sw.needsOpenGzEntries = make(map[string]struct{})
}
for _, f := range []string{PrefetchLandmark, NoPrefetchLandmark} {
sw.needsOpenGzEntries[f] = struct{}{}
}
if err := sw.AppendTar(readerFromEntries(parts...)); err != nil { if err := sw.AppendTar(readerFromEntries(parts...)); err != nil {
return err return err
} }
@ -237,7 +171,7 @@ func Build(tarBlob *io.SectionReader, opt ...Option) (_ *Blob, rErr error) {
rErr = err rErr = err
return nil, err return nil, err
} }
tocAndFooter, tocDgst, err := closeWithCombine(writers...) tocAndFooter, tocDgst, err := closeWithCombine(opts.compressionLevel, writers...)
if err != nil { if err != nil {
rErr = err rErr = err
return nil, err return nil, err
@ -253,12 +187,11 @@ func Build(tarBlob *io.SectionReader, opt ...Option) (_ *Blob, rErr error) {
diffID := digest.Canonical.Digester() diffID := digest.Canonical.Digester()
pr, pw := io.Pipe() pr, pw := io.Pipe()
go func() { go func() {
r, err := opts.compression.Reader(io.TeeReader(io.MultiReader(append(rs, tocAndFooter)...), pw)) r, err := gzip.NewReader(io.TeeReader(io.MultiReader(append(rs, tocAndFooter)...), pw))
if err != nil { if err != nil {
pw.CloseWithError(err) pw.CloseWithError(err)
return return
} }
defer r.Close()
if _, err := io.Copy(diffID.Hash(), r); err != nil { if _, err := io.Copy(diffID.Hash(), r); err != nil {
pw.CloseWithError(err) pw.CloseWithError(err)
return return
@ -280,7 +213,7 @@ func Build(tarBlob *io.SectionReader, opt ...Option) (_ *Blob, rErr error) {
// Writers doesn't write TOC and footer to the underlying writers so they can be // Writers doesn't write TOC and footer to the underlying writers so they can be
// combined into a single eStargz and tocAndFooter returned by this function can // combined into a single eStargz and tocAndFooter returned by this function can
// be appended at the tail of that combined blob. // be appended at the tail of that combined blob.
func closeWithCombine(ws ...*Writer) (tocAndFooterR io.Reader, tocDgst digest.Digest, err error) { func closeWithCombine(compressionLevel int, ws ...*Writer) (tocAndFooter io.Reader, tocDgst digest.Digest, err error) {
if len(ws) == 0 { if len(ws) == 0 {
return nil, "", fmt.Errorf("at least one writer must be passed") return nil, "", fmt.Errorf("at least one writer must be passed")
} }
@ -297,7 +230,7 @@ func closeWithCombine(ws ...*Writer) (tocAndFooterR io.Reader, tocDgst digest.Di
} }
} }
var ( var (
mtoc = new(JTOC) mtoc = new(jtoc)
currentOffset int64 currentOffset int64
) )
mtoc.Version = ws[0].toc.Version mtoc.Version = ws[0].toc.Version
@ -315,16 +248,40 @@ func closeWithCombine(ws ...*Writer) (tocAndFooterR io.Reader, tocDgst digest.Di
currentOffset += w.cw.n currentOffset += w.cw.n
} }
return tocAndFooter(ws[0].compressor, mtoc, currentOffset) tocJSON, err := json.MarshalIndent(mtoc, "", "\t")
}
func tocAndFooter(compressor Compressor, toc *JTOC, offset int64) (io.Reader, digest.Digest, error) {
buf := new(bytes.Buffer)
tocDigest, err := compressor.WriteTOCAndFooter(buf, offset, toc, nil)
if err != nil { if err != nil {
return nil, "", err return nil, "", err
} }
return buf, tocDigest, nil pr, pw := io.Pipe()
go func() {
zw, _ := gzip.NewWriterLevel(pw, compressionLevel)
tw := tar.NewWriter(zw)
if err := tw.WriteHeader(&tar.Header{
Typeflag: tar.TypeReg,
Name: TOCTarName,
Size: int64(len(tocJSON)),
}); err != nil {
pw.CloseWithError(err)
return
}
if _, err := tw.Write(tocJSON); err != nil {
pw.CloseWithError(err)
return
}
if err := tw.Close(); err != nil {
pw.CloseWithError(err)
return
}
if err := zw.Close(); err != nil {
pw.CloseWithError(err)
return
}
pw.Close()
}()
return io.MultiReader(
pr,
bytes.NewReader(footerBytes(currentOffset)),
), digest.FromBytes(tocJSON), nil
} }
// divideEntries divides passed entries to the parts at least the number specified by the // divideEntries divides passed entries to the parts at least the number specified by the
@ -361,7 +318,7 @@ func sortEntries(in io.ReaderAt, prioritized []string, missedPrioritized *[]stri
// Import tar file. // Import tar file.
intar, err := importTar(in) intar, err := importTar(in)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to sort: %w", err) return nil, errors.Wrap(err, "failed to sort")
} }
// Sort the tar file respecting to the prioritized files list. // Sort the tar file respecting to the prioritized files list.
@ -372,7 +329,7 @@ func sortEntries(in io.ReaderAt, prioritized []string, missedPrioritized *[]stri
*missedPrioritized = append(*missedPrioritized, l) *missedPrioritized = append(*missedPrioritized, l)
continue // allow not found continue // allow not found
} }
return nil, fmt.Errorf("failed to sort tar entries: %w", err) return nil, errors.Wrap(err, "failed to sort tar entries")
} }
} }
if len(prioritized) == 0 { if len(prioritized) == 0 {
@ -408,11 +365,11 @@ func readerFromEntries(entries ...*entry) io.Reader {
defer tw.Close() defer tw.Close()
for _, entry := range entries { for _, entry := range entries {
if err := tw.WriteHeader(entry.header); err != nil { if err := tw.WriteHeader(entry.header); err != nil {
pw.CloseWithError(fmt.Errorf("failed to write tar header: %v", err)) pw.CloseWithError(fmt.Errorf("Failed to write tar header: %v", err))
return return
} }
if _, err := io.Copy(tw, entry.payload); err != nil { if _, err := io.Copy(tw, entry.payload); err != nil {
pw.CloseWithError(fmt.Errorf("failed to write tar payload: %v", err)) pw.CloseWithError(fmt.Errorf("Failed to write tar payload: %v", err))
return return
} }
} }
@ -423,9 +380,9 @@ func readerFromEntries(entries ...*entry) io.Reader {
func importTar(in io.ReaderAt) (*tarFile, error) { func importTar(in io.ReaderAt) (*tarFile, error) {
tf := &tarFile{} tf := &tarFile{}
pw, err := newCountReadSeeker(in) pw, err := newCountReader(in)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to make position watcher: %w", err) return nil, errors.Wrap(err, "failed to make position watcher")
} }
tr := tar.NewReader(pw) tr := tar.NewReader(pw)
@ -436,8 +393,9 @@ func importTar(in io.ReaderAt) (*tarFile, error) {
if err != nil { if err != nil {
if err == io.EOF { if err == io.EOF {
break break
} else {
return nil, errors.Wrap(err, "failed to parse tar file")
} }
return nil, fmt.Errorf("failed to parse tar file, %w", err)
} }
switch cleanEntryName(h.Name) { switch cleanEntryName(h.Name) {
case PrefetchLandmark, NoPrefetchLandmark: case PrefetchLandmark, NoPrefetchLandmark:
@ -473,7 +431,7 @@ func moveRec(name string, in *tarFile, out *tarFile) error {
_, okIn := in.get(name) _, okIn := in.get(name)
_, okOut := out.get(name) _, okOut := out.get(name)
if !okIn && !okOut { if !okIn && !okOut {
return fmt.Errorf("file: %q: %w", name, errNotFound) return errors.Wrapf(errNotFound, "file: %q", name)
} }
parent, _ := path.Split(strings.TrimSuffix(name, "/")) parent, _ := path.Split(strings.TrimSuffix(name, "/"))
@ -559,13 +517,12 @@ func newTempFiles() *tempFiles {
} }
type tempFiles struct { type tempFiles struct {
files []*os.File files []*os.File
filesMu sync.Mutex filesMu sync.Mutex
cleanupOnce sync.Once
} }
func (tf *tempFiles) TempFile(dir, pattern string) (*os.File, error) { func (tf *tempFiles) TempFile(dir, pattern string) (*os.File, error) {
f, err := os.CreateTemp(dir, pattern) f, err := ioutil.TempFile(dir, pattern)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -575,14 +532,7 @@ func (tf *tempFiles) TempFile(dir, pattern string) (*os.File, error) {
return f, nil return f, nil
} }
func (tf *tempFiles) CleanupAll() (err error) { func (tf *tempFiles) CleanupAll() error {
tf.cleanupOnce.Do(func() {
err = tf.cleanupAll()
})
return
}
func (tf *tempFiles) cleanupAll() error {
tf.filesMu.Lock() tf.filesMu.Lock()
defer tf.filesMu.Unlock() defer tf.filesMu.Unlock()
var allErr []error var allErr []error
@ -598,19 +548,19 @@ func (tf *tempFiles) cleanupAll() error {
return errorutil.Aggregate(allErr) return errorutil.Aggregate(allErr)
} }
func newCountReadSeeker(r io.ReaderAt) (*countReadSeeker, error) { func newCountReader(r io.ReaderAt) (*countReader, error) {
pos := int64(0) pos := int64(0)
return &countReadSeeker{r: r, cPos: &pos}, nil return &countReader{r: r, cPos: &pos}, nil
} }
type countReadSeeker struct { type countReader struct {
r io.ReaderAt r io.ReaderAt
cPos *int64 cPos *int64
mu sync.Mutex mu sync.Mutex
} }
func (cr *countReadSeeker) Read(p []byte) (int, error) { func (cr *countReader) Read(p []byte) (int, error) {
cr.mu.Lock() cr.mu.Lock()
defer cr.mu.Unlock() defer cr.mu.Unlock()
@ -621,18 +571,18 @@ func (cr *countReadSeeker) Read(p []byte) (int, error) {
return n, err return n, err
} }
func (cr *countReadSeeker) Seek(offset int64, whence int) (int64, error) { func (cr *countReader) Seek(offset int64, whence int) (int64, error) {
cr.mu.Lock() cr.mu.Lock()
defer cr.mu.Unlock() defer cr.mu.Unlock()
switch whence { switch whence {
default: default:
return 0, fmt.Errorf("unknown whence: %v", whence) return 0, fmt.Errorf("Unknown whence: %v", whence)
case io.SeekStart: case io.SeekStart:
case io.SeekCurrent: case io.SeekCurrent:
offset += *cr.cPos offset += *cr.cPos
case io.SeekEnd: case io.SeekEnd:
return 0, fmt.Errorf("unsupported whence: %v", whence) return 0, fmt.Errorf("Unsupported whence: %v", whence)
} }
if offset < 0 { if offset < 0 {
@ -642,7 +592,7 @@ func (cr *countReadSeeker) Seek(offset int64, whence int) (int64, error) {
return offset, nil return offset, nil
} }
func (cr *countReadSeeker) currentPos() int64 { func (cr *countReader) currentPos() int64 {
cr.mu.Lock() cr.mu.Lock()
defer cr.mu.Unlock() defer cr.mu.Unlock()

File diff suppressed because it is too large Load Diff

View File

@ -17,8 +17,9 @@
package errorutil package errorutil
import ( import (
"errors"
"testing" "testing"
"github.com/pkg/errors"
) )
func TestNoError(t *testing.T) { func TestNoError(t *testing.T) {

File diff suppressed because it is too large Load Diff

View File

@ -22,7 +22,832 @@
package estargz package estargz
import "testing" import (
"archive/tar"
"bytes"
"compress/gzip"
"crypto/sha256"
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"reflect"
"sort"
"strings"
"testing"
"time"
)
var allowedPrefix = [4]string{"", "./", "/", "../"}
var compressionLevels = [5]int{
gzip.NoCompression,
gzip.BestSpeed,
gzip.BestCompression,
gzip.DefaultCompression,
gzip.HuffmanOnly,
}
// Tests footer encoding, size, and parsing.
func TestFooter(t *testing.T) {
for off := int64(0); off <= 200000; off += 1023 {
checkFooter(t, off)
checkLegacyFooter(t, off)
}
}
func checkFooter(t *testing.T, off int64) {
footer := footerBytes(off)
if len(footer) != FooterSize {
t.Fatalf("for offset %v, footer length was %d, not expected %d. got bytes: %q", off, len(footer), FooterSize, footer)
}
got, size, err := parseFooter(footer)
if err != nil {
t.Fatalf("failed to parse footer for offset %d, footer: %x: err: %v",
off, footer, err)
}
if size != FooterSize {
t.Fatalf("invalid footer size %d; want %d", size, FooterSize)
}
if got != off {
t.Fatalf("ParseFooter(footerBytes(offset %d)) = %d; want %d", off, got, off)
}
}
func checkLegacyFooter(t *testing.T, off int64) {
footer := legacyFooterBytes(off)
if len(footer) != legacyFooterSize {
t.Fatalf("for offset %v, footer length was %d, not expected %d. got bytes: %q", off, len(footer), legacyFooterSize, footer)
}
got, size, err := parseFooter(footer)
if err != nil {
t.Fatalf("failed to parse legacy footer for offset %d, footer: %x: err: %v",
off, footer, err)
}
if size != legacyFooterSize {
t.Fatalf("invalid legacy footer size %d; want %d", size, legacyFooterSize)
}
if got != off {
t.Fatalf("ParseFooter(legacyFooterBytes(offset %d)) = %d; want %d", off, got, off)
}
}
func legacyFooterBytes(tocOff int64) []byte {
buf := bytes.NewBuffer(make([]byte, 0, legacyFooterSize))
gz, _ := gzip.NewWriterLevel(buf, gzip.NoCompression)
gz.Header.Extra = []byte(fmt.Sprintf("%016xSTARGZ", tocOff))
gz.Close()
if buf.Len() != legacyFooterSize {
panic(fmt.Sprintf("footer buffer = %d, not %d", buf.Len(), legacyFooterSize))
}
return buf.Bytes()
}
func TestWriteAndOpen(t *testing.T) {
const content = "Some contents"
invalidUtf8 := "\xff\xfe\xfd"
xAttrFile := xAttr{"foo": "bar", "invalid-utf8": invalidUtf8}
sampleOwner := owner{uid: 50, gid: 100}
tests := []struct {
name string
chunkSize int
in []tarEntry
want []stargzCheck
wantNumGz int // expected number of gzip streams
}{
{
name: "empty",
in: tarOf(),
wantNumGz: 2, // TOC + footer
want: checks(
numTOCEntries(0),
),
},
{
name: "1dir_1empty_file",
in: tarOf(
dir("foo/"),
file("foo/bar.txt", ""),
),
wantNumGz: 3, // dir, TOC, footer
want: checks(
numTOCEntries(2),
hasDir("foo/"),
hasFileLen("foo/bar.txt", 0),
entryHasChildren("foo", "bar.txt"),
hasFileDigest("foo/bar.txt", digestFor("")),
),
},
{
name: "1dir_1file",
in: tarOf(
dir("foo/"),
file("foo/bar.txt", content, xAttrFile),
),
wantNumGz: 4, // var dir, foo.txt alone, TOC, footer
want: checks(
numTOCEntries(2),
hasDir("foo/"),
hasFileLen("foo/bar.txt", len(content)),
hasFileDigest("foo/bar.txt", digestFor(content)),
hasFileContentsRange("foo/bar.txt", 0, content),
hasFileContentsRange("foo/bar.txt", 1, content[1:]),
entryHasChildren("", "foo"),
entryHasChildren("foo", "bar.txt"),
hasFileXattrs("foo/bar.txt", "foo", "bar"),
hasFileXattrs("foo/bar.txt", "invalid-utf8", invalidUtf8),
),
},
{
name: "2meta_2file",
in: tarOf(
dir("bar/", sampleOwner),
dir("foo/", sampleOwner),
file("foo/bar.txt", content, sampleOwner),
),
wantNumGz: 4, // both dirs, foo.txt alone, TOC, footer
want: checks(
numTOCEntries(3),
hasDir("bar/"),
hasDir("foo/"),
hasFileLen("foo/bar.txt", len(content)),
entryHasChildren("", "bar", "foo"),
entryHasChildren("foo", "bar.txt"),
hasChunkEntries("foo/bar.txt", 1),
hasEntryOwner("bar/", sampleOwner),
hasEntryOwner("foo/", sampleOwner),
hasEntryOwner("foo/bar.txt", sampleOwner),
),
},
{
name: "3dir",
in: tarOf(
dir("bar/"),
dir("foo/"),
dir("foo/bar/"),
),
wantNumGz: 3, // 3 dirs, TOC, footer
want: checks(
hasDirLinkCount("bar/", 2),
hasDirLinkCount("foo/", 3),
hasDirLinkCount("foo/bar/", 2),
),
},
{
name: "symlink",
in: tarOf(
dir("foo/"),
symlink("foo/bar", "../../x"),
),
wantNumGz: 3, // metas + TOC + footer
want: checks(
numTOCEntries(2),
hasSymlink("foo/bar", "../../x"),
entryHasChildren("", "foo"),
entryHasChildren("foo", "bar"),
),
},
{
name: "chunked_file",
chunkSize: 4,
in: tarOf(
dir("foo/"),
file("foo/big.txt", "This "+"is s"+"uch "+"a bi"+"g fi"+"le"),
),
wantNumGz: 9,
want: checks(
numTOCEntries(7), // 1 for foo dir, 6 for the foo/big.txt file
hasDir("foo/"),
hasFileLen("foo/big.txt", len("This is such a big file")),
hasFileDigest("foo/big.txt", digestFor("This is such a big file")),
hasFileContentsRange("foo/big.txt", 0, "This is such a big file"),
hasFileContentsRange("foo/big.txt", 1, "his is such a big file"),
hasFileContentsRange("foo/big.txt", 2, "is is such a big file"),
hasFileContentsRange("foo/big.txt", 3, "s is such a big file"),
hasFileContentsRange("foo/big.txt", 4, " is such a big file"),
hasFileContentsRange("foo/big.txt", 5, "is such a big file"),
hasFileContentsRange("foo/big.txt", 6, "s such a big file"),
hasFileContentsRange("foo/big.txt", 7, " such a big file"),
hasFileContentsRange("foo/big.txt", 8, "such a big file"),
hasFileContentsRange("foo/big.txt", 9, "uch a big file"),
hasFileContentsRange("foo/big.txt", 10, "ch a big file"),
hasFileContentsRange("foo/big.txt", 11, "h a big file"),
hasFileContentsRange("foo/big.txt", 12, " a big file"),
hasFileContentsRange("foo/big.txt", len("This is such a big file")-1, ""),
hasChunkEntries("foo/big.txt", 6),
),
},
{
name: "recursive",
in: tarOf(
dir("/", sampleOwner),
dir("bar/", sampleOwner),
dir("foo/", sampleOwner),
file("foo/bar.txt", content, sampleOwner),
),
wantNumGz: 4, // dirs, bar.txt alone, TOC, footer
want: checks(
maxDepth(2), // 0: root directory, 1: "foo/", 2: "bar.txt"
),
},
{
name: "block_char_fifo",
in: tarOf(
tarEntryFunc(func(w *tar.Writer, prefix string) error {
return w.WriteHeader(&tar.Header{
Name: prefix + "b",
Typeflag: tar.TypeBlock,
Devmajor: 123,
Devminor: 456,
})
}),
tarEntryFunc(func(w *tar.Writer, prefix string) error {
return w.WriteHeader(&tar.Header{
Name: prefix + "c",
Typeflag: tar.TypeChar,
Devmajor: 111,
Devminor: 222,
})
}),
tarEntryFunc(func(w *tar.Writer, prefix string) error {
return w.WriteHeader(&tar.Header{
Name: prefix + "f",
Typeflag: tar.TypeFifo,
})
}),
),
wantNumGz: 3,
want: checks(
lookupMatch("b", &TOCEntry{Name: "b", Type: "block", DevMajor: 123, DevMinor: 456, NumLink: 1}),
lookupMatch("c", &TOCEntry{Name: "c", Type: "char", DevMajor: 111, DevMinor: 222, NumLink: 1}),
lookupMatch("f", &TOCEntry{Name: "f", Type: "fifo", NumLink: 1}),
),
},
{
name: "modes",
in: tarOf(
dir("foo1/", 0755|os.ModeDir|os.ModeSetgid),
file("foo1/bar1", content, 0700|os.ModeSetuid),
file("foo1/bar2", content, 0755|os.ModeSetgid),
dir("foo2/", 0755|os.ModeDir|os.ModeSticky),
file("foo2/bar3", content, 0755|os.ModeSticky),
dir("foo3/", 0755|os.ModeDir),
file("foo3/bar4", content, os.FileMode(0700)),
file("foo3/bar5", content, os.FileMode(0755)),
),
wantNumGz: 8, // dir, bar1 alone, bar2 alone + dir, bar3 alone + dir, bar4 alone, bar5 alone, TOC, footer
want: checks(
hasMode("foo1/", 0755|os.ModeDir|os.ModeSetgid),
hasMode("foo1/bar1", 0700|os.ModeSetuid),
hasMode("foo1/bar2", 0755|os.ModeSetgid),
hasMode("foo2/", 0755|os.ModeDir|os.ModeSticky),
hasMode("foo2/bar3", 0755|os.ModeSticky),
hasMode("foo3/", 0755|os.ModeDir),
hasMode("foo3/bar4", os.FileMode(0700)),
hasMode("foo3/bar5", os.FileMode(0755)),
),
},
}
for _, tt := range tests {
for _, cl := range compressionLevels {
cl := cl
for _, prefix := range allowedPrefix {
prefix := prefix
t.Run(tt.name+"-"+fmt.Sprintf("compression=%v-prefix=%q", cl, prefix), func(t *testing.T) {
tr, cancel := buildTar(t, tt.in, prefix)
defer cancel()
var stargzBuf bytes.Buffer
w := NewWriterLevel(&stargzBuf, cl)
w.ChunkSize = tt.chunkSize
if err := w.AppendTar(tr); err != nil {
t.Fatalf("Append: %v", err)
}
if _, err := w.Close(); err != nil {
t.Fatalf("Writer.Close: %v", err)
}
b := stargzBuf.Bytes()
diffID := w.DiffID()
wantDiffID := diffIDOfGz(t, b)
if diffID != wantDiffID {
t.Errorf("DiffID = %q; want %q", diffID, wantDiffID)
}
got := countGzStreams(t, b)
if got != tt.wantNumGz {
t.Errorf("number of gzip streams = %d; want %d", got, tt.wantNumGz)
}
r, err := Open(io.NewSectionReader(bytes.NewReader(b), 0, int64(len(b))))
if err != nil {
t.Fatalf("stargz.Open: %v", err)
}
for _, want := range tt.want {
want.check(t, r)
}
})
}
}
}
}
func diffIDOfGz(t *testing.T, b []byte) string {
h := sha256.New()
zr, err := gzip.NewReader(bytes.NewReader(b))
if err != nil {
t.Fatalf("diffIDOfGz: %v", err)
}
if _, err := io.Copy(h, zr); err != nil {
t.Fatalf("diffIDOfGz.Copy: %v", err)
}
return fmt.Sprintf("sha256:%x", h.Sum(nil))
}
func countGzStreams(t *testing.T, b []byte) (numStreams int) {
len0 := len(b)
br := bytes.NewReader(b)
zr := new(gzip.Reader)
t.Logf("got gzip streams:")
for {
zoff := len0 - br.Len()
if err := zr.Reset(br); err != nil {
if err == io.EOF {
return
}
t.Fatalf("countGzStreams, Reset: %v", err)
}
zr.Multistream(false)
n, err := io.Copy(ioutil.Discard, zr)
if err != nil {
t.Fatalf("countGzStreams, Copy: %v", err)
}
var extra string
if len(zr.Header.Extra) > 0 {
extra = fmt.Sprintf("; extra=%q", zr.Header.Extra)
}
t.Logf(" [%d] at %d in stargz, uncompressed length %d%s", numStreams, zoff, n, extra)
numStreams++
}
}
func digestFor(content string) string {
sum := sha256.Sum256([]byte(content))
return fmt.Sprintf("sha256:%x", sum)
}
type numTOCEntries int
func (n numTOCEntries) check(t *testing.T, r *Reader) {
if r.toc == nil {
t.Fatal("nil TOC")
}
if got, want := len(r.toc.Entries), int(n); got != want {
t.Errorf("got %d TOC entries; want %d", got, want)
}
t.Logf("got TOC entries:")
for i, ent := range r.toc.Entries {
entj, _ := json.Marshal(ent)
t.Logf(" [%d]: %s\n", i, entj)
}
if t.Failed() {
t.FailNow()
}
}
func tarOf(s ...tarEntry) []tarEntry { return s }
func checks(s ...stargzCheck) []stargzCheck { return s }
type stargzCheck interface {
check(t *testing.T, r *Reader)
}
type stargzCheckFn func(*testing.T, *Reader)
func (f stargzCheckFn) check(t *testing.T, r *Reader) { f(t, r) }
func maxDepth(max int) stargzCheck {
return stargzCheckFn(func(t *testing.T, r *Reader) {
e, ok := r.Lookup("")
if !ok {
t.Fatal("root directory not found")
}
d, err := getMaxDepth(t, e, 0, 10*max)
if err != nil {
t.Errorf("failed to get max depth (wanted %d): %v", max, err)
return
}
if d != max {
t.Errorf("invalid depth %d; want %d", d, max)
return
}
})
}
func getMaxDepth(t *testing.T, e *TOCEntry, current, limit int) (max int, rErr error) {
if current > limit {
return -1, fmt.Errorf("walkMaxDepth: exceeds limit: current:%d > limit:%d",
current, limit)
}
max = current
e.ForeachChild(func(baseName string, ent *TOCEntry) bool {
t.Logf("%q(basename:%q) is child of %q\n", ent.Name, baseName, e.Name)
d, err := getMaxDepth(t, ent, current+1, limit)
if err != nil {
rErr = err
return false
}
if d > max {
max = d
}
return true
})
return
}
func hasFileLen(file string, wantLen int) stargzCheck {
return stargzCheckFn(func(t *testing.T, r *Reader) {
for _, ent := range r.toc.Entries {
if ent.Name == file {
if ent.Type != "reg" {
t.Errorf("file type of %q is %q; want \"reg\"", file, ent.Type)
} else if ent.Size != int64(wantLen) {
t.Errorf("file size of %q = %d; want %d", file, ent.Size, wantLen)
}
return
}
}
t.Errorf("file %q not found", file)
})
}
func hasFileXattrs(file, name, value string) stargzCheck {
return stargzCheckFn(func(t *testing.T, r *Reader) {
for _, ent := range r.toc.Entries {
if ent.Name == file {
if ent.Type != "reg" {
t.Errorf("file type of %q is %q; want \"reg\"", file, ent.Type)
}
if ent.Xattrs == nil {
t.Errorf("file %q has no xattrs", file)
return
}
valueFound, found := ent.Xattrs[name]
if !found {
t.Errorf("file %q has no xattr %q", file, name)
return
}
if string(valueFound) != value {
t.Errorf("file %q has xattr %q with value %q instead of %q", file, name, valueFound, value)
}
return
}
}
t.Errorf("file %q not found", file)
})
}
func hasFileDigest(file string, digest string) stargzCheck {
return stargzCheckFn(func(t *testing.T, r *Reader) {
ent, ok := r.Lookup(file)
if !ok {
t.Fatalf("didn't find TOCEntry for file %q", file)
}
if ent.Digest != digest {
t.Fatalf("Digest(%q) = %q, want %q", file, ent.Digest, digest)
}
})
}
func hasFileContentsRange(file string, offset int, want string) stargzCheck {
return stargzCheckFn(func(t *testing.T, r *Reader) {
f, err := r.OpenFile(file)
if err != nil {
t.Fatal(err)
}
got := make([]byte, len(want))
n, err := f.ReadAt(got, int64(offset))
if err != nil {
t.Fatalf("ReadAt(len %d, offset %d) = %v, %v", len(got), offset, n, err)
}
if string(got) != want {
t.Fatalf("ReadAt(len %d, offset %d) = %q, want %q", len(got), offset, got, want)
}
})
}
func hasChunkEntries(file string, wantChunks int) stargzCheck {
return stargzCheckFn(func(t *testing.T, r *Reader) {
ent, ok := r.Lookup(file)
if !ok {
t.Fatalf("no file for %q", file)
}
if ent.Type != "reg" {
t.Fatalf("file %q has unexpected type %q; want reg", file, ent.Type)
}
chunks := r.getChunks(ent)
if len(chunks) != wantChunks {
t.Errorf("len(r.getChunks(%q)) = %d; want %d", file, len(chunks), wantChunks)
return
}
f := chunks[0]
var gotChunks []*TOCEntry
var last *TOCEntry
for off := int64(0); off < f.Size; off++ {
e, ok := r.ChunkEntryForOffset(file, off)
if !ok {
t.Errorf("no ChunkEntryForOffset at %d", off)
return
}
if last != e {
gotChunks = append(gotChunks, e)
last = e
}
}
if !reflect.DeepEqual(chunks, gotChunks) {
t.Errorf("gotChunks=%d, want=%d; contents mismatch", len(gotChunks), wantChunks)
}
// And verify the NextOffset
for i := 0; i < len(gotChunks)-1; i++ {
ci := gotChunks[i]
cnext := gotChunks[i+1]
if ci.NextOffset() != cnext.Offset {
t.Errorf("chunk %d NextOffset %d != next chunk's Offset of %d", i, ci.NextOffset(), cnext.Offset)
}
}
})
}
func entryHasChildren(dir string, want ...string) stargzCheck {
return stargzCheckFn(func(t *testing.T, r *Reader) {
want := append([]string(nil), want...)
var got []string
ent, ok := r.Lookup(dir)
if !ok {
t.Fatalf("didn't find TOCEntry for dir node %q", dir)
}
for baseName := range ent.children {
got = append(got, baseName)
}
sort.Strings(got)
sort.Strings(want)
if !reflect.DeepEqual(got, want) {
t.Errorf("children of %q = %q; want %q", dir, got, want)
}
})
}
func hasDir(file string) stargzCheck {
return stargzCheckFn(func(t *testing.T, r *Reader) {
for _, ent := range r.toc.Entries {
if ent.Name == cleanEntryName(file) {
if ent.Type != "dir" {
t.Errorf("file type of %q is %q; want \"dir\"", file, ent.Type)
}
return
}
}
t.Errorf("directory %q not found", file)
})
}
func hasDirLinkCount(file string, count int) stargzCheck {
return stargzCheckFn(func(t *testing.T, r *Reader) {
for _, ent := range r.toc.Entries {
if ent.Name == cleanEntryName(file) {
if ent.Type != "dir" {
t.Errorf("file type of %q is %q; want \"dir\"", file, ent.Type)
return
}
if ent.NumLink != count {
t.Errorf("link count of %q = %d; want %d", file, ent.NumLink, count)
}
return
}
}
t.Errorf("directory %q not found", file)
})
}
func hasMode(file string, mode os.FileMode) stargzCheck {
return stargzCheckFn(func(t *testing.T, r *Reader) {
for _, ent := range r.toc.Entries {
if ent.Name == cleanEntryName(file) {
if ent.Stat().Mode() != mode {
t.Errorf("invalid mode: got %v; want %v", ent.Stat().Mode(), mode)
return
}
return
}
}
t.Errorf("file %q not found", file)
})
}
func hasSymlink(file, target string) stargzCheck {
return stargzCheckFn(func(t *testing.T, r *Reader) {
for _, ent := range r.toc.Entries {
if ent.Name == file {
if ent.Type != "symlink" {
t.Errorf("file type of %q is %q; want \"symlink\"", file, ent.Type)
} else if ent.LinkName != target {
t.Errorf("link target of symlink %q is %q; want %q", file, ent.LinkName, target)
}
return
}
}
t.Errorf("symlink %q not found", file)
})
}
func lookupMatch(name string, want *TOCEntry) stargzCheck {
return stargzCheckFn(func(t *testing.T, r *Reader) {
e, ok := r.Lookup(name)
if !ok {
t.Fatalf("failed to Lookup entry %q", name)
}
if !reflect.DeepEqual(e, want) {
t.Errorf("entry %q mismatch.\n got: %+v\nwant: %+v\n", name, e, want)
}
})
}
func hasEntryOwner(entry string, owner owner) stargzCheck {
return stargzCheckFn(func(t *testing.T, r *Reader) {
ent, ok := r.Lookup(strings.TrimSuffix(entry, "/"))
if !ok {
t.Errorf("entry %q not found", entry)
return
}
if ent.UID != owner.uid || ent.GID != owner.gid {
t.Errorf("entry %q has invalid owner (uid:%d, gid:%d) instead of (uid:%d, gid:%d)", entry, ent.UID, ent.GID, owner.uid, owner.gid)
return
}
})
}
type tarEntry interface {
appendTar(tw *tar.Writer, prefix string) error
}
type tarEntryFunc func(*tar.Writer, string) error
func (f tarEntryFunc) appendTar(tw *tar.Writer, prefix string) error { return f(tw, prefix) }
func buildTar(t *testing.T, ents []tarEntry, prefix string) (r io.Reader, cancel func()) {
pr, pw := io.Pipe()
go func() {
tw := tar.NewWriter(pw)
for _, ent := range ents {
if err := ent.appendTar(tw, prefix); err != nil {
t.Errorf("building input tar: %v", err)
pw.Close()
return
}
}
if err := tw.Close(); err != nil {
t.Errorf("closing write of input tar: %v", err)
}
pw.Close()
}()
return pr, func() { go pr.Close(); go pw.Close() }
}
func buildTarStatic(t *testing.T, ents []tarEntry, prefix string) *io.SectionReader {
buf := new(bytes.Buffer)
tw := tar.NewWriter(buf)
for _, ent := range ents {
if err := ent.appendTar(tw, prefix); err != nil {
t.Fatalf("building input tar: %v", err)
}
}
if err := tw.Close(); err != nil {
t.Errorf("closing write of input tar: %v", err)
}
data := buf.Bytes()
return io.NewSectionReader(bytes.NewReader(data), 0, int64(len(data)))
}
func dir(name string, opts ...interface{}) tarEntry {
return tarEntryFunc(func(tw *tar.Writer, prefix string) error {
var o owner
mode := os.FileMode(0755)
for _, opt := range opts {
switch v := opt.(type) {
case owner:
o = v
case os.FileMode:
mode = v
default:
return errors.New("unsupported opt")
}
}
if !strings.HasSuffix(name, "/") {
panic(fmt.Sprintf("missing trailing slash in dir %q ", name))
}
tm, err := fileModeToTarMode(mode)
if err != nil {
return err
}
return tw.WriteHeader(&tar.Header{
Typeflag: tar.TypeDir,
Name: prefix + name,
Mode: tm,
Uid: o.uid,
Gid: o.gid,
})
})
}
// xAttr are extended attributes to set on test files created with the file func.
type xAttr map[string]string
// owner is owner ot set on test files and directories with the file and dir functions.
type owner struct {
uid int
gid int
}
func file(name, contents string, opts ...interface{}) tarEntry {
return tarEntryFunc(func(tw *tar.Writer, prefix string) error {
var xattrs xAttr
var o owner
mode := os.FileMode(0644)
for _, opt := range opts {
switch v := opt.(type) {
case xAttr:
xattrs = v
case owner:
o = v
case os.FileMode:
mode = v
default:
return errors.New("unsupported opt")
}
}
if strings.HasSuffix(name, "/") {
return fmt.Errorf("bogus trailing slash in file %q", name)
}
tm, err := fileModeToTarMode(mode)
if err != nil {
return err
}
if err := tw.WriteHeader(&tar.Header{
Typeflag: tar.TypeReg,
Name: prefix + name,
Mode: tm,
Xattrs: xattrs,
Size: int64(len(contents)),
Uid: o.uid,
Gid: o.gid,
}); err != nil {
return err
}
_, err = io.WriteString(tw, contents)
return err
})
}
func symlink(name, target string) tarEntry {
return tarEntryFunc(func(tw *tar.Writer, prefix string) error {
return tw.WriteHeader(&tar.Header{
Typeflag: tar.TypeSymlink,
Name: prefix + name,
Linkname: target,
Mode: 0644,
})
})
}
func fileModeToTarMode(mode os.FileMode) (int64, error) {
h, err := tar.FileInfoHeader(fileInfoOnlyMode(mode), "")
if err != nil {
return 0, err
}
return h.Mode, nil
}
// fileInfoOnlyMode is an os.FileMode that implements os.FileInfo, populating
// only the file mode; all other accessors return zero-ish placeholder values.
type fileInfoOnlyMode os.FileMode

func (f fileInfoOnlyMode) Name() string      { return "" }
func (f fileInfoOnlyMode) Size() int64       { return 0 }
func (f fileInfoOnlyMode) Mode() os.FileMode { return os.FileMode(f) }

// ModTime returns the current time; callers only consume the mode.
func (f fileInfoOnlyMode) ModTime() time.Time { return time.Now() }
func (f fileInfoOnlyMode) IsDir() bool        { return os.FileMode(f).IsDir() }
func (f fileInfoOnlyMode) Sys() interface{}   { return nil }
// Tests *Reader.ChunkEntryForOffset about offset and size calculation. // Tests *Reader.ChunkEntryForOffset about offset and size calculation.
func TestChunkEntryForOffset(t *testing.T) { func TestChunkEntryForOffset(t *testing.T) {
@ -81,7 +906,7 @@ func TestChunkEntryForOffset(t *testing.T) {
if ok != te.wantOk { if ok != te.wantOk {
t.Errorf("ok = %v; want (%v)", ok, te.wantOk) t.Errorf("ok = %v; want (%v)", ok, te.wantOk)
} else if ok { } else if ok {
if ce.ChunkOffset != te.wantChunkOffset || ce.ChunkSize != te.wantChunkSize { if !(ce.ChunkOffset == te.wantChunkOffset && ce.ChunkSize == te.wantChunkSize) {
t.Errorf("chunkOffset = %d, ChunkSize = %d; want (chunkOffset = %d, chunkSize = %d)", t.Errorf("chunkOffset = %d, ChunkSize = %d; want (chunkOffset = %d, chunkSize = %d)",
ce.ChunkOffset, ce.ChunkSize, te.wantChunkOffset, te.wantChunkSize) ce.ChunkOffset, ce.ChunkSize, te.wantChunkOffset, te.wantChunkSize)
} }

View File

@ -1,278 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file.
*/
package externaltoc
import (
"archive/tar"
"bytes"
"compress/gzip"
"encoding/binary"
"encoding/json"
"fmt"
"hash"
"io"
"sync"
"github.com/containerd/stargz-snapshotter/estargz"
digest "github.com/opencontainers/go-digest"
)
type GzipCompression struct {
*GzipCompressor
*GzipDecompressor
}
// NewGzipCompressionWithLevel returns an estargz.Compression backed by gzip
// at the given compression level, whose TOC is stored externally and fetched
// on demand through provideTOC.
func NewGzipCompressionWithLevel(provideTOC func() ([]byte, error), level int) estargz.Compression {
	return &GzipCompression{
		NewGzipCompressorWithLevel(level),
		NewGzipDecompressor(provideTOC),
	}
}
func NewGzipCompressor() *GzipCompressor {
return &GzipCompressor{compressionLevel: gzip.BestCompression}
}
func NewGzipCompressorWithLevel(level int) *GzipCompressor {
return &GzipCompressor{compressionLevel: level}
}
type GzipCompressor struct {
compressionLevel int
buf *bytes.Buffer
}
// WriteTOCTo writes the most recently generated compressed TOC (registered by
// WriteTOCAndFooter) to w and returns the number of bytes written. It returns
// an error if no TOC has been registered yet.
func (gc *GzipCompressor) WriteTOCTo(w io.Writer) (int, error) {
	// gc.buf stays nil until WriteTOCAndFooter runs; guard against a nil
	// dereference as well as an empty (unregistered) TOC.
	if gc.buf == nil || len(gc.buf.Bytes()) == 0 {
		return 0, fmt.Errorf("TOC hasn't been registered")
	}
	return w.Write(gc.buf.Bytes())
}
func (gc *GzipCompressor) Writer(w io.Writer) (estargz.WriteFlushCloser, error) {
return gzip.NewWriterLevel(w, gc.compressionLevel)
}
// WriteTOCAndFooter serializes toc as indented JSON, wraps it in a tar entry
// inside its own gzip stream, and stores that stream in gc.buf for later
// retrieval via WriteTOCTo — the TOC is external, so only the footer bytes
// are written to w. It returns the digest of the raw TOC JSON. off and
// diffHash are ignored: the external TOC is not part of the layer blob and
// therefore does not affect the DiffID.
func (gc *GzipCompressor) WriteTOCAndFooter(w io.Writer, off int64, toc *estargz.JTOC, diffHash hash.Hash) (digest.Digest, error) {
	tocJSON, err := json.MarshalIndent(toc, "", "\t")
	if err != nil {
		return "", err
	}
	buf := new(bytes.Buffer)
	gz, _ := gzip.NewWriterLevel(buf, gc.compressionLevel)
	// TOC isn't written to layer so no effect to diff ID
	tw := tar.NewWriter(gz)
	if err := tw.WriteHeader(&tar.Header{
		Typeflag: tar.TypeReg,
		Name:     estargz.TOCTarName,
		Size:     int64(len(tocJSON)),
	}); err != nil {
		return "", err
	}
	if _, err := tw.Write(tocJSON); err != nil {
		return "", err
	}
	if err := tw.Close(); err != nil {
		return "", err
	}
	if err := gz.Close(); err != nil {
		return "", err
	}
	// Register the compressed TOC so WriteTOCTo can emit it on request.
	gc.buf = buf
	footerBytes, err := gzipFooterBytes()
	if err != nil {
		return "", err
	}
	if _, err := w.Write(footerBytes); err != nil {
		return "", err
	}
	return digest.FromBytes(tocJSON), nil
}
// The footer is an empty gzip stream with no compression and an Extra header.
//
// 46 comes from:
//
// 10 bytes gzip header
// 2 bytes XLEN (length of Extra field) = 21 (4 bytes header + len("STARGZEXTERNALTOC"))
// 2 bytes Extra: SI1 = 'S', SI2 = 'G'
// 2 bytes Extra: LEN = 17 (len("STARGZEXTERNALTOC"))
// 17 bytes Extra: subfield = "STARGZEXTERNALTOC"
// 5 bytes flate header
// 8 bytes gzip footer
// (End of the eStargz blob)
const FooterSize = 46
// gzipFooterBytes returns the FooterSize (46) bytes footer: an empty gzip
// stream, compressed with NoCompression, whose Extra field marks the blob as
// carrying an external TOC.
func gzipFooterBytes() ([]byte, error) {
	buf := bytes.NewBuffer(make([]byte, 0, FooterSize))
	gz, _ := gzip.NewWriterLevel(buf, gzip.NoCompression) // MUST be NoCompression to keep FooterSize (46) bytes
	// Extra header indicating the offset of TOCJSON
	// https://tools.ietf.org/html/rfc1952#section-2.3.1.1
	header := make([]byte, 4)
	header[0], header[1] = 'S', 'G'
	subfield := "STARGZEXTERNALTOC" // len("STARGZEXTERNALTOC") = 17
	binary.LittleEndian.PutUint16(header[2:4], uint16(len(subfield))) // little-endian per RFC1952
	gz.Extra = append(header, []byte(subfield)...)
	if err := gz.Close(); err != nil {
		return nil, err
	}
	if buf.Len() != FooterSize {
		panic(fmt.Sprintf("footer buffer = %d, not %d", buf.Len(), FooterSize))
	}
	return buf.Bytes(), nil
}
func NewGzipDecompressor(provideTOCFunc func() ([]byte, error)) *GzipDecompressor {
return &GzipDecompressor{provideTOCFunc: provideTOCFunc}
}
type GzipDecompressor struct {
provideTOCFunc func() ([]byte, error)
rawTOC []byte // Do not access this field directly. Get this through getTOC() method.
getTOCOnce sync.Once
}
// getTOC lazily fetches the raw TOC bytes through provideTOCFunc, memoizing
// the result in gz.rawTOC so the provider is invoked at most once.
// NOTE(review): the len(gz.rawTOC) fast-path read is not synchronized with
// the write inside the Once; concurrent first calls could race — confirm
// callers are single-goroutine at first use.
func (gz *GzipDecompressor) getTOC() ([]byte, error) {
	if len(gz.rawTOC) == 0 {
		var retErr error
		gz.getTOCOnce.Do(func() {
			if gz.provideTOCFunc == nil {
				retErr = fmt.Errorf("TOC hasn't been provided")
				return
			}
			rawTOC, err := gz.provideTOCFunc()
			if err != nil {
				retErr = err
				return
			}
			gz.rawTOC = rawTOC
		})
		if retErr != nil {
			return nil, retErr
		}
		// The Once has already run (possibly on an earlier call that failed
		// or produced no bytes): report the absence explicitly.
		if len(gz.rawTOC) == 0 {
			return nil, fmt.Errorf("no TOC is provided")
		}
	}
	return gz.rawTOC, nil
}
func (gz *GzipDecompressor) Reader(r io.Reader) (io.ReadCloser, error) {
return gzip.NewReader(r)
}
func (gz *GzipDecompressor) ParseTOC(r io.Reader) (toc *estargz.JTOC, tocDgst digest.Digest, err error) {
if r != nil {
return nil, "", fmt.Errorf("TOC must be provided externally but got internal one")
}
rawTOC, err := gz.getTOC()
if err != nil {
return nil, "", fmt.Errorf("failed to get TOC: %v", err)
}
return parseTOCEStargz(bytes.NewReader(rawTOC))
}
// ParseFooter parses the 46-byte external-TOC footer. On success it returns
// blobPayloadSize = -1 (meaning "the entire blob") and tocOffset = -1
// (meaning "the TOC is stored externally"); tocSize is always 0.
func (gz *GzipDecompressor) ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error) {
	if len(p) != FooterSize {
		return 0, 0, 0, fmt.Errorf("invalid length %d cannot be parsed", len(p))
	}
	zr, err := gzip.NewReader(bytes.NewReader(p))
	if err != nil {
		return 0, 0, 0, err
	}
	defer zr.Close()
	extra := zr.Extra
	// Guard against an Extra field too short to contain the 4-byte RFC1952
	// subfield header; the fixed-index reads below would otherwise panic.
	if len(extra) < 4 {
		return 0, 0, 0, fmt.Errorf("invalid extra field length %d", len(extra))
	}
	si1, si2, subfieldlen, subfield := extra[0], extra[1], extra[2:4], extra[4:]
	if si1 != 'S' || si2 != 'G' {
		// The error previously reported "want E, S", contradicting the check.
		return 0, 0, 0, fmt.Errorf("invalid subfield IDs: %q, %q; want S, G", si1, si2)
	}
	if slen := binary.LittleEndian.Uint16(subfieldlen); slen != uint16(len("STARGZEXTERNALTOC")) {
		// The expected length is that of "STARGZEXTERNALTOC" (17), not the
		// 22 previously printed by 16+len("STARGZ").
		return 0, 0, 0, fmt.Errorf("invalid length of subfield %d; want %d", slen, len("STARGZEXTERNALTOC"))
	}
	if string(subfield) != "STARGZEXTERNALTOC" {
		return 0, 0, 0, fmt.Errorf("STARGZ magic string must be included in the footer subfield")
	}
	// tocOffset < 0 indicates external TOC.
	// blobPayloadSize < 0 indicates the entire blob size.
	return -1, -1, 0, nil
}
func (gz *GzipDecompressor) FooterSize() int64 {
return FooterSize
}
func (gz *GzipDecompressor) DecompressTOC(r io.Reader) (tocJSON io.ReadCloser, err error) {
if r != nil {
return nil, fmt.Errorf("TOC must be provided externally but got internal one")
}
rawTOC, err := gz.getTOC()
if err != nil {
return nil, fmt.Errorf("failed to get TOC: %v", err)
}
return decompressTOCEStargz(bytes.NewReader(rawTOC))
}
// parseTOCEStargz decompresses the TOC gzip stream from r, decodes the TOC
// JSON, and returns the decoded TOC together with the canonical digest of the
// JSON bytes that were read (used for TOC verification).
func parseTOCEStargz(r io.Reader) (toc *estargz.JTOC, tocDgst digest.Digest, err error) {
	tr, err := decompressTOCEStargz(r)
	if err != nil {
		return nil, "", err
	}
	dgstr := digest.Canonical.Digester()
	toc = new(estargz.JTOC)
	// Tee the stream through the digester so the digest covers exactly the
	// bytes consumed by the JSON decoder.
	if err := json.NewDecoder(io.TeeReader(tr, dgstr.Hash())).Decode(&toc); err != nil {
		return nil, "", fmt.Errorf("error decoding TOC JSON: %v", err)
	}
	if err := tr.Close(); err != nil {
		return nil, "", err
	}
	return toc, dgstr.Digest(), nil
}
// decompressTOCEStargz opens the gzip stream in r, restricts reading to the
// first gzip member, and checks that its first tar entry is the TOC
// (estargz.TOCTarName). The returned ReadCloser yields the raw TOC JSON;
// closing it closes the underlying gzip reader.
func decompressTOCEStargz(r io.Reader) (tocJSON io.ReadCloser, err error) {
	zr, err := gzip.NewReader(r)
	if err != nil {
		return nil, fmt.Errorf("malformed TOC gzip header: %v", err)
	}
	// Stop at the end of the first gzip member so any trailing streams
	// are not consumed.
	zr.Multistream(false)
	tr := tar.NewReader(zr)
	h, err := tr.Next()
	if err != nil {
		return nil, fmt.Errorf("failed to find tar header in TOC gzip stream: %v", err)
	}
	if h.Name != estargz.TOCTarName {
		return nil, fmt.Errorf("TOC tar entry had name %q; expected %q", h.Name, estargz.TOCTarName)
	}
	return readCloser{tr, zr.Close}, nil
}
type readCloser struct {
io.Reader
closeFunc func() error
}
func (rc readCloser) Close() error {
return rc.closeFunc()
}

View File

@ -1,102 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package externaltoc
import (
"bytes"
"compress/gzip"
"fmt"
"testing"
"github.com/containerd/stargz-snapshotter/estargz"
)
// TestGzipEStargz tests gzip-based external TOC eStargz
func TestGzipEStargz(t *testing.T) {
testRunner := &estargz.TestRunner{
TestingT: t,
Runner: func(testingT estargz.TestingT, name string, run func(t estargz.TestingT)) {
tt, ok := testingT.(*testing.T)
if !ok {
testingT.Fatal("TestingT is not a *testing.T")
return
}
tt.Run(name, func(t *testing.T) {
run(t)
})
},
}
estargz.CompressionTestSuite(testRunner,
gzipControllerWithLevel(gzip.NoCompression),
gzipControllerWithLevel(gzip.BestSpeed),
gzipControllerWithLevel(gzip.BestCompression),
gzipControllerWithLevel(gzip.DefaultCompression),
gzipControllerWithLevel(gzip.HuffmanOnly),
)
}
func gzipControllerWithLevel(compressionLevel int) estargz.TestingControllerFactory {
return func() estargz.TestingController {
compressor := NewGzipCompressorWithLevel(compressionLevel)
decompressor := NewGzipDecompressor(func() ([]byte, error) {
buf := new(bytes.Buffer)
if _, err := compressor.WriteTOCTo(buf); err != nil {
return nil, err
}
return buf.Bytes(), nil
})
return &gzipController{compressor, decompressor}
}
}
type gzipController struct {
*GzipCompressor
*GzipDecompressor
}
func (gc *gzipController) String() string {
return fmt.Sprintf("externaltoc_gzip_compression_level=%v", gc.compressionLevel)
}
// TestStream tests the passed estargz blob contains the specified list of streams.
func (gc *gzipController) TestStreams(t estargz.TestingT, b []byte, streams []int64) {
estargz.CheckGzipHasStreams(t, b, streams)
}
func (gc *gzipController) DiffIDOf(t estargz.TestingT, b []byte) string {
return estargz.GzipDiffIDOf(t, b)
}
// Tests footer encoding, size, and parsing of gzip-based eStargz.
func TestGzipFooter(t *testing.T) {
	footer, err := gzipFooterBytes()
	if err != nil {
		t.Fatalf("failed gzipFooterBytes: %v", err)
	}
	if len(footer) != FooterSize {
		t.Fatalf("footer length was %d, not expected %d. got bytes: %q", len(footer), FooterSize, footer)
	}
	// An external-TOC footer carries no offset; ParseFooter signals the
	// external TOC by returning a negative tocOffset.
	_, gotTOCOffset, _, err := (&GzipDecompressor{}).ParseFooter(footer)
	if err != nil {
		t.Fatalf("failed to parse footer, footer: %x: err: %v", footer, err)
	}
	if gotTOCOffset != -1 {
		t.Fatalf("ParseFooter(footerBytes) must return -1 for external toc but got %d", gotTOCOffset)
	}
}

View File

@ -1,10 +1,10 @@
module github.com/containerd/stargz-snapshotter/estargz module github.com/containerd/stargz-snapshotter/estargz
go 1.23.0 go 1.16
require ( require (
github.com/klauspost/compress v1.18.0 github.com/klauspost/compress v1.12.2
github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/go-digest v1.0.0
github.com/vbatts/tar-split v0.12.1 github.com/pkg/errors v0.9.1
golang.org/x/sync v0.16.0 golang.org/x/sync v0.0.0-20201207232520-09787c993a3a
) )

View File

@ -1,8 +1,10 @@
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/klauspost/compress v1.12.2 h1:2KCfW3I9M7nSc5wOqXAlW2v2U6v+w6cbjvbfp+OykW8=
github.com/klauspost/compress v1.12.2/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/vbatts/tar-split v0.12.1 h1:CqKoORW7BUWBe7UL/iqTVvkTBOF8UvOMKOIZykxnnbo= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/vbatts/tar-split v0.12.1/go.mod h1:eF6B6i6ftWQcDqEn3/iGFRFRo8cBIMSJVOpnNdfTMFA= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a h1:DcqTD9SDLc+1P/r1EmRBwnVsrOwW+kk2vWf9n+1sGhs=
golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=

View File

@ -1,237 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file.
*/
package estargz
import (
"archive/tar"
"bytes"
"compress/gzip"
"encoding/binary"
"encoding/json"
"fmt"
"hash"
"io"
"strconv"
digest "github.com/opencontainers/go-digest"
)
type gzipCompression struct {
*GzipCompressor
*GzipDecompressor
}
func newGzipCompressionWithLevel(level int) Compression {
return &gzipCompression{
&GzipCompressor{level},
&GzipDecompressor{},
}
}
func NewGzipCompressor() *GzipCompressor {
return &GzipCompressor{gzip.BestCompression}
}
func NewGzipCompressorWithLevel(level int) *GzipCompressor {
return &GzipCompressor{level}
}
type GzipCompressor struct {
compressionLevel int
}
func (gc *GzipCompressor) Writer(w io.Writer) (WriteFlushCloser, error) {
return gzip.NewWriterLevel(w, gc.compressionLevel)
}
// WriteTOCAndFooter writes the TOC — as an indented-JSON tar entry inside its
// own gzip stream — followed by the footer to w. off is the blob offset at
// which the TOC begins and is recorded in the footer. If diffHash is non-nil,
// the uncompressed TOC tar stream is folded into it, because the in-blob TOC
// is part of the layer and thus affects the DiffID. Returns the digest of the
// raw TOC JSON.
func (gc *GzipCompressor) WriteTOCAndFooter(w io.Writer, off int64, toc *JTOC, diffHash hash.Hash) (digest.Digest, error) {
	tocJSON, err := json.MarshalIndent(toc, "", "\t")
	if err != nil {
		return "", err
	}
	gz, _ := gzip.NewWriterLevel(w, gc.compressionLevel)
	gw := io.Writer(gz)
	if diffHash != nil {
		// DiffID is computed over the uncompressed stream, so hash the bytes
		// before they enter the gzip writer.
		gw = io.MultiWriter(gz, diffHash)
	}
	tw := tar.NewWriter(gw)
	if err := tw.WriteHeader(&tar.Header{
		Typeflag: tar.TypeReg,
		Name:     TOCTarName,
		Size:     int64(len(tocJSON)),
	}); err != nil {
		return "", err
	}
	if _, err := tw.Write(tocJSON); err != nil {
		return "", err
	}
	if err := tw.Close(); err != nil {
		return "", err
	}
	if err := gz.Close(); err != nil {
		return "", err
	}
	if _, err := w.Write(gzipFooterBytes(off)); err != nil {
		return "", err
	}
	return digest.FromBytes(tocJSON), nil
}
// gzipFooterBytes returns the 51 bytes footer: an empty gzip stream with no
// compression whose Extra field encodes tocOff as 16 hex digits followed by
// the "STARGZ" magic.
func gzipFooterBytes(tocOff int64) []byte {
	buf := bytes.NewBuffer(make([]byte, 0, FooterSize))
	gz, _ := gzip.NewWriterLevel(buf, gzip.NoCompression) // MUST be NoCompression to keep 51 bytes
	// Extra header indicating the offset of TOCJSON
	// https://tools.ietf.org/html/rfc1952#section-2.3.1.1
	header := make([]byte, 4)
	header[0], header[1] = 'S', 'G'
	subfield := fmt.Sprintf("%016xSTARGZ", tocOff)
	binary.LittleEndian.PutUint16(header[2:4], uint16(len(subfield))) // little-endian per RFC1952
	gz.Extra = append(header, []byte(subfield)...)
	gz.Close()
	if buf.Len() != FooterSize {
		panic(fmt.Sprintf("footer buffer = %d, not %d", buf.Len(), FooterSize))
	}
	return buf.Bytes()
}
type GzipDecompressor struct{}
func (gz *GzipDecompressor) Reader(r io.Reader) (io.ReadCloser, error) {
return gzip.NewReader(r)
}
func (gz *GzipDecompressor) ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error) {
return parseTOCEStargz(r)
}
// ParseFooter parses the 51-byte eStargz footer and extracts the TOC offset
// from the gzip Extra subfield ("%016x" hex offset + "STARGZ" magic). Both
// blobPayloadSize and tocOffset are that offset; tocSize is 0 (the caller
// derives it from the blob size).
func (gz *GzipDecompressor) ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error) {
	if len(p) != FooterSize {
		return 0, 0, 0, fmt.Errorf("invalid length %d cannot be parsed", len(p))
	}
	zr, err := gzip.NewReader(bytes.NewReader(p))
	if err != nil {
		return 0, 0, 0, err
	}
	defer zr.Close()
	extra := zr.Extra
	// Guard the fixed-index reads below: 4 bytes of subfield header plus a
	// 16-hex-digit offset plus the "STARGZ" magic.
	if len(extra) < 4+16+len("STARGZ") {
		return 0, 0, 0, fmt.Errorf("invalid extra field length %d", len(extra))
	}
	si1, si2, subfieldlen, subfield := extra[0], extra[1], extra[2:4], extra[4:]
	if si1 != 'S' || si2 != 'G' {
		// The error previously reported "want E, S", contradicting the check.
		return 0, 0, 0, fmt.Errorf("invalid subfield IDs: %q, %q; want S, G", si1, si2)
	}
	if slen := binary.LittleEndian.Uint16(subfieldlen); slen != uint16(16+len("STARGZ")) {
		return 0, 0, 0, fmt.Errorf("invalid length of subfield %d; want %d", slen, 16+len("STARGZ"))
	}
	if string(subfield[16:]) != "STARGZ" {
		return 0, 0, 0, fmt.Errorf("STARGZ magic string must be included in the footer subfield")
	}
	tocOffset, err = strconv.ParseInt(string(subfield[:16]), 16, 64)
	if err != nil {
		return 0, 0, 0, fmt.Errorf("legacy: failed to parse toc offset: %w", err)
	}
	return tocOffset, tocOffset, 0, nil
}
func (gz *GzipDecompressor) FooterSize() int64 {
return FooterSize
}
func (gz *GzipDecompressor) DecompressTOC(r io.Reader) (tocJSON io.ReadCloser, err error) {
return decompressTOCEStargz(r)
}
type LegacyGzipDecompressor struct{}
func (gz *LegacyGzipDecompressor) Reader(r io.Reader) (io.ReadCloser, error) {
return gzip.NewReader(r)
}
func (gz *LegacyGzipDecompressor) ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error) {
return parseTOCEStargz(r)
}
// ParseFooter parses the legacy (pre-eStargz) stargz footer. The legacy
// footer's gzip Extra field is the bare 16-hex-digit TOC offset followed by
// the "STARGZ" magic, with no RFC1952 subfield header. Both blobPayloadSize
// and tocOffset are the parsed offset; tocSize is 0 (the caller derives it).
func (gz *LegacyGzipDecompressor) ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error) {
	if len(p) != legacyFooterSize {
		return 0, 0, 0, fmt.Errorf("legacy: invalid length %d cannot be parsed", len(p))
	}
	zr, err := gzip.NewReader(bytes.NewReader(p))
	if err != nil {
		return 0, 0, 0, fmt.Errorf("legacy: failed to get footer gzip reader: %w", err)
	}
	defer zr.Close()
	extra := zr.Extra
	if len(extra) != 16+len("STARGZ") {
		return 0, 0, 0, fmt.Errorf("legacy: invalid stargz's extra field size")
	}
	if string(extra[16:]) != "STARGZ" {
		return 0, 0, 0, fmt.Errorf("legacy: magic string STARGZ not found")
	}
	tocOffset, err = strconv.ParseInt(string(extra[:16]), 16, 64)
	if err != nil {
		return 0, 0, 0, fmt.Errorf("legacy: failed to parse toc offset: %w", err)
	}
	return tocOffset, tocOffset, 0, nil
}
func (gz *LegacyGzipDecompressor) FooterSize() int64 {
return legacyFooterSize
}
func (gz *LegacyGzipDecompressor) DecompressTOC(r io.Reader) (tocJSON io.ReadCloser, err error) {
return decompressTOCEStargz(r)
}
func parseTOCEStargz(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error) {
tr, err := decompressTOCEStargz(r)
if err != nil {
return nil, "", err
}
dgstr := digest.Canonical.Digester()
toc = new(JTOC)
if err := json.NewDecoder(io.TeeReader(tr, dgstr.Hash())).Decode(&toc); err != nil {
return nil, "", fmt.Errorf("error decoding TOC JSON: %v", err)
}
if err := tr.Close(); err != nil {
return nil, "", err
}
return toc, dgstr.Digest(), nil
}
func decompressTOCEStargz(r io.Reader) (tocJSON io.ReadCloser, err error) {
zr, err := gzip.NewReader(r)
if err != nil {
return nil, fmt.Errorf("malformed TOC gzip header: %v", err)
}
zr.Multistream(false)
tr := tar.NewReader(zr)
h, err := tr.Next()
if err != nil {
return nil, fmt.Errorf("failed to find tar header in TOC gzip stream: %v", err)
}
if h.Name != TOCTarName {
return nil, fmt.Errorf("TOC tar entry had name %q; expected %q", h.Name, TOCTarName)
}
return readCloser{tr, zr.Close}, nil
}

View File

@ -1,130 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file.
*/
package estargz
import (
"bytes"
"compress/gzip"
"fmt"
"testing"
)
// TestGzipEStargz tests gzip-based eStargz
func TestGzipEStargz(t *testing.T) {
testRunner := &TestRunner{
TestingT: t,
Runner: func(testingT TestingT, name string, run func(t TestingT)) {
tt, ok := testingT.(*testing.T)
if !ok {
testingT.Fatal("TestingT is not a *testing.T")
return
}
tt.Run(name, func(t *testing.T) {
run(t)
})
},
}
CompressionTestSuite(testRunner,
gzipControllerWithLevel(gzip.NoCompression),
gzipControllerWithLevel(gzip.BestSpeed),
gzipControllerWithLevel(gzip.BestCompression),
gzipControllerWithLevel(gzip.DefaultCompression),
gzipControllerWithLevel(gzip.HuffmanOnly),
)
}
func gzipControllerWithLevel(compressionLevel int) TestingControllerFactory {
return func() TestingController {
return &gzipController{&GzipCompressor{compressionLevel}, &GzipDecompressor{}}
}
}
type gzipController struct {
*GzipCompressor
*GzipDecompressor
}
func (gc *gzipController) String() string {
return fmt.Sprintf("gzip_compression_level=%v", gc.compressionLevel)
}
// TestStream tests the passed estargz blob contains the specified list of streams.
func (gc *gzipController) TestStreams(t TestingT, b []byte, streams []int64) {
CheckGzipHasStreams(t, b, streams)
}
func (gc *gzipController) DiffIDOf(t TestingT, b []byte) string {
return GzipDiffIDOf(t, b)
}
// Tests footer encoding, size, and parsing of gzip-based eStargz.
func TestGzipFooter(t *testing.T) {
for off := int64(0); off <= 200000; off += 1023 {
checkFooter(t, off)
checkLegacyFooter(t, off)
}
}
// TODO: check fallback
func checkFooter(t *testing.T, off int64) {
footer := gzipFooterBytes(off)
if len(footer) != FooterSize {
t.Fatalf("for offset %v, footer length was %d, not expected %d. got bytes: %q", off, len(footer), FooterSize, footer)
}
_, got, _, err := (&GzipDecompressor{}).ParseFooter(footer)
if err != nil {
t.Fatalf("failed to parse footer for offset %d, footer: %x: err: %v",
off, footer, err)
}
if got != off {
t.Fatalf("ParseFooter(footerBytes(offset %d)) = %d; want %d", off, got, off)
}
}
func checkLegacyFooter(t *testing.T, off int64) {
footer := legacyFooterBytes(off)
if len(footer) != legacyFooterSize {
t.Fatalf("for offset %v, footer length was %d, not expected %d. got bytes: %q", off, len(footer), legacyFooterSize, footer)
}
_, got, _, err := (&LegacyGzipDecompressor{}).ParseFooter(footer)
if err != nil {
t.Fatalf("failed to parse legacy footer for offset %d, footer: %x: err: %v",
off, footer, err)
}
if got != off {
t.Fatalf("ParseFooter(legacyFooterBytes(offset %d)) = %d; want %d", off, got, off)
}
}
// legacyFooterBytes builds a legacy stargz footer: an empty NoCompression
// gzip stream whose Extra field is the 16-hex-digit tocOff followed by the
// "STARGZ" magic. Panics if the result is not exactly legacyFooterSize bytes.
func legacyFooterBytes(tocOff int64) []byte {
	var out bytes.Buffer
	out.Grow(legacyFooterSize)
	gz, _ := gzip.NewWriterLevel(&out, gzip.NoCompression)
	gz.Extra = []byte(fmt.Sprintf("%016xSTARGZ", tocOff))
	gz.Close()
	if got := out.Len(); got != legacyFooterSize {
		panic(fmt.Sprintf("footer buffer = %d, not %d", got, legacyFooterSize))
	}
	return out.Bytes()
}

File diff suppressed because it is too large Load Diff

View File

@ -24,8 +24,6 @@ package estargz
import ( import (
"archive/tar" "archive/tar"
"hash"
"io"
"os" "os"
"path" "path"
"time" "time"
@ -75,12 +73,6 @@ const (
// of an image manifest. // of an image manifest.
TOCJSONDigestAnnotation = "containerd.io/snapshot/stargz/toc.digest" TOCJSONDigestAnnotation = "containerd.io/snapshot/stargz/toc.digest"
// StoreUncompressedSizeAnnotation is an additional annotation key for eStargz to enable lazy
// pulling on containers/storage. Stargz Store is required to expose the layer's uncompressed size
// to the runtime but current OCI image doesn't ship this information by default. So we store this
// to the special annotation.
StoreUncompressedSizeAnnotation = "io.containers.estargz.uncompressed-size"
// PrefetchLandmark is a file entry which indicates the end position of // PrefetchLandmark is a file entry which indicates the end position of
// prefetch in the stargz file. // prefetch in the stargz file.
PrefetchLandmark = ".prefetch.landmark" PrefetchLandmark = ".prefetch.landmark"
@ -92,8 +84,8 @@ const (
landmarkContents = 0xf landmarkContents = 0xf
) )
// JTOC is the JSON-serialized table of contents index of the files in the stargz file. // jtoc is the JSON-serialized table of contents index of the files in the stargz file.
type JTOC struct { type jtoc struct {
Version int `json:"version"` Version int `json:"version"`
Entries []*TOCEntry `json:"entries"` Entries []*TOCEntry `json:"entries"`
} }
@ -149,12 +141,6 @@ type TOCEntry struct {
// ChunkSize. // ChunkSize.
Offset int64 `json:"offset,omitempty"` Offset int64 `json:"offset,omitempty"`
// InnerOffset is an optional field indicates uncompressed offset
// of this "reg" or "chunk" payload in a stream starts from Offset.
// This field enables to put multiple "reg" or "chunk" payloads
// in one chunk with having the same Offset but different InnerOffset.
InnerOffset int64 `json:"innerOffset,omitempty"`
nextOffset int64 // the Offset of the next entry with a non-zero Offset nextOffset int64 // the Offset of the next entry with a non-zero Offset
// DevMajor is the major device number for "char" and "block" types. // DevMajor is the major device number for "char" and "block" types.
@ -165,8 +151,7 @@ type TOCEntry struct {
// NumLink is the number of entry names pointing to this entry. // NumLink is the number of entry names pointing to this entry.
// Zero means one name references this entry. // Zero means one name references this entry.
// This field is calculated during runtime and not recorded in TOC JSON. NumLink int
NumLink int `json:"-"`
// Xattrs are the extended attribute for the entry. // Xattrs are the extended attribute for the entry.
Xattrs map[string][]byte `json:"xattrs,omitempty"` Xattrs map[string][]byte `json:"xattrs,omitempty"`
@ -192,9 +177,6 @@ type TOCEntry struct {
ChunkDigest string `json:"chunkDigest,omitempty"` ChunkDigest string `json:"chunkDigest,omitempty"`
children map[string]*TOCEntry children map[string]*TOCEntry
// chunkTopIndex is index of the entry where Offset starts in the blob.
chunkTopIndex int
} }
// ModTime returns the entry's modification time. // ModTime returns the entry's modification time.
@ -274,69 +256,3 @@ type TOCEntryVerifier interface {
// contents of the specified TOCEntry. // contents of the specified TOCEntry.
Verifier(ce *TOCEntry) (digest.Verifier, error) Verifier(ce *TOCEntry) (digest.Verifier, error)
} }
// Compression provides the compression helper to be used creating and parsing eStargz.
// This package provides gzip-based Compression by default, but any compression
// algorithm (e.g. zstd) can be used as long as it implements Compression.
type Compression interface {
Compressor
Decompressor
}
// Compressor represents the helper mothods to be used for creating eStargz.
type Compressor interface {
// Writer returns WriteCloser to be used for writing a chunk to eStargz.
// Everytime a chunk is written, the WriteCloser is closed and Writer is
// called again for writing the next chunk.
//
// The returned writer should implement "Flush() error" function that flushes
// any pending compressed data to the underlying writer.
Writer(w io.Writer) (WriteFlushCloser, error)
// WriteTOCAndFooter is called to write JTOC to the passed Writer.
// diffHash calculates the DiffID (uncompressed sha256 hash) of the blob
// WriteTOCAndFooter can optionally write anything that affects DiffID calculation
// (e.g. uncompressed TOC JSON).
//
// This function returns tocDgst that represents the digest of TOC that will be used
// to verify this blob when it's parsed.
WriteTOCAndFooter(w io.Writer, off int64, toc *JTOC, diffHash hash.Hash) (tocDgst digest.Digest, err error)
}
// Decompressor represents the helper mothods to be used for parsing eStargz.
type Decompressor interface {
// Reader returns ReadCloser to be used for decompressing file payload.
Reader(r io.Reader) (io.ReadCloser, error)
// FooterSize returns the size of the footer of this blob.
FooterSize() int64
// ParseFooter parses the footer and returns the offset and (compressed) size of TOC.
// payloadBlobSize is the (compressed) size of the blob payload (i.e. the size between
// the top until the TOC JSON).
//
// If tocOffset < 0, we assume that TOC isn't contained in the blob and pass nil reader
// to ParseTOC. We expect that ParseTOC acquire TOC from the external location and return it.
//
// tocSize is optional. If tocSize <= 0, it's by default the size of the range from tocOffset until the beginning of the
// footer (blob size - tocOff - FooterSize).
// If blobPayloadSize < 0, blobPayloadSize become the blob size.
ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error)
// ParseTOC parses TOC from the passed reader. The reader provides the partial contents
// of the underlying blob that has the range specified by ParseFooter method.
//
// This function returns tocDgst that represents the digest of TOC that will be used
// to verify this blob. This must match to the value returned from
// Compressor.WriteTOCAndFooter that is used when creating this blob.
//
// If tocOffset returned by ParseFooter is < 0, we assume that TOC isn't contained in the blob.
// Pass nil reader to ParseTOC then we expect that ParseTOC acquire TOC from the external location
// and return it.
ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error)
}
type WriteFlushCloser interface {
io.WriteCloser
Flush() error
}

View File

@ -1,201 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package zstdchunked
import (
"bufio"
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"hash"
"io"
"sync"
"github.com/containerd/stargz-snapshotter/estargz"
"github.com/klauspost/compress/zstd"
digest "github.com/opencontainers/go-digest"
)
const (
// ManifestChecksumAnnotation is an annotation that contains the compressed TOC Digset
ManifestChecksumAnnotation = "io.containers.zstd-chunked.manifest-checksum"
// ManifestPositionAnnotation is an annotation that contains the offset to the TOC.
ManifestPositionAnnotation = "io.containers.zstd-chunked.manifest-position"
// FooterSize is the size of the footer
FooterSize = 40
manifestTypeCRFS = 1
)
var (
	// skippableFrameMagic marks the start of a zstd skippable frame.
	skippableFrameMagic = []byte{0x50, 0x2a, 0x4d, 0x18}
	// zstdFrameMagic marks the start of a regular zstd frame.
	zstdFrameMagic = []byte{0x28, 0xb5, 0x2f, 0xfd}
	// zstdChunkedFrameMagic identifies the zstd:chunked footer (bytes 32-40 of it).
	zstdChunkedFrameMagic = []byte{0x47, 0x6e, 0x55, 0x6c, 0x49, 0x6e, 0x55, 0x78}
)
// Decompressor implements the estargz decompression interface for
// zstd:chunked blobs.
type Decompressor struct{}

// Reader wraps r with a zstd decoder and exposes it as an io.ReadCloser.
func (zz *Decompressor) Reader(r io.Reader) (io.ReadCloser, error) {
	dec, err := zstd.NewReader(r)
	if err != nil {
		return nil, err
	}
	return &zstdReadCloser{dec}, nil
}
// ParseTOC decompresses the TOC JSON from r, decodes it, and returns the
// decoded TOC along with the digest of the uncompressed TOC JSON bytes.
func (zz *Decompressor) ParseTOC(r io.Reader) (toc *estargz.JTOC, tocDgst digest.Digest, err error) {
	zr, err := zstd.NewReader(r)
	if err != nil {
		return nil, "", err
	}
	defer zr.Close()
	// Tee the decompressed stream into a digester so the digest covers
	// exactly the bytes the JSON decoder consumed.
	digester := digest.Canonical.Digester()
	tee := io.TeeReader(zr, digester.Hash())
	parsed := new(estargz.JTOC)
	if err := json.NewDecoder(tee).Decode(parsed); err != nil {
		return nil, "", fmt.Errorf("error decoding TOC JSON: %w", err)
	}
	return parsed, digester.Digest(), nil
}
// ParseFooter parses the 40-byte zstd:chunked footer (see zstdFooterBytes):
//
//	p[0:8]   little-endian offset of the TOC skippable frame
//	p[8:16]  little-endian compressed size of the TOC
//	p[32:40] zstd:chunked magic bytes
//
// It returns the blob payload size (everything before the TOC frame), the
// TOC offset, and the compressed TOC size.
func (zz *Decompressor) ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error) {
	// Validate the length up front; the original read fixed offsets first and
	// would panic on a short slice.
	if len(p) < FooterSize {
		return 0, 0, 0, fmt.Errorf("footer must be at least %d bytes; got %d", FooterSize, len(p))
	}
	// Verify the magic before trusting any other field.
	if !bytes.Equal(zstdChunkedFrameMagic, p[32:40]) {
		return 0, 0, 0, fmt.Errorf("invalid magic number")
	}
	offset := binary.LittleEndian.Uint64(p[0:8])
	compressedLength := binary.LittleEndian.Uint64(p[8:16])
	// 8 is the size of the zstd skippable frame header + the frame size (see
	// WriteTOCAndFooter); the payload ends where that skippable frame begins.
	return int64(offset - 8), int64(offset), int64(compressedLength), nil
}
// FooterSize returns the number of trailing bytes ParseFooter expects to
// be handed (always the fixed zstd:chunked footer size).
func (zz *Decompressor) FooterSize() int64 {
	return FooterSize
}
// DecompressTOC returns a reader over the decompressed TOC JSON read from r.
// The returned ReadCloser must be closed by the caller to release the
// underlying zstd decoder.
func (zz *Decompressor) DecompressTOC(r io.Reader) (tocJSON io.ReadCloser, err error) {
	decoder, err := zstd.NewReader(r)
	if err != nil {
		return nil, err
	}
	br := bufio.NewReader(decoder)
	// Peek so decompression errors surface here rather than on the first Read.
	if _, err := br.Peek(1); err != nil {
		// Release the decoder on the error path; the original leaked it here.
		decoder.Close()
		return nil, err
	}
	return &reader{br, decoder.Close}, nil
}
// reader couples an io.Reader with a close callback so a buffered view of a
// zstd decoder can be handed out as an io.ReadCloser.
type reader struct {
	io.Reader
	closeFunc func()
}

// Close runs the close callback; it never returns an error.
func (r *reader) Close() error { r.closeFunc(); return nil }
// zstdReadCloser adapts *zstd.Decoder (whose Close returns nothing) to the
// io.ReadCloser interface.
type zstdReadCloser struct{ *zstd.Decoder }

// Close releases the decoder; it never returns an error.
func (z *zstdReadCloser) Close() error {
	z.Decoder.Close()
	return nil
}
// Compressor implements the estargz compression interface for producing
// zstd:chunked blobs.
type Compressor struct {
	// CompressionLevel is the zstd encoder level used for all frames.
	CompressionLevel zstd.EncoderLevel
	// Metadata, if non-nil, receives TOC-location annotations when
	// WriteTOCAndFooter runs.
	Metadata map[string]string

	// pool caches *zstd.Encoder values for reuse across Writer calls.
	pool sync.Pool
}
// Writer returns a zstd encoder writing to w. Encoders are recycled through
// an internal pool to cut allocation churn across layers.
func (zc *Compressor) Writer(w io.Writer) (estargz.WriteFlushCloser, error) {
	if cached := zc.pool.Get(); cached != nil {
		enc := cached.(*zstd.Encoder)
		enc.Reset(w)
		return &poolEncoder{enc, zc}, nil
	}
	enc, err := zstd.NewWriter(w, zstd.WithEncoderLevel(zc.CompressionLevel), zstd.WithLowerEncoderMem(true))
	if err != nil {
		return nil, err
	}
	return &poolEncoder{enc, zc}, nil
}
// poolEncoder is a zstd encoder that gives itself back to its Compressor's
// pool when closed.
type poolEncoder struct {
	*zstd.Encoder
	zc *Compressor
}

// Close flushes and closes the wrapped encoder; on success the encoder is
// returned to the pool for a future Writer call.
func (w *poolEncoder) Close() error {
	err := w.Encoder.Close()
	if err != nil {
		return err
	}
	w.zc.pool.Put(w.Encoder)
	return nil
}
// WriteTOCAndFooter compresses the TOC, writes it to w wrapped in a zstd
// skippable frame, then writes the 40-byte footer (also wrapped in a
// skippable frame), and returns the digest of the uncompressed TOC JSON.
// off is the blob offset at which the TOC frame begins. If zc.Metadata is
// non-nil it is populated with zstd:chunked manifest annotations describing
// the TOC location. diffHash is unused by this implementation.
func (zc *Compressor) WriteTOCAndFooter(w io.Writer, off int64, toc *estargz.JTOC, diffHash hash.Hash) (digest.Digest, error) {
	tocJSON, err := json.MarshalIndent(toc, "", "\t")
	if err != nil {
		return "", err
	}
	buf := new(bytes.Buffer)
	encoder, err := zstd.NewWriter(buf, zstd.WithEncoderLevel(zc.CompressionLevel))
	if err != nil {
		return "", err
	}
	if _, err := encoder.Write(tocJSON); err != nil {
		return "", err
	}
	if err := encoder.Close(); err != nil {
		return "", err
	}
	compressedTOC := buf.Bytes()
	// Fail fast on a TOC write error. The original deferred this error to the
	// final return, which allowed the footer write and Metadata population to
	// proceed after a failed TOC write.
	if _, err := io.Copy(w, bytes.NewReader(appendSkippableFrameMagic(compressedTOC))); err != nil {
		return "", err
	}

	// 8 is the size of the zstd skippable frame header + the frame size.
	tocOff := uint64(off) + 8
	if _, err := w.Write(appendSkippableFrameMagic(
		zstdFooterBytes(tocOff, uint64(len(tocJSON)), uint64(len(compressedTOC)))),
	); err != nil {
		return "", err
	}

	if zc.Metadata != nil {
		zc.Metadata[ManifestChecksumAnnotation] = digest.FromBytes(compressedTOC).String()
		zc.Metadata[ManifestPositionAnnotation] = fmt.Sprintf("%d:%d:%d:%d",
			tocOff, len(compressedTOC), len(tocJSON), manifestTypeCRFS)
	}

	return digest.FromBytes(tocJSON), nil
}
// zstdFooterBytes encodes the 40-byte zstd:chunked footer: TOC frame offset,
// compressed TOC size, raw TOC size, manifest type (all little-endian
// uint64), followed by the zstd:chunked magic bytes.
func zstdFooterBytes(tocOff, tocRawSize, tocCompressedSize uint64) []byte {
	f := make([]byte, FooterSize)
	binary.LittleEndian.PutUint64(f[0:8], tocOff)
	binary.LittleEndian.PutUint64(f[8:16], tocCompressedSize)
	binary.LittleEndian.PutUint64(f[16:24], tocRawSize)
	binary.LittleEndian.PutUint64(f[24:32], manifestTypeCRFS)
	copy(f[32:40], zstdChunkedFrameMagic)
	return f
}
// appendSkippableFrameMagic wraps b in a zstd skippable frame header: the
// 4 skippable-frame magic bytes, the little-endian 32-bit length of b, then
// b itself.
func appendSkippableFrameMagic(b []byte) []byte {
	// Build into a fresh slice. The original appended directly to the
	// package-level skippableFrameMagic slice, which would mutate its backing
	// array if that slice ever had spare capacity.
	out := make([]byte, 0, len(skippableFrameMagic)+4+len(b))
	out = append(out, skippableFrameMagic...)
	var size [4]byte
	binary.LittleEndian.PutUint32(size[:], uint32(len(b)))
	out = append(out, size[:]...)
	return append(out, b...)
}

View File

@ -1,182 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package zstdchunked
import (
"bytes"
"crypto/sha256"
"fmt"
"io"
"sort"
"testing"
"github.com/containerd/stargz-snapshotter/estargz"
"github.com/klauspost/compress/zstd"
)
// TestZstdChunked tests zstd:chunked
func TestZstdChunked(t *testing.T) {
	// Adapter that lets the estargz test suite drive Go's *testing.T subtests.
	testRunner := &estargz.TestRunner{
		TestingT: t,
		Runner: func(testingT estargz.TestingT, name string, run func(t estargz.TestingT)) {
			tt, ok := testingT.(*testing.T)
			if !ok {
				testingT.Fatal("TestingT is not a *testing.T")
				return
			}
			tt.Run(name, func(t *testing.T) {
				run(t)
			})
		},
	}

	// Exercise the shared compression suite at several encoder levels.
	estargz.CompressionTestSuite(testRunner,
		zstdControllerWithLevel(zstd.SpeedFastest),
		zstdControllerWithLevel(zstd.SpeedDefault),
		zstdControllerWithLevel(zstd.SpeedBetterCompression),
		// zstdControllerWithLevel(zstd.SpeedBestCompression), // consumes too much memory to pass on CI
	)
}
// zstdControllerWithLevel returns a factory that produces a TestingController
// backed by a Compressor configured at the given zstd encoder level.
func zstdControllerWithLevel(compressionLevel zstd.EncoderLevel) estargz.TestingControllerFactory {
	return func() estargz.TestingController {
		return &zstdController{&Compressor{CompressionLevel: compressionLevel}, &Decompressor{}}
	}
}
// zstdController bundles a Compressor/Decompressor pair so the pair can act
// as a single estargz.TestingController in the compression test suite.
type zstdController struct {
	*Compressor
	*Decompressor
}

// String identifies this controller (and its compression level) in test logs.
func (zc *zstdController) String() string {
	return fmt.Sprintf("zstd_compression_level=%v", zc.CompressionLevel)
}
// TestStreams tests that the passed zstdchunked blob contains the specified
// list of stream offsets. The last entry of streams must be the offset of
// the footer (len(b) - footerSize).
func (zc *zstdController) TestStreams(t estargz.TestingT, b []byte, streams []int64) {
	t.Logf("got zstd streams (compressed size: %d):", len(b))

	if len(streams) == 0 {
		return // nop
	}

	// We expect the last offset is footer offset.
	// 8 is the size of the zstd skippable frame header + the frame size (see WriteTOCAndFooter)
	sort.Slice(streams, func(i, j int) bool {
		return streams[i] < streams[j]
	})
	streams[len(streams)-1] = streams[len(streams)-1] - 8

	// Every expected offset must coincide with the start of some frame; we
	// delete each one as we encounter it while walking the blob.
	wants := map[int64]struct{}{}
	for _, s := range streams {
		wants[s] = struct{}{}
	}

	magicLen := 4 // length of magic bytes and skippable frame magic bytes
	zoff := 0
	numStreams := 0
	for {
		if len(b) <= zoff {
			break
		} else if len(b)-zoff <= magicLen {
			t.Fatalf("invalid frame size %d is too small", len(b)-zoff)
		}
		delete(wants, int64(zoff)) // offset found
		remainingFrames := b[zoff:]

		// Check if zoff points to the beginning of a frame
		if !bytes.Equal(remainingFrames[:magicLen], zstdFrameMagic) {
			if !bytes.Equal(remainingFrames[:magicLen], skippableFrameMagic) {
				t.Fatalf("frame must start from magic bytes; but %x",
					remainingFrames[:magicLen])
			}
		}

		// Advance to the next occurrence of either magic — i.e. the start of
		// the next frame — or to the end of the blob if none remains.
		searchBase := magicLen
		nextMagicIdx := nextIndex(remainingFrames[searchBase:], zstdFrameMagic)
		nextSkippableIdx := nextIndex(remainingFrames[searchBase:], skippableFrameMagic)
		nextFrame := len(remainingFrames)
		for _, i := range []int{nextMagicIdx, nextSkippableIdx} {
			if 0 < i && searchBase+i < nextFrame {
				nextFrame = searchBase + i
			}
		}
		t.Logf(" [%d] at %d in stargz (nextFrame: %d/%d): %v, %v",
			numStreams, zoff, zoff+nextFrame, len(b), nextMagicIdx, nextSkippableIdx)
		zoff += nextFrame
		numStreams++
	}
	if len(wants) != 0 {
		t.Fatalf("some stream offsets not found in the blob: %v", wants)
	}
}
// nextIndex returns the index of the first occurrence of sub within s1, or
// -1 if sub does not occur. An empty s1 never matches (even for empty sub),
// matching the behavior of the original hand-rolled scan.
func nextIndex(s1, sub []byte) int {
	// Preserve the original's corner case: scanning an empty slice yields -1.
	if len(s1) == 0 {
		return -1
	}
	// bytes.Index replaces the original O(len(s1)*len(sub)) manual scan.
	return bytes.Index(s1, sub)
}
// DiffIDOf returns the OCI diff ID (sha256 of the uncompressed contents) of
// the zstd-compressed blob b, failing the test on any error.
func (zc *zstdController) DiffIDOf(t estargz.TestingT, b []byte) string {
	zr, err := zstd.NewReader(bytes.NewReader(b))
	if err != nil {
		t.Fatalf("diffIDOf(zstd): %v", err)
	}
	defer zr.Close()
	sum := sha256.New()
	if _, err := io.Copy(sum, zr); err != nil {
		t.Fatalf("diffIDOf(zstd).Copy: %v", err)
	}
	return fmt.Sprintf("sha256:%x", sum.Sum(nil))
}
// TestZstdChunkedFooter tests footer encoding, size, and parsing of
// zstd:chunked across a sweep of offsets.
func TestZstdChunkedFooter(t *testing.T) {
	const maxOff = int64(200000)
	for off := int64(0); off <= maxOff; off += 1023 {
		size := maxOff - off
		checkZstdChunkedFooter(t, off, size, size/2)
	}
}
// checkZstdChunkedFooter encodes a footer with the given TOC offset, raw TOC
// size, and compressed TOC size, then parses it back and asserts every field
// round-trips through zstdFooterBytes/ParseFooter.
func checkZstdChunkedFooter(t *testing.T, off, size, cSize int64) {
	footer := zstdFooterBytes(uint64(off), uint64(size), uint64(cSize))
	if len(footer) != FooterSize {
		t.Fatalf("for offset %v, footer length was %d, not expected %d. got bytes: %q", off, len(footer), FooterSize, footer)
	}
	gotBlobPayloadSize, gotOff, gotSize, err := (&Decompressor{}).ParseFooter(footer)
	if err != nil {
		t.Fatalf("failed to parse footer for offset %d, footer: %x: err: %v",
			off, footer, err)
	}
	if gotBlobPayloadSize != off-8 {
		// 8 is the size of the zstd skippable frame header + the frame size (see WriteTOCAndFooter)
		t.Fatalf("ParseFooter(footerBytes(offset %d)) = blobPayloadSize %d; want %d", off, gotBlobPayloadSize, off-8)
	}
	if gotOff != off {
		t.Fatalf("ParseFooter(footerBytes(offset %d)) = off %d; want %d", off, gotOff, off)
	}
	if gotSize != cSize {
		t.Fatalf("ParseFooter(footerBytes(offset %d)) = size %d; want %d", off, gotSize, cSize)
	}
}

View File

@ -33,131 +33,36 @@ const (
TargetPrefetchSizeLabel = "containerd.io/snapshot/remote/stargz.prefetch" TargetPrefetchSizeLabel = "containerd.io/snapshot/remote/stargz.prefetch"
) )
// Config is configuration for stargz snapshotter filesystem.
type Config struct { type Config struct {
// Type of cache for compressed contents fetched from the registry. "memory" stores them on memory. HTTPCacheType string `toml:"http_cache_type"`
// Other values default to cache them on disk. FSCacheType string `toml:"filesystem_cache_type"`
HTTPCacheType string `toml:"http_cache_type" json:"http_cache_type"` ResolveResultEntry int `toml:"resolve_result_entry"`
PrefetchSize int64 `toml:"prefetch_size"`
// Type of cache for uncompressed files contents. "memory" stores them on memory. Other values PrefetchTimeoutSec int64 `toml:"prefetch_timeout_sec"`
// default to cache them on disk. NoPrefetch bool `toml:"noprefetch"`
FSCacheType string `toml:"filesystem_cache_type" json:"filesystem_cache_type"` NoBackgroundFetch bool `toml:"no_background_fetch"`
Debug bool `toml:"debug"`
// ResolveResultEntryTTLSec is TTL (in sec) to cache resolved layers for AllowNoVerification bool `toml:"allow_no_verification"`
// future use. (default 120s) DisableVerification bool `toml:"disable_verification"`
ResolveResultEntryTTLSec int `toml:"resolve_result_entry_ttl_sec" json:"resolve_result_entry_ttl_sec"` MaxConcurrency int64 `toml:"max_concurrency"`
NoPrometheus bool `toml:"no_prometheus"`
// PrefetchSize is the default size (in bytes) to prefetch when mounting a layer. Default is 0. Stargz-snapshotter still
// uses the value specified by the image using "containerd.io/snapshot/remote/stargz.prefetch" or the landmark file.
PrefetchSize int64 `toml:"prefetch_size" json:"prefetch_size"`
// PrefetchTimeoutSec is the default timeout (in seconds) when the prefetching takes long. Default is 10s.
PrefetchTimeoutSec int64 `toml:"prefetch_timeout_sec" json:"prefetch_timeout_sec"`
// NoPrefetch disables prefetching. Default is false.
NoPrefetch bool `toml:"noprefetch" json:"noprefetch"`
// NoBackgroundFetch disables the behaviour of fetching the entire layer contents in background. Default is false.
NoBackgroundFetch bool `toml:"no_background_fetch" json:"no_background_fetch"`
// Debug enables filesystem debug log.
Debug bool `toml:"debug" json:"debug"`
// AllowNoVerification allows mouting images without verification. Default is false.
AllowNoVerification bool `toml:"allow_no_verification" json:"allow_no_verification"`
// DisableVerification disables verifying layer contents. Default is false.
DisableVerification bool `toml:"disable_verification" json:"disable_verification"`
// MaxConcurrency is max number of concurrent background tasks for fetching layer contents. Default is 2.
MaxConcurrency int64 `toml:"max_concurrency" json:"max_concurrency"`
// NoPrometheus disables exposing filesystem-related metrics. Default is false.
NoPrometheus bool `toml:"no_prometheus" json:"no_prometheus"`
// BlobConfig is config for layer blob management. // BlobConfig is config for layer blob management.
BlobConfig `toml:"blob" json:"blob"` BlobConfig `toml:"blob"`
// DirectoryCacheConfig is config for directory-based cache. // DirectoryCacheConfig is config for directory-based cache.
DirectoryCacheConfig `toml:"directory_cache" json:"directory_cache"` DirectoryCacheConfig `toml:"directory_cache"`
// FuseConfig is configurations for FUSE fs.
FuseConfig `toml:"fuse" json:"fuse"`
// ResolveResultEntry is a deprecated field.
ResolveResultEntry int `toml:"resolve_result_entry" json:"resolve_result_entry"` // deprecated
} }
// BlobConfig is configuration for the logic to fetching blobs.
type BlobConfig struct { type BlobConfig struct {
// ValidInterval specifies a duration (in seconds) during which the layer can be reused without ValidInterval int64 `toml:"valid_interval"`
// checking the connection to the registry. Default is 60. CheckAlways bool `toml:"check_always"`
ValidInterval int64 `toml:"valid_interval" json:"valid_interval"` ChunkSize int64 `toml:"chunk_size"`
FetchTimeoutSec int64 `toml:"fetching_timeout_sec"`
// CheckAlways overwrites ValidInterval to 0 if it's true. Default is false.
CheckAlways bool `toml:"check_always" json:"check_always"`
// ChunkSize is the granularity (in bytes) at which background fetch and on-demand reads
// are fetched from the remote registry. Default is 50000.
ChunkSize int64 `toml:"chunk_size" json:"chunk_size"`
// FetchTimeoutSec is a timeout duration (in seconds) for fetching chunks from the registry. Default is 300.
FetchTimeoutSec int64 `toml:"fetching_timeout_sec" json:"fetching_tieout_sec"`
// ForceSingleRangeMode disables using of multiple ranges in a Range Request and always specifies one larger
// region that covers them. Default is false.
ForceSingleRangeMode bool `toml:"force_single_range_mode" json:"force_single_range_mode"`
// PrefetchChunkSize is the maximum bytes transferred per http GET from remote registry
// during prefetch. It is recommended to have PrefetchChunkSize > ChunkSize.
// If PrefetchChunkSize < ChunkSize prefetch bytes will be fetched as a single http GET,
// else total GET requests for prefetch = ceil(PrefetchSize / PrefetchChunkSize).
// Default is 0.
PrefetchChunkSize int64 `toml:"prefetch_chunk_size" json:"prefetch_chunk_size"`
// MaxRetries is a max number of reries of a HTTP request. Default is 5.
MaxRetries int `toml:"max_retries" json:"max_retries"`
// MinWaitMSec is minimal delay (in seconds) for the next retrying after a request failure. Default is 30.
MinWaitMSec int `toml:"min_wait_msec" json:"min_wait_msec"`
// MinWaitMSec is maximum delay (in seconds) for the next retrying after a request failure. Default is 30.
MaxWaitMSec int `toml:"max_wait_msec" json:"max_wait_msec"`
} }
// DirectoryCacheConfig is configuration for the disk-based cache.
type DirectoryCacheConfig struct { type DirectoryCacheConfig struct {
// MaxLRUCacheEntry is the number of entries of LRU cache to cache data on memory. Default is 10. MaxLRUCacheEntry int `toml:"max_lru_cache_entry"`
MaxLRUCacheEntry int `toml:"max_lru_cache_entry" json:"max_lru_cache_entry"` MaxCacheFds int `toml:"max_cache_fds"`
SyncAdd bool `toml:"sync_add"`
// MaxCacheFds is the number of entries of LRU cache to hold fds of files of cached contents. Default is 10.
MaxCacheFds int `toml:"max_cache_fds" json:"max_cache_fds"`
// SyncAdd being true means that each adding of data to the cache blocks until the data is fully written to the
// cache directory. Default is false.
SyncAdd bool `toml:"sync_add" json:"sync_add"`
// Direct disables on-memory data cache. Default is true for saving memory usage.
Direct bool `toml:"direct" default:"true" json:"direct"`
// FadvDontNeed forcefully clean fscache pagecache for saving memory. Default is false.
FadvDontNeed bool `toml:"fadv_dontneed" json:"fadv_dontneed"`
}
// FuseConfig is configuration for FUSE fs.
type FuseConfig struct {
// AttrTimeout defines overall timeout attribute for a file system in seconds.
AttrTimeout int64 `toml:"attr_timeout" json:"attr_timeout"`
// EntryTimeout defines TTL for directory, name lookup in seconds.
EntryTimeout int64 `toml:"entry_timeout" json:"entry_timeout"`
// PassThrough indicates whether to enable FUSE passthrough mode to improve local file read performance. Default is false.
PassThrough bool `toml:"passthrough" default:"false" json:"passthrough"`
// MergeBufferSize is the size of the buffer to merge chunks (in bytes) for passthrough mode. Default is 400MB.
MergeBufferSize int64 `toml:"merge_buffer_size" default:"419430400" json:"merge_buffer_size"`
// MergeWorkerCount is the number of workers to merge chunks for passthrough mode. Default is 10.
MergeWorkerCount int `toml:"merge_worker_count" default:"10" json:"merge_worker_count"`
} }

296
fs/fs.go
View File

@ -39,23 +39,18 @@ package fs
import ( import (
"context" "context"
"fmt" "fmt"
"os/exec"
"strconv" "strconv"
"sync" "sync"
"syscall"
"time" "time"
"github.com/containerd/containerd/v2/core/remotes/docker" "github.com/containerd/containerd/log"
"github.com/containerd/containerd/v2/pkg/reference" "github.com/containerd/containerd/remotes/docker"
"github.com/containerd/log"
"github.com/containerd/stargz-snapshotter/estargz" "github.com/containerd/stargz-snapshotter/estargz"
"github.com/containerd/stargz-snapshotter/fs/config" "github.com/containerd/stargz-snapshotter/fs/config"
"github.com/containerd/stargz-snapshotter/fs/layer" "github.com/containerd/stargz-snapshotter/fs/layer"
commonmetrics "github.com/containerd/stargz-snapshotter/fs/metrics/common" fsmetrics "github.com/containerd/stargz-snapshotter/fs/metrics"
layermetrics "github.com/containerd/stargz-snapshotter/fs/metrics/layer"
"github.com/containerd/stargz-snapshotter/fs/remote"
"github.com/containerd/stargz-snapshotter/fs/source" "github.com/containerd/stargz-snapshotter/fs/source"
"github.com/containerd/stargz-snapshotter/metadata"
memorymetadata "github.com/containerd/stargz-snapshotter/metadata/memory"
"github.com/containerd/stargz-snapshotter/snapshot" "github.com/containerd/stargz-snapshotter/snapshot"
"github.com/containerd/stargz-snapshotter/task" "github.com/containerd/stargz-snapshotter/task"
metrics "github.com/docker/go-metrics" metrics "github.com/docker/go-metrics"
@ -63,31 +58,15 @@ import (
"github.com/hanwen/go-fuse/v2/fuse" "github.com/hanwen/go-fuse/v2/fuse"
digest "github.com/opencontainers/go-digest" digest "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1" ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"golang.org/x/sys/unix" "github.com/pkg/errors"
) )
const ( const defaultMaxConcurrency = 2
defaultFuseTimeout = time.Second
defaultMaxConcurrency = 2
)
var fusermountBin = []string{"fusermount", "fusermount3"}
var (
nsLock = sync.Mutex{}
ns *metrics.Namespace
metricsCtr *layermetrics.Controller
)
type Option func(*options) type Option func(*options)
type options struct { type options struct {
getSources source.GetSources getSources source.GetSources
resolveHandlers map[string]remote.Handler
metadataStore metadata.Store
metricsLogLevel *log.Level
overlayOpaqueType layer.OverlayOpaqueType
additionalDecompressors func(context.Context, source.RegistryHosts, reference.Spec, ocispec.Descriptor) []metadata.Decompressor
} }
func WithGetSources(s source.GetSources) Option { func WithGetSources(s source.GetSources) Option {
@ -96,39 +75,6 @@ func WithGetSources(s source.GetSources) Option {
} }
} }
func WithResolveHandler(name string, handler remote.Handler) Option {
return func(opts *options) {
if opts.resolveHandlers == nil {
opts.resolveHandlers = make(map[string]remote.Handler)
}
opts.resolveHandlers[name] = handler
}
}
func WithMetadataStore(metadataStore metadata.Store) Option {
return func(opts *options) {
opts.metadataStore = metadataStore
}
}
func WithMetricsLogLevel(logLevel log.Level) Option {
return func(opts *options) {
opts.metricsLogLevel = &logLevel
}
}
func WithOverlayOpaqueType(overlayOpaqueType layer.OverlayOpaqueType) Option {
return func(opts *options) {
opts.overlayOpaqueType = overlayOpaqueType
}
}
func WithAdditionalDecompressors(d func(context.Context, source.RegistryHosts, reference.Spec, ocispec.Descriptor) []metadata.Decompressor) Option {
return func(opts *options) {
opts.additionalDecompressors = d
}
}
func NewFilesystem(root string, cfg config.Config, opts ...Option) (_ snapshot.FileSystem, err error) { func NewFilesystem(root string, cfg config.Config, opts ...Option) (_ snapshot.FileSystem, err error) {
var fsOpts options var fsOpts options
for _, o := range opts { for _, o := range opts {
@ -138,48 +84,24 @@ func NewFilesystem(root string, cfg config.Config, opts ...Option) (_ snapshot.F
if maxConcurrency == 0 { if maxConcurrency == 0 {
maxConcurrency = defaultMaxConcurrency maxConcurrency = defaultMaxConcurrency
} }
attrTimeout := time.Duration(cfg.AttrTimeout) * time.Second
if attrTimeout == 0 {
attrTimeout = defaultFuseTimeout
}
entryTimeout := time.Duration(cfg.EntryTimeout) * time.Second
if entryTimeout == 0 {
entryTimeout = defaultFuseTimeout
}
metadataStore := fsOpts.metadataStore
if metadataStore == nil {
metadataStore = memorymetadata.NewReader
}
getSources := fsOpts.getSources getSources := fsOpts.getSources
if getSources == nil { if getSources == nil {
getSources = source.FromDefaultLabels(func(refspec reference.Spec) (hosts []docker.RegistryHost, _ error) { getSources = source.FromDefaultLabels(
return docker.ConfigureDefaultRegistries(docker.WithPlainHTTP(docker.MatchLocalhost))(refspec.Hostname()) docker.ConfigureDefaultRegistries(docker.WithPlainHTTP(docker.MatchLocalhost)))
})
} }
tm := task.NewBackgroundTaskManager(maxConcurrency, 5*time.Second) tm := task.NewBackgroundTaskManager(maxConcurrency, 5*time.Second)
r, err := layer.NewResolver(root, tm, cfg, fsOpts.resolveHandlers, metadataStore, fsOpts.overlayOpaqueType, fsOpts.additionalDecompressors) r, err := layer.NewResolver(root, tm, cfg)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to setup resolver: %w", err) return nil, errors.Wrapf(err, "failed to setup resolver")
} }
nsLock.Lock() var ns *metrics.Namespace
defer nsLock.Unlock() if !cfg.NoPrometheus {
if !cfg.NoPrometheus && ns == nil {
ns = metrics.NewNamespace("stargz", "fs", nil) ns = metrics.NewNamespace("stargz", "fs", nil)
logLevel := log.DebugLevel
if fsOpts.metricsLogLevel != nil {
logLevel = *fsOpts.metricsLogLevel
}
commonmetrics.Register(logLevel) // Register common metrics. This will happen only once.
metrics.Register(ns) // Register layer metrics.
} }
if metricsCtr == nil { c := fsmetrics.NewLayerMetrics(ns)
metricsCtr = layermetrics.NewLayerMetrics(ns) if ns != nil {
metrics.Register(ns)
} }
return &filesystem{ return &filesystem{
@ -193,9 +115,7 @@ func NewFilesystem(root string, cfg config.Config, opts ...Option) (_ snapshot.F
backgroundTaskManager: tm, backgroundTaskManager: tm,
allowNoVerification: cfg.AllowNoVerification, allowNoVerification: cfg.AllowNoVerification,
disableVerification: cfg.DisableVerification, disableVerification: cfg.DisableVerification,
metricsController: metricsCtr, metricsController: c,
attrTimeout: attrTimeout,
entryTimeout: entryTimeout,
}, nil }, nil
} }
@ -211,15 +131,10 @@ type filesystem struct {
allowNoVerification bool allowNoVerification bool
disableVerification bool disableVerification bool
getSources source.GetSources getSources source.GetSources
metricsController *layermetrics.Controller metricsController *fsmetrics.Controller
attrTimeout time.Duration
entryTimeout time.Duration
} }
func (fs *filesystem) Mount(ctx context.Context, mountpoint string, labels map[string]string) (retErr error) { func (fs *filesystem) Mount(ctx context.Context, mountpoint string, labels map[string]string) (retErr error) {
// Setting the start time to measure the Mount operation duration.
start := time.Now()
// This is a prioritized task and all background tasks will be stopped // This is a prioritized task and all background tasks will be stopped
// execution so this can avoid being disturbed for NW traffic by background // execution so this can avoid being disturbed for NW traffic by background
// tasks. // tasks.
@ -235,13 +150,6 @@ func (fs *filesystem) Mount(ctx context.Context, mountpoint string, labels map[s
return fmt.Errorf("source must be passed") return fmt.Errorf("source must be passed")
} }
defaultPrefetchSize := fs.prefetchSize
if psStr, ok := labels[config.TargetPrefetchSizeLabel]; ok {
if ps, err := strconv.ParseInt(psStr, 10, 64); err == nil {
defaultPrefetchSize = ps
}
}
// Resolve the target layer // Resolve the target layer
var ( var (
resultChan = make(chan layer.Layer) resultChan = make(chan layer.Layer)
@ -253,10 +161,10 @@ func (fs *filesystem) Mount(ctx context.Context, mountpoint string, labels map[s
l, err := fs.resolver.Resolve(ctx, s.Hosts, s.Name, s.Target) l, err := fs.resolver.Resolve(ctx, s.Hosts, s.Name, s.Target)
if err == nil { if err == nil {
resultChan <- l resultChan <- l
fs.prefetch(ctx, l, defaultPrefetchSize, start)
return return
} }
rErr = fmt.Errorf("failed to resolve layer %q from %q: %v: %w", s.Target.Digest, s.Name, err, rErr) rErr = errors.Wrapf(rErr, "failed to resolve layer %q from %q: %v",
s.Target.Digest, s.Name, err)
} }
errChan <- rErr errChan <- rErr
}() }()
@ -267,17 +175,12 @@ func (fs *filesystem) Mount(ctx context.Context, mountpoint string, labels map[s
desc := desc desc := desc
go func() { go func() {
// Avoids to get canceled by client. // Avoids to get canceled by client.
ctx := log.WithLogger(context.Background(), log.G(ctx).WithField("mountpoint", mountpoint)) ctx := log.WithLogger(context.Background(),
l, err := fs.resolver.Resolve(ctx, preResolve.Hosts, preResolve.Name, desc) log.G(ctx).WithField("mountpoint", mountpoint))
err := fs.resolver.Cache(ctx, preResolve.Hosts, preResolve.Name, desc)
if err != nil { if err != nil {
log.G(ctx).WithError(err).Debug("failed to pre-resolve") log.G(ctx).WithError(err).Debug("failed to pre-resolve")
return
} }
fs.prefetch(ctx, l, defaultPrefetchSize, start)
// Release this layer because this isn't target and we don't use it anymore here.
// However, this will remain on the resolver cache until eviction.
l.Done()
}() }()
} }
@ -287,7 +190,7 @@ func (fs *filesystem) Mount(ctx context.Context, mountpoint string, labels map[s
case l = <-resultChan: case l = <-resultChan:
case err := <-errChan: case err := <-errChan:
log.G(ctx).WithError(err).Debug("failed to resolve layer") log.G(ctx).WithError(err).Debug("failed to resolve layer")
return fmt.Errorf("failed to resolve layer: %w", err) return errors.Wrapf(err, "failed to resolve layer")
case <-time.After(30 * time.Second): case <-time.After(30 * time.Second):
log.G(ctx).Debug("failed to resolve layer (timeout)") log.G(ctx).Debug("failed to resolve layer (timeout)")
return fmt.Errorf("failed to resolve layer (timeout)") return fmt.Errorf("failed to resolve layer (timeout)")
@ -302,17 +205,17 @@ func (fs *filesystem) Mount(ctx context.Context, mountpoint string, labels map[s
if fs.disableVerification { if fs.disableVerification {
// Skip if verification is disabled completely // Skip if verification is disabled completely
l.SkipVerify() l.SkipVerify()
log.G(ctx).Infof("Verification forcefully skipped") log.G(ctx).Debugf("Verification forcefully skipped")
} else if tocDigest, ok := labels[estargz.TOCJSONDigestAnnotation]; ok { } else if tocDigest, ok := labels[estargz.TOCJSONDigestAnnotation]; ok {
// Verify this layer using the TOC JSON digest passed through label. // Verify this layer using the TOC JSON digest passed through label.
dgst, err := digest.Parse(tocDigest) dgst, err := digest.Parse(tocDigest)
if err != nil { if err != nil {
log.G(ctx).WithError(err).Debugf("failed to parse passed TOC digest %q", dgst) log.G(ctx).WithError(err).Debugf("failed to parse passed TOC digest %q", dgst)
return fmt.Errorf("invalid TOC digest: %v: %w", tocDigest, err) return errors.Wrapf(err, "invalid TOC digest: %v", tocDigest)
} }
if err := l.Verify(dgst); err != nil { if err := l.Verify(dgst); err != nil {
log.G(ctx).WithError(err).Debugf("invalid layer") log.G(ctx).WithError(err).Debugf("invalid layer")
return fmt.Errorf("invalid stargz layer: %w", err) return errors.Wrapf(err, "invalid stargz layer")
} }
log.G(ctx).Debugf("verified") log.G(ctx).Debugf("verified")
} else if _, ok := labels[config.TargetSkipVerifyLabel]; ok && fs.allowNoVerification { } else if _, ok := labels[config.TargetSkipVerifyLabel]; ok && fs.allowNoVerification {
@ -325,42 +228,66 @@ func (fs *filesystem) Mount(ctx context.Context, mountpoint string, labels map[s
// Verification must be done. Don't mount this layer. // Verification must be done. Don't mount this layer.
return fmt.Errorf("digest of TOC JSON must be passed") return fmt.Errorf("digest of TOC JSON must be passed")
} }
node, err := l.RootNode(0) node, err := l.RootNode()
if err != nil { if err != nil {
log.G(ctx).WithError(err).Warnf("Failed to get root node") log.G(ctx).WithError(err).Warnf("Failed to get root node")
return fmt.Errorf("failed to get root node: %w", err) return errors.Wrapf(err, "failed to get root node")
} }
// Measuring duration of Mount operation for resolved layer.
digest := l.Info().Digest // get layer sha
defer commonmetrics.MeasureLatencyInMilliseconds(commonmetrics.Mount, digest, start)
// Register the mountpoint layer // Register the mountpoint layer
fs.layerMu.Lock() fs.layerMu.Lock()
fs.layer[mountpoint] = l fs.layer[mountpoint] = l
fs.layerMu.Unlock() fs.layerMu.Unlock()
fs.metricsController.Add(mountpoint, l) fs.metricsController.Add(mountpoint, l)
// Prefetch this layer. We prefetch several layers in parallel. The first
// Check() for this layer waits for the prefetch completion.
if !fs.noprefetch {
prefetchSize := fs.prefetchSize
if psStr, ok := labels[config.TargetPrefetchSizeLabel]; ok {
if ps, err := strconv.ParseInt(psStr, 10, 64); err == nil {
prefetchSize = ps
}
}
go func() {
fs.backgroundTaskManager.DoPrioritizedTask()
defer fs.backgroundTaskManager.DonePrioritizedTask()
if err := l.Prefetch(prefetchSize); err != nil {
log.G(ctx).WithError(err).Debug("failed to prefetched layer")
return
}
log.G(ctx).Debug("completed to prefetch")
}()
}
// Fetch whole layer aggressively in background. We use background
// reader for this so prioritized tasks(Mount, Check, etc...) can
// interrupt the reading. This can avoid disturbing prioritized tasks
// about NW traffic.
if !fs.noBackgroundFetch {
go func() {
if err := l.BackgroundFetch(); err != nil {
log.G(ctx).WithError(err).Debug("failed to fetch whole layer")
return
}
log.G(ctx).Debug("completed to fetch all layer data in background")
}()
}
// mount the node to the specified mountpoint // mount the node to the specified mountpoint
// TODO: bind mount the state directory as a read-only fs on snapshotter's side // TODO: bind mount the state directory as a read-only fs on snapshotter's side
timeSec := time.Second
rawFS := fusefs.NewNodeFS(node, &fusefs.Options{ rawFS := fusefs.NewNodeFS(node, &fusefs.Options{
AttrTimeout: &fs.attrTimeout, AttrTimeout: &timeSec,
EntryTimeout: &fs.entryTimeout, EntryTimeout: &timeSec,
NullPermissions: true, NullPermissions: true,
}) })
mountOpts := &fuse.MountOptions{ server, err := fuse.NewServer(rawFS, mountpoint, &fuse.MountOptions{
AllowOther: true, // allow users other than root&mounter to access fs AllowOther: true, // allow users other than root&mounter to access fs
FsName: "stargz", // name this filesystem as "stargz" FsName: "stargz", // name this filesystem as "stargz"
Options: []string{"suid"}, // allow setuid inside container
Debug: fs.debug, Debug: fs.debug,
} })
if isFusermountBinExist() {
log.G(ctx).Infof("fusermount detected")
mountOpts.Options = []string{"suid"} // option for fusermount; allow setuid inside container
} else {
log.G(ctx).WithError(err).Infof("%s not installed; trying direct mount", fusermountBin)
mountOpts.DirectMount = true
}
server, err := fuse.NewServer(rawFS, mountpoint, mountOpts)
if err != nil { if err != nil {
log.G(ctx).WithError(err).Debug("failed to make filesystem server") log.G(ctx).WithError(err).Debug("failed to make filesystem server")
return err return err
@ -377,8 +304,6 @@ func (fs *filesystem) Check(ctx context.Context, mountpoint string, labels map[s
fs.backgroundTaskManager.DoPrioritizedTask() fs.backgroundTaskManager.DoPrioritizedTask()
defer fs.backgroundTaskManager.DonePrioritizedTask() defer fs.backgroundTaskManager.DonePrioritizedTask()
defer commonmetrics.MeasureLatencyInMilliseconds(commonmetrics.PrefetchesCompleted, digest.FromString(""), time.Now()) // measuring the time the container launch is blocked on prefetch to complete
ctx = log.WithLogger(ctx, log.G(ctx).WithField("mountpoint", mountpoint)) ctx = log.WithLogger(ctx, log.G(ctx).WithField("mountpoint", mountpoint))
fs.layerMu.Lock() fs.layerMu.Lock()
@ -389,13 +314,10 @@ func (fs *filesystem) Check(ctx context.Context, mountpoint string, labels map[s
return fmt.Errorf("layer not registered") return fmt.Errorf("layer not registered")
} }
if l.Info().FetchedSize < l.Info().Size { // Check the blob connectivity and try to refresh the connection on failure
// Image contents hasn't fully cached yet. if err := fs.check(ctx, l, labels); err != nil {
// Check the blob connectivity and try to refresh the connection on failure log.G(ctx).WithError(err).Warn("check failed")
if err := fs.check(ctx, l, labels); err != nil { return err
log.G(ctx).WithError(err).Warn("check failed")
return err
}
} }
// Wait for prefetch compeletion // Wait for prefetch compeletion
@ -432,8 +354,10 @@ func (fs *filesystem) check(ctx context.Context, l layer.Layer, labels map[strin
log.G(ctx).Debug("Successfully refreshed connection") log.G(ctx).Debug("Successfully refreshed connection")
return nil return nil
} }
log.G(ctx).WithError(err).Warnf("failed to refresh the layer %q from %q", s.Target.Digest, s.Name) log.G(ctx).WithError(err).Warnf("failed to refresh the layer %q from %q",
rErr = fmt.Errorf("failed(layer:%q, ref:%q): %v: %w", s.Target.Digest, s.Name, err, rErr) s.Target.Digest, s.Name)
rErr = errors.Wrapf(rErr, "failed(layer:%q, ref:%q): %v",
s.Target.Digest, s.Name, err)
} }
} }
@ -441,59 +365,22 @@ func (fs *filesystem) check(ctx context.Context, l layer.Layer, labels map[strin
} }
func (fs *filesystem) Unmount(ctx context.Context, mountpoint string) error { func (fs *filesystem) Unmount(ctx context.Context, mountpoint string) error {
if mountpoint == "" {
return fmt.Errorf("mount point must be specified")
}
fs.layerMu.Lock() fs.layerMu.Lock()
l, ok := fs.layer[mountpoint] l, ok := fs.layer[mountpoint]
if !ok { if !ok {
fs.layerMu.Unlock() fs.layerMu.Unlock()
return fmt.Errorf("specified path %q isn't a mountpoint", mountpoint) return fmt.Errorf("specified path %q isn't a mountpoint", mountpoint)
} }
delete(fs.layer, mountpoint) // unregisters the corresponding layer delete(fs.layer, mountpoint) // unregisters the corresponding layer
if err := l.Close(); err != nil { // Cleanup associated resources l.Done()
log.G(ctx).WithError(err).Warn("failed to release resources of the layer")
}
fs.layerMu.Unlock() fs.layerMu.Unlock()
fs.metricsController.Remove(mountpoint) fs.metricsController.Remove(mountpoint)
// The goroutine which serving the mountpoint possibly becomes not responding.
if err := unmount(mountpoint, 0); err != nil { // In case of such situations, we use MNT_FORCE here and abort the connection.
if err != unix.EBUSY { // In the future, we might be able to consider to kill that specific hanging
return err // goroutine using channel, etc.
} // See also: https://www.kernel.org/doc/html/latest/filesystems/fuse.html#aborting-a-filesystem-connection
// Try force unmount return syscall.Unmount(mountpoint, syscall.MNT_FORCE)
log.G(ctx).WithError(err).Debugf("trying force unmount %q", mountpoint)
if err := unmount(mountpoint, unix.MNT_FORCE); err != nil {
return err
}
}
return nil
}
func unmount(target string, flags int) error {
for {
if err := unix.Unmount(target, flags); err != unix.EINTR {
return err
}
}
}
func (fs *filesystem) prefetch(ctx context.Context, l layer.Layer, defaultPrefetchSize int64, start time.Time) {
// Prefetch a layer. The first Check() for this layer waits for the prefetch completion.
if !fs.noprefetch {
go l.Prefetch(defaultPrefetchSize)
}
// Fetch whole layer aggressively in background.
if !fs.noBackgroundFetch {
go func() {
if err := l.BackgroundFetch(); err == nil {
// write log record for the latency between mount start and last on demand fetch
commonmetrics.LogLatencyForLastOnDemandFetch(ctx, l.Info().Digest, start, l.Info().ReadTime)
}
}()
}
} }
// neighboringLayers returns layer descriptors except the `target` layer in the specified manifest. // neighboringLayers returns layer descriptors except the `target` layer in the specified manifest.
@ -505,12 +392,3 @@ func neighboringLayers(manifest ocispec.Manifest, target ocispec.Descriptor) (de
} }
return return
} }
func isFusermountBinExist() bool {
for _, b := range fusermountBin {
if _, err := exec.LookPath(b); err == nil {
return true
}
}
return false
}

View File

@ -28,10 +28,9 @@ import (
"testing" "testing"
"time" "time"
"github.com/containerd/containerd/v2/core/remotes/docker" "github.com/containerd/containerd/reference"
"github.com/containerd/containerd/v2/pkg/reference" "github.com/containerd/containerd/remotes/docker"
"github.com/containerd/stargz-snapshotter/fs/layer" "github.com/containerd/stargz-snapshotter/fs/layer"
"github.com/containerd/stargz-snapshotter/fs/remote"
"github.com/containerd/stargz-snapshotter/fs/source" "github.com/containerd/stargz-snapshotter/fs/source"
"github.com/containerd/stargz-snapshotter/task" "github.com/containerd/stargz-snapshotter/task"
fusefs "github.com/hanwen/go-fuse/v2/fs" fusefs "github.com/hanwen/go-fuse/v2/fs"
@ -46,9 +45,8 @@ func TestCheck(t *testing.T) {
"test": bl, "test": bl,
}, },
backgroundTaskManager: task.NewBackgroundTaskManager(1, time.Millisecond), backgroundTaskManager: task.NewBackgroundTaskManager(1, time.Millisecond),
getSources: source.FromDefaultLabels(func(refspec reference.Spec) (hosts []docker.RegistryHost, _ error) { getSources: source.FromDefaultLabels(
return docker.ConfigureDefaultRegistries(docker.WithPlainHTTP(docker.MatchLocalhost))(refspec.Hostname()) docker.ConfigureDefaultRegistries(docker.WithPlainHTTP(docker.MatchLocalhost))),
}),
} }
bl.success = true bl.success = true
if err := fs.Check(context.TODO(), "test", nil); err != nil { if err := fs.Check(context.TODO(), "test", nil); err != nil {
@ -65,31 +63,23 @@ type breakableLayer struct {
success bool success bool
} }
func (l *breakableLayer) Info() layer.Info { func (l *breakableLayer) Info() layer.Info { return layer.Info{} }
return layer.Info{ func (l *breakableLayer) RootNode() (fusefs.InodeEmbedder, error) { return nil, nil }
Size: 1, func (l *breakableLayer) Verify(tocDigest digest.Digest) error { return nil }
} func (l *breakableLayer) SkipVerify() {}
} func (l *breakableLayer) Prefetch(prefetchSize int64) error { return fmt.Errorf("fail") }
func (l *breakableLayer) RootNode(uint32) (fusefs.InodeEmbedder, error) { return nil, nil } func (l *breakableLayer) WaitForPrefetchCompletion() error { return fmt.Errorf("fail") }
func (l *breakableLayer) Verify(tocDigest digest.Digest) error { return nil } func (l *breakableLayer) BackgroundFetch() error { return fmt.Errorf("fail") }
func (l *breakableLayer) SkipVerify() {}
func (l *breakableLayer) Prefetch(prefetchSize int64) error { return fmt.Errorf("fail") }
func (l *breakableLayer) ReadAt([]byte, int64, ...remote.Option) (int, error) {
return 0, fmt.Errorf("fail")
}
func (l *breakableLayer) WaitForPrefetchCompletion() error { return fmt.Errorf("fail") }
func (l *breakableLayer) BackgroundFetch() error { return fmt.Errorf("fail") }
func (l *breakableLayer) Check() error { func (l *breakableLayer) Check() error {
if !l.success { if !l.success {
return fmt.Errorf("failed") return fmt.Errorf("failed")
} }
return nil return nil
} }
func (l *breakableLayer) Refresh(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error { func (l *breakableLayer) Refresh(ctx context.Context, hosts docker.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error {
if !l.success { if !l.success {
return fmt.Errorf("failed") return fmt.Errorf("failed")
} }
return nil return nil
} }
func (l *breakableLayer) Done() {} func (l *breakableLayer) Done() {}
func (l *breakableLayer) Close() error { return nil }

View File

@ -27,127 +27,102 @@ import (
"context" "context"
"fmt" "fmt"
"io" "io"
"io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
"sync" "sync"
"time" "time"
"github.com/containerd/containerd/v2/pkg/reference" "github.com/containerd/containerd/log"
"github.com/containerd/log" "github.com/containerd/containerd/reference"
"github.com/containerd/containerd/remotes/docker"
"github.com/containerd/stargz-snapshotter/cache" "github.com/containerd/stargz-snapshotter/cache"
"github.com/containerd/stargz-snapshotter/estargz" "github.com/containerd/stargz-snapshotter/estargz"
"github.com/containerd/stargz-snapshotter/estargz/zstdchunked"
"github.com/containerd/stargz-snapshotter/fs/config" "github.com/containerd/stargz-snapshotter/fs/config"
commonmetrics "github.com/containerd/stargz-snapshotter/fs/metrics/common"
"github.com/containerd/stargz-snapshotter/fs/reader" "github.com/containerd/stargz-snapshotter/fs/reader"
"github.com/containerd/stargz-snapshotter/fs/remote" "github.com/containerd/stargz-snapshotter/fs/remote"
"github.com/containerd/stargz-snapshotter/fs/source"
"github.com/containerd/stargz-snapshotter/metadata"
"github.com/containerd/stargz-snapshotter/task" "github.com/containerd/stargz-snapshotter/task"
"github.com/containerd/stargz-snapshotter/util/cacheutil" "github.com/containerd/stargz-snapshotter/util/lrucache"
"github.com/containerd/stargz-snapshotter/util/namedmutex" "github.com/containerd/stargz-snapshotter/util/namedmutex"
fusefs "github.com/hanwen/go-fuse/v2/fs" fusefs "github.com/hanwen/go-fuse/v2/fs"
digest "github.com/opencontainers/go-digest" digest "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1" ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
) )
const ( const (
defaultResolveResultEntryTTLSec = 120 defaultResolveResultEntry = 30
defaultMaxLRUCacheEntry = 10 defaultMaxLRUCacheEntry = 10
defaultMaxCacheFds = 10 defaultMaxCacheFds = 10
defaultPrefetchTimeoutSec = 10 defaultPrefetchTimeoutSec = 10
memoryCacheType = "memory" memoryCacheType = "memory"
) )
// passThroughConfig contains configuration for FUSE passthrough mode
type passThroughConfig struct {
// enable indicates whether to enable FUSE passthrough mode
enable bool
// mergeBufferSize is the size of the buffer to merge chunks (in bytes)
mergeBufferSize int64
// mergeWorkerCount is the number of workers to merge chunks
mergeWorkerCount int
}
// Layer represents a layer. // Layer represents a layer.
type Layer interface { type Layer interface {
// Info returns the information of this layer. // Info returns the information of this layer.
Info() Info Info() Info
// RootNode returns the root node of this layer. // RootNode returns the root node of this layer.
RootNode(baseInode uint32) (fusefs.InodeEmbedder, error) RootNode() (fusefs.InodeEmbedder, error)
// Check checks if the layer is still connectable. // Check checks if the layer is still connectable.
Check() error Check() error
// Refresh refreshes the layer connection. // Refresh refreshes the layer connection.
Refresh(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error Refresh(ctx context.Context, hosts docker.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error
// Verify verifies this layer using the passed TOC Digest. // Verify verifies this layer using the passed TOC Digest.
// Nop if Verify() or SkipVerify() was already called.
Verify(tocDigest digest.Digest) (err error) Verify(tocDigest digest.Digest) (err error)
// SkipVerify skips verification for this layer. // SkipVerify skips verification for this layer.
// Nop if Verify() or SkipVerify() was already called.
SkipVerify() SkipVerify()
// Prefetch prefetches the specified size. If the layer is eStargz and contains landmark files, // Prefetch prefetches the specified size. If the layer is eStargz and contains landmark files,
// the range indicated by these files is respected. // the range indicated by these files is respected.
// Calling this function before calling Verify or SkipVerify will fail.
Prefetch(prefetchSize int64) error Prefetch(prefetchSize int64) error
// ReadAt reads this layer.
ReadAt([]byte, int64, ...remote.Option) (int, error)
// WaitForPrefetchCompletion waits untils Prefetch completes. // WaitForPrefetchCompletion waits untils Prefetch completes.
WaitForPrefetchCompletion() error WaitForPrefetchCompletion() error
// BackgroundFetch fetches the entire layer contents to the cache. // BackgroundFetch fetches the entire layer contents to the cache.
// Fetching contents is done as a background task. // Fetching contents is done as a background task.
// Calling this function before calling Verify or SkipVerify will fail.
BackgroundFetch() error BackgroundFetch() error
// Done releases the reference to this layer. The resources related to this layer will be // Done releases the reference to this layer. The resources related to this layer will be
// discarded sooner or later. Queries after calling this function won't be serviced. // discarded sooner or later. Queries after calling this function won't be serviced.
Done() Done()
// Close is the same as Done. But this evicts the resources related to this Layer immediately.
// This can be used for cleaning up resources on unmount.
Close() error
} }
// Info is the current status of a layer. // Info is the current status of a layer.
type Info struct { type Info struct {
Digest digest.Digest Digest digest.Digest
Size int64 // layer size in bytes Size int64
FetchedSize int64 // layer fetched size in bytes FetchedSize int64
PrefetchSize int64 // layer prefetch size in bytes
ReadTime time.Time // last time the layer was read
TOCDigest digest.Digest
} }
// Resolver resolves the layer location and provieds the handler of that layer. // Resolver resolves the layer location and provieds the handler of that layer.
type Resolver struct { type Resolver struct {
rootDir string rootDir string
resolver *remote.Resolver resolver *remote.Resolver
prefetchTimeout time.Duration prefetchTimeout time.Duration
layerCache *cacheutil.TTLCache layerCache *lrucache.Cache
layerCacheMu sync.Mutex layerCacheMu sync.Mutex
blobCache *cacheutil.TTLCache blobCache *lrucache.Cache
blobCacheMu sync.Mutex blobCacheMu sync.Mutex
backgroundTaskManager *task.BackgroundTaskManager backgroundTaskManager *task.BackgroundTaskManager
resolveLock *namedmutex.NamedMutex resolveLock *namedmutex.NamedMutex
config config.Config config config.Config
metadataStore metadata.Store
overlayOpaqueType OverlayOpaqueType
additionalDecompressors func(context.Context, source.RegistryHosts, reference.Spec, ocispec.Descriptor) []metadata.Decompressor
} }
// NewResolver returns a new layer resolver. // NewResolver returns a new layer resolver.
func NewResolver(root string, backgroundTaskManager *task.BackgroundTaskManager, cfg config.Config, resolveHandlers map[string]remote.Handler, metadataStore metadata.Store, overlayOpaqueType OverlayOpaqueType, additionalDecompressors func(context.Context, source.RegistryHosts, reference.Spec, ocispec.Descriptor) []metadata.Decompressor) (*Resolver, error) { func NewResolver(root string, backgroundTaskManager *task.BackgroundTaskManager, cfg config.Config) (*Resolver, error) {
resolveResultEntryTTL := time.Duration(cfg.ResolveResultEntryTTLSec) * time.Second resolveResultEntry := cfg.ResolveResultEntry
if resolveResultEntryTTL == 0 { if resolveResultEntry == 0 {
resolveResultEntryTTL = defaultResolveResultEntryTTLSec * time.Second resolveResultEntry = defaultResolveResultEntry
} }
prefetchTimeout := time.Duration(cfg.PrefetchTimeoutSec) * time.Second prefetchTimeout := time.Duration(cfg.PrefetchTimeoutSec) * time.Second
if prefetchTimeout == 0 { if prefetchTimeout == 0 {
@ -157,42 +132,35 @@ func NewResolver(root string, backgroundTaskManager *task.BackgroundTaskManager,
// layerCache caches resolved layers for future use. This is useful in a use-case where // layerCache caches resolved layers for future use. This is useful in a use-case where
// the filesystem resolves and caches all layers in an image (not only queried one) in parallel, // the filesystem resolves and caches all layers in an image (not only queried one) in parallel,
// before they are actually queried. // before they are actually queried.
layerCache := cacheutil.NewTTLCache(resolveResultEntryTTL) layerCache := lrucache.New(resolveResultEntry)
layerCache.OnEvicted = func(key string, value interface{}) { layerCache.OnEvicted = func(key string, value interface{}) {
if err := value.(*layer).close(); err != nil { if err := value.(*layer).close(); err != nil {
log.L.WithField("key", key).WithError(err).Warnf("failed to clean up layer") logrus.WithField("key", key).WithError(err).Warnf("failed to clean up layer")
return return
} }
log.L.WithField("key", key).Debugf("cleaned up layer") logrus.WithField("key", key).Debugf("cleaned up layer")
} }
// blobCache caches resolved blobs for futural use. This is especially useful when a layer // blobCache caches resolved blobs for futural use. This is especially useful when a layer
// isn't eStargz/stargz (the *layer object won't be created/cached in this case). // isn't eStargz/stargz (the *layer object won't be created/cached in this case).
blobCache := cacheutil.NewTTLCache(resolveResultEntryTTL) blobCache := lrucache.New(resolveResultEntry)
blobCache.OnEvicted = func(key string, value interface{}) { blobCache.OnEvicted = func(key string, value interface{}) {
if err := value.(remote.Blob).Close(); err != nil { if err := value.(remote.Blob).Close(); err != nil {
log.L.WithField("key", key).WithError(err).Warnf("failed to clean up blob") logrus.WithField("key", key).WithError(err).Warnf("failed to clean up blob")
return return
} }
log.L.WithField("key", key).Debugf("cleaned up blob") logrus.WithField("key", key).Debugf("cleaned up blob")
}
if err := os.MkdirAll(root, 0700); err != nil {
return nil, err
} }
return &Resolver{ return &Resolver{
rootDir: root, rootDir: root,
resolver: remote.NewResolver(cfg.BlobConfig, resolveHandlers), resolver: remote.NewResolver(cfg.BlobConfig),
layerCache: layerCache, layerCache: layerCache,
blobCache: blobCache, blobCache: blobCache,
prefetchTimeout: prefetchTimeout, prefetchTimeout: prefetchTimeout,
backgroundTaskManager: backgroundTaskManager, backgroundTaskManager: backgroundTaskManager,
config: cfg, config: cfg,
resolveLock: new(namedmutex.NamedMutex), resolveLock: new(namedmutex.NamedMutex),
metadataStore: metadataStore,
overlayOpaqueType: overlayOpaqueType,
additionalDecompressors: additionalDecompressors,
}, nil }, nil
} }
@ -216,9 +184,8 @@ func newCache(root string, cacheType string, cfg config.Config) (cache.BlobCache
return new(bytes.Buffer) return new(bytes.Buffer)
}, },
} }
dCache, fCache := cacheutil.NewLRUCache(maxDataEntry), cacheutil.NewLRUCache(maxFdEntry) dCache, fCache := lrucache.New(maxDataEntry), lrucache.New(maxFdEntry)
dCache.OnEvicted = func(key string, value interface{}) { dCache.OnEvicted = func(key string, value interface{}) {
value.(*bytes.Buffer).Reset()
bufPool.Put(value) bufPool.Put(value)
} }
fCache.OnEvicted = func(key string, value interface{}) { fCache.OnEvicted = func(key string, value interface{}) {
@ -228,35 +195,33 @@ func newCache(root string, cacheType string, cfg config.Config) (cache.BlobCache
if err := os.MkdirAll(root, 0700); err != nil { if err := os.MkdirAll(root, 0700); err != nil {
return nil, err return nil, err
} }
cachePath, err := os.MkdirTemp(root, "") cachePath, err := ioutil.TempDir(root, "")
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to initialize directory cache: %w", err) return nil, errors.Wrapf(err, "failed to initialize directory cache")
} }
return cache.NewDirectoryCache( return cache.NewDirectoryCache(
cachePath, cachePath,
cache.DirectoryCacheConfig{ cache.DirectoryCacheConfig{
SyncAdd: dcc.SyncAdd, SyncAdd: dcc.SyncAdd,
DataCache: dCache, DataCache: dCache,
FdCache: fCache, FdCache: fCache,
BufPool: bufPool, BufPool: bufPool,
Direct: dcc.Direct,
FadvDontNeed: dcc.FadvDontNeed,
}, },
) )
} }
// Resolve resolves a layer based on the passed layer blob information. // Resolve resolves a layer based on the passed layer blob information.
func (r *Resolver) Resolve(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor, esgzOpts ...metadata.Option) (_ Layer, retErr error) { func (r *Resolver) Resolve(ctx context.Context, hosts docker.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) (_ Layer, retErr error) {
name := refspec.String() + "/" + desc.Digest.String() name := refspec.String() + "/" + desc.Digest.String()
// Wait if resolving this layer is already running. The result // Wait if resolving this layer is already running. The result
// can hopefully get from the cache. // can hopefully get from the LRU cache.
r.resolveLock.Lock(name) r.resolveLock.Lock(name)
defer r.resolveLock.Unlock(name) defer r.resolveLock.Unlock(name)
ctx = log.WithLogger(ctx, log.G(ctx).WithField("src", name)) ctx = log.WithLogger(ctx, log.G(ctx).WithField("src", name))
// First, try to retrieve this layer from the underlying cache. // First, try to retrieve this layer from the underlying LRU cache.
r.layerCacheMu.Lock() r.layerCacheMu.Lock()
c, done, ok := r.layerCache.Get(name) c, done, ok := r.layerCache.Get(name)
r.layerCacheMu.Unlock() r.layerCacheMu.Unlock()
@ -266,7 +231,7 @@ func (r *Resolver) Resolve(ctx context.Context, hosts source.RegistryHosts, refs
return &layerRef{l, done}, nil return &layerRef{l, done}, nil
} }
// Cached layer is invalid // Cached layer is invalid
done(true) done()
r.layerCacheMu.Lock() r.layerCacheMu.Lock()
r.layerCache.Remove(name) r.layerCache.Remove(name)
r.layerCacheMu.Unlock() r.layerCacheMu.Unlock()
@ -277,17 +242,17 @@ func (r *Resolver) Resolve(ctx context.Context, hosts source.RegistryHosts, refs
// Resolve the blob. // Resolve the blob.
blobR, err := r.resolveBlob(ctx, hosts, refspec, desc) blobR, err := r.resolveBlob(ctx, hosts, refspec, desc)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to resolve the blob: %w", err) return nil, errors.Wrapf(err, "failed to resolve the blob")
} }
defer func() { defer func() {
if retErr != nil { if retErr != nil {
blobR.done(true) blobR.done()
} }
}() }()
fsCache, err := newCache(filepath.Join(r.rootDir, "fscache"), r.config.FSCacheType, r.config) fsCache, err := newCache(filepath.Join(r.rootDir, "fscache"), r.config.FSCacheType, r.config)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create fs cache: %w", err) return nil, errors.Wrapf(err, "failed to create fs cache")
} }
defer func() { defer func() {
if retErr != nil { if retErr != nil {
@ -304,39 +269,13 @@ func (r *Resolver) Resolve(ctx context.Context, hosts source.RegistryHosts, refs
defer r.backgroundTaskManager.DonePrioritizedTask() defer r.backgroundTaskManager.DonePrioritizedTask()
return blobR.ReadAt(p, offset) return blobR.ReadAt(p, offset)
}), 0, blobR.Size()) }), 0, blobR.Size())
// define telemetry hooks to measure latency metrics inside estargz package vr, err := reader.NewReader(sr, fsCache)
telemetry := metadata.Telemetry{
GetFooterLatency: func(start time.Time) {
commonmetrics.MeasureLatencyInMilliseconds(commonmetrics.StargzFooterGet, desc.Digest, start)
},
GetTocLatency: func(start time.Time) {
commonmetrics.MeasureLatencyInMilliseconds(commonmetrics.StargzTocGet, desc.Digest, start)
},
DeserializeTocLatency: func(start time.Time) {
commonmetrics.MeasureLatencyInMilliseconds(commonmetrics.DeserializeTocJSON, desc.Digest, start)
},
}
additionalDecompressors := []metadata.Decompressor{new(zstdchunked.Decompressor)}
if r.additionalDecompressors != nil {
additionalDecompressors = append(additionalDecompressors, r.additionalDecompressors(ctx, hosts, refspec, desc)...)
}
meta, err := r.metadataStore(sr,
append(esgzOpts, metadata.WithTelemetry(&telemetry), metadata.WithDecompressors(additionalDecompressors...))...)
if err != nil { if err != nil {
return nil, err return nil, errors.Wrap(err, "failed to read layer")
}
vr, err := reader.NewReader(meta, fsCache, desc.Digest)
if err != nil {
return nil, fmt.Errorf("failed to read layer: %w", err)
} }
// Combine layer information together and cache it. // Combine layer information together and cache it.
l := newLayer(r, desc, blobR, vr, passThroughConfig{ l := newLayer(r, desc, blobR, vr)
enable: r.config.PassThrough,
mergeBufferSize: r.config.MergeBufferSize,
mergeWorkerCount: r.config.MergeWorkerCount,
})
r.layerCacheMu.Lock() r.layerCacheMu.Lock()
cachedL, done2, added := r.layerCache.Add(name, l) cachedL, done2, added := r.layerCache.Add(name, l)
r.layerCacheMu.Unlock() r.layerCacheMu.Unlock()
@ -349,10 +288,10 @@ func (r *Resolver) Resolve(ctx context.Context, hosts source.RegistryHosts, refs
} }
// resolveBlob resolves a blob based on the passed layer blob information. // resolveBlob resolves a blob based on the passed layer blob information.
func (r *Resolver) resolveBlob(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) (_ *blobRef, retErr error) { func (r *Resolver) resolveBlob(ctx context.Context, hosts docker.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) (_ *blobRef, retErr error) {
name := refspec.String() + "/" + desc.Digest.String() name := refspec.String() + "/" + desc.Digest.String()
// Try to retrieve the blob from the underlying cache. // Try to retrieve the blob from the underlying LRU cache.
r.blobCacheMu.Lock() r.blobCacheMu.Lock()
c, done, ok := r.blobCache.Get(name) c, done, ok := r.blobCache.Get(name)
r.blobCacheMu.Unlock() r.blobCacheMu.Unlock()
@ -361,7 +300,7 @@ func (r *Resolver) resolveBlob(ctx context.Context, hosts source.RegistryHosts,
return &blobRef{blob, done}, nil return &blobRef{blob, done}, nil
} }
// invalid blob. discard this. // invalid blob. discard this.
done(true) done()
r.blobCacheMu.Lock() r.blobCacheMu.Lock()
r.blobCache.Remove(name) r.blobCache.Remove(name)
r.blobCacheMu.Unlock() r.blobCacheMu.Unlock()
@ -369,7 +308,7 @@ func (r *Resolver) resolveBlob(ctx context.Context, hosts source.RegistryHosts,
httpCache, err := newCache(filepath.Join(r.rootDir, "httpcache"), r.config.HTTPCacheType, r.config) httpCache, err := newCache(filepath.Join(r.rootDir, "httpcache"), r.config.HTTPCacheType, r.config)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create http cache: %w", err) return nil, errors.Wrapf(err, "failed to create http cache")
} }
defer func() { defer func() {
if retErr != nil { if retErr != nil {
@ -380,7 +319,7 @@ func (r *Resolver) resolveBlob(ctx context.Context, hosts source.RegistryHosts,
// Resolve the blob and cache the result. // Resolve the blob and cache the result.
b, err := r.resolver.Resolve(ctx, hosts, refspec, desc, httpCache) b, err := r.resolver.Resolve(ctx, hosts, refspec, desc, httpCache)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to resolve the source: %w", err) return nil, errors.Wrap(err, "failed to resolve the source")
} }
r.blobCacheMu.Lock() r.blobCacheMu.Lock()
cachedB, done, added := r.blobCache.Add(name, b) cachedB, done, added := r.blobCache.Add(name, b)
@ -391,12 +330,23 @@ func (r *Resolver) resolveBlob(ctx context.Context, hosts source.RegistryHosts,
return &blobRef{cachedB.(remote.Blob), done}, nil return &blobRef{cachedB.(remote.Blob), done}, nil
} }
// Cache is similar to Resolve but the result isn't returned. Instead, it'll be stored in the cache.
func (r *Resolver) Cache(ctx context.Context, hosts docker.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error {
l, err := r.Resolve(ctx, hosts, refspec, desc)
if err != nil {
return err
}
// Release this layer. However, this will remain on the cache until eviction.
// Until then, the client can reuse this (already pre-resolved) layer.
l.Done()
return nil
}
func newLayer( func newLayer(
resolver *Resolver, resolver *Resolver,
desc ocispec.Descriptor, desc ocispec.Descriptor,
blob *blobRef, blob *blobRef,
vr *reader.VerifiableReader, vr *reader.VerifiableReader,
pth passThroughConfig,
) *layer { ) *layer {
return &layer{ return &layer{
resolver: resolver, resolver: resolver,
@ -404,7 +354,6 @@ func newLayer(
blob: blob, blob: blob,
verifiableReader: vr, verifiableReader: vr,
prefetchWaiter: newWaiter(), prefetchWaiter: newWaiter(),
passThrough: pth,
} }
} }
@ -415,41 +364,20 @@ type layer struct {
verifiableReader *reader.VerifiableReader verifiableReader *reader.VerifiableReader
prefetchWaiter *waiter prefetchWaiter *waiter
prefetchSize int64
prefetchSizeMu sync.Mutex
r reader.Reader r reader.Reader
closed bool closed bool
closedMu sync.Mutex closedMu sync.Mutex
prefetchOnce sync.Once
backgroundFetchOnce sync.Once
passThrough passThroughConfig
} }
func (l *layer) Info() Info { func (l *layer) Info() Info {
var readTime time.Time
if l.r != nil {
readTime = l.r.LastOnDemandReadTime()
}
return Info{ return Info{
Digest: l.desc.Digest, Digest: l.desc.Digest,
Size: l.blob.Size(), Size: l.blob.Size(),
FetchedSize: l.blob.FetchedSize(), FetchedSize: l.blob.FetchedSize(),
PrefetchSize: l.prefetchedSize(),
ReadTime: readTime,
TOCDigest: l.verifiableReader.Metadata().TOCDigest(),
} }
} }
func (l *layer) prefetchedSize() int64 {
l.prefetchSizeMu.Lock()
sz := l.prefetchSize
l.prefetchSizeMu.Unlock()
return sz
}
func (l *layer) Check() error { func (l *layer) Check() error {
if l.isClosed() { if l.isClosed() {
return fmt.Errorf("layer is already closed") return fmt.Errorf("layer is already closed")
@ -457,7 +385,7 @@ func (l *layer) Check() error {
return l.blob.Check() return l.blob.Check()
} }
func (l *layer) Refresh(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error { func (l *layer) Refresh(ctx context.Context, hosts docker.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error {
if l.isClosed() { if l.isClosed() {
return fmt.Errorf("layer is already closed") return fmt.Errorf("layer is already closed")
} }
@ -468,84 +396,45 @@ func (l *layer) Verify(tocDigest digest.Digest) (err error) {
if l.isClosed() { if l.isClosed() {
return fmt.Errorf("layer is already closed") return fmt.Errorf("layer is already closed")
} }
if l.r != nil {
return nil
}
l.r, err = l.verifiableReader.VerifyTOC(tocDigest) l.r, err = l.verifiableReader.VerifyTOC(tocDigest)
return return
} }
func (l *layer) SkipVerify() { func (l *layer) SkipVerify() {
if l.r != nil {
return
}
l.r = l.verifiableReader.SkipVerify() l.r = l.verifiableReader.SkipVerify()
} }
func (l *layer) Prefetch(prefetchSize int64) (err error) { func (l *layer) Prefetch(prefetchSize int64) error {
l.prefetchOnce.Do(func() {
ctx := context.Background()
l.resolver.backgroundTaskManager.DoPrioritizedTask()
defer l.resolver.backgroundTaskManager.DonePrioritizedTask()
err = l.prefetch(ctx, prefetchSize)
if err != nil {
log.G(ctx).WithError(err).Warnf("failed to prefetch layer=%v", l.desc.Digest)
return
}
log.G(ctx).Debug("completed to prefetch")
})
return
}
func (l *layer) prefetch(ctx context.Context, prefetchSize int64) error {
defer l.prefetchWaiter.done() // Notify the completion defer l.prefetchWaiter.done() // Notify the completion
// Measuring the total time to complete prefetch (use defer func() because l.Info().PrefetchSize is set later)
start := time.Now()
defer func() {
commonmetrics.WriteLatencyWithBytesLogValue(ctx, l.desc.Digest, commonmetrics.PrefetchTotal, start, commonmetrics.PrefetchSize, l.prefetchedSize())
}()
if l.isClosed() { if l.isClosed() {
return fmt.Errorf("layer is already closed") return fmt.Errorf("layer is already closed")
} }
rootID := l.verifiableReader.Metadata().RootID() if l.r == nil {
if _, _, err := l.verifiableReader.Metadata().GetChild(rootID, estargz.NoPrefetchLandmark); err == nil { return fmt.Errorf("layer hasn't been verified yet")
}
lr := l.r
if _, ok := lr.Lookup(estargz.NoPrefetchLandmark); ok {
// do not prefetch this layer // do not prefetch this layer
return nil return nil
} else if id, _, err := l.verifiableReader.Metadata().GetChild(rootID, estargz.PrefetchLandmark); err == nil { } else if e, ok := lr.Lookup(estargz.PrefetchLandmark); ok {
offset, err := l.verifiableReader.Metadata().GetOffset(id)
if err != nil {
return fmt.Errorf("failed to get offset of prefetch landmark: %w", err)
}
// override the prefetch size with optimized value // override the prefetch size with optimized value
prefetchSize = offset prefetchSize = e.Offset
} else if prefetchSize > l.blob.Size() { } else if prefetchSize > l.blob.Size() {
// adjust prefetch size not to exceed the whole layer size // adjust prefetch size not to exceed the whole layer size
prefetchSize = l.blob.Size() prefetchSize = l.blob.Size()
} }
// Fetch the target range // Fetch the target range
downloadStart := time.Now() if err := l.blob.Cache(0, prefetchSize); err != nil {
err := l.blob.Cache(0, prefetchSize) return errors.Wrap(err, "failed to prefetch layer")
commonmetrics.WriteLatencyLogValue(ctx, l.desc.Digest, commonmetrics.PrefetchDownload, downloadStart) // time to download prefetch data
if err != nil {
return fmt.Errorf("failed to prefetch layer: %w", err)
} }
// Set prefetch size for metrics after prefetch completed
l.prefetchSizeMu.Lock()
l.prefetchSize = prefetchSize
l.prefetchSizeMu.Unlock()
// Cache uncompressed contents of the prefetched range // Cache uncompressed contents of the prefetched range
decompressStart := time.Now() if err := lr.Cache(reader.WithFilter(func(e *estargz.TOCEntry) bool {
err = l.verifiableReader.Cache(reader.WithFilter(func(offset int64) bool { return e.Offset < prefetchSize // Cache only prefetch target
return offset < prefetchSize // Cache only prefetch target })); err != nil {
})) return errors.Wrap(err, "failed to cache prefetched layer")
commonmetrics.WriteLatencyLogValue(ctx, l.desc.Digest, commonmetrics.PrefetchDecompress, decompressStart) // time to decompress prefetch data
if err != nil {
return fmt.Errorf("failed to cache prefetched layer: %w", err)
} }
return nil return nil
@ -558,28 +447,16 @@ func (l *layer) WaitForPrefetchCompletion() error {
return l.prefetchWaiter.wait(l.resolver.prefetchTimeout) return l.prefetchWaiter.wait(l.resolver.prefetchTimeout)
} }
func (l *layer) BackgroundFetch() (err error) { func (l *layer) BackgroundFetch() error {
l.backgroundFetchOnce.Do(func() {
ctx := context.Background()
err = l.backgroundFetch(ctx)
if err != nil {
log.G(ctx).WithError(err).Warnf("failed to fetch whole layer=%v", l.desc.Digest)
return
}
log.G(ctx).Debug("completed to fetch all layer data in background")
})
return
}
func (l *layer) backgroundFetch(ctx context.Context) error {
defer commonmetrics.WriteLatencyLogValue(ctx, l.desc.Digest, commonmetrics.BackgroundFetchTotal, time.Now())
if l.isClosed() { if l.isClosed() {
return fmt.Errorf("layer is already closed") return fmt.Errorf("layer is already closed")
} }
if l.r == nil {
return fmt.Errorf("layer hasn't been verified yet")
}
lr := l.r
br := io.NewSectionReader(readerAtFunc(func(p []byte, offset int64) (retN int, retErr error) { br := io.NewSectionReader(readerAtFunc(func(p []byte, offset int64) (retN int, retErr error) {
l.resolver.backgroundTaskManager.InvokeBackgroundTask(func(ctx context.Context) { l.resolver.backgroundTaskManager.InvokeBackgroundTask(func(ctx context.Context) {
// Measuring the time to download background fetch data (in milliseconds)
defer commonmetrics.MeasureLatencyInMilliseconds(commonmetrics.BackgroundFetchDownload, l.Info().Digest, time.Now()) // time to download background fetch data
retN, retErr = l.blob.ReadAt( retN, retErr = l.blob.ReadAt(
p, p,
offset, offset,
@ -589,34 +466,24 @@ func (l *layer) backgroundFetch(ctx context.Context) error {
}, 120*time.Second) }, 120*time.Second)
return return
}), 0, l.blob.Size()) }), 0, l.blob.Size())
defer commonmetrics.WriteLatencyLogValue(ctx, l.desc.Digest, commonmetrics.BackgroundFetchDecompress, time.Now()) // time to decompress background fetch data (in milliseconds) return lr.Cache(
return l.verifiableReader.Cache(
reader.WithReader(br), // Read contents in background reader.WithReader(br), // Read contents in background
reader.WithCacheOpts(cache.Direct()), // Do not pollute mem cache reader.WithCacheOpts(cache.Direct()), // Do not pollute mem cache
) )
} }
func (l *layerRef) Done() { func (l *layerRef) Done() {
l.done(false) // leave chances to reuse this l.done()
} }
func (l *layerRef) Close() error { func (l *layer) RootNode() (fusefs.InodeEmbedder, error) {
l.done(true) // evict this from the cache
return nil
}
func (l *layer) RootNode(baseInode uint32) (fusefs.InodeEmbedder, error) {
if l.isClosed() { if l.isClosed() {
return nil, fmt.Errorf("layer is already closed") return nil, fmt.Errorf("layer is already closed")
} }
if l.r == nil { if l.r == nil {
return nil, fmt.Errorf("layer hasn't been verified yet") return nil, fmt.Errorf("layer hasn't been verified yet")
} }
return newNode(l.desc.Digest, l.r, l.blob, baseInode, l.resolver.overlayOpaqueType, l.passThrough) return newNode(l.desc.Digest, l.r, l.blob)
}
func (l *layer) ReadAt(p []byte, offset int64, opts ...remote.Option) (int, error) {
return l.blob.ReadAt(p, offset, opts...)
} }
func (l *layer) close() error { func (l *layer) close() error {
@ -626,7 +493,7 @@ func (l *layer) close() error {
return nil return nil
} }
l.closed = true l.closed = true
defer l.blob.done(true) // Close reader first, then close the blob defer l.blob.done() // Close reader first, then close the blob
l.verifiableReader.Close() l.verifiableReader.Close()
if l.r != nil { if l.r != nil {
return l.r.Close() return l.r.Close()
@ -646,7 +513,7 @@ func (l *layer) isClosed() bool {
// to this blob will be discarded. // to this blob will be discarded.
type blobRef struct { type blobRef struct {
remote.Blob remote.Blob
done func(bool) done func()
} }
// layerRef is a reference to the layer in the cache. Calling `Done` or `done` decreases the // layerRef is a reference to the layer in the cache. Calling `Done` or `done` decreases the
@ -654,7 +521,7 @@ type blobRef struct {
// cache, resources bound to this layer will be discarded. // cache, resources bound to this layer will be discarded.
type layerRef struct { type layerRef struct {
*layer *layer
done func(bool) done func()
} }
func newWaiter() *waiter { func newWaiter() *waiter {

View File

@ -23,31 +23,196 @@
package layer package layer
import ( import (
"context"
"io"
"io/ioutil"
"net/http"
"testing" "testing"
"time" "time"
memorymetadata "github.com/containerd/stargz-snapshotter/metadata/memory" "github.com/containerd/containerd/reference"
"github.com/containerd/containerd/remotes/docker"
"github.com/containerd/stargz-snapshotter/cache"
"github.com/containerd/stargz-snapshotter/estargz"
"github.com/containerd/stargz-snapshotter/fs/reader"
"github.com/containerd/stargz-snapshotter/fs/remote"
"github.com/containerd/stargz-snapshotter/util/testutil"
digest "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
) )
func TestLayer(t *testing.T) { const (
testRunner := &TestRunner{ sampleChunkSize = 3
TestingT: t, sampleData1 = "0123456789"
Runner: func(testingT TestingT, name string, run func(t TestingT)) { sampleData2 = "abcdefghij"
tt, ok := testingT.(*testing.T) )
if !ok {
testingT.Fatal("TestingT is not a *testing.T")
return
}
tt.Run(name, func(t *testing.T) { var testStateLayerDigest = digest.FromString("dummy")
run(t)
}) // Tests prefetch method of each stargz file.
func TestPrefetch(t *testing.T) {
defaultPrefetchSize := int64(10000)
landmarkPosition := func(t *testing.T, l *layer) int64 {
if l.r == nil {
t.Fatalf("layer hasn't been verified yet")
}
if e, ok := l.r.Lookup(estargz.PrefetchLandmark); ok {
return e.Offset
}
return defaultPrefetchSize
}
tests := []struct {
name string
in []testutil.TarEntry
wantNum int // number of chunks wanted in the cache
wants []string // filenames to compare
prefetchSize func(*testing.T, *layer) int64
prioritizedFiles []string
}{
{
name: "no_prefetch",
in: []testutil.TarEntry{
testutil.File("foo.txt", sampleData1),
},
wantNum: 0,
prioritizedFiles: nil,
},
{
name: "prefetch",
in: []testutil.TarEntry{
testutil.File("foo.txt", sampleData1),
testutil.File("bar.txt", sampleData2),
},
wantNum: chunkNum(sampleData1),
wants: []string{"foo.txt"},
prefetchSize: landmarkPosition,
prioritizedFiles: []string{"foo.txt"},
},
{
name: "with_dir",
in: []testutil.TarEntry{
testutil.Dir("foo/"),
testutil.File("foo/bar.txt", sampleData1),
testutil.Dir("buz/"),
testutil.File("buz/buzbuz.txt", sampleData2),
},
wantNum: chunkNum(sampleData1),
wants: []string{"foo/bar.txt"},
prefetchSize: landmarkPosition,
prioritizedFiles: []string{"foo/", "foo/bar.txt"},
}, },
} }
TestSuiteLayer(testRunner, memorymetadata.NewReader) for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
sr, dgst, err := testutil.BuildEStargz(tt.in,
testutil.WithEStargzOptions(
estargz.WithChunkSize(sampleChunkSize),
estargz.WithPrioritizedFiles(tt.prioritizedFiles),
))
if err != nil {
t.Fatalf("failed to build eStargz: %v", err)
}
blob := newBlob(sr)
mcache := cache.NewMemoryCache()
vr, err := reader.NewReader(sr, mcache)
if err != nil {
t.Fatalf("failed to make stargz reader: %v", err)
}
l := newLayer(
&Resolver{
prefetchTimeout: time.Second,
},
ocispec.Descriptor{Digest: testStateLayerDigest},
&blobRef{blob, func() {}},
vr,
)
if err := l.Verify(dgst); err != nil {
t.Errorf("failed to verify reader: %v", err)
return
}
prefetchSize := int64(0)
if tt.prefetchSize != nil {
prefetchSize = tt.prefetchSize(t, l)
}
if err := l.Prefetch(defaultPrefetchSize); err != nil {
t.Errorf("failed to prefetch: %v", err)
return
}
if blob.calledPrefetchOffset != 0 {
t.Errorf("invalid prefetch offset %d; want %d",
blob.calledPrefetchOffset, 0)
}
if blob.calledPrefetchSize != prefetchSize {
t.Errorf("invalid prefetch size %d; want %d",
blob.calledPrefetchSize, prefetchSize)
}
if cLen := len(mcache.(*cache.MemoryCache).Membuf); tt.wantNum != cLen {
t.Errorf("number of chunks in the cache %d; want %d: %v", cLen, tt.wantNum, err)
return
}
lr := l.r
if lr == nil {
t.Fatalf("failed to get reader from layer: %v", err)
}
for _, file := range tt.wants {
e, ok := lr.Lookup(file)
if !ok {
t.Fatalf("failed to lookup %q", file)
}
wantFile, err := lr.OpenFile(file)
if err != nil {
t.Fatalf("failed to open file %q", file)
}
blob.readCalled = false
if _, err := io.Copy(ioutil.Discard, io.NewSectionReader(wantFile, 0, e.Size)); err != nil {
t.Fatalf("failed to read file %q", file)
}
if blob.readCalled {
t.Errorf("chunks of file %q aren't cached", file)
return
}
}
})
}
} }
func chunkNum(data string) int {
return (len(data)-1)/sampleChunkSize + 1
}
func newBlob(sr *io.SectionReader) *sampleBlob {
return &sampleBlob{
r: sr,
}
}
type sampleBlob struct {
r *io.SectionReader
readCalled bool
calledPrefetchOffset int64
calledPrefetchSize int64
}
func (sb *sampleBlob) Authn(tr http.RoundTripper) (http.RoundTripper, error) { return nil, nil }
func (sb *sampleBlob) Check() error { return nil }
func (sb *sampleBlob) Size() int64 { return sb.r.Size() }
func (sb *sampleBlob) FetchedSize() int64 { return 0 }
func (sb *sampleBlob) ReadAt(p []byte, offset int64, opts ...remote.Option) (int, error) {
sb.readCalled = true
return sb.r.ReadAt(p, offset)
}
func (sb *sampleBlob) Cache(offset int64, size int64, option ...remote.Option) error {
sb.calledPrefetchOffset = offset
sb.calledPrefetchSize = size
return nil
}
func (sb *sampleBlob) Refresh(ctx context.Context, hosts docker.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error {
return nil
}
func (sb *sampleBlob) Close() error { return nil }
func TestWaiter(t *testing.T) { func TestWaiter(t *testing.T) {
var ( var (
w = newWaiter() w = newWaiter()

View File

@ -26,7 +26,6 @@ import (
"bytes" "bytes"
"context" "context"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"io" "io"
"os" "os"
@ -34,14 +33,11 @@ import (
"strings" "strings"
"sync" "sync"
"syscall" "syscall"
"time" "unsafe"
"github.com/containerd/log"
"github.com/containerd/stargz-snapshotter/estargz" "github.com/containerd/stargz-snapshotter/estargz"
commonmetrics "github.com/containerd/stargz-snapshotter/fs/metrics/common"
"github.com/containerd/stargz-snapshotter/fs/reader" "github.com/containerd/stargz-snapshotter/fs/reader"
"github.com/containerd/stargz-snapshotter/fs/remote" "github.com/containerd/stargz-snapshotter/fs/remote"
"github.com/containerd/stargz-snapshotter/metadata"
fusefs "github.com/hanwen/go-fuse/v2/fs" fusefs "github.com/hanwen/go-fuse/v2/fs"
"github.com/hanwen/go-fuse/v2/fuse" "github.com/hanwen/go-fuse/v2/fuse"
digest "github.com/opencontainers/go-digest" digest "github.com/opencontainers/go-digest"
@ -50,106 +46,35 @@ import (
const ( const (
blockSize = 4096 blockSize = 4096
physicalBlockSize = 512 whiteoutPrefix = ".wh."
// physicalBlockRatio is the ratio of blockSize to physicalBlockSize. whiteoutOpaqueDir = whiteoutPrefix + whiteoutPrefix + ".opq"
// It can be used to convert from # blockSize-byte blocks to # physicalBlockSize-byte blocks opaqueXattrValue = "y"
physicalBlockRatio = blockSize / physicalBlockSize stateDirName = ".stargz-snapshotter"
whiteoutPrefix = ".wh." statFileMode = syscall.S_IFREG | 0400 // -r--------
whiteoutOpaqueDir = whiteoutPrefix + whiteoutPrefix + ".opq" stateDirMode = syscall.S_IFDIR | 0500 // dr-x------
opaqueXattrValue = "y"
stateDirName = ".stargz-snapshotter"
statFileMode = syscall.S_IFREG | 0400 // -r--------
stateDirMode = syscall.S_IFDIR | 0500 // dr-x------
) )
type OverlayOpaqueType int var opaqueXattrs = []string{"trusted.overlay.opaque", "user.overlay.opaque"}
const ( func newNode(layerDgst digest.Digest, r reader.Reader, blob remote.Blob) (fusefs.InodeEmbedder, error) {
OverlayOpaqueAll OverlayOpaqueType = iota root, ok := r.Lookup("")
OverlayOpaqueTrusted
OverlayOpaqueUser
)
var opaqueXattrs = map[OverlayOpaqueType][]string{
OverlayOpaqueAll: {"trusted.overlay.opaque", "user.overlay.opaque"},
OverlayOpaqueTrusted: {"trusted.overlay.opaque"},
OverlayOpaqueUser: {"user.overlay.opaque"},
}
func newNode(layerDgst digest.Digest, r reader.Reader, blob remote.Blob, baseInode uint32, opaque OverlayOpaqueType, pth passThroughConfig) (fusefs.InodeEmbedder, error) {
rootID := r.Metadata().RootID()
rootAttr, err := r.Metadata().GetAttr(rootID)
if err != nil {
return nil, err
}
opq, ok := opaqueXattrs[opaque]
if !ok { if !ok {
return nil, fmt.Errorf("unknown overlay opaque type") return nil, fmt.Errorf("failed to get a TOCEntry of the root")
} }
ffs := &fs{
r: r,
layerDigest: layerDgst,
baseInode: baseInode,
rootID: rootID,
opaqueXattrs: opq,
passThrough: pth,
}
ffs.s = ffs.newState(layerDgst, blob)
return &node{ return &node{
id: rootID, r: r,
attr: rootAttr, e: root,
fs: ffs, s: newState(layerDgst, blob),
}, nil }, nil
} }
// fs contains global metadata used by nodes
type fs struct {
r reader.Reader
s *state
layerDigest digest.Digest
baseInode uint32
rootID uint32
opaqueXattrs []string
passThrough passThroughConfig
}
func (fs *fs) inodeOfState() uint64 {
return (uint64(fs.baseInode) << 32) | 1 // reserved
}
func (fs *fs) inodeOfStatFile() uint64 {
return (uint64(fs.baseInode) << 32) | 2 // reserved
}
func (fs *fs) inodeOfID(id uint32) (uint64, error) {
// 0 is reserved by go-fuse 1 and 2 are reserved by the state dir
if id > ^uint32(0)-3 {
return 0, fmt.Errorf("too many inodes")
}
return (uint64(fs.baseInode) << 32) | uint64(3+id), nil
}
// node is a filesystem inode abstraction. // node is a filesystem inode abstraction.
type node struct { type node struct {
fusefs.Inode fusefs.Inode
fs *fs r reader.Reader
id uint32 e *estargz.TOCEntry
attr metadata.Attr s *state
opaque bool // true if this node is an overlayfs opaque directory
ents []fuse.DirEntry
entsCached bool
entsMu sync.Mutex
}
func (n *node) isRootNode() bool {
return n.id == n.fs.rootID
}
func (n *node) isOpaque() bool {
if _, _, err := n.fs.r.Metadata().GetChild(n.id, whiteoutOpaqueDir); err == nil {
return true
}
return false
} }
var _ = (fusefs.InodeEmbedder)((*node)(nil)) var _ = (fusefs.InodeEmbedder)((*node)(nil))
@ -157,79 +82,43 @@ var _ = (fusefs.InodeEmbedder)((*node)(nil))
var _ = (fusefs.NodeReaddirer)((*node)(nil)) var _ = (fusefs.NodeReaddirer)((*node)(nil))
func (n *node) Readdir(ctx context.Context) (fusefs.DirStream, syscall.Errno) { func (n *node) Readdir(ctx context.Context) (fusefs.DirStream, syscall.Errno) {
ents, errno := n.readdir()
if errno != 0 {
return nil, errno
}
return fusefs.NewListDirStream(ents), 0
}
func (n *node) readdir() ([]fuse.DirEntry, syscall.Errno) {
// Measure how long node_readdir operation takes (in microseconds).
start := time.Now() // set start time
defer commonmetrics.MeasureLatencyInMicroseconds(commonmetrics.NodeReaddir, n.fs.layerDigest, start)
n.entsMu.Lock()
if n.entsCached {
ents := n.ents
n.entsMu.Unlock()
return ents, 0
}
n.entsMu.Unlock()
isRoot := n.isRootNode()
var ents []fuse.DirEntry var ents []fuse.DirEntry
whiteouts := map[string]uint32{} whiteouts := map[string]*estargz.TOCEntry{}
normalEnts := map[string]bool{} normalEnts := map[string]bool{}
var lastErr error n.e.ForeachChild(func(baseName string, ent *estargz.TOCEntry) bool {
if err := n.fs.r.Metadata().ForeachChild(n.id, func(name string, id uint32, mode os.FileMode) bool {
// We don't want to show prefetch landmarks in "/". // We don't want to show prefetch landmarks in "/".
if isRoot && (name == estargz.PrefetchLandmark || name == estargz.NoPrefetchLandmark) { if n.e.Name == "" && (baseName == estargz.PrefetchLandmark || baseName == estargz.NoPrefetchLandmark) {
return true return true
} }
// We don't want to show whiteouts. // We don't want to show whiteouts.
if strings.HasPrefix(name, whiteoutPrefix) { if strings.HasPrefix(baseName, whiteoutPrefix) {
if name == whiteoutOpaqueDir { if baseName == whiteoutOpaqueDir {
return true return true
} }
// Add the overlayfs-compiant whiteout later. // Add the overlayfs-compiant whiteout later.
whiteouts[name] = id whiteouts[baseName] = ent
return true return true
} }
// This is a normal entry. // This is a normal entry.
normalEnts[name] = true normalEnts[baseName] = true
ino, err := n.fs.inodeOfID(id)
if err != nil {
lastErr = err
return false
}
ents = append(ents, fuse.DirEntry{ ents = append(ents, fuse.DirEntry{
Mode: fileModeToSystemMode(mode), Mode: modeOfEntry(ent),
Name: name, Name: baseName,
Ino: ino, Ino: inodeOfEnt(ent),
}) })
return true return true
}); err != nil || lastErr != nil { })
n.fs.s.report(fmt.Errorf("node.Readdir: err = %v; lastErr = %v", err, lastErr))
return nil, syscall.EIO
}
// Append whiteouts if no entry replaces the target entry in the lower layer. // Append whiteouts if no entry replaces the target entry in the lower layer.
for w, id := range whiteouts { for w, ent := range whiteouts {
if !normalEnts[w[len(whiteoutPrefix):]] { if !normalEnts[w[len(whiteoutPrefix):]] {
ino, err := n.fs.inodeOfID(id)
if err != nil {
n.fs.s.report(fmt.Errorf("node.Readdir: err = %v; lastErr = %v", err, lastErr))
return nil, syscall.EIO
}
ents = append(ents, fuse.DirEntry{ ents = append(ents, fuse.DirEntry{
Mode: syscall.S_IFCHR, Mode: syscall.S_IFCHR,
Name: w[len(whiteoutPrefix):], Name: w[len(whiteoutPrefix):],
Ino: ino, Ino: inodeOfEnt(ent),
}) })
} }
@ -239,21 +128,15 @@ func (n *node) readdir() ([]fuse.DirEntry, syscall.Errno) {
sort.Slice(ents, func(i, j int) bool { sort.Slice(ents, func(i, j int) bool {
return ents[i].Name < ents[j].Name return ents[i].Name < ents[j].Name
}) })
n.entsMu.Lock()
defer n.entsMu.Unlock()
n.ents, n.entsCached = ents, true // cache it
return ents, 0 return fusefs.NewListDirStream(ents), 0
} }
var _ = (fusefs.NodeLookuper)((*node)(nil)) var _ = (fusefs.NodeLookuper)((*node)(nil))
func (n *node) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fusefs.Inode, syscall.Errno) { func (n *node) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fusefs.Inode, syscall.Errno) {
isRoot := n.isRootNode()
// We don't want to show prefetch landmarks in "/". // We don't want to show prefetch landmarks in "/".
if isRoot && (name == estargz.PrefetchLandmark || name == estargz.NoPrefetchLandmark) { if n.e.Name == "" && (name == estargz.PrefetchLandmark || name == estargz.NoPrefetchLandmark) {
return nil, syscall.ENOENT return nil, syscall.ENOENT
} }
@ -263,130 +146,62 @@ func (n *node) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fu
} }
// state directory // state directory
if isRoot && name == stateDirName { if n.e.Name == "" && name == stateDirName {
return n.NewInode(ctx, n.fs.s, n.fs.stateToAttr(&out.Attr)), 0 return n.NewInode(ctx, n.s, stateToAttr(n.s, &out.Attr)), 0
} }
// lookup on memory nodes // lookup stargz TOCEntry
if cn := n.GetChild(name); cn != nil { ce, ok := n.e.LookupChild(name)
switch tn := cn.Operations().(type) { if !ok {
case *node:
ino, err := n.fs.inodeOfID(tn.id)
if err != nil {
n.fs.s.report(fmt.Errorf("node.Lookup: %v", err))
return nil, syscall.EIO
}
entryToAttr(ino, tn.attr, &out.Attr)
case *whiteout:
ino, err := n.fs.inodeOfID(tn.id)
if err != nil {
n.fs.s.report(fmt.Errorf("node.Lookup: %v", err))
return nil, syscall.EIO
}
entryToAttr(ino, tn.attr, &out.Attr)
default:
n.fs.s.report(fmt.Errorf("node.Lookup: uknown node type detected"))
return nil, syscall.EIO
}
return cn, 0
}
// early return if this entry doesn't exist
n.entsMu.Lock()
if n.entsCached {
var found bool
for _, e := range n.ents {
if e.Name == name {
found = true
}
}
if !found {
n.entsMu.Unlock()
return nil, syscall.ENOENT
}
}
n.entsMu.Unlock()
id, ce, err := n.fs.r.Metadata().GetChild(n.id, name)
if err != nil {
// If the entry exists as a whiteout, show an overlayfs-styled whiteout node. // If the entry exists as a whiteout, show an overlayfs-styled whiteout node.
if whID, wh, err := n.fs.r.Metadata().GetChild(n.id, fmt.Sprintf("%s%s", whiteoutPrefix, name)); err == nil { if wh, ok := n.e.LookupChild(fmt.Sprintf("%s%s", whiteoutPrefix, name)); ok {
ino, err := n.fs.inodeOfID(whID)
if err != nil {
n.fs.s.report(fmt.Errorf("node.Lookup: %v", err))
return nil, syscall.EIO
}
return n.NewInode(ctx, &whiteout{ return n.NewInode(ctx, &whiteout{
id: whID, e: wh,
fs: n.fs, }, entryToWhAttr(wh, &out.Attr)), 0
attr: wh,
}, entryToWhAttr(ino, wh, &out.Attr)), 0
} }
n.readdir() // This code path is very expensive. Cache child entries here so that the next call don't reach here.
return nil, syscall.ENOENT return nil, syscall.ENOENT
} }
var opaque bool
ino, err := n.fs.inodeOfID(id) if _, ok := ce.LookupChild(whiteoutOpaqueDir); ok {
if err != nil { // This entry is an opaque directory so make it recognizable for overlayfs.
n.fs.s.report(fmt.Errorf("node.Lookup: %v", err)) opaque = true
return nil, syscall.EIO
} }
return n.NewInode(ctx, &node{ return n.NewInode(ctx, &node{
id: id, r: n.r,
fs: n.fs, e: ce,
attr: ce, s: n.s,
}, entryToAttr(ino, ce, &out.Attr)), 0 opaque: opaque,
}, entryToAttr(ce, &out.Attr)), 0
} }
var _ = (fusefs.NodeOpener)((*node)(nil)) var _ = (fusefs.NodeOpener)((*node)(nil))
func (n *node) Open(ctx context.Context, flags uint32) (fh fusefs.FileHandle, fuseFlags uint32, errno syscall.Errno) { func (n *node) Open(ctx context.Context, flags uint32) (fh fusefs.FileHandle, fuseFlags uint32, errno syscall.Errno) {
ra, err := n.fs.r.OpenFile(n.id) ra, err := n.r.OpenFile(n.e.Name)
if err != nil { if err != nil {
n.fs.s.report(fmt.Errorf("node.Open: %v", err)) n.s.report(fmt.Errorf("failed to open node: %v", err))
return nil, 0, syscall.EIO return nil, 0, syscall.EIO
} }
return &file{
f := &file{
n: n, n: n,
e: n.e,
ra: ra, ra: ra,
fd: -1, }, 0, 0
}
if n.fs.passThrough.enable {
if getter, ok := ra.(reader.PassthroughFdGetter); ok {
fd, err := getter.GetPassthroughFd(n.fs.passThrough.mergeBufferSize, n.fs.passThrough.mergeWorkerCount)
if err != nil {
n.fs.s.report(fmt.Errorf("passThrough model failed due to node.Open: %v", err))
n.fs.passThrough.enable = false
} else {
f.InitFd(int(fd))
}
}
}
return f, fuse.FOPEN_KEEP_CACHE, 0
} }
var _ = (fusefs.NodeGetattrer)((*node)(nil)) var _ = (fusefs.NodeGetattrer)((*node)(nil))
func (n *node) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno { func (n *node) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno {
ino, err := n.fs.inodeOfID(n.id) entryToAttr(n.e, &out.Attr)
if err != nil {
n.fs.s.report(fmt.Errorf("node.Getattr: %v", err))
return syscall.EIO
}
entryToAttr(ino, n.attr, &out.Attr)
return 0 return 0
} }
var _ = (fusefs.NodeGetxattrer)((*node)(nil)) var _ = (fusefs.NodeGetxattrer)((*node)(nil))
func (n *node) Getxattr(ctx context.Context, attr string, dest []byte) (uint32, syscall.Errno) { func (n *node) Getxattr(ctx context.Context, attr string, dest []byte) (uint32, syscall.Errno) {
ent := n.attr for _, opaqueXattr := range opaqueXattrs {
opq := n.isOpaque() if attr == opaqueXattr && n.opaque {
for _, opaqueXattr := range n.fs.opaqueXattrs {
if attr == opaqueXattr && opq {
// This node is an opaque directory so give overlayfs-compliant indicator. // This node is an opaque directory so give overlayfs-compliant indicator.
if len(dest) < len(opaqueXattrValue) { if len(dest) < len(opaqueXattrValue) {
return uint32(len(opaqueXattrValue)), syscall.ERANGE return uint32(len(opaqueXattrValue)), syscall.ERANGE
@ -394,7 +209,7 @@ func (n *node) Getxattr(ctx context.Context, attr string, dest []byte) (uint32,
return uint32(copy(dest, opaqueXattrValue)), 0 return uint32(copy(dest, opaqueXattrValue)), 0
} }
} }
if v, ok := ent.Xattrs[attr]; ok { if v, ok := n.e.Xattrs[attr]; ok {
if len(dest) < len(v) { if len(dest) < len(v) {
return uint32(len(v)), syscall.ERANGE return uint32(len(v)), syscall.ERANGE
} }
@ -406,16 +221,14 @@ func (n *node) Getxattr(ctx context.Context, attr string, dest []byte) (uint32,
var _ = (fusefs.NodeListxattrer)((*node)(nil)) var _ = (fusefs.NodeListxattrer)((*node)(nil))
func (n *node) Listxattr(ctx context.Context, dest []byte) (uint32, syscall.Errno) { func (n *node) Listxattr(ctx context.Context, dest []byte) (uint32, syscall.Errno) {
ent := n.attr
opq := n.isOpaque()
var attrs []byte var attrs []byte
if opq { if n.opaque {
// This node is an opaque directory so add overlayfs-compliant indicator. // This node is an opaque directory so add overlayfs-compliant indicator.
for _, opaqueXattr := range n.fs.opaqueXattrs { for _, opaqueXattr := range opaqueXattrs {
attrs = append(attrs, []byte(opaqueXattr+"\x00")...) attrs = append(attrs, []byte(opaqueXattr+"\x00")...)
} }
} }
for k := range ent.Xattrs { for k := range n.e.Xattrs {
attrs = append(attrs, []byte(k+"\x00")...) attrs = append(attrs, []byte(k+"\x00")...)
} }
if len(dest) < len(attrs) { if len(dest) < len(attrs) {
@ -427,8 +240,7 @@ func (n *node) Listxattr(ctx context.Context, dest []byte) (uint32, syscall.Errn
var _ = (fusefs.NodeReadlinker)((*node)(nil)) var _ = (fusefs.NodeReadlinker)((*node)(nil))
func (n *node) Readlink(ctx context.Context) ([]byte, syscall.Errno) { func (n *node) Readlink(ctx context.Context) ([]byte, syscall.Errno) {
ent := n.attr return []byte(n.e.LinkName), 0
return []byte(ent.LinkName), 0
} }
var _ = (fusefs.NodeStatfser)((*node)(nil)) var _ = (fusefs.NodeStatfser)((*node)(nil))
@ -441,18 +253,16 @@ func (n *node) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errno {
// file is a file abstraction which implements file handle in go-fuse. // file is a file abstraction which implements file handle in go-fuse.
type file struct { type file struct {
n *node n *node
e *estargz.TOCEntry
ra io.ReaderAt ra io.ReaderAt
fd int
} }
var _ = (fusefs.FileReader)((*file)(nil)) var _ = (fusefs.FileReader)((*file)(nil))
func (f *file) Read(ctx context.Context, dest []byte, off int64) (fuse.ReadResult, syscall.Errno) { func (f *file) Read(ctx context.Context, dest []byte, off int64) (fuse.ReadResult, syscall.Errno) {
defer commonmetrics.MeasureLatencyInMicroseconds(commonmetrics.ReadOnDemand, f.n.fs.layerDigest, time.Now()) // measure time for on-demand file reads (in microseconds)
defer commonmetrics.IncOperationCount(commonmetrics.OnDemandReadAccessCount, f.n.fs.layerDigest) // increment the counter for on-demand file accesses
n, err := f.ra.ReadAt(dest, off) n, err := f.ra.ReadAt(dest, off)
if err != nil && err != io.EOF { if err != nil && err != io.EOF {
f.n.fs.s.report(fmt.Errorf("file.Read: %v", err)) f.n.s.report(fmt.Errorf("failed to read node: %v", err))
return nil, syscall.EIO return nil, syscall.EIO
} }
return fuse.ReadResultData(dest[:n]), 0 return fuse.ReadResultData(dest[:n]), 0
@ -461,46 +271,20 @@ func (f *file) Read(ctx context.Context, dest []byte, off int64) (fuse.ReadResul
var _ = (fusefs.FileGetattrer)((*file)(nil)) var _ = (fusefs.FileGetattrer)((*file)(nil))
func (f *file) Getattr(ctx context.Context, out *fuse.AttrOut) syscall.Errno { func (f *file) Getattr(ctx context.Context, out *fuse.AttrOut) syscall.Errno {
ino, err := f.n.fs.inodeOfID(f.n.id) entryToAttr(f.e, &out.Attr)
if err != nil {
f.n.fs.s.report(fmt.Errorf("file.Getattr: %v", err))
return syscall.EIO
}
entryToAttr(ino, f.n.attr, &out.Attr)
return 0 return 0
} }
// Implement PassthroughFd to enable go-fuse passthrough
var _ = (fusefs.FilePassthroughFder)((*file)(nil))
func (f *file) PassthroughFd() (int, bool) {
if f.fd <= 0 {
return -1, false
}
return f.fd, true
}
func (f *file) InitFd(fd int) {
f.fd = fd
}
// whiteout is a whiteout abstraction compliant to overlayfs. // whiteout is a whiteout abstraction compliant to overlayfs.
type whiteout struct { type whiteout struct {
fusefs.Inode fusefs.Inode
id uint32 e *estargz.TOCEntry
fs *fs
attr metadata.Attr
} }
var _ = (fusefs.NodeGetattrer)((*whiteout)(nil)) var _ = (fusefs.NodeGetattrer)((*whiteout)(nil))
func (w *whiteout) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno { func (w *whiteout) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno {
ino, err := w.fs.inodeOfID(w.id) entryToWhAttr(w.e, &out.Attr)
if err != nil {
w.fs.s.report(fmt.Errorf("whiteout.Getattr: %v", err))
return syscall.EIO
}
entryToWhAttr(ino, w.attr, &out.Attr)
return 0 return 0
} }
@ -513,7 +297,7 @@ func (w *whiteout) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errn
// newState provides new state directory node. // newState provides new state directory node.
// It creates statFile at the same time to give it stable inode number. // It creates statFile at the same time to give it stable inode number.
func (fs *fs) newState(layerDigest digest.Digest, blob remote.Blob) *state { func newState(layerDigest digest.Digest, blob remote.Blob) *state {
return &state{ return &state{
statFile: &statFile{ statFile: &statFile{
name: layerDigest.String() + ".json", name: layerDigest.String() + ".json",
@ -522,9 +306,7 @@ func (fs *fs) newState(layerDigest digest.Digest, blob remote.Blob) *state {
Size: blob.Size(), Size: blob.Size(),
}, },
blob: blob, blob: blob,
fs: fs,
}, },
fs: fs,
} }
} }
@ -535,7 +317,6 @@ func (fs *fs) newState(layerDigest digest.Digest, blob remote.Blob) *state {
type state struct { type state struct {
fusefs.Inode fusefs.Inode
statFile *statFile statFile *statFile
fs *fs
} }
var _ = (fusefs.NodeReaddirer)((*state)(nil)) var _ = (fusefs.NodeReaddirer)((*state)(nil))
@ -545,7 +326,7 @@ func (s *state) Readdir(ctx context.Context) (fusefs.DirStream, syscall.Errno) {
{ {
Mode: statFileMode, Mode: statFileMode,
Name: s.statFile.name, Name: s.statFile.name,
Ino: s.fs.inodeOfStatFile(), Ino: inodeOfStatFile(s.statFile),
}, },
}), 0 }), 0
} }
@ -566,7 +347,7 @@ func (s *state) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*f
var _ = (fusefs.NodeGetattrer)((*state)(nil)) var _ = (fusefs.NodeGetattrer)((*state)(nil))
func (s *state) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno { func (s *state) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno {
s.fs.stateToAttr(&out.Attr) stateToAttr(s, &out.Attr)
return 0 return 0
} }
@ -600,7 +381,6 @@ type statFile struct {
blob remote.Blob blob remote.Blob
statJSON statJSON statJSON statJSON
mu sync.Mutex mu sync.Mutex
fs *fs
} }
var _ = (fusefs.NodeOpener)((*statFile)(nil)) var _ = (fusefs.NodeOpener)((*statFile)(nil))
@ -639,22 +419,10 @@ func (sf *statFile) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Err
return 0 return 0
} }
// logContents puts the contents of statFile in the log
// to keep that information accessible for troubleshooting.
// The entries naming is kept to be consistend with the field naming in statJSON.
func (sf *statFile) logContents() {
ctx := context.Background()
log.G(ctx).WithFields(log.Fields{
"digest": sf.statJSON.Digest, "size": sf.statJSON.Size,
"fetchedSize": sf.statJSON.FetchedSize, "fetchedPercent": sf.statJSON.FetchedPercent,
}).WithError(errors.New(sf.statJSON.Error)).Error("statFile error")
}
func (sf *statFile) report(err error) { func (sf *statFile) report(err error) {
sf.mu.Lock() sf.mu.Lock()
defer sf.mu.Unlock() defer sf.mu.Unlock()
sf.statJSON.Error = err.Error() sf.statJSON.Error = err.Error()
sf.logContents()
} }
func (sf *statFile) attr(out *fuse.Attr) (fusefs.StableAttr, syscall.Errno) { func (sf *statFile) attr(out *fuse.Attr) (fusefs.StableAttr, syscall.Errno) {
@ -666,7 +434,7 @@ func (sf *statFile) attr(out *fuse.Attr) (fusefs.StableAttr, syscall.Errno) {
return fusefs.StableAttr{}, syscall.EIO return fusefs.StableAttr{}, syscall.EIO
} }
return sf.fs.statFileToAttr(uint64(len(st)), out), 0 return statFileToAttr(sf, uint64(len(st)), out), 0
} }
func (sf *statFile) updateStatUnlocked() ([]byte, error) { func (sf *statFile) updateStatUnlocked() ([]byte, error) {
@ -680,18 +448,24 @@ func (sf *statFile) updateStatUnlocked() ([]byte, error) {
return j, nil return j, nil
} }
// entryToAttr converts metadata.Attr to go-fuse's Attr. // inodeOfEnt calculates the inode number which is one-to-one correspondence
func entryToAttr(ino uint64, e metadata.Attr, out *fuse.Attr) fusefs.StableAttr { // with the TOCEntry instance.
out.Ino = ino func inodeOfEnt(e *estargz.TOCEntry) uint64 {
return uint64(uintptr(unsafe.Pointer(e)))
}
// entryToAttr converts stargz's TOCEntry to go-fuse's Attr.
func entryToAttr(e *estargz.TOCEntry, out *fuse.Attr) fusefs.StableAttr {
out.Ino = inodeOfEnt(e)
out.Size = uint64(e.Size) out.Size = uint64(e.Size)
if e.Mode&os.ModeSymlink != 0 {
out.Size = uint64(len(e.LinkName))
}
out.Blksize = blockSize out.Blksize = blockSize
out.Blocks = (out.Size + uint64(out.Blksize) - 1) / uint64(out.Blksize) * physicalBlockRatio out.Blocks = out.Size / uint64(out.Blksize)
mtime := e.ModTime if out.Size%uint64(out.Blksize) > 0 {
out.Blocks++
}
mtime := e.ModTime()
out.SetTimes(nil, &mtime, nil) out.SetTimes(nil, &mtime, nil)
out.Mode = fileModeToSystemMode(e.Mode) out.Mode = modeOfEntry(e)
out.Owner = fuse.Owner{Uid: uint32(e.UID), Gid: uint32(e.GID)} out.Owner = fuse.Owner{Uid: uint32(e.UID), Gid: uint32(e.GID)}
out.Rdev = uint32(unix.Mkdev(uint32(e.DevMajor), uint32(e.DevMinor))) out.Rdev = uint32(unix.Mkdev(uint32(e.DevMajor), uint32(e.DevMinor)))
out.Nlink = uint32(e.NumLink) out.Nlink = uint32(e.NumLink)
@ -709,13 +483,14 @@ func entryToAttr(ino uint64, e metadata.Attr, out *fuse.Attr) fusefs.StableAttr
} }
} }
// entryToWhAttr converts metadata.Attr to go-fuse's Attr of whiteouts. // entryToWhAttr converts stargz's TOCEntry to go-fuse's Attr of whiteouts.
func entryToWhAttr(ino uint64, e metadata.Attr, out *fuse.Attr) fusefs.StableAttr { func entryToWhAttr(e *estargz.TOCEntry, out *fuse.Attr) fusefs.StableAttr {
out.Ino = ino fi := e.Stat()
out.Ino = inodeOfEnt(e)
out.Size = 0 out.Size = 0
out.Blksize = blockSize out.Blksize = blockSize
out.Blocks = 0 out.Blocks = 0
mtime := e.ModTime mtime := fi.ModTime()
out.SetTimes(nil, &mtime, nil) out.SetTimes(nil, &mtime, nil)
out.Mode = syscall.S_IFCHR out.Mode = syscall.S_IFCHR
out.Owner = fuse.Owner{Uid: 0, Gid: 0} out.Owner = fuse.Owner{Uid: 0, Gid: 0}
@ -732,9 +507,15 @@ func entryToWhAttr(ino uint64, e metadata.Attr, out *fuse.Attr) fusefs.StableAtt
} }
} }
// inodeOfState calculates the inode number which is one-to-one correspondence
// with the state directory instance which was created on mount.
func inodeOfState(s *state) uint64 {
return uint64(uintptr(unsafe.Pointer(s)))
}
// stateToAttr converts state directory to go-fuse's Attr. // stateToAttr converts state directory to go-fuse's Attr.
func (fs *fs) stateToAttr(out *fuse.Attr) fusefs.StableAttr { func stateToAttr(s *state, out *fuse.Attr) fusefs.StableAttr {
out.Ino = fs.inodeOfState() out.Ino = inodeOfState(s)
out.Size = 0 out.Size = 0
out.Blksize = blockSize out.Blksize = blockSize
out.Blocks = 0 out.Blocks = 0
@ -759,13 +540,18 @@ func (fs *fs) stateToAttr(out *fuse.Attr) fusefs.StableAttr {
} }
} }
// inodeOfStatFile calculates the inode number which is one-to-one correspondence
// with the stat file instance which was created on mount.
func inodeOfStatFile(s *statFile) uint64 {
return uint64(uintptr(unsafe.Pointer(s)))
}
// statFileToAttr converts stat file to go-fuse's Attr. // statFileToAttr converts stat file to go-fuse's Attr.
// func statFileToAttr(id uint64, sf *statFile, size uint64, out *fuse.Attr) fusefs.StableAttr { func statFileToAttr(sf *statFile, size uint64, out *fuse.Attr) fusefs.StableAttr {
func (fs *fs) statFileToAttr(size uint64, out *fuse.Attr) fusefs.StableAttr { out.Ino = inodeOfStatFile(sf)
out.Ino = fs.inodeOfStatFile()
out.Size = size out.Size = size
out.Blksize = blockSize out.Blksize = blockSize
out.Blocks = (out.Size + uint64(out.Blksize) - 1) / uint64(out.Blksize) * physicalBlockRatio out.Blocks = out.Size / uint64(out.Blksize)
out.Nlink = 1 out.Nlink = 1
// Root can read it ("-r-------- root root"). // Root can read it ("-r-------- root root").
@ -787,7 +573,10 @@ func (fs *fs) statFileToAttr(size uint64, out *fuse.Attr) fusefs.StableAttr {
} }
} }
func fileModeToSystemMode(m os.FileMode) uint32 { // modeOfEntry gets system's mode bits from TOCEntry
func modeOfEntry(e *estargz.TOCEntry) uint32 {
m := e.Stat().Mode()
// Permission bits // Permission bits
res := uint32(m & os.ModePerm) res := uint32(m & os.ModePerm)
@ -832,7 +621,7 @@ func defaultStatfs(stat *fuse.StatfsOut) {
stat.Files = 0 // dummy stat.Files = 0 // dummy
stat.Ffree = 0 stat.Ffree = 0
stat.Bsize = blockSize stat.Bsize = blockSize
stat.NameLen = 255 // Standard max filename length for most filesystems (ext4, etc.) for compatibility stat.NameLen = 1<<32 - 1
stat.Frsize = blockSize stat.Frsize = blockSize
stat.Padding = 0 stat.Padding = 0
stat.Spare = [6]uint32{} stat.Spare = [6]uint32{}

638
fs/layer/node_test.go Normal file
View File

@ -0,0 +1,638 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the NOTICE.md file.
*/
package layer
import (
"bytes"
"context"
"crypto/sha256"
"encoding/json"
"fmt"
"io"
"math/rand"
"os"
"path/filepath"
"strings"
"syscall"
"testing"
"time"
"github.com/containerd/containerd/reference"
"github.com/containerd/containerd/remotes/docker"
"github.com/containerd/stargz-snapshotter/estargz"
"github.com/containerd/stargz-snapshotter/fs/reader"
"github.com/containerd/stargz-snapshotter/fs/remote"
"github.com/containerd/stargz-snapshotter/util/testutil"
fusefs "github.com/hanwen/go-fuse/v2/fs"
"github.com/hanwen/go-fuse/v2/fuse"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"golang.org/x/sys/unix"
)
const (
	// sampleMiddleOffset is an offset inside the first chunk of the sample data.
	sampleMiddleOffset = sampleChunkSize / 2
	// lastChunkOffset1 is the offset of the last (possibly partial) chunk of sampleData1.
	lastChunkOffset1 = sampleChunkSize * (int64(len(sampleData1)) / sampleChunkSize)
)
// TestNodeRead tests the Read method of each file node against every
// combination of read size, inner/base read offset and file size.
func TestNodeRead(t *testing.T) {
	sizeCond := map[string]int64{
		"single_chunk": sampleChunkSize - sampleMiddleOffset,
		"multi_chunks": sampleChunkSize + sampleMiddleOffset,
	}
	innerOffsetCond := map[string]int64{
		"at_top":    0,
		"at_middle": sampleMiddleOffset,
	}
	baseOffsetCond := map[string]int64{
		"of_1st_chunk":  sampleChunkSize * 0,
		"of_2nd_chunk":  sampleChunkSize * 1,
		"of_last_chunk": lastChunkOffset1,
	}
	fileSizeCond := map[string]int64{
		"in_1_chunk_file":  sampleChunkSize * 1,
		"in_2_chunks_file": sampleChunkSize * 2,
		"in_max_size_file": int64(len(sampleData1)),
	}
	for sn, size := range sizeCond {
		for in, innero := range innerOffsetCond {
			for bo, baseo := range baseOffsetCond {
				for fn, filesize := range fileSizeCond {
					t.Run(fmt.Sprintf("reading_%s_%s_%s_%s", sn, in, bo, fn), func(t *testing.T) {
						if filesize > int64(len(sampleData1)) {
							t.Fatal("sample file size is larger than sample data")
						}

						// The expected read length is the requested size,
						// clamped by the bytes remaining in the file.
						wantN := size
						offset := baseo + innero
						if remain := filesize - offset; remain < wantN {
							if wantN = remain; wantN < 0 {
								wantN = 0
							}
						}

						// use constant string value as a data source.
						want := strings.NewReader(sampleData1)

						// data we want to get.
						wantData := make([]byte, wantN)
						_, err := want.ReadAt(wantData, offset)
						if err != nil && err != io.EOF {
							t.Fatalf("want.ReadAt (offset=%d,size=%d): %v", offset, wantN, err)
						}

						// data we get from the file node.
						f := makeNodeReader(t, []byte(sampleData1)[:filesize], sampleChunkSize)
						tmpbuf := make([]byte, size) // fuse library can request bigger than remain
						rr, errno := f.Read(context.Background(), tmpbuf, offset)
						if errno != 0 {
							// BUGFIX: report errno (the actual failure) instead of the
							// stale err from the earlier want.ReadAt call.
							t.Errorf("failed to read off=%d, size=%d, filesize=%d: %v", offset, size, filesize, errno)
							return
						}
						if rsize := rr.Size(); int64(rsize) != wantN {
							t.Errorf("read size: %d; want: %d", rsize, wantN)
							return
						}
						tmpbuf = make([]byte, len(tmpbuf))
						respData, fs := rr.Bytes(tmpbuf)
						if fs != fuse.OK {
							// BUGFIX: report the fuse status fs, not the stale err.
							t.Errorf("failed to read result data for off=%d, size=%d, filesize=%d: %v", offset, size, filesize, fs)
						}
						if !bytes.Equal(wantData, respData) {
							t.Errorf("off=%d, filesize=%d; read data{size=%d,data=%q}; want (size=%d,data=%q)",
								offset, filesize, len(respData), string(respData), wantN, string(wantData))
							return
						}
					})
				}
			}
		}
	}
}
// makeNodeReader builds a single-file eStargz from contents with the given
// chunk size, mounts it as a node tree, opens the file and returns the
// resulting *file handle for use in Read tests.
func makeNodeReader(t *testing.T, contents []byte, chunkSize int) *file {
	const testName = "test"
	sgz, _, err := testutil.BuildEStargz(
		[]testutil.TarEntry{testutil.File(testName, string(contents))},
		testutil.WithEStargzOptions(estargz.WithChunkSize(chunkSize)),
	)
	if err != nil {
		t.Fatalf("failed to build sample eStargz: %v", err)
	}
	sr, err := estargz.Open(sgz)
	if err != nil {
		t.Fatal("failed to make stargz")
	}
	root := getRootNode(t, sr)
	var entryOut fuse.EntryOut
	inode, errno := root.Lookup(context.Background(), testName, &entryOut)
	if errno != 0 {
		t.Fatalf("failed to lookup test node; errno: %v", errno)
	}
	fh, _, errno := inode.Operations().(fusefs.NodeOpener).Open(context.Background(), 0)
	if errno != 0 {
		t.Fatalf("failed to open test file; errno: %v", errno)
	}
	return fh.(*file)
}
// TestExistence verifies, for a set of input tar layouts, that the mounted
// layer hides whiteout and landmark files, applies opaque-directory xattrs,
// preserves extra permission bits (suid/sgid/sticky), and exposes the
// per-layer state file.
func TestExistence(t *testing.T) {
	tests := []struct {
		name string              // test case name
		in   []testutil.TarEntry // entries of the layer tar to build
		want []check             // assertions run against the mounted root
	}{
		{
			name: "1_whiteout_with_sibling",
			in: []testutil.TarEntry{
				testutil.Dir("foo/"),
				testutil.File("foo/bar.txt", ""),
				testutil.File("foo/.wh.foo.txt", ""),
			},
			want: []check{
				hasValidWhiteout("foo/foo.txt"),
				fileNotExist("foo/.wh.foo.txt"),
			},
		},
		{
			name: "1_whiteout_with_duplicated_name",
			in: []testutil.TarEntry{
				testutil.Dir("foo/"),
				testutil.File("foo/bar.txt", "test"),
				testutil.File("foo/.wh.bar.txt", ""),
			},
			want: []check{
				hasFileDigest("foo/bar.txt", digestFor("test")),
				fileNotExist("foo/.wh.bar.txt"),
			},
		},
		{
			name: "1_opaque",
			in: []testutil.TarEntry{
				testutil.Dir("foo/"),
				testutil.File("foo/.wh..wh..opq", ""),
			},
			want: []check{
				hasNodeXattrs("foo/", opaqueXattrs[0], opaqueXattrValue),
				hasNodeXattrs("foo/", opaqueXattrs[1], opaqueXattrValue),
				fileNotExist("foo/.wh..wh..opq"),
			},
		},
		{
			name: "1_opaque_with_sibling",
			in: []testutil.TarEntry{
				testutil.Dir("foo/"),
				testutil.File("foo/.wh..wh..opq", ""),
				testutil.File("foo/bar.txt", "test"),
			},
			want: []check{
				hasNodeXattrs("foo/", opaqueXattrs[0], opaqueXattrValue),
				hasNodeXattrs("foo/", opaqueXattrs[1], opaqueXattrValue),
				hasFileDigest("foo/bar.txt", digestFor("test")),
				fileNotExist("foo/.wh..wh..opq"),
			},
		},
		{
			name: "1_opaque_with_xattr",
			in: []testutil.TarEntry{
				testutil.Dir("foo/", testutil.WithDirXattrs(map[string]string{"foo": "bar"})),
				testutil.File("foo/.wh..wh..opq", ""),
			},
			want: []check{
				hasNodeXattrs("foo/", opaqueXattrs[0], opaqueXattrValue),
				hasNodeXattrs("foo/", opaqueXattrs[1], opaqueXattrValue),
				hasNodeXattrs("foo/", "foo", "bar"),
				fileNotExist("foo/.wh..wh..opq"),
			},
		},
		{
			name: "prefetch_landmark",
			in: []testutil.TarEntry{
				testutil.File(estargz.PrefetchLandmark, "test"),
				testutil.Dir("foo/"),
				testutil.File(fmt.Sprintf("foo/%s", estargz.PrefetchLandmark), "test"),
			},
			want: []check{
				// the landmark is hidden only at the layer root
				fileNotExist(estargz.PrefetchLandmark),
				hasFileDigest(fmt.Sprintf("foo/%s", estargz.PrefetchLandmark), digestFor("test")),
			},
		},
		{
			name: "no_prefetch_landmark",
			in: []testutil.TarEntry{
				testutil.File(estargz.NoPrefetchLandmark, "test"),
				testutil.Dir("foo/"),
				testutil.File(fmt.Sprintf("foo/%s", estargz.NoPrefetchLandmark), "test"),
			},
			want: []check{
				fileNotExist(estargz.NoPrefetchLandmark),
				hasFileDigest(fmt.Sprintf("foo/%s", estargz.NoPrefetchLandmark), digestFor("test")),
			},
		},
		{
			name: "state_file",
			in: []testutil.TarEntry{
				testutil.File("test", "test"),
			},
			want: []check{
				hasFileDigest("test", digestFor("test")),
				hasStateFile(t, testStateLayerDigest.String()+".json"),
			},
		},
		{
			name: "file_suid",
			in: []testutil.TarEntry{
				testutil.File("test", "test", testutil.WithFileMode(0644|os.ModeSetuid)),
			},
			want: []check{
				hasExtraMode("test", os.ModeSetuid),
			},
		},
		{
			name: "dir_sgid",
			in: []testutil.TarEntry{
				testutil.Dir("test/", testutil.WithDirMode(0755|os.ModeSetgid)),
			},
			want: []check{
				hasExtraMode("test/", os.ModeSetgid),
			},
		},
		{
			name: "file_sticky",
			in: []testutil.TarEntry{
				testutil.File("test", "test", testutil.WithFileMode(0644|os.ModeSticky)),
			},
			want: []check{
				hasExtraMode("test", os.ModeSticky),
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			sgz, _, err := testutil.BuildEStargz(tt.in)
			if err != nil {
				t.Fatalf("failed to build sample eStargz: %v", err)
			}
			r, err := estargz.Open(sgz)
			if err != nil {
				t.Fatalf("stargz.Open: %v", err)
			}
			rootNode := getRootNode(t, r)
			for _, want := range tt.want {
				want(t, rootNode)
			}
		})
	}
}
// getRootNode builds the layer root node backed by the given eStargz reader
// and a dummy blob, then runs go-fuse's mount-time initialization on it.
func getRootNode(t *testing.T, r *estargz.Reader) *node {
	root, err := newNode(testStateLayerDigest, &testReader{r}, &testBlobState{10, 5})
	if err != nil {
		t.Fatalf("failed to get root node: %v", err)
	}
	fusefs.NewNodeFS(root, &fusefs.Options{}) // initializes root node
	return root.(*node)
}
// testReader adapts *estargz.Reader to the reader interface consumed by the
// layer nodes; Cache and Close are no-ops for tests.
type testReader struct {
	r *estargz.Reader
}

func (tr *testReader) OpenFile(name string) (io.ReaderAt, error) {
	return tr.r.OpenFile(name)
}

func (tr *testReader) Lookup(name string) (*estargz.TOCEntry, bool) {
	return tr.r.Lookup(name)
}

func (tr *testReader) Cache(opts ...reader.CacheOption) error {
	return nil
}

func (tr *testReader) Close() error {
	return nil
}
// testBlobState is a stub remote blob that reports fixed size/fetchedSize
// values; every other operation is a no-op that always succeeds.
type testBlobState struct {
	size        int64
	fetchedSize int64
}

func (tb *testBlobState) Check() error {
	return nil
}

func (tb *testBlobState) Size() int64 {
	return tb.size
}

func (tb *testBlobState) FetchedSize() int64 {
	return tb.fetchedSize
}

func (tb *testBlobState) ReadAt(p []byte, offset int64, opts ...remote.Option) (int, error) {
	return 0, nil
}

func (tb *testBlobState) Cache(offset int64, size int64, opts ...remote.Option) error {
	return nil
}

func (tb *testBlobState) Refresh(ctx context.Context, host docker.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error {
	return nil
}

func (tb *testBlobState) Close() error {
	return nil
}
type check func(*testing.T, *node)
// fileNotExist returns a check asserting that no node exists at file.
func fileNotExist(file string) check {
	return func(t *testing.T, root *node) {
		_, _, err := getDirentAndNode(t, root, file)
		if err == nil {
			t.Errorf("Node %q exists", file)
		}
	}
}
// hasFileDigest returns a check asserting that the file node at the given
// path carries the expected content digest in its TOC entry.
func hasFileDigest(file string, digest string) check {
	return func(t *testing.T, root *node) {
		_, inode, err := getDirentAndNode(t, root, file)
		if err != nil {
			t.Fatalf("failed to get node %q: %v", file, err)
		}
		got := inode.Operations().(*node).e.Digest
		if got != digest {
			t.Fatalf("Digest(%q) = %q, want %q", file, got, digest)
		}
	}
}
// hasExtraMode returns a check asserting that the node at name carries
// exactly the extra permission bits (setuid/setgid/sticky) given by mode.
func hasExtraMode(name string, mode os.FileMode) check {
	return func(t *testing.T, root *node) {
		_, inode, err := getDirentAndNode(t, root, name)
		if err != nil {
			t.Fatalf("failed to get node %q: %v", name, err)
		}
		var attrOut fuse.AttrOut
		errno := inode.Operations().(fusefs.NodeGetattrer).Getattr(context.Background(), nil, &attrOut)
		if errno != 0 {
			t.Fatalf("failed to get attributes of node %q: %v", name, errno)
		}
		gotMode := attrOut.Attr.Mode & (syscall.S_ISUID | syscall.S_ISGID | syscall.S_ISVTX)
		wantMode := extraModeToTarMode(mode)
		if gotMode != uint32(wantMode) {
			t.Fatalf("got mode = %b, want %b", gotMode, wantMode)
		}
	}
}
// hasValidWhiteout returns a check asserting that the node at name is an
// overlayfs-compliant whiteout: a char device with device number (0, 0),
// consistent between the direntry and the node attributes.
func hasValidWhiteout(name string) check {
	return func(t *testing.T, root *node) {
		ent, inode, err := getDirentAndNode(t, root, name)
		if err != nil {
			t.Fatalf("failed to get node %q: %v", name, err)
		}
		var attrOut fuse.AttrOut
		if errno := inode.Operations().(fusefs.NodeGetattrer).Getattr(context.Background(), nil, &attrOut); errno != 0 {
			t.Fatalf("failed to get attributes of file %q: %v", name, errno)
		}
		attr := attrOut.Attr
		if attr.Ino != ent.Ino {
			t.Errorf("inconsistent inodes %d(Node) != %d(Dirent)", attr.Ino, ent.Ino)
			return
		}

		// validate the direntry
		if ent.Mode != syscall.S_IFCHR {
			t.Errorf("whiteout entry %q isn't a char device", name)
			return
		}

		// validate the node
		if attr.Mode != syscall.S_IFCHR {
			t.Errorf("whiteout %q has an invalid mode %o; want %o",
				name, attr.Mode, syscall.S_IFCHR)
			return
		}
		if attr.Rdev != uint32(unix.Mkdev(0, 0)) {
			t.Errorf("whiteout %q has invalid device numbers (%d, %d); want (0, 0)",
				name, unix.Major(uint64(attr.Rdev)), unix.Minor(uint64(attr.Rdev)))
			return
		}
	}
}
// hasNodeXattrs returns a check asserting that the node at entry exposes the
// xattr called name with the given value, both through Listxattr and Getxattr.
func hasNodeXattrs(entry, name, value string) check {
	return func(t *testing.T, root *node) {
		_, n, err := getDirentAndNode(t, root, entry)
		if err != nil {
			t.Fatalf("failed to get node %q: %v", entry, err)
		}

		// check xattr exists in the xattrs list.
		buf := make([]byte, 1000)
		nb, errno := n.Operations().(fusefs.NodeListxattrer).Listxattr(context.Background(), buf)
		if errno != 0 {
			// BUGFIX: report errno (the actual failure), not the nil err above.
			t.Fatalf("failed to get xattrs list of node %q: %v", entry, errno)
		}
		attrs := strings.Split(string(buf[:nb]), "\x00")
		var found bool
		for _, x := range attrs {
			if x == name {
				found = true
			}
		}
		if !found {
			// BUGFIX: the missing attribute is identified by name, not by value.
			t.Errorf("node %q doesn't have an opaque xattr %q", entry, name)
			return
		}

		// check the xattr has valid value.
		v := make([]byte, len(value))
		nv, errno := n.Operations().(fusefs.NodeGetxattrer).Getxattr(context.Background(), name, v)
		if errno != 0 {
			// BUGFIX: report errno, not the nil err above.
			t.Fatalf("failed to get xattr %q of node %q: %v", name, entry, errno)
		}
		if int(nv) != len(value) {
			t.Fatalf("invalid xattr size for file %q, value %q got %d; want %d",
				name, value, nv, len(value))
		}
		if string(v) != value {
			t.Errorf("node %q has an invalid xattr %q; want %q", entry, v, value)
			return
		}
	}
}
// hasEntry iterates ents and returns the dirent whose Name matches name,
// reporting whether such an entry was found.
func hasEntry(t *testing.T, name string, ents fusefs.DirStream) (fuse.DirEntry, bool) {
	for ents.HasNext() {
		de, errno := ents.Next()
		if errno != 0 {
			// BUGFIX: fix the "faield" typo and include the errno in the message.
			t.Fatalf("failed to read entries for %q: %v", name, errno)
		}
		if de.Name == name {
			return de, true
		}
	}
	return fuse.DirEntry{}, false
}
// hasStateFile returns a check that verifies the state directory is hidden
// from the root directory listing yet still resolvable via Lookup, and that
// the state file with the given id reflects the last error passed to report().
//
// NOTE(review): the outer *testing.T parameter is unused by the returned
// closure (which receives its own t); it is kept for interface stability.
func hasStateFile(t *testing.T, id string) check {
	return func(t *testing.T, root *node) {
		// Check the state dir is hidden on OpenDir for "/"
		ents, errno := root.Readdir(context.Background())
		if errno != 0 {
			t.Errorf("failed to open root directory: %v", errno)
			return
		}
		if _, ok := hasEntry(t, stateDirName, ents); ok {
			t.Errorf("state direntry %q should not be listed", stateDirName)
			return
		}

		// Check existence of state dir
		var eo fuse.EntryOut
		sti, errno := root.Lookup(context.Background(), stateDirName, &eo)
		if errno != 0 {
			t.Errorf("failed to lookup directory %q: %v", stateDirName, errno)
			return
		}
		st, ok := sti.Operations().(*state)
		if !ok {
			t.Errorf("directory %q isn't a state node", stateDirName)
			return
		}

		// Check existence of state file
		ents, errno = st.Readdir(context.Background())
		if errno != 0 {
			t.Errorf("failed to open directory %q: %v", stateDirName, errno)
			return
		}
		if _, ok := hasEntry(t, id, ents); !ok {
			t.Errorf("direntry %q not found in %q", id, stateDirName)
			return
		}
		inode, errno := st.Lookup(context.Background(), id, &eo)
		if errno != 0 {
			t.Errorf("failed to lookup node %q in %q: %v", id, stateDirName, errno)
			return
		}
		n, ok := inode.Operations().(*statFile)
		if !ok {
			t.Errorf("entry %q isn't a normal node", id)
			return
		}

		// wanted data
		rand.Seed(time.Now().UnixNano())
		wantErr := fmt.Errorf("test-%d", rand.Int63())

		// report the data
		root.s.report(wantErr)

		// obtain file size (check later)
		var ao fuse.AttrOut
		errno = n.Operations().(fusefs.NodeGetattrer).Getattr(context.Background(), nil, &ao)
		if errno != 0 {
			t.Errorf("failed to get attr of state file: %v", errno)
			return
		}
		attr := ao.Attr

		// get data via state file
		tmp := make([]byte, 4096)
		res, errno := n.Read(context.Background(), nil, tmp, 0)
		if errno != 0 {
			t.Errorf("failed to read state file: %v", errno)
			return
		}
		gotState, status := res.Bytes(nil)
		if status != fuse.OK {
			// BUGFIX: report the fuse status that actually failed, not the
			// (successful) errno from the preceding Read call.
			t.Errorf("failed to get result bytes of state file: %v", status)
			return
		}
		if attr.Size != uint64(len(string(gotState))) {
			t.Errorf("size %d; want %d", attr.Size, len(string(gotState)))
			return
		}
		var j statJSON
		if err := json.Unmarshal(gotState, &j); err != nil {
			t.Errorf("failed to unmarshal %q: %v", string(gotState), err)
			return
		}
		if wantErr.Error() != j.Error {
			t.Errorf("expected error %q, got %q", wantErr.Error(), j.Error)
			return
		}
	}
}
// getDirentAndNode gets dirent and node at the specified path at once and makes
// sure that the both of them exist. On any failure a non-nil err is returned
// and the other results must not be used.
func getDirentAndNode(t *testing.T, root *node, path string) (ent fuse.DirEntry, n *fusefs.Inode, err error) {
	dir, base := filepath.Split(filepath.Clean(path))

	// get the target's parent directory.
	var eo fuse.EntryOut
	d := root
	for _, name := range strings.Split(dir, "/") {
		if len(name) == 0 {
			continue
		}
		di, errno := d.Lookup(context.Background(), name, &eo)
		if errno != 0 {
			err = fmt.Errorf("failed to lookup directory %q: %v", name, errno)
			return
		}
		var ok bool
		if d, ok = di.Operations().(*node); !ok {
			err = fmt.Errorf("directory %q isn't a normal node", name)
			return
		}
	}

	// get the target's direntry.
	ents, errno := d.Readdir(context.Background())
	if errno != 0 {
		// BUGFIX: return immediately; the original fell through and passed a
		// nil DirStream into hasEntry, which would panic.
		err = fmt.Errorf("failed to open directory %q: %v", path, errno)
		return
	}
	ent, ok := hasEntry(t, base, ents)
	if !ok {
		// BUGFIX: return early so a later Lookup cannot mask this error.
		err = fmt.Errorf("direntry %q not found in the parent directory of %q", base, path)
		return
	}

	// get the target's node.
	n, errno = d.Lookup(context.Background(), base, &eo)
	if errno != 0 {
		err = fmt.Errorf("failed to lookup node %q: %v", path, errno)
	}
	return
}
// digestFor returns the OCI-style "sha256:<hex>" digest of content.
func digestFor(content string) string {
	h := sha256.New()
	h.Write([]byte(content))
	return fmt.Sprintf("sha256:%x", h.Sum(nil))
}
// suid, guid, sticky bits for archive/tar.
// Mirrored here (they are unexported in archive/tar) so tests can translate
// os.FileMode extra bits into tar header mode bits.
// https://github.com/golang/go/blob/release-branch.go1.13/src/archive/tar/common.go#L607-L609
const (
	cISUID = 04000 // Set uid
	cISGID = 02000 // Set gid
	cISVTX = 01000 // Save text (sticky bit)
)
// extraModeToTarMode translates the setuid/setgid/sticky bits of an
// os.FileMode into the corresponding tar header mode bits.
func extraModeToTarMode(fm os.FileMode) (tm int64) {
	conversions := []struct {
		fileBit os.FileMode
		tarBit  int64
	}{
		{os.ModeSetuid, cISUID},
		{os.ModeSetgid, cISGID},
		{os.ModeSticky, cISVTX},
	}
	for _, c := range conversions {
		if fm&c.fileBit != 0 {
			tm |= c.tarBit
		}
	}
	return tm
}

File diff suppressed because it is too large Load Diff

View File

@ -1,216 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package commonmetrics
import (
"context"
"sync"
"time"
"github.com/containerd/log"
digest "github.com/opencontainers/go-digest"
"github.com/prometheus/client_golang/prometheus"
)
// Metric name keys exposed by this package. These become the prometheus
// metric names under the stargz/fs namespace below.
const (
	// OperationLatencyKeyMilliseconds is the key for stargz operation latency metrics in milliseconds.
	OperationLatencyKeyMilliseconds = "operation_duration_milliseconds"

	// OperationLatencyKeyMicroseconds is the key for stargz operation latency metrics in microseconds.
	OperationLatencyKeyMicroseconds = "operation_duration_microseconds"

	// OperationCountKey is the key for stargz operation count metrics.
	OperationCountKey = "operation_count"

	// BytesServedKey is the key for any metric related to counting bytes served as the part of specific operation.
	BytesServedKey = "bytes_served"

	// Keep namespace as stargz and subsystem as fs.
	namespace = "stargz"
	subsystem = "fs"
)
// Lists all metric labels. Each value is used as the "operation_type" label
// on the collectors below, or as the operation name in log-based metrics.
const (
	// prometheus metrics
	Mount                            = "mount"
	RemoteRegistryGet                = "remote_registry_get"
	NodeReaddir                      = "node_readdir"
	StargzHeaderGet                  = "stargz_header_get"
	StargzFooterGet                  = "stargz_footer_get"
	StargzTocGet                     = "stargz_toc_get"
	DeserializeTocJSON               = "stargz_toc_json_deserialize"
	PrefetchesCompleted              = "all_prefetches_completed"
	ReadOnDemand                     = "read_on_demand"
	MountLayerToLastOnDemandFetch    = "mount_layer_to_last_on_demand_fetch"
	OnDemandReadAccessCount          = "on_demand_read_access_count"
	OnDemandRemoteRegistryFetchCount = "on_demand_remote_registry_fetch_count"
	OnDemandBytesServed              = "on_demand_bytes_served"
	OnDemandBytesFetched             = "on_demand_bytes_fetched"

	// logs metrics
	PrefetchTotal             = "prefetch_total"
	PrefetchDownload          = "prefetch_download"
	PrefetchDecompress        = "prefetch_decompress"
	BackgroundFetchTotal      = "background_fetch_total"
	BackgroundFetchDownload   = "background_fetch_download"
	BackgroundFetchDecompress = "background_fetch_decompress"
	PrefetchSize              = "prefetch_size"
)
var (
	// Buckets for OperationLatency metrics.
	latencyBucketsMilliseconds = []float64{1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384} // in milliseconds
	latencyBucketsMicroseconds = []float64{1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024}                          // in microseconds

	// operationLatencyMilliseconds collects operation latency numbers in milliseconds grouped by
	// operation, type and layer digest.
	operationLatencyMilliseconds = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      OperationLatencyKeyMilliseconds,
			Help:      "Latency in milliseconds of stargz snapshotter operations. Broken down by operation type and layer sha.",
			Buckets:   latencyBucketsMilliseconds,
		},
		[]string{"operation_type", "layer"},
	)

	// operationLatencyMicroseconds collects operation latency numbers in microseconds grouped by
	// operation, type and layer digest.
	operationLatencyMicroseconds = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      OperationLatencyKeyMicroseconds,
			Help:      "Latency in microseconds of stargz snapshotter operations. Broken down by operation type and layer sha.",
			Buckets:   latencyBucketsMicroseconds,
		},
		[]string{"operation_type", "layer"},
	)

	// operationCount collects operation count numbers by operation
	// type and layer sha.
	operationCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      OperationCountKey,
			Help:      "The count of stargz snapshotter operations. Broken down by operation type and layer sha.",
		},
		[]string{"operation_type", "layer"},
	)

	// bytesCount reflects the number of bytes served as the part of specific operation type per layer sha.
	bytesCount = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      BytesServedKey,
			Help:      "The number of bytes served per stargz snapshotter operations. Broken down by operation type and layer sha.",
		},
		[]string{"operation_type", "layer"},
	)
)
// register guards metric registration so Register takes effect only once per process.
var register sync.Once

// logLevel is the level used when emitting metric values as log records.
var logLevel = log.DebugLevel
// sinceInMilliseconds gets the time since the specified start in milliseconds.
// The division by 1e6 is made to have the milliseconds value as floating point number, since the native method
// .Milliseconds() returns an integer value and you can lost a precision for sub-millisecond values.
func sinceInMilliseconds(start time.Time) float64 {
return float64(time.Since(start).Nanoseconds()) / 1e6
}
// sinceInMicroseconds gets the time since the specified start in microseconds.
// The division by 1e3 is made to have the microseconds value as floating point number, since the native method
// .Microseconds() returns an integer value and you can lost a precision for sub-microsecond values.
func sinceInMicroseconds(start time.Time) float64 {
return float64(time.Since(start).Nanoseconds()) / 1e3
}
// Register registers all package collectors with the default prometheus
// registry and stores the log level used for metric log records. Only the
// first call has any effect.
func Register(l log.Level) {
	register.Do(func() {
		logLevel = l
		prometheus.MustRegister(
			operationLatencyMilliseconds,
			operationLatencyMicroseconds,
			operationCount,
			bytesCount,
		)
	})
}
// MeasureLatencyInMilliseconds observes the time elapsed since start (in
// milliseconds) on the latency histogram labeled with the given operation and
// layer digest. For a layer-agnostic record, pass digest.FromString("").
func MeasureLatencyInMilliseconds(operation string, layer digest.Digest, start time.Time) {
	observer := operationLatencyMilliseconds.WithLabelValues(operation, layer.String())
	observer.Observe(sinceInMilliseconds(start))
}
// MeasureLatencyInMicroseconds attaches the operation and layer-digest labels
// and observes the elapsed time since start, in microseconds, in one call.
// This gives a latency breakdown per operation and per layer. For a
// layer-agnostic measurement pass the digest of the empty string, e.g.
// layerDigest := digest.FromString("")
func MeasureLatencyInMicroseconds(operation string, layer digest.Digest, start time.Time) {
	observer := operationLatencyMicroseconds.WithLabelValues(operation, layer.String())
	observer.Observe(sinceInMicroseconds(start))
}
// IncOperationCount increments the operation counter labeled with the given
// operation name and layer digest.
func IncOperationCount(operation string, layer digest.Digest) {
	counter := operationCount.WithLabelValues(operation, layer.String())
	counter.Inc()
}
// AddBytesCount adds the given byte count to the bytes-served counter labeled
// with the given operation name and layer digest.
func AddBytesCount(operation string, layer digest.Digest, bytes int64) {
	counter := bytesCount.WithLabelValues(operation, layer.String())
	counter.Add(float64(bytes))
}
// WriteLatencyLogValue writes a log record for latency in milliseconds,
// broken down by operation and layer digest via structured log fields.
func WriteLatencyLogValue(ctx context.Context, layer digest.Digest, operation string, start time.Time) {
	logger := log.G(ctx).
		WithField("metrics", "latency").
		WithField("operation", operation).
		WithField("layer_sha", layer.String())
	ctx = log.WithLogger(ctx, logger)
	log.G(ctx).Logf(logLevel, "value=%v milliseconds", sinceInMilliseconds(start))
}
// WriteLatencyWithBytesLogValue writes a log record for latency in
// milliseconds together with a named byte-size value. The record is broken
// down by operation, layer digest and the byte value.
func WriteLatencyWithBytesLogValue(ctx context.Context, layer digest.Digest, latencyOperation string, start time.Time, bytesMetricName string, bytesMetricValue int64) {
	logger := log.G(ctx).
		WithField("metrics", "latency").
		WithField("operation", latencyOperation).
		WithField("layer_sha", layer.String())
	ctx = log.WithLogger(ctx, logger)
	log.G(ctx).Logf(logLevel, "value=%v milliseconds; %v=%v bytes", sinceInMilliseconds(start), bytesMetricName, bytesMetricValue)
}
// LogLatencyForLastOnDemandFetch implements a special case for measuring the
// latency of the last on-demand fetch, which must be invoked at the end of
// the background fetch operation only. Since this is expected to happen only
// once per container launch, it writes a log line instead of directly
// emitting a metric. The flow is:
//  1. The mount start time is recorded.
//  2. Timestamps of on-demand fetches are recorded per layer sha.
//  3. When background fetch completes, the difference between the last
//     on-demand fetch and the mount start time is logged here.
func LogLatencyForLastOnDemandFetch(ctx context.Context, layer digest.Digest, start time.Time, end time.Time) {
	diffInMilliseconds := float64(end.Sub(start).Milliseconds())
	// A non-positive difference means `end` was left at the zero time.Time,
	// i.e. there was no on-demand fetch for this layer; log nothing.
	if diffInMilliseconds <= 0 {
		return
	}
	logger := log.G(ctx).
		WithField("metrics", "latency").
		WithField("operation", MountLayerToLastOnDemandFetch).
		WithField("layer_sha", layer.String())
	ctx = log.WithLogger(ctx, logger)
	log.G(ctx).Logf(logLevel, "value=%v milliseconds", diffInMilliseconds)
}

View File

@ -14,7 +14,7 @@
limitations under the License. limitations under the License.
*/ */
package layermetrics package metrics
import ( import (
"github.com/containerd/stargz-snapshotter/fs/layer" "github.com/containerd/stargz-snapshotter/fs/layer"
@ -36,19 +36,6 @@ var layerMetrics = []*metric{
} }
}, },
}, },
{
name: "layer_prefetch_size",
help: "Total prefetched size of the layer",
unit: metrics.Bytes,
vt: prometheus.CounterValue,
getValues: func(l layer.Layer) []value {
return []value{
{
v: float64(l.Info().PrefetchSize),
},
}
},
},
{ {
name: "layer_size", name: "layer_size",
help: "Total size of the layer", help: "Total size of the layer",

View File

@ -14,7 +14,7 @@
limitations under the License. limitations under the License.
*/ */
package layermetrics package metrics
import ( import (
"sync" "sync"

File diff suppressed because it is too large Load Diff

View File

@ -23,26 +23,317 @@
package reader package reader
import ( import (
"bytes"
"fmt"
"io"
"strings"
"testing" "testing"
memorymetadata "github.com/containerd/stargz-snapshotter/metadata/memory" "github.com/containerd/stargz-snapshotter/cache"
"github.com/containerd/stargz-snapshotter/estargz"
"github.com/containerd/stargz-snapshotter/util/testutil"
digest "github.com/opencontainers/go-digest"
) )
func TestReader(t *testing.T) { const (
testRunner := &TestRunner{ sampleChunkSize = 3
TestingT: t, sampleMiddleOffset = sampleChunkSize / 2
Runner: func(testingT TestingT, name string, run func(t TestingT)) { sampleData1 = "0123456789"
tt, ok := testingT.(*testing.T) lastChunkOffset1 = sampleChunkSize * (int64(len(sampleData1)) / sampleChunkSize)
if !ok { )
testingT.Fatal("TestingT is not a *testing.T")
return
}
tt.Run(name, func(t *testing.T) { // Tests Reader for failure cases.
run(t) func TestFailReader(t *testing.T) {
}) testFileName := "test"
}, stargzFile, _, err := testutil.BuildEStargz([]testutil.TarEntry{
testutil.File(testFileName, sampleData1),
}, testutil.WithEStargzOptions(estargz.WithChunkSize(sampleChunkSize)))
if err != nil {
t.Fatalf("failed to build sample estargz")
}
br := &breakReaderAt{
ReaderAt: stargzFile,
success: true,
}
bev := &testTOCEntryVerifier{true}
mcache := cache.NewMemoryCache()
gr, _, err := newReader(io.NewSectionReader(br, 0, stargzFile.Size()), mcache, bev)
if err != nil {
t.Fatalf("Failed to open stargz file: %v", err)
} }
TestSuiteReader(testRunner, memorymetadata.NewReader) // tests for opening file
_, err = gr.OpenFile("dummy")
if err == nil {
t.Errorf("succeeded to open file but wanted to fail")
return
}
fr, err := gr.OpenFile(testFileName)
if err != nil {
t.Errorf("failed to open file but wanted to succeed: %v", err)
}
for _, rs := range []bool{true, false} {
for _, vs := range []bool{true, false} {
mcache.(*cache.MemoryCache).Membuf = map[string]*bytes.Buffer{}
br.success = rs
bev.success = vs
// tests for reading file
p := make([]byte, len(sampleData1))
n, err := fr.ReadAt(p, 0)
if rs && vs {
if err != nil || n != len(sampleData1) || !bytes.Equal([]byte(sampleData1), p) {
t.Errorf("failed to read data but wanted to succeed: %v", err)
return
}
} else {
if err == nil {
t.Errorf("succeeded to read data but wanted to fail (reader:%v,verify:%v)", rs, vs)
return
}
}
// tests for caching reader
err = gr.Cache()
if rs && vs {
if err != nil {
t.Errorf("failed to cache reader but wanted to succeed")
}
} else {
if err == nil {
t.Errorf("succeeded to cache reader but wanted to fail (reader:%v,verify:%v)", rs, vs)
}
}
}
}
}
type breakReaderAt struct {
io.ReaderAt
success bool
}
func (br *breakReaderAt) ReadAt(p []byte, off int64) (int, error) {
if br.success {
return br.ReaderAt.ReadAt(p, off)
}
return 0, fmt.Errorf("failed")
}
// testTOCEntryVerifier hands out testVerifier stubs that all share the same
// predetermined verification outcome.
type testTOCEntryVerifier struct {
	success bool
}

// Verifier returns a stub verifier for the given TOC entry; the entry itself
// is ignored and the configured outcome is used instead.
func (bev *testTOCEntryVerifier) Verifier(ce *estargz.TOCEntry) (digest.Verifier, error) {
	return &testVerifier{success: bev.success}, nil
}
// testVerifier is a digest.Verifier stub whose verification result is fixed
// up front.
type testVerifier struct {
	success bool
}

// Write accepts and discards all input, always reporting full success.
func (v *testVerifier) Write(p []byte) (int, error) {
	return len(p), nil
}

// Verified reports the predetermined verification result.
func (v *testVerifier) Verified() bool {
	return v.success
}
type region struct{ b, e int64 }
// Tests ReadAt method of each file.
//
// The cases are the cross product of read size, offset within a chunk, chunk
// base offset, file size and pre-populated cache state. For each combination,
// the data read through the file is compared against the same range of the
// constant sample string, and the chunk cache is then checked for valid
// contents.
func TestFileReadAt(t *testing.T) {
	sizeCond := map[string]int64{
		"single_chunk": sampleChunkSize - sampleMiddleOffset,
		"multi_chunks": sampleChunkSize + sampleMiddleOffset,
	}
	innerOffsetCond := map[string]int64{
		"at_top":    0,
		"at_middle": sampleMiddleOffset,
	}
	baseOffsetCond := map[string]int64{
		"of_1st_chunk":  sampleChunkSize * 0,
		"of_2nd_chunk":  sampleChunkSize * 1,
		"of_last_chunk": lastChunkOffset1,
	}
	fileSizeCond := map[string]int64{
		"in_1_chunk_file":  sampleChunkSize * 1,
		"in_2_chunks_file": sampleChunkSize * 2,
		"in_max_size_file": int64(len(sampleData1)),
	}
	cacheCond := map[string][]region{
		"with_clean_cache": nil,
		"with_edge_filled_cache": {
			region{0, sampleChunkSize - 1},
			region{lastChunkOffset1, int64(len(sampleData1)) - 1},
		},
		"with_sparse_cache": {
			region{0, sampleChunkSize - 1},
			region{2 * sampleChunkSize, 3*sampleChunkSize - 1},
		},
	}
	for sn, size := range sizeCond {
		for in, innero := range innerOffsetCond {
			for bo, baseo := range baseOffsetCond {
				for fn, filesize := range fileSizeCond {
					for cc, cacheExcept := range cacheCond {
						t.Run(fmt.Sprintf("reading_%s_%s_%s_%s_%s", sn, in, bo, fn, cc), func(t *testing.T) {
							if filesize > int64(len(sampleData1)) {
								t.Fatal("sample file size is larger than sample data")
							}

							// Clamp the expected read length to the end of the file.
							wantN := size
							offset := baseo + innero
							if remain := filesize - offset; remain < wantN {
								if wantN = remain; wantN < 0 {
									wantN = 0
								}
							}

							// use constant string value as a data source.
							want := strings.NewReader(sampleData1)

							// data we want to get.
							wantData := make([]byte, wantN)
							_, err := want.ReadAt(wantData, offset)
							if err != nil && err != io.EOF {
								t.Fatalf("want.ReadAt (offset=%d,size=%d): %v", offset, wantN, err)
							}

							// data we get through a file.
							f := makeFile(t, []byte(sampleData1)[:filesize], sampleChunkSize)

							// Pre-populate the cache with the listed chunks and
							// prohibit the underlying reader from serving them,
							// so a remote fetch of a cached region fails the test.
							f.ra = newExceptSectionReader(t, f.ra, cacheExcept...)
							for _, reg := range cacheExcept {
								id := genID(f.digest, reg.b, reg.e-reg.b+1)
								w, err := f.cache.Add(id)
								if err != nil {
									// Fix: do not call w.Close() here; w may be
									// nil when Add fails, which would panic and
									// mask the real failure.
									t.Fatalf("failed to add cache %v: %v", id, err)
								}
								if _, err := w.Write([]byte(sampleData1[reg.b : reg.e+1])); err != nil {
									w.Close()
									t.Fatalf("failed to write cache %v: %v", id, err)
								}
								if err := w.Commit(); err != nil {
									w.Close()
									t.Fatalf("failed to commit cache %v: %v", id, err)
								}
								w.Close()
							}

							respData := make([]byte, size)
							n, err := f.ReadAt(respData, offset)
							if err != nil {
								t.Errorf("failed to read off=%d, size=%d, filesize=%d: %v", offset, size, filesize, err)
								return
							}
							respData = respData[:n]

							if !bytes.Equal(wantData, respData) {
								t.Errorf("off=%d, filesize=%d; read data{size=%d,data=%q}; want (size=%d,data=%q)",
									offset, filesize, len(respData), string(respData), wantN, string(wantData))
								return
							}

							// check cache has valid contents.
							// NOTE(review): `nr += n` below advances by the total
							// bytes read from the file, so the loop exits after
							// checking the first chunk only — presumably it was
							// meant to advance by ce.ChunkSize; verify intent
							// before changing.
							cn := 0
							nr := 0
							for int64(nr) < wantN {
								ce, ok := f.r.ChunkEntryForOffset(f.name, offset+int64(nr))
								if !ok {
									break
								}
								data := make([]byte, ce.ChunkSize)
								id := genID(f.digest, ce.ChunkOffset, ce.ChunkSize)
								r, err := f.cache.Get(id)
								if err != nil {
									t.Errorf("missed cache of offset=%d, size=%d: %v(got size=%d)", ce.ChunkOffset, ce.ChunkSize, err, n)
									return
								}
								// Closed when this subtest function returns.
								defer r.Close()
								if n, err := r.ReadAt(data, 0); (err != nil && err != io.EOF) || n != int(ce.ChunkSize) {
									t.Errorf("failed to read cache of offset=%d, size=%d: %v(got size=%d)", ce.ChunkOffset, ce.ChunkSize, err, n)
									return
								}
								nr += n
								cn++
							}
						})
					}
				}
			}
		}
	}
}
// exceptSectionReader is an io.ReaderAt that fails the test when any of the
// prohibited regions is requested. It is used to prove that cached chunks
// are never fetched from the underlying reader again.
type exceptSectionReader struct {
	ra     io.ReaderAt
	except map[region]bool
	t      *testing.T
}

// newExceptSectionReader wraps ra, marking each listed region as prohibited.
func newExceptSectionReader(t *testing.T, ra io.ReaderAt, except ...region) io.ReaderAt {
	prohibited := make(map[region]bool, len(except))
	for _, reg := range except {
		prohibited[reg] = true
	}
	return &exceptSectionReader{ra: ra, t: t, except: prohibited}
}

// ReadAt fails the test if the requested range is a prohibited region,
// otherwise it delegates to the wrapped reader.
func (er *exceptSectionReader) ReadAt(p []byte, offset int64) (int, error) {
	requested := region{offset, offset + int64(len(p)) - 1}
	if er.except[requested] {
		er.t.Fatalf("Requested prohibited region of chunk: (%d, %d)", offset, offset+int64(len(p))-1)
	}
	return er.ra.ReadAt(p, offset)
}
// makeFile builds a single-file eStargz blob containing contents split into
// chunkSize-sized chunks, opens it through a TOC-verified reader backed by an
// in-memory cache, and returns the resulting *file for use in tests.
func makeFile(t *testing.T, contents []byte, chunkSize int) *file {
	testName := "test"
	sr, dgst, err := testutil.BuildEStargz([]testutil.TarEntry{
		testutil.File(testName, string(contents)),
	}, testutil.WithEStargzOptions(estargz.WithChunkSize(chunkSize)))
	if err != nil {
		// Include err: the original message dropped the failure cause.
		t.Fatalf("failed to build sample estargz: %v", err)
	}
	sgz, err := estargz.Open(sr)
	if err != nil {
		t.Fatalf("failed to parse converted stargz: %v", err)
	}
	ev, err := sgz.VerifyTOC(dgst)
	if err != nil {
		t.Fatalf("failed to verify stargz: %v", err)
	}
	r, _, err := newReader(sr, cache.NewMemoryCache(), ev)
	if err != nil {
		t.Fatalf("Failed to open stargz file: %v", err)
	}
	ra, err := r.OpenFile(testName)
	if err != nil {
		t.Fatalf("Failed to open testing file: %v", err)
	}
	f, ok := ra.(*file)
	if !ok {
		t.Fatalf("invalid type of file %q", testName)
	}
	return f
}
func newReader(sr *io.SectionReader, cache cache.BlobCache, ev estargz.TOCEntryVerifier) (*reader, *estargz.TOCEntry, error) {
var r *reader
vr, err := NewReader(sr, cache)
if vr != nil {
r = vr.r
r.verifier = ev
}
root, ok := r.Lookup("")
if !ok {
return nil, nil, fmt.Errorf("failed to get root")
}
return r, root, err
} }

File diff suppressed because it is too large Load Diff

View File

@ -26,18 +26,16 @@ import (
"context" "context"
"fmt" "fmt"
"io" "io"
"io/ioutil"
"regexp" "regexp"
"sort"
"strings"
"sync" "sync"
"time" "time"
"github.com/containerd/containerd/v2/pkg/reference" "github.com/containerd/containerd/reference"
"github.com/containerd/containerd/remotes/docker"
"github.com/containerd/stargz-snapshotter/cache" "github.com/containerd/stargz-snapshotter/cache"
"github.com/containerd/stargz-snapshotter/fs/source"
ocispec "github.com/opencontainers/image-spec/specs-go/v1" ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"golang.org/x/sync/errgroup" "github.com/pkg/errors"
"golang.org/x/sync/singleflight"
) )
var contentRangeRegexp = regexp.MustCompile(`bytes ([0-9]+)-([0-9]+)/([0-9]+|\\*)`) var contentRangeRegexp = regexp.MustCompile(`bytes ([0-9]+)-([0-9]+)/([0-9]+|\\*)`)
@ -48,27 +46,24 @@ type Blob interface {
FetchedSize() int64 FetchedSize() int64
ReadAt(p []byte, offset int64, opts ...Option) (int, error) ReadAt(p []byte, offset int64, opts ...Option) (int, error)
Cache(offset int64, size int64, opts ...Option) error Cache(offset int64, size int64, opts ...Option) error
Refresh(ctx context.Context, host source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error Refresh(ctx context.Context, host docker.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error
Close() error Close() error
} }
type blob struct { type blob struct {
fetcher fetcher fetcher *fetcher
fetcherMu sync.Mutex fetcherMu sync.Mutex
size int64 size int64
chunkSize int64 chunkSize int64
prefetchChunkSize int64 cache cache.BlobCache
cache cache.BlobCache lastCheck time.Time
lastCheck time.Time lastCheckMu sync.Mutex
lastCheckMu sync.Mutex checkInterval time.Duration
checkInterval time.Duration fetchTimeout time.Duration
fetchTimeout time.Duration
fetchedRegionSet regionSet fetchedRegionSet regionSet
fetchedRegionSetMu sync.Mutex fetchedRegionSetMu sync.Mutex
fetchedRegionGroup singleflight.Group
fetchedRegionCopyMu sync.Mutex
resolver *Resolver resolver *Resolver
@ -76,22 +71,6 @@ type blob struct {
closedMu sync.Mutex closedMu sync.Mutex
} }
func makeBlob(fetcher fetcher, size int64, chunkSize int64, prefetchChunkSize int64,
blobCache cache.BlobCache, lastCheck time.Time, checkInterval time.Duration,
r *Resolver, fetchTimeout time.Duration) *blob {
return &blob{
fetcher: fetcher,
size: size,
chunkSize: chunkSize,
prefetchChunkSize: prefetchChunkSize,
cache: blobCache,
lastCheck: lastCheck,
checkInterval: checkInterval,
resolver: r,
fetchTimeout: fetchTimeout,
}
}
func (b *blob) Close() error { func (b *blob) Close() error {
b.closedMu.Lock() b.closedMu.Lock()
defer b.closedMu.Unlock() defer b.closedMu.Unlock()
@ -109,23 +88,22 @@ func (b *blob) isClosed() bool {
return closed return closed
} }
func (b *blob) Refresh(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error { func (b *blob) Refresh(ctx context.Context, hosts docker.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error {
if b.isClosed() { if b.isClosed() {
return fmt.Errorf("blob is already closed") return fmt.Errorf("blob is already closed")
} }
// refresh the fetcher // refresh the fetcher
f, newSize, err := b.resolver.resolveFetcher(ctx, hosts, refspec, desc) new, newSize, err := newFetcher(ctx, hosts, refspec, desc)
if err != nil { if err != nil {
return err return err
} } else if newSize != b.size {
if newSize != b.size { return fmt.Errorf("Invalid size of new blob %d; want %d", newSize, b.size)
return fmt.Errorf("invalid size of new blob %d; want %d", newSize, b.size)
} }
// update the blob's fetcher with new one // update the blob's fetcher with new one
b.fetcherMu.Lock() b.fetcherMu.Lock()
b.fetcher = f b.fetcher = new
b.fetcherMu.Unlock() b.fetcherMu.Unlock()
b.lastCheckMu.Lock() b.lastCheckMu.Lock()
b.lastCheck = time.Now() b.lastCheck = time.Now()
@ -173,35 +151,6 @@ func (b *blob) FetchedSize() int64 {
return sz return sz
} }
func makeSyncKey(allData map[region]io.Writer) string {
keys := make([]string, len(allData))
keysIndex := 0
for key := range allData {
keys[keysIndex] = fmt.Sprintf("[%d,%d]", key.b, key.e)
keysIndex++
}
sort.Strings(keys)
return strings.Join(keys, ",")
}
func (b *blob) cacheAt(offset int64, size int64, fr fetcher, cacheOpts *options) error {
fetchReg := region{floor(offset, b.chunkSize), ceil(offset+size-1, b.chunkSize) - 1}
discard := make(map[region]io.Writer)
err := b.walkChunks(fetchReg, func(reg region) error {
if r, err := b.cache.Get(fr.genID(reg), cacheOpts.cacheOpts...); err == nil {
return r.Close() // nop if the cache hits
}
discard[reg] = io.Discard
return nil
})
if err != nil {
return err
}
return b.fetchRange(discard, cacheOpts)
}
func (b *blob) Cache(offset int64, size int64, opts ...Option) error { func (b *blob) Cache(offset int64, size int64, opts ...Option) error {
if b.isClosed() { if b.isClosed() {
return fmt.Errorf("blob is already closed") return fmt.Errorf("blob is already closed")
@ -216,26 +165,20 @@ func (b *blob) Cache(offset int64, size int64, opts ...Option) error {
fr := b.fetcher fr := b.fetcher
b.fetcherMu.Unlock() b.fetcherMu.Unlock()
if b.prefetchChunkSize <= b.chunkSize { fetchReg := region{floor(offset, b.chunkSize), ceil(offset+size-1, b.chunkSize) - 1}
return b.cacheAt(offset, size, fr, &cacheOpts) discard := make(map[region]io.Writer)
} b.walkChunks(fetchReg, func(reg region) error {
if r, err := b.cache.Get(fr.genID(reg), cacheOpts.cacheOpts...); err == nil {
eg, _ := errgroup.WithContext(context.Background()) return r.Close() // nop if the cache hits
fetchSize := b.chunkSize * (b.prefetchChunkSize / b.chunkSize)
end := offset + size
for i := offset; i < end; i += fetchSize {
i, l := i, fetchSize
if i+l > end {
l = end - i
} }
eg.Go(func() error { discard[reg] = ioutil.Discard
return b.cacheAt(i, l, fr, &cacheOpts) return nil
}) })
if err := b.fetchRange(discard, &cacheOpts); err != nil {
return err
} }
return eg.Wait() return nil
} }
// ReadAt reads remote chunks from specified offset for the buffer size. // ReadAt reads remote chunks from specified offset for the buffer size.
@ -259,23 +202,13 @@ func (b *blob) ReadAt(p []byte, offset int64, opts ...Option) (int, error) {
o(&readAtOpts) o(&readAtOpts)
} }
fr := b.getFetcher() // Fetcher can be suddenly updated so we take and use the snapshot of it for
// consistency.
b.fetcherMu.Lock()
fr := b.fetcher
b.fetcherMu.Unlock()
if err := b.prepareChunksForRead(allRegion, offset, p, fr, allData, &readAtOpts); err != nil { b.walkChunks(allRegion, func(chunk region) error {
return 0, err
}
// Read required data
if err := b.fetchRange(allData, &readAtOpts); err != nil {
return 0, err
}
return b.adjustBufferSize(p, offset), nil
}
// prepareChunksForRead prepares chunks for reading by checking cache and setting up writers
func (b *blob) prepareChunksForRead(allRegion region, offset int64, p []byte, fr fetcher, allData map[region]io.Writer, opts *options) error {
return b.walkChunks(allRegion, func(chunk region) error {
var ( var (
base = positive(chunk.b - offset) base = positive(chunk.b - offset)
lowerUnread = positive(offset - chunk.b) lowerUnread = positive(offset - chunk.b)
@ -283,9 +216,14 @@ func (b *blob) prepareChunksForRead(allRegion region, offset int64, p []byte, fr
expectedSize = chunk.size() - upperUnread - lowerUnread expectedSize = chunk.size() - upperUnread - lowerUnread
) )
// Try to read from cache first // Check if the content exists in the cache
if err := b.readFromCache(chunk, p[base:base+expectedSize], lowerUnread, fr, opts); err == nil { r, err := b.cache.Get(fr.genID(chunk), readAtOpts.cacheOpts...)
return nil if err == nil {
defer r.Close()
n, err := r.ReadAt(p[base:base+expectedSize], lowerUnread)
if (err == nil || err == io.EOF) && int64(n) == expectedSize {
return nil
}
} }
// We missed cache. Take it from remote registry. // We missed cache. Take it from remote registry.
@ -294,47 +232,45 @@ func (b *blob) prepareChunksForRead(allRegion region, offset int64, p []byte, fr
allData[chunk] = newBytesWriter(p[base:base+expectedSize], lowerUnread) allData[chunk] = newBytesWriter(p[base:base+expectedSize], lowerUnread)
return nil return nil
}) })
// Read required data
if err := b.fetchRange(allData, &readAtOpts); err != nil {
return 0, err
}
// Adjust the buffer size according to the blob size
if remain := b.size - offset; int64(len(p)) >= remain {
if remain < 0 {
remain = 0
}
p = p[:remain]
}
return len(p), nil
} }
// readFromCache attempts to read chunk data from cache // fetchRange fetches all specified chunks from local cache and remote blob.
func (b *blob) readFromCache(chunk region, dest []byte, offset int64, fr fetcher, opts *options) error { func (b *blob) fetchRange(allData map[region]io.Writer, opts *options) error {
r, err := b.cache.Get(fr.genID(chunk), opts.cacheOpts...)
if err != nil {
return err
}
defer r.Close()
n, err := r.ReadAt(dest, offset)
if err != nil && err != io.EOF {
return err
}
if n != len(dest) {
return fmt.Errorf("incomplete read from cache: read %d bytes, expected %d bytes", n, len(dest))
}
return nil
}
// fetchRegions fetches all specified chunks from remote blob and puts it in the local cache.
// It must be called from within fetchRange and need to ensure that it is inside the singleflight `Do` operation.
func (b *blob) fetchRegions(allData map[region]io.Writer, fetched map[region]bool, opts *options) error {
if len(allData) == 0 { if len(allData) == 0 {
return nil return nil
} }
fr := b.getFetcher() // Fetcher can be suddenly updated so we take and use the snapshot of it for
// consistency.
b.fetcherMu.Lock()
fr := b.fetcher
b.fetcherMu.Unlock()
// request missed regions // request missed regions
var req []region var req []region
fetched := make(map[region]bool)
for reg := range allData { for reg := range allData {
req = append(req, reg) req = append(req, reg)
fetched[reg] = false fetched[reg] = false
} }
ctx, cancel := context.WithTimeout(context.Background(), b.fetchTimeout)
fetchCtx, cancel := context.WithTimeout(context.Background(), b.fetchTimeout)
defer cancel() defer cancel()
if opts.ctx != nil { mr, err := fr.fetch(ctx, req, true, opts)
fetchCtx = opts.ctx
}
mr, err := fr.fetch(fetchCtx, req, true)
if err != nil { if err != nil {
return err return err
} }
@ -352,15 +288,41 @@ func (b *blob) fetchRegions(allData map[region]io.Writer, fetched map[region]boo
if err == io.EOF { if err == io.EOF {
break break
} else if err != nil { } else if err != nil {
return fmt.Errorf("failed to read multipart resp: %w", err) return errors.Wrapf(err, "failed to read multipart resp")
} }
if err := b.walkChunks(reg, func(chunk region) (retErr error) { if err := b.walkChunks(reg, func(chunk region) (retErr error) {
if err := b.cacheChunkData(chunk, p, fr, allData, fetched, opts); err != nil { id := fr.genID(chunk)
cw, err := b.cache.Add(id, opts.cacheOpts...)
if err != nil {
return err return err
} }
defer cw.Close()
w := io.Writer(cw)
// If this chunk is one of the targets, write the content to the
// passed reader too.
if _, ok := fetched[chunk]; ok {
w = io.MultiWriter(w, allData[chunk])
}
// Copy the target chunk
if _, err := io.CopyN(w, p, chunk.size()); err != nil {
cw.Abort()
return err
}
// Add the target chunk to the cache
if err := cw.Commit(); err != nil {
return err
}
b.fetchedRegionSetMu.Lock()
b.fetchedRegionSet.add(chunk)
b.fetchedRegionSetMu.Unlock()
fetched[chunk] = true
return nil return nil
}); err != nil { }); err != nil {
return fmt.Errorf("failed to get chunks: %w", err) return errors.Wrapf(err, "failed to get chunks")
} }
} }
@ -378,81 +340,6 @@ func (b *blob) fetchRegions(allData map[region]io.Writer, fetched map[region]boo
return nil return nil
} }
// fetchRange fetches all specified chunks from local cache and remote blob.
func (b *blob) fetchRange(allData map[region]io.Writer, opts *options) error {
if len(allData) == 0 {
return nil
}
key := makeSyncKey(allData)
fetched := make(map[region]bool)
_, err, shared := b.fetchedRegionGroup.Do(key, func() (interface{}, error) {
return nil, b.fetchRegions(allData, fetched, opts)
})
// When unblocked try to read from cache in case if there were no errors
// If we fail reading from cache, fetch from remote registry again
if err == nil && shared {
if err := b.handleSharedFetch(allData, fetched, opts); err != nil {
return b.fetchRange(allData, opts) // retry on error
}
}
return err
}
// handleSharedFetch handles the case when multiple goroutines share the same fetch result
func (b *blob) handleSharedFetch(allData map[region]io.Writer, fetched map[region]bool, opts *options) error {
for reg := range allData {
if _, ok := fetched[reg]; ok {
continue
}
if err := b.copyFetchedChunks(reg, allData, opts); err != nil {
return err
}
}
return nil
}
// copyFetchedChunks copies fetched chunks from cache to target writer
func (b *blob) copyFetchedChunks(reg region, allData map[region]io.Writer, opts *options) error {
return b.walkChunks(reg, func(chunk region) error {
fr := b.getFetcher()
r, err := b.cache.Get(fr.genID(chunk), opts.cacheOpts...)
if err != nil {
return err
}
defer r.Close()
b.fetchedRegionCopyMu.Lock()
defer b.fetchedRegionCopyMu.Unlock()
if _, err := io.CopyN(allData[chunk], io.NewSectionReader(r, 0, chunk.size()), chunk.size()); err != nil {
return err
}
return nil
})
}
// getFetcher safely gets the current fetcher
// Fetcher can be suddenly updated so we take and use the snapshot of it for consistency.
func (b *blob) getFetcher() fetcher {
b.fetcherMu.Lock()
defer b.fetcherMu.Unlock()
return b.fetcher
}
// adjustBufferSize adjusts buffer size according to the blob size
func (b *blob) adjustBufferSize(p []byte, offset int64) int {
if remain := b.size - offset; int64(len(p)) >= remain {
if remain < 0 {
remain = 0
}
p = p[:remain]
}
return len(p)
}
type walkFunc func(reg region) error type walkFunc func(reg region) error
// walkChunks walks chunks from begin to end in order in the specified region. // walkChunks walks chunks from begin to end in order in the specified region.
@ -526,34 +413,3 @@ func positive(n int64) int64 {
} }
return n return n
} }
// cacheChunkData handles caching of chunk data
func (b *blob) cacheChunkData(chunk region, r io.Reader, fr fetcher, allData map[region]io.Writer, fetched map[region]bool, opts *options) error {
id := fr.genID(chunk)
cw, err := b.cache.Add(id, opts.cacheOpts...)
if err != nil {
return fmt.Errorf("failed to create cache writer: %w", err)
}
defer cw.Close()
w := io.Writer(cw)
if _, ok := fetched[chunk]; ok {
w = io.MultiWriter(w, allData[chunk])
}
if _, err := io.CopyN(w, r, chunk.size()); err != nil {
cw.Abort()
return fmt.Errorf("failed to write chunk data: %w", err)
}
if err := cw.Commit(); err != nil {
return fmt.Errorf("failed to commit chunk: %w", err)
}
b.fetchedRegionSetMu.Lock()
b.fetchedRegionSet.add(chunk)
b.fetchedRegionSetMu.Unlock()
fetched[chunk] = true
return nil
}

View File

@ -26,6 +26,7 @@ import (
"bytes" "bytes"
"fmt" "fmt"
"io" "io"
"io/ioutil"
"mime" "mime"
"mime/multipart" "mime/multipart"
"net/http" "net/http"
@ -33,8 +34,6 @@ import (
"sort" "sort"
"strconv" "strconv"
"strings" "strings"
"sync"
"sync/atomic"
"testing" "testing"
"time" "time"
@ -42,13 +41,12 @@ import (
) )
const ( const (
testURL = "http://testdummy.com/v2/library/test/blobs/sha256:deadbeaf" testURL = "http://testdummy.com/v2/library/test/blobs/sha256:deadbeaf"
rangeHeaderPrefix = "bytes=" rangeHeaderPrefix = "bytes="
sampleChunkSize = 3 sampleChunkSize = 3
sampleMiddleOffset = sampleChunkSize / 2 sampleMiddleOffset = sampleChunkSize / 2
sampleData1 = "0123456789" sampleData1 = "0123456789"
lastChunkOffset1 = sampleChunkSize * (int64(len(sampleData1)) / sampleChunkSize) lastChunkOffset1 = sampleChunkSize * (int64(len(sampleData1)) / sampleChunkSize)
defaultPrefetchChunkSize = 0
) )
// Tests ReadAt and Cache method of each file. // Tests ReadAt and Cache method of each file.
@ -71,10 +69,6 @@ func TestReadAt(t *testing.T) {
"in_3_chunks_blob": sampleChunkSize * 3, "in_3_chunks_blob": sampleChunkSize * 3,
"in_max_size_blob": int64(len(sampleData1)), "in_max_size_blob": int64(len(sampleData1)),
} }
prefetchChunkSizeCond := map[string]int64{
"single_get_prefetch": 0,
"multiple_get_prefetch": sampleChunkSize * 2,
}
type cacheCond struct { type cacheCond struct {
reg region reg region
mustHit bool mustHit bool
@ -125,57 +119,54 @@ func TestReadAt(t *testing.T) {
for in, innero := range innerOffsetCond { for in, innero := range innerOffsetCond {
for bo, baseo := range baseOffsetCond { for bo, baseo := range baseOffsetCond {
for bs, blobsize := range blobSizeCond { for bs, blobsize := range blobSizeCond {
for pc, prefetchchunksize := range prefetchChunkSizeCond { for tc, trCond := range transportCond {
for tc, trCond := range transportCond { t.Run(fmt.Sprintf("reading_%s_%s_%s_%s_%s", sn, in, bo, bs, tc), func(t *testing.T) {
t.Run(fmt.Sprintf("reading_%s_%s_%s_%s_%s_%s", sn, in, bo, bs, pc, tc), func(t *testing.T) { if blobsize > int64(len(sampleData1)) {
if blobsize > int64(len(sampleData1)) { t.Fatal("sample file size is larger than sample data")
t.Fatal("sample file size is larger than sample data") }
wantN := size
offset := baseo + innero
if remain := blobsize - offset; remain < wantN {
if wantN = remain; wantN < 0 {
wantN = 0
} }
}
wantN := size // use constant string value as a data source.
offset := baseo + innero want := strings.NewReader(sampleData1)
if remain := blobsize - offset; remain < wantN {
if wantN = remain; wantN < 0 { // data we want to get.
wantN = 0 wantData := make([]byte, wantN)
} _, err := want.ReadAt(wantData, offset)
if err != nil && err != io.EOF {
t.Fatalf("want.ReadAt (offset=%d,size=%d): %v", offset, wantN, err)
}
// data we get through a remote blob.
blob := []byte(sampleData1)[:blobsize]
// Check with allowing multi range requests
var cacheChunks []region
var except []region
for _, cond := range trCond.cacheCond {
cacheChunks = append(cacheChunks, cond.reg)
if cond.mustHit {
except = append(except, cond.reg)
} }
}
tr := multiRoundTripper(t, blob, allowMultiRange(trCond.allowMultiRange), exceptChunks(except))
// use constant string value as a data source. // Check ReadAt method
want := strings.NewReader(sampleData1) bb1 := makeBlob(t, blobsize, sampleChunkSize, tr)
cacheAll(t, bb1, cacheChunks)
// data we want to get. checkRead(t, wantData, bb1, offset, size)
wantData := make([]byte, wantN)
_, err := want.ReadAt(wantData, offset)
if err != nil && err != io.EOF {
t.Fatalf("want.ReadAt (offset=%d,size=%d): %v", offset, wantN, err)
}
// data we get through a remote blob.
blob := []byte(sampleData1)[:blobsize]
// Check with allowing multi range requests
var cacheChunks []region
var except []region
for _, cond := range trCond.cacheCond {
cacheChunks = append(cacheChunks, cond.reg)
if cond.mustHit {
except = append(except, cond.reg)
}
}
tr := multiRoundTripper(t, blob, allowMultiRange(trCond.allowMultiRange), exceptChunks(except))
// Check ReadAt method
bb1 := makeTestBlob(t, blobsize, sampleChunkSize, prefetchchunksize, tr)
cacheAll(t, bb1, cacheChunks)
checkRead(t, wantData, bb1, offset, size)
// Check Cache method
bb2 := makeTestBlob(t, blobsize, sampleChunkSize, prefetchchunksize, tr)
cacheAll(t, bb2, cacheChunks)
checkCache(t, bb2, offset, size)
})
}
// Check Cache method
bb2 := makeBlob(t, blobsize, sampleChunkSize, tr)
cacheAll(t, bb2, cacheChunks)
checkCache(t, bb2, offset, size)
})
} }
} }
} }
@ -260,7 +251,7 @@ func checkAllCached(t *testing.T, r *blob, offset, size int64) {
func TestFailReadAt(t *testing.T) { func TestFailReadAt(t *testing.T) {
// test failed http respose. // test failed http respose.
r := makeTestBlob(t, int64(len(sampleData1)), sampleChunkSize, defaultPrefetchChunkSize, failRoundTripper()) r := makeBlob(t, int64(len(sampleData1)), sampleChunkSize, failRoundTripper())
respData := make([]byte, len(sampleData1)) respData := make([]byte, len(sampleData1))
_, err := r.ReadAt(respData, 0) _, err := r.ReadAt(respData, 0)
if err == nil || err == io.EOF { if err == nil || err == io.EOF {
@ -279,12 +270,12 @@ func TestFailReadAt(t *testing.T) {
func checkBrokenBody(t *testing.T, allowMultiRange bool) { func checkBrokenBody(t *testing.T, allowMultiRange bool) {
respData := make([]byte, len(sampleData1)) respData := make([]byte, len(sampleData1))
r := makeTestBlob(t, int64(len(sampleData1)), sampleChunkSize, defaultPrefetchChunkSize, brokenBodyRoundTripper(t, []byte(sampleData1), allowMultiRange)) r := makeBlob(t, int64(len(sampleData1)), sampleChunkSize, brokenBodyRoundTripper(t, []byte(sampleData1), allowMultiRange))
if _, err := r.ReadAt(respData, 0); err == nil || err == io.EOF { if _, err := r.ReadAt(respData, 0); err == nil || err == io.EOF {
t.Errorf("must be fail for broken full body but err=%v (allowMultiRange=%v)", err, allowMultiRange) t.Errorf("must be fail for broken full body but err=%v (allowMultiRange=%v)", err, allowMultiRange)
return return
} }
r = makeTestBlob(t, int64(len(sampleData1)), sampleChunkSize, defaultPrefetchChunkSize, brokenBodyRoundTripper(t, []byte(sampleData1), allowMultiRange)) r = makeBlob(t, int64(len(sampleData1)), sampleChunkSize, brokenBodyRoundTripper(t, []byte(sampleData1), allowMultiRange))
if _, err := r.ReadAt(respData[0:len(sampleData1)/2], 0); err == nil || err == io.EOF { if _, err := r.ReadAt(respData[0:len(sampleData1)/2], 0); err == nil || err == io.EOF {
t.Errorf("must be fail for broken multipart body but err=%v (allowMultiRange=%v)", err, allowMultiRange) t.Errorf("must be fail for broken multipart body but err=%v (allowMultiRange=%v)", err, allowMultiRange)
return return
@ -292,7 +283,7 @@ func checkBrokenBody(t *testing.T, allowMultiRange bool) {
} }
func checkBrokenHeader(t *testing.T, allowMultiRange bool) { func checkBrokenHeader(t *testing.T, allowMultiRange bool) {
r := makeTestBlob(t, int64(len(sampleData1)), sampleChunkSize, defaultPrefetchChunkSize, brokenHeaderRoundTripper(t, []byte(sampleData1), allowMultiRange)) r := makeBlob(t, int64(len(sampleData1)), sampleChunkSize, brokenHeaderRoundTripper(t, []byte(sampleData1), allowMultiRange))
respData := make([]byte, len(sampleData1)) respData := make([]byte, len(sampleData1))
if _, err := r.ReadAt(respData[0:len(sampleData1)/2], 0); err == nil || err == io.EOF { if _, err := r.ReadAt(respData[0:len(sampleData1)/2], 0); err == nil || err == io.EOF {
t.Errorf("must be fail for broken multipart header but err=%v (allowMultiRange=%v)", err, allowMultiRange) t.Errorf("must be fail for broken multipart header but err=%v (allowMultiRange=%v)", err, allowMultiRange)
@ -300,283 +291,18 @@ func checkBrokenHeader(t *testing.T, allowMultiRange bool) {
} }
} }
func TestParallelDownloadingBehavior(t *testing.T) { func makeBlob(t *testing.T, size int64, chunkSize int64, fn RoundTripFunc) *blob {
type regionsBoundaries struct { return &blob{
regions []region fetcher: &fetcher{
start int64
end int64
}
type testData struct {
name string
regions [3]regionsBoundaries
roundtripCount int64
chunkSize int64
content string
}
tests := []testData{
{
name: "no_data",
regions: [3]regionsBoundaries{},
roundtripCount: 0,
chunkSize: 4,
},
{
name: "same_regions",
regions: [3]regionsBoundaries{
{
regions: []region{
{
b: 0,
e: 3,
},
},
start: 0,
end: 3,
},
{
regions: []region{
{
b: 0,
e: 3,
},
},
start: 0,
end: 3,
},
{
regions: []region{
{
b: 0,
e: 3,
},
},
start: 0,
end: 3,
},
},
roundtripCount: 1,
chunkSize: 4,
content: "test",
},
{
name: "same_regions_multiple_values",
regions: [3]regionsBoundaries{
{
regions: []region{
{
b: 0,
e: 3,
},
{
b: 4,
e: 7,
},
},
start: 0,
end: 7,
},
{
regions: []region{
{
b: 0,
e: 3,
},
{
b: 4,
e: 7,
},
},
start: 0,
end: 7,
},
{
regions: []region{
{
b: 0,
e: 3,
},
{
b: 4,
e: 7,
},
},
start: 0,
end: 7,
},
},
roundtripCount: 1,
chunkSize: 4,
content: "test1234",
},
{
name: "different_regions",
regions: [3]regionsBoundaries{
{
regions: []region{
{
b: 0,
e: 3,
},
},
start: 0,
end: 3,
},
{
regions: []region{
{
b: 4,
e: 7,
},
},
start: 4,
end: 7,
},
{
regions: []region{
{
b: 8,
e: 11,
},
},
start: 8,
end: 11,
},
},
roundtripCount: 3,
chunkSize: 4,
content: "test12345678",
},
{
name: "some_overlap",
regions: [3]regionsBoundaries{
{
regions: []region{
{
b: 0,
e: 3,
},
},
start: 0,
end: 3,
},
{
regions: []region{
{
b: 0,
e: 3,
},
},
start: 0,
end: 3,
},
{
regions: []region{
{
b: 4,
e: 7,
},
},
start: 4,
end: 7,
},
},
roundtripCount: 2,
chunkSize: 4,
content: "test1234",
},
}
var wg sync.WaitGroup
// we always run 3 routines
routines := 3
for _, tst := range tests {
var (
tr = &callsCountRoundTripper{
content: tst.content,
}
b = &blob{
fetcher: &httpFetcher{
url: "test",
tr: tr,
},
chunkSize: tst.chunkSize,
size: int64(len(tst.content)),
cache: cache.NewMemoryCache(),
}
)
start := make(chan struct{})
wg.Add(routines)
var contentBytes [3][]byte
for i := 0; i < routines; i++ {
p := make([]byte, len(tst.content))
contentBytes[i] = p
allData := make(map[region]io.Writer)
if i < len(tst.regions) {
offset := int64(0)
for j := range tst.regions[i].regions {
r := tst.regions[i].regions[j]
var (
base = positive(r.b - offset)
lowerUnread = positive(offset - r.b)
upperUnread = positive(r.e + 1 - (offset + int64(len(p))))
expectedSize = r.size() - upperUnread - lowerUnread
)
allData[tst.regions[i].regions[j]] = newBytesWriter(p[base:base+expectedSize], lowerUnread)
}
}
go func() {
<-start // by blocking on channel start we can ensure that the goroutines will run at approximately the same time
defer wg.Done()
b.fetchRange(allData, &options{})
}()
}
close(start) // starting
wg.Wait()
// We expect the number of round trip calls to be 1, since we are making 5 calls to fetchRange with
// overlapping intervals.
if tr.count != tst.roundtripCount {
t.Errorf("%v test failed: the round trip count should be %v, but was %v", tst.name, tst.roundtripCount, tr.count)
}
// Check for contents
for j := range contentBytes {
start := tst.regions[j].start
end := tst.regions[j].end
for i := start; i < end; i++ {
if contentBytes[j][i] != []byte(tst.content)[i] {
t.Errorf("%v test failed: the output sequence is wrong, wanted %v, got %v", tst.name, []byte(tst.content)[start:end], contentBytes[j][start:end])
break
}
}
}
}
}
func makeTestBlob(t *testing.T, size int64, chunkSize int64, prefetchChunkSize int64, fn RoundTripFunc) *blob {
var (
lastCheck time.Time
checkInterval time.Duration
)
return makeBlob(
&httpFetcher{
url: testURL, url: testURL,
tr: fn, tr: fn,
}, },
size, size: size,
chunkSize, chunkSize: chunkSize,
prefetchChunkSize, cache: cache.NewMemoryCache(),
cache.NewMemoryCache(), resolver: &Resolver{},
lastCheck, fetchTimeout: time.Duration(defaultFetchTimeoutSec) * time.Second,
checkInterval, }
&Resolver{},
time.Duration(defaultFetchTimeoutSec)*time.Second)
} }
func TestCheckInterval(t *testing.T) { func TestCheckInterval(t *testing.T) {
@ -584,7 +310,7 @@ func TestCheckInterval(t *testing.T) {
tr = &calledRoundTripper{} tr = &calledRoundTripper{}
firstTime = time.Now() firstTime = time.Now()
b = &blob{ b = &blob{
fetcher: &httpFetcher{ fetcher: &fetcher{
url: "test", url: "test",
tr: tr, tr: tr,
}, },
@ -609,7 +335,7 @@ func TestCheckInterval(t *testing.T) {
if !tr.called { if !tr.called {
return b.lastCheck, false return b.lastCheck, false
} }
if !b.lastCheck.After(beforeUpdate) || !b.lastCheck.Before(afterUpdate) { if !(b.lastCheck.After(beforeUpdate) && b.lastCheck.Before(afterUpdate)) {
t.Errorf("%q: updated time must be after %q and before %q but %q", name, beforeUpdate, afterUpdate, b.lastCheck) t.Errorf("%q: updated time must be after %q and before %q but %q", name, beforeUpdate, afterUpdate, b.lastCheck)
} }
@ -631,24 +357,6 @@ func TestCheckInterval(t *testing.T) {
} }
} }
type callsCountRoundTripper struct {
count int64
content string
}
func (c *callsCountRoundTripper) RoundTrip(req *http.Request) (res *http.Response, err error) {
atomic.AddInt64(&c.count, 1)
time.Sleep(50 * time.Millisecond) // sleep for 50 milliseconds to emulate the http call and to make sure that we can run tests on parallel goroutines
convertBody := func(r io.ReadCloser) io.ReadCloser { return r }
header := make(http.Header)
header.Add("Content-Length", fmt.Sprintf("%d", len(c.content)))
return &http.Response{
StatusCode: http.StatusOK,
Header: header,
Body: convertBody(io.NopCloser(bytes.NewReader([]byte(c.content)))),
}, nil
}
type calledRoundTripper struct { type calledRoundTripper struct {
called bool called bool
} }
@ -658,7 +366,7 @@ func (c *calledRoundTripper) RoundTrip(req *http.Request) (res *http.Response, e
res = &http.Response{ res = &http.Response{
StatusCode: http.StatusOK, StatusCode: http.StatusOK,
Header: make(http.Header), Header: make(http.Header),
Body: io.NopCloser(bytes.NewReader([]byte("test"))), Body: ioutil.NopCloser(bytes.NewReader([]byte("test"))),
} }
return return
} }
@ -690,7 +398,7 @@ func multiRoundTripper(t *testing.T, contents []byte, opts ...interface{}) Round
return &http.Response{ return &http.Response{
StatusCode: statusCode, StatusCode: statusCode,
Header: make(http.Header), Header: make(http.Header),
Body: io.NopCloser(bytes.NewReader([]byte{})), Body: ioutil.NopCloser(bytes.NewReader([]byte{})),
} }
} }
@ -739,7 +447,7 @@ func multiRoundTripper(t *testing.T, contents []byte, opts ...interface{}) Round
return &http.Response{ return &http.Response{
StatusCode: http.StatusOK, StatusCode: http.StatusOK,
Header: header, Header: header,
Body: convertBody(io.NopCloser(bytes.NewReader(contents))), Body: convertBody(ioutil.NopCloser(bytes.NewReader(contents))),
} }
} }
} }
@ -767,7 +475,7 @@ func multiRoundTripper(t *testing.T, contents []byte, opts ...interface{}) Round
return &http.Response{ return &http.Response{
StatusCode: http.StatusPartialContent, StatusCode: http.StatusPartialContent,
Header: header, Header: header,
Body: convertBody(io.NopCloser(bytes.NewReader(part))), Body: convertBody(ioutil.NopCloser(bytes.NewReader(part))),
} }
} }
@ -808,7 +516,7 @@ func multiRoundTripper(t *testing.T, contents []byte, opts ...interface{}) Round
return &http.Response{ return &http.Response{
StatusCode: http.StatusPartialContent, StatusCode: http.StatusPartialContent,
Header: header, Header: header,
Body: convertBody(io.NopCloser(&buf)), Body: convertBody(ioutil.NopCloser(&buf)),
} }
} }
} }
@ -818,7 +526,7 @@ func failRoundTripper() RoundTripFunc {
return &http.Response{ return &http.Response{
StatusCode: http.StatusInternalServerError, StatusCode: http.StatusInternalServerError,
Header: make(http.Header), Header: make(http.Header),
Body: io.NopCloser(bytes.NewReader([]byte{})), Body: ioutil.NopCloser(bytes.NewReader([]byte{})),
} }
} }
} }
@ -826,11 +534,11 @@ func failRoundTripper() RoundTripFunc {
func brokenBodyRoundTripper(t *testing.T, contents []byte, multiRange bool) RoundTripFunc { func brokenBodyRoundTripper(t *testing.T, contents []byte, multiRange bool) RoundTripFunc {
breakReadCloser := func(r io.ReadCloser) io.ReadCloser { breakReadCloser := func(r io.ReadCloser) io.ReadCloser {
defer r.Close() defer r.Close()
data, err := io.ReadAll(r) data, err := ioutil.ReadAll(r)
if err != nil { if err != nil {
t.Fatalf("failed to break read closer faild to read original: %v", err) t.Fatalf("failed to break read closer faild to read original: %v", err)
} }
return io.NopCloser(bytes.NewReader(data[:len(data)/2])) return ioutil.NopCloser(bytes.NewReader(data[:len(data)/2]))
} }
tr := multiRoundTripper(t, contents, allowMultiRange(multiRange), bodyConverter(breakReadCloser)) tr := multiRoundTripper(t, contents, allowMultiRange(multiRange), bodyConverter(breakReadCloser))
return func(req *http.Request) *http.Response { return func(req *http.Request) *http.Response {

View File

@ -24,12 +24,10 @@ package remote
import ( import (
"context" "context"
"crypto/rand"
"crypto/sha256" "crypto/sha256"
"errors"
"fmt" "fmt"
"io" "io"
"math/big" "io/ioutil"
"mime" "mime"
"mime/multipart" "mime/multipart"
"net/http" "net/http"
@ -39,30 +37,22 @@ import (
"sync" "sync"
"time" "time"
"github.com/containerd/containerd/v2/core/remotes/docker" "github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/v2/pkg/reference" "github.com/containerd/containerd/reference"
"github.com/containerd/errdefs" "github.com/containerd/containerd/remotes/docker"
"github.com/containerd/log"
"github.com/containerd/stargz-snapshotter/cache" "github.com/containerd/stargz-snapshotter/cache"
"github.com/containerd/stargz-snapshotter/fs/config" "github.com/containerd/stargz-snapshotter/fs/config"
commonmetrics "github.com/containerd/stargz-snapshotter/fs/metrics/common"
"github.com/containerd/stargz-snapshotter/fs/source"
rhttp "github.com/hashicorp/go-retryablehttp"
digest "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1" ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
) )
const ( const (
defaultChunkSize = 50000 defaultChunkSize = 50000
defaultValidIntervalSec = 60 defaultValidIntervalSec = 60
defaultFetchTimeoutSec = 300 defaultFetchTimeoutSec = 300
defaultMaxRetries = 5
defaultMinWaitMSec = 30
defaultMaxWaitMSec = 300000
) )
func NewResolver(cfg config.BlobConfig, handlers map[string]Handler) *Resolver { func NewResolver(cfg config.BlobConfig) *Resolver {
if cfg.ChunkSize == 0 { // zero means "use default chunk size" if cfg.ChunkSize == 0 { // zero means "use default chunk size"
cfg.ChunkSize = defaultChunkSize cfg.ChunkSize = defaultChunkSize
} }
@ -75,137 +65,43 @@ func NewResolver(cfg config.BlobConfig, handlers map[string]Handler) *Resolver {
if cfg.FetchTimeoutSec == 0 { if cfg.FetchTimeoutSec == 0 {
cfg.FetchTimeoutSec = defaultFetchTimeoutSec cfg.FetchTimeoutSec = defaultFetchTimeoutSec
} }
if cfg.MaxRetries == 0 {
cfg.MaxRetries = defaultMaxRetries
}
if cfg.MinWaitMSec == 0 {
cfg.MinWaitMSec = defaultMinWaitMSec
}
if cfg.MaxWaitMSec == 0 {
cfg.MaxWaitMSec = defaultMaxWaitMSec
}
return &Resolver{ return &Resolver{
blobConfig: cfg, blobConfig: cfg,
handlers: handlers,
} }
} }
type Resolver struct { type Resolver struct {
blobConfig config.BlobConfig blobConfig config.BlobConfig
handlers map[string]Handler
} }
type fetcher interface { func (r *Resolver) Resolve(ctx context.Context, hosts docker.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor, blobCache cache.BlobCache) (Blob, error) {
fetch(ctx context.Context, rs []region, retry bool) (multipartReadCloser, error) fetcher, size, err := newFetcher(ctx, hosts, refspec, desc)
check() error
genID(reg region) string
}
func (r *Resolver) Resolve(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor, blobCache cache.BlobCache) (Blob, error) {
f, size, err := r.resolveFetcher(ctx, hosts, refspec, desc)
if err != nil { if err != nil {
return nil, err return nil, err
} }
blobConfig := &r.blobConfig return &blob{
return makeBlob(f, fetcher: fetcher,
size, size: size,
blobConfig.ChunkSize, chunkSize: r.blobConfig.ChunkSize,
blobConfig.PrefetchChunkSize, cache: blobCache,
blobCache, lastCheck: time.Now(),
time.Now(), checkInterval: time.Duration(r.blobConfig.ValidInterval) * time.Second,
time.Duration(blobConfig.ValidInterval)*time.Second, resolver: r,
r, fetchTimeout: time.Duration(r.blobConfig.FetchTimeoutSec) * time.Second,
time.Duration(blobConfig.FetchTimeoutSec)*time.Second), nil }, nil
} }
func (r *Resolver) resolveFetcher(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) (f fetcher, size int64, err error) { func newFetcher(ctx context.Context, hosts docker.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) (*fetcher, int64, error) {
blobConfig := &r.blobConfig reghosts, err := hosts(refspec.Hostname())
fc := &fetcherConfig{
hosts: hosts,
refspec: refspec,
desc: desc,
maxRetries: blobConfig.MaxRetries,
minWait: time.Duration(blobConfig.MinWaitMSec) * time.Millisecond,
maxWait: time.Duration(blobConfig.MaxWaitMSec) * time.Millisecond,
}
var errs []error
for name, p := range r.handlers {
// TODO: allow to configure the selection of readers based on the hostname in refspec
r, size, err := p.Handle(ctx, desc)
if err != nil {
errs = append(errs, err)
continue
}
log.G(ctx).WithField("handler name", name).WithField("ref", refspec.String()).WithField("digest", desc.Digest).
Debugf("contents is provided by a handler")
return &remoteFetcher{r}, size, nil
}
handlersErr := errors.Join(errs...)
log.G(ctx).WithError(handlersErr).WithField("ref", refspec.String()).WithField("digest", desc.Digest).Debugf("using default handler")
hf, size, err := newHTTPFetcher(ctx, fc)
if err != nil { if err != nil {
return nil, 0, err return nil, 0, err
} }
if blobConfig.ForceSingleRangeMode {
hf.singleRangeMode()
}
return hf, size, err
}
type fetcherConfig struct {
hosts source.RegistryHosts
refspec reference.Spec
desc ocispec.Descriptor
maxRetries int
minWait time.Duration
maxWait time.Duration
}
func jitter(duration time.Duration) time.Duration {
if duration <= 0 {
return duration
}
b, err := rand.Int(rand.Reader, big.NewInt(int64(duration)))
if err != nil {
panic(err)
}
return time.Duration(b.Int64() + int64(duration))
}
// backoffStrategy extends retryablehttp's DefaultBackoff to add a random jitter to avoid overwhelming the repository
// when it comes back online
// DefaultBackoff either tries to parse the 'Retry-After' header of the response; or, it uses an exponential backoff
// 2 ^ numAttempts, limited by max
func backoffStrategy(min, max time.Duration, attemptNum int, resp *http.Response) time.Duration {
delayTime := rhttp.DefaultBackoff(min, max, attemptNum, resp)
return jitter(delayTime)
}
// retryStrategy extends retryablehttp's DefaultRetryPolicy to debug log the error when retrying
// DefaultRetryPolicy retries whenever err is non-nil (except for some url errors) or if returned
// status code is 429 or 5xx (except 501)
func retryStrategy(ctx context.Context, resp *http.Response, err error) (bool, error) {
retry, err2 := rhttp.DefaultRetryPolicy(ctx, resp, err)
if retry {
log.G(ctx).WithError(err).Debugf("Retrying request")
}
return retry, err2
}
func newHTTPFetcher(ctx context.Context, fc *fetcherConfig) (*httpFetcher, int64, error) {
reghosts, err := fc.hosts(fc.refspec)
if err != nil {
return nil, 0, err
}
desc := fc.desc
if desc.Digest.String() == "" { if desc.Digest.String() == "" {
return nil, 0, fmt.Errorf("digest is mandatory in layer descriptor") return nil, 0, fmt.Errorf("Digest is mandatory in layer descriptor")
} }
digest := desc.Digest digest := desc.Digest
pullScope, err := docker.RepositoryScope(fc.refspec, false) pullScope, err := docker.RepositoryScope(refspec, false)
if err != nil { if err != nil {
return nil, 0, err return nil, 0, err
} }
@ -214,24 +110,15 @@ func newHTTPFetcher(ctx context.Context, fc *fetcherConfig) (*httpFetcher, int64
rErr := fmt.Errorf("failed to resolve") rErr := fmt.Errorf("failed to resolve")
for _, host := range reghosts { for _, host := range reghosts {
if host.Host == "" || strings.Contains(host.Host, "/") { if host.Host == "" || strings.Contains(host.Host, "/") {
rErr = fmt.Errorf("invalid destination (host %q, ref:%q, digest:%q): %w", host.Host, fc.refspec, digest, rErr) rErr = errors.Wrapf(rErr, "invalid destination (host %q, ref:%q, digest:%q)",
host.Host, refspec, digest)
continue // Try another continue // Try another
} }
// Prepare transport with authorization functionality // Prepare transport with authorization functionality
tr := host.Client.Transport tr := host.Client.Transport
timeout := host.Client.Timeout timeout := host.Client.Timeout
if rt, ok := tr.(*rhttp.RoundTripper); ok {
rt.Client.RetryMax = fc.maxRetries
rt.Client.RetryWaitMin = fc.minWait
rt.Client.RetryWaitMax = fc.maxWait
rt.Client.Backoff = backoffStrategy
rt.Client.CheckRetry = retryStrategy
timeout = rt.Client.HTTPClient.Timeout
}
if host.Authorizer != nil { if host.Authorizer != nil {
tr = &transport{ tr = &transport{
inner: tr, inner: tr,
@ -244,37 +131,34 @@ func newHTTPFetcher(ctx context.Context, fc *fetcherConfig) (*httpFetcher, int64
blobURL := fmt.Sprintf("%s://%s/%s/blobs/%s", blobURL := fmt.Sprintf("%s://%s/%s/blobs/%s",
host.Scheme, host.Scheme,
path.Join(host.Host, host.Path), path.Join(host.Host, host.Path),
strings.TrimPrefix(fc.refspec.Locator, fc.refspec.Hostname()+"/"), strings.TrimPrefix(refspec.Locator, refspec.Hostname()+"/"),
digest) digest)
url, header, err := redirect(ctx, blobURL, tr, timeout, host.Header) url, err := redirect(ctx, blobURL, tr, timeout)
if err != nil { if err != nil {
rErr = fmt.Errorf("failed to redirect (host %q, ref:%q, digest:%q): %v: %w", host.Host, fc.refspec, digest, err, rErr) rErr = errors.Wrapf(rErr, "failed to redirect (host %q, ref:%q, digest:%q): %v",
host.Host, refspec, digest, err)
continue // Try another continue // Try another
} }
// Get size information // Get size information
// TODO: we should try to use the Size field in the descriptor here. // TODO: we should try to use the Size field in the descriptor here.
start := time.Now() // start time before getting layer header size, err := getSize(ctx, url, tr, timeout)
size, err := getSize(ctx, url, tr, timeout, header)
commonmetrics.MeasureLatencyInMilliseconds(commonmetrics.StargzHeaderGet, digest, start) // time to get layer header
if err != nil { if err != nil {
rErr = fmt.Errorf("failed to get size (host %q, ref:%q, digest:%q): %v: %w", host.Host, fc.refspec, digest, err, rErr) rErr = errors.Wrapf(rErr, "failed to get size (host %q, ref:%q, digest:%q): %v",
host.Host, refspec, digest, err)
continue // Try another continue // Try another
} }
// Hit one destination // Hit one destination
return &httpFetcher{ return &fetcher{
url: url, url: url,
tr: tr, tr: tr,
blobURL: blobURL, blobURL: blobURL,
digest: digest, timeout: timeout,
timeout: timeout,
header: header,
orgHeader: host.Header,
}, size, nil }, size, nil
} }
return nil, 0, fmt.Errorf("cannot resolve layer: %w", rErr) return nil, 0, errors.Wrapf(rErr, "cannot resolve layer")
} }
type transport struct { type transport struct {
@ -302,7 +186,6 @@ func (tr *transport) RoundTrip(req *http.Request) (*http.Response, error) {
// TODO: support more status codes and retries // TODO: support more status codes and retries
if resp.StatusCode == http.StatusUnauthorized { if resp.StatusCode == http.StatusUnauthorized {
log.G(ctx).Infof("Received status code: %v. Refreshing creds...", resp.Status)
// prepare authorization for the target host using docker.Authorizer // prepare authorization for the target host using docker.Authorizer
if err := tr.auth.AddResponses(ctx, []*http.Response{resp}); err != nil { if err := tr.auth.AddResponses(ctx, []*http.Response{resp}); err != nil {
@ -319,7 +202,7 @@ func (tr *transport) RoundTrip(req *http.Request) (*http.Response, error) {
return resp, nil return resp, nil
} }
func redirect(ctx context.Context, blobURL string, tr http.RoundTripper, timeout time.Duration, header http.Header) (url string, withHeader http.Header, err error) { func redirect(ctx context.Context, blobURL string, tr http.RoundTripper, timeout time.Duration) (url string, err error) {
if timeout > 0 { if timeout > 0 {
var cancel context.CancelFunc var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, timeout) ctx, cancel = context.WithTimeout(ctx, timeout)
@ -330,38 +213,32 @@ func redirect(ctx context.Context, blobURL string, tr http.RoundTripper, timeout
// ghcr.io returns 200 on HEAD without Location header (2020). // ghcr.io returns 200 on HEAD without Location header (2020).
req, err := http.NewRequestWithContext(ctx, "GET", blobURL, nil) req, err := http.NewRequestWithContext(ctx, "GET", blobURL, nil)
if err != nil { if err != nil {
return "", nil, fmt.Errorf("failed to make request to the registry: %w", err) return "", errors.Wrapf(err, "failed to make request to the registry")
}
req.Header = http.Header{}
for k, v := range header {
req.Header[k] = v
} }
req.Close = false req.Close = false
req.Header.Set("Range", "bytes=0-1") req.Header.Set("Range", "bytes=0-1")
res, err := tr.RoundTrip(req) res, err := tr.RoundTrip(req)
if err != nil { if err != nil {
return "", nil, fmt.Errorf("failed to request: %w", err) return "", errors.Wrapf(err, "failed to request")
} }
defer func() { defer func() {
io.Copy(io.Discard, res.Body) io.Copy(ioutil.Discard, res.Body)
res.Body.Close() res.Body.Close()
}() }()
if res.StatusCode/100 == 2 { if res.StatusCode/100 == 2 {
url = blobURL url = blobURL
withHeader = header
} else if redir := res.Header.Get("Location"); redir != "" && res.StatusCode/100 == 3 { } else if redir := res.Header.Get("Location"); redir != "" && res.StatusCode/100 == 3 {
// TODO: Support nested redirection // TODO: Support nested redirection
url = redir url = redir
// Do not pass headers to the redirected location.
} else { } else {
return "", nil, fmt.Errorf("failed to access to the registry with code %v", res.StatusCode) return "", fmt.Errorf("failed to access to the registry with code %v", res.StatusCode)
} }
return return
} }
func getSize(ctx context.Context, url string, tr http.RoundTripper, timeout time.Duration, header http.Header) (int64, error) { func getSize(ctx context.Context, url string, tr http.RoundTripper, timeout time.Duration) (int64, error) {
if timeout > 0 { if timeout > 0 {
var cancel context.CancelFunc var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, timeout) ctx, cancel = context.WithTimeout(ctx, timeout)
@ -371,10 +248,6 @@ func getSize(ctx context.Context, url string, tr http.RoundTripper, timeout time
if err != nil { if err != nil {
return 0, err return 0, err
} }
req.Header = http.Header{}
for k, v := range header {
req.Header[k] = v
}
req.Close = false req.Close = false
res, err := tr.RoundTrip(req) res, err := tr.RoundTrip(req)
if err != nil { if err != nil {
@ -391,27 +264,22 @@ func getSize(ctx context.Context, url string, tr http.RoundTripper, timeout time
// HEAD request (2020). // HEAD request (2020).
req, err = http.NewRequestWithContext(ctx, "GET", url, nil) req, err = http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil { if err != nil {
return 0, fmt.Errorf("failed to make request to the registry: %w", err) return 0, errors.Wrapf(err, "failed to make request to the registry")
}
req.Header = http.Header{}
for k, v := range header {
req.Header[k] = v
} }
req.Close = false req.Close = false
req.Header.Set("Range", "bytes=0-1") req.Header.Set("Range", "bytes=0-1")
res, err = tr.RoundTrip(req) res, err = tr.RoundTrip(req)
if err != nil { if err != nil {
return 0, fmt.Errorf("failed to request: %w", err) return 0, errors.Wrapf(err, "failed to request")
} }
defer func() { defer func() {
io.Copy(io.Discard, res.Body) io.Copy(ioutil.Discard, res.Body)
res.Body.Close() res.Body.Close()
}() }()
switch res.StatusCode { if res.StatusCode == http.StatusOK {
case http.StatusOK:
return strconv.ParseInt(res.Header.Get("Content-Length"), 10, 64) return strconv.ParseInt(res.Header.Get("Content-Length"), 10, 64)
case http.StatusPartialContent: } else if res.StatusCode == http.StatusPartialContent {
_, size, err := parseRange(res.Header.Get("Content-Range")) _, size, err := parseRange(res.Header.Get("Content-Range"))
return size, err return size, err
} }
@ -420,17 +288,14 @@ func getSize(ctx context.Context, url string, tr http.RoundTripper, timeout time
headStatusCode, res.StatusCode) headStatusCode, res.StatusCode)
} }
type httpFetcher struct { type fetcher struct {
url string url string
urlMu sync.Mutex urlMu sync.Mutex
tr http.RoundTripper tr http.RoundTripper
blobURL string blobURL string
digest digest.Digest
singleRange bool singleRange bool
singleRangeMu sync.Mutex singleRangeMu sync.Mutex
timeout time.Duration timeout time.Duration
header http.Header
orgHeader http.Header
} }
type multipartReadCloser interface { type multipartReadCloser interface {
@ -438,7 +303,7 @@ type multipartReadCloser interface {
Close() error Close() error
} }
func (f *httpFetcher) fetch(ctx context.Context, rs []region, retry bool) (multipartReadCloser, error) { func (f *fetcher) fetch(ctx context.Context, rs []region, retry bool, opts *options) (multipartReadCloser, error) {
if len(rs) == 0 { if len(rs) == 0 {
return nil, fmt.Errorf("no request queried") return nil, fmt.Errorf("no request queried")
} }
@ -448,6 +313,13 @@ func (f *httpFetcher) fetch(ctx context.Context, rs []region, retry bool) (multi
singleRangeMode = f.isSingleRangeMode() singleRangeMode = f.isSingleRangeMode()
) )
if opts.ctx != nil {
ctx = opts.ctx
}
if opts.tr != nil {
tr = opts.tr
}
// squash requesting chunks for reducing the total size of request header // squash requesting chunks for reducing the total size of request header
// (servers generally have limits for the size of headers) // (servers generally have limits for the size of headers)
// TODO: when our request has too many ranges, we need to divide it into // TODO: when our request has too many ranges, we need to divide it into
@ -470,10 +342,6 @@ func (f *httpFetcher) fetch(ctx context.Context, rs []region, retry bool) (multi
if err != nil { if err != nil {
return nil, err return nil, err
} }
req.Header = http.Header{}
for k, v := range f.header {
req.Header[k] = v
}
var ranges string var ranges string
for _, reg := range requests { for _, reg := range requests {
ranges += fmt.Sprintf("%d-%d,", reg.b, reg.e) ranges += fmt.Sprintf("%d-%d,", reg.b, reg.e)
@ -481,11 +349,7 @@ func (f *httpFetcher) fetch(ctx context.Context, rs []region, retry bool) (multi
req.Header.Add("Range", fmt.Sprintf("bytes=%s", ranges[:len(ranges)-1])) req.Header.Add("Range", fmt.Sprintf("bytes=%s", ranges[:len(ranges)-1]))
req.Header.Add("Accept-Encoding", "identity") req.Header.Add("Accept-Encoding", "identity")
req.Close = false req.Close = false
// Recording the roundtrip latency for remote registry GET operation.
start := time.Now()
res, err := tr.RoundTrip(req) // NOT DefaultClient; don't want redirects res, err := tr.RoundTrip(req) // NOT DefaultClient; don't want redirects
commonmetrics.MeasureLatencyInMilliseconds(commonmetrics.RemoteRegistryGet, f.digest, start)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -493,45 +357,41 @@ func (f *httpFetcher) fetch(ctx context.Context, rs []region, retry bool) (multi
// We are getting the whole blob in one part (= status 200) // We are getting the whole blob in one part (= status 200)
size, err := strconv.ParseInt(res.Header.Get("Content-Length"), 10, 64) size, err := strconv.ParseInt(res.Header.Get("Content-Length"), 10, 64)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to parse Content-Length: %w", err) return nil, errors.Wrapf(err, "failed to parse Content-Length")
} }
return newSinglePartReader(region{0, size - 1}, res.Body), nil return singlePartReader(region{0, size - 1}, res.Body), nil
} else if res.StatusCode == http.StatusPartialContent { } else if res.StatusCode == http.StatusPartialContent {
mediaType, params, err := mime.ParseMediaType(res.Header.Get("Content-Type")) mediaType, params, err := mime.ParseMediaType(res.Header.Get("Content-Type"))
if err != nil { if err != nil {
return nil, fmt.Errorf("invalid media type %q: %w", mediaType, err) return nil, errors.Wrapf(err, "invalid media type %q", mediaType)
} }
if strings.HasPrefix(mediaType, "multipart/") { if strings.HasPrefix(mediaType, "multipart/") {
// We are getting a set of chunks as a multipart body. // We are getting a set of chunks as a multipart body.
return newMultiPartReader(res.Body, params["boundary"]), nil return multiPartReader(res.Body, params["boundary"]), nil
} }
// We are getting single range // We are getting single range
reg, _, err := parseRange(res.Header.Get("Content-Range")) reg, _, err := parseRange(res.Header.Get("Content-Range"))
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to parse Content-Range: %w", err) return nil, errors.Wrapf(err, "failed to parse Content-Range")
} }
return newSinglePartReader(reg, res.Body), nil return singlePartReader(reg, res.Body), nil
} else if retry && res.StatusCode == http.StatusForbidden { } else if retry && res.StatusCode == http.StatusForbidden {
log.G(ctx).Infof("Received status code: %v. Refreshing URL and retrying...", res.Status)
// re-redirect and retry this once. // re-redirect and retry this once.
if err := f.refreshURL(ctx); err != nil { if err := f.refreshURL(ctx); err != nil {
return nil, fmt.Errorf("failed to refresh URL on %v: %w", res.Status, err) return nil, errors.Wrapf(err, "failed to refresh URL on %v", res.Status)
} }
return f.fetch(ctx, rs, false) return f.fetch(ctx, rs, false, opts)
} else if retry && res.StatusCode == http.StatusBadRequest && !singleRangeMode { } else if retry && res.StatusCode == http.StatusBadRequest && !singleRangeMode {
log.G(ctx).Infof("Received status code: %v. Setting single range mode and retrying...", res.Status)
// gcr.io (https://storage.googleapis.com) returns 400 on multi-range request (2020 #81) // gcr.io (https://storage.googleapis.com) returns 400 on multi-range request (2020 #81)
f.singleRangeMode() // fallbacks to singe range request mode f.singleRangeMode() // fallbacks to singe range request mode
return f.fetch(ctx, rs, false) // retries with the single range mode return f.fetch(ctx, rs, false, opts) // retries with the single range mode
} }
return nil, fmt.Errorf("unexpected status code: %v", res.Status) return nil, fmt.Errorf("unexpected status code: %v", res.Status)
} }
func (f *httpFetcher) check() error { func (f *fetcher) check() error {
ctx := context.Background() ctx := context.Background()
if f.timeout > 0 { if f.timeout > 0 {
var cancel context.CancelFunc var cancel context.CancelFunc
@ -543,26 +403,21 @@ func (f *httpFetcher) check() error {
f.urlMu.Unlock() f.urlMu.Unlock()
req, err := http.NewRequestWithContext(ctx, "GET", url, nil) req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil { if err != nil {
return fmt.Errorf("check failed: failed to make request: %w", err) return errors.Wrapf(err, "check failed: failed to make request")
}
req.Header = http.Header{}
for k, v := range f.header {
req.Header[k] = v
} }
req.Close = false req.Close = false
req.Header.Set("Range", "bytes=0-1") req.Header.Set("Range", "bytes=0-1")
res, err := f.tr.RoundTrip(req) res, err := f.tr.RoundTrip(req)
if err != nil { if err != nil {
return fmt.Errorf("check failed: failed to request to registry: %w", err) return errors.Wrapf(err, "check failed: failed to request to registry")
} }
defer func() { defer func() {
io.Copy(io.Discard, res.Body) io.Copy(ioutil.Discard, res.Body)
res.Body.Close() res.Body.Close()
}() }()
switch res.StatusCode { if res.StatusCode == http.StatusOK || res.StatusCode == http.StatusPartialContent {
case http.StatusOK, http.StatusPartialContent:
return nil return nil
case http.StatusForbidden: } else if res.StatusCode == http.StatusForbidden {
// Try to re-redirect this blob // Try to re-redirect this blob
rCtx := context.Background() rCtx := context.Background()
if f.timeout > 0 { if f.timeout > 0 {
@ -579,37 +434,36 @@ func (f *httpFetcher) check() error {
return fmt.Errorf("unexpected status code %v", res.StatusCode) return fmt.Errorf("unexpected status code %v", res.StatusCode)
} }
func (f *httpFetcher) refreshURL(ctx context.Context) error { func (f *fetcher) refreshURL(ctx context.Context) error {
newURL, headers, err := redirect(ctx, f.blobURL, f.tr, f.timeout, f.orgHeader) newURL, err := redirect(ctx, f.blobURL, f.tr, f.timeout)
if err != nil { if err != nil {
return err return err
} }
f.urlMu.Lock() f.urlMu.Lock()
f.url = newURL f.url = newURL
f.header = headers
f.urlMu.Unlock() f.urlMu.Unlock()
return nil return nil
} }
func (f *httpFetcher) genID(reg region) string { func (f *fetcher) genID(reg region) string {
sum := sha256.Sum256([]byte(fmt.Sprintf("%s-%d-%d", f.blobURL, reg.b, reg.e))) sum := sha256.Sum256([]byte(fmt.Sprintf("%s-%d-%d", f.blobURL, reg.b, reg.e)))
return fmt.Sprintf("%x", sum) return fmt.Sprintf("%x", sum)
} }
func (f *httpFetcher) singleRangeMode() { func (f *fetcher) singleRangeMode() {
f.singleRangeMu.Lock() f.singleRangeMu.Lock()
f.singleRange = true f.singleRange = true
f.singleRangeMu.Unlock() f.singleRangeMu.Unlock()
} }
func (f *httpFetcher) isSingleRangeMode() bool { func (f *fetcher) isSingleRangeMode() bool {
f.singleRangeMu.Lock() f.singleRangeMu.Lock()
r := f.singleRange r := f.singleRange
f.singleRangeMu.Unlock() f.singleRangeMu.Unlock()
return r return r
} }
func newSinglePartReader(reg region, rc io.ReadCloser) multipartReadCloser { func singlePartReader(reg region, rc io.ReadCloser) multipartReadCloser {
return &singlepartReader{ return &singlepartReader{
r: rc, r: rc,
Closer: rc, Closer: rc,
@ -632,7 +486,7 @@ func (sr *singlepartReader) Next() (region, io.Reader, error) {
return region{}, nil, io.EOF return region{}, nil, io.EOF
} }
func newMultiPartReader(rc io.ReadCloser, boundary string) multipartReadCloser { func multiPartReader(rc io.ReadCloser, boundary string) multipartReadCloser {
return &multipartReader{ return &multipartReader{
m: multipart.NewReader(rc, boundary), m: multipart.NewReader(rc, boundary),
Closer: rc, Closer: rc,
@ -651,7 +505,7 @@ func (sr *multipartReader) Next() (region, io.Reader, error) {
} }
reg, _, err := parseRange(p.Header.Get("Content-Range")) reg, _, err := parseRange(p.Header.Get("Content-Range"))
if err != nil { if err != nil {
return region{}, nil, fmt.Errorf("failed to parse Content-Range: %w", err) return region{}, nil, errors.Wrapf(err, "failed to parse Content-Range")
} }
return reg, p, nil return reg, p, nil
} }
@ -663,15 +517,15 @@ func parseRange(header string) (region, int64, error) {
} }
begin, err := strconv.ParseInt(submatches[1], 10, 64) begin, err := strconv.ParseInt(submatches[1], 10, 64)
if err != nil { if err != nil {
return region{}, 0, fmt.Errorf("failed to parse beginning offset %q: %w", submatches[1], err) return region{}, 0, errors.Wrapf(err, "failed to parse beginning offset %q", submatches[1])
} }
end, err := strconv.ParseInt(submatches[2], 10, 64) end, err := strconv.ParseInt(submatches[2], 10, 64)
if err != nil { if err != nil {
return region{}, 0, fmt.Errorf("failed to parse end offset %q: %w", submatches[2], err) return region{}, 0, errors.Wrapf(err, "failed to parse end offset %q", submatches[2])
} }
blobSize, err := strconv.ParseInt(submatches[3], 10, 64) blobSize, err := strconv.ParseInt(submatches[3], 10, 64)
if err != nil { if err != nil {
return region{}, 0, fmt.Errorf("failed to parse blob size %q: %w", submatches[3], err) return region{}, 0, errors.Wrapf(err, "failed to parse blob size %q", submatches[3])
} }
return region{begin, end}, blobSize, nil return region{begin, end}, blobSize, nil
@ -681,6 +535,7 @@ type Option func(*options)
type options struct { type options struct {
ctx context.Context ctx context.Context
tr http.RoundTripper
cacheOpts []cache.Option cacheOpts []cache.Option
} }
@ -690,43 +545,14 @@ func WithContext(ctx context.Context) Option {
} }
} }
func WithRoundTripper(tr http.RoundTripper) Option {
return func(opts *options) {
opts.tr = tr
}
}
func WithCacheOpts(cacheOpts ...cache.Option) Option { func WithCacheOpts(cacheOpts ...cache.Option) Option {
return func(opts *options) { return func(opts *options) {
opts.cacheOpts = cacheOpts opts.cacheOpts = cacheOpts
} }
} }
type remoteFetcher struct {
r Fetcher
}
func (r *remoteFetcher) fetch(ctx context.Context, rs []region, retry bool) (multipartReadCloser, error) {
var s regionSet
for _, reg := range rs {
s.add(reg)
}
reg := superRegion(s.rs)
rc, err := r.r.Fetch(ctx, reg.b, reg.size())
if err != nil {
return nil, err
}
return newSinglePartReader(reg, rc), nil
}
func (r *remoteFetcher) check() error {
return r.r.Check()
}
func (r *remoteFetcher) genID(reg region) string {
return r.r.GenID(reg.b, reg.size())
}
type Handler interface {
Handle(ctx context.Context, desc ocispec.Descriptor) (fetcher Fetcher, size int64, err error)
}
type Fetcher interface {
Fetch(ctx context.Context, off int64, size int64) (io.ReadCloser, error)
Check() error
GenID(off int64, size int64) string
}

View File

@ -26,17 +26,15 @@ import (
"bytes" "bytes"
"context" "context"
"fmt" "fmt"
"io" "io/ioutil"
"net/http" "net/http"
"net/url" "net/url"
"regexp" "regexp"
"strings" "strings"
"testing" "testing"
"github.com/containerd/containerd/v2/core/remotes/docker" "github.com/containerd/containerd/reference"
"github.com/containerd/containerd/v2/pkg/reference" "github.com/containerd/containerd/remotes/docker"
"github.com/containerd/stargz-snapshotter/fs/source"
rhttp "github.com/hashicorp/go-retryablehttp"
digest "github.com/opencontainers/go-digest" digest "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1" ocispec "github.com/opencontainers/image-spec/specs-go/v1"
) )
@ -56,312 +54,146 @@ func TestMirror(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
hosts func(t *testing.T) source.RegistryHosts tr http.RoundTripper
mirrors []string
wantHost string wantHost string
error bool error bool
}{ }{
{ {
name: "no-mirror", name: "no-mirror",
hosts: hostsConfig( tr: &sampleRoundTripper{okURLs: []string{refHost}},
&sampleRoundTripper{okURLs: []string{refHost}}, mirrors: nil,
),
wantHost: refHost, wantHost: refHost,
}, },
{ {
name: "valid-mirror", name: "valid-mirror",
hosts: hostsConfig( tr: &sampleRoundTripper{okURLs: []string{"mirrorexample.com"}},
&sampleRoundTripper{okURLs: []string{"mirrorexample.com"}}, mirrors: []string{"mirrorexample.com"},
hostSimple("mirrorexample.com"),
),
wantHost: "mirrorexample.com", wantHost: "mirrorexample.com",
}, },
{ {
name: "invalid-mirror", name: "invalid-mirror",
hosts: hostsConfig( tr: &sampleRoundTripper{
&sampleRoundTripper{ withCode: map[string]int{
withCode: map[string]int{ "mirrorexample1.com": http.StatusInternalServerError,
"mirrorexample1.com": http.StatusInternalServerError, "mirrorexample2.com": http.StatusUnauthorized,
"mirrorexample2.com": http.StatusUnauthorized, "mirrorexample3.com": http.StatusNotFound,
"mirrorexample3.com": http.StatusNotFound,
},
okURLs: []string{"mirrorexample4.com", refHost},
}, },
hostSimple("mirrorexample1.com"), okURLs: []string{"mirrorexample4.com", refHost},
hostSimple("mirrorexample2.com"), },
hostSimple("mirrorexample3.com"), mirrors: []string{
hostSimple("mirrorexample4.com"), "mirrorexample1.com",
), "mirrorexample2.com",
"mirrorexample3.com",
"mirrorexample4.com",
},
wantHost: "mirrorexample4.com", wantHost: "mirrorexample4.com",
}, },
{ {
name: "invalid-all-mirror", name: "invalid-all-mirror",
hosts: hostsConfig( tr: &sampleRoundTripper{
&sampleRoundTripper{ withCode: map[string]int{
withCode: map[string]int{ "mirrorexample1.com": http.StatusInternalServerError,
"mirrorexample1.com": http.StatusInternalServerError, "mirrorexample2.com": http.StatusUnauthorized,
"mirrorexample2.com": http.StatusUnauthorized, "mirrorexample3.com": http.StatusNotFound,
"mirrorexample3.com": http.StatusNotFound,
},
okURLs: []string{refHost},
}, },
hostSimple("mirrorexample1.com"), okURLs: []string{refHost},
hostSimple("mirrorexample2.com"), },
hostSimple("mirrorexample3.com"), mirrors: []string{
), "mirrorexample1.com",
"mirrorexample2.com",
"mirrorexample3.com",
},
wantHost: refHost, wantHost: refHost,
}, },
{ {
name: "invalid-hostname-of-mirror", name: "invalid-hostname-of-mirror",
hosts: hostsConfig( tr: &sampleRoundTripper{
&sampleRoundTripper{ okURLs: []string{`.*`},
okURLs: []string{`.*`}, },
}, mirrors: []string{"mirrorexample.com/somepath/"},
hostSimple("mirrorexample.com/somepath/"),
),
wantHost: refHost, wantHost: refHost,
}, },
{ {
name: "redirected-mirror", name: "redirected-mirror",
hosts: hostsConfig( tr: &sampleRoundTripper{
&sampleRoundTripper{ redirectURL: map[string]string{
redirectURL: map[string]string{ regexp.QuoteMeta(fmt.Sprintf("mirrorexample.com%s", blobPath)): "https://backendexample.com/blobs/" + blobDigest.String(),
regexp.QuoteMeta(fmt.Sprintf("mirrorexample.com%s", blobPath)): "https://backendexample.com/blobs/" + blobDigest.String(),
},
okURLs: []string{`.*`},
}, },
hostSimple("mirrorexample.com"), okURLs: []string{`.*`},
), },
mirrors: []string{"mirrorexample.com"},
wantHost: "backendexample.com", wantHost: "backendexample.com",
}, },
{ {
name: "invalid-redirected-mirror", name: "invalid-redirected-mirror",
hosts: hostsConfig( tr: &sampleRoundTripper{
&sampleRoundTripper{ withCode: map[string]int{
withCode: map[string]int{ "backendexample.com": http.StatusInternalServerError,
"backendexample.com": http.StatusInternalServerError,
},
redirectURL: map[string]string{
regexp.QuoteMeta(fmt.Sprintf("mirrorexample.com%s", blobPath)): "https://backendexample.com/blobs/" + blobDigest.String(),
},
okURLs: []string{`.*`},
}, },
hostSimple("mirrorexample.com"), redirectURL: map[string]string{
), regexp.QuoteMeta(fmt.Sprintf("mirrorexample.com%s", blobPath)): "https://backendexample.com/blobs/" + blobDigest.String(),
},
okURLs: []string{`.*`},
},
mirrors: []string{"mirrorexample.com"},
wantHost: refHost, wantHost: refHost,
}, },
{ {
name: "fail-all", name: "fail-all",
hosts: hostsConfig( tr: &sampleRoundTripper{},
&sampleRoundTripper{}, mirrors: []string{"mirrorexample.com"},
hostSimple("mirrorexample.com"),
),
wantHost: "", wantHost: "",
error: true, error: true,
}, },
{
name: "headers",
hosts: hostsConfig(
&sampleRoundTripper{
okURLs: []string{`.*`},
wantHeaders: map[string]http.Header{
"mirrorexample.com": http.Header(map[string][]string{
"test-a-key": {"a-value-1", "a-value-2"},
"test-b-key": {"b-value-1"},
}),
},
},
hostWithHeaders("mirrorexample.com", map[string][]string{
"test-a-key": {"a-value-1", "a-value-2"},
"test-b-key": {"b-value-1"},
}),
),
wantHost: "mirrorexample.com",
},
{
name: "headers-with-mirrors",
hosts: hostsConfig(
&sampleRoundTripper{
withCode: map[string]int{
"mirrorexample1.com": http.StatusInternalServerError,
"mirrorexample2.com": http.StatusInternalServerError,
},
okURLs: []string{"mirrorexample3.com", refHost},
wantHeaders: map[string]http.Header{
"mirrorexample1.com": http.Header(map[string][]string{
"test-a-key": {"a-value"},
}),
"mirrorexample2.com": http.Header(map[string][]string{
"test-b-key": {"b-value"},
"test-b-key-2": {"b-value-2", "b-value-3"},
}),
"mirrorexample3.com": http.Header(map[string][]string{
"test-c-key": {"c-value"},
}),
},
},
hostWithHeaders("mirrorexample1.com", map[string][]string{
"test-a-key": {"a-value"},
}),
hostWithHeaders("mirrorexample2.com", map[string][]string{
"test-b-key": {"b-value"},
"test-b-key-2": {"b-value-2", "b-value-3"},
}),
hostWithHeaders("mirrorexample3.com", map[string][]string{
"test-c-key": {"c-value"},
}),
),
wantHost: "mirrorexample3.com",
},
{
name: "headers-with-mirrors-invalid-all",
hosts: hostsConfig(
&sampleRoundTripper{
withCode: map[string]int{
"mirrorexample1.com": http.StatusInternalServerError,
"mirrorexample2.com": http.StatusInternalServerError,
},
okURLs: []string{"mirrorexample3.com", refHost},
wantHeaders: map[string]http.Header{
"mirrorexample1.com": http.Header(map[string][]string{
"test-a-key": {"a-value"},
}),
"mirrorexample2.com": http.Header(map[string][]string{
"test-b-key": {"b-value"},
"test-b-key-2": {"b-value-2", "b-value-3"},
}),
},
},
hostWithHeaders("mirrorexample1.com", map[string][]string{
"test-a-key": {"a-value"},
}),
hostWithHeaders("mirrorexample2.com", map[string][]string{
"test-b-key": {"b-value"},
"test-b-key-2": {"b-value-2", "b-value-3"},
}),
),
wantHost: refHost,
},
{
name: "headers-with-redirected-mirror",
hosts: hostsConfig(
&sampleRoundTripper{
redirectURL: map[string]string{
regexp.QuoteMeta(fmt.Sprintf("mirrorexample.com%s", blobPath)): "https://backendexample.com/blobs/" + blobDigest.String(),
},
okURLs: []string{`.*`},
wantHeaders: map[string]http.Header{
"mirrorexample.com": http.Header(map[string][]string{
"test-a-key": {"a-value"},
"test-b-key-2": {"b-value-2", "b-value-3"},
}),
},
},
hostWithHeaders("mirrorexample.com", map[string][]string{
"test-a-key": {"a-value"},
"test-b-key-2": {"b-value-2", "b-value-3"},
}),
),
wantHost: "backendexample.com",
},
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
fetcher, _, err := newHTTPFetcher(context.Background(), &fetcherConfig{ hosts := func(host string) (reghosts []docker.RegistryHost, _ error) {
hosts: tt.hosts(t), for _, m := range append(tt.mirrors, host) {
refspec: refspec, reghosts = append(reghosts, docker.RegistryHost{
desc: ocispec.Descriptor{Digest: blobDigest}, Client: &http.Client{Transport: tt.tr},
}) Host: m,
Scheme: "https",
Path: "/v2",
Capabilities: docker.HostCapabilityPull,
})
}
return
}
fetcher, _, err := newFetcher(context.Background(), hosts, refspec, ocispec.Descriptor{Digest: blobDigest})
if err != nil { if err != nil {
if tt.error { if tt.error {
return return
} }
t.Fatalf("failed to resolve reference: %v", err) t.Fatalf("failed to resolve reference: %v", err)
} }
checkFetcherURL(t, fetcher, tt.wantHost) nurl, err := url.Parse(fetcher.url)
if err != nil {
// Test check() t.Fatalf("failed to parse url %q: %v", fetcher.url, err)
if err := fetcher.check(); err != nil {
t.Fatalf("failed to check fetcher: %v", err)
} }
if nurl.Hostname() != tt.wantHost {
// Test refreshURL() t.Errorf("invalid hostname %q(%q); want %q",
if err := fetcher.refreshURL(context.TODO()); err != nil { nurl.Hostname(), nurl.String(), tt.wantHost)
t.Fatalf("failed to refresh URL: %v", err)
} }
checkFetcherURL(t, fetcher, tt.wantHost)
}) })
} }
} }
func checkFetcherURL(t *testing.T, f *httpFetcher, wantHost string) {
nurl, err := url.Parse(f.url)
if err != nil {
t.Fatalf("failed to parse url %q: %v", f.url, err)
}
if nurl.Hostname() != wantHost {
t.Errorf("invalid hostname %q(%q); want %q", nurl.Hostname(), nurl.String(), wantHost)
}
}
type sampleRoundTripper struct { type sampleRoundTripper struct {
t *testing.T
withCode map[string]int withCode map[string]int
redirectURL map[string]string redirectURL map[string]string
okURLs []string okURLs []string
wantHeaders map[string]http.Header
}
func getTestHeaders(headers map[string][]string) map[string][]string {
res := make(map[string][]string)
for k, v := range headers {
if strings.HasPrefix(k, "test-") {
res[k] = v
}
}
return res
} }
func (tr *sampleRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { func (tr *sampleRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
reqHeader := getTestHeaders(req.Header)
for host, wHeaders := range tr.wantHeaders {
wantHeader := getTestHeaders(wHeaders)
if ok, _ := regexp.Match(host, []byte(req.URL.String())); ok {
if len(wantHeader) != len(reqHeader) {
tr.t.Fatalf("unexpected num of headers; got %d, wanted %d", len(wantHeader), len(reqHeader))
}
for k, v := range wantHeader {
gotV, ok := reqHeader[k]
if !ok {
tr.t.Fatalf("required header %q not found; got %+v", k, reqHeader)
}
wantVM := make(map[string]struct{})
for _, e := range v {
wantVM[e] = struct{}{}
}
if len(gotV) != len(v) {
tr.t.Fatalf("unexpected num of header values of %q; got %d, wanted %d", k, len(gotV), len(v))
}
for _, gotE := range gotV {
delete(wantVM, gotE)
}
if len(wantVM) != 0 {
tr.t.Fatalf("header %q must have elements %+v", k, wantVM)
}
delete(reqHeader, k)
}
}
}
if len(reqHeader) != 0 {
tr.t.Fatalf("unexpected headers %+v", reqHeader)
}
for host, code := range tr.withCode { for host, code := range tr.withCode {
if ok, _ := regexp.Match(host, []byte(req.URL.String())); ok { if ok, _ := regexp.Match(host, []byte(req.URL.String())); ok {
return &http.Response{ return &http.Response{
StatusCode: code, StatusCode: code,
Header: make(http.Header), Header: make(http.Header),
Body: io.NopCloser(bytes.NewReader([]byte{})), Body: ioutil.NopCloser(bytes.NewReader([]byte{})),
Request: req, Request: req,
}, nil }, nil
} }
@ -373,7 +205,7 @@ func (tr *sampleRoundTripper) RoundTrip(req *http.Request) (*http.Response, erro
return &http.Response{ return &http.Response{
StatusCode: http.StatusMovedPermanently, StatusCode: http.StatusMovedPermanently,
Header: header, Header: header,
Body: io.NopCloser(bytes.NewReader([]byte{})), Body: ioutil.NopCloser(bytes.NewReader([]byte{})),
Request: req, Request: req,
}, nil }, nil
} }
@ -385,7 +217,7 @@ func (tr *sampleRoundTripper) RoundTrip(req *http.Request) (*http.Response, erro
return &http.Response{ return &http.Response{
StatusCode: http.StatusOK, StatusCode: http.StatusOK,
Header: header, Header: header,
Body: io.NopCloser(bytes.NewReader([]byte{0})), Body: ioutil.NopCloser(bytes.NewReader([]byte{0})),
Request: req, Request: req,
}, nil }, nil
} }
@ -393,14 +225,14 @@ func (tr *sampleRoundTripper) RoundTrip(req *http.Request) (*http.Response, erro
return &http.Response{ return &http.Response{
StatusCode: http.StatusNotFound, StatusCode: http.StatusNotFound,
Header: make(http.Header), Header: make(http.Header),
Body: io.NopCloser(bytes.NewReader([]byte{})), Body: ioutil.NopCloser(bytes.NewReader([]byte{})),
Request: req, Request: req,
}, nil }, nil
} }
func TestCheck(t *testing.T) { func TestCheck(t *testing.T) {
tr := &breakRoundTripper{} tr := &breakRoundTripper{}
f := &httpFetcher{ f := &fetcher{
url: "test", url: "test",
tr: tr, tr: tr,
} }
@ -424,114 +256,14 @@ func (b *breakRoundTripper) RoundTrip(req *http.Request) (res *http.Response, er
res = &http.Response{ res = &http.Response{
StatusCode: http.StatusPartialContent, StatusCode: http.StatusPartialContent,
Header: make(http.Header), Header: make(http.Header),
Body: io.NopCloser(bytes.NewReader([]byte("test"))), Body: ioutil.NopCloser(bytes.NewReader([]byte("test"))),
} }
} else { } else {
res = &http.Response{ res = &http.Response{
StatusCode: http.StatusInternalServerError, StatusCode: http.StatusInternalServerError,
Header: make(http.Header), Header: make(http.Header),
Body: io.NopCloser(bytes.NewReader([]byte{})), Body: ioutil.NopCloser(bytes.NewReader([]byte{})),
} }
} }
return return
} }
func TestRetry(t *testing.T) {
tr := &retryRoundTripper{}
rclient := rhttp.NewClient()
rclient.HTTPClient.Transport = tr
rclient.Backoff = backoffStrategy
f := &httpFetcher{
url: "test",
tr: &rhttp.RoundTripper{Client: rclient},
}
regions := []region{{b: 0, e: 1}}
_, err := f.fetch(context.Background(), regions, true)
if err != nil {
t.Fatalf("unexpected error = %v", err)
}
if tr.retryCount != 4 {
t.Fatalf("unxpected retryCount; expected=4 got=%d", tr.retryCount)
}
}
type retryRoundTripper struct {
retryCount int
}
func (r *retryRoundTripper) RoundTrip(req *http.Request) (res *http.Response, err error) {
defer func() {
r.retryCount++
}()
switch r.retryCount {
case 0:
err = fmt.Errorf("dummy error")
case 1:
res = &http.Response{
StatusCode: http.StatusTooManyRequests,
Header: make(http.Header),
Body: io.NopCloser(bytes.NewReader([]byte{})),
}
case 2:
res = &http.Response{
StatusCode: http.StatusServiceUnavailable,
Header: make(http.Header),
Body: io.NopCloser(bytes.NewReader([]byte{})),
}
default:
header := make(http.Header)
header.Add("Content-Length", "4")
res = &http.Response{
StatusCode: http.StatusOK,
Header: header,
Body: io.NopCloser(bytes.NewReader([]byte("test"))),
}
}
return
}
type hostFactory func(tr http.RoundTripper) docker.RegistryHost
func hostSimple(host string) hostFactory {
return func(tr http.RoundTripper) docker.RegistryHost {
return docker.RegistryHost{
Client: &http.Client{Transport: tr},
Host: host,
Scheme: "https",
Path: "/v2",
Capabilities: docker.HostCapabilityPull,
}
}
}
func hostWithHeaders(host string, headers http.Header) hostFactory {
return func(tr http.RoundTripper) docker.RegistryHost {
return docker.RegistryHost{
Client: &http.Client{Transport: tr},
Host: host,
Scheme: "https",
Path: "/v2",
Capabilities: docker.HostCapabilityPull,
Header: headers,
}
}
}
func hostsConfig(tr *sampleRoundTripper, mirrors ...hostFactory) func(t *testing.T) source.RegistryHosts {
return func(t *testing.T) source.RegistryHosts {
tr.t = t
return func(refspec reference.Spec) (reghosts []docker.RegistryHost, _ error) {
host := refspec.Hostname()
for _, m := range mirrors {
reghosts = append(reghosts, m(tr))
}
reghosts = append(reghosts, hostSimple(host)(tr))
return
}
}
}

View File

@ -21,10 +21,10 @@ import (
"fmt" "fmt"
"strings" "strings"
"github.com/containerd/containerd/v2/core/images" "github.com/containerd/containerd/images"
"github.com/containerd/containerd/v2/core/remotes/docker" "github.com/containerd/containerd/labels"
"github.com/containerd/containerd/v2/pkg/labels" "github.com/containerd/containerd/reference"
"github.com/containerd/containerd/v2/pkg/reference" "github.com/containerd/containerd/remotes/docker"
"github.com/containerd/stargz-snapshotter/fs/config" "github.com/containerd/stargz-snapshotter/fs/config"
digest "github.com/opencontainers/go-digest" digest "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1" ocispec "github.com/opencontainers/image-spec/specs-go/v1"
@ -37,15 +37,12 @@ import (
// about the blob. // about the blob.
type GetSources func(labels map[string]string) (source []Source, err error) type GetSources func(labels map[string]string) (source []Source, err error)
// RegistryHosts returns a list of registries that provides the specified image.
type RegistryHosts func(reference.Spec) ([]docker.RegistryHost, error)
// Source is a typed blob source information. This contains information about // Source is a typed blob source information. This contains information about
// a blob stored in registries and some contexts of the blob. // a blob stored in registries and some contexts of the blob.
type Source struct { type Source struct {
// Hosts is a registry configuration where this blob is stored. // Hosts is a registry configuration where this blob is stored.
Hosts RegistryHosts Hosts docker.RegistryHosts
// Name is an image reference which contains this blob. // Name is an image reference which contains this blob.
Name reference.Spec Name reference.Spec
@ -70,19 +67,11 @@ const (
// targetImageLayersLabel is a label which contains layer digests contained in // targetImageLayersLabel is a label which contains layer digests contained in
// the target image. // the target image.
targetImageLayersLabel = "containerd.io/snapshot/remote/stargz.layers" targetImageLayersLabel = "containerd.io/snapshot/remote/stargz.layers"
// targetImageURLsLabelPrefix is a label prefix which constructs a map from the layer index to
// urls of the layer descriptor.
targetImageURLsLabelPrefix = "containerd.io/snapshot/remote/urls."
// targetURsLLabel is a label which contains layer URL. This is only used to pass URL from containerd
// to snapshotter.
targetURLsLabel = "containerd.io/snapshot/remote/urls"
) )
// FromDefaultLabels returns a function for converting snapshot labels to // FromDefaultLabels returns a function for converting snapshot labels to
// source information based on labels. // source information based on labels.
func FromDefaultLabels(hosts RegistryHosts) GetSources { func FromDefaultLabels(hosts docker.RegistryHosts) GetSources {
return func(labels map[string]string) ([]Source, error) { return func(labels map[string]string) ([]Source, error) {
refStr, ok := labels[targetRefLabel] refStr, ok := labels[targetRefLabel]
if !ok { if !ok {
@ -102,38 +91,31 @@ func FromDefaultLabels(hosts RegistryHosts) GetSources {
return nil, err return nil, err
} }
var neighboringLayers []ocispec.Descriptor var layersDgst []digest.Digest
if l, ok := labels[targetImageLayersLabel]; ok { if l, ok := labels[targetImageLayersLabel]; ok {
layersStr := strings.Split(l, ",") layersStr := strings.Split(l, ",")
for i, l := range layersStr { for _, l := range layersStr {
d, err := digest.Parse(l) d, err := digest.Parse(l)
if err != nil { if err != nil {
return nil, err return nil, err
} }
if d.String() != target.String() { if d.String() != target.String() {
desc := ocispec.Descriptor{Digest: d} layersDgst = append(layersDgst, d)
if urls, ok := labels[targetImageURLsLabelPrefix+fmt.Sprintf("%d", i)]; ok {
desc.URLs = strings.Split(urls, ",")
}
neighboringLayers = append(neighboringLayers, desc)
} }
} }
} }
targetDesc := ocispec.Descriptor{ var layers []ocispec.Descriptor
Digest: target, for _, dgst := range append([]digest.Digest{target}, layersDgst...) {
Annotations: labels, layers = append(layers, ocispec.Descriptor{Digest: dgst})
}
if targetURLs, ok := labels[targetURLsLabel]; ok {
targetDesc.URLs = append(targetDesc.URLs, strings.Split(targetURLs, ",")...)
} }
return []Source{ return []Source{
{ {
Hosts: hosts, Hosts: hosts,
Name: refspec, Name: refspec,
Target: targetDesc, Target: ocispec.Descriptor{Digest: target},
Manifest: ocispec.Manifest{Layers: append([]ocispec.Descriptor{targetDesc}, neighboringLayers...)}, Manifest: ocispec.Manifest{Layers: layers},
}, },
}, nil }, nil
} }
@ -161,7 +143,7 @@ func AppendDefaultLabelsHandlerWrapper(ref string, prefetchSize int64) func(f im
c.Annotations[targetRefLabel] = ref c.Annotations[targetRefLabel] = ref
c.Annotations[targetDigestLabel] = c.Digest.String() c.Annotations[targetDigestLabel] = c.Digest.String()
var layers string var layers string
for i, l := range children[i:] { for _, l := range children[i:] {
if images.IsLayerType(l.MediaType) { if images.IsLayerType(l.MediaType) {
ls := fmt.Sprintf("%s,", l.Digest.String()) ls := fmt.Sprintf("%s,", l.Digest.String())
// This avoids the label hits the size limitation. // This avoids the label hits the size limitation.
@ -170,17 +152,10 @@ func AppendDefaultLabelsHandlerWrapper(ref string, prefetchSize int64) func(f im
break break
} }
layers += ls layers += ls
// Store URLs of the neighbouring layer as well.
urlsKey := targetImageURLsLabelPrefix + fmt.Sprintf("%d", i)
c.Annotations[urlsKey] = appendWithValidation(urlsKey, l.URLs)
} }
} }
c.Annotations[targetImageLayersLabel] = strings.TrimSuffix(layers, ",") c.Annotations[targetImageLayersLabel] = strings.TrimSuffix(layers, ",")
c.Annotations[config.TargetPrefetchSizeLabel] = fmt.Sprintf("%d", prefetchSize) c.Annotations[config.TargetPrefetchSizeLabel] = fmt.Sprintf("%d", prefetchSize)
// store URL in annotation to let containerd to pass it to the snapshotter
c.Annotations[targetURLsLabel] = appendWithValidation(targetURLsLabel, c.URLs)
} }
} }
} }
@ -188,84 +163,3 @@ func AppendDefaultLabelsHandlerWrapper(ref string, prefetchSize int64) func(f im
}) })
} }
} }
func appendWithValidation(key string, values []string) string {
var v string
for _, u := range values {
s := fmt.Sprintf("%s,", u)
if err := labels.Validate(key, v+s); err != nil {
break
}
v += s
}
return strings.TrimSuffix(v, ",")
}
// TODO: switch to "github.com/containerd/containerd/pkg/snapshotters" once all tools using
//
// stargz-snapshotter (e.g. k3s) move to containerd version where that pkg is available.
const (
// targetImageLayersLabel is a label which contains layer digests contained in
// the target image and will be passed to snapshotters for preparing layers in
// parallel. Skipping some layers is allowed and only affects performance.
targetImageLayersLabelContainerd = "containerd.io/snapshot/cri.image-layers"
)
// AppendExtraLabelsHandler adds optional labels that aren't provided by
// "github.com/containerd/containerd/pkg/snapshotters" but can be used for stargz snapshotter's extra functionalities.
func AppendExtraLabelsHandler(prefetchSize int64, wrapper func(images.Handler) images.Handler) func(images.Handler) images.Handler {
return func(f images.Handler) images.Handler {
return images.HandlerFunc(func(ctx context.Context, desc ocispec.Descriptor) ([]ocispec.Descriptor, error) {
// Let the wrapped handler produce the child descriptors first, then
// decorate layer descriptors of manifests with extra annotations.
children, err := wrapper(f).Handle(ctx, desc)
if err != nil {
return nil, err
}
switch desc.MediaType {
case ocispec.MediaTypeImageManifest, images.MediaTypeDockerSchema2Manifest:
for i := range children {
c := &children[i]
if !images.IsLayerType(c.MediaType) {
continue
}
if _, ok := c.Annotations[targetURLsLabel]; !ok { // nop if this key is already set
c.Annotations[targetURLsLabel] = appendWithValidation(targetURLsLabel, c.URLs)
}
if _, ok := c.Annotations[config.TargetPrefetchSizeLabel]; !ok { // nop if this key is already set
c.Annotations[config.TargetPrefetchSizeLabel] = fmt.Sprintf("%d", prefetchSize)
}
// Store URLs of the neighbouring layer as well.
// The layer list comes from the containerd-provided annotation; if a
// digest in it doesn't parse, the whole handler fails.
nlayers, ok := c.Annotations[targetImageLayersLabelContainerd]
if !ok {
continue
}
for j, dstr := range strings.Split(nlayers, ",") {
d, err := digest.Parse(dstr)
if err != nil {
return nil, err
}
l, ok := layerFromDigest(children, d)
if !ok {
continue
}
urlsKey := targetImageURLsLabelPrefix + fmt.Sprintf("%d", j)
if _, ok := c.Annotations[urlsKey]; !ok { // nop if this key is already set
c.Annotations[urlsKey] = appendWithValidation(urlsKey, l.URLs)
}
}
}
}
return children, nil
})
}
}
// layerFromDigest looks up the descriptor with the given digest among layers.
// The boolean result is true only when a match is found AND it is a layer
// media type.
func layerFromDigest(layers []ocispec.Descriptor, target digest.Digest) (ocispec.Descriptor, bool) {
	for i := range layers {
		if layers[i].Digest != target {
			continue
		}
		return layers[i], images.IsLayerType(layers[i].MediaType)
	}
	return ocispec.Descriptor{}, false
}

View File

@ -1,566 +0,0 @@
// Code generated by protoc-gen-gogo. DO NOT EDIT.
// source: api.proto
package api
import (
context "context"
fmt "fmt"
proto "github.com/gogo/protobuf/proto"
grpc "google.golang.org/grpc"
codes "google.golang.org/grpc/codes"
status "google.golang.org/grpc/status"
math "math"
)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = fmt.Errorf
var _ = math.Inf
// This is a compile-time assertion to ensure that this generated file
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
const _ = proto.GoGoProtoPackageIsVersion3 // please upgrade the proto package
// StatusRequest is the (empty) request message for the Status RPC.
// NOTE: generated by protoc-gen-gogo (see file header) — do not edit by hand;
// regenerate from api.proto instead.
type StatusRequest struct {
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *StatusRequest) Reset() { *m = StatusRequest{} }
func (m *StatusRequest) String() string { return proto.CompactTextString(m) }
func (*StatusRequest) ProtoMessage() {}
func (*StatusRequest) Descriptor() ([]byte, []int) {
return fileDescriptor_00212fb1f9d3bf1c, []int{0}
}
func (m *StatusRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StatusRequest.Unmarshal(m, b)
}
func (m *StatusRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StatusRequest.Marshal(b, m, deterministic)
}
func (m *StatusRequest) XXX_Merge(src proto.Message) {
xxx_messageInfo_StatusRequest.Merge(m, src)
}
func (m *StatusRequest) XXX_Size() int {
return xxx_messageInfo_StatusRequest.Size(m)
}
func (m *StatusRequest) XXX_DiscardUnknown() {
xxx_messageInfo_StatusRequest.DiscardUnknown(m)
}
var xxx_messageInfo_StatusRequest proto.InternalMessageInfo
// InitRequest carries the snapshotter root directory and the JSON-encoded
// configuration used to initialize the fusemanager.
type InitRequest struct {
Root string `protobuf:"bytes,1,opt,name=root,proto3" json:"root,omitempty"`
Config []byte `protobuf:"bytes,2,opt,name=config,proto3" json:"config,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *InitRequest) Reset() { *m = InitRequest{} }
func (m *InitRequest) String() string { return proto.CompactTextString(m) }
func (*InitRequest) ProtoMessage() {}
func (*InitRequest) Descriptor() ([]byte, []int) {
return fileDescriptor_00212fb1f9d3bf1c, []int{1}
}
func (m *InitRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_InitRequest.Unmarshal(m, b)
}
func (m *InitRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_InitRequest.Marshal(b, m, deterministic)
}
func (m *InitRequest) XXX_Merge(src proto.Message) {
xxx_messageInfo_InitRequest.Merge(m, src)
}
func (m *InitRequest) XXX_Size() int {
return xxx_messageInfo_InitRequest.Size(m)
}
func (m *InitRequest) XXX_DiscardUnknown() {
xxx_messageInfo_InitRequest.DiscardUnknown(m)
}
var xxx_messageInfo_InitRequest proto.InternalMessageInfo
func (m *InitRequest) GetRoot() string {
if m != nil {
return m.Root
}
return ""
}
func (m *InitRequest) GetConfig() []byte {
if m != nil {
return m.Config
}
return nil
}
// MountRequest identifies a mountpoint and the snapshot labels to mount with.
type MountRequest struct {
Mountpoint string `protobuf:"bytes,1,opt,name=mountpoint,proto3" json:"mountpoint,omitempty"`
Labels map[string]string `protobuf:"bytes,2,rep,name=labels,proto3" json:"labels,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *MountRequest) Reset() { *m = MountRequest{} }
func (m *MountRequest) String() string { return proto.CompactTextString(m) }
func (*MountRequest) ProtoMessage() {}
func (*MountRequest) Descriptor() ([]byte, []int) {
return fileDescriptor_00212fb1f9d3bf1c, []int{2}
}
func (m *MountRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_MountRequest.Unmarshal(m, b)
}
func (m *MountRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_MountRequest.Marshal(b, m, deterministic)
}
func (m *MountRequest) XXX_Merge(src proto.Message) {
xxx_messageInfo_MountRequest.Merge(m, src)
}
func (m *MountRequest) XXX_Size() int {
return xxx_messageInfo_MountRequest.Size(m)
}
func (m *MountRequest) XXX_DiscardUnknown() {
xxx_messageInfo_MountRequest.DiscardUnknown(m)
}
var xxx_messageInfo_MountRequest proto.InternalMessageInfo
func (m *MountRequest) GetMountpoint() string {
if m != nil {
return m.Mountpoint
}
return ""
}
func (m *MountRequest) GetLabels() map[string]string {
if m != nil {
return m.Labels
}
return nil
}
// CheckRequest mirrors MountRequest and is used to verify an existing mount.
type CheckRequest struct {
Mountpoint string `protobuf:"bytes,1,opt,name=mountpoint,proto3" json:"mountpoint,omitempty"`
Labels map[string]string `protobuf:"bytes,2,rep,name=labels,proto3" json:"labels,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *CheckRequest) Reset() { *m = CheckRequest{} }
func (m *CheckRequest) String() string { return proto.CompactTextString(m) }
func (*CheckRequest) ProtoMessage() {}
func (*CheckRequest) Descriptor() ([]byte, []int) {
return fileDescriptor_00212fb1f9d3bf1c, []int{3}
}
func (m *CheckRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_CheckRequest.Unmarshal(m, b)
}
func (m *CheckRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_CheckRequest.Marshal(b, m, deterministic)
}
func (m *CheckRequest) XXX_Merge(src proto.Message) {
xxx_messageInfo_CheckRequest.Merge(m, src)
}
func (m *CheckRequest) XXX_Size() int {
return xxx_messageInfo_CheckRequest.Size(m)
}
func (m *CheckRequest) XXX_DiscardUnknown() {
xxx_messageInfo_CheckRequest.DiscardUnknown(m)
}
var xxx_messageInfo_CheckRequest proto.InternalMessageInfo
func (m *CheckRequest) GetMountpoint() string {
if m != nil {
return m.Mountpoint
}
return ""
}
func (m *CheckRequest) GetLabels() map[string]string {
if m != nil {
return m.Labels
}
return nil
}
// UnmountRequest identifies the mountpoint to unmount.
type UnmountRequest struct {
Mountpoint string `protobuf:"bytes,1,opt,name=mountpoint,proto3" json:"mountpoint,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *UnmountRequest) Reset() { *m = UnmountRequest{} }
func (m *UnmountRequest) String() string { return proto.CompactTextString(m) }
func (*UnmountRequest) ProtoMessage() {}
func (*UnmountRequest) Descriptor() ([]byte, []int) {
return fileDescriptor_00212fb1f9d3bf1c, []int{4}
}
func (m *UnmountRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_UnmountRequest.Unmarshal(m, b)
}
func (m *UnmountRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_UnmountRequest.Marshal(b, m, deterministic)
}
func (m *UnmountRequest) XXX_Merge(src proto.Message) {
xxx_messageInfo_UnmountRequest.Merge(m, src)
}
func (m *UnmountRequest) XXX_Size() int {
return xxx_messageInfo_UnmountRequest.Size(m)
}
func (m *UnmountRequest) XXX_DiscardUnknown() {
xxx_messageInfo_UnmountRequest.DiscardUnknown(m)
}
var xxx_messageInfo_UnmountRequest proto.InternalMessageInfo
func (m *UnmountRequest) GetMountpoint() string {
if m != nil {
return m.Mountpoint
}
return ""
}
// StatusResponse reports the fusemanager's status code.
type StatusResponse struct {
Status int32 `protobuf:"varint,1,opt,name=status,proto3" json:"status,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *StatusResponse) Reset() { *m = StatusResponse{} }
func (m *StatusResponse) String() string { return proto.CompactTextString(m) }
func (*StatusResponse) ProtoMessage() {}
func (*StatusResponse) Descriptor() ([]byte, []int) {
return fileDescriptor_00212fb1f9d3bf1c, []int{5}
}
func (m *StatusResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StatusResponse.Unmarshal(m, b)
}
func (m *StatusResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_StatusResponse.Marshal(b, m, deterministic)
}
func (m *StatusResponse) XXX_Merge(src proto.Message) {
xxx_messageInfo_StatusResponse.Merge(m, src)
}
func (m *StatusResponse) XXX_Size() int {
return xxx_messageInfo_StatusResponse.Size(m)
}
func (m *StatusResponse) XXX_DiscardUnknown() {
xxx_messageInfo_StatusResponse.DiscardUnknown(m)
}
var xxx_messageInfo_StatusResponse proto.InternalMessageInfo
func (m *StatusResponse) GetStatus() int32 {
if m != nil {
return m.Status
}
return 0
}
// Response is the (empty) generic reply message.
type Response struct {
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *Response) Reset() { *m = Response{} }
func (m *Response) String() string { return proto.CompactTextString(m) }
func (*Response) ProtoMessage() {}
func (*Response) Descriptor() ([]byte, []int) {
return fileDescriptor_00212fb1f9d3bf1c, []int{6}
}
func (m *Response) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_Response.Unmarshal(m, b)
}
func (m *Response) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_Response.Marshal(b, m, deterministic)
}
func (m *Response) XXX_Merge(src proto.Message) {
xxx_messageInfo_Response.Merge(m, src)
}
func (m *Response) XXX_Size() int {
return xxx_messageInfo_Response.Size(m)
}
func (m *Response) XXX_DiscardUnknown() {
xxx_messageInfo_Response.DiscardUnknown(m)
}
var xxx_messageInfo_Response proto.InternalMessageInfo
// init registers the message and map-entry types with the proto registry.
func init() {
proto.RegisterType((*StatusRequest)(nil), "fusemanager.StatusRequest")
proto.RegisterType((*InitRequest)(nil), "fusemanager.InitRequest")
proto.RegisterType((*MountRequest)(nil), "fusemanager.MountRequest")
proto.RegisterMapType((map[string]string)(nil), "fusemanager.MountRequest.LabelsEntry")
proto.RegisterType((*CheckRequest)(nil), "fusemanager.CheckRequest")
proto.RegisterMapType((map[string]string)(nil), "fusemanager.CheckRequest.LabelsEntry")
proto.RegisterType((*UnmountRequest)(nil), "fusemanager.UnmountRequest")
proto.RegisterType((*StatusResponse)(nil), "fusemanager.StatusResponse")
proto.RegisterType((*Response)(nil), "fusemanager.Response")
}
func init() { proto.RegisterFile("api.proto", fileDescriptor_00212fb1f9d3bf1c) }
var fileDescriptor_00212fb1f9d3bf1c = []byte{
// 386 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xac, 0x53, 0x51, 0x4b, 0xf3, 0x30,
0x14, 0xa5, 0xdd, 0xd6, 0xef, 0xdb, 0xed, 0x9c, 0x12, 0x54, 0x6a, 0x05, 0x19, 0x05, 0xa1, 0x2f,
0x6b, 0x65, 0x3e, 0xe8, 0x84, 0x3d, 0xa8, 0x28, 0x08, 0xee, 0xa5, 0xc3, 0x17, 0xdf, 0xb2, 0x92,
0x75, 0x65, 0x6b, 0x52, 0x9b, 0x74, 0x30, 0x7f, 0x91, 0xff, 0xc5, 0x3f, 0x25, 0xcd, 0xba, 0x91,
0x8a, 0x13, 0x84, 0xbd, 0xe5, 0x24, 0xf7, 0xdc, 0x9e, 0x7b, 0xcf, 0x29, 0x34, 0x71, 0x1a, 0x7b,
0x69, 0xc6, 0x04, 0x43, 0xe6, 0x24, 0xe7, 0x24, 0xc1, 0x14, 0x47, 0x24, 0x73, 0xf6, 0x61, 0x6f,
0x24, 0xb0, 0xc8, 0x79, 0x40, 0xde, 0x72, 0xc2, 0x85, 0xd3, 0x07, 0xf3, 0x89, 0xc6, 0xa2, 0x84,
0x08, 0x41, 0x3d, 0x63, 0x4c, 0x58, 0x5a, 0x47, 0x73, 0x9b, 0x81, 0x3c, 0xa3, 0x63, 0x30, 0x42,
0x46, 0x27, 0x71, 0x64, 0xe9, 0x1d, 0xcd, 0x6d, 0x05, 0x25, 0x72, 0x3e, 0x34, 0x68, 0x0d, 0x59,
0x4e, 0x37, 0xe4, 0x33, 0x80, 0xa4, 0xc0, 0x29, 0x8b, 0xe9, 0xba, 0x85, 0x72, 0x83, 0x06, 0x60,
0xcc, 0xf1, 0x98, 0xcc, 0xb9, 0xa5, 0x77, 0x6a, 0xae, 0xd9, 0x3b, 0xf7, 0x14, 0x69, 0x9e, 0xda,
0xca, 0x7b, 0x96, 0x75, 0x0f, 0x54, 0x64, 0xcb, 0xa0, 0x24, 0xd9, 0x7d, 0x30, 0x95, 0x6b, 0x74,
0x00, 0xb5, 0x19, 0x59, 0x96, 0x9f, 0x29, 0x8e, 0xe8, 0x10, 0x1a, 0x0b, 0x3c, 0xcf, 0x89, 0xd4,
0xd9, 0x0c, 0x56, 0xe0, 0x46, 0xbf, 0xd6, 0xa4, 0xd4, 0xfb, 0x29, 0x09, 0x67, 0xbb, 0x91, 0xaa,
0xb6, 0xda, 0xb5, 0xd4, 0x0b, 0x68, 0xbf, 0xd0, 0xe4, 0x0f, 0x6b, 0x75, 0x5c, 0x68, 0xaf, 0x3d,
0xe5, 0x29, 0xa3, 0x9c, 0x14, 0x8e, 0x71, 0x79, 0x23, 0xab, 0x1b, 0x41, 0x89, 0x1c, 0x80, 0xff,
0xeb, 0x9a, 0xde, 0xa7, 0x0e, 0xd6, 0x48, 0xe0, 0x2c, 0x7a, 0x7f, 0xcc, 0x39, 0x19, 0xae, 0x26,
0x1b, 0x91, 0x6c, 0x11, 0x87, 0x04, 0xdd, 0x82, 0xb1, 0x6a, 0x89, 0xec, 0xca, 0xe0, 0x95, 0xec,
0xd8, 0xa7, 0x3f, 0xbe, 0x95, 0x1a, 0xae, 0xa0, 0x5e, 0x04, 0x0b, 0x59, 0x95, 0x22, 0x25, 0x6b,
0xf6, 0x51, 0xe5, 0x65, 0x43, 0xec, 0x43, 0x43, 0x46, 0x01, 0x9d, 0x6c, 0x8d, 0xc7, 0x2f, 0x54,
0x69, 0xcd, 0x37, 0xaa, 0x6a, 0xd7, 0x36, 0xea, 0x00, 0xfe, 0x95, 0x6b, 0x47, 0xd5, 0xb1, 0xaa,
0x66, 0x6c, 0xa1, 0xdf, 0xf9, 0xaf, 0xdd, 0x28, 0x16, 0xd3, 0x7c, 0xec, 0x85, 0x2c, 0xf1, 0xb9,
0xdc, 0x6b, 0x97, 0x53, 0x9c, 0xf2, 0x29, 0x13, 0x82, 0x64, 0xbe, 0xc2, 0xf2, 0x71, 0x1a, 0x8f,
0x0d, 0xf9, 0x73, 0x5e, 0x7e, 0x05, 0x00, 0x00, 0xff, 0xff, 0x9d, 0x24, 0xe1, 0x41, 0xa9, 0x03,
0x00, 0x00,
}
// Reference imports to suppress errors if they are not otherwise used.
var _ context.Context
var _ grpc.ClientConn
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
const _ = grpc.SupportPackageIsVersion4
// StargzFuseManagerServiceClient is the client API for StargzFuseManagerService service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream.
type StargzFuseManagerServiceClient interface {
Status(ctx context.Context, in *StatusRequest, opts ...grpc.CallOption) (*StatusResponse, error)
Init(ctx context.Context, in *InitRequest, opts ...grpc.CallOption) (*Response, error)
Mount(ctx context.Context, in *MountRequest, opts ...grpc.CallOption) (*Response, error)
Check(ctx context.Context, in *CheckRequest, opts ...grpc.CallOption) (*Response, error)
Unmount(ctx context.Context, in *UnmountRequest, opts ...grpc.CallOption) (*Response, error)
}
// stargzFuseManagerServiceClient is the generated concrete client; each method
// issues a unary RPC over the underlying connection.
type stargzFuseManagerServiceClient struct {
cc *grpc.ClientConn
}
// NewStargzFuseManagerServiceClient wraps a gRPC connection in the generated client stub.
func NewStargzFuseManagerServiceClient(cc *grpc.ClientConn) StargzFuseManagerServiceClient {
return &stargzFuseManagerServiceClient{cc}
}
func (c *stargzFuseManagerServiceClient) Status(ctx context.Context, in *StatusRequest, opts ...grpc.CallOption) (*StatusResponse, error) {
out := new(StatusResponse)
err := c.cc.Invoke(ctx, "/fusemanager.StargzFuseManagerService/Status", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *stargzFuseManagerServiceClient) Init(ctx context.Context, in *InitRequest, opts ...grpc.CallOption) (*Response, error) {
out := new(Response)
err := c.cc.Invoke(ctx, "/fusemanager.StargzFuseManagerService/Init", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *stargzFuseManagerServiceClient) Mount(ctx context.Context, in *MountRequest, opts ...grpc.CallOption) (*Response, error) {
out := new(Response)
err := c.cc.Invoke(ctx, "/fusemanager.StargzFuseManagerService/Mount", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *stargzFuseManagerServiceClient) Check(ctx context.Context, in *CheckRequest, opts ...grpc.CallOption) (*Response, error) {
out := new(Response)
err := c.cc.Invoke(ctx, "/fusemanager.StargzFuseManagerService/Check", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
func (c *stargzFuseManagerServiceClient) Unmount(ctx context.Context, in *UnmountRequest, opts ...grpc.CallOption) (*Response, error) {
out := new(Response)
err := c.cc.Invoke(ctx, "/fusemanager.StargzFuseManagerService/Unmount", in, out, opts...)
if err != nil {
return nil, err
}
return out, nil
}
// StargzFuseManagerServiceServer is the server API for StargzFuseManagerService service.
type StargzFuseManagerServiceServer interface {
Status(context.Context, *StatusRequest) (*StatusResponse, error)
Init(context.Context, *InitRequest) (*Response, error)
Mount(context.Context, *MountRequest) (*Response, error)
Check(context.Context, *CheckRequest) (*Response, error)
Unmount(context.Context, *UnmountRequest) (*Response, error)
}
// UnimplementedStargzFuseManagerServiceServer can be embedded to have forward compatible implementations.
// Every method returns codes.Unimplemented.
type UnimplementedStargzFuseManagerServiceServer struct {
}
func (*UnimplementedStargzFuseManagerServiceServer) Status(ctx context.Context, req *StatusRequest) (*StatusResponse, error) {
return nil, status.Errorf(codes.Unimplemented, "method Status not implemented")
}
func (*UnimplementedStargzFuseManagerServiceServer) Init(ctx context.Context, req *InitRequest) (*Response, error) {
return nil, status.Errorf(codes.Unimplemented, "method Init not implemented")
}
func (*UnimplementedStargzFuseManagerServiceServer) Mount(ctx context.Context, req *MountRequest) (*Response, error) {
return nil, status.Errorf(codes.Unimplemented, "method Mount not implemented")
}
func (*UnimplementedStargzFuseManagerServiceServer) Check(ctx context.Context, req *CheckRequest) (*Response, error) {
return nil, status.Errorf(codes.Unimplemented, "method Check not implemented")
}
func (*UnimplementedStargzFuseManagerServiceServer) Unmount(ctx context.Context, req *UnmountRequest) (*Response, error) {
return nil, status.Errorf(codes.Unimplemented, "method Unmount not implemented")
}
// RegisterStargzFuseManagerServiceServer registers srv's handlers with the gRPC server.
func RegisterStargzFuseManagerServiceServer(s *grpc.Server, srv StargzFuseManagerServiceServer) {
s.RegisterService(&_StargzFuseManagerService_serviceDesc, srv)
}
// The _*_Handler functions below are generated unary-handler glue: decode the
// request, optionally run the server interceptor, then dispatch to srv.
func _StargzFuseManagerService_Status_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(StatusRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(StargzFuseManagerServiceServer).Status(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/fusemanager.StargzFuseManagerService/Status",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(StargzFuseManagerServiceServer).Status(ctx, req.(*StatusRequest))
}
return interceptor(ctx, in, info, handler)
}
func _StargzFuseManagerService_Init_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(InitRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(StargzFuseManagerServiceServer).Init(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/fusemanager.StargzFuseManagerService/Init",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(StargzFuseManagerServiceServer).Init(ctx, req.(*InitRequest))
}
return interceptor(ctx, in, info, handler)
}
func _StargzFuseManagerService_Mount_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(MountRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(StargzFuseManagerServiceServer).Mount(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/fusemanager.StargzFuseManagerService/Mount",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(StargzFuseManagerServiceServer).Mount(ctx, req.(*MountRequest))
}
return interceptor(ctx, in, info, handler)
}
func _StargzFuseManagerService_Check_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(CheckRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(StargzFuseManagerServiceServer).Check(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/fusemanager.StargzFuseManagerService/Check",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(StargzFuseManagerServiceServer).Check(ctx, req.(*CheckRequest))
}
return interceptor(ctx, in, info, handler)
}
func _StargzFuseManagerService_Unmount_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
in := new(UnmountRequest)
if err := dec(in); err != nil {
return nil, err
}
if interceptor == nil {
return srv.(StargzFuseManagerServiceServer).Unmount(ctx, in)
}
info := &grpc.UnaryServerInfo{
Server: srv,
FullMethod: "/fusemanager.StargzFuseManagerService/Unmount",
}
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
return srv.(StargzFuseManagerServiceServer).Unmount(ctx, req.(*UnmountRequest))
}
return interceptor(ctx, in, info, handler)
}
// _StargzFuseManagerService_serviceDesc binds RPC method names to the handler glue above.
var _StargzFuseManagerService_serviceDesc = grpc.ServiceDesc{
ServiceName: "fusemanager.StargzFuseManagerService",
HandlerType: (*StargzFuseManagerServiceServer)(nil),
Methods: []grpc.MethodDesc{
{
MethodName: "Status",
Handler: _StargzFuseManagerService_Status_Handler,
},
{
MethodName: "Init",
Handler: _StargzFuseManagerService_Init_Handler,
},
{
MethodName: "Mount",
Handler: _StargzFuseManagerService_Mount_Handler,
},
{
MethodName: "Check",
Handler: _StargzFuseManagerService_Check_Handler,
},
{
MethodName: "Unmount",
Handler: _StargzFuseManagerService_Unmount_Handler,
},
},
Streams: []grpc.StreamDesc{},
Metadata: "api.proto",
}

View File

@ -1,58 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
syntax = "proto3";
option go_package = "github.com/stargz-snapshotter/fusemanager/api";
package fusemanager;
// StargzFuseManagerService controls the lifecycle of FUSE mounts handled by
// the stargz fusemanager daemon.
service StargzFuseManagerService {
rpc Status (StatusRequest) returns (StatusResponse);
rpc Init (InitRequest) returns (Response);
rpc Mount (MountRequest) returns (Response);
rpc Check (CheckRequest) returns (Response);
rpc Unmount (UnmountRequest) returns (Response);
}
// StatusRequest is empty; Status reports the daemon's status code.
message StatusRequest {
}
// InitRequest carries the snapshotter root path and a JSON-encoded configuration.
message InitRequest {
string root = 1;
bytes config = 2;
}
// MountRequest identifies a mountpoint and the snapshot labels to mount with.
message MountRequest {
string mountpoint = 1;
map<string, string> labels = 2;
}
// CheckRequest mirrors MountRequest and verifies an existing mount.
message CheckRequest {
string mountpoint = 1;
map<string, string> labels = 2;
}
message UnmountRequest {
string mountpoint = 1;
}
// StatusResponse carries the daemon's status code.
message StatusResponse {
int32 status = 1;
}
// Response is an empty generic acknowledgement.
message Response {
}

View File

@ -1,19 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package api
//go:generate protoc --gogo_out=paths=source_relative,plugins=grpc:. api.proto

View File

@ -1,141 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fusemanager
import (
"context"
"encoding/json"
"fmt"
"github.com/containerd/containerd/v2/defaults"
"github.com/containerd/containerd/v2/pkg/dialer"
"github.com/containerd/log"
"google.golang.org/grpc"
"google.golang.org/grpc/backoff"
"google.golang.org/grpc/credentials/insecure"
pb "github.com/containerd/stargz-snapshotter/fusemanager/api"
"github.com/containerd/stargz-snapshotter/snapshot"
)
// Client forwards filesystem operations (Mount/Check/Unmount) to a
// fusemanager process over the gRPC API in the fusemanager api package.
type Client struct {
client pb.StargzFuseManagerServiceClient
}
// NewManagerClient connects to the fusemanager listening on socket,
// initializes it with root and config, and returns the resulting client as a
// snapshot.FileSystem.
func NewManagerClient(ctx context.Context, root, socket string, config *Config) (snapshot.FileSystem, error) {
	grpcCli, err := newClient(socket)
	if err != nil {
		return nil, err
	}
	c := &Client{client: grpcCli}
	if err := c.init(ctx, root, config); err != nil {
		return nil, err
	}
	return c, nil
}
// newClient dials the fusemanager's unix socket and returns a gRPC client stub.
func newClient(socket string) (pb.StargzFuseManagerServiceClient, error) {
	dialOpts := []grpc.DialOption{
		grpc.WithTransportCredentials(insecure.NewCredentials()),
		grpc.WithConnectParams(grpc.ConnectParams{Backoff: backoff.DefaultConfig}),
		grpc.WithContextDialer(dialer.ContextDialer),
		grpc.WithDefaultCallOptions(
			grpc.MaxCallRecvMsgSize(defaults.DefaultMaxRecvMsgSize),
			grpc.MaxCallSendMsgSize(defaults.DefaultMaxSendMsgSize),
		),
	}
	conn, err := grpc.NewClient(fmt.Sprintf("unix://%s", socket), dialOpts...)
	if err != nil {
		return nil, err
	}
	return pb.NewStargzFuseManagerServiceClient(conn), nil
}
// init sends the InitRequest carrying root and the JSON-encoded config to the
// fusemanager.
func (cli *Client) init(ctx context.Context, root string, config *Config) error {
	data, err := json.Marshal(config)
	if err != nil {
		return err
	}
	if _, err := cli.client.Init(ctx, &pb.InitRequest{Root: root, Config: data}); err != nil {
		log.G(ctx).WithError(err).Errorf("failed to call Init")
		return err
	}
	return nil
}
// Mount asks the fusemanager to mount the filesystem at mountpoint using the
// given snapshot labels.
func (cli *Client) Mount(ctx context.Context, mountpoint string, labels map[string]string) error {
	if _, err := cli.client.Mount(ctx, &pb.MountRequest{Mountpoint: mountpoint, Labels: labels}); err != nil {
		log.G(ctx).WithError(err).Errorf("failed to call Mount")
		return err
	}
	return nil
}
// Check asks the fusemanager to verify the mount at mountpoint with the given
// snapshot labels.
func (cli *Client) Check(ctx context.Context, mountpoint string, labels map[string]string) error {
	if _, err := cli.client.Check(ctx, &pb.CheckRequest{Mountpoint: mountpoint, Labels: labels}); err != nil {
		log.G(ctx).WithError(err).Errorf("failed to call Check")
		return err
	}
	return nil
}
// Unmount asks the fusemanager to tear down the mount at mountpoint.
func (cli *Client) Unmount(ctx context.Context, mountpoint string) error {
	if _, err := cli.client.Unmount(ctx, &pb.UnmountRequest{Mountpoint: mountpoint}); err != nil {
		log.G(ctx).WithError(err).Errorf("failed to call Unmount")
		return err
	}
	return nil
}

View File

@ -1,259 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fusemanager
import (
"context"
"flag"
"fmt"
golog "log"
"net"
"os"
"os/exec"
"os/signal"
"path/filepath"
"syscall"
"github.com/containerd/log"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"google.golang.org/grpc"
pb "github.com/containerd/stargz-snapshotter/fusemanager/api"
"github.com/containerd/stargz-snapshotter/version"
)
// Command-line configuration for the fusemanager binary, populated by
// parseFlags before any of it is read.
var (
debugFlag bool
versionFlag bool
fuseStoreAddr string
address string
logLevel string
logPath string
action string
)
// parseFlags registers the fusemanager's command-line flags and parses them
// into the package-level flag variables.
func parseFlags() {
flag.BoolVar(&debugFlag, "debug", false, "enable debug output in logs")
flag.BoolVar(&versionFlag, "v", false, "show the fusemanager version and exit")
flag.StringVar(&action, "action", "", "action of fusemanager")
flag.StringVar(&fuseStoreAddr, "fusestore-path", "/var/lib/containerd-stargz-grpc/fusestore.db", "address for the fusemanager's store")
flag.StringVar(&address, "address", "/run/containerd-stargz-grpc/fuse-manager.sock", "address for the fusemanager's gRPC socket")
flag.StringVar(&logLevel, "log-level", logrus.InfoLevel.String(), "set the logging level [trace, debug, info, warn, error, fatal, panic]")
flag.StringVar(&logPath, "log-path", "", "path to fusemanager's logs, no log recorded if empty")
flag.Parse()
}
// Run executes the fusemanager CLI and exits with a non-zero status on
// failure.
func Run() {
	if err := run(); err != nil {
		// Fix: terminate the diagnostic with a newline so it does not run
		// into the caller's next shell prompt.
		fmt.Fprintf(os.Stderr, "failed to run fusemanager: %v\n", err)
		os.Exit(1)
	}
}
// run parses flags and dispatches: "-v" prints version information,
// "-action start" forks a detached fusemanager process, and otherwise this
// process serves as the fusemanager daemon itself.
func run() error {
parseFlags()
if versionFlag {
fmt.Printf("%s:\n", os.Args[0])
fmt.Println(" Version: ", version.Version)
fmt.Println(" Revision:", version.Revision)
fmt.Println("")
return nil
}
if fuseStoreAddr == "" || address == "" {
return fmt.Errorf("fusemanager fusestore and socket path cannot be empty")
}
ctx := log.WithLogger(context.Background(), log.L)
switch action {
case "start":
// "start" self-invokes the binary in the background (see startNew).
return startNew(ctx, logPath, address, fuseStoreAddr, logLevel)
default:
return runFuseManager(ctx)
}
}
// startNew self-invokes the current executable as a detached background
// fusemanager (its own process group), optionally redirecting the child's
// stdout/stderr to logPath, and waits until the new daemon reports ready.
func startNew(ctx context.Context, logPath, address, fusestore, logLevel string) error {
	self, err := os.Executable()
	if err != nil {
		return err
	}
	cwd, err := os.Getwd()
	if err != nil {
		return err
	}
	args := []string{
		"-address", address,
		"-fusestore-path", fusestore,
		"-log-level", logLevel,
	}
	// we use shim-like approach to start new fusemanager process by self-invoking in the background
	// and detach it from parent
	cmd := exec.CommandContext(ctx, self, args...)
	cmd.Dir = cwd
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Setpgid: true,
	}
	if logPath != "" {
		if err := os.Remove(logPath); err != nil && !os.IsNotExist(err) {
			return err
		}
		file, err := os.Create(logPath)
		if err != nil {
			return err
		}
		// Fix: the child gets its own duplicate of this descriptor on Start,
		// so close the parent's copy instead of leaking it.
		defer file.Close()
		cmd.Stdout = file
		cmd.Stderr = file
	}
	if err := cmd.Start(); err != nil {
		return err
	}
	// Reap the detached child so it doesn't linger as a zombie.
	go cmd.Wait()
	if ready, err := waitUntilReady(ctx); err != nil {
		return fmt.Errorf("failed to start new fusemanager: %w", err)
	} else if !ready {
		return fmt.Errorf("failed to start new fusemanager, fusemanager not ready")
	}
	return nil
}
// waitUntilReady waits until fusemanager is ready to accept requests
// by issuing a Status RPC against the package-level socket address.
// It returns (false, nil) when the daemon is reachable but reports
// FuseManagerNotReady, and (false, err) when the RPC itself fails.
func waitUntilReady(ctx context.Context) (bool, error) {
grpcCli, err := newClient(address)
if err != nil {
return false, err
}
resp, err := grpcCli.Status(ctx, &pb.StatusRequest{})
if err != nil {
log.G(ctx).WithError(err).Errorf("failed to call Status")
return false, err
}
if resp.Status == FuseManagerNotReady {
return false, nil
}
return true, nil
}
// runFuseManager runs this process as the fusemanager daemon: it configures
// JSON logging at the requested level, listens on the configured unix socket,
// serves the gRPC API, and shuts down on SIGINT/SIGTERM or a server error.
func runFuseManager(ctx context.Context) error {
lvl, err := logrus.ParseLevel(logLevel)
if err != nil {
return fmt.Errorf("failed to prepare logger: %w", err)
}
logrus.SetLevel(lvl)
logrus.SetFormatter(&logrus.JSONFormatter{
TimestampFormat: log.RFC3339NanoFixed,
})
golog.SetOutput(log.G(ctx).WriterLevel(logrus.DebugLevel))
// Prepare the directory for the socket
if err := os.MkdirAll(filepath.Dir(address), 0700); err != nil {
return fmt.Errorf("failed to create directory %s: %w", filepath.Dir(address), err)
}
// Try to remove the socket file to avoid EADDRINUSE
if err := os.Remove(address); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to remove old socket file: %w", err)
}
l, err := net.Listen("unix", address)
if err != nil {
return fmt.Errorf("failed to listen socket: %w", err)
}
server := grpc.NewServer()
fm, err := NewFuseManager(ctx, l, server, fuseStoreAddr, address)
if err != nil {
return fmt.Errorf("failed to configure manager server: %w", err)
}
pb.RegisterStargzFuseManagerServiceServer(server, fm)
// Serve in the background; the buffered channel lets the goroutine exit
// even if we leave via the signal path below.
errCh := make(chan error, 1)
go func() {
if err := server.Serve(l); err != nil {
errCh <- fmt.Errorf("error on serving via socket %q: %w", address, err)
}
}()
var s os.Signal
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, unix.SIGINT, unix.SIGTERM)
select {
case s = <-sigCh:
log.G(ctx).Infof("Got %v", s)
case err := <-errCh:
log.G(ctx).WithError(err).Warnf("error during running the server")
}
// Stop the gRPC server before closing the manager's state.
server.Stop()
if err = fm.Close(ctx); err != nil {
return fmt.Errorf("failed to close fuse manager: %w", err)
}
return nil
}
func StartFuseManager(ctx context.Context, executable, address, fusestore, logLevel, logPath string) (newlyStarted bool, err error) {
// if socket exists, do not start it
if _, err := os.Stat(address); err == nil {
return false, nil
} else if !os.IsNotExist(err) {
return false, err
}
if _, err := os.Stat(executable); err != nil {
return false, fmt.Errorf("failed to stat fusemanager binary: %q", executable)
}
args := []string{
"-action", "start",
"-address", address,
"-fusestore-path", fusestore,
"-log-level", logLevel,
"-log-path", logPath,
}
cmd := exec.Command(executable, args...)
if err := cmd.Start(); err != nil {
return false, err
}
if err := cmd.Wait(); err != nil {
return false, err
}
return true, nil
}

View File

@ -1,235 +0,0 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fusemanager
import (
"context"
"encoding/json"
"fmt"
"net"
"os"
"path/filepath"
"testing"
pb "github.com/containerd/stargz-snapshotter/fusemanager/api"
"github.com/containerd/stargz-snapshotter/service"
"google.golang.org/grpc"
)
// mockFileSystem implements snapshot.FileSystem for testing. Each operation
// can be made to fail via its canned error field, and the *Called flags
// record which operations were invoked.
type mockFileSystem struct {
	t             *testing.T      // owning test; stored but not otherwise used by the mock
	mountErr      error           // returned by Mount when non-nil
	checkErr      error           // returned by Check when non-nil
	unmountErr    error           // returned by Unmount when non-nil
	mountPoints   map[string]bool // set of paths currently "mounted"
	checkCalled   bool            // true once Check has been invoked
	mountCalled   bool            // true once Mount has been invoked
	unmountCalled bool            // true once Unmount has been invoked
}
// newMockFileSystem returns a mockFileSystem with an initialized mount table
// and no canned errors.
func newMockFileSystem(t *testing.T) *mockFileSystem {
	m := &mockFileSystem{t: t}
	m.mountPoints = map[string]bool{}
	return m
}
// Mount records the call and registers mountpoint, unless mountErr is armed.
func (m *mockFileSystem) Mount(ctx context.Context, mountpoint string, labels map[string]string) error {
	m.mountCalled = true
	if err := m.mountErr; err != nil {
		return err
	}
	m.mountPoints[mountpoint] = true
	return nil
}
// Check records the call and reports whether mountpoint was previously
// mounted; checkErr, when armed, takes precedence.
func (m *mockFileSystem) Check(ctx context.Context, mountpoint string, labels map[string]string) error {
	m.checkCalled = true
	switch {
	case m.checkErr != nil:
		return m.checkErr
	case !m.mountPoints[mountpoint]:
		// Only "true" is ever stored, so a false lookup means "absent".
		return fmt.Errorf("mountpoint %s not found", mountpoint)
	default:
		return nil
	}
}
// Unmount records the call and removes mountpoint from the mount table,
// unless unmountErr is armed.
func (m *mockFileSystem) Unmount(ctx context.Context, mountpoint string) error {
	m.unmountCalled = true
	if err := m.unmountErr; err != nil {
		return err
	}
	delete(m.mountPoints, mountpoint)
	return nil
}
// mockServer embeds Server struct and overrides Init method so tests can
// observe and force-fail initialization without running the real setup.
type mockServer struct {
	*Server          // real fusemanager server; all non-overridden RPCs pass through
	initCalled bool  // true once Init has been invoked
	initErr    error // returned by Init when non-nil
}
// newMockServer constructs the real fusemanager server and wraps it in a
// mockServer so Init can be intercepted.
func newMockServer(ctx context.Context, listener net.Listener, server *grpc.Server, fuseStoreAddr, serverAddr string) (*mockServer, error) {
	base, err := NewFuseManager(ctx, listener, server, fuseStoreAddr, serverAddr)
	if err != nil {
		return nil, err
	}
	return &mockServer{Server: base}, nil
}
// Init overrides Server.Init to avoid actual initialization: it records the
// call, honors a canned error, and populates only the fields the tests read.
func (s *mockServer) Init(ctx context.Context, req *pb.InitRequest) (*pb.Response, error) {
	s.initCalled = true
	if err := s.initErr; err != nil {
		return nil, err
	}
	// Set only required fields
	s.root = req.Root
	var cfg Config
	if err := json.Unmarshal(req.Config, &cfg); err != nil {
		return nil, err
	}
	s.config = &cfg
	s.status = FuseManagerReady
	return &pb.Response{}, nil
}
// TestFuseManager starts a fusemanager gRPC server on a unix socket with a
// mocked filesystem and mocked Init, then drives Mount/Check/Unmount through
// the manager client and verifies each call reaches the mock.
func TestFuseManager(t *testing.T) {
	tmpDir, err := os.MkdirTemp("", "fusemanager-test")
	if err != nil {
		t.Fatalf("failed to create temp dir: %v", err)
	}
	defer os.RemoveAll(tmpDir)
	socketPath := filepath.Join(tmpDir, "test.sock")
	fuseStorePath := filepath.Join(tmpDir, "fusestore.db")
	fuseManagerSocketPath := filepath.Join(tmpDir, "test-fusemanager.sock")
	l, err := net.Listen("unix", socketPath)
	if err != nil {
		t.Fatalf("failed to listen: %v", err)
	}
	defer l.Close()
	// Create server with mock
	grpcServer := grpc.NewServer()
	mockFs := newMockFileSystem(t)
	fm, err := newMockServer(context.Background(), l, grpcServer, fuseStorePath, fuseManagerSocketPath)
	if err != nil {
		t.Fatalf("failed to create fuse manager: %v", err)
	}
	defer fm.Close(context.Background())
	pb.RegisterStargzFuseManagerServiceServer(grpcServer, fm)
	// Set mock filesystem
	fm.curFs = mockFs
	go grpcServer.Serve(l)
	defer grpcServer.Stop()
	// Test cases to verify Init, Mount, Check and Unmount operations.
	// Non-nil *Err fields arm the corresponding mock failure; wantErr marks
	// cases where client creation / mounting is expected to fail.
	testCases := []struct {
		name       string
		mountpoint string
		labels     map[string]string
		initErr    error
		mountErr   error
		checkErr   error
		unmountErr error
		wantErr    bool
	}{
		{
			name:       "successful init and mount",
			mountpoint: filepath.Join(tmpDir, "mount1"),
			labels:     map[string]string{"key": "value"},
		},
		{
			name:       "init error",
			mountpoint: filepath.Join(tmpDir, "mount2"),
			initErr:    fmt.Errorf("init error"),
			wantErr:    true,
		},
		{
			name:       "mount error",
			mountpoint: filepath.Join(tmpDir, "mount3"),
			mountErr:   fmt.Errorf("mount error"),
			wantErr:    true,
		},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Arm the mocks with this case's canned errors and reset the
			// call-tracking flags (the mocks are shared across subtests).
			mockFs.mountErr = tc.mountErr
			mockFs.checkErr = tc.checkErr
			mockFs.unmountErr = tc.unmountErr
			mockFs.mountCalled = false
			mockFs.checkCalled = false
			mockFs.unmountCalled = false
			fm.initErr = tc.initErr
			fm.initCalled = false
			config := &Config{
				Config: service.Config{},
			}
			// Creating the client triggers the (mocked) Init RPC.
			client, err := NewManagerClient(context.Background(), tmpDir, socketPath, config)
			if err != nil {
				if !tc.wantErr {
					t.Fatalf("failed to create client: %v", err)
				}
				return
			}
			if !fm.initCalled {
				t.Error("Init() was not called")
			}
			if !tc.wantErr {
				// Test Mount
				err = client.Mount(context.Background(), tc.mountpoint, tc.labels)
				if err != nil {
					t.Errorf("Mount() error = %v", err)
				}
				if !mockFs.mountCalled {
					t.Error("Mount() was not called on filesystem")
				}
				// Test Check
				err = client.Check(context.Background(), tc.mountpoint, tc.labels)
				if err != nil {
					t.Errorf("Check() error = %v", err)
				}
				if !mockFs.checkCalled {
					t.Error("Check() was not called on filesystem")
				}
				// Test Unmount
				err = client.Unmount(context.Background(), tc.mountpoint)
				if err != nil {
					t.Errorf("Unmount() error = %v", err)
				}
				if !mockFs.unmountCalled {
					t.Error("Unmount() was not called on filesystem")
				}
			}
		})
	}
}

Some files were not shown because too many files have changed in this diff Show More