From a56f58dbe1055f367b4717ae617750d6698b4618 Mon Sep 17 00:00:00 2001 From: Tim Hockin Date: Thu, 27 Jul 2023 14:43:39 -0700 Subject: [PATCH] Drop distroless and DIY Distroless is stuck on debian 11 - we can do the same thing, more or less, in our own script. Sad that we have to, but here we are. The net result is about 8MB smaller and passes e2e and passes trivy. --- Dockerfile.in | 48 ++++++++++--- clean_distroless.sh | 52 -------------- stage_binaries.sh | 168 +++++++++++++++++++++++++++++--------------- test_e2e.sh | 2 +- 4 files changed, 152 insertions(+), 118 deletions(-) delete mode 100755 clean_distroless.sh diff --git a/Dockerfile.in b/Dockerfile.in index 5cc6cb1..344a0cd 100644 --- a/Dockerfile.in +++ b/Dockerfile.in @@ -54,23 +54,55 @@ RUN apt-get -y -qq -o Dpkg::Use-Pty=0 install --no-install-recommends bash # for RUN mkdir -p {ARG_STAGING} COPY stage_binaries.sh / RUN /stage_binaries.sh -o {ARG_STAGING} \ + -p base-files \ + -p dash \ -p coreutils \ - -p socat \ - -p openssh-client \ -p git \ - -b /bin/dash \ + -p openssh-client \ + -p ca-certificates \ + -p socat \ -b /bin/grep \ - -b /bin/sed + -b /bin/sed \ + -f /etc/debian_version \ + -f /etc/group \ + -f /etc/nsswitch.conf \ + -f /etc/os-release \ + -f /etc/passwd \ + -f /etc/shadow RUN ln -sf /bin/dash {ARG_STAGING}/bin/sh -# We need to use distroless/base for tzdata, glibc, and some others. -FROM gcr.io/distroless/base as intermediate +FROM scratch as intermediate # Docker doesn't do vars in COPY, so we can't use a regular ARG. COPY --from=base {ARG_STAGING} / -COPY clean_distroless.sh /clean_distroless.sh -RUN /clean_distroless.sh +# This list is not generic - it is specific to git-sync on debian bookworm. 
+RUN rm -rf \ + /usr/share/base-files \ + /usr/share/doc \ + /usr/share/man \ + /usr/lib/*-linux-gnu/gconv \ + /usr/bin/c_rehash \ + /usr/bin/git-shell \ + /usr/bin/openssl \ + /usr/bin/scalar \ + /usr/bin/scp \ + /usr/bin/sftp \ + /usr/bin/ssh-add \ + /usr/bin/ssh-agent \ + /usr/bin/ssh-keygen \ + /usr/bin/ssh-keyscan \ + /usr/lib/git-core/git-shell \ + /usr/bin/openssl \ + /usr/lib/git-core/git-daemon \ + /usr/lib/git-core/git-http-backend \ + /usr/lib/git-core/git-http-fetch \ + /usr/lib/git-core/git-http-push \ + /usr/lib/git-core/git-imap-send \ + /usr/lib/openssh/ssh-keysign \ + /usr/lib/openssh/ssh-pkcs11-helper \ + /usr/lib/openssh/ssh-sk-helper \ + /usr/share/gitweb # Add the default UID to /etc/passwd so SSH is satisfied. RUN echo "git-sync:x:65533:65533::/tmp:/sbin/nologin" >> /etc/passwd diff --git a/clean_distroless.sh b/clean_distroless.sh deleted file mode 100755 index 0db1cd7..0000000 --- a/clean_distroless.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/sh - -# Copyright 2022 The Kubernetes Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This script needs to be "sh" and not "bash", but there are no arrays in sh, -# except for "$@". We need array semantics on the off chance we ever have a -# pathname with spaces in it. -# -# This list is not generic - it is specific to git-sync on debian bookworm. 
-set -- \ - /usr/share/base-files \ - /usr/share/doc \ - /usr/share/man \ - /usr/lib/*-linux-gnu/gconv \ - /usr/bin/c_rehash \ - /usr/bin/git-shell \ - /usr/bin/openssl \ - /usr/bin/scalar \ - /usr/bin/scp \ - /usr/bin/sftp \ - /usr/bin/ssh-add \ - /usr/bin/ssh-agent \ - /usr/bin/ssh-keygen \ - /usr/bin/ssh-keyscan \ - /usr/lib/git-core/git-shell \ - /usr/bin/openssl \ - /usr/lib/git-core/git-daemon \ - /usr/lib/git-core/git-http-backend \ - /usr/lib/git-core/git-http-fetch \ - /usr/lib/git-core/git-http-push \ - /usr/lib/git-core/git-imap-send \ - /usr/lib/openssh/ssh-keysign \ - /usr/lib/openssh/ssh-pkcs11-helper \ - /usr/lib/openssh/ssh-sk-helper \ - /usr/share/gitweb \ - /clean-distroless.sh - -for item; do - rm -rf "${ROOT}/${item}" -done diff --git a/stage_binaries.sh b/stage_binaries.sh index 365158a..bfe0e30 100755 --- a/stage_binaries.sh +++ b/stage_binaries.sh @@ -19,8 +19,7 @@ # Stages all the packages or files and their dependencies (+ libraries and # copyrights) to the staging dir. # -# This is intended to be used in a multi-stage docker build with a distroless/base -# or distroless/cc image. +# This is intended to be used in a multi-stage docker build. set -o errexit set -o nounset @@ -48,6 +47,33 @@ trap 'errexit' ERR # expansions and subshells set -o errtrace +function DBG() { + if [[ -n "${DBG:-}" ]]; then + echo "$@" + fi +} + +function grep_allow_nomatch() { + # grep exits 0 on match, 1 on no match, 2 on error + grep "$@" || [[ $? == 1 ]] +} + +function _indent() { + ( + IFS="" # preserve spaces in `read` + while read -r X; do + echo " ${X}" + done + ) +} + +# run "$@" and indent the output +function indent() { + # This lets us process stderr and stdout without merging them, without + # bash-isms. + { "$@" 2>&1 1>&3 | _indent; } 3>&1 1>&2 | _indent +} + # Track these globally so we only load it once. 
ROOT_FWD_LINKS=() ROOT_REV_LINKS=() @@ -109,8 +135,9 @@ function file_to_package() { fi # `dpkg-query --search $file-pattern` outputs lines with the format: "$package: $file-path" - # where $file-path belongs to $package. - echo "${result}" | cut -d':' -f1 + # where $file-path belongs to $package. Sometimes it has lines that say + # "diversion" but there's no documented grammar I can find. + echo "${result}" | grep -v "diversion" | cut -d':' -f1 } function ensure_dir_in_staging() { @@ -123,17 +150,11 @@ function ensure_dir_in_staging() { fi } -# stage_file stages the filepath $2 to $1, following symlinks -# and staging copyrights -function stage_file() { +# stage_one_file stages the filepath $2 to $1, following symlinks +function stage_one_file() { local staging="$1" local file="$2" - # short circuit if we have done this file before - if [[ -e "${staging}/${file}" ]]; then - return - fi - # copy the real form of the named path local real="$(realpath "${file}")" cp -a --parents "${real}" "${staging}" @@ -145,49 +166,49 @@ function stage_file() { ensure_dir_in_staging "${staging}" "${dir}" ln -s "${real}" "${staging}/${file}" fi - elif [[ -x "$file" ]]; then - # stage the dependencies of the binary - binary_to_libraries "${file}" \ - | while read -r lib; do - stage_file "${staging}" "${lib}" - done + fi +} + +# stage_file_and_deps stages the filepath $2 to $1, following symlinks and +# library deps, and staging copyrights +function stage_file_and_deps() { + local staging="$1" + local file="$2" + + # short circuit if we have done this file before + if [[ -e "${staging}/${file}" ]]; then + return fi # get the package so we can stage package metadata as well local package package="$(file_to_package "${file}")" - # stage the copyright for the file, if it exists - local copyright_src="/usr/share/doc/${package}/copyright" - local copyright_dst="${staging}/copyright/${package}/copyright.gz" - if [[ -f "${copyright_src}" && ! 
-f "${copyright_dst}" ]]; then - mkdir -p "$(dirname "${copyright_dst}")" - gzip -9 --to-stdout "${copyright_src}" > "${copyright_dst}" + DBG "staging file ${file} from pkg ${package}" + + stage_one_file "${staging}" "$file" + + # stage dependencies of binaries + if [[ -x "$file" ]]; then + while read -r lib; do + indent stage_file_and_deps "${staging}" "${lib}" + done < <( binary_to_libraries "${file}" ) fi - # stage the package status mimicking bazel - # https://github.com/bazelbuild/rules_docker/commit/f5432b813e0a11491cf2bf83ff1a923706b36420 - # instead of parsing the control file, we can just get the actual package status with dpkg - mkdir -p "${staging}/var/lib/dpkg/status.d/" - dpkg -s "${package}" > "${staging}/var/lib/dpkg/status.d/${package}" + if [[ -n "${package}" ]]; then + # stage the copyright for the file, if it exists + local copyright_src="/usr/share/doc/${package}/copyright" + local copyright_dst="${staging}/copyright/${package}/copyright.gz" + if [[ -f "${copyright_src}" && ! -f "${copyright_dst}" ]]; then + mkdir -p "$(dirname "${copyright_dst}")" + gzip -9 --to-stdout "${copyright_src}" > "${copyright_dst}" + fi -} - -function grep_allow_nomatch() { - # grep exits 0 on match, 1 on no match, 2 on error - grep "$@" || [[ $? == 1 ]] -} - -function _indent() { - while read -r X; do - echo " ${X}" - done -} - -# run "$@" and indent the output -function indent() { - # This lets us process stderr and stdout without merging them, without - # bash-isms. - { "$@" 2>&1 1>&3 | _indent; } 3>&1 1>&2 | _indent + # Since apt is not in the final image, stage the package status + # (mimicking bazel). This allows security scanners to run against it. 
+ # https://github.com/bazelbuild/rules_docker/commit/f5432b813e0a11491cf2bf83ff1a923706b36420 + mkdir -p "${staging}/var/lib/dpkg/status.d/" + dpkg -s "${package}" > "${staging}/var/lib/dpkg/status.d/${package}" + fi } function stage_one_package() { @@ -216,7 +237,7 @@ function stage_one_package() { if [[ -z "${found}" ]]; then names+=("${file}") sums+=("${sum}") - stage_file "${staging}" "${file}" + indent stage_file_and_deps "${staging}" "${file}" fi fi done < <( dpkg -L "${pkg}" \ @@ -225,9 +246,15 @@ function stage_one_package() { function get_dependent_packages() { local pkg="$1" + # There's no documented grammar for the output of this. Sometimes it says: + # Depends: package + # ...and other times it says: + # Depends + # ...but those don't really seem to be required. There's also "PreDepends" + # which are something else. apt-cache depends "${pkg}" \ - | grep_allow_nomatch Depends \ - | awk -F '.*Depends:[[:space:]]?' '{print $2}' + | grep_allow_nomatch '^ *Depends: [a-zA-Z0-9]' \ + | awk -F ':' '{print $2}' } # Args: @@ -245,12 +272,12 @@ function stage_packages() { local du_before="$(du -sk "${staging}" | cut -f1)" indent apt-get -y -qq -o Dpkg::Use-Pty=0 --no-install-recommends install "${pkg}" stage_one_package "$staging" "${pkg}" - get_dependent_packages "${pkg}" \ - | while read -r dep; do - stage_one_package "${staging}" "${dep}" - done + while read -r dep; do + DBG "staging dependent package ${dep}" + indent stage_one_package "${staging}" "${dep}" + done < <( get_dependent_packages "${pkg}" ) local du_after="$(du -sk "${staging}" | cut -f1)" - echo "package ${pkg} size: +$(( $du_after - $du_before )) kB (of ${du_after} kB)" + indent echo "package ${pkg} size: +$(( $du_after - $du_before )) kB (of ${du_after} kB)" done } @@ -291,7 +318,7 @@ function stage_one_binary() { binary_path="$(which "${bin}")" # stage the binary itself - stage_file "${staging}" "${binary_path}" + stage_file_and_deps "${staging}" "${binary_path}" } function stage_binaries() { 
@@ -304,7 +331,21 @@ function stage_binaries() { local du_before="$(du -sk "${staging}" | cut -f1)" stage_one_binary "${staging}" "${bin}" local du_after="$(du -sk "${staging}" | cut -f1)" - echo "binary ${bin} size: +$(( $du_after - $du_before )) kB (of ${du_after} kB)" + indent echo "binary ${bin} size: +$(( $du_after - $du_before )) kB (of ${du_after} kB)" + done +} + +function stage_files() { + local staging="$1" + shift + + local bin + for file; do + echo "staging file ${file}" + local du_before="$(du -sk "${staging}" | cut -f1)" + stage_one_file "${staging}" "${file}" + local du_after="$(du -sk "${staging}" | cut -f1)" + indent echo "file ${file} size: +$(( $du_after - $du_before )) kB (of ${du_after} kB)" done } @@ -316,6 +357,7 @@ function main() { local staging="" local pkgs=() local bins=() + local files=() while [ "$#" -gt 0 ]; do case "$1" in @@ -332,6 +374,15 @@ function main() { bins+=("$2") shift 2 ;; + "-f") + if [[ -z "${2:-}" ]]; then + echo "error: flag '-f' requires an argument" >&2 + usage >&2 + exit 2 + fi + files+=("$2") + shift 2 + ;; "-p") if [[ -z "${2:-}" ]]; then echo "error: flag '-p' requires an argument" >&2 @@ -375,6 +426,9 @@ function main() { if (( "${#bins[@]}" > 0 )); then stage_binaries "${staging}" "${bins[@]}" fi + if (( "${#files[@]}" > 0 )); then + stage_files "${staging}" "${files[@]}" + fi echo "final staged size: $(du -sk "${staging}" | cut -f1) kB" du -xk --max-depth=3 "${staging}" | sort -n | _indent diff --git a/test_e2e.sh b/test_e2e.sh index 14c7c90..1e18bfe 100755 --- a/test_e2e.sh +++ b/test_e2e.sh @@ -1916,7 +1916,7 @@ function e2e::auth_askpass_url_slow_start() { sleep 1 assert_file_absent "$ROOT/link" - wait_for_sync "5" + wait_for_sync 5 assert_link_exists "$ROOT/link" assert_file_exists "$ROOT/link/file" assert_file_eq "$ROOT/link/file" "$FUNCNAME"