diff --git a/Dockerfile.in b/Dockerfile.in
index 139595e..6aa1f5f 100644
--- a/Dockerfile.in
+++ b/Dockerfile.in
@@ -41,22 +41,36 @@
 #############################################################################
 # First we prepare the image that we want, regardless of build layers.
 #############################################################################
-FROM {ARG_FROM} as prep
+FROM {ARG_FROM} as base
 
 # When building, we can pass a unique value (e.g. `date +%s`) for this arg,
 # which will force a rebuild from here (by invalidating docker's cache).
 ARG FORCE_REBUILD=0
 
-RUN apt-get -q -y update
-RUN apt-get -q -y upgrade
-RUN apt-get -q -y install --no-install-recommends \
-    ca-certificates \
-    coreutils \
-    socat \
-    openssh-client \
-    git
-RUN apt-get -q -y autoremove
-RUN rm -rf /var/lib/apt/lists/*
+RUN apt-get -y -qq -o Dpkg::Use-Pty=0 update
+RUN apt-get -y -qq -o Dpkg::Use-Pty=0 upgrade
+
+RUN apt-get -y -qq -o Dpkg::Use-Pty=0 install bash # for the staging scripts and ldd
+RUN mkdir -p {ARG_STAGING}
+COPY stage_binaries.sh /
+RUN /stage_binaries.sh -o {ARG_STAGING} \
+    -p coreutils \
+    -p socat \
+    -p openssh-client \
+    -p git \
+    -b /bin/dash \
+    -b /bin/grep \
+    -b /bin/sed
+RUN ln -sf /bin/dash {ARG_STAGING}/bin/sh
+
+COPY clean_distroless.sh /clean_distroless.sh
+RUN /clean_distroless.sh {ARG_STAGING}
+
+# We need to use distroless/base for tzdata, glibc, and some others.
+FROM gcr.io/distroless/base as intermediate
+
+# Docker doesn't do vars in COPY, so we can't use a regular ARG.
+COPY --from=base {ARG_STAGING} /
 
 # Add the default UID to /etc/passwd so SSH is satisfied.
 RUN echo "git-sync:x:65533:65533::/tmp:/sbin/nologin" >> /etc/passwd
@@ -90,7 +104,7 @@ COPY bin/{ARG_OS}_{ARG_ARCH}/{ARG_BIN} /{ARG_BIN}
 # Now we make a "clean" final image.
 #############################################################################
 FROM scratch
-COPY --from=prep / /
+COPY --from=intermediate / /
 
 # Run as non-root by default.  There's simply no reason to run as root.
 USER 65533:65533
diff --git a/Makefile b/Makefile
index 86af42b..38cca74 100644
--- a/Makefile
+++ b/Makefile
@@ -169,6 +169,7 @@ container: .container-$(DOTFILE_IMAGE) container-name
 	    -e 's|{ARG_ARCH}|$(ARCH)|g' \
 	    -e 's|{ARG_OS}|$(OS)|g' \
 	    -e 's|{ARG_FROM}|$(BASEIMAGE)|g' \
+	    -e 's|{ARG_STAGING}|/staging|g' \
 	    Dockerfile.in > .dockerfile-$(OS)_$(ARCH)
 	HASH_LICENSES=$$(find $(LICENSES) -type f \
 	    | xargs md5sum | md5sum | cut -f1 -d' '); \
diff --git a/clean_distroless.sh b/clean_distroless.sh
new file mode 100755
index 0000000..8878fad
--- /dev/null
+++ b/clean_distroless.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+# Copyright 2022 The Kubernetes Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# USAGE: clean_distroless.sh <staging-dir>
+
+if [ -z "$1" ]; then
+    echo "usage: $0 <staging-dir>" >&2
+    exit 1
+fi
+ROOT="$1"
+
+# This script needs to be "sh" and not "bash", but there are no arrays in sh,
+# except for "$@".  We need array semantics on the off chance we ever have a
+# pathname with spaces in it.
+set -- \
+    /usr/share/base-files \
+    /usr/share/man \
+    /usr/lib/*-linux-gnu/gconv \
+    /usr/bin/c_rehash \
+    /usr/bin/openssl \
+    /iptables-wrapper-installer.sh \
+    /clean_distroless.sh
+
+for item; do
+    rm -rf "${ROOT}/${item}"
+done
diff --git a/stage_binaries.sh b/stage_binaries.sh
new file mode 100755
index 0000000..ed6a673
--- /dev/null
+++ b/stage_binaries.sh
@@ -0,0 +1,278 @@
+#!/bin/bash
+
+# Copyright 2022 The Kubernetes Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# USAGE: stage_binaries.sh -o <staging-dir> ( -p <package> | -b binary )...
+#
+# Stages all the packages or files and their dependencies (+ libraries and
+# copyrights) to the staging dir.
+#
+# This is intended to be used in a multi-stage docker build with a distroless/base
+# or distroless/cc image.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+# A handler for when we exit automatically on an error.
+# Borrowed from kubernetes, which was borrowed from
+# https://gist.github.com/ahendrix/7030300
+function errexit() {
+    # If the shell we are in doesn't have errexit set (common in subshells) then
+    # don't dump stacks.
+    set +o | grep -qe "-o errexit" || return
+
+    local file="$(basename "${BASH_SOURCE[1]}")"
+    local line="${BASH_LINENO[0]}"
+    local func="${FUNCNAME[1]:-}"
+    echo "FATAL: error at ${func}() ${file}:${line}" >&2
+}
+
+# trap ERR to provide an error handler whenever a command exits nonzero; this
+# is a more verbose version of set -o errexit
+trap 'errexit' ERR
+
+# setting errtrace allows our ERR trap handler to be propagated to functions,
+# expansions and subshells
+set -o errtrace
+
+# file_to_package identifies the debian package that provided the file $1
+function file_to_package() {
+    local file="$1"
+
+    # `dpkg-query --search $file-pattern` outputs lines with the format: "$package: $file-path"
+    # where $file-path belongs to $package
+    # https://manpages.debian.org/jessie/dpkg/dpkg-query.1.en.html
+    dpkg-query --search "$(realpath "${file}")" | cut -d':' -f1
+}
+
+# package_to_copyright gives the path to the copyright file for the package $1
+function package_to_copyright() {
+    local pkg="$1"
+    echo "/usr/share/doc/${pkg}/copyright"
+}
+
+# stage_file stages the filepath $1 to $2, following symlinks
+# and staging copyrights
+function stage_file() {
+    local file="$1"
+    local staging="$2"
+
+    # copy the named path
+    cp -a --parents "${file}" "${staging}"
+
+    # recursively follow symlinks
+    if [[ -L "${file}" ]]; then
+        stage_file "$(cd "$(dirname "${file}")" || exit; realpath -s "$(readlink "${file}")")" "${staging}"
+    fi
+
+    # get the package so we can stage package metadata as well
+    local package="$(file_to_package "${file}")"
+    # stage the copyright for the file, if it exists
+    local copyright="$(package_to_copyright "${package}")"
+    if [[ -f "${copyright}" ]]; then
+        cp -a --parents "${copyright}" "${staging}"
+    fi
+
+    # stage the package status mimicking bazel
+    # https://github.com/bazelbuild/rules_docker/commit/f5432b813e0a11491cf2bf83ff1a923706b36420
+    # instead of parsing the control file, we can just get the actual package status with dpkg
+    dpkg -s "${package}" > "${staging}/var/lib/dpkg/status.d/${package}"
+}
+
+function grep_allow_nomatch() {
+    # grep exits 0 on match, 1 on no match, 2 on error
+    grep "$@" || [[ $? == 1 ]]
+}
+
+function _indent() {
+    while read -r X; do
+        echo "    ${X}"
+    done
+}
+
+# run "$@" and indent the output
+function indent() {
+    # This lets us process stderr and stdout without merging them, without
+    # bash-isms.
+    { "$@" 2>&1 1>&3 | _indent; } 3>&1 1>&2 | _indent
+}
+
+function stage_file_list() {
+    local pkg="$1"
+    local staging="$2"
+
+    dpkg -L "${pkg}" \
+        | grep_allow_nomatch -vE '(/\.|/usr/share/(man|doc|.*-completion))' \
+        | while read -r file; do
+            if [[ -f "$file" ]]; then
+                stage_file "${file}" "${staging}"
+                if [[ -L "$file" ]]; then
+                    continue
+                fi
+                if [[ -x "$file" ]]; then
+                    stage_binaries "${staging}" "${file}"
+                fi
+            fi
+        done
+}
+
+function get_dependent_packages() {
+    local pkg="$1"
+    apt-cache depends "${pkg}" \
+        | grep_allow_nomatch Depends \
+        | awk -F '.*Depends:[[:space:]]?' '{print $2}'
+}
+
+# Args:
+#   $1: path to staging dir
+#   $2+: package names
+function stage_packages() {
+    local staging="$1"
+    shift
+
+    mkdir -p "${staging}"/var/lib/dpkg/status.d/
+    indent apt-get -y -qq -o Dpkg::Use-Pty=0 update
+
+    local pkg
+    for pkg; do
+        echo "staging package ${pkg}"
+        indent apt-get -y -qq -o Dpkg::Use-Pty=0 --no-install-recommends install "${pkg}"
+        stage_file_list "${pkg}" "$staging"
+        get_dependent_packages "${pkg}" \
+            | while read -r dep; do
+                stage_file_list "${dep}" "${staging}"
+            done
+    done
+}
+
+# binary_to_libraries identifies the library files needed by the binary $1 with ldd
+function binary_to_libraries() {
+    local bin="$1"
+
+    # see: https://man7.org/linux/man-pages/man1/ldd.1.html
+    # Each output line looks like:
+    #     linux-vdso.so.1 (0x00007fffb11c3000)
+    #   or
+    #     libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f2f52d26000)
+    #
+    # This is a little funky because ldd treats static binaries as errors ("not
+    # a dynamic executable") but static libraries as non-errors ("statically
+    # linked").  We want real ldd errors, but static binaries are OK.
+    if [[ "$(ldd "${bin}" 2>&1)" =~ "not a dynamic executable" ]]; then
+        return
+    fi
+    ldd "${bin}" \
+        `# skip static binaries` \
+        | grep_allow_nomatch -v "statically linked" \
+        `# linux-vdso.so.1 is a special virtual shared object from the kernel` \
+        `# see: http://man7.org/linux/man-pages/man7/vdso.7.html` \
+        | grep_allow_nomatch -v 'linux-vdso.so.1' \
+        `# strip the leading '${name} => ' if any so only '/lib-foo.so (0xf00)' remains` \
+        | sed -E 's#.* => /#/#' \
+        `# we want only the path remaining, not the (0x${LOCATION})` \
+        | awk '{print $1}'
+}
+
+function stage_binaries() {
+    local staging="$1"
+    shift
+
+    local bin
+    for bin; do
+        echo "staging binary ${bin}"
+
+        # locate the path to the binary
+        local binary_path
+        binary_path="$(which "${bin}")"
+
+        # ensure package metadata dir
+        mkdir -p "${staging}/var/lib/dpkg/status.d/"
+
+        # stage the binary itself
+        stage_file "${binary_path}" "${staging}"
+
+        # stage the dependencies of the binary
+        binary_to_libraries "${binary_path}" \
+            | while read -r lib; do
+                stage_file "${lib}" "${staging}"
+            done
+    done
+}
+
+function usage() {
+    echo "$0 -o <staging-dir> ( -p <package> | -b binary )..."
+}
+
+function main() {
+    local staging=""
+    local pkgs=()
+    local bins=()
+
+    while [ "$#" -gt 0 ]; do
+        case "$1" in
+            "-?")
+                usage
+                exit 0
+                ;;
+            "-b")
+                if [[ -z "${2:-}" ]]; then
+                    echo "error: flag '-b' requires an argument" >&2
+                    usage >&2
+                    exit 2
+                fi
+                bins+=("$2")
+                shift 2
+                ;;
+            "-p")
+                if [[ -z "${2:-}" ]]; then
+                    echo "error: flag '-p' requires an argument" >&2
+                    usage >&2
+                    exit 2
+                fi
+                pkgs+=("$2")
+                shift 2
+                ;;
+            "-o")
+                if [[ -z "${2:-}" ]]; then
+                    echo "error: flag '-o' requires an argument" >&2
+                    usage >&2
+                    exit 2
+                fi
+                staging="$2"
+                shift 2
+                ;;
+            *)
+                echo "error: unknown argument: $1" >&2
+                usage >&2
+                exit 3
+                ;;
+        esac
+    done
+
+    if [[ -z "${staging}" ]]; then
+        usage >&2
+        exit 4
+    fi
+
+    if (( "${#pkgs[@]}" > 0 )); then
+        stage_packages "${staging}" "${pkgs[@]}"
+    fi
+    if (( "${#bins[@]}" > 0 )); then
+        stage_binaries "${staging}" "${bins[@]}"
+    fi
+}
+
+main "$@"