From 7729a838890e43c8e22002cc3f00fb912c1162b4 Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Wed, 6 Mar 2019 15:57:16 +0100 Subject: [PATCH] Dockerfile: optimize for caching The implementation of the Dockerfile first copied the current version of the documentation, to be followed by fetching upstream resources, and adding the archived documentation. As a result, the build-cache was "busted" on every change that was made locally, causing a full rebuild of both "fetching upstream resources", building the reference documentation, and generating the archived versions of the documentation. With the changes made to the "fetch-upstream-resources.sh" script, it's now possible to build those docs in advance (in a separate build-stage), and append the pre-built docs _after_ the current version of the documentation was built. This patch updates the Dockerfile to build reference-docs and archived docs in separate build-stages, optimizing the build for caching (with the assumption that no changes are made in upstream resources between builds). Not only did the cache-busting cause builds to take longer than needed, but each rebuild also caused new "dangling" images to be created (in case of the "classic" builder), or new build-caches to be created (when using BuildKit). Note that when _deploying_ the documentation, no caching should be used (to enforce a full rebuild of the documentation, and guarantee that reference-docs are "fresh"). Before this change, rebuilding the documentation (after a previous build, so with cache present) took ~185 seconds; ``` docker build -t docs . [+] Building 184.7s (26/26) FINISHED => [builder 2/5] COPY . md_source 2.8s => [builder 3/5] RUN bash ./md_source/_scripts/fetch-upstream-resources.sh md_source 52.7s => [builder 4/5] RUN jekyll build -s md_source -d /usr/share/nginx/html --config md_source/_config.yml 113.0s => [builder 5/5] RUN find /usr/share/nginx/html -type f -name '*.html' | grep -vE "v[0-9]+\." 
| while read i; do sed -i 's#href="https://docs.docker.com/#href="/#g' "$i"; done 4.4s => CACHED [stage-1 2/8] COPY --from=docs/docker.github.io:nginx-onbuild /etc/nginx/conf.d/default.conf /etc/nginx/conf.d/default.conf 0.0s => CACHED [stage-1 3/8] COPY --from=docs/docker.github.io:v17.03 /usr/share/nginx/html /usr/share/nginx/html 0.0s => CACHED [stage-1 4/8] COPY --from=docs/docker.github.io:v17.06 /usr/share/nginx/html /usr/share/nginx/html 0.0s => CACHED [stage-1 5/8] COPY --from=docs/docker.github.io:v17.09 /usr/share/nginx/html /usr/share/nginx/html 0.0s => CACHED [stage-1 6/8] COPY --from=docs/docker.github.io:v17.12 /usr/share/nginx/html /usr/share/nginx/html 0.0s => CACHED [stage-1 7/8] COPY --from=docs/docker.github.io:v18.03 /usr/share/nginx/html /usr/share/nginx/html 0.0s => [stage-1 8/8] COPY --from=builder /usr/share/nginx/html /usr/share/nginx/html 3.7s => exporting to image 5.1s => => exporting layers 5.1s => => writing image sha256:70f77631001bd0271455f893b87dd75a1fd6f5db84a00feb8afe5ddf5a697d7d 0.0s => => naming to docker.io/library/docs 0.0s ``` After this change, archives and reference-docs are using the build-cache, and building was reduced to ~126 seconds (most of that taken by Jekyll to generate the static HTML); ``` docker build -t docs . [+] Building 126.4s (30/30) FINISHED ... => CACHED [upstream-resources 1/4] COPY ./_scripts/fetch-upstream-resources.sh ./_scripts/ 0.0s => CACHED [upstream-resources 2/4] COPY ./_config.yml . 0.0s => CACHED [upstream-resources 3/4] COPY ./_data/toc.yaml ./_data/ 0.0s => CACHED [upstream-resources 4/4] RUN bash ./_scripts/fetch-upstream-resources.sh . 0.0s => [current 2/4] COPY --from=upstream-resources /usr/src/app/md_source/. ./ 1.5s => [current 3/4] RUN jekyll build -d /usr/share/nginx/html 108.9s => [current 4/4] RUN find /usr/share/nginx/html -type f -name '*.html' | grep -vE "v[0-9]+\." 
| while read i; do sed -i 's#href="https://docs.docker.com/#href="/#g' "$i"; done 4.8s => CACHED [deploybase 2/2] COPY --from=docs/docker.github.io:nginx-onbuild /etc/nginx/conf.d/default.conf /etc/nginx/conf.d/default.conf 0.0s => CACHED [archives 1/5] COPY --from=docs/docker.github.io:v17.03 /usr/share/nginx/html /usr/share/nginx/html 0.0s => CACHED [archives 2/5] COPY --from=docs/docker.github.io:v17.06 /usr/share/nginx/html /usr/share/nginx/html 0.0s => CACHED [archives 3/5] COPY --from=docs/docker.github.io:v17.09 /usr/share/nginx/html /usr/share/nginx/html 0.0s => CACHED [archives 4/5] COPY --from=docs/docker.github.io:v17.12 /usr/share/nginx/html /usr/share/nginx/html 0.0s => CACHED [archives 5/5] COPY --from=docs/docker.github.io:v18.03 /usr/share/nginx/html /usr/share/nginx/html 0.0s => [deploy 1/1] COPY --from=current /usr/share/nginx/html /usr/share/nginx/html 4.1s => exporting to image 1.9s => => exporting layers 1.9s => => writing image sha256:e76359b937fb4d956fb17e889a7333687929d4c9ae093909fe5630a8f3e4de64 0.0s => => naming to docker.io/library/docs 0.0s ``` Signed-off-by: Sebastiaan van Stijn --- Dockerfile | 87 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/Dockerfile b/Dockerfile index c1a0d06561..7e446d4a8e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,23 +2,29 @@ # from the master branch of https://github.com/docker/docker.github.io # # Here is the sequence: -# 1. Set up the build -# 2. Fetch upstream resources -# 3. Build static HTML from master -# 4. Reset to clean tiny nginx image -# 5. Copy Nginx config and archive HTML, which don't change often and can be cached -# 6. Copy static HTML from previous build stage (step 3) +# 1. Set up base stages for building and deploying +# 2. Collect and build the archived documentation +# 3. Collect and build the reference documentation (from upstream resources) +# 4. Build static HTML from the current branch +# 5. 
Build the final image, combining the archives, reference docs, and +# current version of the documentation # # When the image is run, it starts Nginx and serves the docs at port 4000 + # Engine ARG ENGINE_BRANCH="18.09.x" # Distribution ARG DISTRIBUTION_BRANCH="release/2.6" + +### +# Set up base stages for building and deploying +### + # Get basic configs and Jekyll env -FROM docs/docker.github.io:docs-builder AS builder +FROM docs/docker.github.io:docs-builder AS builderbase ENV TARGET=/usr/share/nginx/html WORKDIR /usr/src/app/md_source/ @@ -33,50 +39,55 @@ ARG DISTRIBUTION_BRANCH ENV DISTRIBUTION_BRANCH=${DISTRIBUTION_BRANCH} ENV DISTRIBUTION_SVN_BRANCH=branches/${DISTRIBUTION_BRANCH} -# Get the current docs from the checked out branch -COPY . . - -####### START UPSTREAM RESOURCES ######## -# Fetch upstream resources -RUN bash ./_scripts/fetch-upstream-resources.sh . -####### END UPSTREAM RESOURCES ######## - - -# Build the static HTML, now that everything is in place - -RUN jekyll build -d ${TARGET} - -# Fix up some links, don't touch the archives -RUN find ${TARGET} -type f -name '*.html' | grep -vE "v[0-9]+\." | while read i; do sed -i 's#href="https://docs.docker.com/#href="/#g' "$i"; done - -# BUILD OF MASTER DOCS IS NOW DONE! # Reset to alpine so we don't get any docs source or extra apps -FROM nginx:alpine - -# Set the target again +FROM nginx:alpine AS deploybase ENV TARGET=/usr/share/nginx/html # Get the nginx config from the nginx-onbuild image # This hardly ever changes so should usually be cached COPY --from=docs/docker.github.io:nginx-onbuild /etc/nginx/conf.d/default.conf /etc/nginx/conf.d/default.conf -# Get all the archive static HTML and put it into place -# Go oldest-to-newest to take advantage of the fact that we change older -# archives less often than new ones. 
-# To add a new archive, add it here -# AND ALSO edit _data/docsarchives/archives.yaml to add it to the drop-down +# Set the default command to serve the static HTML site +CMD echo -e "Docker docs are viewable at:\nhttp://0.0.0.0:4000"; exec nginx -g 'daemon off;' + + +# Build the archived docs +# these docs barely change, so can be cached +FROM deploybase AS archives +# Get all the archive static HTML and put it into place. To add a new archive, +# add it here, and ALSO edit _data/docsarchives/archives.yaml to add it to the drop-down COPY --from=docs/docker.github.io:v17.03 ${TARGET} ${TARGET} COPY --from=docs/docker.github.io:v17.06 ${TARGET} ${TARGET} COPY --from=docs/docker.github.io:v17.09 ${TARGET} ${TARGET} COPY --from=docs/docker.github.io:v17.12 ${TARGET} ${TARGET} COPY --from=docs/docker.github.io:v18.03 ${TARGET} ${TARGET} -# Get the built docs output from the previous build stage -# This ordering means all previous layers can come from cache unless an archive -# changes +# Fetch upstream resources (reference documentation) +# Only add the files that are needed to build these reference docs, so that +# these docs are only rebuilt if changes were made to the configuration. +FROM builderbase AS upstream-resources +COPY ./_scripts/fetch-upstream-resources.sh ./_scripts/ +COPY ./_config.yml . +COPY ./_data/toc.yaml ./_data/ +RUN bash ./_scripts/fetch-upstream-resources.sh . -COPY --from=builder ${TARGET} ${TARGET} -# Serve the site (target), which is now all static HTML -CMD echo -e "Docker docs are viewable at:\nhttp://0.0.0.0:4000"; exec nginx -g 'daemon off;' +# Build the current docs from the checked out branch +FROM builderbase AS current +COPY . . +COPY --from=upstream-resources /usr/src/app/md_source/. ./ + + +# Build the static HTML, now that everything is in place +RUN jekyll build -d ${TARGET} + +# Fix up some links, don't touch the archives +RUN find ${TARGET} -type f -name '*.html' | grep -vE "v[0-9]+\." 
| while read i; do sed -i 's#href="https://docs.docker.com/#href="/#g' "$i"; done + + +# Docs with archives (for deploy) +FROM archives AS deploy + +# Add the current version of the docs +COPY --from=current ${TARGET} ${TARGET}