Merge pull request #113 from infosiftr/run-mount-from-image

Move Dockerfile parsing to a dedicated package, add support for `RUN --mount=type=bind,from=...`
This commit is contained in:
yosifkit 2025-01-16 16:51:39 -08:00 committed by GitHub
commit 882b15e031
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 397 additions and 137 deletions

View File

@ -55,6 +55,14 @@ jobs:
# files: coverage.out
# fail_ci_if_error: true
# verbose: true
# in the meantime, upload coverage to GHA
- run: docker run --rm -i test go tool cover -html /dev/stdin -o /dev/stdout < coverage.out > coverage.html
- uses: actions/upload-artifact@v4
with:
name: coverage
path: coverage.*
include-hidden-files: true
if-no-files-found: error
dockerfile:
name: Test Dockerfile
runs-on: ubuntu-latest

View File

@ -1,4 +1,4 @@
FROM golang:1.20-bullseye AS build
FROM golang:1.21-bookworm AS build
SHELL ["bash", "-Eeuo", "pipefail", "-xc"]

View File

@ -1,4 +1,4 @@
FROM golang:1.20-bullseye
FROM golang:1.21-bookworm
SHELL ["bash", "-Eeuo", "pipefail", "-xc"]

View File

@ -1,4 +1,4 @@
FROM golang:1.20-bullseye
FROM golang:1.21-bookworm
SHELL ["bash", "-Eeuo", "pipefail", "-xc"]

View File

@ -6,11 +6,11 @@ set -Eeuo pipefail
dir="$(readlink -f "$BASH_SOURCE")"
dir="$(dirname "$dir")"
: "${CGO_ENABLED:=0}"
export GO111MODULE=on CGO_ENABLED
: "${CGO_ENABLED=0}" "${GOTOOLCHAIN=local}"
export CGO_ENABLED GOTOOLCHAIN
(
cd "$dir"
go build -o bin/bashbrew ./cmd/bashbrew > /dev/null
go build -trimpath -o bin/bashbrew ./cmd/bashbrew > /dev/null
)
exec "$dir/bin/bashbrew" "$@"

View File

@ -1,7 +1,6 @@
package main
import (
"bufio"
"bytes"
"crypto/sha256"
"encoding/hex"
@ -10,21 +9,14 @@ import (
"os"
"os/exec"
"path"
"strconv"
"strings"
"sync"
"github.com/docker-library/bashbrew/manifest"
"github.com/docker-library/bashbrew/pkg/dockerfile"
"github.com/urfave/cli"
)
type dockerfileMetadata struct {
StageFroms []string // every image "FROM" instruction value (or the parent stage's FROM value in the case of a named stage)
StageNames []string // the name of any named stage (in order)
StageNameFroms map[string]string // map of stage names to FROM values (or the parent stage's FROM value in the case of a named stage), useful for resolving stage names to FROM values
Froms []string // every "FROM" or "COPY --from=xxx" value (minus named and/or numbered stages in the case of "--from=")
}
// this returns the "FROM" value for the last stage (which essentially determines the "base" for the final published image)
func (r Repo) ArchLastStageFrom(arch string, entry *manifest.Manifest2822Entry) (string, error) {
dockerfileMeta, err := r.archDockerfileMetadata(arch, entry)
@ -46,27 +38,25 @@ func (r Repo) ArchDockerFroms(arch string, entry *manifest.Manifest2822Entry) ([
return dockerfileMeta.Froms, nil
}
func (r Repo) dockerfileMetadata(entry *manifest.Manifest2822Entry) (*dockerfileMetadata, error) {
func (r Repo) dockerfileMetadata(entry *manifest.Manifest2822Entry) (dockerfile.Metadata, error) {
return r.archDockerfileMetadata(arch, entry)
}
var dockerfileMetadataCache = map[string]*dockerfileMetadata{}
var (
dockerfileMetadataCache = map[string]dockerfile.Metadata{}
scratchDockerfileMetadata = sync.OnceValues(func() (dockerfile.Metadata, error) {
return dockerfile.Parse(`FROM scratch`)
})
)
func (r Repo) archDockerfileMetadata(arch string, entry *manifest.Manifest2822Entry) (*dockerfileMetadata, error) {
func (r Repo) archDockerfileMetadata(arch string, entry *manifest.Manifest2822Entry) (dockerfile.Metadata, error) {
if builder := entry.ArchBuilder(arch); builder == "oci-import" {
return &dockerfileMetadata{
StageFroms: []string{
"scratch",
},
Froms: []string{
"scratch",
},
}, nil
return scratchDockerfileMetadata()
}
commit, err := r.fetchGitRepo(arch, entry)
if err != nil {
return nil, cli.NewMultiError(fmt.Errorf("failed fetching Git repo for arch %q from entry %q", arch, entry.String()), err)
return dockerfile.Metadata{}, cli.NewMultiError(fmt.Errorf("failed fetching Git repo for arch %q from entry %q", arch, entry.String()), err)
}
dockerfileFile := path.Join(entry.ArchDirectory(arch), entry.ArchFile(arch))
@ -79,116 +69,20 @@ func (r Repo) archDockerfileMetadata(arch string, entry *manifest.Manifest2822En
return meta, nil
}
dockerfile, err := gitShow(commit, dockerfileFile)
df, err := gitShow(commit, dockerfileFile)
if err != nil {
return nil, cli.NewMultiError(fmt.Errorf(`failed "git show" for %q from commit %q`, dockerfileFile, commit), err)
return dockerfile.Metadata{}, cli.NewMultiError(fmt.Errorf(`failed "git show" for %q from commit %q`, dockerfileFile, commit), err)
}
meta, err := parseDockerfileMetadata(dockerfile)
meta, err := dockerfile.Parse(df)
if err != nil {
return nil, cli.NewMultiError(fmt.Errorf(`failed parsing Dockerfile metadata for %q from commit %q`, dockerfileFile, commit), err)
return dockerfile.Metadata{}, cli.NewMultiError(fmt.Errorf(`failed parsing Dockerfile metadata for %q from commit %q`, dockerfileFile, commit), err)
}
dockerfileMetadataCache[cacheKey] = meta
return meta, nil
}
func parseDockerfileMetadata(dockerfile string) (*dockerfileMetadata, error) {
meta := &dockerfileMetadata{
// panic: assignment to entry in nil map
StageNameFroms: map[string]string{},
// (nil slices work fine)
}
scanner := bufio.NewScanner(strings.NewReader(dockerfile))
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line == "" {
// ignore blank lines
continue
}
if line[0] == '#' {
// TODO handle "escape" parser directive
// TODO handle "syntax" parser directive -- explode appropriately (since custom syntax invalidates our Dockerfile parsing)
// ignore comments
continue
}
// handle line continuations
// (TODO see note above regarding "escape" parser directive)
for line[len(line)-1] == '\\' && scanner.Scan() {
nextLine := strings.TrimSpace(scanner.Text())
if nextLine == "" || nextLine[0] == '#' {
// ignore blank lines and comments
continue
}
line = line[0:len(line)-1] + nextLine
}
fields := strings.Fields(line)
if len(fields) < 1 {
// must be a much more complex empty line??
continue
}
instruction := strings.ToUpper(fields[0])
// TODO balk at ARG / $ in from values
switch instruction {
case "FROM":
from := fields[1]
if stageFrom, ok := meta.StageNameFroms[from]; ok {
// if this is a valid stage name, we should resolve it back to the original FROM value of that previous stage (we don't care about inter-stage dependencies for the purposes of either tag dependency calculation or tag building -- just how many there are and what external things they require)
from = stageFrom
}
// make sure to add ":latest" if it's implied
from = latestizeRepoTag(from)
meta.StageFroms = append(meta.StageFroms, from)
meta.Froms = append(meta.Froms, from)
if len(fields) == 4 && strings.ToUpper(fields[2]) == "AS" {
stageName := fields[3]
meta.StageNames = append(meta.StageNames, stageName)
meta.StageNameFroms[stageName] = from
}
case "COPY":
for _, arg := range fields[1:] {
if !strings.HasPrefix(arg, "--") {
// doesn't appear to be a "flag"; time to bail!
break
}
if !strings.HasPrefix(arg, "--from=") {
// ignore any flags we're not interested in
continue
}
from := arg[len("--from="):]
if stageFrom, ok := meta.StageNameFroms[from]; ok {
// see note above regarding stage names in FROM
from = stageFrom
} else if stageNumber, err := strconv.Atoi(from); err == nil && stageNumber < len(meta.StageFroms) {
// must be a stage number, we should resolve it too
from = meta.StageFroms[stageNumber]
}
// make sure to add ":latest" if it's implied
from = latestizeRepoTag(from)
meta.Froms = append(meta.Froms, from)
}
}
}
if err := scanner.Err(); err != nil {
return nil, err
}
return meta, nil
}
func (r Repo) DockerCacheName(entry *manifest.Manifest2822Entry) (string, error) {
cacheHash, err := r.dockerCacheHash(entry)
if err != nil {

View File

@ -6,7 +6,6 @@ import (
"path"
"path/filepath"
"sort"
"strings"
"github.com/docker-library/bashbrew/manifest"
)
@ -39,13 +38,6 @@ func repos(all bool, args ...string) ([]string, error) {
return ret, nil
}
func latestizeRepoTag(repoTag string) string {
if repoTag != "scratch" && strings.IndexRune(repoTag, ':') < 0 {
return repoTag + ":latest"
}
return repoTag
}
type Repo struct {
RepoName string
TagName string

2
go.mod
View File

@ -1,6 +1,6 @@
module github.com/docker-library/bashbrew
go 1.20
go 1.21
require (
github.com/containerd/containerd v1.6.19

1
go.sum
View File

@ -617,6 +617,7 @@ github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4O
github.com/prometheus/procfs v0.2.0/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
github.com/prometheus/procfs v0.7.3 h1:4jVXhlkAyzOScmCkXBTOLRLTz8EeU+eyjrwB/EPq0VU=
github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU=
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=

182
pkg/dockerfile/parse.go Normal file
View File

@ -0,0 +1,182 @@
package dockerfile
import (
"bufio"
"io"
"strconv"
"strings"
"unicode"
)
type Metadata struct {
StageFroms []string // every image "FROM" instruction value (or the parent stage's FROM value in the case of a named stage)
StageNames []string // the name of any named stage (in order)
StageNameFroms map[string]string // map of stage names to FROM values (or the parent stage's FROM value in the case of a named stage), useful for resolving stage names to FROM values
Froms []string // every "FROM" or "COPY --from=xxx" value (minus named and/or numbered stages in the case of "--from=")
}
func Parse(dockerfile string) (Metadata, error) {
return ParseReader(strings.NewReader(dockerfile))
}
func ParseReader(dockerfile io.Reader) (Metadata, error) {
meta := Metadata{
// panic: assignment to entry in nil map
StageNameFroms: map[string]string{},
// (nil slices work fine)
}
scanner := bufio.NewScanner(dockerfile)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line == "" {
// ignore straight up blank lines (no complexity)
continue
}
// (we can't have a comment that ends in a continuation line - that's not continuation, that's part of the comment)
if line[0] == '#' {
// TODO handle "escape" parser directive
// TODO handle "syntax" parser directive -- explode appropriately (since custom syntax invalidates our Dockerfile parsing)
// ignore comments
continue
}
// handle line continuations
// (TODO see note above regarding "escape" parser directive)
for line[len(line)-1] == '\\' {
if !scanner.Scan() {
line = line[0:len(line)-1]
break
}
// "strings.TrimRightFunc(IsSpace)" because whitespace *after* the escape character is supported and ignored 🙈
nextLine := strings.TrimRightFunc(scanner.Text(), unicode.IsSpace)
if nextLine == "" { // if it's all space, TrimRight will be TrimSpace 😏
// ignore "empty continuation" lines (https://github.com/moby/moby/pull/33719)
continue
}
if strings.TrimLeftFunc(nextLine, unicode.IsSpace)[0] == '#' {
// ignore comments inside continuation (https://github.com/moby/moby/issues/29005)
continue
}
line = line[0:len(line)-1] + nextLine
}
// TODO *technically* a line like " RUN echo hi " should be parsed as "RUN" "echo hi" (cut off instruction, then the rest of the line with TrimSpace), but for our needs "strings.Fields" is good enough for now
// line = strings.TrimSpace(line) // (emulated below; "strings.Fields" does essentially the same exact thing so we don't need to do it explicitly here too)
fields := strings.Fields(line)
if len(fields) < 1 {
// ignore empty lines
continue
}
instruction := strings.ToUpper(fields[0])
// TODO balk at ARG / $ in from values
switch instruction {
case "FROM":
from := fields[1]
if stageFrom, ok := meta.StageNameFroms[from]; ok {
// if this is a valid stage name, we should resolve it back to the original FROM value of that previous stage (we don't care about inter-stage dependencies for the purposes of either tag dependency calculation or tag building -- just how many there are and what external things they require)
from = stageFrom
}
// make sure to add ":latest" if it's implied
from = latestizeRepoTag(from)
meta.StageFroms = append(meta.StageFroms, from)
meta.Froms = append(meta.Froms, from)
if len(fields) == 4 && strings.ToUpper(fields[2]) == "AS" {
stageName := fields[3]
meta.StageNames = append(meta.StageNames, stageName)
meta.StageNameFroms[stageName] = from
}
case "COPY":
for _, arg := range fields[1:] {
if !strings.HasPrefix(arg, "--") {
// doesn't appear to be a "flag"; time to bail!
break
}
if !strings.HasPrefix(arg, "--from=") {
// ignore any flags we're not interested in
continue
}
from := arg[len("--from="):]
if stageFrom, ok := meta.StageNameFroms[from]; ok {
// see note above regarding stage names in FROM
from = stageFrom
} else if stageNumber, err := strconv.Atoi(from); err == nil && stageNumber < len(meta.StageFroms) {
// must be a stage number, we should resolve it too
from = meta.StageFroms[stageNumber]
}
// make sure to add ":latest" if it's implied
from = latestizeRepoTag(from)
meta.Froms = append(meta.Froms, from)
}
case "RUN": // TODO combine this and the above COPY-parsing code somehow sanely
for _, arg := range fields[1:] {
if !strings.HasPrefix(arg, "--") {
// doesn't appear to be a "flag"; time to bail!
break
}
if !strings.HasPrefix(arg, "--mount=") {
// ignore any flags we're not interested in
continue
}
csv := arg[len("--mount="):]
// TODO more correct CSV parsing
fields := strings.Split(csv, ",")
var mountType, from string
for _, field := range fields {
if strings.HasPrefix(field, "type=") {
mountType = field[len("type="):]
continue
}
if strings.HasPrefix(field, "from=") {
from = field[len("from="):]
continue
}
}
if mountType != "bind" || from == "" {
// this is probably something we should be worried about, but not something we're interested in parsing
continue
}
if stageFrom, ok := meta.StageNameFroms[from]; ok {
// see note above regarding stage names in FROM
from = stageFrom
} else if stageNumber, err := strconv.Atoi(from); err == nil && stageNumber < len(meta.StageFroms) {
// must be a stage number, we should resolve it too
from = meta.StageFroms[stageNumber]
}
// make sure to add ":latest" if it's implied
from = latestizeRepoTag(from)
meta.Froms = append(meta.Froms, from)
}
}
}
return meta, scanner.Err()
}
func latestizeRepoTag(repoTag string) string {
if repoTag != "scratch" && strings.IndexRune(repoTag, ':') < 0 {
return repoTag + ":latest"
}
return repoTag
}

View File

@ -0,0 +1,183 @@
package dockerfile_test
import (
"reflect"
"testing"
"github.com/docker-library/bashbrew/pkg/dockerfile"
)
func TestParse(t *testing.T) {
for _, td := range []struct {
name string
dockerfile string
metadata dockerfile.Metadata
}{
{
dockerfile: `FROM scratch`,
metadata: dockerfile.Metadata{
Froms: []string{"scratch"},
},
},
{
dockerfile: `from bash`,
metadata: dockerfile.Metadata{
Froms: []string{"bash:latest"},
},
},
{
dockerfile: `fRoM bash:5`,
metadata: dockerfile.Metadata{
Froms: []string{"bash:5"},
},
},
{
name: "comments+whitespace+continuation",
dockerfile: `
FROM scratch
# foo
# bar
FROM bash
RUN echo \
# comment inside continuation
hello \
world
`,
metadata: dockerfile.Metadata{
Froms: []string{"scratch", "bash:latest"},
},
},
{
name: "multi-stage",
dockerfile: `
FROM bash:latest AS foo
FROM busybox:uclibc
# intermediate stage without name
FROM bash:5 AS bar
FROM foo AS foo2
FROM scratch
COPY --from=foo / /
COPY --from=bar / /
COPY --from=foo2 / /
COPY --chown=1234:5678 /foo /bar
`,
metadata: dockerfile.Metadata{
StageFroms: []string{"bash:latest", "busybox:uclibc", "bash:5", "bash:latest", "scratch"},
StageNames: []string{"foo", "bar", "foo2"},
StageNameFroms: map[string]string{
"foo": "bash:latest",
"bar": "bash:5",
"foo2": "bash:latest",
},
Froms: []string{"bash:latest", "busybox:uclibc", "bash:5", "bash:latest", "scratch", "bash:latest", "bash:5", "bash:latest"},
},
},
{
name: "empty continuations",
dockerfile: `
\
\
\
\
\
\
`,
},
{
name: "continuation edge cases",
dockerfile: `
# continuation does not apply to this comment \
FROM scratch
# but everything below this is part of a single continuation
FROM\
\
\
\
\
# comments inside are fine
# and really yucky empty lines:
\
\
scratch\
`,
metadata: dockerfile.Metadata{
Froms: []string{"scratch", "scratch"},
},
},
{
// TODO is this even something that's supported by classic builder/buildkit? (Tianon *thinks* it was supported once, but maybe he's misremembering and it's never been a thing Dockerfiles, only docker build --target=N ?)
name: "numbered stages",
dockerfile: `
FROM bash:latest
RUN echo foo > /foo
FROM scratch
COPY --from=0 /foo /foo
FROM scratch
COPY --chown=1234:5678 --from=1 /foo /foo
FROM bash:latest
RUN --mount=type=bind,from=2 cat /foo
`,
metadata: dockerfile.Metadata{
StageFroms: []string{"bash:latest", "scratch", "scratch", "bash:latest"},
Froms: []string{"bash:latest", "scratch", "bash:latest", "scratch", "scratch", "bash:latest", "scratch"},
},
},
{
name: "RUN --mount",
dockerfile: `
FROM scratch
RUN --mount=type=bind,from=busybox:uclibc,target=/tmp ["/tmp/bin/sh","-euxc","echo foo > /foo"]
`,
metadata: dockerfile.Metadata{
StageFroms: []string{"scratch"},
Froms: []string{"scratch", "busybox:uclibc"},
},
},
{
name: "RUN --mount=stage",
dockerfile: `
FROM busybox:uclibc AS bb
RUN --network=none echo hi, a flag that is ignored
RUN --mount=type=tmpfs,dst=/foo touch /foo/bar # this should be ignored
FROM scratch
RUN --mount=type=bind,from=bb,target=/tmp ["/tmp/bin/sh","-euxc","echo foo > /foo"]
`,
metadata: dockerfile.Metadata{
StageFroms: []string{"busybox:uclibc", "scratch"},
StageNames: []string{"bb"},
StageNameFroms: map[string]string{"bb": "busybox:uclibc"},
Froms: []string{"busybox:uclibc", "scratch", "busybox:uclibc"},
},
},
} {
td := td
// some light normalization
if td.name == "" {
td.name = td.dockerfile
}
if len(td.metadata.Froms) > 0 && len(td.metadata.StageFroms) == 0 {
td.metadata.StageFroms = td.metadata.Froms
}
if td.metadata.StageNameFroms == nil {
td.metadata.StageNameFroms = map[string]string{}
}
t.Run(td.name, func(t *testing.T) {
parsed, err := dockerfile.Parse(td.dockerfile)
if err != nil {
t.Fatal(err)
}
if !reflect.DeepEqual(parsed, td.metadata) {
t.Fatalf("expected:\n%#v\ngot:\n%#v", td.metadata, parsed)
}
})
}
}