Add `ArchGitChecksum` template command in `bashbrew cat`

This also finally adds `bashbrew context` as an explicit subcommand so that issues with this code are easier to test/debug (so we can generate the actual tarball and compare it to previous versions of it, versions generated by `git archive`, etc).

As-is, this currently generates verbatim identical checksums to 0cde8de57d/sources.sh (L90-L96) (by design).  We'll wait to do any cache bust there until we implement `Dockerfile`/context filtering:

```console
$ bashbrew cat varnish:stable --format '{{ .TagEntry.GitCommit }} {{ .TagEntry.Directory }}'
0c295b528f28a98650fb2580eab6d34b30b165c4 stable/debian
$ git -C "$BASHBREW_CACHE/git" archive 0c295b528f28a98650fb2580eab6d34b30b165c4:stable/debian/ | ./tar-scrubber | sha256sum
3aef5ac859b23d65dfe5e9f2a47750e9a32852222829cfba762a870c1473fad6
$ bashbrew cat --format '{{ .ArchGitChecksum arch .TagEntry }}' varnish:stable
3aef5ac859b23d65dfe5e9f2a47750e9a32852222829cfba762a870c1473fad6
```

(Choosing `varnish:stable` there because it currently has [some 100% valid dangling symlinks](6b1c6ffedc/stable/debian/scripts) that tripped up my code beautifully 💕)

From a performance perspective (which was the original reason for looking into / implementing this), running the `meta-scripts/sources.sh` script against `--all` vs this, my local system gets ~18.5m vs ~4.5m (faster being this new pure-Go implementation).
This commit is contained in:
Tianon Gravi 2024-01-08 12:02:09 -08:00
parent 4e0ea8d8ab
commit 2d67127dd1
12 changed files with 647 additions and 106 deletions

View File

@ -96,6 +96,20 @@ func getGitCommit(commit string) (string, error) {
return h.String(), nil
}
// archGitFS obtains the commit for entry on arch via r.fetchGitRepo, opens it
// with gitCommitFS, and returns the sub-filesystem rooted at
// entry.ArchDirectory(arch).
func (r Repo) archGitFS(arch string, entry *manifest.Manifest2822Entry) (fs.FS, error) {
	commit, fetchErr := r.fetchGitRepo(arch, entry)
	if fetchErr != nil {
		return nil, fmt.Errorf("failed fetching %q: %w", r.EntryIdentifier(entry), fetchErr)
	}

	repoFS, openErr := gitCommitFS(commit)
	if openErr != nil {
		return nil, openErr
	}

	subdir := entry.ArchDirectory(arch)
	return fs.Sub(repoFS, subdir)
}
func gitCommitFS(commit string) (fs.FS, error) {
if err := ensureGitInit(); err != nil {
return nil, err

View File

@ -3,11 +3,13 @@ package main
import (
"fmt"
"os"
"path"
"path/filepath"
"strings"
"github.com/sirupsen/logrus" // this is used by containerd libraries, so we need to set the default log level for it
"github.com/urfave/cli"
xTerm "golang.org/x/term"
"github.com/docker-library/bashbrew/architecture"
"github.com/docker-library/bashbrew/manifest"
@ -421,6 +423,61 @@ func main() {
Category: "plumbing",
},
{
	Name:  "context",
	Usage: "(eventually Dockerfile-filtered) git archive",
	Flags: []cli.Flag{
		cli.BoolFlag{
			Name:  "sha256",
			Usage: `print sha256 instead of raw tar`,
		},
		// TODO "unfiltered" or something for not applying Dockerfile filtering (once that's implemented)
	},
	Before: subcommandBeforeFactory("context"),
	// Action validates that the arguments select exactly one entry of one repo
	// that supports the current architecture, then either prints the checksum of
	// the build context (--sha256) or writes the context tar to stdout.
	Action: func(c *cli.Context) error {
		repos, err := repos(false, c.Args()...)
		if err != nil {
			return err
		}
		if len(repos) != 1 {
			return fmt.Errorf("'context' expects to act on exactly one architecture of one entry of one repo (got %d repos)", len(repos))
		}

		r, err := fetch(repos[0])
		if err != nil {
			return err
		}

		// TODO technically something like "hello-world:latest" *could* be relaxed a little if it resolves via architecture to one and only one entry 🤔 (but that's a little hard to implement with the existing internal data structures -- see TODO at the top of "sort.go")

		if r.TagEntry == nil {
			return fmt.Errorf("'context' expects to act on exactly one architecture of one entry of one repo (no specific entry of %q selected)", r.RepoName)
		}
		if len(r.TagEntries) != 1 {
			return fmt.Errorf("'context' expects to act on exactly one architecture of one entry of one repo (got %d entries)", len(r.TagEntries))
		}

		if !r.TagEntry.HasArchitecture(arch) {
			return fmt.Errorf("%q does not include architecture %q", path.Join(namespace, r.RepoName)+":"+r.TagEntry.Tags[0], arch)
		}

		if c.Bool("sha256") {
			sum, err := r.ArchGitChecksum(arch, r.TagEntry)
			if err != nil {
				return err
			}
			fmt.Println(sum)
			return nil
		}

		// the raw tar is binary data, so refuse to write it to an interactive terminal
		if xTerm.IsTerminal(int(os.Stdout.Fd())) {
			return fmt.Errorf("cowardly refusing to output a tar to a terminal")
		}
		return r.archContextTar(arch, r.TagEntry, os.Stdout)
	},

	Category: "plumbing",
},
{
Name: "remote",
Usage: "query registries for bashbrew-related data",

View File

@ -89,6 +89,7 @@ func importOCIBlob(ctx context.Context, cs content.Store, fs iofs.FS, descriptor
// this is "docker build" but for "Builder: oci-import"
func ociImportBuild(tags []string, commit, dir, file string) (*imagespec.Descriptor, error) {
// TODO use r.archGitFS (we have no r or arch or entry here 😅)
fs, err := gitCommitFS(commit)
if err != nil {
return nil, err

View File

@ -5,6 +5,8 @@ import (
"pault.ag/go/topsort"
)
// TODO unify archFilter and applyConstraints handling by pre-filtering the full list of Repo objects such that all that remains are things we should process (thus removing all "if" statements throughout the various loops); re-doing the Architectures and Entries lists to only include ones we should process, etc
func sortRepos(repos []string, applyConstraints bool) ([]string, error) {
rs := []*Repo{}
rsMap := map[*Repo]string{}
@ -103,10 +105,10 @@ func sortRepoObjects(rs []*Repo, applyConstraints bool) ([]*Repo, error) {
continue
}
/*
// TODO need archFilter here :(
if archFilter && !entry.HasArchitecture(arch) {
continue
}
// TODO need archFilter here :(
if archFilter && !entry.HasArchitecture(arch) {
continue
}
*/
entryArches := []string{arch}

28
cmd/bashbrew/tar.go Normal file
View File

@ -0,0 +1,28 @@
package main
import (
"crypto/sha256"
"fmt"
"io"
"github.com/docker-library/bashbrew/manifest"
"github.com/docker-library/bashbrew/pkg/tarscrub"
)
// archContextTar writes the scrubbed tar (see tarscrub.WriteTar) of the
// filesystem returned by r.archGitFS(arch, entry) to w.
func (r Repo) archContextTar(arch string, entry *manifest.Manifest2822Entry, w io.Writer) error {
	contextFS, err := r.archGitFS(arch, entry)
	if err == nil {
		err = tarscrub.WriteTar(contextFS, w)
	}
	return err
}
// ArchGitChecksum returns the lowercase hex SHA-256 of the tar stream that
// archContextTar produces for entry on arch.
func (r Repo) ArchGitChecksum(arch string, entry *manifest.Manifest2822Entry) (string, error) {
	digest := sha256.New()

	// a hash.Hash is an io.Writer, so the tar is hashed as it is generated
	if err := r.archContextTar(arch, entry, digest); err != nil {
		return "", err
	}

	sum := digest.Sum(nil)
	return fmt.Sprintf("%x", sum), nil
}

1
go.mod
View File

@ -9,6 +9,7 @@ require (
github.com/sirupsen/logrus v1.9.0
github.com/urfave/cli v1.22.10
go.etcd.io/bbolt v1.3.7
golang.org/x/term v0.5.0
pault.ag/go/debian v0.12.0
pault.ag/go/topsort v0.1.1
)

1
go.sum
View File

@ -940,6 +940,7 @@ golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuX
golang.org/x/term v0.0.0-20220722155259-a9ba230a4035/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
golang.org/x/term v0.5.0 h1:n2a8QNdAb0sZNpU9R1ALUXBbY+w51fCQDN+7EdxNBsY=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

View File

@ -12,168 +12,355 @@ import (
goGitPlumbing "github.com/go-git/go-git/v5/plumbing"
goGitPlumbingFileMode "github.com/go-git/go-git/v5/plumbing/filemode"
goGitPlumbingObject "github.com/go-git/go-git/v5/plumbing/object"
goGitPlumbingStorer "github.com/go-git/go-git/v5/plumbing/storer"
)
// https://github.com/go-git/go-git/issues/296
// TODO something more clever for directories
func CommitHash(repo *goGit.Repository, commit string) (fs.FS, error) {
gitCommit, err := repo.CommitObject(goGitPlumbing.NewHash(commit))
if err != nil {
return nil, err
}
return gitFS{
commit: gitCommit,
tree, err := gitCommit.Tree()
if err != nil {
return nil, err
}
return gitFSFS{
gitFS: &gitFS{
storer: repo.Storer,
tree: tree,
name: ".",
},
}, nil
}
// https://pkg.go.dev/io/fs#FS
type gitFS struct {
commit *goGitPlumbingObject.Commit
// This exists *only* because we cannot create a single object that concurrently implements *both* fs.FS *and* fs.File (Stat(string) vs Stat()).
type gitFSFS struct {
*gitFS
}
// apparently symlinks in "io/fs" are still a big TODO (https://github.com/golang/go/issues/49580, https://github.com/golang/go/issues/45470, etc related issues); all the existing interfaces assume symlinks don't exist
//
// if the File object passed to this function represents a symlink, this returns the (resolved) path that should be looked up instead; only relative symlinks are supported (and attempts to escape the repository with too many "../" *should* result in an error -- this is a convenience/sanity check, not a security boundary; subset of https://pkg.go.dev/io/fs#ValidPath)
//
// otherwise, it will return the empty string and nil
func resolveSymlink(f *goGitPlumbingObject.File) (target string, err error) {
if f.Mode != goGitPlumbingFileMode.Symlink {
return "", nil
// https://pkg.go.dev/io/fs#File
// https://pkg.go.dev/io/fs#FileInfo
// https://pkg.go.dev/io/fs#DirEntry
type gitFS struct {
storer goGitPlumbingStorer.EncodedObjectStorer
tree *goGitPlumbingObject.Tree
entry *goGitPlumbingObject.TreeEntry // might be nil ("." at the top-level of the repo)
// cached values
name string // full path from the repository root
size int64 // Tree.Size value for non-directories (more efficient than opening/reading the blob)
// state for "Open" objects
reader io.ReadCloser // only set for an "Open" file
walker *goGitPlumbingObject.TreeWalker // only set for an "Open" directory
}
// clone copies just the load-bearing bits of f: every field is carried over
// except the "open" state (reader and walker), which is cleared on the copy.
func (f gitFS) clone() *gitFS {
	dup := f // f is already a by-value copy of the receiver
	dup.reader, dup.walker = nil, nil
	return &dup
}
// if our entry is a symlink, this returns the target of it
func (f gitFS) readLink() (bool, string, error) {
if f.entry == nil || f.entry.Mode != goGitPlumbingFileMode.Symlink {
return false, "", nil
}
target, err = f.Contents()
file, err := f.tree.TreeEntryFile(f.entry)
if err != nil {
return true, "", fmt.Errorf("TreeEntryFile(%q): %w", f.name, err)
}
target, err := file.Contents()
return true, target, err
}
// symlinks in "io/fs" are still a big TODO (https://github.com/golang/go/issues/49580, https://github.com/golang/go/issues/45470, etc related issues); all the existing interfaces mostly assume symlinks don't exist (fs.DirEntry.Info() and fs.WalkDir(...) as notable exceptions 🤷)
//
// if the object we're pointing at represents a symlink, this returns the (resolved) path that should be looked up instead; only relative symlinks are supported (and attempts to escape the repository with too many "../" *should* result in an error -- this is a convenience/sanity check, not a security boundary; subset of https://pkg.go.dev/io/fs#ValidPath)
//
// otherwise, it will return the empty string and nil
func (f gitFS) resolveLink() (string, error) {
isLink, target, err := f.readLink()
if !isLink || err != nil {
return "", err
}
if target == "" {
return "", fmt.Errorf("unexpected: empty symlink %q", f.Name)
return "", fmt.Errorf("unexpected: empty symlink %q", f.name)
}
// we *could* implement this as absolute symlinks being relative to the root of the Git repository, but that wouldn't match the behavior of a normal repository that's been "git clone"'d on disk, so I think that would be a mistake and erroring out is saner here
if path.IsAbs(target) {
return "", fmt.Errorf("unsupported: %q is an absolute symlink (%q)", f.Name, target)
return "", fmt.Errorf("unsupported: %q is an absolute symlink (%q)", f.name, target)
}
// symlinks are relative to the path they're in, so we need to prepend that
target = path.Join(path.Dir(f.Name), target)
target = path.Join(path.Dir(f.name), target)
// now let's use path.Clean to get rid of any excess ".." or "." entries in our end result
target = path.Clean(target)
// once we're cleaned, we should have a full path that's relative to the root of the Git repository, so if it still starts with "../", that's a problem that will error later when we try to read it, so let's error out now to bail earlier
if strings.HasPrefix(target, "../") {
return "", fmt.Errorf("unsupported: %q is a relative symlink outside the tree (%q)", f.Name, target)
return "", fmt.Errorf("unsupported: %q is a relative symlink outside the tree (%q)", f.name, target)
}
return target, nil
}
// stat is the helper shared by FS.Stat(...) and FS.Open(...), and the primary
// way new gitFS objects get created besides gitfs.CommitHash(...).
//
// name is looked up in f's tree; f must be a directory.  followSymlinks is
// passed through to statEntry.
func (f gitFS) stat(name string, followSymlinks bool) (*gitFS, error) {
	switch {
	case !f.IsDir():
		return nil, fmt.Errorf("cannot stat a child (%q) of non-directory %q", name, f.name)
	case path.Join(f.name, name) == f.name: // path.Join implies path.Clean too
		// (this is to defensively special-case handling of ".", which FindEntry doesn't like)
		return &f, nil
	}

	found, lookupErr := f.tree.FindEntry(name)
	if lookupErr != nil {
		return nil, fmt.Errorf("Tree(%q).FindEntry(%q): %w", f.name, name, lookupErr)
	}
	return f.statEntry(name, found, followSymlinks)
}
// statEntry builds the gitFS object for the child "name" of f described by
// entry.  It is used both by gitFS.stat and by ReadDir (hence "followSymlinks"
// -- ReadDir needs to not resolve symlinks when creating sub-FS objects).
//
// Directories get their tree loaded; everything else gets its size cached.
// When followSymlinks is set and the entry is a symlink, the object for the
// link's target is returned instead.
func (f gitFS) statEntry(name string, entry *goGitPlumbingObject.TreeEntry, followSymlinks bool) (*gitFS, error) {
	if entry == nil {
		return nil, fmt.Errorf("(%q).statEntry cannot accept a nil entry; perhaps you intended .stat(%q) instead?", f.name, name)
	}

	child := f.clone()
	child.entry = entry
	child.name = path.Join(child.name, name)

	if child.IsDir() {
		subtree, err := goGitPlumbingObject.GetTree(f.storer, entry.Hash) // see https://github.com/go-git/go-git/blob/v5.11.0/plumbing/object/tree.go#L103
		if err != nil {
			return nil, fmt.Errorf("Tree(%q): %w", child.name, err)
		}
		child.tree = subtree
		return child, nil
	}

	blobSize, err := f.storer.EncodedObjectSize(entry.Hash) // https://github.com/go-git/go-git/blob/v5.11.0/plumbing/object/tree.go#L92
	if err != nil {
		return nil, fmt.Errorf("Size(%q): %w", child.name, err)
	}
	child.size = blobSize

	if !followSymlinks {
		return child, nil
	}

	// TODO this should probably be an explicit loop (instead of implicit recursion) with some upper nesting limit? (symlink to symlink to symlink to ...; possibly even in an infinite cycle because symlinks)
	target, err := child.resolveLink()
	if err != nil {
		return nil, err
	}
	if target == "" {
		// not a symlink
		return child, nil
	}

	// NOTE(review): resolveLink builds target from child.name (documented on the struct as the full path from the repository root), while f.stat looks the name up in f.tree -- confirm this still resolves correctly when f is not the repository root (e.g. after Sub)
	return f.stat(target, followSymlinks)
}
// https://pkg.go.dev/io/fs#FS
func (fs gitFS) Open(name string) (fs.File, error) {
f, err := fs.commit.File(name)
func (f gitFSFS) Open(name string) (fs.File, error) {
pathErr := &fs.PathError{
Op: "open",
Path: name,
}
if !fs.ValidPath(name) {
pathErr.Err = fs.ErrInvalid
return nil, pathErr
}
var fi *gitFS
fi, pathErr.Err = f.stat(name, true)
if pathErr.Err != nil {
return nil, pathErr
}
if fi.IsDir() {
fi.walker = goGitPlumbingObject.NewTreeWalker(fi.tree, false, nil)
return fi, nil
}
var file *goGitPlumbingObject.File
file, err := fi.tree.TreeEntryFile(fi.entry)
if err != nil {
// TODO if it's file-not-found, we need to check whether it's a directory
return nil, err
pathErr.Err = fmt.Errorf("Tree(%q).TreeEntryFile(%q): %w", f.name, fi.name, err)
return nil, pathErr
}
if target, err := resolveSymlink(f); err != nil {
return nil, err
} else if target != "" {
return fs.Open(target)
}
reader, err := f.Reader()
fi.reader, err = file.Reader()
if err != nil {
return nil, err
pathErr.Err = fmt.Errorf("File(%q).Reader(): %w", fi.name, err)
return nil, pathErr
}
return gitFSFile{
stat: gitFSFileInfo{
file: f,
},
reader: reader,
}, nil
return fi, nil
}
// https://pkg.go.dev/io/fs#StatFS
func (fs gitFS) Stat(name string) (fs.FileInfo, error) {
f, err := fs.commit.File(name)
// Stat implements https://pkg.go.dev/io/fs#StatFS: it looks up name (following
// symlinks) and wraps any failure in an *fs.PathError with Op "stat".
func (f gitFSFS) Stat(name string) (fs.FileInfo, error) {
	info, statErr := f.stat(name, true)
	if statErr == nil {
		return info, nil
	}
	return nil, &fs.PathError{Op: "stat", Path: name, Err: statErr}
}
// https://github.com/golang/go/issues/49580 ("type ReadLinkFS interface")
//
// ReadLink returns the raw (unresolved) target of the symlink at name.  Every
// failure, including name not being a symlink, is reported as an *fs.PathError
// with Op "readlink".
func (f gitFSFS) ReadLink(name string) (string, error) {
	fail := func(cause error) (string, error) {
		return "", &fs.PathError{Op: "readlink", Path: name, Err: cause}
	}

	// look the entry up *without* following symlinks, since the link itself is what we want
	link, err := f.stat(name, false)
	if err != nil {
		return fail(err)
	}

	isLink, target, err := link.readLink()
	switch {
	case err != nil:
		return fail(err)
	case !isLink:
		return fail(fmt.Errorf("not a symlink"))
	}
	return target, nil
}
// https://pkg.go.dev/io/fs#SubFS
func (f gitFS) Sub(dir string) (fs.FS, error) {
fi, err := f.stat(dir, true)
if err != nil {
return nil, err
}
if target, err := resolveSymlink(f); err != nil {
return nil, err
} else if target != "" {
return fs.Stat(target)
if !fi.IsDir() {
return nil, fmt.Errorf("%q is not a directory", fi.name)
}
return gitFSFileInfo{
file: f,
}, nil
return gitFSFS{gitFS: fi}, nil
}
// https://pkg.go.dev/io/fs#File
type gitFSFile struct {
stat fs.FileInfo
reader io.ReadCloser
// Stat is the https://pkg.go.dev/io/fs#File method: gitFS also provides the
// fs.FileInfo methods, so this hands back the receiver value and never fails.
func (f gitFS) Stat() (fs.FileInfo, error) {
return f, nil
}
func (f gitFSFile) Stat() (fs.FileInfo, error) {
return f.stat, nil
}
func (f gitFSFile) Read(b []byte) (int, error) {
// https://pkg.go.dev/io/fs#File
//
// Read delegates to the reader that Open attached; objects without one
// (directories, or objects that were never opened) return an error.
func (f gitFS) Read(b []byte) (int, error) {
	if r := f.reader; r != nil {
		return r.Read(b)
	}
	return 0, fmt.Errorf("%q not open (or not a file)", f.name)
}
func (f gitFSFile) Close() error {
return f.reader.Close()
// https://pkg.go.dev/io/fs#File
//
// Close releases whichever piece of "open" state is present: the reader (whose
// Close error is returned) and/or the directory walker.
func (f gitFS) Close() error {
	if r := f.reader; r != nil {
		if closeErr := r.Close(); closeErr != nil {
			return closeErr
		}
	}
	if w := f.walker; w != nil {
		w.Close() // returns no error, nothing 🤔
	}
	return nil
}
type gitFSFileInfo struct {
file *goGitPlumbingObject.File
// https://pkg.go.dev/io/fs#ReadDirFile
//
// ReadDir returns the next entries of an opened directory, without following
// symlinks.
//
// With n <= 0 it returns every remaining entry and a nil error.  With n > 0 it
// returns at most n entries; io.EOF is only reported together with an *empty*
// slice, so a final short batch comes back with a nil error and the following
// call returns io.EOF.  This way callers that stop on the first non-nil error
// do not drop the last entries.
func (f gitFS) ReadDir(n int) ([]fs.DirEntry, error) {
	if f.walker == nil {
		return nil, fmt.Errorf("%q not open (or not a directory)", f.name)
	}
	ret := []fs.DirEntry{}
	for i := 0; n <= 0 || i < n; i++ {
		name, entry, err := f.walker.Next()
		if err != nil {
			if err == io.EOF && (n <= 0 || len(ret) > 0) {
				// "In this case, if ReadDir succeeds (reads all the way to the end of the directory), it returns the slice and a nil error."
				// (for n > 0 we hand back what we already collected; this assumes the walker keeps reporting io.EOF on later calls -- TODO confirm against go-git's TreeWalker)
				break
			}
			return ret, err
		}
		fi, err := f.statEntry(name, &entry, false)
		if err != nil {
			return ret, err
		}
		ret = append(ret, fi)
	}
	return ret, nil
}
// base name of the file
func (fi gitFSFileInfo) Name() string {
return path.Base(fi.file.Name)
// https://pkg.go.dev/io/fs#FileInfo: base name of the file
//
// Name is derived from the cached full path (f.name) rather than from f.entry,
// which may be nil for the top-level "." object.
func (f gitFS) Name() string {
return path.Base(f.name) // this should be the same as f.entry.Name (except in the case of the top-level / root)
}
// length in bytes for regular files; system-dependent for others
func (fi gitFSFileInfo) Size() int64 {
return fi.file.Size
// https://pkg.go.dev/io/fs#FileInfo: length in bytes for regular files; system-dependent for others
//
// Size returns the cached f.size value.  In the code visible here that field
// is only filled in by statEntry for non-directory entries, so directories
// report 0.
func (f gitFS) Size() int64 {
return f.size
}
// file mode bits
func (fi gitFSFileInfo) Mode() fs.FileMode {
// https://pkg.go.dev/io/fs#FileInfo: file mode bits
func (f gitFS) Mode() fs.FileMode {
// https://pkg.go.dev/github.com/go-git/go-git/v5@v5.4.2/plumbing/filemode#FileMode
// https://pkg.go.dev/io/fs#FileMode
switch fi.file.Mode {
if f.entry == nil {
// "." at the top-level of the repository is a directory
return 0775 | fs.ModeDir
}
switch f.entry.Mode {
case goGitPlumbingFileMode.Regular:
return 0644
return 0664
case goGitPlumbingFileMode.Symlink:
return 0644 | fs.ModeSymlink
return 0777 | fs.ModeSymlink
case goGitPlumbingFileMode.Executable:
return 0755
return 0775
case goGitPlumbingFileMode.Dir:
return 0755 | fs.ModeDir
return 0775 | fs.ModeDir
}
return 0 | fs.ModeIrregular // TODO what to do for files whose types we don't support? 😬
}
// modification time
func (fi gitFSFileInfo) ModTime() time.Time {
// https://pkg.go.dev/io/fs#FileInfo: modification time
//
// ModTime always returns the zero time.Time; no timestamp is tracked on gitFS.
func (f gitFS) ModTime() time.Time {
return time.Time{} // TODO maybe pass down whichever is more recent of commit.Author.When vs commit.Committer.When ?
}
// abbreviation for Mode().IsDir()
func (fi gitFSFileInfo) IsDir() bool {
return fi.file.Mode == goGitPlumbingFileMode.Dir
// https://pkg.go.dev/io/fs#FileInfo: abbreviation for Mode().IsDir()
//
// IsDir reports whether the directory bit is set in f.Mode().
func (f gitFS) IsDir() bool {
	mode := f.Mode()
	return mode&fs.ModeDir != 0
}
// underlying data source (can return nil)
func (fi gitFSFileInfo) Sys() interface{} {
return fi.file
// https://pkg.go.dev/io/fs#FileInfo: underlying data source (can return nil)
//
// Sys always returns nil; the underlying go-git objects are not exposed.
func (f gitFS) Sys() interface{} {
return nil
}
// https://pkg.go.dev/io/fs#DirEntry
//
// Type returns only the type bits of f.Mode() (permission bits are dropped).
func (f gitFS) Type() fs.FileMode {
	mode := f.Mode()
	return mode & fs.ModeType
}
// https://pkg.go.dev/io/fs#DirEntry
//
// Info returns the receiver value itself (gitFS also provides the fs.FileInfo
// methods), so no additional lookup happens and the error is always nil.
func (f gitFS) Info() (fs.FileInfo, error) {
return f, nil
}

View File

@ -3,7 +3,7 @@ package gitfs_test
import (
"io"
"testing"
// TODO "testing/fstest"
"testing/fstest"
"github.com/docker-library/bashbrew/pkg/gitfs"
@ -12,7 +12,7 @@ import (
)
func TestCommitFS(t *testing.T) {
// TODO instead of cloning a remote repository, synthesize a very simple Git repository right in the test here
// TODO instead of cloning a remote repository, synthesize a very simple Git repository right in the test here (benefit of the remote repository is that it's much larger, so fstest.TestFS has a lot more data to test against)
repo, err := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
URL: "https://github.com/docker-library/hello-world.git",
SingleBranch: true,
@ -20,31 +20,76 @@ func TestCommitFS(t *testing.T) {
if err != nil {
t.Fatal(err)
}
fs, err := gitfs.CommitHash(repo, "480c62c690c0af4427372cf7f0de11da4e00e6c5")
f, err := gitfs.CommitHash(repo, "480c62c690c0af4427372cf7f0de11da4e00e6c5")
if err != nil {
t.Fatal(err)
}
r, err := fs.Open("greetings/hello-world.txt")
if err != nil {
t.Fatal(err)
}
defer func() {
if err := r.Close(); err != nil {
t.Run("Open+ReadAll", func(t *testing.T) {
r, err := f.Open("greetings/hello-world.txt")
if err != nil {
t.Fatal(err)
}
}()
b, err := io.ReadAll(r)
if err != nil {
t.Fatal(err)
}
expected := "Hello from Docker!\n"
if string(b) != expected {
t.Fatalf("expected %q, got %q", expected, string(b))
}
/*
TODO (we have to implement fake directory handling for this to work; it gets ".: Open: file not found" immediately)
if err := fstest.TestFS(fs, "greetings/hello-world.txt"); err != nil {
defer func() {
if err := r.Close(); err != nil {
t.Fatal(err)
}
}()
b, err := io.ReadAll(r)
if err != nil {
t.Fatal(err)
}
*/
expected := "Hello from Docker!\n"
if string(b) != expected {
t.Fatalf("expected %q, got %q", expected, string(b))
}
})
t.Run("fstest.TestFS", func(t *testing.T) {
if err := fstest.TestFS(f, "greetings/hello-world.txt"); err != nil {
t.Fatal(err)
}
})
}
// TestSymlinkFS checks that opening a symlink through the commit filesystem
// yields the contents of the link's target, then runs the generic
// fstest.TestFS suite over the same tree.
func TestSymlinkFS(t *testing.T) {
	// TODO instead of cloning a remote repository, synthesize a very simple Git repository right in the test here (benefit of the remote repository is that it's much larger, so fstest.TestFS has a lot more data to test against)
	cloneOpts := &git.CloneOptions{
		URL:          "https://github.com/tianon/gosu.git", // just a repository with a known symlink (`.dockerignore` -> `.gitignore`)
		SingleBranch: true,
	}
	repo, err := git.Clone(memory.NewStorage(), nil, cloneOpts)
	if err != nil {
		t.Fatal(err)
	}

	commitFS, err := gitfs.CommitHash(repo, "b73cc93b6f5b5a045c397ff0f75190e33d853946")
	if err != nil {
		t.Fatal(err)
	}

	t.Run("Open+ReadAll", func(t *testing.T) {
		file, err := commitFS.Open(".dockerignore")
		if err != nil {
			t.Fatal(err)
		}
		defer func() {
			if closeErr := file.Close(); closeErr != nil {
				t.Fatal(closeErr)
			}
		}()

		got, err := io.ReadAll(file)
		if err != nil {
			t.Fatal(err)
		}

		const want = ".git\nSHA256SUMS*\ngosu*\n"
		if string(got) != want {
			t.Fatalf("expected %q, got %q", want, string(got))
		}
	})

	// might as well run fstest again, now that we have a new filesystem tree 😅
	t.Run("fstest.TestFS", func(t *testing.T) {
		err := fstest.TestFS(commitFS, ".dockerignore", "hub/Dockerfile.debian")
		if err != nil {
			t.Fatal(err)
		}
	})
}

View File

@ -0,0 +1,44 @@
package gitfs_test
import (
"crypto/sha256"
"fmt"
"io/fs"
"github.com/docker-library/bashbrew/pkg/gitfs"
"github.com/docker-library/bashbrew/pkg/tarscrub"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/storage/memory"
)
// this example is nice because it has some intentionally dangling symlinks in it that trip things up if they aren't implemented correctly!
// (see also pkg/tarscrub/git_test.go)
//
// It clones the repository into memory, narrows the commit to "stable/debian",
// and prints the SHA-256 of the scrubbed tar of that directory.
func ExampleGitVarnish() {
	must := func(err error) {
		if err != nil {
			panic(err)
		}
	}

	repo, err := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
		URL:          "https://github.com/varnish/docker-varnish.git",
		SingleBranch: true,
	})
	must(err)

	commitFS, err := gitfs.CommitHash(repo, "0c295b528f28a98650fb2580eab6d34b30b165c4")
	must(err)

	contextFS, err := fs.Sub(commitFS, "stable/debian")
	must(err)

	digest := sha256.New()
	must(tarscrub.WriteTar(contextFS, digest))

	fmt.Printf("%x\n", digest.Sum(nil))
	// Output: 3aef5ac859b23d65dfe5e9f2a47750e9a32852222829cfba762a870c1473fad6
}

73
pkg/tarscrub/git_test.go Normal file
View File

@ -0,0 +1,73 @@
package tarscrub_test
import (
"crypto/sha256"
"fmt"
"io/fs"
"github.com/docker-library/bashbrew/pkg/gitfs"
"github.com/docker-library/bashbrew/pkg/tarscrub"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/storage/memory"
)
// ExampleGitHello clones the hello-world repository into memory, narrows a
// fixed commit to "amd64/hello-world", and prints the SHA-256 of the scrubbed
// tar of that directory.
func ExampleGitHello() {
	must := func(err error) {
		if err != nil {
			panic(err)
		}
	}

	repo, err := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
		URL:          "https://github.com/docker-library/hello-world.git",
		SingleBranch: true,
	})
	must(err)

	commitFS, err := gitfs.CommitHash(repo, "3fb6ebca4163bf5b9cc496ac3e8f11cb1e754aee")
	must(err)

	contextFS, err := fs.Sub(commitFS, "amd64/hello-world")
	must(err)

	digest := sha256.New()
	must(tarscrub.WriteTar(contextFS, digest))

	fmt.Printf("%x\n", digest.Sum(nil))
	// Output: 22266b0a36deee72428cffd00859ce991f1db101260999c40904ace7d634b788
}
// this example is nice because it has some intentionally dangling symlinks in it that trip things up if they aren't implemented correctly!
// (see also pkg/gitfs/tarscrub_test.go)
//
// It clones the repository into memory, narrows the commit to "stable/debian",
// and prints the SHA-256 of the scrubbed tar of that directory.
func ExampleGitVarnish() {
	must := func(err error) {
		if err != nil {
			panic(err)
		}
	}

	repo, err := git.Clone(memory.NewStorage(), nil, &git.CloneOptions{
		URL:          "https://github.com/varnish/docker-varnish.git",
		SingleBranch: true,
	})
	must(err)

	commitFS, err := gitfs.CommitHash(repo, "0c295b528f28a98650fb2580eab6d34b30b165c4")
	must(err)

	contextFS, err := fs.Sub(commitFS, "stable/debian")
	must(err)

	digest := sha256.New()
	must(tarscrub.WriteTar(contextFS, digest))

	fmt.Printf("%x\n", digest.Sum(nil))
	// Output: 3aef5ac859b23d65dfe5e9f2a47750e9a32852222829cfba762a870c1473fad6
}

88
pkg/tarscrub/tarscrub.go Normal file
View File

@ -0,0 +1,88 @@
package tarscrub
import (
"archive/tar"
"fmt"
"io"
"io/fs"
)
// TODO create an io/fs that parses a Dockerfile in an io/fs and effectively "filters" the io/fs to only return/include files that are used by that Dockerfile 👀
// ScrubHeader takes a tar header object and "scrubs" it (uid/gid zeroed,
// timestamps zeroed).
//
// It returns a new header carrying over only the entry type, name, link
// target, size, mode, and device numbers of hdr; every other field is left at
// its zero value.  hdr itself is not modified.
func ScrubHeader(hdr *tar.Header) *tar.Header {
	return &tar.Header{
		Typeflag: hdr.Typeflag,
		Name:     hdr.Name,
		Linkname: hdr.Linkname,
		Size:     hdr.Size,
		Mode:     hdr.Mode,
		Devmajor: hdr.Devmajor,
		Devminor: hdr.Devminor,
	}
}

// WriteTar writes a "scrubbed" tarball of f to the given io.Writer (uid/gid
// zeroed, timestamps zeroed).
//
// f is walked from "." with fs.WalkDir; "." itself is omitted, directory names
// get a trailing "/", and symlinks are stored as links (never followed), which
// requires f to provide a ReadLink(name) method.  The stream is flushed rather
// than closed, so it does not end with the usual end-of-archive blocks.
//
// The first error from the walk is returned; if the walk succeeds, an error
// from the final flush (which writes the last entry's padding) is returned
// instead of being dropped.
func WriteTar(f fs.FS, w io.Writer) error {
	tw := tar.NewWriter(w)

	// https://github.com/golang/go/blob/go1.22rc1/src/archive/tar/writer.go#L408-L443
	// https://cs.opensource.google/go/go/+/go1.22rc1:src/archive/tar/writer.go;l=411
	walkErr := fs.WalkDir(f, ".", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return fmt.Errorf("%q: %w", path, err)
		}
		// TODO add more context to more errors

		if path == "." {
			// skip "." to match "git archive" behavior -- TODO this should be optional somehow
			return nil
		}

		info, err := d.Info()
		if err != nil {
			return err
		}

		hdr, err := tar.FileInfoHeader(info, "")
		if err != nil {
			return err
		}
		hdr.Name = path
		if info.IsDir() {
			hdr.Name += "/"
		}

		if info.Mode()&fs.ModeSymlink != 0 {
			// https://github.com/golang/go/issues/49580 ("type ReadLinkFS interface")
			if readlinkFS, ok := f.(interface {
				ReadLink(name string) (string, error)
			}); ok {
				hdr.Linkname, err = readlinkFS.ReadLink(path)
				if err != nil {
					return err
				}
			} else {
				return fmt.Errorf("filesystem contains symlinks but does not implement ReadLinkFS (needed for symlink %q)", path)
			}
		}

		newHdr := ScrubHeader(hdr)
		if err := tw.WriteHeader(newHdr); err != nil {
			return err
		}

		// directories and links have no body to copy
		if info.IsDir() || hdr.Linkname != "" {
			return nil
		}

		file, err := f.Open(path)
		if err != nil {
			return err
		}
		defer file.Close()

		_, err = io.Copy(tw, file)
		return err
	})

	// note: flush instead of close to avoid the empty block at EOF
	// (always flush, like the previous deferred call did, but keep the walk error as the primary one)
	flushErr := tw.Flush()
	if walkErr != nil {
		return walkErr
	}
	return flushErr
}