From 7afd7a82bd672bf7b3976458197b8c56c17407de Mon Sep 17 00:00:00 2001 From: Graydon Hoare Date: Thu, 12 Dec 2013 05:33:15 +0000 Subject: [PATCH 1/5] Factor cache-probing logic out of buildfile.commit() and CmdRun(). --- buildfile.go | 54 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/buildfile.go b/buildfile.go index 7d87a17d3a..86ec1b461c 100644 --- a/buildfile.go +++ b/buildfile.go @@ -87,6 +87,26 @@ func (b *buildFile) CmdMaintainer(name string) error { return b.commit("", b.config.Cmd, fmt.Sprintf("MAINTAINER %s", name)) } +// probeCache checks to see if image-caching is enabled (`b.utilizeCache`) +// and if so attempts to look up the current `b.image` and `b.config` pair +// in the current server `b.srv`. If an image is found, probeCache returns +// `(true, nil)`. If no image is found, it returns `(false, nil)`. If there +// is any error, it returns `(false, err)`. +func (b *buildFile) probeCache() (bool, error) { + if b.utilizeCache { + if cache, err := b.srv.ImageGetCached(b.image, b.config); err != nil { + return false, err + } else if cache != nil { + fmt.Fprintf(b.outStream, " ---> Using cache\n") + utils.Debugf("[BUILDER] Use cached version") + b.image = cache.ID + return true, nil + } else { + utils.Debugf("[BUILDER] Cache miss") + } + } + return false, nil +} func (b *buildFile) CmdRun(args string) error { if b.image == "" { return fmt.Errorf("Please provide a source image with `from` prior to run") @@ -104,17 +124,12 @@ func (b *buildFile) CmdRun(args string) error { utils.Debugf("Command to be executed: %v", b.config.Cmd) - if b.utilizeCache { - if cache, err := b.srv.ImageGetCached(b.image, b.config); err != nil { - return err - } else if cache != nil { - fmt.Fprintf(b.outStream, " ---> Using cache\n") - utils.Debugf("[BUILDER] Use cached version") - b.image = cache.ID - return nil - } else { - utils.Debugf("[BUILDER] Cache miss") - } + hit, err := b.probeCache() + if err != nil { + return err + } + if hit { + return nil } cid, err := b.run() @@ -460,17 +475,12 @@ func (b *buildFile) commit(id string, autoCmd []string, comment string) error { b.config.Cmd = []string{"/bin/sh", "-c", "#(nop) " + comment} defer func(cmd []string) { b.config.Cmd = cmd }(cmd) - if b.utilizeCache { - if cache, err := b.srv.ImageGetCached(b.image, b.config); err != nil { - return err - } else if cache != nil { - fmt.Fprintf(b.outStream, " ---> Using cache\n") - utils.Debugf("[BUILDER] Use cached version") - b.image = cache.ID - return nil - } else { - utils.Debugf("[BUILDER] Cache miss") - } + hit, err := b.probeCache() + if err != nil { + return err + } + if hit { + return nil } container, warnings, err := b.runtime.Create(b.config, "") From 3f9416b58da029b7d82a4908f6c066cee0695edf Mon Sep 17 00:00:00 2001 From: Graydon Hoare Date: Thu, 12 Dec 2013 05:33:15 +0000 Subject: [PATCH 2/5] Record added-tree sha256 in buildfile.CmdAdd, probe and use cache. --- buildfile.go | 179 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 144 insertions(+), 35 deletions(-) diff --git a/buildfile.go b/buildfile.go index 86ec1b461c..745591e734 100644 --- a/buildfile.go +++ b/buildfile.go @@ -1,6 +1,9 @@ package docker import ( + "archive/tar" + "crypto/sha256" + "encoding/hex" "encoding/json" "fmt" "github.com/dotcloud/docker/archive" @@ -11,9 +14,11 @@ import ( "net/url" "os" "path" + "path/filepath" "reflect" "regexp" "strings" + "time" ) type BuildFile interface { @@ -107,6 +112,67 @@ func (b *buildFile) probeCache() (bool, error) { } return false, nil } + +// hashPath calculates a strong hash (sha256) value for a file tree located +// at `basepth`/`pth`, including all attributes that would normally be +// captured by `tar`. The path to hash is passed in two pieces only to +// permit logging the second piece in isolation, assuming the first is a +// temporary directory in which docker is running. If `clobberTimes` is +// true and hashPath is applied to a single file, the ctime/atime/mtime of +// the file is considered to be unix time 0, for purposes of hashing. +func (b *buildFile) hashPath(basePth, pth string, clobberTimes bool) (string, error) { + + p := path.Join(basePth, pth) + + st, err := os.Stat(p) + if err != nil { + return "", err + } + + h := sha256.New() + + if st.IsDir() { + tarRd, err := archive.Tar(p, archive.Uncompressed) + if err != nil { + return "", err + } + _, err = io.Copy(h, tarRd) + if err != nil { + return "", err + } + + } else { + hdr, err := tar.FileInfoHeader(st, "") + if err != nil { + return "", err + } + if clobberTimes { + hdr.AccessTime = time.Unix(0, 0) + hdr.ChangeTime = time.Unix(0, 0) + hdr.ModTime = time.Unix(0, 0) + } + hdr.Name = filepath.Base(p) + tarWr := tar.NewWriter(h) + if err := tarWr.WriteHeader(hdr); err != nil { + return "", err + } + + fileRd, err := os.Open(p) + if err != nil { + return "", err + } + + if _, err = io.Copy(tarWr, fileRd); err != nil { + return "", err + } + tarWr.Close() + } + + hstr := hex.EncodeToString(h.Sum(nil)) + fmt.Fprintf(b.outStream, " ---> data at %s has sha256 %.12s...\n", pth, hstr) + return hstr, nil +} + func (b *buildFile) CmdRun(args string) error { if b.image == "" { return fmt.Errorf("Please provide a source image with `from` prior to run") @@ -275,32 +341,16 @@ func (b *buildFile) CmdVolume(args string) error { return nil } -func (b *buildFile) addRemote(container *Container, orig, dest string) error { - file, err := utils.Download(orig) +func (b *buildFile) checkPathForAddition(orig string) error { + origPath := path.Join(b.context, orig) + if !strings.HasPrefix(origPath, b.context) { + return fmt.Errorf("Forbidden path outside the build context: %s (%s)", orig, origPath) + } + _, err := os.Stat(origPath) if err != nil { - return err + return fmt.Errorf("%s: no such file or directory", orig) } - defer file.Body.Close() - - // If the destination is a directory, figure out the filename. - if strings.HasSuffix(dest, "/") { - u, err := url.Parse(orig) - if err != nil { - return err - } - path := u.Path - if strings.HasSuffix(path, "/") { - path = path[:len(path)-1] - } - parts := strings.Split(path, "/") - filename := parts[len(parts)-1] - if filename == "" { - return fmt.Errorf("cannot determine filename from url: %s", u) - } - dest = dest + filename - } - - return container.Inject(file.Body, dest) + return nil } func (b *buildFile) addContext(container *Container, orig, dest string) error { @@ -310,9 +360,6 @@ func (b *buildFile) addContext(container *Container, orig, dest string) error { if strings.HasSuffix(dest, "/") { destPath = destPath + "/" } - if !strings.HasPrefix(origPath, b.context) { - return fmt.Errorf("Forbidden path outside the build context: %s (%s)", orig, origPath) - } fi, err := os.Stat(origPath) if err != nil { return fmt.Errorf("%s: no such file or directory", orig) @@ -358,6 +405,74 @@ func (b *buildFile) CmdAdd(args string) error { b.config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) ADD %s in %s", orig, dest)} b.config.Image = b.image + + origPath := orig + destPath := dest + clobberTimes := false + + if utils.IsURL(orig) { + + clobberTimes = true + + resp, err := utils.Download(orig) + if err != nil { + return err + } + tmpDirName, err := ioutil.TempDir(b.context, "docker-remote") + if err != nil { + return err + } + tmpFileName := path.Join(tmpDirName, "tmp") + tmpFile, err := os.OpenFile(tmpFileName, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0600) + if err != nil { + return err + } + defer os.RemoveAll(tmpDirName) + if _, err = io.Copy(tmpFile, resp.Body); err != nil { + return err + } + origPath = path.Join(filepath.Base(tmpDirName), filepath.Base(tmpFileName)) + tmpFile.Close() + + // If the destination is a directory, figure out the filename. + if strings.HasSuffix(dest, "/") { + u, err := url.Parse(orig) + if err != nil { + return err + } + path := u.Path + if strings.HasSuffix(path, "/") { + path = path[:len(path)-1] + } + parts := strings.Split(path, "/") + filename := parts[len(parts)-1] + if filename == "" { + return fmt.Errorf("cannot determine filename from url: %s", u) + } + destPath = dest + filename + } + } + + if err := b.checkPathForAddition(origPath); err != nil { + return err + } + + // Hash path and check the cache + if b.utilizeCache { + hash, err := b.hashPath(b.context, origPath, clobberTimes) + if err != nil { + return err + } + b.config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) ADD %s in %s", hash, dest)} + hit, err := b.probeCache() + if err != nil { + return err + } + if hit { + return nil + } + } + // Create the container and start it container, _, err := b.runtime.Create(b.config, "") if err != nil { @@ -370,14 +485,8 @@ func (b *buildFile) CmdAdd(args string) error { } defer container.Unmount() - if utils.IsURL(orig) { - if err := b.addRemote(container, orig, dest); err != nil { - return err - } - } else { - if err := b.addContext(container, orig, dest); err != nil { - return err - } + if err := b.addContext(container, origPath, destPath); err != nil { + return err } if err := b.commit(container.ID, cmd, fmt.Sprintf("ADD %s in %s", orig, dest)); err != nil { From 15a68541196b9b9338923ece6c6b4a69929652d6 Mon Sep 17 00:00:00 2001 From: Graydon Hoare Date: Thu, 12 Dec 2013 05:33:15 +0000 Subject: [PATCH 3/5] Add testcases for ADD caching, closes #880. --- integration/buildfile_test.go | 122 ++++++++++++++++++++++++++-------- 1 file changed, 94 insertions(+), 28 deletions(-) diff --git a/integration/buildfile_test.go b/integration/buildfile_test.go index 4d15031d30..21ba42c9aa 100644 --- a/integration/buildfile_test.go +++ b/integration/buildfile_test.go @@ -425,16 +425,10 @@ func TestBuildEntrypointRunCleanup(t *testing.T) { } } -func TestBuildImageWithCache(t *testing.T) { +func checkCacheBehavior(t *testing.T, template testContextTemplate, expectHit bool) { eng := NewTestEngine(t) defer nuke(mkRuntimeFromEngine(eng, t)) - template := testContextTemplate{` - from {IMAGE} - maintainer dockerio - `, - nil, nil} - img, err := buildImage(template, t, eng, true) if err != nil { t.Fatal(err) @@ -443,43 +437,115 @@ func TestBuildImageWithCache(t *testing.T) { imageId := img.ID img = nil - img, err = buildImage(template, t, eng, true) + img, err = buildImage(template, t, eng, expectHit) if err != nil { t.Fatal(err) } - if imageId != img.ID { - t.Logf("Image ids should match: %s != %s", imageId, img.ID) + hit := imageId == img.ID + if hit != expectHit { + t.Logf("Cache misbehavior, got hit=%t, expected hit=%t: (first: %s, second %s)", + hit, expectHit, imageId, img.ID) t.Fail() } } -func TestBuildImageWithoutCache(t *testing.T) { - eng := NewTestEngine(t) - defer nuke(mkRuntimeFromEngine(eng, t)) - +func TestBuildImageWithCache(t *testing.T) { template := testContextTemplate{` from {IMAGE} maintainer dockerio `, nil, nil} + checkCacheBehavior(t, template, true) +} - img, err := buildImage(template, t, eng, true) - if err != nil { - t.Fatal(err) - } - imageId := img.ID +func TestBuildImageWithoutCache(t *testing.T) { + template := testContextTemplate{` + from {IMAGE} + maintainer dockerio + `, + nil, nil} + checkCacheBehavior(t, template, false) +} - img = nil - img, err = buildImage(template, t, eng, false) - if err != nil { - t.Fatal(err) - } +func TestBuildADDLocalFileWithCache(t *testing.T) { + template := testContextTemplate{` + from {IMAGE} + maintainer dockerio + run echo "first" + add foo /usr/lib/bla/bar + run echo "second" + `, + [][2]string{{"foo", "hello"}}, + nil} + checkCacheBehavior(t, template, true) +} - if imageId == img.ID { - t.Logf("Image ids should not match: %s == %s", imageId, img.ID) - t.Fail() - } +func TestBuildADDLocalFileWithoutCache(t *testing.T) { + template := testContextTemplate{` + from {IMAGE} + maintainer dockerio + run echo "first" + add foo /usr/lib/bla/bar + run echo "second" + `, + [][2]string{{"foo", "hello"}}, + nil} + checkCacheBehavior(t, template, false) +} + +func TestBuildADDRemoteFileWithCache(t *testing.T) { + template := testContextTemplate{` + from {IMAGE} + maintainer dockerio + run echo "first" + add http://{SERVERADDR}/baz /usr/lib/baz/quux + run echo "second" + `, + nil, + [][2]string{{"/baz", "world!"}}} + checkCacheBehavior(t, template, true) +} + +func TestBuildADDRemoteFileWithoutCache(t *testing.T) { + template := testContextTemplate{` + from {IMAGE} + maintainer dockerio + run echo "first" + add http://{SERVERADDR}/baz /usr/lib/baz/quux + run echo "second" + `, + nil, + [][2]string{{"/baz", "world!"}}} + checkCacheBehavior(t, template, false) +} + +func TestBuildADDLocalAndRemoteFilesWithCache(t *testing.T) { + template := testContextTemplate{` + from {IMAGE} + maintainer dockerio + run echo "first" + add foo /usr/lib/bla/bar + add http://{SERVERADDR}/baz /usr/lib/baz/quux + run echo "second" + `, + [][2]string{{"foo", "hello"}}, + [][2]string{{"/baz", "world!"}}} + checkCacheBehavior(t, template, true) +} + +func TestBuildADDLocalAndRemoteFilesWithoutCache(t *testing.T) { + template := testContextTemplate{` + from {IMAGE} + maintainer dockerio + run echo "first" + add foo /usr/lib/bla/bar + add http://{SERVERADDR}/baz /usr/lib/baz/quux + run echo "second" + `, + [][2]string{{"foo", "hello"}}, + [][2]string{{"/baz", "world!"}}} + checkCacheBehavior(t, template, false) } func TestForbiddenContextPath(t *testing.T) { From 670b326c1b88d66ef941de5f27867ba6132762a1 Mon Sep 17 00:00:00 2001 From: Graydon Hoare Date: Thu, 12 Dec 2013 05:33:15 +0000 Subject: [PATCH 4/5] Add self to AUTHORS. --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 60533c1cc2..beee696146 100644 --- a/AUTHORS +++ b/AUTHORS @@ -68,6 +68,7 @@ Francisco Souza Frederick F. Kautz IV Gabriel Monroy Gareth Rushgrove +Graydon Hoare Greg Thornton Guillaume J. Charmes Gurjeet Singh From a26801c73f535b2e57366b9d6de0453e7c732884 Mon Sep 17 00:00:00 2001 From: Graydon Hoare Date: Thu, 12 Dec 2013 05:33:15 +0000 Subject: [PATCH 5/5] Add CHANGELOG.md entry for ADD caching. --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 060a07c21e..5fb81b4207 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +#### Builder + +- ADD now uses image cache, based on sha256 of added content. + ## 0.7.2 (2013-12-16) #### Runtime