Merge pull request #2809 from graydon/880-cache-ADD-commands-in-dockerfiles

Issue #880 - cache ADD commands in dockerfiles
This commit is contained in:
Guillaume J. Charmes 2013-12-24 16:22:51 -08:00
commit efaf2cac5c
4 changed files with 275 additions and 85 deletions

View File

@ -69,6 +69,7 @@ Francisco Souza <f@souza.cc>
Frederick F. Kautz IV <fkautz@alumni.cmu.edu> Frederick F. Kautz IV <fkautz@alumni.cmu.edu>
Gabriel Monroy <gabriel@opdemand.com> Gabriel Monroy <gabriel@opdemand.com>
Gareth Rushgrove <gareth@morethanseven.net> Gareth Rushgrove <gareth@morethanseven.net>
Graydon Hoare <graydon@pobox.com>
Greg Thornton <xdissent@me.com> Greg Thornton <xdissent@me.com>
Guillaume J. Charmes <guillaume.charmes@dotcloud.com> Guillaume J. Charmes <guillaume.charmes@dotcloud.com>
Gurjeet Singh <gurjeet@singh.im> Gurjeet Singh <gurjeet@singh.im>

View File

@ -1,5 +1,9 @@
# Changelog # Changelog
#### Builder
- ADD now uses image cache, based on sha256 of added content.
## 0.7.2 (2013-12-16) ## 0.7.2 (2013-12-16)
#### Runtime #### Runtime

View File

@ -1,6 +1,9 @@
package docker package docker
import ( import (
"archive/tar"
"crypto/sha256"
"encoding/hex"
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
@ -12,9 +15,11 @@ import (
"net/url" "net/url"
"os" "os"
"path" "path"
"path/filepath"
"reflect" "reflect"
"regexp" "regexp"
"strings" "strings"
"time"
) )
var ( var (
@ -92,6 +97,87 @@ func (b *buildFile) CmdMaintainer(name string) error {
return b.commit("", b.config.Cmd, fmt.Sprintf("MAINTAINER %s", name)) return b.commit("", b.config.Cmd, fmt.Sprintf("MAINTAINER %s", name))
} }
// probeCache checks to see if image-caching is enabled (`b.utilizeCache`)
// and if so attempts to look up the current `b.image` and `b.config` pair
// in the current server `b.srv`. If an image is found, probeCache returns
// `(true, nil)`. If no image is found, it returns `(false, nil)`. If there
// is any error, it returns `(false, err)`.
func (b *buildFile) probeCache() (bool, error) {
if b.utilizeCache {
if cache, err := b.srv.ImageGetCached(b.image, b.config); err != nil {
return false, err
} else if cache != nil {
fmt.Fprintf(b.outStream, " ---> Using cache\n")
utils.Debugf("[BUILDER] Use cached version")
b.image = cache.ID
return true, nil
} else {
utils.Debugf("[BUILDER] Cache miss")
}
}
return false, nil
}
// hashPath calculates a strong hash (sha256) value for a file tree located
// at `basepth`/`pth`, including all attributes that would normally be
// captured by `tar`. The path to hash is passed in two pieces only to
// permit logging the second piece in isolation, assuming the first is a
// temporary directory in which docker is running. If `clobberTimes` is
// true and hashPath is applied to a single file, the ctime/atime/mtime of
// the file is considered to be unix time 0, for purposes of hashing.
func (b *buildFile) hashPath(basePth, pth string, clobberTimes bool) (string, error) {
p := path.Join(basePth, pth)
st, err := os.Stat(p)
if err != nil {
return "", err
}
h := sha256.New()
if st.IsDir() {
tarRd, err := archive.Tar(p, archive.Uncompressed)
if err != nil {
return "", err
}
_, err = io.Copy(h, tarRd)
if err != nil {
return "", err
}
} else {
hdr, err := tar.FileInfoHeader(st, "")
if err != nil {
return "", err
}
if clobberTimes {
hdr.AccessTime = time.Unix(0, 0)
hdr.ChangeTime = time.Unix(0, 0)
hdr.ModTime = time.Unix(0, 0)
}
hdr.Name = filepath.Base(p)
tarWr := tar.NewWriter(h)
if err := tarWr.WriteHeader(hdr); err != nil {
return "", err
}
fileRd, err := os.Open(p)
if err != nil {
return "", err
}
if _, err = io.Copy(tarWr, fileRd); err != nil {
return "", err
}
tarWr.Close()
}
hstr := hex.EncodeToString(h.Sum(nil))
fmt.Fprintf(b.outStream, " ---> data at %s has sha256 %.12s...\n", pth, hstr)
return hstr, nil
}
func (b *buildFile) CmdRun(args string) error { func (b *buildFile) CmdRun(args string) error {
if b.image == "" { if b.image == "" {
return fmt.Errorf("Please provide a source image with `from` prior to run") return fmt.Errorf("Please provide a source image with `from` prior to run")
@ -109,17 +195,12 @@ func (b *buildFile) CmdRun(args string) error {
utils.Debugf("Command to be executed: %v", b.config.Cmd) utils.Debugf("Command to be executed: %v", b.config.Cmd)
if b.utilizeCache { hit, err := b.probeCache()
if cache, err := b.srv.ImageGetCached(b.image, b.config); err != nil { if err != nil {
return err return err
} else if cache != nil {
fmt.Fprintf(b.outStream, " ---> Using cache\n")
utils.Debugf("[BUILDER] Use cached version")
b.image = cache.ID
return nil
} else {
utils.Debugf("[BUILDER] Cache miss")
} }
if hit {
return nil
} }
cid, err := b.run() cid, err := b.run()
@ -265,32 +346,16 @@ func (b *buildFile) CmdVolume(args string) error {
return nil return nil
} }
func (b *buildFile) addRemote(container *Container, orig, dest string) error { func (b *buildFile) checkPathForAddition(orig string) error {
file, err := utils.Download(orig) origPath := path.Join(b.context, orig)
if !strings.HasPrefix(origPath, b.context) {
return fmt.Errorf("Forbidden path outside the build context: %s (%s)", orig, origPath)
}
_, err := os.Stat(origPath)
if err != nil { if err != nil {
return err return fmt.Errorf("%s: no such file or directory", orig)
} }
defer file.Body.Close() return nil
// If the destination is a directory, figure out the filename.
if strings.HasSuffix(dest, "/") {
u, err := url.Parse(orig)
if err != nil {
return err
}
path := u.Path
if strings.HasSuffix(path, "/") {
path = path[:len(path)-1]
}
parts := strings.Split(path, "/")
filename := parts[len(parts)-1]
if filename == "" {
return fmt.Errorf("cannot determine filename from url: %s", u)
}
dest = dest + filename
}
return container.Inject(file.Body, dest)
} }
func (b *buildFile) addContext(container *Container, orig, dest string) error { func (b *buildFile) addContext(container *Container, orig, dest string) error {
@ -300,9 +365,6 @@ func (b *buildFile) addContext(container *Container, orig, dest string) error {
if strings.HasSuffix(dest, "/") { if strings.HasSuffix(dest, "/") {
destPath = destPath + "/" destPath = destPath + "/"
} }
if !strings.HasPrefix(origPath, b.context) {
return fmt.Errorf("Forbidden path outside the build context: %s (%s)", orig, origPath)
}
fi, err := os.Stat(origPath) fi, err := os.Stat(origPath)
if err != nil { if err != nil {
return fmt.Errorf("%s: no such file or directory", orig) return fmt.Errorf("%s: no such file or directory", orig)
@ -348,6 +410,74 @@ func (b *buildFile) CmdAdd(args string) error {
b.config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) ADD %s in %s", orig, dest)} b.config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) ADD %s in %s", orig, dest)}
b.config.Image = b.image b.config.Image = b.image
origPath := orig
destPath := dest
clobberTimes := false
if utils.IsURL(orig) {
clobberTimes = true
resp, err := utils.Download(orig)
if err != nil {
return err
}
tmpDirName, err := ioutil.TempDir(b.context, "docker-remote")
if err != nil {
return err
}
tmpFileName := path.Join(tmpDirName, "tmp")
tmpFile, err := os.OpenFile(tmpFileName, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0600)
if err != nil {
return err
}
defer os.RemoveAll(tmpDirName)
if _, err = io.Copy(tmpFile, resp.Body); err != nil {
return err
}
origPath = path.Join(filepath.Base(tmpDirName), filepath.Base(tmpFileName))
tmpFile.Close()
// If the destination is a directory, figure out the filename.
if strings.HasSuffix(dest, "/") {
u, err := url.Parse(orig)
if err != nil {
return err
}
path := u.Path
if strings.HasSuffix(path, "/") {
path = path[:len(path)-1]
}
parts := strings.Split(path, "/")
filename := parts[len(parts)-1]
if filename == "" {
return fmt.Errorf("cannot determine filename from url: %s", u)
}
destPath = dest + filename
}
}
if err := b.checkPathForAddition(origPath); err != nil {
return err
}
// Hash path and check the cache
if b.utilizeCache {
hash, err := b.hashPath(b.context, origPath, clobberTimes)
if err != nil {
return err
}
b.config.Cmd = []string{"/bin/sh", "-c", fmt.Sprintf("#(nop) ADD %s in %s", hash, dest)}
hit, err := b.probeCache()
if err != nil {
return err
}
if hit {
return nil
}
}
// Create the container and start it // Create the container and start it
container, _, err := b.runtime.Create(b.config, "") container, _, err := b.runtime.Create(b.config, "")
if err != nil { if err != nil {
@ -360,15 +490,9 @@ func (b *buildFile) CmdAdd(args string) error {
} }
defer container.Unmount() defer container.Unmount()
if utils.IsURL(orig) { if err := b.addContext(container, origPath, destPath); err != nil {
if err := b.addRemote(container, orig, dest); err != nil {
return err return err
} }
} else {
if err := b.addContext(container, orig, dest); err != nil {
return err
}
}
if err := b.commit(container.ID, cmd, fmt.Sprintf("ADD %s in %s", orig, dest)); err != nil { if err := b.commit(container.ID, cmd, fmt.Sprintf("ADD %s in %s", orig, dest)); err != nil {
return err return err
@ -465,17 +589,12 @@ func (b *buildFile) commit(id string, autoCmd []string, comment string) error {
b.config.Cmd = []string{"/bin/sh", "-c", "#(nop) " + comment} b.config.Cmd = []string{"/bin/sh", "-c", "#(nop) " + comment}
defer func(cmd []string) { b.config.Cmd = cmd }(cmd) defer func(cmd []string) { b.config.Cmd = cmd }(cmd)
if b.utilizeCache { hit, err := b.probeCache()
if cache, err := b.srv.ImageGetCached(b.image, b.config); err != nil { if err != nil {
return err return err
} else if cache != nil {
fmt.Fprintf(b.outStream, " ---> Using cache\n")
utils.Debugf("[BUILDER] Use cached version")
b.image = cache.ID
return nil
} else {
utils.Debugf("[BUILDER] Cache miss")
} }
if hit {
return nil
} }
container, warnings, err := b.runtime.Create(b.config, "") container, warnings, err := b.runtime.Create(b.config, "")

View File

@ -425,16 +425,10 @@ func TestBuildEntrypointRunCleanup(t *testing.T) {
} }
} }
func TestBuildImageWithCache(t *testing.T) { func checkCacheBehavior(t *testing.T, template testContextTemplate, expectHit bool) {
eng := NewTestEngine(t) eng := NewTestEngine(t)
defer nuke(mkRuntimeFromEngine(eng, t)) defer nuke(mkRuntimeFromEngine(eng, t))
template := testContextTemplate{`
from {IMAGE}
maintainer dockerio
`,
nil, nil}
img, err := buildImage(template, t, eng, true) img, err := buildImage(template, t, eng, true)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
@ -443,43 +437,115 @@ func TestBuildImageWithCache(t *testing.T) {
imageId := img.ID imageId := img.ID
img = nil img = nil
img, err = buildImage(template, t, eng, true) img, err = buildImage(template, t, eng, expectHit)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
if imageId != img.ID { hit := imageId == img.ID
t.Logf("Image ids should match: %s != %s", imageId, img.ID) if hit != expectHit {
t.Logf("Cache misbehavior, got hit=%t, expected hit=%t: (first: %s, second %s)",
hit, expectHit, imageId, img.ID)
t.Fail() t.Fail()
} }
} }
func TestBuildImageWithCache(t *testing.T) {
template := testContextTemplate{`
from {IMAGE}
maintainer dockerio
`,
nil, nil}
checkCacheBehavior(t, template, true)
}
func TestBuildImageWithoutCache(t *testing.T) { func TestBuildImageWithoutCache(t *testing.T) {
eng := NewTestEngine(t)
defer nuke(mkRuntimeFromEngine(eng, t))
template := testContextTemplate{` template := testContextTemplate{`
from {IMAGE} from {IMAGE}
maintainer dockerio maintainer dockerio
`, `,
nil, nil} nil, nil}
checkCacheBehavior(t, template, false)
img, err := buildImage(template, t, eng, true)
if err != nil {
t.Fatal(err)
}
imageId := img.ID
img = nil
img, err = buildImage(template, t, eng, false)
if err != nil {
t.Fatal(err)
} }
if imageId == img.ID { func TestBuildADDLocalFileWithCache(t *testing.T) {
t.Logf("Image ids should not match: %s == %s", imageId, img.ID) template := testContextTemplate{`
t.Fail() from {IMAGE}
maintainer dockerio
run echo "first"
add foo /usr/lib/bla/bar
run echo "second"
`,
[][2]string{{"foo", "hello"}},
nil}
checkCacheBehavior(t, template, true)
} }
func TestBuildADDLocalFileWithoutCache(t *testing.T) {
template := testContextTemplate{`
from {IMAGE}
maintainer dockerio
run echo "first"
add foo /usr/lib/bla/bar
run echo "second"
`,
[][2]string{{"foo", "hello"}},
nil}
checkCacheBehavior(t, template, false)
}
func TestBuildADDRemoteFileWithCache(t *testing.T) {
template := testContextTemplate{`
from {IMAGE}
maintainer dockerio
run echo "first"
add http://{SERVERADDR}/baz /usr/lib/baz/quux
run echo "second"
`,
nil,
[][2]string{{"/baz", "world!"}}}
checkCacheBehavior(t, template, true)
}
func TestBuildADDRemoteFileWithoutCache(t *testing.T) {
template := testContextTemplate{`
from {IMAGE}
maintainer dockerio
run echo "first"
add http://{SERVERADDR}/baz /usr/lib/baz/quux
run echo "second"
`,
nil,
[][2]string{{"/baz", "world!"}}}
checkCacheBehavior(t, template, false)
}
func TestBuildADDLocalAndRemoteFilesWithCache(t *testing.T) {
template := testContextTemplate{`
from {IMAGE}
maintainer dockerio
run echo "first"
add foo /usr/lib/bla/bar
add http://{SERVERADDR}/baz /usr/lib/baz/quux
run echo "second"
`,
[][2]string{{"foo", "hello"}},
[][2]string{{"/baz", "world!"}}}
checkCacheBehavior(t, template, true)
}
func TestBuildADDLocalAndRemoteFilesWithoutCache(t *testing.T) {
template := testContextTemplate{`
from {IMAGE}
maintainer dockerio
run echo "first"
add foo /usr/lib/bla/bar
add http://{SERVERADDR}/baz /usr/lib/baz/quux
run echo "second"
`,
[][2]string{{"foo", "hello"}},
[][2]string{{"/baz", "world!"}}}
checkCacheBehavior(t, template, false)
} }
func TestForbiddenContextPath(t *testing.T) { func TestForbiddenContextPath(t *testing.T) {