chunked: implement lookaside cache
Avoid parsing each JSON TOC file for the layers in the local storage; instead, attempt to create a lookaside cache in a custom format that is faster to load (and potentially mmap'able). The same cache is used to look up files, chunks and candidates for deduplication with hard links. There are 3 kinds of digests stored: - digest(file.payload) - digest(digest(file.payload) + file.UID + file.GID + file.mode + file.xattrs) - digest(i) for each i in chunks(file.payload) Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
parent
a5f0cddf2b
commit
10697a05a2
|
|
@ -1,19 +1,43 @@
|
||||||
package chunked
|
package chunked
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/binary"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
storage "github.com/containers/storage"
|
storage "github.com/containers/storage"
|
||||||
"github.com/containers/storage/pkg/chunked/internal"
|
"github.com/containers/storage/pkg/chunked/internal"
|
||||||
|
"github.com/containers/storage/pkg/ioutils"
|
||||||
|
digest "github.com/opencontainers/go-digest"
|
||||||
|
"github.com/pkg/errors"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Constants describing the on-disk lookaside cache.
const (
|
||||||
|
// cacheKey is the layer big-data key under which the lookaside cache is stored.
cacheKey = "chunked-manifest-cache"
|
||||||
|
// cacheVersion is written as the first field of the cache header; a cache
// with a different version is ignored when read back.
cacheVersion = 1
|
||||||
|
)
|
||||||
|
|
||||||
|
// metadata is the in-memory form of a layer's lookaside cache.
type metadata struct {
|
||||||
|
// tagLen is the length in bytes of each fixed-size tag stored in tags.
tagLen int
|
||||||
|
// digestLen is the length in bytes of the digest at the start of each tag.
digestLen int
|
||||||
|
// tags holds the sorted tags, concatenated back to back.
tags []byte
|
||||||
|
// vdata is the variable length data where the file locations referenced
// by the tags are stored.
vdata []byte
|
||||||
|
}
|
||||||
|
|
||||||
type layer struct {
|
type layer struct {
|
||||||
id string
|
id string
|
||||||
metadata map[string][]*internal.FileMetadata
|
metadata *metadata
|
||||||
target string
|
target string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -25,10 +49,6 @@ type layersCache struct {
|
||||||
created time.Time
|
created time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
type findFileVisitor interface {
|
|
||||||
VisitFile(file *internal.FileMetadata, target string) (bool, error)
|
|
||||||
}
|
|
||||||
|
|
||||||
var cacheMutex sync.Mutex
|
var cacheMutex sync.Mutex
|
||||||
var cache *layersCache
|
var cache *layersCache
|
||||||
|
|
||||||
|
|
@ -86,25 +106,21 @@ func (c *layersCache) load() error {
|
||||||
if _, found := existingLayers[r.ID]; found {
|
if _, found := existingLayers[r.ID]; found {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
manifestReader, err := c.store.LayerBigData(r.ID, bigDataKey)
|
|
||||||
|
metadata, err := c.readMetadataFromCache(r.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
continue
|
logrus.Warningf("Error reading cache file for layer %q: %v", r.ID, err)
|
||||||
}
|
|
||||||
defer manifestReader.Close()
|
|
||||||
manifest, err := ioutil.ReadAll(manifestReader)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err)
|
|
||||||
}
|
|
||||||
var toc internal.TOC
|
|
||||||
if err := json.Unmarshal(manifest, &toc); err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
target, err := c.store.DifferTarget(r.ID)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("get checkout directory layer %q: %w", r.ID, err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
c.addLayer(r.ID, toc.Entries, target)
|
if metadata != nil {
|
||||||
|
c.addLayer(r.ID, metadata)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata, err = c.writeCache(r.ID)
|
||||||
|
if err == nil {
|
||||||
|
c.addLayer(r.ID, metadata)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var newLayers []layer
|
var newLayers []layer
|
||||||
|
|
@ -118,70 +134,370 @@ func (c *layersCache) load() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *layersCache) addLayer(id string, entries []internal.FileMetadata, target string) {
|
// calculateHardLinkFingerprint calculates a hash that can be used to verify if a file
|
||||||
r := make(map[string][]*internal.FileMetadata)
|
// is usable for deduplication with hardlinks.
|
||||||
for i := range entries {
|
// To calculate the digest, it uses the file payload digest, UID, GID, mode and xattrs.
|
||||||
if entries[i].Digest != "" {
|
func calculateHardLinkFingerprint(f *internal.FileMetadata) (string, error) {
|
||||||
r[entries[i].Digest] = append(r[entries[i].Digest], &entries[i])
|
digester := digest.Canonical.Digester()
|
||||||
|
|
||||||
|
modeString := fmt.Sprintf("%d:%d:%o", f.UID, f.GID, f.Mode)
|
||||||
|
hash := digester.Hash()
|
||||||
|
|
||||||
|
if _, err := hash.Write([]byte(f.Digest)); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := hash.Write([]byte(modeString)); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(f.Xattrs) > 0 {
|
||||||
|
keys := make([]string, 0, len(f.Xattrs))
|
||||||
|
for k := range f.Xattrs {
|
||||||
|
keys = append(keys, k)
|
||||||
|
}
|
||||||
|
sort.Strings(keys)
|
||||||
|
|
||||||
|
for _, k := range keys {
|
||||||
|
if _, err := hash.Write([]byte(k)); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
if _, err := hash.Write([]byte(f.Xattrs[k])); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return string(digester.Digest()), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// generateFileLocation generates a file location in the form $OFFSET@$PATH.
// offset is rendered in base 10 without padding and path is appended verbatim,
// so a reader must split on the first '@' only.
func generateFileLocation(path string, offset uint64) []byte {
	// Build the bytes directly instead of formatting a string and copying it.
	// 20 is the maximum number of decimal digits in a uint64.
	loc := make([]byte, 0, 20+1+len(path))
	loc = strconv.AppendUint(loc, offset, 10)
	loc = append(loc, '@')
	return append(loc, path...)
}
|
||||||
|
|
||||||
|
// generateTag generates a tag in the form $DIGEST$OFFSET@$LEN.
// The [OFFSET; LEN] pair points into the variable length data where the file
// locations are stored. $DIGEST has length digestLen stored in the metadata
// file header.
// Both numbers are zero-padded to 20 digits (the width of the largest uint64),
// so all tags built from digests of the same length have the same length.
func generateTag(digest string, offset, length uint64) string {
	return fmt.Sprintf("%s%.20d@%.20d", digest, offset, length)
}
|
||||||
|
|
||||||
|
// writeCache write a cache for the layer ID.
|
||||||
|
// It generates a sorted list of digests with their offset to the path location and offset.
|
||||||
|
// The same cache is used to lookup files, chunks and candidates for deduplication with hard links.
|
||||||
|
// There are 3 kind of digests stored:
|
||||||
|
// - digest(file.payload))
|
||||||
|
// - digest(digest(file.payload) + file.UID + file.GID + file.mode + file.xattrs)
|
||||||
|
// - digest(i) for each i in chunks(file payload)
|
||||||
|
func (c *layersCache) writeCache(id string) (*metadata, error) {
|
||||||
|
var vdata bytes.Buffer
|
||||||
|
tagLen := 0
|
||||||
|
digestLen := 0
|
||||||
|
var tagsBuffer bytes.Buffer
|
||||||
|
|
||||||
|
toc, err := c.prepareMetadata(id)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var tags []string
|
||||||
|
for _, k := range toc {
|
||||||
|
if k.Digest != "" {
|
||||||
|
location := generateFileLocation(k.Name, 0)
|
||||||
|
|
||||||
|
off := uint64(vdata.Len())
|
||||||
|
l := uint64(len(location))
|
||||||
|
|
||||||
|
d := generateTag(k.Digest, off, l)
|
||||||
|
if tagLen == 0 {
|
||||||
|
tagLen = len(d)
|
||||||
|
}
|
||||||
|
if tagLen != len(d) {
|
||||||
|
return nil, errors.New("digest with different length found")
|
||||||
|
}
|
||||||
|
tags = append(tags, d)
|
||||||
|
|
||||||
|
fp, err := calculateHardLinkFingerprint(k)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
d = generateTag(fp, off, l)
|
||||||
|
if tagLen != len(d) {
|
||||||
|
return nil, errors.New("digest with different length found")
|
||||||
|
}
|
||||||
|
tags = append(tags, d)
|
||||||
|
|
||||||
|
if _, err := vdata.Write(location); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
digestLen = len(k.Digest)
|
||||||
|
}
|
||||||
|
if k.ChunkDigest != "" {
|
||||||
|
location := generateFileLocation(k.Name, uint64(k.ChunkOffset))
|
||||||
|
off := uint64(vdata.Len())
|
||||||
|
l := uint64(len(location))
|
||||||
|
d := generateTag(k.ChunkDigest, off, l)
|
||||||
|
if tagLen == 0 {
|
||||||
|
tagLen = len(d)
|
||||||
|
}
|
||||||
|
if tagLen != len(d) {
|
||||||
|
return nil, errors.New("digest with different length found")
|
||||||
|
}
|
||||||
|
tags = append(tags, d)
|
||||||
|
|
||||||
|
if _, err := vdata.Write(location); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
digestLen = len(k.ChunkDigest)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Strings(tags)
|
||||||
|
|
||||||
|
for _, t := range tags {
|
||||||
|
if _, err := tagsBuffer.Write([]byte(t)); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pipeReader, pipeWriter := io.Pipe()
|
||||||
|
errChan := make(chan error, 1)
|
||||||
|
go func() {
|
||||||
|
defer pipeWriter.Close()
|
||||||
|
defer close(errChan)
|
||||||
|
|
||||||
|
// version
|
||||||
|
if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(cacheVersion)); err != nil {
|
||||||
|
errChan <- err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// len of a tag
|
||||||
|
if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(tagLen)); err != nil {
|
||||||
|
errChan <- err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// len of a digest
|
||||||
|
if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(digestLen)); err != nil {
|
||||||
|
errChan <- err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// tags length
|
||||||
|
if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(tagsBuffer.Len())); err != nil {
|
||||||
|
errChan <- err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// vdata length
|
||||||
|
if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(vdata.Len())); err != nil {
|
||||||
|
errChan <- err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// tags
|
||||||
|
if _, err := pipeWriter.Write(tagsBuffer.Bytes()); err != nil {
|
||||||
|
errChan <- err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// variable length data
|
||||||
|
if _, err := pipeWriter.Write(vdata.Bytes()); err != nil {
|
||||||
|
errChan <- err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
errChan <- nil
|
||||||
|
}()
|
||||||
|
defer pipeReader.Close()
|
||||||
|
|
||||||
|
counter := ioutils.NewWriteCounter(ioutil.Discard)
|
||||||
|
|
||||||
|
r := io.TeeReader(pipeReader, counter)
|
||||||
|
|
||||||
|
if err := c.store.SetLayerBigData(id, cacheKey, r); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := <-errChan; err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
logrus.Debugf("Written lookaside cache for layer %q with length %v", id, counter.Count)
|
||||||
|
|
||||||
|
return &metadata{
|
||||||
|
tagLen: tagLen,
|
||||||
|
tags: tagsBuffer.Bytes(),
|
||||||
|
vdata: vdata.Bytes(),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *layersCache) readMetadataFromCache(id string) (*metadata, error) {
|
||||||
|
bigData, err := c.store.LayerBigData(id, cacheKey)
|
||||||
|
if err != nil {
|
||||||
|
if errors.Cause(err) == os.ErrNotExist {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer bigData.Close()
|
||||||
|
|
||||||
|
var version, tagLen, digestLen, tagsLen, vdataLen uint64
|
||||||
|
if err := binary.Read(bigData, binary.LittleEndian, &version); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if version != cacheVersion {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
if err := binary.Read(bigData, binary.LittleEndian, &tagLen); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if err := binary.Read(bigData, binary.LittleEndian, &digestLen); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if err := binary.Read(bigData, binary.LittleEndian, &tagsLen); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if err := binary.Read(bigData, binary.LittleEndian, &vdataLen); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
tags := make([]byte, tagsLen)
|
||||||
|
if _, err := bigData.Read(tags); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
vdata := make([]byte, vdataLen)
|
||||||
|
if _, err = bigData.Read(vdata); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &metadata{
|
||||||
|
tagLen: int(tagLen),
|
||||||
|
digestLen: int(digestLen),
|
||||||
|
tags: tags,
|
||||||
|
vdata: vdata,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *layersCache) prepareMetadata(id string) ([]*internal.FileMetadata, error) {
|
||||||
|
manifestReader, err := c.store.LayerBigData(id, bigDataKey)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
defer manifestReader.Close()
|
||||||
|
manifest, err := ioutil.ReadAll(manifestReader)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("open manifest file for layer %q: %w", id, err)
|
||||||
|
}
|
||||||
|
var toc internal.TOC
|
||||||
|
if err := json.Unmarshal(manifest, &toc); err != nil {
|
||||||
|
// ignore errors here. They might be caused by a different manifest format.
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var r []*internal.FileMetadata
|
||||||
|
chunkSeen := make(map[string]bool)
|
||||||
|
for i := range toc.Entries {
|
||||||
|
d := toc.Entries[i].Digest
|
||||||
|
if d != "" {
|
||||||
|
r = append(r, &toc.Entries[i])
|
||||||
}
|
}
|
||||||
|
|
||||||
// chunks do not use hard link dedup so keeping just one candidate is enough
|
// chunks do not use hard link dedup so keeping just one candidate is enough
|
||||||
if entries[i].ChunkDigest != "" && len(r[entries[i].ChunkDigest]) == 0 {
|
cd := toc.Entries[i].ChunkDigest
|
||||||
r[entries[i].ChunkDigest] = append(r[entries[i].ChunkDigest], &entries[i])
|
if cd != "" && !chunkSeen[cd] {
|
||||||
|
r = append(r, &toc.Entries[i])
|
||||||
|
chunkSeen[cd] = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return r, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *layersCache) addLayer(id string, metadata *metadata) error {
|
||||||
|
target, err := c.store.DifferTarget(id)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("get checkout directory layer %q: %w", id, err)
|
||||||
|
}
|
||||||
|
|
||||||
l := layer{
|
l := layer{
|
||||||
id: id,
|
id: id,
|
||||||
metadata: r,
|
metadata: metadata,
|
||||||
target: target,
|
target: target,
|
||||||
}
|
}
|
||||||
c.layers = append(c.layers, l)
|
c.layers = append(c.layers, l)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// byteSliceAsString reinterprets b as a string without copying its bytes.
// The returned string shares memory with b, so b must not be modified while
// the string is still in use.
func byteSliceAsString(b []byte) string {
|
||||||
|
return *(*string)(unsafe.Pointer(&b))
|
||||||
|
}
|
||||||
|
|
||||||
|
func findTag(digest string, metadata *metadata) (string, uint64, uint64) {
|
||||||
|
if len(digest) != metadata.digestLen {
|
||||||
|
return "", 0, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
nElements := len(metadata.tags) / metadata.tagLen
|
||||||
|
|
||||||
|
i := sort.Search(nElements, func(i int) bool {
|
||||||
|
d := byteSliceAsString(metadata.tags[i*metadata.tagLen : i*metadata.tagLen+metadata.digestLen])
|
||||||
|
return strings.Compare(d, digest) >= 0
|
||||||
|
})
|
||||||
|
if i < nElements {
|
||||||
|
d := string(metadata.tags[i*metadata.tagLen : i*metadata.tagLen+len(digest)])
|
||||||
|
if digest == d {
|
||||||
|
startOff := i*metadata.tagLen + metadata.digestLen
|
||||||
|
parts := strings.Split(string(metadata.tags[startOff:(i+1)*metadata.tagLen]), "@")
|
||||||
|
off, _ := strconv.ParseInt(parts[0], 10, 64)
|
||||||
|
len, _ := strconv.ParseInt(parts[1], 10, 64)
|
||||||
|
return digest, uint64(off), uint64(len)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "", 0, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *layersCache) findDigestInternal(digest string) (string, string, int64, error) {
|
||||||
|
if digest == "" {
|
||||||
|
return "", "", -1, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
c.mutex.Lock()
|
||||||
|
defer c.mutex.Unlock()
|
||||||
|
|
||||||
|
for _, layer := range c.layers {
|
||||||
|
digest, off, len := findTag(digest, layer.metadata)
|
||||||
|
if digest != "" {
|
||||||
|
position := string(layer.metadata.vdata[off : off+len])
|
||||||
|
parts := strings.SplitN(position, "@", 2)
|
||||||
|
offFile, _ := strconv.ParseInt(parts[1], 10, 64)
|
||||||
|
return layer.target, parts[1], offFile, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return "", "", -1, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// findFileInOtherLayers finds the specified file in other layers.
|
// findFileInOtherLayers finds the specified file in other layers.
|
||||||
// file is the file to look for.
|
// file is the file to look for.
|
||||||
// visitor is the findFileVisitor to notify for each candidate found.
|
func (c *layersCache) findFileInOtherLayers(file *internal.FileMetadata, useHardLinks bool) (string, string, error) {
|
||||||
func (c *layersCache) findFileInOtherLayers(file *internal.FileMetadata, visitor findFileVisitor) error {
|
digest := file.Digest
|
||||||
c.mutex.Lock()
|
if useHardLinks {
|
||||||
defer c.mutex.Unlock()
|
var err error
|
||||||
|
digest, err = calculateHardLinkFingerprint(file)
|
||||||
for _, layer := range c.layers {
|
if err != nil {
|
||||||
files, found := layer.metadata[file.Digest]
|
return "", "", err
|
||||||
if !found {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
for _, candidate := range files {
|
|
||||||
if candidate.Type == internal.TypeReg {
|
|
||||||
keepGoing, err := visitor.VisitFile(candidate, layer.target)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if !keepGoing {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
target, name, off, err := c.findDigestInternal(digest)
|
||||||
}
|
if off == 0 {
|
||||||
|
return target, name, err
|
||||||
func (c *layersCache) findChunkInOtherLayers(chunk *internal.FileMetadata) (string, string, int64) {
|
|
||||||
c.mutex.Lock()
|
|
||||||
defer c.mutex.Unlock()
|
|
||||||
|
|
||||||
for _, layer := range c.layers {
|
|
||||||
entries, found := layer.metadata[chunk.ChunkDigest]
|
|
||||||
if !found {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
for _, candidate := range entries {
|
|
||||||
if candidate.Type == internal.TypeChunk {
|
|
||||||
return layer.target, candidate.Name, candidate.ChunkOffset
|
|
||||||
}
|
|
||||||
if candidate.Type == internal.TypeReg {
|
|
||||||
return layer.target, candidate.Name, 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return "", "", -1
|
return "", "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *layersCache) findChunkInOtherLayers(chunk *internal.FileMetadata) (string, string, int64, error) {
|
||||||
|
return c.findDigestInternal(chunk.ChunkDigest)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -190,19 +190,19 @@ func makeCopyBuffer() []byte {
|
||||||
// copyFileFromOtherLayer copies a file from another layer
|
// copyFileFromOtherLayer copies a file from another layer
|
||||||
// file is the file to look for.
|
// file is the file to look for.
|
||||||
// source is the path to the source layer checkout.
|
// source is the path to the source layer checkout.
|
||||||
// otherFile contains the metadata for the file.
|
// name is the path to the file to copy in source.
|
||||||
// dirfd is an open file descriptor to the destination root directory.
|
// dirfd is an open file descriptor to the destination root directory.
|
||||||
// useHardLinks defines whether the deduplication can be performed using hard links.
|
// useHardLinks defines whether the deduplication can be performed using hard links.
|
||||||
func copyFileFromOtherLayer(file *internal.FileMetadata, source string, otherFile *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
|
func copyFileFromOtherLayer(file *internal.FileMetadata, source string, name string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
|
||||||
srcDirfd, err := unix.Open(source, unix.O_RDONLY, 0)
|
srcDirfd, err := unix.Open(source, unix.O_RDONLY, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, nil, 0, fmt.Errorf("open source file %q: %w", source, err)
|
return false, nil, 0, fmt.Errorf("open source file %q: %w", source, err)
|
||||||
}
|
}
|
||||||
defer unix.Close(srcDirfd)
|
defer unix.Close(srcDirfd)
|
||||||
|
|
||||||
srcFile, err := openFileUnderRoot(otherFile.Name, srcDirfd, unix.O_RDONLY, 0)
|
srcFile, err := openFileUnderRoot(name, srcDirfd, unix.O_RDONLY, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, nil, 0, fmt.Errorf("open source file %q under target rootfs: %w", otherFile.Name, err)
|
return false, nil, 0, fmt.Errorf("open source file %q under target rootfs: %w", name, err)
|
||||||
}
|
}
|
||||||
defer srcFile.Close()
|
defer srcFile.Close()
|
||||||
|
|
||||||
|
|
@ -395,47 +395,17 @@ func findFileOnTheHost(file *internal.FileMetadata, dirfd int, useHardLinks bool
|
||||||
return true, dstFile, written, nil
|
return true, dstFile, written, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type findFileState struct {
|
|
||||||
file *internal.FileMetadata
|
|
||||||
useHardLinks bool
|
|
||||||
dirfd int
|
|
||||||
|
|
||||||
found bool
|
|
||||||
dstFile *os.File
|
|
||||||
written int64
|
|
||||||
retError error
|
|
||||||
}
|
|
||||||
|
|
||||||
func (v *findFileState) VisitFile(candidate *internal.FileMetadata, target string) (bool, error) {
|
|
||||||
if v.useHardLinks && !canDedupMetadataWithHardLink(v.file, candidate) {
|
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
found, dstFile, written, err := copyFileFromOtherLayer(v.file, target, candidate, v.dirfd, v.useHardLinks)
|
|
||||||
if found && err == nil {
|
|
||||||
v.found = found
|
|
||||||
v.dstFile = dstFile
|
|
||||||
v.written = written
|
|
||||||
v.retError = err
|
|
||||||
return false, nil
|
|
||||||
}
|
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// findFileInOtherLayers finds the specified file in other layers.
|
// findFileInOtherLayers finds the specified file in other layers.
|
||||||
// cache is the layers cache to use.
|
// cache is the layers cache to use.
|
||||||
// file is the file to look for.
|
// file is the file to look for.
|
||||||
// dirfd is an open file descriptor to the checkout root directory.
|
// dirfd is an open file descriptor to the checkout root directory.
|
||||||
// useHardLinks defines whether the deduplication can be performed using hard links.
|
// useHardLinks defines whether the deduplication can be performed using hard links.
|
||||||
func findFileInOtherLayers(cache *layersCache, file *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
|
func findFileInOtherLayers(cache *layersCache, file *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
|
||||||
visitor := &findFileState{
|
target, name, err := cache.findFileInOtherLayers(file, useHardLinks)
|
||||||
file: file,
|
if err != nil || name == "" {
|
||||||
useHardLinks: useHardLinks,
|
|
||||||
dirfd: dirfd,
|
|
||||||
}
|
|
||||||
if err := cache.findFileInOtherLayers(file, visitor); err != nil {
|
|
||||||
return false, nil, 0, err
|
return false, nil, 0, err
|
||||||
}
|
}
|
||||||
return visitor.found, visitor.dstFile, visitor.written, visitor.retError
|
return copyFileFromOtherLayer(file, target, name, dirfd, useHardLinks)
|
||||||
}
|
}
|
||||||
|
|
||||||
func maybeDoIDRemap(manifest []internal.FileMetadata, options *archive.TarOptions) error {
|
func maybeDoIDRemap(manifest []internal.FileMetadata, options *archive.TarOptions) error {
|
||||||
|
|
@ -1486,7 +1456,10 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (gra
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
root, path, offset := c.layersCache.findChunkInOtherLayers(chunk)
|
root, path, offset, err := c.layersCache.findChunkInOtherLayers(chunk)
|
||||||
|
if err != nil {
|
||||||
|
return output, err
|
||||||
|
}
|
||||||
if offset >= 0 {
|
if offset >= 0 {
|
||||||
missingPartsSize -= size
|
missingPartsSize -= size
|
||||||
mp.OriginFile = &originFile{
|
mp.OriginFile = &originFile{
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue