diff --git a/pkg/chunked/bloom_filter.go b/pkg/chunked/bloom_filter.go
new file mode 100644
index 000000000..696a52f8e
--- /dev/null
+++ b/pkg/chunked/bloom_filter.go
@@ -0,0 +1,84 @@
+package chunked
+
+import (
+	"encoding/binary"
+	"hash/crc32"
+	"io"
+)
+
+type bloomFilter struct {
+	bitArray []uint64
+	k        uint32
+}
+
+func newBloomFilter(size int, k uint32) *bloomFilter {
+	numElements := (size + 63) / 64
+	return &bloomFilter{
+		bitArray: make([]uint64, numElements),
+		k:        k,
+	}
+}
+
+func newBloomFilterFromArray(bitArray []uint64, k uint32) *bloomFilter {
+	return &bloomFilter{
+		bitArray: bitArray,
+		k:        k,
+	}
+}
+
+func (bf *bloomFilter) hashFn(item []byte, seed uint32) (uint64, uint64) {
+	if len(item) == 0 {
+		return 0, 1
+	}
+	mod := uint32(len(bf.bitArray) * 64)
+	seedSplit := seed % uint32(len(item))
+	hash := (crc32.ChecksumIEEE(item[:seedSplit]) ^ crc32.ChecksumIEEE(item[seedSplit:])) % mod
+	return uint64(hash / 64), uint64(1 << (hash % 64))
+}
+
+func (bf *bloomFilter) add(item []byte) {
+	for i := uint32(0); i < bf.k; i++ {
+		index, mask := bf.hashFn(item, i)
+		bf.bitArray[index] |= mask
+	}
+}
+
+func (bf *bloomFilter) maybeContains(item []byte) bool {
+	for i := uint32(0); i < bf.k; i++ {
+		index, mask := bf.hashFn(item, i)
+		if bf.bitArray[index]&mask == 0 {
+			return false
+		}
+	}
+	return true
+}
+
+func (bf *bloomFilter) writeTo(writer io.Writer) error {
+	if err := binary.Write(writer, binary.LittleEndian, uint64(len(bf.bitArray))); err != nil {
+		return err
+	}
+	if err := binary.Write(writer, binary.LittleEndian, uint32(bf.k)); err != nil {
+		return err
+	}
+	if err := binary.Write(writer, binary.LittleEndian, bf.bitArray); err != nil {
+		return err
+	}
+	return nil
+}
+
+func readBloomFilter(reader io.Reader) (*bloomFilter, error) {
+	var bloomFilterLen uint64
+	var k uint32
+
+	if err := binary.Read(reader, binary.LittleEndian, &bloomFilterLen); err != nil {
+		return nil, err
+	}
+	if err := binary.Read(reader, binary.LittleEndian, &k); err != nil {
+		return nil, err
+	}
+	bloomFilterArray := make([]uint64, bloomFilterLen)
+	if err := binary.Read(reader, binary.LittleEndian, &bloomFilterArray); err != nil {
+		return nil, err
+	}
+	return newBloomFilterFromArray(bloomFilterArray, k), nil
+}
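For readers of this patch, a minimal sketch (not part of the diff) of how the new type is meant to be used. The sizing mirrors the bloomFilterScale/bloomFilterHashes constants introduced in cache_linux.go below; the digest strings are placeholders:

package chunked

import "fmt"

// exampleBloomFilterUsage is illustrative only. With m/n = 10 bits per entry
// and k = 3 hash functions, the expected false positive rate is
// (1 - e^(-3/10))^3, i.e. about 1.7%.
func exampleBloomFilterUsage() {
	const expectedEntries = 1000
	bf := newBloomFilter(expectedEntries*10, 3)

	added := []byte("sha256:placeholder-digest-a")
	bf.add(added)

	fmt.Println(bf.maybeContains(added)) // always true: no false negatives
	// For items never added, false means definitely absent; true happens
	// only for the ~1.7% false positives, and only costs one extra binary
	// search over the cache tags, never a wrong lookup result.
	fmt.Println(bf.maybeContains([]byte("sha256:placeholder-digest-b")))
}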
diff --git a/pkg/chunked/bloom_filter_test.go b/pkg/chunked/bloom_filter_test.go
new file mode 100644
index 000000000..491a2ac03
--- /dev/null
+++ b/pkg/chunked/bloom_filter_test.go
@@ -0,0 +1,128 @@
+package chunked
+
+import (
+	"bytes"
+	"io"
+	"testing"
+
+	digest "github.com/opencontainers/go-digest"
+	"github.com/stretchr/testify/assert"
+)
+
+var (
+	presentDigestInCache          string
+	notPresentDigestInCache       string
+	presentDigestInCacheBinary    []byte
+	notPresentDigestInCacheBinary []byte
+	preloadedCache                *cacheFile
+	preloadedbloomFilter          *bloomFilter
+)
+
+// Using 3 hash functions and m/n = 10 gives a false positive rate of ~1.7%:
+// https://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
+var (
+	factorNM     int    = 10
+	numberHashes uint32 = 3
+)
+
+func initCache(sizeCache int) (*cacheFile, string, string, *bloomFilter) {
+	var tagsBuffer bytes.Buffer
+	var vdata bytes.Buffer
+	var fnames bytes.Buffer
+	tags := [][]byte{}
+	tagLen := 0
+	digestLen := 64
+	var presentDigest, notPresentDigest string
+
+	bloomFilter := newBloomFilter(sizeCache*factorNM, numberHashes)
+
+	digester := digest.Canonical.Digester()
+	hash := digester.Hash()
+	for i := 0; i < sizeCache; i++ {
+		hash.Write([]byte("1"))
+		d := digester.Digest().String()
+		digestLen = len(d)
+		presentDigest = d
+		tag, err := appendTag([]byte(d), 0, 0)
+		if err != nil {
+			panic(err)
+		}
+		tagLen = len(tag)
+		tags = append(tags, tag)
+		bd, err := makeBinaryDigest(d)
+		if err != nil {
+			panic(err)
+		}
+		bloomFilter.add(bd)
+	}
+
+	hash.Write([]byte("1"))
+	notPresentDigest = digester.Digest().String()
+
+	if err := writeCacheFileToWriter(io.Discard, bloomFilter, tags, tagLen, digestLen, vdata, fnames, &tagsBuffer); err != nil {
+		panic(err)
+	}
+
+	cache := &cacheFile{
+		digestLen: digestLen,
+		tagLen:    tagLen,
+		tags:      tagsBuffer.Bytes(),
+		vdata:     vdata.Bytes(),
+	}
+	return cache, presentDigest, notPresentDigest, bloomFilter
+}
+
+func init() {
+	var err error
+	preloadedCache, presentDigestInCache, notPresentDigestInCache, preloadedbloomFilter = initCache(10000)
+	presentDigestInCacheBinary, err = makeBinaryDigest(presentDigestInCache)
+	if err != nil {
+		panic(err)
+	}
+	notPresentDigestInCacheBinary, err = makeBinaryDigest(notPresentDigestInCache)
+	if err != nil {
+		panic(err)
+	}
+}
+
+func BenchmarkLookupBloomFilter(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		if preloadedbloomFilter.maybeContains(notPresentDigestInCacheBinary) {
+			findTag(notPresentDigestInCache, preloadedCache)
+		}
+		if preloadedbloomFilter.maybeContains(presentDigestInCacheBinary) {
+			findTag(presentDigestInCache, preloadedCache)
+		}
+	}
+}
+
+func BenchmarkLookupBloomRaw(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		findTag(notPresentDigestInCache, preloadedCache)
+		findTag(presentDigestInCache, preloadedCache)
+	}
+}
+
+func TestBloomFilter(t *testing.T) {
+	bloomFilter := newBloomFilter(1000, 1)
+	digester := digest.Canonical.Digester()
+	hash := digester.Hash()
+	for i := 0; i < 1000; i++ {
+		hash.Write([]byte("1"))
+		d := digester.Digest().String()
+		bd, err := makeBinaryDigest(d)
+		assert.NoError(t, err)
+		contains := bloomFilter.maybeContains(bd)
+		assert.False(t, contains)
+	}
+	for i := 0; i < 1000; i++ {
+		hash.Write([]byte("1"))
+		d := digester.Digest().String()
+		bd, err := makeBinaryDigest(d)
+		assert.NoError(t, err)
+		bloomFilter.add(bd)
+
+		contains := bloomFilter.maybeContains(bd)
+		assert.True(t, contains)
+	}
+}
diff --git a/pkg/chunked/cache_linux.go b/pkg/chunked/cache_linux.go
index 1e3ad86d1..6dcaa668b 100644
--- a/pkg/chunked/cache_linux.go
+++ b/pkg/chunked/cache_linux.go
@@ -3,16 +3,16 @@ package chunked
 import (
 	"bytes"
 	"encoding/binary"
+	"encoding/hex"
 	"errors"
 	"fmt"
 	"io"
 	"os"
+	"runtime"
 	"sort"
-	"strconv"
 	"strings"
 	"sync"
 	"time"
-	"unsafe"
 
 	storage "github.com/containers/storage"
 	graphdriver "github.com/containers/storage/drivers"
@@ -21,30 +21,48 @@ import (
 	jsoniter "github.com/json-iterator/go"
 	digest "github.com/opencontainers/go-digest"
 	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
 )
 
 const (
 	cacheKey     = "chunked-manifest-cache"
-	cacheVersion = 2
+	cacheVersion = 3
 
 	digestSha256Empty = "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
+
+	// Using 3 hash functions and m/n = 10 gives a false positive rate of ~1.7%:
+	// https://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
+	bloomFilterScale  = 10 // how much bigger is the bloom filter than the number of entries
+	bloomFilterHashes = 3  // number of hash functions for the bloom filter
 )
 
-type metadata struct {
-	tagLen    int
-	digestLen int
-	tags      []byte
-	vdata     []byte
+type cacheFile struct {
+	tagLen      int
+	digestLen   int
+	fnamesLen   int
+	tags        []byte
+	vdata       []byte
+	fnames      []byte
+	bloomFilter *bloomFilter
 }
 
 type layer struct {
-	id       string
-	metadata *metadata
-	target   string
+	id        string
+	cacheFile *cacheFile
+	target    string
+	// mmapBuffer is nil when the cache file is fully loaded in memory.
+	// Otherwise it points to a mmap'ed buffer that is referenced by cacheFile.vdata.
+	mmapBuffer []byte
+
+	// reloadWithMmap is set when the current process generates the cache file,
+	// and cacheFile reuses the memory buffer used by the generation function.
+	// Next time the layer cache is used, attempt to reload the file using
+	// mmap.
+	reloadWithMmap bool
 }
 
 type layersCache struct {
-	layers []layer
+	layers []*layer
 	refs   int
 	store  storage.Store
 	mutex  sync.RWMutex
@@ -56,14 +74,29 @@ var (
 	cache      *layersCache
 )
 
+func (c *layer) release() {
+	runtime.SetFinalizer(c, nil)
+	if c.mmapBuffer != nil {
+		unix.Munmap(c.mmapBuffer)
+	}
+}
+
+func layerFinalizer(c *layer) {
+	c.release()
+}
+
 func (c *layersCache) release() {
 	cacheMutex.Lock()
 	defer cacheMutex.Unlock()
 
 	c.refs--
-	if c.refs == 0 {
-		cache = nil
+	if c.refs != 0 {
+		return
 	}
+	for _, l := range c.layers {
+		l.release()
+	}
+	cache = nil
 }
 
 func getLayersCacheRef(store storage.Store) *layersCache {
@@ -91,90 +124,177 @@ func getLayersCache(store storage.Store) (*layersCache, error) {
 	return c, nil
 }
 
+// loadLayerBigData attempts to load the specified cacheKey from a file and mmap its content.
+// If the cache is not backed by a file, then it loads the entire content in memory.
+// Returns the cache content, and if mmap'ed, the mmap buffer to Munmap.
+func (c *layersCache) loadLayerBigData(layerID, bigDataKey string) ([]byte, []byte, error) {
+	inputFile, err := c.store.LayerBigData(layerID, bigDataKey)
+	if err != nil {
+		return nil, nil, err
+	}
+	defer inputFile.Close()
+
+	// if the cache is backed by a file, attempt to mmap it.
+	if osFile, ok := inputFile.(*os.File); ok {
+		st, err := osFile.Stat()
+		if err != nil {
+			logrus.Warningf("Error stat'ing cache file for layer %q: %v", layerID, err)
+			goto fallback
+		}
+		size := st.Size()
+		if size == 0 {
+			logrus.Warningf("Cache file size is zero for layer %q", layerID)
+			goto fallback
+		}
+		buf, err := unix.Mmap(int(osFile.Fd()), 0, int(size), unix.PROT_READ, unix.MAP_SHARED)
+		if err != nil {
+			logrus.Warningf("Error mmap'ing cache file for layer %q: %v", layerID, err)
+			goto fallback
+		}
+		// best effort advise to the kernel.
+		_ = unix.Madvise(buf, unix.MADV_RANDOM)
+
+		return buf, buf, nil
+	}
fallback:
+	buf, err := io.ReadAll(inputFile)
+	return buf, nil, err
+}
+
+func makeBinaryDigest(stringDigest string) ([]byte, error) {
+	d, err := digest.Parse(stringDigest)
+	if err != nil {
+		return nil, err
+	}
+	digestBytes, err := hex.DecodeString(d.Encoded())
+	if err != nil {
+		return nil, err
+	}
+	algo := []byte(d.Algorithm())
+	buf := make([]byte, 0, len(algo)+1+len(digestBytes))
+	buf = append(buf, algo...)
+	buf = append(buf, ':')
+	buf = append(buf, digestBytes...)
+ return buf, nil +} + +func (c *layersCache) loadLayerCache(layerID string) (_ *layer, errRet error) { + buffer, mmapBuffer, err := c.loadLayerBigData(layerID, cacheKey) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return nil, err + } + // there is no existing cache to load + if err != nil || buffer == nil { + return nil, nil + } + defer func() { + if errRet != nil && mmapBuffer != nil { + unix.Munmap(mmapBuffer) + } + }() + cacheFile, err := readCacheFileFromMemory(buffer) + if err != nil { + return nil, err + } + return c.createLayer(layerID, cacheFile, mmapBuffer) +} + +func (c *layersCache) createCacheFileFromTOC(layerID string) (*layer, error) { + clFile, err := c.store.LayerBigData(layerID, chunkedLayerDataKey) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return nil, err + } + var lcd chunkedLayerData + if err == nil && clFile != nil { + defer clFile.Close() + cl, err := io.ReadAll(clFile) + if err != nil { + return nil, fmt.Errorf("open manifest file: %w", err) + } + json := jsoniter.ConfigCompatibleWithStandardLibrary + + if err := json.Unmarshal(cl, &lcd); err != nil { + return nil, err + } + } + manifestReader, err := c.store.LayerBigData(layerID, bigDataKey) + if err != nil { + return nil, err + } + defer manifestReader.Close() + + manifest, err := io.ReadAll(manifestReader) + if err != nil { + return nil, fmt.Errorf("read manifest file: %w", err) + } + + cacheFile, err := writeCache(manifest, lcd.Format, layerID, c.store) + if err != nil { + return nil, err + } + l, err := c.createLayer(layerID, cacheFile, nil) + if err != nil { + return nil, err + } + l.reloadWithMmap = true + return l, nil +} + func (c *layersCache) load() error { c.mutex.Lock() defer c.mutex.Unlock() + loadedLayers := make(map[string]*layer) + for _, r := range c.layers { + loadedLayers[r.id] = r + } allLayers, err := c.store.Layers() if err != nil { return err } - existingLayers := make(map[string]string) - for _, r := range c.layers { - existingLayers[r.id] = r.target - } - currentLayers := make(map[string]string) + var newLayers []*layer for _, r := range allLayers { - currentLayers[r.ID] = r.ID - if _, found := existingLayers[r.ID]; found { - continue - } - - bigData, err := c.store.LayerBigData(r.ID, cacheKey) - // if the cache already exists, read and use it - if err == nil { - defer bigData.Close() - metadata, err := readMetadataFromCache(bigData) - if err == nil { - c.addLayer(r.ID, metadata) + // The layer is present in the store and it is already loaded. Attempt to + // re-use it if mmap'ed. + if l, found := loadedLayers[r.ID]; found { + // If the layer is not marked for re-load, move it to newLayers. + if !l.reloadWithMmap { + delete(loadedLayers, r.ID) + newLayers = append(newLayers, l) continue } - logrus.Warningf("Error reading cache file for layer %q: %v", r.ID, err) - } else if !errors.Is(err, os.ErrNotExist) { - return err } - - var lcd chunkedLayerData - - clFile, err := c.store.LayerBigData(r.ID, chunkedLayerDataKey) - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - if clFile != nil { - cl, err := io.ReadAll(clFile) - if err != nil { - return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err) - } - json := jsoniter.ConfigCompatibleWithStandardLibrary - if err := json.Unmarshal(cl, &lcd); err != nil { - return err - } - } - - // otherwise create it from the layer TOC. - manifestReader, err := c.store.LayerBigData(r.ID, bigDataKey) + // try to read the existing cache file. 
+		l, err := c.loadLayerCache(r.ID)
 		if err != nil {
+			logrus.Warningf("Error loading cache file for layer %q: %v", r.ID, err)
+		}
+		if l != nil {
+			newLayers = append(newLayers, l)
 			continue
 		}
-		defer manifestReader.Close()
-
-		manifest, err := io.ReadAll(manifestReader)
+		// the cache file is either not present or broken.  Try to generate it from the TOC.
+		l, err = c.createCacheFileFromTOC(r.ID)
 		if err != nil {
-			return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err)
+			logrus.Warningf("Error creating cache file for layer %q: %v", r.ID, err)
 		}
-
-		metadata, err := writeCache(manifest, lcd.Format, r.ID, c.store)
-		if err == nil {
-			c.addLayer(r.ID, metadata)
-		}
-	}
-
-	var newLayers []layer
-	for _, l := range c.layers {
-		if _, found := currentLayers[l.id]; found {
+		if l != nil {
 			newLayers = append(newLayers, l)
 		}
 	}
+	// The layers that are still in loadedLayers are either stale or fully loaded in memory.  Clean them up.
+	for _, l := range loadedLayers {
+		l.release()
+	}
 	c.layers = newLayers
-
 	return nil
 }
 
 // calculateHardLinkFingerprint calculates a hash that can be used to verify if a file
 // is usable for deduplication with hardlinks.
 // To calculate the digest, it uses the file payload digest, UID, GID, mode and xattrs.
-func calculateHardLinkFingerprint(f *internal.FileMetadata) (string, error) {
+func calculateHardLinkFingerprint(f *fileMetadata) (string, error) {
 	digester := digest.Canonical.Digester()
 
 	modeString := fmt.Sprintf("%d:%d:%o", f.UID, f.GID, f.Mode)
@@ -207,16 +327,46 @@ func calculateHardLinkFingerprint(f *internal.FileMetadata) (string, error) {
 	return string(digester.Digest()), nil
 }
 
-// generateFileLocation generates a file location in the form $OFFSET:$LEN:$PATH
-func generateFileLocation(path string, offset, len uint64) []byte {
-	return []byte(fmt.Sprintf("%d:%d:%s", offset, len, path))
+// generateFileLocation generates a file location in the form $PATH_POS$OFFSET$LEN
+func generateFileLocation(pathPos int, offset, len uint64) []byte {
+	var buf []byte
+
+	buf = binary.AppendUvarint(buf, uint64(pathPos))
+	buf = binary.AppendUvarint(buf, offset)
+	buf = binary.AppendUvarint(buf, len)
+
+	return buf
 }
 
-// generateTag generates a tag in the form $DIGEST$OFFSET@LEN.
-// the [OFFSET; LEN] points to the variable length data where the file locations
-// are stored.  $DIGEST has length digestLen stored in the metadata file header.
-func generateTag(digest string, offset, len uint64) string {
-	return fmt.Sprintf("%s%.20d@%.20d", digest, offset, len)
+// parseFileLocation reads what was written by generateFileLocation.
+func parseFileLocation(locationData []byte) (int, uint64, uint64, error) {
+	reader := bytes.NewReader(locationData)
+
+	pathPos, err := binary.ReadUvarint(reader)
+	if err != nil {
+		return 0, 0, 0, err
+	}
+
+	offset, err := binary.ReadUvarint(reader)
+	if err != nil {
+		return 0, 0, 0, err
+	}
+
+	len, err := binary.ReadUvarint(reader)
+	if err != nil {
+		return 0, 0, 0, err
+	}
+
+	return int(pathPos), offset, len, nil
+}
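Since the location encoding is easy to get wrong, here is an illustrative round-trip of the two helpers above (editorial sketch, not part of the patch; it assumes it lives in the same package):

package chunked

import "fmt"

// exampleFileLocationRoundTrip encodes a location and decodes it back.
// Uvarints keep small values to a single byte, so a location is typically a
// handful of bytes instead of the old "%d:%d:%s" string that embedded the
// whole path in every entry.
func exampleFileLocationRoundTrip() {
	loc := generateFileLocation(42, 1024, 512) // pathPos, offset, len

	pathPos, offset, size, err := parseFileLocation(loc)
	fmt.Println(pathPos, offset, size, err) // 42 1024 512 <nil>
}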
+
+// appendTag appends the $OFFSET$LEN information to the provided $DIGEST.
+// The [OFFSET; LEN] points to the variable length data where the file locations
+// are stored.  $DIGEST has length digestLen stored in the cache file header.
+func appendTag(digest []byte, offset, len uint64) ([]byte, error) {
+	digest = binary.LittleEndian.AppendUint64(digest, offset)
+	digest = binary.LittleEndian.AppendUint64(digest, len)
+	return digest, nil
+}
 
 type setBigData interface {
@@ -224,6 +374,77 @@ type setBigData interface {
 	SetLayerBigData(id, key string, data io.Reader) error
 }
 
+func bloomFilterFromTags(tags [][]byte, digestLen int) *bloomFilter {
+	bloomFilter := newBloomFilter(len(tags)*bloomFilterScale, bloomFilterHashes)
+	for _, t := range tags {
+		bloomFilter.add(t[:digestLen])
+	}
+	return bloomFilter
+}
+
+func writeCacheFileToWriter(writer io.Writer, bloomFilter *bloomFilter, tags [][]byte, tagLen, digestLen int, vdata, fnames bytes.Buffer, tagsBuffer *bytes.Buffer) error {
+	sort.Slice(tags, func(i, j int) bool {
+		return bytes.Compare(tags[i], tags[j]) == -1
+	})
+	for _, t := range tags {
+		if _, err := tagsBuffer.Write(t); err != nil {
+			return err
+		}
+	}
+
+	// version
+	if err := binary.Write(writer, binary.LittleEndian, uint64(cacheVersion)); err != nil {
+		return err
+	}
+
+	// len of a tag
+	if err := binary.Write(writer, binary.LittleEndian, uint64(tagLen)); err != nil {
+		return err
+	}
+
+	// len of a digest
+	if err := binary.Write(writer, binary.LittleEndian, uint64(digestLen)); err != nil {
+		return err
+	}
+
+	// bloom filter
+	if err := bloomFilter.writeTo(writer); err != nil {
+		return err
+	}
+
+	// tags length
+	if err := binary.Write(writer, binary.LittleEndian, uint64(tagsBuffer.Len())); err != nil {
+		return err
+	}
+
+	// vdata length
+	if err := binary.Write(writer, binary.LittleEndian, uint64(vdata.Len())); err != nil {
+		return err
+	}
+
+	// fnames length
+	if err := binary.Write(writer, binary.LittleEndian, uint64(fnames.Len())); err != nil {
+		return err
+	}
+
+	// tags
+	if _, err := writer.Write(tagsBuffer.Bytes()); err != nil {
+		return err
+	}
+
+	// variable length data
+	if _, err := writer.Write(vdata.Bytes()); err != nil {
+		return err
+	}
+
+	// file names
+	if _, err := writer.Write(fnames.Bytes()); err != nil {
+		return err
+	}
+
+	return nil
+}
+
 // writeCache write a cache for the layer ID.
 // It generates a sorted list of digests with their offset to the path location and offset.
 // The same cache is used to lookup files, chunks and candidates for deduplication with hard links.
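Taken together, writeCacheFileToWriter serializes (and readCacheFileFromMemory below parses back) the following on-disk layout. This summary is editorial, not normative; all integers are little-endian:

// Version 3 lookaside cache file layout:
//
//	uint64   cacheVersion (3)
//	uint64   tagLen      // length of one tag: binary digest + offset + len
//	uint64   digestLen   // length of the binary digest prefix of a tag
//	uint64   bitArrayLen // bloom filter, as written by (*bloomFilter).writeTo
//	uint32   k           // number of bloom filter hash functions
//	[]uint64 bitArray    // bloom filter bits
//	uint64   tagsLen
//	uint64   vdataLen
//	uint64   fnamesLen
//	[]byte   tags   // sorted tags, binary-searched by findBinaryTag
//	[]byte   vdata  // uvarint-encoded file locations pointed to by tags
//	[]byte   fnames // uint32 length-prefixed file names referenced by vdata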
@@ -231,55 +452,99 @@ type setBigData interface { // - digest(file.payload)) // - digest(digest(file.payload) + file.UID + file.GID + file.mode + file.xattrs) // - digest(i) for each i in chunks(file payload) -func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id string, dest setBigData) (*metadata, error) { - var vdata bytes.Buffer +func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id string, dest setBigData) (*cacheFile, error) { + var vdata, tagsBuffer, fnames bytes.Buffer tagLen := 0 digestLen := 0 - var tagsBuffer bytes.Buffer - toc, err := prepareMetadata(manifest, format) + toc, err := prepareCacheFile(manifest, format) if err != nil { return nil, err } - var tags []string + fnamesMap := make(map[string]int) + getFileNamePosition := func(name string) (int, error) { + if pos, found := fnamesMap[name]; found { + return pos, nil + } + pos := fnames.Len() + fnamesMap[name] = pos + + if err := binary.Write(&fnames, binary.LittleEndian, uint32(len(name))); err != nil { + return 0, err + } + if _, err := fnames.WriteString(name); err != nil { + return 0, err + } + return pos, nil + } + + var tags [][]byte for _, k := range toc { if k.Digest != "" { - location := generateFileLocation(k.Name, 0, uint64(k.Size)) - + digest, err := makeBinaryDigest(k.Digest) + if err != nil { + return nil, err + } + fileNamePos, err := getFileNamePosition(k.Name) + if err != nil { + return nil, err + } + location := generateFileLocation(fileNamePos, 0, uint64(k.Size)) off := uint64(vdata.Len()) l := uint64(len(location)) - d := generateTag(k.Digest, off, l) - if tagLen == 0 { - tagLen = len(d) + tag, err := appendTag(digest, off, l) + if err != nil { + return nil, err } - if tagLen != len(d) { + if tagLen == 0 { + tagLen = len(tag) + } + if tagLen != len(tag) { return nil, errors.New("digest with different length found") } - tags = append(tags, d) + tags = append(tags, tag) fp, err := calculateHardLinkFingerprint(k) if err != nil { return nil, err } - d = generateTag(fp, off, l) - if tagLen != len(d) { + digestHardLink, err := makeBinaryDigest(fp) + if err != nil { + return nil, err + } + tag, err = appendTag(digestHardLink, off, l) + if err != nil { + return nil, err + } + if tagLen != len(tag) { return nil, errors.New("digest with different length found") } - tags = append(tags, d) + tags = append(tags, tag) if _, err := vdata.Write(location); err != nil { return nil, err } - - digestLen = len(k.Digest) + digestLen = len(digestHardLink) } if k.ChunkDigest != "" { - location := generateFileLocation(k.Name, uint64(k.ChunkOffset), uint64(k.ChunkSize)) + fileNamePos, err := getFileNamePosition(k.Name) + if err != nil { + return nil, err + } + location := generateFileLocation(fileNamePos, uint64(k.ChunkOffset), uint64(k.ChunkSize)) off := uint64(vdata.Len()) l := uint64(len(location)) - d := generateTag(k.ChunkDigest, off, l) + + digest, err := makeBinaryDigest(k.ChunkDigest) + if err != nil { + return nil, err + } + d, err := appendTag(digest, off, l) + if err != nil { + return nil, err + } if tagLen == 0 { tagLen = len(d) } @@ -291,17 +556,11 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin if _, err := vdata.Write(location); err != nil { return nil, err } - digestLen = len(k.ChunkDigest) + digestLen = len(digest) } } - sort.Strings(tags) - - for _, t := range tags { - if _, err := tagsBuffer.Write([]byte(t)); err != nil { - return nil, err - } - } + bloomFilter := bloomFilterFromTags(tags, digestLen) pipeReader, pipeWriter := 
io.Pipe() errChan := make(chan error, 1) @@ -309,49 +568,7 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin defer pipeWriter.Close() defer close(errChan) - // version - if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(cacheVersion)); err != nil { - errChan <- err - return - } - - // len of a tag - if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(tagLen)); err != nil { - errChan <- err - return - } - - // len of a digest - if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(digestLen)); err != nil { - errChan <- err - return - } - - // tags length - if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(tagsBuffer.Len())); err != nil { - errChan <- err - return - } - - // vdata length - if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(vdata.Len())); err != nil { - errChan <- err - return - } - - // tags - if _, err := pipeWriter.Write(tagsBuffer.Bytes()); err != nil { - errChan <- err - return - } - - // variable length data - if _, err := pipeWriter.Write(vdata.Bytes()); err != nil { - errChan <- err - return - } - - errChan <- nil + errChan <- writeCacheFileToWriter(pipeWriter, bloomFilter, tags, tagLen, digestLen, vdata, fnames, &tagsBuffer) }() defer pipeReader.Close() @@ -369,16 +586,21 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin logrus.Debugf("Written lookaside cache for layer %q with length %v", id, counter.Count) - return &metadata{ - digestLen: digestLen, - tagLen: tagLen, - tags: tagsBuffer.Bytes(), - vdata: vdata.Bytes(), + return &cacheFile{ + digestLen: digestLen, + tagLen: tagLen, + tags: tagsBuffer.Bytes(), + vdata: vdata.Bytes(), + fnames: fnames.Bytes(), + fnamesLen: len(fnames.Bytes()), + bloomFilter: bloomFilter, }, nil } -func readMetadataFromCache(bigData io.Reader) (*metadata, error) { - var version, tagLen, digestLen, tagsLen, vdataLen uint64 +func readCacheFileFromMemory(bigDataBuffer []byte) (*cacheFile, error) { + bigData := bytes.NewReader(bigDataBuffer) + + var version, tagLen, digestLen, tagsLen, fnamesLen, vdataLen uint64 if err := binary.Read(bigData, binary.LittleEndian, &version); err != nil { return nil, err } @@ -391,6 +613,12 @@ func readMetadataFromCache(bigData io.Reader) (*metadata, error) { if err := binary.Read(bigData, binary.LittleEndian, &digestLen); err != nil { return nil, err } + + bloomFilter, err := readBloomFilter(bigData) + if err != nil { + return nil, err + } + if err := binary.Read(bigData, binary.LittleEndian, &tagsLen); err != nil { return nil, err } @@ -398,25 +626,32 @@ func readMetadataFromCache(bigData io.Reader) (*metadata, error) { return nil, err } + if err := binary.Read(bigData, binary.LittleEndian, &fnamesLen); err != nil { + return nil, err + } tags := make([]byte, tagsLen) if _, err := bigData.Read(tags); err != nil { return nil, err } - vdata := make([]byte, vdataLen) - if _, err := bigData.Read(vdata); err != nil { - return nil, err - } + // retrieve the unread part of the buffer. 
+ remaining := bigDataBuffer[len(bigDataBuffer)-bigData.Len():] - return &metadata{ - tagLen: int(tagLen), - digestLen: int(digestLen), - tags: tags, - vdata: vdata, + vdata := remaining[:vdataLen] + fnames := remaining[vdataLen:] + + return &cacheFile{ + bloomFilter: bloomFilter, + digestLen: int(digestLen), + fnames: fnames, + fnamesLen: int(fnamesLen), + tagLen: int(tagLen), + tags: tags, + vdata: vdata, }, nil } -func prepareMetadata(manifest []byte, format graphdriver.DifferOutputFormat) ([]*internal.FileMetadata, error) { +func prepareCacheFile(manifest []byte, format graphdriver.DifferOutputFormat) ([]*fileMetadata, error) { toc, err := unmarshalToc(manifest) if err != nil { // ignore errors here. They might be caused by a different manifest format. @@ -424,10 +659,17 @@ func prepareMetadata(manifest []byte, format graphdriver.DifferOutputFormat) ([] return nil, nil //nolint: nilnil } + var entries []fileMetadata + for i := range toc.Entries { + entries = append(entries, fileMetadata{ + FileMetadata: toc.Entries[i], + }) + } + switch format { case graphdriver.DifferOutputFormatDir: case graphdriver.DifferOutputFormatFlat: - toc.Entries, err = makeEntriesFlat(toc.Entries) + entries, err = makeEntriesFlat(entries) if err != nil { return nil, err } @@ -435,19 +677,19 @@ func prepareMetadata(manifest []byte, format graphdriver.DifferOutputFormat) ([] return nil, fmt.Errorf("unknown format %q", format) } - var r []*internal.FileMetadata + var r []*fileMetadata chunkSeen := make(map[string]bool) - for i := range toc.Entries { - d := toc.Entries[i].Digest + for i := range entries { + d := entries[i].Digest if d != "" { - r = append(r, &toc.Entries[i]) + r = append(r, &entries[i]) continue } // chunks do not use hard link dedup so keeping just one candidate is enough cd := toc.Entries[i].ChunkDigest if cd != "" && !chunkSeen[cd] { - r = append(r, &toc.Entries[i]) + r = append(r, &entries[i]) chunkSeen[cd] = true } } @@ -455,49 +697,49 @@ func prepareMetadata(manifest []byte, format graphdriver.DifferOutputFormat) ([] return r, nil } -func (c *layersCache) addLayer(id string, metadata *metadata) error { +func (c *layersCache) createLayer(id string, cacheFile *cacheFile, mmapBuffer []byte) (*layer, error) { target, err := c.store.DifferTarget(id) if err != nil { - return fmt.Errorf("get checkout directory layer %q: %w", id, err) + return nil, fmt.Errorf("get checkout directory layer %q: %w", id, err) } - - l := layer{ - id: id, - metadata: metadata, - target: target, + l := &layer{ + id: id, + cacheFile: cacheFile, + target: target, + mmapBuffer: mmapBuffer, } - c.layers = append(c.layers, l) - return nil + if mmapBuffer != nil { + runtime.SetFinalizer(l, layerFinalizer) + } + return l, nil } -func byteSliceAsString(b []byte) string { - return *(*string)(unsafe.Pointer(&b)) -} - -func findTag(digest string, metadata *metadata) (string, uint64, uint64) { - if len(digest) != metadata.digestLen { - return "", 0, 0 - } - - nElements := len(metadata.tags) / metadata.tagLen +func findBinaryTag(binaryDigest []byte, cacheFile *cacheFile) (bool, uint64, uint64) { + nElements := len(cacheFile.tags) / cacheFile.tagLen i := sort.Search(nElements, func(i int) bool { - d := byteSliceAsString(metadata.tags[i*metadata.tagLen : i*metadata.tagLen+metadata.digestLen]) - return strings.Compare(d, digest) >= 0 + d := cacheFile.tags[i*cacheFile.tagLen : i*cacheFile.tagLen+cacheFile.digestLen] + return bytes.Compare(d, binaryDigest) >= 0 }) if i < nElements { - d := string(metadata.tags[i*metadata.tagLen : 
i*metadata.tagLen+len(digest)]) - if digest == d { - startOff := i*metadata.tagLen + metadata.digestLen - parts := strings.Split(string(metadata.tags[startOff:(i+1)*metadata.tagLen]), "@") + d := cacheFile.tags[i*cacheFile.tagLen : i*cacheFile.tagLen+cacheFile.digestLen] + if bytes.Equal(binaryDigest, d) { + startOff := i*cacheFile.tagLen + cacheFile.digestLen - off, _ := strconv.ParseInt(parts[0], 10, 64) + // check for corrupted data, there must be 2 u64 (off and len) after the digest. + if cacheFile.tagLen < cacheFile.digestLen+16 { + return false, 0, 0 + } - len, _ := strconv.ParseInt(parts[1], 10, 64) - return digest, uint64(off), uint64(len) + offsetAndLen := cacheFile.tags[startOff : (i+1)*cacheFile.tagLen] + + off := binary.LittleEndian.Uint64(offsetAndLen[:8]) + len := binary.LittleEndian.Uint64(offsetAndLen[8:16]) + + return true, off, len } } - return "", 0, 0 + return false, 0, 0 } func (c *layersCache) findDigestInternal(digest string) (string, string, int64, error) { @@ -505,20 +747,42 @@ func (c *layersCache) findDigestInternal(digest string) (string, string, int64, return "", "", -1, nil } + binaryDigest, err := makeBinaryDigest(digest) + if err != nil { + return "", "", 0, err + } + c.mutex.RLock() defer c.mutex.RUnlock() for _, layer := range c.layers { - digest, off, tagLen := findTag(digest, layer.metadata) - if digest != "" { - position := string(layer.metadata.vdata[off : off+tagLen]) - parts := strings.SplitN(position, ":", 3) - if len(parts) != 3 { - continue + if !layer.cacheFile.bloomFilter.maybeContains(binaryDigest) { + continue + } + found, off, tagLen := findBinaryTag(binaryDigest, layer.cacheFile) + if found { + if uint64(len(layer.cacheFile.vdata)) < off+tagLen { + return "", "", 0, fmt.Errorf("corrupted cache file for layer %q", layer.id) } - offFile, _ := strconv.ParseInt(parts[0], 10, 64) + fileLocationData := layer.cacheFile.vdata[off : off+tagLen] + + fnamePosition, offFile, _, err := parseFileLocation(fileLocationData) + if err != nil { + return "", "", 0, fmt.Errorf("corrupted cache file for layer %q", layer.id) + } + + if len(layer.cacheFile.fnames) < fnamePosition+4 { + return "", "", 0, fmt.Errorf("corrupted cache file for layer %q", layer.id) + } + lenPath := int(binary.LittleEndian.Uint32(layer.cacheFile.fnames[fnamePosition : fnamePosition+4])) + + if len(layer.cacheFile.fnames) < fnamePosition+lenPath+4 { + return "", "", 0, fmt.Errorf("corrupted cache file for layer %q", layer.id) + } + path := string(layer.cacheFile.fnames[fnamePosition+4 : fnamePosition+lenPath+4]) + // parts[1] is the chunk length, currently unused. - return layer.target, parts[2], offFile, nil + return layer.target, path, int64(offFile), nil } } @@ -527,7 +791,7 @@ func (c *layersCache) findDigestInternal(digest string) (string, string, int64, // findFileInOtherLayers finds the specified file in other layers. // file is the file to look for. 
-func (c *layersCache) findFileInOtherLayers(file *internal.FileMetadata, useHardLinks bool) (string, string, error) { +func (c *layersCache) findFileInOtherLayers(file *fileMetadata, useHardLinks bool) (string, string, error) { digest := file.Digest if useHardLinks { var err error @@ -548,45 +812,9 @@ func (c *layersCache) findChunkInOtherLayers(chunk *internal.FileMetadata) (stri } func unmarshalToc(manifest []byte) (*internal.TOC, error) { - var buf bytes.Buffer - count := 0 var toc internal.TOC iter := jsoniter.ParseBytes(jsoniter.ConfigFastest, manifest) - for field := iter.ReadObject(); field != ""; field = iter.ReadObject() { - if strings.ToLower(field) != "entries" { - iter.Skip() - continue - } - for iter.ReadArray() { - for field := iter.ReadObject(); field != ""; field = iter.ReadObject() { - switch strings.ToLower(field) { - case "type", "name", "linkname", "digest", "chunkdigest", "chunktype", "modtime", "accesstime", "changetime": - count += len(iter.ReadStringAsSlice()) - case "xattrs": - for key := iter.ReadObject(); key != ""; key = iter.ReadObject() { - count += len(iter.ReadStringAsSlice()) - } - default: - iter.Skip() - } - } - } - break - } - - buf.Grow(count) - - getString := func(b []byte) string { - from := buf.Len() - buf.Write(b) - to := buf.Len() - return byteSliceAsString(buf.Bytes()[from:to]) - } - - pool := iter.Pool() - pool.ReturnIterator(iter) - iter = pool.BorrowIterator(manifest) for field := iter.ReadObject(); field != ""; field = iter.ReadObject() { if strings.ToLower(field) == "version" { @@ -602,11 +830,11 @@ func unmarshalToc(manifest []byte) (*internal.TOC, error) { for field := iter.ReadObject(); field != ""; field = iter.ReadObject() { switch strings.ToLower(field) { case "type": - m.Type = getString(iter.ReadStringAsSlice()) + m.Type = iter.ReadString() case "name": - m.Name = getString(iter.ReadStringAsSlice()) + m.Name = iter.ReadString() case "linkname": - m.Linkname = getString(iter.ReadStringAsSlice()) + m.Linkname = iter.ReadString() case "mode": m.Mode = iter.ReadInt64() case "size": @@ -616,19 +844,19 @@ func unmarshalToc(manifest []byte) (*internal.TOC, error) { case "gid": m.GID = iter.ReadInt() case "modtime": - time, err := time.Parse(time.RFC3339, byteSliceAsString(iter.ReadStringAsSlice())) + time, err := time.Parse(time.RFC3339, iter.ReadString()) if err != nil { return nil, err } m.ModTime = &time case "accesstime": - time, err := time.Parse(time.RFC3339, byteSliceAsString(iter.ReadStringAsSlice())) + time, err := time.Parse(time.RFC3339, iter.ReadString()) if err != nil { return nil, err } m.AccessTime = &time case "changetime": - time, err := time.Parse(time.RFC3339, byteSliceAsString(iter.ReadStringAsSlice())) + time, err := time.Parse(time.RFC3339, iter.ReadString()) if err != nil { return nil, err } @@ -638,7 +866,7 @@ func unmarshalToc(manifest []byte) (*internal.TOC, error) { case "devminor": m.Devminor = iter.ReadInt64() case "digest": - m.Digest = getString(iter.ReadStringAsSlice()) + m.Digest = iter.ReadString() case "offset": m.Offset = iter.ReadInt64() case "endoffset": @@ -648,14 +876,13 @@ func unmarshalToc(manifest []byte) (*internal.TOC, error) { case "chunkoffset": m.ChunkOffset = iter.ReadInt64() case "chunkdigest": - m.ChunkDigest = getString(iter.ReadStringAsSlice()) + m.ChunkDigest = iter.ReadString() case "chunktype": - m.ChunkType = getString(iter.ReadStringAsSlice()) + m.ChunkType = iter.ReadString() case "xattrs": m.Xattrs = make(map[string]string) for key := iter.ReadObject(); key != ""; key = 
iter.ReadObject() { - value := iter.ReadStringAsSlice() - m.Xattrs[key] = getString(value) + m.Xattrs[key] = iter.ReadString() } default: iter.Skip() @@ -677,6 +904,5 @@ func unmarshalToc(manifest []byte) (*internal.TOC, error) { return nil, fmt.Errorf("unexpected data after manifest") } - toc.StringsBuf = buf return &toc, nil } diff --git a/pkg/chunked/cache_linux_test.go b/pkg/chunked/cache_linux_test.go index 957bc27b6..426f44f0e 100644 --- a/pkg/chunked/cache_linux_test.go +++ b/pkg/chunked/cache_linux_test.go @@ -55,32 +55,49 @@ const jsonTOC = ` "chunkSize": 86252, "chunkOffset": 17615, "chunkDigest": "sha256:2a9d3f1b6b37abc8bb35eb8fa98b893a2a2447bcb01184c3bafc8c6b40da099d" + }, + { + "type": "reg", + "name": "usr/lib/systemd/system/system-systemd\\x2dcryptsetup.slice", + "mode": 420, + "size": 468, + "modtime": "2024-03-03T18:04:57+01:00", + "accesstime": "0001-01-01T00:00:00Z", + "changetime": "0001-01-01T00:00:00Z", + "digest": "sha256:68dc6e85631e077f2bc751352459823844911b93b7ba2afd95d96c893222bb50", + "offset": 148185424, + "endOffset": 148185753 + }, + { + "type": "reg", + "name": "usr/lib/systemd/system/system-systemd\\x2dcryptsetup-hardlink.slice", + "linkName": "usr/lib/systemd/system/system-systemd\\x2dcryptsetup.slice" } ] } ` func TestPrepareMetadata(t *testing.T) { - toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatDir) + toc, err := prepareCacheFile([]byte(jsonTOC), graphdriver.DifferOutputFormatDir) if err != nil { - t.Errorf("got error from prepareMetadata: %v", err) + t.Errorf("got error from prepareCacheFile: %v", err) } - if len(toc) != 2 { - t.Error("prepareMetadata returns the wrong length") + if len(toc) != 4 { + t.Error("prepareCacheFile returns the wrong length") } } func TestPrepareMetadataFlat(t *testing.T) { - toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatFlat) + toc, err := prepareCacheFile([]byte(jsonTOC), graphdriver.DifferOutputFormatFlat) if err != nil { - t.Errorf("got error from prepareMetadata: %v", err) + t.Errorf("got error from prepareCacheFile: %v", err) } for _, e := range toc { if len(strings.Split(e.Name, "/")) != 2 { - t.Error("prepareMetadata returns the wrong number of path elements for flat directories") + t.Error("prepareCacheFile returns the wrong number of path elements for flat directories") } if len(filepath.Dir(e.Name)) != 2 { - t.Error("prepareMetadata returns the wrong path for flat directories") + t.Error("prepareCacheFile returns the wrong path for flat directories") } } } @@ -103,10 +120,25 @@ func (b *bigDataToBuffer) SetLayerBigData(id, key string, data io.Reader) error return err } -func TestWriteCache(t *testing.T) { - toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatDir) +func findTag(digest string, cacheFile *cacheFile) (string, uint64, uint64) { + binaryDigest, err := makeBinaryDigest(digest) if err != nil { - t.Errorf("got error from prepareMetadata: %v", err) + return "", 0, 0 + } + if len(binaryDigest) != cacheFile.digestLen { + return "", 0, 0 + } + found, off, len := findBinaryTag(binaryDigest, cacheFile) + if found { + return digest, off, len + } + return "", 0, 0 +} + +func TestWriteCache(t *testing.T) { + toc, err := prepareCacheFile([]byte(jsonTOC), graphdriver.DifferOutputFormatDir) + if err != nil { + t.Errorf("got error from prepareCacheFile: %v", err) } dest := bigDataToBuffer{ @@ -116,25 +148,26 @@ func TestWriteCache(t *testing.T) { if err != nil { t.Errorf("got error from writeCache: %v", err) } - if digest, _, _ := 
findTag("foobar", cache); digest != "" {
-		t.Error("found invalid tag")
+	if digest, _, _ := findTag("sha256:99fe908c699dc068438b23e28319cadff1f2153c3043bafb8e83a430bba0a2c2", cache); digest != "" {
+		t.Error("found a tag that should not be present in the cache")
 	}
 
 	for _, r := range toc {
 		if r.Digest != "" {
 			// find the element in the cache by the digest checksum
-			digest, off, len := findTag(r.Digest, cache)
+			digest, off, lenTag := findTag(r.Digest, cache)
 			if digest == "" {
 				t.Error("file tag not found")
 			}
 			if digest != r.Digest {
 				t.Error("wrong file found")
 			}
-			expectedLocation := generateFileLocation(r.Name, 0, uint64(r.Size))
-			location := cache.vdata[off : off+len]
-			if !bytes.Equal(location, expectedLocation) {
-				t.Errorf("wrong file found %q instead of %q", location, expectedLocation)
-			}
+			location := cache.vdata[off : off+lenTag]
+			_, offFile, fileSize, err := parseFileLocation(location)
+			assert.NoError(t, err)
+
+			assert.Equal(t, fileSize, uint64(r.Size))
+			assert.Equal(t, offFile, uint64(0))
 
 			fingerprint, err := calculateHardLinkFingerprint(r)
 			if err != nil {
@@ -142,18 +175,19 @@ func TestWriteCache(t *testing.T) {
 			}
 
 			// find the element in the cache by the hardlink fingerprint
-			digest, off, len = findTag(fingerprint, cache)
+			digest, off, lenTag = findTag(fingerprint, cache)
 			if digest == "" {
 				t.Error("file tag not found")
 			}
 			if digest != fingerprint {
 				t.Error("wrong file found")
 			}
-			expectedLocation = generateFileLocation(r.Name, 0, uint64(r.Size))
-			location = cache.vdata[off : off+len]
-			if !bytes.Equal(location, expectedLocation) {
-				t.Errorf("wrong file found %q instead of %q", location, expectedLocation)
-			}
+			location = cache.vdata[off : off+lenTag]
+			_, offFile, fileSize, err = parseFileLocation(location)
+			assert.NoError(t, err)
+
+			assert.Equal(t, fileSize, uint64(r.Size))
+			assert.Equal(t, offFile, uint64(0))
 		}
 		if r.ChunkDigest != "" {
 			// find the element in the cache by the chunk digest checksum
@@ -164,7 +198,7 @@ func TestWriteCache(t *testing.T) {
 			if digest != r.ChunkDigest {
 				t.Error("wrong digest found")
 			}
-			expectedLocation := generateFileLocation(r.Name, uint64(r.ChunkOffset), uint64(r.ChunkSize))
+			expectedLocation := generateFileLocation(0, uint64(r.ChunkOffset), uint64(r.ChunkSize))
 			location := cache.vdata[off : off+len]
 			if !bytes.Equal(location, expectedLocation) {
 				t.Errorf("wrong file found %q instead of %q", location, expectedLocation)
@@ -182,7 +216,7 @@ func TestReadCache(t *testing.T) {
 		t.Errorf("got error from writeCache: %v", err)
 	}
 
-	cacheRead, err := readMetadataFromCache(dest.buf)
+	cacheRead, err := readCacheFileFromMemory(dest.buf.Bytes())
 	if err != nil {
 		t.Errorf("got error from readMetadataFromCache: %v", err)
 	}
@@ -194,7 +228,7 @@ func TestReadCache(t *testing.T) {
 func TestUnmarshalToc(t *testing.T) {
 	toc, err := unmarshalToc([]byte(jsonTOC))
 	assert.NoError(t, err)
-	assert.Equal(t, 4, len(toc.Entries))
+	assert.Equal(t, 6, len(toc.Entries))
 
 	_, err = unmarshalToc([]byte(jsonTOC + " \n\n\n\n "))
 	assert.NoError(t, err)
@@ -210,4 +244,20 @@ func TestUnmarshalToc(t *testing.T) {
 	assert.Error(t, err)
 	_, err = unmarshalToc([]byte(jsonTOC + "123"))
 	assert.Error(t, err)
+	assert.Equal(t, toc.Entries[4].Name, "usr/lib/systemd/system/system-systemd\\x2dcryptsetup.slice", "invalid name escaped")
+	assert.Equal(t, toc.Entries[5].Name, "usr/lib/systemd/system/system-systemd\\x2dcryptsetup-hardlink.slice", "invalid name escaped")
+	assert.Equal(t, toc.Entries[5].Linkname, "usr/lib/systemd/system/system-systemd\\x2dcryptsetup.slice", "invalid link name escaped")
+}
+
+func 
TestMakeBinaryDigest(t *testing.T) { + binDigest, err := makeBinaryDigest("sha256:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03") + assert.NoError(t, err) + expected := []byte{0x73, 0x68, 0x61, 0x32, 0x35, 0x36, 0x3a, 0x58, 0x91, 0xb5, 0xb5, 0x22, 0xd5, 0xdf, 0x8, 0x6d, 0xf, 0xf0, 0xb1, 0x10, 0xfb, 0xd9, 0xd2, 0x1b, 0xb4, 0xfc, 0x71, 0x63, 0xaf, 0x34, 0xd0, 0x82, 0x86, 0xa2, 0xe8, 0x46, 0xf6, 0xbe, 0x3} + assert.Equal(t, expected, binDigest) + + _, err = makeBinaryDigest("sha256:foo") + assert.Error(t, err) + + _, err = makeBinaryDigest("noAlgorithm") + assert.Error(t, err) } diff --git a/pkg/chunked/compression_linux.go b/pkg/chunked/compression_linux.go index 112ca2c7c..38a892a6e 100644 --- a/pkg/chunked/compression_linux.go +++ b/pkg/chunked/compression_linux.go @@ -7,7 +7,6 @@ import ( "io" "strconv" - "github.com/containerd/stargz-snapshotter/estargz" "github.com/containers/storage/pkg/chunked/internal" "github.com/klauspost/compress/zstd" "github.com/klauspost/pgzip" @@ -33,7 +32,7 @@ func typeToTarType(t string) (byte, error) { return r, nil } -func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, annotations map[string]string) ([]byte, int64, error) { +func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, tocDigest digest.Digest) ([]byte, int64, error) { // information on the format here https://github.com/containerd/stargz-snapshotter/blob/main/docs/stargz-estargz.md footerSize := int64(51) if blobSize <= footerSize { @@ -126,11 +125,7 @@ func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, return nil, 0, err } - d, err := digest.Parse(annotations[estargz.TOCJSONDigestAnnotation]) - if err != nil { - return nil, 0, err - } - if manifestDigester.Digest() != d { + if manifestDigester.Digest() != tocDigest { return nil, 0, errors.New("invalid manifest checksum") } @@ -140,7 +135,7 @@ func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, // readZstdChunkedManifest reads the zstd:chunked manifest from the seekable stream blobStream. The blob total size must // be specified. // This function uses the io.github.containers.zstd-chunked. annotations when specified. 
-func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, annotations map[string]string) ([]byte, []byte, int64, error) { +func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, tocDigest digest.Digest, annotations map[string]string) ([]byte, []byte, int64, error) { footerSize := int64(internal.FooterSizeSupported) if blobSize <= footerSize { return nil, nil, 0, errors.New("blob too small") @@ -238,7 +233,7 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, ann return nil, nil, 0, err } - decodedBlob, err := decodeAndValidateBlob(manifest, footerData.LengthUncompressed, footerData.ChecksumAnnotation) + decodedBlob, err := decodeAndValidateBlob(manifest, footerData.LengthUncompressed, tocDigest.String()) if err != nil { return nil, nil, 0, err } diff --git a/pkg/chunked/dump/dump.go b/pkg/chunked/dump/dump.go index d3c105c4d..701b6aa53 100644 --- a/pkg/chunked/dump/dump.go +++ b/pkg/chunked/dump/dump.go @@ -52,7 +52,7 @@ func escaped(val string, escape int) string { if noescapeSpace { hexEscape = !unicode.IsPrint(rune(c)) } else { - hexEscape = !unicode.IsGraphic(rune(c)) + hexEscape = !unicode.IsPrint(rune(c)) || unicode.IsSpace(rune(c)) } } diff --git a/pkg/chunked/dump/dump_test.go b/pkg/chunked/dump/dump_test.go index 740b8fb9a..ee45424fe 100644 --- a/pkg/chunked/dump/dump_test.go +++ b/pkg/chunked/dump/dump_test.go @@ -14,7 +14,6 @@ func TestEscaped(t *testing.T) { escape int want string }{ - {"Hello, World!", 0, "Hello, World!"}, {"12345", 0, "12345"}, {"", 0, ""}, {"\n", 0, "\\n"}, @@ -25,9 +24,12 @@ func TestEscaped(t *testing.T) { {"foo=bar", ESCAPE_EQUAL, "foo\\x3dbar"}, {"-", ESCAPE_LONE_DASH, "\\x2d"}, {"\n", NOESCAPE_SPACE, "\\n"}, + {" ", 0, "\\x20"}, {" ", NOESCAPE_SPACE, " "}, {"\t", NOESCAPE_SPACE, "\\t"}, {"\n\t", NOESCAPE_SPACE, "\\n\\t"}, + {"Hello World!", 0, "Hello\\x20World!"}, + {"Hello World!", NOESCAPE_SPACE, "Hello World!"}, } for _, test := range tests { diff --git a/pkg/chunked/internal/compression.go b/pkg/chunked/internal/compression.go index caa581efe..f52a07a9f 100644 --- a/pkg/chunked/internal/compression.go +++ b/pkg/chunked/internal/compression.go @@ -21,9 +21,6 @@ import ( type TOC struct { Version int `json:"version"` Entries []FileMetadata `json:"entries"` - - // internal: used by unmarshalToc - StringsBuf bytes.Buffer `json:"-"` } type FileMetadata struct { @@ -48,9 +45,6 @@ type FileMetadata struct { ChunkOffset int64 `json:"chunkOffset,omitempty"` ChunkDigest string `json:"chunkDigest,omitempty"` ChunkType string `json:"chunkType,omitempty"` - - // internal: computed by mergeTOCEntries. 
- Chunks []*FileMetadata `json:"-"` } const ( @@ -189,7 +183,6 @@ func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, off Offset: manifestOffset, LengthCompressed: uint64(len(compressedManifest)), LengthUncompressed: uint64(len(manifest)), - ChecksumAnnotation: "", // unused OffsetTarSplit: uint64(tarSplitOffset), LengthCompressedTarSplit: uint64(len(tarSplitData.Data)), LengthUncompressedTarSplit: uint64(tarSplitData.UncompressedSize), @@ -213,7 +206,6 @@ type ZstdChunkedFooterData struct { Offset uint64 LengthCompressed uint64 LengthUncompressed uint64 - ChecksumAnnotation string // Only used when reading a layer, not when creating it OffsetTarSplit uint64 LengthCompressedTarSplit uint64 @@ -240,11 +232,6 @@ func footerDataToBlob(footer ZstdChunkedFooterData) []byte { func ReadFooterDataFromAnnotations(annotations map[string]string) (ZstdChunkedFooterData, error) { var footerData ZstdChunkedFooterData - footerData.ChecksumAnnotation = annotations[ManifestChecksumKey] - if footerData.ChecksumAnnotation == "" { - return footerData, fmt.Errorf("manifest checksum annotation %q not found", ManifestChecksumKey) - } - offsetMetadata := annotations[ManifestInfoKey] if _, err := fmt.Sscanf(offsetMetadata, "%d:%d:%d:%d", &footerData.Offset, &footerData.LengthCompressed, &footerData.LengthUncompressed, &footerData.ManifestType); err != nil { diff --git a/pkg/chunked/internal/compression_test.go b/pkg/chunked/internal/compression_test.go index da660b89b..9d4e60d47 100644 --- a/pkg/chunked/internal/compression_test.go +++ b/pkg/chunked/internal/compression_test.go @@ -15,7 +15,6 @@ func TestGenerateAndReadFooter(t *testing.T) { Offset: 2, LengthCompressed: 3, LengthUncompressed: 4, - ChecksumAnnotation: "", // unused OffsetTarSplit: 5, LengthCompressedTarSplit: 6, LengthUncompressedTarSplit: 7, diff --git a/pkg/chunked/storage_linux.go b/pkg/chunked/storage_linux.go index f300df347..82966b8a5 100644 --- a/pkg/chunked/storage_linux.go +++ b/pkg/chunked/storage_linux.go @@ -25,6 +25,7 @@ import ( "github.com/containers/storage/pkg/archive" "github.com/containers/storage/pkg/chunked/compressor" "github.com/containers/storage/pkg/chunked/internal" + "github.com/containers/storage/pkg/chunked/toc" "github.com/containers/storage/pkg/fsverity" "github.com/containers/storage/pkg/idtools" "github.com/containers/storage/pkg/system" @@ -58,6 +59,21 @@ const ( copyGoRoutines = 32 ) +// fileMetadata is a wrapper around internal.FileMetadata with additional private fields that +// are not part of the TOC document. +// Type: TypeChunk entries are stored in Chunks, the primary [fileMetadata] entries never use TypeChunk. +type fileMetadata struct { + internal.FileMetadata + + // chunks stores the TypeChunk entries relevant to this entry when FileMetadata.Type == TypeReg. + chunks []*internal.FileMetadata + + // skipSetAttrs is set when the file attributes must not be + // modified, e.g. it is a hard link from a different source, + // or a composefs file. 
+ skipSetAttrs bool +} + type compressedFileType int type chunkedDiffer struct { @@ -138,7 +154,8 @@ func doHardLink(srcFd int, destDirFd int, destBase string) error { return err } -func copyFileContent(srcFd int, destFile string, dirfd int, mode os.FileMode, useHardLinks bool) (*os.File, int64, error) { +func copyFileContent(srcFd int, fileMetadata *fileMetadata, dirfd int, mode os.FileMode, useHardLinks bool) (*os.File, int64, error) { + destFile := fileMetadata.Name src := fmt.Sprintf("/proc/self/fd/%d", srcFd) st, err := os.Stat(src) if err != nil { @@ -156,6 +173,8 @@ func copyFileContent(srcFd int, destFile string, dirfd int, mode os.FileMode, us err := doHardLink(srcFd, int(destDir.Fd()), destBase) if err == nil { + // if the file was deduplicated with a hard link, skip overriding file metadata. + fileMetadata.skipSetAttrs = true return nil, st.Size(), nil } } @@ -198,15 +217,15 @@ func (f *seekableFile) GetBlobAt(chunks []ImageSourceChunk) (chan io.ReadCloser, return streams, errs, nil } -func convertTarToZstdChunked(destDirectory string, payload *os.File) (*seekableFile, digest.Digest, map[string]string, error) { +func convertTarToZstdChunked(destDirectory string, payload *os.File) (int64, *seekableFile, digest.Digest, map[string]string, error) { diff, err := archive.DecompressStream(payload) if err != nil { - return nil, "", nil, err + return 0, nil, "", nil, err } fd, err := unix.Open(destDirectory, unix.O_TMPFILE|unix.O_RDWR|unix.O_CLOEXEC, 0o600) if err != nil { - return nil, "", nil, err + return 0, nil, "", nil, err } f := os.NewFile(uintptr(fd), destDirectory) @@ -216,23 +235,24 @@ func convertTarToZstdChunked(destDirectory string, payload *os.File) (*seekableF chunked, err := compressor.ZstdCompressor(f, newAnnotations, &level) if err != nil { f.Close() - return nil, "", nil, err + return 0, nil, "", nil, err } convertedOutputDigester := digest.Canonical.Digester() - if _, err := io.Copy(io.MultiWriter(chunked, convertedOutputDigester.Hash()), diff); err != nil { + copied, err := io.Copy(io.MultiWriter(chunked, convertedOutputDigester.Hash()), diff) + if err != nil { f.Close() - return nil, "", nil, err + return 0, nil, "", nil, err } if err := chunked.Close(); err != nil { f.Close() - return nil, "", nil, err + return 0, nil, "", nil, err } is := seekableFile{ file: f, } - return &is, convertedOutputDigester.Digest(), newAnnotations, nil + return copied, &is, convertedOutputDigester.Digest(), newAnnotations, nil } // GetDiffer returns a differ than can be used with ApplyDiffWithDiffer. 
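The next hunk moves TOC-digest parsing up into GetDiffer, so both differ constructors receive an already-validated digest.Digest instead of re-reading the annotations. A simplified editorial sketch of that selection logic (the real GetDiffer also rejects layers that carry both annotations):

package chunked

import (
	"errors"

	"github.com/containerd/stargz-snapshotter/estargz"
	"github.com/containers/storage/pkg/chunked/internal"
	digest "github.com/opencontainers/go-digest"
)

// exampleSelectTOCDigest picks whichever TOC annotation is present and
// validates it up front; illustrative only, not part of the patch.
func exampleSelectTOCDigest(annotations map[string]string) (digest.Digest, error) {
	if v, ok := annotations[internal.ManifestChecksumKey]; ok {
		return digest.Parse(v) // zstd:chunked layer
	}
	if v, ok := annotations[estargz.TOCJSONDigestAnnotation]; ok {
		return digest.Parse(v) // eStargz layer
	}
	return "", errors.New("no TOC digest annotation found")
}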
@@ -246,18 +266,26 @@ func GetDiffer(ctx context.Context, store storage.Store, blobDigest digest.Diges return nil, errors.New("enable_partial_images not configured") } - _, hasZstdChunkedTOC := annotations[internal.ManifestChecksumKey] - _, hasEstargzTOC := annotations[estargz.TOCJSONDigestAnnotation] + zstdChunkedTOCDigestString, hasZstdChunkedTOC := annotations[internal.ManifestChecksumKey] + estargzTOCDigestString, hasEstargzTOC := annotations[estargz.TOCJSONDigestAnnotation] if hasZstdChunkedTOC && hasEstargzTOC { return nil, errors.New("both zstd:chunked and eStargz TOC found") } if hasZstdChunkedTOC { - return makeZstdChunkedDiffer(ctx, store, blobSize, annotations, iss, &storeOpts) + zstdChunkedTOCDigest, err := digest.Parse(zstdChunkedTOCDigestString) + if err != nil { + return nil, fmt.Errorf("parsing zstd:chunked TOC digest %q: %w", zstdChunkedTOCDigestString, err) + } + return makeZstdChunkedDiffer(ctx, store, blobSize, zstdChunkedTOCDigest, annotations, iss, &storeOpts) } if hasEstargzTOC { - return makeEstargzChunkedDiffer(ctx, store, blobSize, annotations, iss, &storeOpts) + estargzTOCDigest, err := digest.Parse(estargzTOCDigestString) + if err != nil { + return nil, fmt.Errorf("parsing estargz TOC digest %q: %w", estargzTOCDigestString, err) + } + return makeEstargzChunkedDiffer(ctx, store, blobSize, estargzTOCDigest, iss, &storeOpts) } return makeConvertFromRawDiffer(ctx, store, blobDigest, blobSize, annotations, iss, &storeOpts) @@ -285,8 +313,8 @@ func makeConvertFromRawDiffer(ctx context.Context, store storage.Store, blobDige }, nil } -func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) { - manifest, tarSplit, tocOffset, err := readZstdChunkedManifest(iss, blobSize, annotations) +func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, tocDigest digest.Digest, annotations map[string]string, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) { + manifest, tarSplit, tocOffset, err := readZstdChunkedManifest(iss, blobSize, tocDigest, annotations) if err != nil { return nil, fmt.Errorf("read zstd:chunked manifest: %w", err) } @@ -295,11 +323,6 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in return nil, err } - tocDigest, err := digest.Parse(annotations[internal.ManifestChecksumKey]) - if err != nil { - return nil, fmt.Errorf("parse TOC digest %q: %w", annotations[internal.ManifestChecksumKey], err) - } - return &chunkedDiffer{ fsVerityDigests: make(map[string]string), blobSize: blobSize, @@ -315,8 +338,8 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in }, nil } -func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) { - manifest, tocOffset, err := readEstargzChunkedManifest(iss, blobSize, annotations) +func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, tocDigest digest.Digest, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) { + manifest, tocOffset, err := readEstargzChunkedManifest(iss, blobSize, tocDigest) if err != nil { return nil, fmt.Errorf("read zstd:chunked manifest: %w", err) } @@ -325,11 +348,6 @@ func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize return nil, 
err } - tocDigest, err := digest.Parse(annotations[estargz.TOCJSONDigestAnnotation]) - if err != nil { - return nil, fmt.Errorf("parse TOC digest %q: %w", annotations[estargz.TOCJSONDigestAnnotation], err) - } - return &chunkedDiffer{ fsVerityDigests: make(map[string]string), blobSize: blobSize, @@ -354,7 +372,7 @@ func makeCopyBuffer() []byte { // name is the path to the file to copy in source. // dirfd is an open file descriptor to the destination root directory. // useHardLinks defines whether the deduplication can be performed using hard links. -func copyFileFromOtherLayer(file *internal.FileMetadata, source string, name string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) { +func copyFileFromOtherLayer(file *fileMetadata, source string, name string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) { srcDirfd, err := unix.Open(source, unix.O_RDONLY, 0) if err != nil { return false, nil, 0, fmt.Errorf("open source file: %w", err) @@ -367,7 +385,7 @@ func copyFileFromOtherLayer(file *internal.FileMetadata, source string, name str } defer srcFile.Close() - dstFile, written, err := copyFileContent(int(srcFile.Fd()), file.Name, dirfd, 0, useHardLinks) + dstFile, written, err := copyFileContent(int(srcFile.Fd()), file, dirfd, 0, useHardLinks) if err != nil { return false, nil, 0, fmt.Errorf("copy content to %q: %w", file.Name, err) } @@ -376,7 +394,7 @@ func copyFileFromOtherLayer(file *internal.FileMetadata, source string, name str // canDedupMetadataWithHardLink says whether it is possible to deduplicate file with otherFile. // It checks that the two files have the same UID, GID, file mode and xattrs. -func canDedupMetadataWithHardLink(file *internal.FileMetadata, otherFile *internal.FileMetadata) bool { +func canDedupMetadataWithHardLink(file *fileMetadata, otherFile *fileMetadata) bool { if file.UID != otherFile.UID { return false } @@ -394,7 +412,7 @@ func canDedupMetadataWithHardLink(file *internal.FileMetadata, otherFile *intern // canDedupFileWithHardLink checks if the specified file can be deduplicated by an // open file, given its descriptor and stat data. -func canDedupFileWithHardLink(file *internal.FileMetadata, fd int, s os.FileInfo) bool { +func canDedupFileWithHardLink(file *fileMetadata, fd int, s os.FileInfo) bool { st, ok := s.Sys().(*syscall.Stat_t) if !ok { return false @@ -420,11 +438,13 @@ func canDedupFileWithHardLink(file *internal.FileMetadata, fd int, s os.FileInfo xattrs[x] = string(v) } // fill only the attributes used by canDedupMetadataWithHardLink. - otherFile := internal.FileMetadata{ - UID: int(st.Uid), - GID: int(st.Gid), - Mode: int64(st.Mode), - Xattrs: xattrs, + otherFile := fileMetadata{ + FileMetadata: internal.FileMetadata{ + UID: int(st.Uid), + GID: int(st.Gid), + Mode: int64(st.Mode), + Xattrs: xattrs, + }, } return canDedupMetadataWithHardLink(file, &otherFile) } @@ -434,7 +454,7 @@ func canDedupFileWithHardLink(file *internal.FileMetadata, fd int, s os.FileInfo // ostreeRepos is a list of OSTree repos. // dirfd is an open fd to the destination checkout. // useHardLinks defines whether the deduplication can be performed using hard links. 
-func findFileInOSTreeRepos(file *internal.FileMetadata, ostreeRepos []string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
+func findFileInOSTreeRepos(file *fileMetadata, ostreeRepos []string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
 	digest, err := digest.Parse(file.Digest)
 	if err != nil {
 		logrus.Debugf("could not parse digest: %v", err)
@@ -467,7 +487,7 @@ func findFileInOSTreeRepos(file *internal.FileMetadata, ostreeRepos []string, di
 			continue
 		}
 
-		dstFile, written, err := copyFileContent(fd, file.Name, dirfd, 0, useHardLinks)
+		dstFile, written, err := copyFileContent(fd, file, dirfd, 0, useHardLinks)
 		if err != nil {
 			logrus.Debugf("could not copyFileContent: %v", err)
 			return false, nil, 0, nil
@@ -487,7 +507,7 @@ func findFileInOSTreeRepos(file *internal.FileMetadata, ostreeRepos []string, di
 // file is the file to look for.
 // dirfd is an open file descriptor to the checkout root directory.
 // useHardLinks defines whether the deduplication can be performed using hard links.
-func findFileInOtherLayers(cache *layersCache, file *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
+func findFileInOtherLayers(cache *layersCache, file *fileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
 	target, name, err := cache.findFileInOtherLayers(file, useHardLinks)
 	if err != nil || name == "" {
 		return false, nil, 0, err
@@ -495,7 +515,7 @@ func findFileInOtherLayers(cache *layersCache, file *internal.FileMetadata, dirf
 	return copyFileFromOtherLayer(file, target, name, dirfd, useHardLinks)
 }
 
-func maybeDoIDRemap(manifest []internal.FileMetadata, options *archive.TarOptions) error {
+func maybeDoIDRemap(manifest []fileMetadata, options *archive.TarOptions) error {
 	if options.ChownOpts == nil && len(options.UIDMaps) == 0 || len(options.GIDMaps) == 0 {
 		return nil
 	}
@@ -529,7 +549,7 @@ func mapToSlice(inputMap map[uint32]struct{}) []uint32 {
 	return out
 }
 
-func collectIDs(entries []internal.FileMetadata) ([]uint32, []uint32) {
+func collectIDs(entries []fileMetadata) ([]uint32, []uint32) {
 	uids := make(map[uint32]struct{})
 	gids := make(map[uint32]struct{})
 	for _, entry := range entries {
@@ -549,7 +569,7 @@ type missingFileChunk struct {
 	Gap  int64
 	Hole bool
 
-	File *internal.FileMetadata
+	File *fileMetadata
 
 	CompressedSize   int64
 	UncompressedSize int64
@@ -582,7 +602,10 @@ func (o *originFile) OpenFile() (io.ReadCloser, error) {
 }
 
 // setFileAttrs sets the file attributes for file given metadata
-func setFileAttrs(dirfd int, file *os.File, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions, usePath bool) error {
+func setFileAttrs(dirfd int, file *os.File, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions, usePath bool) error {
+	if metadata.skipSetAttrs {
+		return nil
+	}
 	if file == nil || file.Fd() < 0 {
 		return errors.New("invalid file")
 	}
@@ -944,14 +967,14 @@ type destinationFile struct {
 	dirfd          int
 	file           *os.File
 	hash           hash.Hash
-	metadata       *internal.FileMetadata
+	metadata       *fileMetadata
 	options        *archive.TarOptions
 	skipValidation bool
 	to             io.Writer
 	recordFsVerity recordFsVerityFunc
 }
 
-func openDestinationFile(dirfd int, metadata *internal.FileMetadata, options *archive.TarOptions, skipValidation bool, recordFsVerity recordFsVerityFunc) (*destinationFile, error) {
+func openDestinationFile(dirfd int, metadata *fileMetadata, options *archive.TarOptions, skipValidation bool, recordFsVerity recordFsVerityFunc) (*destinationFile, error) {
 	file, err := openFileUnderRoot(metadata.Name, dirfd, newFileFlags, 0)
 	if err != nil {
 		return nil, err
 	}
@@ -1314,7 +1337,7 @@ func (c *chunkedDiffer) retrieveMissingFiles(stream ImageSourceSeekable, dest st
 	return nil
 }
 
-func safeMkdir(dirfd int, mode os.FileMode, name string, metadata *internal.FileMetadata, options *archive.TarOptions) error {
+func safeMkdir(dirfd int, mode os.FileMode, name string, metadata *fileMetadata, options *archive.TarOptions) error {
 	parent := filepath.Dir(name)
 	base := filepath.Base(name)
 
@@ -1343,7 +1366,7 @@ func safeMkdir(dirfd int, mode os.FileMode, name string, metadata *internal.File
 	return setFileAttrs(dirfd, file, mode, metadata, options, false)
 }
 
-func safeLink(dirfd int, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) error {
+func safeLink(dirfd int, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions) error {
 	sourceFile, err := openFileUnderRoot(metadata.Linkname, dirfd, unix.O_PATH|unix.O_RDONLY|unix.O_NOFOLLOW, 0)
 	if err != nil {
 		return err
@@ -1385,7 +1408,7 @@ func safeLink(dirfd int, mode os.FileMode, metadata *internal.FileMetadata, opti
 	return setFileAttrs(dirfd, newFile, mode, metadata, options, false)
 }
 
-func safeSymlink(dirfd int, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) error {
+func safeSymlink(dirfd int, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions) error {
 	destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name)
 	destDirFd := dirfd
 	if destDir != "." {
@@ -1473,7 +1496,7 @@ type hardLinkToCreate struct {
 	dest     string
 	dirfd    int
 	mode     os.FileMode
-	metadata *internal.FileMetadata
+	metadata *fileMetadata
 }
 
 func parseBooleanPullOption(storeOpts *storage.StoreOptions, name string, def bool) bool {
@@ -1498,7 +1521,7 @@ func reopenFileReadOnly(f *os.File) (*os.File, error) {
 	return os.NewFile(uintptr(fd), f.Name()), nil
 }
 
-func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *internal.FileMetadata, copyOptions *findAndCopyFileOptions, mode os.FileMode) (bool, error) {
+func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *fileMetadata, copyOptions *findAndCopyFileOptions, mode os.FileMode) (bool, error) {
 	finalizeFile := func(dstFile *os.File) error {
 		if dstFile == nil {
 			return nil
@@ -1549,8 +1572,8 @@ func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *internal.FileMetadata, cop
 	return false, nil
 }
 
-func makeEntriesFlat(mergedEntries []internal.FileMetadata) ([]internal.FileMetadata, error) {
-	var new []internal.FileMetadata
+func makeEntriesFlat(mergedEntries []fileMetadata) ([]fileMetadata, error) {
+	var new []fileMetadata
 	hashes := make(map[string]string)
 	for i := range mergedEntries {
@@ -1572,6 +1595,7 @@ func makeEntriesFlat(mergedEntries []internal.FileMeta
 		hashes[d] = d
 
 		mergedEntries[i].Name = fmt.Sprintf("%s/%s", d[0:2], d[2:])
+		mergedEntries[i].skipSetAttrs = true
 
 		new = append(new, mergedEntries[i])
 	}
@@ -1629,6 +1653,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 	stream := c.stream
 
 	var uncompressedDigest digest.Digest
+	var convertedBlobSize int64
 
 	if c.convertToZstdChunked {
 		fd, err := unix.Open(dest, unix.O_TMPFILE|unix.O_RDWR|unix.O_CLOEXEC, 0o600)
@@ -1656,10 +1681,11 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 			return graphdriver.DriverWithDifferOutput{}, err
 		}
 
-		fileSource, diffID, annotations, err := convertTarToZstdChunked(dest, blobFile)
+		tarSize, fileSource, diffID, annotations, err := convertTarToZstdChunked(dest, blobFile)
 		if err != nil {
 			return graphdriver.DriverWithDifferOutput{}, err
 		}
+		convertedBlobSize = tarSize
 
 		// fileSource is a O_TMPFILE file descriptor, so we
 		// need to keep it open until the entire file is processed.
 		defer fileSource.Close()
@@ -1668,7 +1694,14 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 		blobFile.Close()
 		blobFile = nil
 
-		manifest, tarSplit, tocOffset, err := readZstdChunkedManifest(fileSource, c.blobSize, annotations)
+		tocDigest, err := toc.GetTOCDigest(annotations)
+		if err != nil {
+			return graphdriver.DriverWithDifferOutput{}, fmt.Errorf("internal error: parsing just-created zstd:chunked TOC digest: %w", err)
+		}
+		if tocDigest == nil {
+			return graphdriver.DriverWithDifferOutput{}, fmt.Errorf("internal error: just-created zstd:chunked missing TOC digest")
+		}
+		manifest, tarSplit, tocOffset, err := readZstdChunkedManifest(fileSource, c.blobSize, *tocDigest, annotations)
 		if err != nil {
 			return graphdriver.DriverWithDifferOutput{}, fmt.Errorf("read zstd:chunked manifest: %w", err)
 		}
@@ -1729,14 +1762,19 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 
 	var missingParts []missingPart
 
-	output.UIDs, output.GIDs = collectIDs(toc.Entries)
-
-	mergedEntries, totalSize, err := c.mergeTocEntries(c.fileType, toc.Entries)
+	mergedEntries, totalSizeFromTOC, err := c.mergeTocEntries(c.fileType, toc.Entries)
 	if err != nil {
 		return output, err
 	}
 
-	output.Size = totalSize
+	output.UIDs, output.GIDs = collectIDs(mergedEntries)
+	if convertedBlobSize > 0 {
+		// if the image was converted, store the original tar size, so that
+		// it can be recreated correctly.
+		output.Size = convertedBlobSize
+	} else {
+		output.Size = totalSizeFromTOC
+	}
 
 	if err := maybeDoIDRemap(mergedEntries, options); err != nil {
 		return output, err
@@ -1789,7 +1827,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 		njob     int
 		index    int
 		mode     os.FileMode
-		metadata *internal.FileMetadata
+		metadata *fileMetadata
 
 		found bool
 		err   error
@@ -1961,7 +1999,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 			remainingSize := r.Size
 
 			// the file is missing, attempt to find individual chunks.
-			for _, chunk := range r.Chunks {
+			for _, chunk := range r.chunks {
 				compressedSize := int64(chunk.EndOffset - chunk.Offset)
 				size := remainingSize
 				if chunk.ChunkSize > 0 {
@@ -2045,7 +2083,7 @@ func mustSkipFile(fileType compressedFileType, e internal.FileMetadata) bool {
 	return false
 }
 
-func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]internal.FileMetadata, int64, error) {
+func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]fileMetadata, int64, error) {
 	var totalFilesSize int64
 
 	countNextChunks := func(start int) int {
@@ -2069,11 +2107,11 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
 		}
 	}
 
-	mergedEntries := make([]internal.FileMetadata, size)
+	mergedEntries := make([]fileMetadata, size)
 	m := 0
 	for i := 0; i < len(entries); i++ {
-		e := entries[i]
-		if mustSkipFile(fileType, e) {
+		e := fileMetadata{FileMetadata: entries[i]}
+		if mustSkipFile(fileType, entries[i]) {
 			continue
 		}
@@ -2086,12 +2124,12 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
 		if e.Type == TypeReg {
 			nChunks := countNextChunks(i + 1)
 
-			e.Chunks = make([]*internal.FileMetadata, nChunks+1)
+			e.chunks = make([]*internal.FileMetadata, nChunks+1)
 			for j := 0; j <= nChunks; j++ {
 				// we need a copy here, otherwise we override the
 				// .Size later
 				copy := entries[i+j]
-				e.Chunks[j] = &copy
+				e.chunks[j] = &copy
 				e.EndOffset = entries[i+j].EndOffset
 			}
 			i += nChunks
@@ -2110,10 +2148,10 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
 		}
 
 		lastChunkOffset := mergedEntries[i].EndOffset
-		for j := len(mergedEntries[i].Chunks) - 1; j >= 0; j-- {
-			mergedEntries[i].Chunks[j].EndOffset = lastChunkOffset
-			mergedEntries[i].Chunks[j].Size = mergedEntries[i].Chunks[j].EndOffset - mergedEntries[i].Chunks[j].Offset
-			lastChunkOffset = mergedEntries[i].Chunks[j].Offset
+		for j := len(mergedEntries[i].chunks) - 1; j >= 0; j-- {
+			mergedEntries[i].chunks[j].EndOffset = lastChunkOffset
+			mergedEntries[i].chunks[j].Size = mergedEntries[i].chunks[j].EndOffset - mergedEntries[i].chunks[j].Offset
+			lastChunkOffset = mergedEntries[i].chunks[j].Offset
 		}
 	}
 	return mergedEntries, totalFilesSize, nil
diff --git a/pkg/chunked/zstdchunked_test.go b/pkg/chunked/zstdchunked_test.go
index 8041db9a2..ec1edffcf 100644
--- a/pkg/chunked/zstdchunked_test.go
+++ b/pkg/chunked/zstdchunked_test.go
@@ -12,8 +12,10 @@ import (
 	"testing"
 
 	"github.com/containers/storage/pkg/chunked/internal"
+	"github.com/containers/storage/pkg/chunked/toc"
 	"github.com/klauspost/compress/zstd"
 	"github.com/opencontainers/go-digest"
+	"github.com/stretchr/testify/require"
 )
 
 type seekable struct {
@@ -148,7 +150,10 @@ func TestGenerateAndParseManifest(t *testing.T) {
 		t: t,
 	}
 
-	manifest, _, _, err := readZstdChunkedManifest(s, 8192, annotations)
+	tocDigest, err := toc.GetTOCDigest(annotations)
+	require.NoError(t, err)
+	require.NotNil(t, tocDigest)
+	manifest, _, _, err := readZstdChunkedManifest(s, 8192, *tocDigest, annotations)
 	if err != nil {
 		t.Error(err)
 	}
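Reviewer note: the hunks above thread a package-private fileMetadata type through code that previously took internal.FileMetadata directly, but the type definition itself is not part of this excerpt. Judging only from the uses visible here (the fileMetadata{FileMetadata: ...} literal, the lower-cased chunks slice, and the skipSetAttrs check in setFileAttrs), the wrapper plausibly looks like the sketch below; the field comments are inferred, not authoritative.

package chunked

import "github.com/containers/storage/pkg/chunked/internal"

// fileMetadata (sketch) embeds the TOC entry and adds in-memory-only state.
type fileMetadata struct {
	internal.FileMetadata

	// chunks replaces the exported Chunks field for merged entries, so the
	// per-file chunk list stays out of anything serialized from the entry.
	chunks []*internal.FileMetadata

	// skipSetAttrs makes setFileAttrs a no-op; makeEntriesFlat sets it for
	// entries renamed to their content digest in the flat layout.
	skipSetAttrs bool
}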
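The GetDiffer hunk moves TOC-digest parsing out of makeZstdChunkedDiffer and makeEstargzChunkedDiffer and into the caller, so a malformed annotation is rejected before any differ is constructed. A minimal standalone sketch of that validate-early pattern follows; the annotation key and map are made up for illustration, while digest.Parse is the real go-digest API.

package main

import (
	"fmt"

	digest "github.com/opencontainers/go-digest"
)

func main() {
	// Hypothetical annotations, standing in for a layer's manifest annotations.
	annotations := map[string]string{
		"example.invalid/toc-digest": "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
	}

	tocDigestString, ok := annotations["example.invalid/toc-digest"]
	if !ok {
		fmt.Println("no TOC digest annotation; fall back to a full pull")
		return
	}
	// Validate eagerly, as GetDiffer now does, instead of deep inside the differ.
	tocDigest, err := digest.Parse(tocDigestString)
	if err != nil {
		fmt.Printf("parsing TOC digest %q: %v\n", tocDigestString, err)
		return
	}
	fmt.Println("validated TOC digest:", tocDigest)
}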
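The final mergeTocEntries hunk only renames Chunks to chunks; the back-fill it performs is unchanged. Walking a file's chunk list in reverse, each chunk's EndOffset becomes the next chunk's Offset (or the file's own EndOffset for the last chunk), and Size falls out as the difference. A self-contained sketch of that fix-up, using a simplified chunk struct rather than the package's real types:

package main

import "fmt"

// chunk is a simplified stand-in for internal.FileMetadata, keeping only
// the offset fields the fix-up loop touches.
type chunk struct {
	Offset    int64
	EndOffset int64
	Size      int64
}

// fixupChunks mirrors the loop at the end of mergeTocEntries: each chunk
// ends where its successor starts, and the last ends at fileEndOffset.
func fixupChunks(chunks []*chunk, fileEndOffset int64) {
	lastChunkOffset := fileEndOffset
	for j := len(chunks) - 1; j >= 0; j-- {
		chunks[j].EndOffset = lastChunkOffset
		chunks[j].Size = chunks[j].EndOffset - chunks[j].Offset
		lastChunkOffset = chunks[j].Offset
	}
}

func main() {
	// Three chunks of a file whose payload ends at offset 900.
	chunks := []*chunk{{Offset: 100}, {Offset: 400}, {Offset: 650}}
	fixupChunks(chunks, 900)
	for _, c := range chunks {
		fmt.Printf("offset=%d end=%d size=%d\n", c.Offset, c.EndOffset, c.Size)
	}
}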