From 9d309f6d0c8fcaefe2a4b3121bc6d722ee45f966 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano
Date: Sat, 16 Mar 2024 23:10:26 +0100
Subject: [PATCH 01/20] chunked: refactor private fields to internal struct

Signed-off-by: Giuseppe Scrivano
(cherry picked from commit f6356d6ccdb31a429c2c43eece079336c24cc808)
---
 pkg/chunked/cache_linux.go          | 25 ++++++---
 pkg/chunked/internal/compression.go |  3 -
 pkg/chunked/storage_linux.go        | 85 ++++++++++++++++-------------
 3 files changed, 64 insertions(+), 49 deletions(-)

diff --git a/pkg/chunked/cache_linux.go b/pkg/chunked/cache_linux.go
index 1e3ad86d1..442030719 100644
--- a/pkg/chunked/cache_linux.go
+++ b/pkg/chunked/cache_linux.go
@@ -174,7 +174,7 @@ func (c *layersCache) load() error {
 // calculateHardLinkFingerprint calculates a hash that can be used to verify if a file
 // is usable for deduplication with hardlinks.
 // To calculate the digest, it uses the file payload digest, UID, GID, mode and xattrs.
-func calculateHardLinkFingerprint(f *internal.FileMetadata) (string, error) {
+func calculateHardLinkFingerprint(f *fileMetadata) (string, error) {
 	digester := digest.Canonical.Digester()
 
 	modeString := fmt.Sprintf("%d:%d:%o", f.UID, f.GID, f.Mode)
@@ -416,7 +416,7 @@ func readMetadataFromCache(bigData io.Reader) (*metadata, error) {
 	}, nil
 }
 
-func prepareMetadata(manifest []byte, format graphdriver.DifferOutputFormat) ([]*internal.FileMetadata, error) {
+func prepareMetadata(manifest []byte, format graphdriver.DifferOutputFormat) ([]*fileMetadata, error) {
 	toc, err := unmarshalToc(manifest)
 	if err != nil {
 		// ignore errors here.  They might be caused by a different manifest format.
@@ -424,10 +424,17 @@ func prepareMetadata(manifest []byte, format graphdriver.DifferOutputFormat) ([]
 		return nil, nil //nolint: nilnil
 	}
 
+	var entries []fileMetadata
+	for i := range toc.Entries {
+		entries = append(entries, fileMetadata{
+			FileMetadata: toc.Entries[i],
+		})
+	}
+
 	switch format {
 	case graphdriver.DifferOutputFormatDir:
 	case graphdriver.DifferOutputFormatFlat:
-		toc.Entries, err = makeEntriesFlat(toc.Entries)
+		entries, err = makeEntriesFlat(entries)
 		if err != nil {
 			return nil, err
 		}
@@ -435,19 +442,19 @@ func prepareMetadata(manifest []byte, format graphdriver.DifferOutputFormat) ([]
 		return nil, fmt.Errorf("unknown format %q", format)
 	}
 
-	var r []*internal.FileMetadata
+	var r []*fileMetadata
 	chunkSeen := make(map[string]bool)
-	for i := range toc.Entries {
-		d := toc.Entries[i].Digest
+	for i := range entries {
+		d := entries[i].Digest
 		if d != "" {
-			r = append(r, &toc.Entries[i])
+			r = append(r, &entries[i])
 			continue
 		}
 
 		// chunks do not use hard link dedup so keeping just one candidate is enough
 		cd := toc.Entries[i].ChunkDigest
 		if cd != "" && !chunkSeen[cd] {
-			r = append(r, &toc.Entries[i])
+			r = append(r, &entries[i])
			chunkSeen[cd] = true
 		}
 	}
@@ -527,7 +534,7 @@ func (c *layersCache) findDigestInternal(digest string) (string, string, int64,
 
 // findFileInOtherLayers finds the specified file in other layers.
 // file is the file to look for.
-func (c *layersCache) findFileInOtherLayers(file *internal.FileMetadata, useHardLinks bool) (string, string, error) {
+func (c *layersCache) findFileInOtherLayers(file *fileMetadata, useHardLinks bool) (string, string, error) {
 	digest := file.Digest
 	if useHardLinks {
 		var err error
diff --git a/pkg/chunked/internal/compression.go b/pkg/chunked/internal/compression.go
index caa581efe..54da17fd7 100644
--- a/pkg/chunked/internal/compression.go
+++ b/pkg/chunked/internal/compression.go
@@ -48,9 +48,6 @@ type FileMetadata struct {
 	ChunkOffset int64  `json:"chunkOffset,omitempty"`
 	ChunkDigest string `json:"chunkDigest,omitempty"`
 	ChunkType   string `json:"chunkType,omitempty"`
-
-	// internal: computed by mergeTOCEntries.
-	Chunks []*FileMetadata `json:"-"`
 }
 
 const (
diff --git a/pkg/chunked/storage_linux.go b/pkg/chunked/storage_linux.go
index f300df347..45ca14ab7 100644
--- a/pkg/chunked/storage_linux.go
+++ b/pkg/chunked/storage_linux.go
@@ -58,6 +58,16 @@ const (
 	copyGoRoutines = 32
 )
 
+// fileMetadata is a wrapper around internal.FileMetadata with additional private fields that
+// are not part of the TOC document.
+// Type: TypeChunk entries are stored in chunks, the primary [fileMetadata] entries never use TypeChunk.
+type fileMetadata struct {
+	internal.FileMetadata
+
+	// chunks stores the TypeChunk entries relevant to this entry when FileMetadata.Type == TypeReg.
+	chunks []*internal.FileMetadata
+}
+
 type compressedFileType int
 
 type chunkedDiffer struct {
@@ -354,7 +364,7 @@ func makeCopyBuffer() []byte {
 // name is the path to the file to copy in source.
 // dirfd is an open file descriptor to the destination root directory.
 // useHardLinks defines whether the deduplication can be performed using hard links.
-func copyFileFromOtherLayer(file *internal.FileMetadata, source string, name string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
+func copyFileFromOtherLayer(file *fileMetadata, source string, name string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
 	srcDirfd, err := unix.Open(source, unix.O_RDONLY, 0)
 	if err != nil {
 		return false, nil, 0, fmt.Errorf("open source file: %w", err)
 	}
@@ -376,7 +386,7 @@ func copyFileFromOtherLayer(file *internal.FileMetadata, source str
 
 // canDedupMetadataWithHardLink says whether it is possible to deduplicate file with otherFile.
 // It checks that the two files have the same UID, GID, file mode and xattrs.
-func canDedupMetadataWithHardLink(file *internal.FileMetadata, otherFile *internal.FileMetadata) bool {
+func canDedupMetadataWithHardLink(file *fileMetadata, otherFile *fileMetadata) bool {
 	if file.UID != otherFile.UID {
 		return false
 	}
@@ -394,7 +404,7 @@ func canDedupMetadataWithHardLink(file *internal.FileMetadata, otherFile *intern
 
 // canDedupFileWithHardLink checks if the specified file can be deduplicated by an
 // open file, given its descriptor and stat data.
-func canDedupFileWithHardLink(file *internal.FileMetadata, fd int, s os.FileInfo) bool {
+func canDedupFileWithHardLink(file *fileMetadata, fd int, s os.FileInfo) bool {
 	st, ok := s.Sys().(*syscall.Stat_t)
 	if !ok {
 		return false
@@ -420,11 +430,13 @@ func canDedupFileWithHardLink(file *internal.FileMetadata, fd int, s os.FileInfo
 		xattrs[x] = string(v)
 	}
 	// fill only the attributes used by canDedupMetadataWithHardLink.
-	otherFile := internal.FileMetadata{
-		UID:    int(st.Uid),
-		GID:    int(st.Gid),
-		Mode:   int64(st.Mode),
-		Xattrs: xattrs,
+	otherFile := fileMetadata{
+		FileMetadata: internal.FileMetadata{
+			UID:    int(st.Uid),
+			GID:    int(st.Gid),
+			Mode:   int64(st.Mode),
+			Xattrs: xattrs,
+		},
 	}
 	return canDedupMetadataWithHardLink(file, &otherFile)
 }
@@ -434,7 +446,7 @@ func canDedupFileWithHardLink(file *internal.FileMetadata, fd int, s os.FileInfo
 
 // ostreeRepos is a list of OSTree repos.
 // dirfd is an open fd to the destination checkout.
 // useHardLinks defines whether the deduplication can be performed using hard links.
-func findFileInOSTreeRepos(file *internal.FileMetadata, ostreeRepos []string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
+func findFileInOSTreeRepos(file *fileMetadata, ostreeRepos []string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
 	digest, err := digest.Parse(file.Digest)
 	if err != nil {
 		logrus.Debugf("could not parse digest: %v", err)
@@ -487,7 +499,7 @@ func findFileInOSTreeRepos(file *internal.FileMetadata, ostreeRepos []string, di
 // file is the file to look for.
 // dirfd is an open file descriptor to the checkout root directory.
 // useHardLinks defines whether the deduplication can be performed using hard links.
-func findFileInOtherLayers(cache *layersCache, file *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
+func findFileInOtherLayers(cache *layersCache, file *fileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
 	target, name, err := cache.findFileInOtherLayers(file, useHardLinks)
 	if err != nil || name == "" {
 		return false, nil, 0, err
 	}
@@ -495,7 +507,7 @@ func findFileInOtherLayers(cache *layersCache, file *internal.FileMetadata, dirf
 	return copyFileFromOtherLayer(file, target, name, dirfd, useHardLinks)
 }
 
-func maybeDoIDRemap(manifest []internal.FileMetadata, options *archive.TarOptions) error {
+func maybeDoIDRemap(manifest []fileMetadata, options *archive.TarOptions) error {
 	if options.ChownOpts == nil && len(options.UIDMaps) == 0 || len(options.GIDMaps) == 0 {
 		return nil
 	}
@@ -529,7 +541,7 @@ func mapToSlice(inputMap map[uint32]struct{}) []uint32 {
 	return out
 }
 
-func collectIDs(entries []internal.FileMetadata) ([]uint32, []uint32) {
+func collectIDs(entries []fileMetadata) ([]uint32, []uint32) {
 	uids := make(map[uint32]struct{})
 	gids := make(map[uint32]struct{})
 	for _, entry := range entries {
@@ -549,7 +561,7 @@ type missingFileChunk struct {
 	Gap  int64
 	Hole bool
 
-	File *internal.FileMetadata
+	File *fileMetadata
 
 	CompressedSize   int64
 	UncompressedSize int64
@@ -582,7 +594,7 @@ func (o *originFile) OpenFile() (io.ReadCloser, error) {
 }
 
 // setFileAttrs sets the file attributes for file given metadata
-func setFileAttrs(dirfd int, file *os.File, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions, usePath bool) error {
+func setFileAttrs(dirfd int, file *os.File, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions, usePath bool) error {
 	if file == nil || file.Fd() < 0 {
 		return errors.New("invalid file")
 	}
@@ -944,14 +956,14 @@ type destinationFile struct {
 	dirfd          int
 	file           *os.File
 	hash           hash.Hash
-	metadata       *internal.FileMetadata
+	metadata       *fileMetadata
 	options        *archive.TarOptions
 	skipValidation bool
 	to             io.Writer
 	recordFsVerity recordFsVerityFunc
 }
 
-func openDestinationFile(dirfd int, metadata *internal.FileMetadata, options *archive.TarOptions, skipValidation bool, recordFsVerity recordFsVerityFunc) (*destinationFile, error) {
+func openDestinationFile(dirfd int, metadata *fileMetadata, options *archive.TarOptions, skipValidation bool, recordFsVerity recordFsVerityFunc) (*destinationFile, error) {
 	file, err := openFileUnderRoot(metadata.Name, dirfd, newFileFlags, 0)
 	if err != nil {
 		return nil, err
 	}
@@ -1314,7 +1326,7 @@ func (c *chunkedDiffer) retrieveMissingFiles(stream ImageSourceSeekable, dest st
 	return nil
 }
 
-func safeMkdir(dirfd int, mode os.FileMode, name string, metadata *internal.FileMetadata, options *archive.TarOptions) error {
+func safeMkdir(dirfd int, mode os.FileMode, name string, metadata *fileMetadata, options *archive.TarOptions) error {
 	parent := filepath.Dir(name)
 	base := filepath.Base(name)
 
@@ -1343,7 +1355,7 @@ func safeMkdir(dirfd int, mode os.FileMode, name string, metadata *internal.File
 	return setFileAttrs(dirfd, file, mode, metadata, options, false)
 }
 
-func safeLink(dirfd int, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) error {
+func safeLink(dirfd int, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions) error {
 	sourceFile, err := openFileUnderRoot(metadata.Linkname, dirfd, unix.O_PATH|unix.O_RDONLY|unix.O_NOFOLLOW, 0)
 	if err != nil {
 		return err
 	}
@@ -1385,7 +1397,7 @@ func safeLink(dirfd int, mode os.FileMode, metadata *internal.FileMetadata, opti
 	return setFileAttrs(dirfd, newFile, mode, metadata, options, false)
 }
 
-func safeSymlink(dirfd int, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) error {
+func safeSymlink(dirfd int, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions) error {
 	destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name)
 	destDirFd := dirfd
 	if destDir != "." {
@@ -1473,7 +1485,7 @@ type hardLinkToCreate struct {
 	dest     string
 	dirfd    int
 	mode     os.FileMode
-	metadata *internal.FileMetadata
+	metadata *fileMetadata
 }
 
 func parseBooleanPullOption(storeOpts *storage.StoreOptions, name string, def bool) bool {
@@ -1498,7 +1510,7 @@ func reopenFileReadOnly(f *os.File) (*os.File, error) {
 	return os.NewFile(uintptr(fd), f.Name()), nil
 }
 
-func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *internal.FileMetadata, copyOptions *findAndCopyFileOptions, mode os.FileMode) (bool, error) {
+func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *fileMetadata, copyOptions *findAndCopyFileOptions, mode os.FileMode) (bool, error) {
 	finalizeFile := func(dstFile *os.File) error {
 		if dstFile == nil {
 			return nil
 		}
@@ -1549,8 +1561,8 @@ func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *internal.FileMetadata, cop
 	return false, nil
 }
 
-func makeEntriesFlat(mergedEntries []internal.FileMetadata) ([]internal.FileMetadata, error) {
-	var new []internal.FileMetadata
+func makeEntriesFlat(mergedEntries []fileMetadata) ([]fileMetadata, error) {
+	var new []fileMetadata
 	hashes := make(map[string]string)
 
 	for i := range mergedEntries {
@@ -1729,13 +1741,12 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 
 	var missingParts []missingPart
 
-	output.UIDs, output.GIDs = collectIDs(toc.Entries)
-
 	mergedEntries, totalSize, err := c.mergeTocEntries(c.fileType, toc.Entries)
 	if err != nil {
 		return output, err
 	}
 
+	output.UIDs, output.GIDs = collectIDs(mergedEntries)
 	output.Size = totalSize
 
 	if err := maybeDoIDRemap(mergedEntries, options); err != nil {
@@ -1789,7 +1800,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 		njob     int
 		index    int
 		mode     os.FileMode
-		metadata *internal.FileMetadata
+		metadata *fileMetadata
 
 		found bool
 		err   error
@@ -1961,7 +1972,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 			remainingSize := r.Size
 
 			// the file is missing, attempt to find individual chunks.
-			for _, chunk := range r.Chunks {
+			for _, chunk := range r.chunks {
 				compressedSize := int64(chunk.EndOffset - chunk.Offset)
 				size := remainingSize
 				if chunk.ChunkSize > 0 {
@@ -2045,7 +2056,7 @@ func mustSkipFile(fileType compressedFileType, e internal.FileMetadata) bool {
 	return false
 }
 
-func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]internal.FileMetadata, int64, error) {
+func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]fileMetadata, int64, error) {
 	var totalFilesSize int64
 
 	countNextChunks := func(start int) int {
@@ -2069,11 +2080,11 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
 		}
 	}
 
-	mergedEntries := make([]internal.FileMetadata, size)
+	mergedEntries := make([]fileMetadata, size)
 	m := 0
 	for i := 0; i < len(entries); i++ {
-		e := entries[i]
-		if mustSkipFile(fileType, e) {
+		e := fileMetadata{FileMetadata: entries[i]}
+		if mustSkipFile(fileType, entries[i]) {
 			continue
 		}
 
@@ -2086,12 +2097,12 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
 
 		if e.Type == TypeReg {
 			nChunks := countNextChunks(i + 1)
-			e.Chunks = make([]*internal.FileMetadata, nChunks+1)
+			e.chunks = make([]*internal.FileMetadata, nChunks+1)
 			for j := 0; j <= nChunks; j++ {
 				// we need a copy here, otherwise we override the
 				// .Size later
 				copy := entries[i+j]
-				e.Chunks[j] = &copy
+				e.chunks[j] = &copy
 				e.EndOffset = entries[i+j].EndOffset
 			}
 			i += nChunks
@@ -2110,10 +2121,10 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
 		}
 
 		lastChunkOffset := mergedEntries[i].EndOffset
-		for j := len(mergedEntries[i].Chunks) - 1; j >= 0; j-- {
-			mergedEntries[i].Chunks[j].EndOffset = lastChunkOffset
-			mergedEntries[i].Chunks[j].Size = mergedEntries[i].Chunks[j].EndOffset - mergedEntries[i].Chunks[j].Offset
-			lastChunkOffset = mergedEntries[i].Chunks[j].Offset
+		for j := len(mergedEntries[i].chunks) - 1; j >= 0; j-- {
+			mergedEntries[i].chunks[j].EndOffset = lastChunkOffset
+			mergedEntries[i].chunks[j].Size = mergedEntries[i].chunks[j].EndOffset - mergedEntries[i].chunks[j].Offset
+			lastChunkOffset = mergedEntries[i].chunks[j].Offset
 		}
 	}
 	return mergedEntries, totalFilesSize, nil

From 04c198333e1d4d4d443d090a8a514d30459651b9 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano
Date: Thu, 14 Mar 2024 11:14:25 +0100
Subject: [PATCH 02/20] chunked: add way to skip setting file metadata

when it is set, only the file payload is written, but the inode
attributes are ignored.

Signed-off-by: Giuseppe Scrivano
(cherry picked from commit f52cbe08c163bf7e6d59a3f24cb7541a8980a795)
---
 pkg/chunked/storage_linux.go | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pkg/chunked/storage_linux.go b/pkg/chunked/storage_linux.go
index 45ca14ab7..ece1f4cd1 100644
--- a/pkg/chunked/storage_linux.go
+++ b/pkg/chunked/storage_linux.go
@@ -66,6 +66,11 @@ type fileMetadata struct {
 	// chunks stores the TypeChunk entries relevant to this entry when FileMetadata.Type == TypeReg.
 	chunks []*internal.FileMetadata
+
+	// skipSetAttrs is set when the file attributes must not be
+	// modified, e.g. it is a hard link from a different source,
+	// or a composefs file.
+	skipSetAttrs bool
 }
 
 type compressedFileType int
 
@@ -595,6 +600,9 @@ func (o *originFile) OpenFile() (io.ReadCloser, error) {
 
 // setFileAttrs sets the file attributes for file given metadata
 func setFileAttrs(dirfd int, file *os.File, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions, usePath bool) error {
+	if metadata.skipSetAttrs {
+		return nil
+	}
 	if file == nil || file.Fd() < 0 {
 		return errors.New("invalid file")
 	}

From 2455bbadba7372bce21baee96289600e6d8d1964 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano
Date: Thu, 14 Mar 2024 11:20:57 +0100
Subject: [PATCH 03/20] chunked: skip file metadata for hard links

if a file was deduplicated with a hard link, do not override its
metadata.

Signed-off-by: Giuseppe Scrivano
(cherry picked from commit 0f12ecea7908d2568b69c82ba50e4357809d9b61)
---
 pkg/chunked/storage_linux.go | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/pkg/chunked/storage_linux.go b/pkg/chunked/storage_linux.go
index ece1f4cd1..682245f03 100644
--- a/pkg/chunked/storage_linux.go
+++ b/pkg/chunked/storage_linux.go
@@ -153,7 +153,8 @@ func doHardLink(srcFd int, destDirFd int, destBase string) error {
 	return err
 }
 
-func copyFileContent(srcFd int, destFile string, dirfd int, mode os.FileMode, useHardLinks bool) (*os.File, int64, error) {
+func copyFileContent(srcFd int, fileMetadata *fileMetadata, dirfd int, mode os.FileMode, useHardLinks bool) (*os.File, int64, error) {
+	destFile := fileMetadata.Name
 	src := fmt.Sprintf("/proc/self/fd/%d", srcFd)
 	st, err := os.Stat(src)
 	if err != nil {
@@ -171,6 +172,8 @@ func copyFileContent(srcFd int, destFile string, dirfd int, mode os.FileMode, us
 
 			err := doHardLink(srcFd, int(destDir.Fd()), destBase)
 			if err == nil {
+				// if the file was deduplicated with a hard link, skip overriding file metadata.
+				fileMetadata.skipSetAttrs = true
 				return nil, st.Size(), nil
 			}
 		}
@@ -382,7 +385,7 @@ func copyFileFromOtherLayer(file *fileMetadata, source string, name string, dirf
 	}
 	defer srcFile.Close()
 
-	dstFile, written, err := copyFileContent(int(srcFile.Fd()), file.Name, dirfd, 0, useHardLinks)
+	dstFile, written, err := copyFileContent(int(srcFile.Fd()), file, dirfd, 0, useHardLinks)
 	if err != nil {
 		return false, nil, 0, fmt.Errorf("copy content to %q: %w", file.Name, err)
 	}
@@ -484,7 +487,7 @@ func findFileInOSTreeRepos(file *fileMetadata, ostreeRepos []string, dirfd int,
 			continue
 		}
 
-		dstFile, written, err := copyFileContent(fd, file.Name, dirfd, 0, useHardLinks)
+		dstFile, written, err := copyFileContent(fd, file, dirfd, 0, useHardLinks)
 		if err != nil {
 			logrus.Debugf("could not copyFileContent: %v", err)
 			return false, nil, 0, nil

From 128cac0abb15e5ba2688090f42fb4f31a2e8ecbe Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano
Date: Thu, 14 Mar 2024 11:24:26 +0100
Subject: [PATCH 04/20] chunked: skip file metadata for composefs files

if the file is created using the object-store flat directory format,
there is no need to set its inode attributes, as they are ignored
anyway when creating the composefs binary blob.
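
A rough sketch of the flat-layout naming this relies on (the digest
value below is the well-known sha256 of an empty payload, used purely
as an example):

    package main

    import "fmt"

    func main() {
        // makeEntriesFlat names each regular file after its payload digest
        // (hex, without the "sha256:" prefix), first two digits as directory:
        d := "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        fmt.Printf("%s/%s\n", d[0:2], d[2:]) // e3/b0c44298...b855
        // composefs records ownership, mode and xattrs in its own binary
        // blob, so such entries are marked skipSetAttrs = true and their
        // inode attributes are never written to the backing file.
    }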
Signed-off-by: Giuseppe Scrivano
(cherry picked from commit 1126d65aa70e9756a32094e66c55f554d39ff227)
---
 pkg/chunked/storage_linux.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pkg/chunked/storage_linux.go b/pkg/chunked/storage_linux.go
index 682245f03..7bbfd7bed 100644
--- a/pkg/chunked/storage_linux.go
+++ b/pkg/chunked/storage_linux.go
@@ -1595,6 +1595,7 @@ func makeEntriesFlat(mergedEntries []fileMetadata) ([]fileMetadata, error) {
 		hashes[d] = d
 
 		mergedEntries[i].Name = fmt.Sprintf("%s/%s", d[0:2], d[2:])
+		mergedEntries[i].skipSetAttrs = true
 
 		new = append(new, mergedEntries[i])
 	}

From fbd6ec62dc1d9e1d135ca111a0f530d104bf22e6 Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin
Date: Wed, 13 Mar 2024 18:07:25 -0700
Subject: [PATCH 05/20] pkg/chunked: rename metadata to cacheFile

Signed-off-by: Kir Kolyshkin
(cherry picked from commit f7e661fecca0c7739f12f0373d40e14463d08d52)
---
 pkg/chunked/cache_linux.go      | 52 ++++++++++++++++-----------------
 pkg/chunked/cache_linux_test.go |  2 +-
 2 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/pkg/chunked/cache_linux.go b/pkg/chunked/cache_linux.go
index 442030719..a7604535f 100644
--- a/pkg/chunked/cache_linux.go
+++ b/pkg/chunked/cache_linux.go
@@ -30,7 +30,7 @@ const (
 	digestSha256Empty = "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
 )
 
-type metadata struct {
+type cacheFile struct {
 	tagLen    int
 	digestLen int
 	tags      []byte
@@ -38,9 +38,9 @@ type metadata struct {
 }
 
 type layer struct {
-	id       string
-	metadata *metadata
-	target   string
+	id        string
+	cacheFile *cacheFile
+	target    string
 }
 
 type layersCache struct {
@@ -115,9 +115,9 @@ func (c *layersCache) load() error {
 		// if the cache already exists, read and use it
 		if err == nil {
 			defer bigData.Close()
-			metadata, err := readMetadataFromCache(bigData)
+			cacheFile, err := readCacheFileFromReader(bigData)
 			if err == nil {
-				c.addLayer(r.ID, metadata)
+				c.addLayer(r.ID, cacheFile)
 				continue
 			}
 			logrus.Warningf("Error reading cache file for layer %q: %v", r.ID, err)
@@ -154,9 +154,9 @@ func (c *layersCache) load() error {
 			return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err)
 		}
 
-		metadata, err := writeCache(manifest, lcd.Format, r.ID, c.store)
+		cacheFile, err := writeCache(manifest, lcd.Format, r.ID, c.store)
 		if err == nil {
-			c.addLayer(r.ID, metadata)
+			c.addLayer(r.ID, cacheFile)
 		}
 	}
 
@@ -214,7 +214,7 @@ func generateFileLocation(path string, offset, len uint64) []byte {
 
 // generateTag generates a tag in the form $DIGEST$OFFSET@LEN.
 // the [OFFSET; LEN] points to the variable length data where the file locations
-// are stored. $DIGEST has length digestLen stored in the metadata file header.
+// are stored. $DIGEST has length digestLen stored in the cache file header.
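+// For example, with offset=1234 and len=56 (illustrative values) a tag is:
+//
+//	<digest>00000000000000001234@00000000000000000056
+//
+// Every tag therefore has the same fixed size, which is what lets findTag
+// binary search the tags buffer at tagLen strides.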
 func generateTag(digest string, offset, len uint64) string {
 	return fmt.Sprintf("%s%.20d@%.20d", digest, offset, len)
 }
@@ -231,7 +231,7 @@ type setBigData interface {
 // - digest(file.payload))
 // - digest(digest(file.payload) + file.UID + file.GID + file.mode + file.xattrs)
 // - digest(i) for each i in chunks(file payload)
-func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id string, dest setBigData) (*metadata, error) {
+func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id string, dest setBigData) (*cacheFile, error) {
 	var vdata bytes.Buffer
 	tagLen := 0
 	digestLen := 0
@@ -369,7 +369,7 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin
 
 	logrus.Debugf("Written lookaside cache for layer %q with length %v", id, counter.Count)
 
-	return &metadata{
+	return &cacheFile{
 		digestLen: digestLen,
 		tagLen:    tagLen,
 		tags:      tagsBuffer.Bytes(),
@@ -377,7 +377,7 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin
 	}, nil
 }
 
-func readMetadataFromCache(bigData io.Reader) (*metadata, error) {
+func readCacheFileFromReader(bigData io.Reader) (*cacheFile, error) {
 	var version, tagLen, digestLen, tagsLen, vdataLen uint64
 	if err := binary.Read(bigData, binary.LittleEndian, &version); err != nil {
 		return nil, err
 	}
@@ -408,7 +408,7 @@ func readMetadataFromCache(bigData io.Reader) (*metadata, error) {
 		return nil, err
 	}
 
-	return &metadata{
+	return &cacheFile{
 		tagLen:    int(tagLen),
 		digestLen: int(digestLen),
 		tags:      tags,
@@ -462,16 +462,16 @@ func prepareMetadata(manifest []byte, format graphdriver.DifferOutputFormat) ([]
 	return r, nil
 }
 
-func (c *layersCache) addLayer(id string, metadata *metadata) error {
+func (c *layersCache) addLayer(id string, cacheFile *cacheFile) error {
 	target, err := c.store.DifferTarget(id)
 	if err != nil {
 		return fmt.Errorf("get checkout directory layer %q: %w", id, err)
 	}
 
 	l := layer{
-		id:       id,
-		metadata: metadata,
-		target:   target,
+		id:        id,
+		cacheFile: cacheFile,
+		target:    target,
 	}
 	c.layers = append(c.layers, l)
 	return nil
@@ -481,22 +481,22 @@ func byteSliceAsString(b []byte) string {
 	return *(*string)(unsafe.Pointer(&b))
 }
 
-func findTag(digest string, metadata *metadata) (string, uint64, uint64) {
-	if len(digest) != metadata.digestLen {
+func findTag(digest string, cacheFile *cacheFile) (string, uint64, uint64) {
+	if len(digest) != cacheFile.digestLen {
 		return "", 0, 0
 	}
 
-	nElements := len(metadata.tags) / metadata.tagLen
+	nElements := len(cacheFile.tags) / cacheFile.tagLen
 
 	i := sort.Search(nElements, func(i int) bool {
-		d := byteSliceAsString(metadata.tags[i*metadata.tagLen : i*metadata.tagLen+metadata.digestLen])
+		d := byteSliceAsString(cacheFile.tags[i*cacheFile.tagLen : i*cacheFile.tagLen+cacheFile.digestLen])
 		return strings.Compare(d, digest) >= 0
 	})
 	if i < nElements {
-		d := string(metadata.tags[i*metadata.tagLen : i*metadata.tagLen+len(digest)])
+		d := string(cacheFile.tags[i*cacheFile.tagLen : i*cacheFile.tagLen+len(digest)])
 		if digest == d {
-			startOff := i*metadata.tagLen + metadata.digestLen
-			parts := strings.Split(string(metadata.tags[startOff:(i+1)*metadata.tagLen]), "@")
+			startOff := i*cacheFile.tagLen + cacheFile.digestLen
+			parts := strings.Split(string(cacheFile.tags[startOff:(i+1)*cacheFile.tagLen]), "@")
 
 			off, _ := strconv.ParseInt(parts[0], 10, 64)
@@ -516,9 +516,9 @@ func (c *layersCache) findDigestInternal(digest string) (string, string, int64,
 	defer c.mutex.RUnlock()
 
 	for _, layer := range c.layers {
-		digest, off, tagLen := findTag(digest, layer.metadata)
+		digest, off, tagLen := findTag(digest, layer.cacheFile)
 		if digest != "" {
-			position := string(layer.metadata.vdata[off : off+tagLen])
+			position := string(layer.cacheFile.vdata[off : off+tagLen])
 			parts := strings.SplitN(position, ":", 3)
 			if len(parts) != 3 {
 				continue
 			}
diff --git a/pkg/chunked/cache_linux_test.go b/pkg/chunked/cache_linux_test.go
index 957bc27b6..36744a21d 100644
--- a/pkg/chunked/cache_linux_test.go
+++ b/pkg/chunked/cache_linux_test.go
@@ -182,7 +182,7 @@ func TestReadCache(t *testing.T) {
 		t.Errorf("got error from writeCache: %v", err)
 	}
 
-	cacheRead, err := readMetadataFromCache(dest.buf)
+	cacheRead, err := readCacheFileFromReader(dest.buf)
 	if err != nil {
 		t.Errorf("got error from readMetadataFromCache: %v", err)
 	}

From 7b5a939b3b4a79098d17e5df3a480ce08025e652 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano
Date: Tue, 5 Mar 2024 12:51:35 +0100
Subject: [PATCH 06/20] chunked: use mmap to load cache files

reduce memory usage for the process by not loading entirely in memory
any cache file for the layers.

The memory mapped files can be shared among multiple instances of
Podman, as well as not being fully loaded in memory.

Signed-off-by: Giuseppe Scrivano
(cherry picked from commit 080dbaf67be043c01d08717a55eb4599861bbbeb)
---
 pkg/chunked/cache_linux.go      | 247 ++++++++++++++++++++++----------
 pkg/chunked/cache_linux_test.go |  20 +--
 2 files changed, 182 insertions(+), 85 deletions(-)

diff --git a/pkg/chunked/cache_linux.go b/pkg/chunked/cache_linux.go
index a7604535f..24baf72e0 100644
--- a/pkg/chunked/cache_linux.go
+++ b/pkg/chunked/cache_linux.go
@@ -7,6 +7,7 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"runtime"
 	"sort"
 	"strconv"
 	"strings"
@@ -21,6 +22,7 @@ import (
 	jsoniter "github.com/json-iterator/go"
 	digest "github.com/opencontainers/go-digest"
 	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
 )
 
 const (
@@ -41,10 +43,19 @@ type layer struct {
 	id        string
 	cacheFile *cacheFile
 	target    string
+	// mmapBuffer is nil when the cache file is fully loaded in memory.
+	// Otherwise it points to a mmap'ed buffer that is referenced by cacheFile.vdata.
+	mmapBuffer []byte
+
+	// reloadWithMmap is set when the current process generates the cache file,
+	// and cacheFile reuses the memory buffer used by the generation function.
+	// Next time the layer cache is used, attempt to reload the file using
+	// mmap.
+	reloadWithMmap bool
 }
 
 type layersCache struct {
-	layers []layer
+	layers []*layer
 	refs   int
 	store  storage.Store
 	mutex  sync.RWMutex
@@ -56,14 +67,29 @@ var (
 	cache      *layersCache
 )
 
+func (c *layer) release() {
+	runtime.SetFinalizer(c, nil)
+	if c.mmapBuffer != nil {
+		unix.Munmap(c.mmapBuffer)
+	}
+}
+
+func layerFinalizer(c *layer) {
+	c.release()
+}
+
 func (c *layersCache) release() {
 	cacheMutex.Lock()
 	defer cacheMutex.Unlock()
 
 	c.refs--
-	if c.refs == 0 {
-		cache = nil
+	if c.refs != 0 {
+		return
 	}
+	for _, l := range c.layers {
+		l.release()
+	}
+	cache = nil
 }
 
 func getLayersCacheRef(store storage.Store) *layersCache {
@@ -91,83 +117,153 @@ func getLayersCache(store storage.Store) (*layersCache, error) {
 	return c, nil
 }
 
+// loadLayerBigData attempts to load the specified cacheKey from a file and mmap its content.
+// If the cache is not backed by a file, then it loads the entire content in memory.
+// Returns the cache content, and if mmap'ed, the mmap buffer to Munmap.
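+// A caller that gets a non-nil mmap buffer owns it and must eventually
+// unix.Munmap it: loadLayerCache does so on error, and otherwise hands it
+// over to the layer, whose release() unmaps it.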
+func (c *layersCache) loadLayerBigData(layerID, bigDataKey string) ([]byte, []byte, error) {
+	inputFile, err := c.store.LayerBigData(layerID, bigDataKey)
+	if err != nil {
+		return nil, nil, err
+	}
+	defer inputFile.Close()
+
+	// if the cache is backed by a file, attempt to mmap it.
+	if osFile, ok := inputFile.(*os.File); ok {
+		st, err := osFile.Stat()
+		if err != nil {
+			logrus.Warningf("Error stat'ing cache file for layer %q: %v", layerID, err)
+			goto fallback
+		}
+		size := st.Size()
+		if size == 0 {
+			logrus.Warningf("Cache file size is zero for layer %q: %v", layerID, err)
+			goto fallback
+		}
+		buf, err := unix.Mmap(int(osFile.Fd()), 0, int(size), unix.PROT_READ, unix.MAP_SHARED)
+		if err != nil {
+			logrus.Warningf("Error mmap'ing cache file for layer %q: %v", layerID, err)
+			goto fallback
+		}
+		// best effort advise to the kernel.
+		_ = unix.Madvise(buf, unix.MADV_RANDOM)
+
+		return buf, buf, nil
+	}
+fallback:
+	buf, err := io.ReadAll(inputFile)
+	return buf, nil, err
+}
+
+func (c *layersCache) loadLayerCache(layerID string) (_ *layer, errRet error) {
+	buffer, mmapBuffer, err := c.loadLayerBigData(layerID, cacheKey)
+	if err != nil && !errors.Is(err, os.ErrNotExist) {
+		return nil, err
+	}
+	// there is no existing cache to load
+	if err != nil || buffer == nil {
+		return nil, nil
+	}
+	defer func() {
+		if errRet != nil && mmapBuffer != nil {
+			unix.Munmap(mmapBuffer)
+		}
+	}()
+	cacheFile, err := readCacheFileFromMemory(buffer)
+	if err != nil {
+		return nil, err
+	}
+	return c.createLayer(layerID, cacheFile, mmapBuffer)
+}
+
+func (c *layersCache) createCacheFileFromTOC(layerID string) (*layer, error) {
+	clFile, err := c.store.LayerBigData(layerID, chunkedLayerDataKey)
+	if err != nil && !errors.Is(err, os.ErrNotExist) {
+		return nil, err
+	}
+	var lcd chunkedLayerData
+	if err == nil && clFile != nil {
+		defer clFile.Close()
+		cl, err := io.ReadAll(clFile)
+		if err != nil {
+			return nil, fmt.Errorf("open manifest file: %w", err)
+		}
+		json := jsoniter.ConfigCompatibleWithStandardLibrary
+
+		if err := json.Unmarshal(cl, &lcd); err != nil {
+			return nil, err
+		}
+	}
+	manifestReader, err := c.store.LayerBigData(layerID, bigDataKey)
+	if err != nil {
+		return nil, err
+	}
+	defer manifestReader.Close()
+
+	manifest, err := io.ReadAll(manifestReader)
+	if err != nil {
+		return nil, fmt.Errorf("read manifest file: %w", err)
+	}
+
+	cacheFile, err := writeCache(manifest, lcd.Format, layerID, c.store)
+	if err != nil {
+		return nil, err
+	}
+	l, err := c.createLayer(layerID, cacheFile, nil)
+	if err != nil {
+		return nil, err
+	}
+	l.reloadWithMmap = true
+	return l, nil
+}
+
 func (c *layersCache) load() error {
 	c.mutex.Lock()
 	defer c.mutex.Unlock()
 
+	loadedLayers := make(map[string]*layer)
+	for _, r := range c.layers {
+		loadedLayers[r.id] = r
+	}
 	allLayers, err := c.store.Layers()
 	if err != nil {
 		return err
 	}
-	existingLayers := make(map[string]string)
-	for _, r := range c.layers {
-		existingLayers[r.id] = r.target
-	}
 
-	currentLayers := make(map[string]string)
+	var newLayers []*layer
 	for _, r := range allLayers {
-		currentLayers[r.ID] = r.ID
-		if _, found := existingLayers[r.ID]; found {
-			continue
-		}
-
-		bigData, err := c.store.LayerBigData(r.ID, cacheKey)
-		// if the cache already exists, read and use it
-		if err == nil {
-			defer bigData.Close()
-			cacheFile, err := readCacheFileFromReader(bigData)
-			if err == nil {
-				c.addLayer(r.ID, cacheFile)
+		// The layer is present in the store and it is already loaded.  Attempt to
+		// re-use it if mmap'ed.
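+		// Layers this process generated itself (reloadWithMmap) are
+		// intentionally not re-used here: they fall through to loadLayerCache
+		// below so the cache file is re-read, this time backed by mmap.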
+		if l, found := loadedLayers[r.ID]; found {
+			// If the layer is not marked for re-load, move it to newLayers.
+			if !l.reloadWithMmap {
+				delete(loadedLayers, r.ID)
+				newLayers = append(newLayers, l)
 				continue
 			}
-			logrus.Warningf("Error reading cache file for layer %q: %v", r.ID, err)
-		} else if !errors.Is(err, os.ErrNotExist) {
-			return err
 		}
-
-		var lcd chunkedLayerData
-
-		clFile, err := c.store.LayerBigData(r.ID, chunkedLayerDataKey)
-		if err != nil && !errors.Is(err, os.ErrNotExist) {
-			return err
-		}
-		if clFile != nil {
-			cl, err := io.ReadAll(clFile)
-			if err != nil {
-				return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err)
-			}
-			json := jsoniter.ConfigCompatibleWithStandardLibrary
-			if err := json.Unmarshal(cl, &lcd); err != nil {
-				return err
-			}
-		}
-
-		// otherwise create it from the layer TOC.
-		manifestReader, err := c.store.LayerBigData(r.ID, bigDataKey)
+		// try to read the existing cache file.
+		l, err := c.loadLayerCache(r.ID)
 		if err != nil {
+			logrus.Warningf("Error loading cache file for layer %q: %v", r.ID, err)
+		}
+		if l != nil {
+			newLayers = append(newLayers, l)
 			continue
 		}
-		defer manifestReader.Close()
-
-		manifest, err := io.ReadAll(manifestReader)
+		// the cache file is either not present or broken.  Try to generate it from the TOC.
+		l, err = c.createCacheFileFromTOC(r.ID)
 		if err != nil {
-			return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err)
+			logrus.Warningf("Error creating cache file for layer %q: %v", r.ID, err)
 		}
-
-		cacheFile, err := writeCache(manifest, lcd.Format, r.ID, c.store)
-		if err == nil {
-			c.addLayer(r.ID, cacheFile)
-		}
-	}
-
-	var newLayers []layer
-	for _, l := range c.layers {
-		if _, found := currentLayers[l.id]; found {
+		if l != nil {
 			newLayers = append(newLayers, l)
 		}
 	}
+	// The layers that are still in loadedLayers are either stale or fully loaded in memory. Clean them up.
+	for _, l := range loadedLayers {
+		l.release()
+	}
 	c.layers = newLayers
-
 	return nil
 }
@@ -237,7 +333,7 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin
 	digestLen := 0
 	var tagsBuffer bytes.Buffer
 
-	toc, err := prepareMetadata(manifest, format)
+	toc, err := prepareCacheFile(manifest, format)
 	if err != nil {
 		return nil, err
 	}
@@ -272,7 +368,6 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin
 			if _, err := vdata.Write(location); err != nil {
 				return nil, err
 			}
-
 			digestLen = len(k.Digest)
 		}
 		if k.ChunkDigest != "" {
@@ -377,7 +472,9 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin
 	}, nil
 }
 
-func readCacheFileFromReader(bigData io.Reader) (*cacheFile, error) {
+func readCacheFileFromMemory(bigDataBuffer []byte) (*cacheFile, error) {
+	bigData := bytes.NewReader(bigDataBuffer)
+
 	var version, tagLen, digestLen, tagsLen, vdataLen uint64
 	if err := binary.Read(bigData, binary.LittleEndian, &version); err != nil {
 		return nil, err
 	}
@@ -403,10 +500,8 @@ func readCacheFileFromMemory(bigDataBuffer []byte) (*cacheFile, error) {
 		return nil, err
 	}
 
-	vdata := make([]byte, vdataLen)
-	if _, err := bigData.Read(vdata); err != nil {
-		return nil, err
-	}
+	// retrieve the unread part of the buffer.
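+	// vdata deliberately aliases bigDataBuffer instead of copying it, so a
+	// mmap'ed cache file stays backed by the file; it is unmapped only via
+	// the owning layer's release().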
+	vdata := bigDataBuffer[len(bigDataBuffer)-bigData.Len():]
 
 	return &cacheFile{
 		tagLen:    int(tagLen),
@@ -416,7 +511,7 @@ func readCacheFileFromMemory(bigDataBuffer []byte) (*cacheFile, error) {
 	}, nil
 }
 
-func prepareMetadata(manifest []byte, format graphdriver.DifferOutputFormat) ([]*fileMetadata, error) {
+func prepareCacheFile(manifest []byte, format graphdriver.DifferOutputFormat) ([]*fileMetadata, error) {
 	toc, err := unmarshalToc(manifest)
 	if err != nil {
 		// ignore errors here.  They might be caused by a different manifest format.
@@ -462,19 +557,21 @@ func prepareCacheFile(manifest []byte, format graphdriver.DifferOutputFormat) ([]
 	return r, nil
 }
 
-func (c *layersCache) addLayer(id string, cacheFile *cacheFile) error {
+func (c *layersCache) createLayer(id string, cacheFile *cacheFile, mmapBuffer []byte) (*layer, error) {
 	target, err := c.store.DifferTarget(id)
 	if err != nil {
-		return fmt.Errorf("get checkout directory layer %q: %w", id, err)
+		return nil, fmt.Errorf("get checkout directory layer %q: %w", id, err)
 	}
-
-	l := layer{
-		id:        id,
-		cacheFile: cacheFile,
-		target:    target,
+	l := &layer{
+		id:         id,
+		cacheFile:  cacheFile,
+		target:     target,
+		mmapBuffer: mmapBuffer,
 	}
-	c.layers = append(c.layers, l)
-	return nil
+	if mmapBuffer != nil {
+		runtime.SetFinalizer(l, layerFinalizer)
+	}
+	return l, nil
 }
 
 func byteSliceAsString(b []byte) string {
diff --git a/pkg/chunked/cache_linux_test.go b/pkg/chunked/cache_linux_test.go
index 36744a21d..190ddb2f3 100644
--- a/pkg/chunked/cache_linux_test.go
+++ b/pkg/chunked/cache_linux_test.go
@@ -61,26 +61,26 @@ const jsonTOC = `
 `
 
 func TestPrepareMetadata(t *testing.T) {
-	toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatDir)
+	toc, err := prepareCacheFile([]byte(jsonTOC), graphdriver.DifferOutputFormatDir)
 	if err != nil {
-		t.Errorf("got error from prepareMetadata: %v", err)
+		t.Errorf("got error from prepareCacheFile: %v", err)
 	}
 	if len(toc) != 2 {
-		t.Error("prepareMetadata returns the wrong length")
+		t.Error("prepareCacheFile returns the wrong length")
 	}
 }
 
 func TestPrepareMetadataFlat(t *testing.T) {
-	toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatFlat)
+	toc, err := prepareCacheFile([]byte(jsonTOC), graphdriver.DifferOutputFormatFlat)
 	if err != nil {
-		t.Errorf("got error from prepareMetadata: %v", err)
+		t.Errorf("got error from prepareCacheFile: %v", err)
 	}
 	for _, e := range toc {
 		if len(strings.Split(e.Name, "/")) != 2 {
-			t.Error("prepareMetadata returns the wrong number of path elements for flat directories")
+			t.Error("prepareCacheFile returns the wrong number of path elements for flat directories")
 		}
 		if len(filepath.Dir(e.Name)) != 2 {
-			t.Error("prepareMetadata returns the wrong path for flat directories")
+			t.Error("prepareCacheFile returns the wrong path for flat directories")
 		}
 	}
 }
@@ -104,9 +104,9 @@ func (b *bigDataToBuffer) SetLayerBigData(id, key string, data io.Reader) error
 }
 
 func TestWriteCache(t *testing.T) {
-	toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatDir)
+	toc, err := prepareCacheFile([]byte(jsonTOC), graphdriver.DifferOutputFormatDir)
 	if err != nil {
-		t.Errorf("got error from prepareMetadata: %v", err)
+		t.Errorf("got error from prepareCacheFile: %v", err)
 	}
 
 	dest := bigDataToBuffer{
@@ -182,7 +182,7 @@ func TestReadCache(t *testing.T) {
 		t.Errorf("got error from writeCache: %v", err)
 	}
 
-	cacheRead, err := readCacheFileFromReader(dest.buf)
+	cacheRead, err := readCacheFileFromMemory(dest.buf.Bytes())
 	if err != nil {
 		t.Errorf("got error from readMetadataFromCache: %v", err)
 	}

From 69bedaa9a27a6ea675ef64329dfafd20e1401c9e Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano
Date: Tue, 9 Apr 2024 12:01:02 +0200
Subject: [PATCH 07/20] chunked: fix unmarshaling of file names

The getString() function was used to extract string values, but it
doesn't handle escaped characters.  Replace it with iter.ReadString()
that is slower but handles escaped characters correctly.

Closes: https://github.com/containers/storage/issues/1878

Signed-off-by: Giuseppe Scrivano
(cherry picked from commit f388a77afb31f25c387b81f60d17da1660390ca5)
---
 pkg/chunked/cache_linux.go          | 58 +++++------------------------
 pkg/chunked/cache_linux_test.go     | 24 +++++++++++-
 pkg/chunked/internal/compression.go |  3 --
 3 files changed, 32 insertions(+), 53 deletions(-)

diff --git a/pkg/chunked/cache_linux.go b/pkg/chunked/cache_linux.go
index 24baf72e0..3393af668 100644
--- a/pkg/chunked/cache_linux.go
+++ b/pkg/chunked/cache_linux.go
@@ -652,45 +652,9 @@ func (c *layersCache) findChunkInOtherLayers(chunk *internal.FileMetadata) (stri
 }
 
 func unmarshalToc(manifest []byte) (*internal.TOC, error) {
-	var buf bytes.Buffer
-	count := 0
 	var toc internal.TOC
 
 	iter := jsoniter.ParseBytes(jsoniter.ConfigFastest, manifest)
-	for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
-		if strings.ToLower(field) != "entries" {
-			iter.Skip()
-			continue
-		}
-		for iter.ReadArray() {
-			for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
-				switch strings.ToLower(field) {
-				case "type", "name", "linkname", "digest", "chunkdigest", "chunktype", "modtime", "accesstime", "changetime":
-					count += len(iter.ReadStringAsSlice())
-				case "xattrs":
-					for key := iter.ReadObject(); key != ""; key = iter.ReadObject() {
-						count += len(iter.ReadStringAsSlice())
-					}
-				default:
-					iter.Skip()
-				}
-			}
-		}
-		break
-	}
-
-	buf.Grow(count)
-
-	getString := func(b []byte) string {
-		from := buf.Len()
-		buf.Write(b)
-		to := buf.Len()
-		return byteSliceAsString(buf.Bytes()[from:to])
-	}
-
-	pool := iter.Pool()
-	pool.ReturnIterator(iter)
-	iter = pool.BorrowIterator(manifest)
 
 	for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
 		if strings.ToLower(field) == "version" {

 			for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
 				switch strings.ToLower(field) {
 				case "type":
-					m.Type = getString(iter.ReadStringAsSlice())
+					m.Type = iter.ReadString()
 				case "name":
-					m.Name = getString(iter.ReadStringAsSlice())
+					m.Name = iter.ReadString()
 				case "linkname":
-					m.Linkname = getString(iter.ReadStringAsSlice())
+					m.Linkname = iter.ReadString()
 				case "mode":
 					m.Mode = iter.ReadInt64()
 				case "size":
 					m.Size = iter.ReadInt64()
 				case "uid":
 					m.UID = iter.ReadInt()
 				case "gid":
 					m.GID = iter.ReadInt()
 				case "modtime":
-					time, err := time.Parse(time.RFC3339, byteSliceAsString(iter.ReadStringAsSlice()))
+					time, err := time.Parse(time.RFC3339, iter.ReadString())
 					if err != nil {
 						return nil, err
 					}
 					m.ModTime = &time
 				case "accesstime":
-					time, err := time.Parse(time.RFC3339, byteSliceAsString(iter.ReadStringAsSlice()))
+					time, err := time.Parse(time.RFC3339, iter.ReadString())
 					if err != nil {
 						return nil, err
 					}
 					m.AccessTime = &time
 				case "changetime":
-					time, err := time.Parse(time.RFC3339, byteSliceAsString(iter.ReadStringAsSlice()))
+					time, err := time.Parse(time.RFC3339, iter.ReadString())
 					if err != nil {
 						return nil, err
 					}
 					m.ChangeTime = &time
 				case "devmajor":
 					m.Devmajor = iter.ReadInt64()
@@ -742,7 +706,7 @@ func unmarshalToc(manifest []byte) (*internal.TOC, error) {
 				case "devminor":
 					m.Devminor = iter.ReadInt64()
 				case "digest":
-					m.Digest = getString(iter.ReadStringAsSlice())
+					m.Digest = iter.ReadString()
 				case "offset":
 					m.Offset = iter.ReadInt64()
 				case "endoffset":
 					m.EndOffset = iter.ReadInt64()
 				case "chunksize":
 					m.ChunkSize = iter.ReadInt64()
 				case "chunkoffset":
 					m.ChunkOffset = iter.ReadInt64()
 				case "chunkdigest":
-					m.ChunkDigest = getString(iter.ReadStringAsSlice())
+					m.ChunkDigest = iter.ReadString()
 				case "chunktype":
-					m.ChunkType = getString(iter.ReadStringAsSlice())
+					m.ChunkType = iter.ReadString()
 				case "xattrs":
 					m.Xattrs = make(map[string]string)
 					for key := iter.ReadObject(); key != ""; key = iter.ReadObject() {
-						value := iter.ReadStringAsSlice()
-						m.Xattrs[key] = getString(value)
+						m.Xattrs[key] = iter.ReadString()
 					}
 				default:
 					iter.Skip()
 				}

 		return nil, fmt.Errorf("unexpected data after manifest")
 	}
 
-	toc.StringsBuf = buf
 	return &toc, nil
 }
diff --git a/pkg/chunked/cache_linux_test.go b/pkg/chunked/cache_linux_test.go
index 190ddb2f3..3164e9b06 100644
--- a/pkg/chunked/cache_linux_test.go
+++ b/pkg/chunked/cache_linux_test.go
@@ -55,6 +55,23 @@ const jsonTOC = `
 			"chunkSize": 86252,
 			"chunkOffset": 17615,
 			"chunkDigest": "sha256:2a9d3f1b6b37abc8bb35eb8fa98b893a2a2447bcb01184c3bafc8c6b40da099d"
+		},
+		{
+			"type": "reg",
+			"name": "usr/lib/systemd/system/system-systemd\\x2dcryptsetup.slice",
+			"mode": 420,
+			"size": 468,
+			"modtime": "2024-03-03T18:04:57+01:00",
+			"accesstime": "0001-01-01T00:00:00Z",
+			"changetime": "0001-01-01T00:00:00Z",
+			"digest": "sha256:68dc6e85631e077f2bc751352459823844911b93b7ba2afd95d96c893222bb50",
+			"offset": 148185424,
+			"endOffset": 148185753
+		},
+		{
+			"type": "reg",
+			"name": "usr/lib/systemd/system/system-systemd\\x2dcryptsetup-hardlink.slice",
+			"linkName": "usr/lib/systemd/system/system-systemd\\x2dcryptsetup.slice"
 		}
 	]
 }
@@ -82,7 +99,7 @@ func TestPrepareMetadata(t *testing.T) {
 	if err != nil {
 		t.Errorf("got error from prepareCacheFile: %v", err)
 	}
-	if len(toc) != 2 {
+	if len(toc) != 4 {
 		t.Error("prepareCacheFile returns the wrong length")
 	}
 }
@@ -194,7 +211,7 @@ func TestReadCache(t *testing.T) {
 func TestUnmarshalToc(t *testing.T) {
 	toc, err := unmarshalToc([]byte(jsonTOC))
 	assert.NoError(t, err)
-	assert.Equal(t, 4, len(toc.Entries))
+	assert.Equal(t, 6, len(toc.Entries))
 
 	_, err = unmarshalToc([]byte(jsonTOC + " \n\n\n\n   "))
 	assert.NoError(t, err)
@@ -210,4 +227,7 @@ func TestUnmarshalToc(t *testing.T) {
 	assert.Error(t, err)
 	_, err = unmarshalToc([]byte(jsonTOC + "123"))
 	assert.Error(t, err)
+	assert.Equal(t, toc.Entries[4].Name, "usr/lib/systemd/system/system-systemd\\x2dcryptsetup.slice", "invalid name escaped")
+	assert.Equal(t, toc.Entries[5].Name, "usr/lib/systemd/system/system-systemd\\x2dcryptsetup-hardlink.slice", "invalid name escaped")
+	assert.Equal(t, toc.Entries[5].Linkname, "usr/lib/systemd/system/system-systemd\\x2dcryptsetup.slice", "invalid link name escaped")
 }
diff --git a/pkg/chunked/internal/compression.go b/pkg/chunked/internal/compression.go
index 54da17fd7..1136d67b8 100644
--- a/pkg/chunked/internal/compression.go
+++ b/pkg/chunked/internal/compression.go
@@ -21,9 +21,6 @@ import (
 type TOC struct {
 	Version int            `json:"version"`
 	Entries []FileMetadata `json:"entries"`
-
-	// internal: used by unmarshalToc
-	StringsBuf bytes.Buffer `json:"-"`
 }
 
 type FileMetadata struct {

From d25ef4c9635b3e03ba40af239d41d9808b154f9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miloslav=20Trma=C4=8D?=
Date: Sat, 13 Apr 2024 16:22:40 +0200
Subject: [PATCH 08/20] Only obtain the estargz TOC digest once
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make it structurally clear that the code is all using the same value,
making it less likely for the verifier and other uses to get out of
sync.

Also avoids some redundant parsing and error paths.

Should not change behavior.

Signed-off-by: Miloslav Trmač
(cherry picked from commit 3beea1e21e9bbefab97a69577d4ae827980e0e30)
---
 pkg/chunked/compression_linux.go |  9 ++-------
 pkg/chunked/storage_linux.go     | 17 ++++++++---------
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/pkg/chunked/compression_linux.go b/pkg/chunked/compression_linux.go
index 112ca2c7c..d5a478189 100644
--- a/pkg/chunked/compression_linux.go
+++ b/pkg/chunked/compression_linux.go
@@ -7,7 +7,6 @@ import (
 	"io"
 	"strconv"
 
-	"github.com/containerd/stargz-snapshotter/estargz"
 	"github.com/containers/storage/pkg/chunked/internal"
 	"github.com/klauspost/compress/zstd"
 	"github.com/klauspost/pgzip"
@@ -33,7 +32,7 @@ func typeToTarType(t string) (byte, error) {
 	return r, nil
 }
 
-func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, annotations map[string]string) ([]byte, int64, error) {
+func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, tocDigest digest.Digest) ([]byte, int64, error) {
 	// information on the format here https://github.com/containerd/stargz-snapshotter/blob/main/docs/stargz-estargz.md
 	footerSize := int64(51)
 	if blobSize <= footerSize {
@@ -126,11 +125,7 @@ func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64,
 		return nil, 0, err
 	}
 
-	d, err := digest.Parse(annotations[estargz.TOCJSONDigestAnnotation])
-	if err != nil {
-		return nil, 0, err
-	}
-	if manifestDigester.Digest() != d {
+	if manifestDigester.Digest() != tocDigest {
 		return nil, 0, errors.New("invalid manifest checksum")
 	}
 
diff --git a/pkg/chunked/storage_linux.go b/pkg/chunked/storage_linux.go
index 7bbfd7bed..05fb51f46 100644
--- a/pkg/chunked/storage_linux.go
+++ b/pkg/chunked/storage_linux.go
@@ -265,7 +265,7 @@ func GetDiffer(ctx context.Context, store storage.Store, blobDigest digest.Diges
 	}
 
 	_, hasZstdChunkedTOC := annotations[internal.ManifestChecksumKey]
-	_, hasEstargzTOC := annotations[estargz.TOCJSONDigestAnnotation]
+	estargzTOCDigestString, hasEstargzTOC := annotations[estargz.TOCJSONDigestAnnotation]
 
 	if hasZstdChunkedTOC && hasEstargzTOC {
 		return nil, errors.New("both zstd:chunked and eStargz TOC found")
@@ -275,7 +275,11 @@ func GetDiffer(ctx context.Context, store storage.Store, blobDigest digest.Diges
 		return makeZstdChunkedDiffer(ctx, store, blobSize, annotations, iss, &storeOpts)
 	}
 	if hasEstargzTOC {
-		return makeEstargzChunkedDiffer(ctx, store, blobSize, annotations, iss, &storeOpts)
+		estargzTOCDigest, err := digest.Parse(estargzTOCDigestString)
+		if err != nil {
+			return nil, fmt.Errorf("parsing estargz TOC digest %q: %w", estargzTOCDigestString, err)
+		}
+		return makeEstargzChunkedDiffer(ctx, store, blobSize, estargzTOCDigest, iss, &storeOpts)
 	}
 
 	return makeConvertFromRawDiffer(ctx, store, blobDigest, blobSize, annotations, iss, &storeOpts)
@@ -333,8 +337,8 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in
 	}, nil
 }
 
-func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) {
+func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, tocDigest digest.Digest, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) {
-	manifest, tocOffset, err := readEstargzChunkedManifest(iss, blobSize, annotations)
+	manifest, tocOffset, err := readEstargzChunkedManifest(iss, blobSize, tocDigest)
 	if err != nil {
 		return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
 	}

 	if err != nil {
 		return nil, err
 	}
 
-	tocDigest, err := digest.Parse(annotations[estargz.TOCJSONDigestAnnotation])
-	if err != nil {
-		return nil, fmt.Errorf("parse TOC digest %q: %w", annotations[estargz.TOCJSONDigestAnnotation], err)
-	}
-
 	return &chunkedDiffer{
 		fsVerityDigests: make(map[string]string),
 		blobSize:        blobSize,

From 586ef7b39eeeb9b337adefeded00760c486b6080 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miloslav=20Trma=C4=8D?=
Date: Sat, 13 Apr 2024 16:39:28 +0200
Subject: [PATCH 09/20] Only obtain the zstd:chunked TOC digest once
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make it structurally clear that the code is all using the same value,
making it less likely for the verifier and other uses to get out of
sync.

Also avoids some redundant parsing and error paths.

The conversion path looks longer, but that's just moving the parsing
from the called function (which is redundant for other callers).

Should not change behavior.

Signed-off-by: Miloslav Trmač
(cherry picked from commit 1f47b38c0949e1d0f4099e3ba5b6b7659a462220)
---
 pkg/chunked/compression_linux.go    |  4 ++--
 pkg/chunked/internal/compression.go |  7 ++-----
 pkg/chunked/storage_linux.go        | 27 +++++++++++++++++----------
 pkg/chunked/zstdchunked_test.go     |  7 ++++++-
 4 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/pkg/chunked/compression_linux.go b/pkg/chunked/compression_linux.go
index d5a478189..a191b8cb0 100644
--- a/pkg/chunked/compression_linux.go
+++ b/pkg/chunked/compression_linux.go
@@ -135,7 +135,7 @@ func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64,
 // readZstdChunkedManifest reads the zstd:chunked manifest from the seekable stream blobStream. The blob total size must
 // be specified.
 // This function uses the io.github.containers.zstd-chunked. annotations when specified.
-func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, annotations map[string]string) ([]byte, []byte, int64, error) {
+func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, tocDigest digest.Digest, annotations map[string]string) ([]byte, []byte, int64, error) {
 	footerSize := int64(internal.FooterSizeSupported)
 	if blobSize <= footerSize {
 		return nil, nil, 0, errors.New("blob too small")
@@ -145,7 +145,7 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, ann
 
 	if offsetMetadata := annotations[internal.ManifestInfoKey]; offsetMetadata != "" {
 		var err error
-		footerData, err = internal.ReadFooterDataFromAnnotations(annotations)
+		footerData, err = internal.ReadFooterDataFromAnnotations(tocDigest, annotations)
 		if err != nil {
 			return nil, nil, 0, err
 		}
diff --git a/pkg/chunked/internal/compression.go b/pkg/chunked/internal/compression.go
index 1136d67b8..bbbf6e3e7 100644
--- a/pkg/chunked/internal/compression.go
+++ b/pkg/chunked/internal/compression.go
@@ -231,13 +231,10 @@ func footerDataToBlob(footer ZstdChunkedFooterData) []byte {
 }
 
 // ReadFooterDataFromAnnotations reads the zstd:chunked footer data from the given annotations.
-func ReadFooterDataFromAnnotations(annotations map[string]string) (ZstdChunkedFooterData, error) {
+func ReadFooterDataFromAnnotations(tocDigest digest.Digest, annotations map[string]string) (ZstdChunkedFooterData, error) {
 	var footerData ZstdChunkedFooterData
 
-	footerData.ChecksumAnnotation = annotations[ManifestChecksumKey]
-	if footerData.ChecksumAnnotation == "" {
-		return footerData, fmt.Errorf("manifest checksum annotation %q not found", ManifestChecksumKey)
-	}
+	footerData.ChecksumAnnotation = tocDigest.String()
 
 	offsetMetadata := annotations[ManifestInfoKey]
 
diff --git a/pkg/chunked/storage_linux.go b/pkg/chunked/storage_linux.go
index 05fb51f46..2c49967c7 100644
--- a/pkg/chunked/storage_linux.go
+++ b/pkg/chunked/storage_linux.go
@@ -25,6 +25,7 @@ import (
 	"github.com/containers/storage/pkg/archive"
 	"github.com/containers/storage/pkg/chunked/compressor"
 	"github.com/containers/storage/pkg/chunked/internal"
+	"github.com/containers/storage/pkg/chunked/toc"
 	"github.com/containers/storage/pkg/fsverity"
 	"github.com/containers/storage/pkg/idtools"
 	"github.com/containers/storage/pkg/system"
@@ -264,7 +265,7 @@ func GetDiffer(ctx context.Context, store storage.Store, blobDigest digest.Diges
 		return nil, errors.New("enable_partial_images not configured")
 	}
 
-	_, hasZstdChunkedTOC := annotations[internal.ManifestChecksumKey]
+	zstdChunkedTOCDigestString, hasZstdChunkedTOC := annotations[internal.ManifestChecksumKey]
 	estargzTOCDigestString, hasEstargzTOC := annotations[estargz.TOCJSONDigestAnnotation]
 
 	if hasZstdChunkedTOC && hasEstargzTOC {
@@ -272,7 +273,11 @@ func GetDiffer(ctx context.Context, store storage.Store, blobDigest digest.Diges
 	}
 
 	if hasZstdChunkedTOC {
-		return makeZstdChunkedDiffer(ctx, store, blobSize, annotations, iss, &storeOpts)
+		zstdChunkedTOCDigest, err := digest.Parse(zstdChunkedTOCDigestString)
+		if err != nil {
+			return nil, fmt.Errorf("parsing zstd:chunked TOC digest %q: %w", zstdChunkedTOCDigestString, err)
+		}
+		return makeZstdChunkedDiffer(ctx, store, blobSize, zstdChunkedTOCDigest, annotations, iss, &storeOpts)
 	}
 	if hasEstargzTOC {
 		estargzTOCDigest, err := digest.Parse(estargzTOCDigestString)
@@ -307,8 +312,8 @@ func makeConvertFromRawDiffer(ctx context.Context, store storage.Store, blobDige
 	}, nil
 }
 
-func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) {
-	manifest, tarSplit, tocOffset, err := readZstdChunkedManifest(iss, blobSize, annotations)
+func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, tocDigest digest.Digest, annotations map[string]string, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) {
+	manifest, tarSplit, tocOffset, err := readZstdChunkedManifest(iss, blobSize, tocDigest, annotations)
 	if err != nil {
 		return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
 	}
@@ -317,11 +322,6 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in
 		return nil, err
 	}
 
-	tocDigest, err := digest.Parse(annotations[internal.ManifestChecksumKey])
-	if err != nil {
-		return nil, fmt.Errorf("parse TOC digest %q: %w", annotations[internal.ManifestChecksumKey], err)
-	}
-
 	return &chunkedDiffer{
 		fsVerityDigests: make(map[string]string),
 		blobSize:        blobSize,
@@ -1691,7 +1691,14 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 		blobFile.Close()
 		blobFile = nil
 
-		manifest, tarSplit, tocOffset, err := readZstdChunkedManifest(fileSource, c.blobSize, annotations)
+		tocDigest, err := toc.GetTOCDigest(annotations)
+		if err != nil {
+			return graphdriver.DriverWithDifferOutput{}, fmt.Errorf("internal error: parsing just-created zstd:chunked TOC digest: %w", err)
+		}
+		if tocDigest == nil {
+			return graphdriver.DriverWithDifferOutput{}, fmt.Errorf("internal error: just-created zstd:chunked missing TOC digest")
+		}
+		manifest, tarSplit, tocOffset, err := readZstdChunkedManifest(fileSource, c.blobSize, *tocDigest, annotations)
 		if err != nil {
 			return graphdriver.DriverWithDifferOutput{}, fmt.Errorf("read zstd:chunked manifest: %w", err)
 		}
diff --git a/pkg/chunked/zstdchunked_test.go b/pkg/chunked/zstdchunked_test.go
index 8041db9a2..ec1edffcf 100644
--- a/pkg/chunked/zstdchunked_test.go
+++ b/pkg/chunked/zstdchunked_test.go
@@ -12,8 +12,10 @@ import (
 	"testing"
 
 	"github.com/containers/storage/pkg/chunked/internal"
+	"github.com/containers/storage/pkg/chunked/toc"
 	"github.com/klauspost/compress/zstd"
 	"github.com/opencontainers/go-digest"
+	"github.com/stretchr/testify/require"
 )
 
 type seekable struct {
@@ -148,7 +150,10 @@ func TestGenerateAndParseManifest(t *testing.T) {
 		t: t,
 	}
 
-	manifest, _, _, err := readZstdChunkedManifest(s, 8192, annotations)
+	tocDigest, err := toc.GetTOCDigest(annotations)
+	require.NoError(t, err)
+	require.NotNil(t, tocDigest)
+	manifest, _, _, err := readZstdChunkedManifest(s, 8192, *tocDigest, annotations)
 	if err != nil {
 		t.Error(err)
 	}

From 1f81e602d0db8f71426c63bd411a0ffabaa102d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miloslav=20Trma=C4=8D?=
Date: Sat, 13 Apr 2024 16:49:11 +0200
Subject: [PATCH 10/20] Remove ChecksumAnnotation from ZstdChunkedFooterData
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Manage the value directly to simplify.

This happens to fix the ReadFooterDataFromBlob code path, which was
not setting ChecksumAnnotation at all.
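
Roughly, the TOC digest now flows as an explicit parameter instead of
being carried inside the footer struct (the two lines below are quoted
from the diffs in this series):

    // GetDiffer parses the digest out of the annotations once:
    zstdChunkedTOCDigest, err := digest.Parse(zstdChunkedTOCDigestString)

    // ...and readZstdChunkedManifest validates against it directly:
    decodedBlob, err := decodeAndValidateBlob(manifest,
            footerData.LengthUncompressed, tocDigest.String())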
Signed-off-by: Miloslav Trmač (cherry picked from commit 053ac6105dfe7e11b2e7d7f6cc2a11f71d9c5c57) --- pkg/chunked/compression_linux.go | 4 ++-- pkg/chunked/internal/compression.go | 6 +----- pkg/chunked/internal/compression_test.go | 1 - 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/pkg/chunked/compression_linux.go b/pkg/chunked/compression_linux.go index a191b8cb0..38a892a6e 100644 --- a/pkg/chunked/compression_linux.go +++ b/pkg/chunked/compression_linux.go @@ -145,7 +145,7 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, toc if offsetMetadata := annotations[internal.ManifestInfoKey]; offsetMetadata != "" { var err error - footerData, err = internal.ReadFooterDataFromAnnotations(tocDigest, annotations) + footerData, err = internal.ReadFooterDataFromAnnotations(annotations) if err != nil { return nil, nil, 0, err } @@ -233,7 +233,7 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, toc return nil, nil, 0, err } - decodedBlob, err := decodeAndValidateBlob(manifest, footerData.LengthUncompressed, footerData.ChecksumAnnotation) + decodedBlob, err := decodeAndValidateBlob(manifest, footerData.LengthUncompressed, tocDigest.String()) if err != nil { return nil, nil, 0, err } diff --git a/pkg/chunked/internal/compression.go b/pkg/chunked/internal/compression.go index bbbf6e3e7..f52a07a9f 100644 --- a/pkg/chunked/internal/compression.go +++ b/pkg/chunked/internal/compression.go @@ -183,7 +183,6 @@ func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, off Offset: manifestOffset, LengthCompressed: uint64(len(compressedManifest)), LengthUncompressed: uint64(len(manifest)), - ChecksumAnnotation: "", // unused OffsetTarSplit: uint64(tarSplitOffset), LengthCompressedTarSplit: uint64(len(tarSplitData.Data)), LengthUncompressedTarSplit: uint64(tarSplitData.UncompressedSize), @@ -207,7 +206,6 @@ type ZstdChunkedFooterData struct { Offset uint64 LengthCompressed uint64 LengthUncompressed uint64 - ChecksumAnnotation string // Only used when reading a layer, not when creating it OffsetTarSplit uint64 LengthCompressedTarSplit uint64 @@ -231,11 +229,9 @@ func footerDataToBlob(footer ZstdChunkedFooterData) []byte { } // ReadFooterDataFromAnnotations reads the zstd:chunked footer data from the given annotations. 
-func ReadFooterDataFromAnnotations(tocDigest digest.Digest, annotations map[string]string) (ZstdChunkedFooterData, error) { +func ReadFooterDataFromAnnotations(annotations map[string]string) (ZstdChunkedFooterData, error) { var footerData ZstdChunkedFooterData - footerData.ChecksumAnnotation = tocDigest.String() - offsetMetadata := annotations[ManifestInfoKey] if _, err := fmt.Sscanf(offsetMetadata, "%d:%d:%d:%d", &footerData.Offset, &footerData.LengthCompressed, &footerData.LengthUncompressed, &footerData.ManifestType); err != nil { diff --git a/pkg/chunked/internal/compression_test.go b/pkg/chunked/internal/compression_test.go index da660b89b..9d4e60d47 100644 --- a/pkg/chunked/internal/compression_test.go +++ b/pkg/chunked/internal/compression_test.go @@ -15,7 +15,6 @@ func TestGenerateAndReadFooter(t *testing.T) { Offset: 2, LengthCompressed: 3, LengthUncompressed: 4, - ChecksumAnnotation: "", // unused OffsetTarSplit: 5, LengthCompressedTarSplit: 6, LengthUncompressedTarSplit: 7, From 554c639d41cccca15b55b3aee4515539342cf490 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Wed, 20 Mar 2024 18:10:54 +0100 Subject: [PATCH 11/20] chunked: move cache file generation to separate function Signed-off-by: Giuseppe Scrivano (cherry picked from commit 397943be44e5cbb9d8f910a95bd46a4d89304193) --- pkg/chunked/cache_linux.go | 101 ++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 53 deletions(-) diff --git a/pkg/chunked/cache_linux.go b/pkg/chunked/cache_linux.go index 3393af668..2da11a50c 100644 --- a/pkg/chunked/cache_linux.go +++ b/pkg/chunked/cache_linux.go @@ -320,6 +320,52 @@ type setBigData interface { SetLayerBigData(id, key string, data io.Reader) error } +func writeCacheFileToWriter(writer io.Writer, tags []string, tagLen, digestLen int, vdata bytes.Buffer, tagsBuffer *bytes.Buffer) error { + sort.Strings(tags) + for _, t := range tags { + if _, err := tagsBuffer.Write([]byte(t)); err != nil { + return err + } + } + + // version + if err := binary.Write(writer, binary.LittleEndian, uint64(cacheVersion)); err != nil { + return err + } + + // len of a tag + if err := binary.Write(writer, binary.LittleEndian, uint64(tagLen)); err != nil { + return err + } + + // len of a digest + if err := binary.Write(writer, binary.LittleEndian, uint64(digestLen)); err != nil { + return err + } + + // tags length + if err := binary.Write(writer, binary.LittleEndian, uint64(tagsBuffer.Len())); err != nil { + return err + } + + // vdata length + if err := binary.Write(writer, binary.LittleEndian, uint64(vdata.Len())); err != nil { + return err + } + + // tags + if _, err := writer.Write(tagsBuffer.Bytes()); err != nil { + return err + } + + // variable length data + if _, err := writer.Write(vdata.Bytes()); err != nil { + return err + } + + return nil +} + // writeCache write a cache for the layer ID. // It generates a sorted list of digests with their offset to the path location and offset. // The same cache is used to lookup files, chunks and candidates for deduplication with hard links. 
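The extracted function also pins down the cache file's on-disk layout: five little-endian uint64 header fields (version, tag length, digest length, tags-section size, vdata-section size), followed by the tags and the variable-length data. For reference, a minimal sketch of a matching reader (readCacheHeader is a hypothetical helper; the real counterpart is readCacheFileFromMemory, extended by later patches in this series):

package chunked

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// readCacheHeader decodes the fixed-size little-endian header written by
// writeCacheFileToWriter, in the same order it was emitted.
func readCacheHeader(data []byte) error {
	r := bytes.NewReader(data)
	var version, tagLen, digestLen, tagsLen, vdataLen uint64
	for _, field := range []*uint64{&version, &tagLen, &digestLen, &tagsLen, &vdataLen} {
		if err := binary.Read(r, binary.LittleEndian, field); err != nil {
			return err
		}
	}
	fmt.Printf("version=%d tagLen=%d digestLen=%d tags=%dB vdata=%dB\n",
		version, tagLen, digestLen, tagsLen, vdataLen)
	return nil
}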
@@ -328,10 +374,9 @@ type setBigData interface {
 // - digest(digest(file.payload) + file.UID + file.GID + file.mode + file.xattrs)
 // - digest(i) for each i in chunks(file payload)
 func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id string, dest setBigData) (*cacheFile, error) {
-	var vdata bytes.Buffer
+	var vdata, tagsBuffer bytes.Buffer
 	tagLen := 0
 	digestLen := 0
-	var tagsBuffer bytes.Buffer
 
 	toc, err := prepareCacheFile(manifest, format)
 	if err != nil {
@@ -390,63 +435,13 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin
 		}
 	}
 
-	sort.Strings(tags)
-
-	for _, t := range tags {
-		if _, err := tagsBuffer.Write([]byte(t)); err != nil {
-			return nil, err
-		}
-	}
-
 	pipeReader, pipeWriter := io.Pipe()
 	errChan := make(chan error, 1)
 	go func() {
 		defer pipeWriter.Close()
 		defer close(errChan)
 
-		// version
-		if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(cacheVersion)); err != nil {
-			errChan <- err
-			return
-		}
-
-		// len of a tag
-		if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(tagLen)); err != nil {
-			errChan <- err
-			return
-		}
-
-		// len of a digest
-		if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(digestLen)); err != nil {
-			errChan <- err
-			return
-		}
-
-		// tags length
-		if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(tagsBuffer.Len())); err != nil {
-			errChan <- err
-			return
-		}
-
-		// vdata length
-		if err := binary.Write(pipeWriter, binary.LittleEndian, uint64(vdata.Len())); err != nil {
-			errChan <- err
-			return
-		}
-
-		// tags
-		if _, err := pipeWriter.Write(tagsBuffer.Bytes()); err != nil {
-			errChan <- err
-			return
-		}
-
-		// variable length data
-		if _, err := pipeWriter.Write(vdata.Bytes()); err != nil {
-			errChan <- err
-			return
-		}
-
-		errChan <- nil
+		errChan <- writeCacheFileToWriter(pipeWriter, tags, tagLen, digestLen, vdata, &tagsBuffer)
 	}()
 	defer pipeReader.Close()

From 0d6e102042358f79b74c4cdaf31b325a343f35fc Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano
Date: Sat, 23 Mar 2024 23:50:18 +0100
Subject: [PATCH 12/20] chunked: store digest in binary format

Use the binary representation for a given digest; it helps reduce the
file size by ~25%.

Signed-off-by: Giuseppe Scrivano
(cherry picked from commit 33472545fbd7ea3cb9193c1675f6a553c6c4dd64)
---
 pkg/chunked/cache_linux.go      | 78 +++++++++++++++++++++++----------
 pkg/chunked/cache_linux_test.go | 17 ++++++-
 2 files changed, 71 insertions(+), 24 deletions(-)

diff --git a/pkg/chunked/cache_linux.go b/pkg/chunked/cache_linux.go
index 2da11a50c..05d17c36b 100644
--- a/pkg/chunked/cache_linux.go
+++ b/pkg/chunked/cache_linux.go
@@ -3,6 +3,7 @@ package chunked
 import (
 	"bytes"
 	"encoding/binary"
+	"encoding/hex"
 	"errors"
 	"fmt"
 	"io"
@@ -13,7 +14,6 @@ import (
 	"strings"
 	"sync"
 	"time"
-	"unsafe"
 
 	storage "github.com/containers/storage"
 	graphdriver "github.com/containers/storage/drivers"
@@ -154,6 +154,23 @@ fallback:
 	return buf, nil, err
 }
 
+func makeBinaryDigest(stringDigest string) ([]byte, error) {
+	d, err := digest.Parse(stringDigest)
+	if err != nil {
+		return nil, err
+	}
+	digestBytes, err := hex.DecodeString(d.Encoded())
+	if err != nil {
+		return nil, err
+	}
+	algo := []byte(d.Algorithm())
+	buf := make([]byte, 0, len(algo)+1+len(digestBytes))
+	buf = append(buf, algo...)
+	buf = append(buf, ':')
+	buf = append(buf, digestBytes...)
+ return buf, nil +} + func (c *layersCache) loadLayerCache(layerID string) (_ *layer, errRet error) { buffer, mmapBuffer, err := c.loadLayerBigData(layerID, cacheKey) if err != nil && !errors.Is(err, os.ErrNotExist) { @@ -311,8 +328,9 @@ func generateFileLocation(path string, offset, len uint64) []byte { // generateTag generates a tag in the form $DIGEST$OFFSET@LEN. // the [OFFSET; LEN] points to the variable length data where the file locations // are stored. $DIGEST has length digestLen stored in the cache file file header. -func generateTag(digest string, offset, len uint64) string { - return fmt.Sprintf("%s%.20d@%.20d", digest, offset, len) +func generateTag(digest []byte, offset, len uint64) []byte { + tag := append(digest[:], []byte(fmt.Sprintf("%.20d@%.20d", offset, len))...) + return tag } type setBigData interface { @@ -320,10 +338,12 @@ type setBigData interface { SetLayerBigData(id, key string, data io.Reader) error } -func writeCacheFileToWriter(writer io.Writer, tags []string, tagLen, digestLen int, vdata bytes.Buffer, tagsBuffer *bytes.Buffer) error { - sort.Strings(tags) +func writeCacheFileToWriter(writer io.Writer, tags [][]byte, tagLen, digestLen int, vdata, tagsBuffer *bytes.Buffer) error { + sort.Slice(tags, func(i, j int) bool { + return bytes.Compare(tags[i], tags[j]) == -1 + }) for _, t := range tags { - if _, err := tagsBuffer.Write([]byte(t)); err != nil { + if _, err := tagsBuffer.Write(t); err != nil { return err } } @@ -383,15 +403,19 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin return nil, err } - var tags []string + var tags [][]byte for _, k := range toc { if k.Digest != "" { + digest, err := makeBinaryDigest(k.Digest) + if err != nil { + return nil, err + } location := generateFileLocation(k.Name, 0, uint64(k.Size)) off := uint64(vdata.Len()) l := uint64(len(location)) - d := generateTag(k.Digest, off, l) + d := generateTag(digest, off, l) if tagLen == 0 { tagLen = len(d) } @@ -404,7 +428,11 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin if err != nil { return nil, err } - d = generateTag(fp, off, l) + digestHardLink, err := makeBinaryDigest(fp) + if err != nil { + return nil, err + } + d = generateTag(digestHardLink, off, l) if tagLen != len(d) { return nil, errors.New("digest with different length found") } @@ -413,13 +441,19 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin if _, err := vdata.Write(location); err != nil { return nil, err } - digestLen = len(k.Digest) + digestLen = len(digestHardLink) } if k.ChunkDigest != "" { location := generateFileLocation(k.Name, uint64(k.ChunkOffset), uint64(k.ChunkSize)) off := uint64(vdata.Len()) l := uint64(len(location)) - d := generateTag(k.ChunkDigest, off, l) + + digest, err := makeBinaryDigest(k.ChunkDigest) + if err != nil { + return nil, err + } + + d := generateTag(digest, off, l) if tagLen == 0 { tagLen = len(d) } @@ -431,7 +465,7 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin if _, err := vdata.Write(location); err != nil { return nil, err } - digestLen = len(k.ChunkDigest) + digestLen = len(digest) } } @@ -441,7 +475,7 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin defer pipeWriter.Close() defer close(errChan) - errChan <- writeCacheFileToWriter(pipeWriter, tags, tagLen, digestLen, vdata, &tagsBuffer) + errChan <- writeCacheFileToWriter(pipeWriter, tags, tagLen, digestLen, &vdata, &tagsBuffer) }() defer pipeReader.Close() 
@@ -569,24 +603,24 @@ func (c *layersCache) createLayer(id string, cacheFile *cacheFile, mmapBuffer []
 	return l, nil
 }
 
-func byteSliceAsString(b []byte) string {
-	return *(*string)(unsafe.Pointer(&b))
-}
-
 func findTag(digest string, cacheFile *cacheFile) (string, uint64, uint64) {
-	if len(digest) != cacheFile.digestLen {
+	binaryDigest, err := makeBinaryDigest(digest)
+	if err != nil {
+		return "", 0, 0
+	}
+	if len(binaryDigest) != cacheFile.digestLen {
 		return "", 0, 0
 	}
+
 	nElements := len(cacheFile.tags) / cacheFile.tagLen
 
 	i := sort.Search(nElements, func(i int) bool {
-		d := byteSliceAsString(cacheFile.tags[i*cacheFile.tagLen : i*cacheFile.tagLen+cacheFile.digestLen])
-		return strings.Compare(d, digest) >= 0
+		d := cacheFile.tags[i*cacheFile.tagLen : i*cacheFile.tagLen+cacheFile.digestLen]
+		return bytes.Compare(d, binaryDigest) >= 0
 	})
 	if i < nElements {
-		d := string(cacheFile.tags[i*cacheFile.tagLen : i*cacheFile.tagLen+len(digest)])
-		if digest == d {
+		d := cacheFile.tags[i*cacheFile.tagLen : i*cacheFile.tagLen+len(binaryDigest)]
+		if bytes.Equal(binaryDigest, d) {
 			startOff := i*cacheFile.tagLen + cacheFile.digestLen
 			parts := strings.Split(string(cacheFile.tags[startOff:(i+1)*cacheFile.tagLen]), "@")
 
diff --git a/pkg/chunked/cache_linux_test.go b/pkg/chunked/cache_linux_test.go
index 3164e9b06..0804a9e51 100644
--- a/pkg/chunked/cache_linux_test.go
+++ b/pkg/chunked/cache_linux_test.go
@@ -133,8 +133,8 @@ func TestWriteCache(t *testing.T) {
 	if err != nil {
 		t.Errorf("got error from writeCache: %v", err)
 	}
-	if digest, _, _ := findTag("foobar", cache); digest != "" {
-		t.Error("found invalid tag")
+	if digest, _, _ := findTag("sha256:99fe908c699dc068438b23e28319cadff1f2153c3043bafb8e83a430bba0a2c2", cache); digest != "" {
+		t.Error("found a tag for a digest that is not present in the cache")
 	}
 
 	for _, r := range toc {
@@ -231,3 +231,16 @@ func TestUnmarshalToc(t *testing.T) {
 	assert.Equal(t, toc.Entries[5].Name, "usr/lib/systemd/system/system-systemd\\x2dcryptsetup-hardlink.slice", "invalid name escaped")
 	assert.Equal(t, toc.Entries[5].Linkname, "usr/lib/systemd/system/system-systemd\\x2dcryptsetup.slice", "invalid link name escaped")
 }
+
+func TestMakeBinaryDigest(t *testing.T) {
+	binDigest, err := makeBinaryDigest("sha256:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03")
+	assert.NoError(t, err)
+	expected := []byte{0x73, 0x68, 0x61, 0x32, 0x35, 0x36, 0x3a, 0x58, 0x91, 0xb5, 0xb5, 0x22, 0xd5, 0xdf, 0x8, 0x6d, 0xf, 0xf0, 0xb1, 0x10, 0xfb, 0xd9, 0xd2, 0x1b, 0xb4, 0xfc, 0x71, 0x63, 0xaf, 0x34, 0xd0, 0x82, 0x86, 0xa2, 0xe8, 0x46, 0xf6, 0xbe, 0x3}
+	assert.Equal(t, expected, binDigest)
+
+	_, err = makeBinaryDigest("sha256:foo")
+	assert.Error(t, err)
+
+	_, err = makeBinaryDigest("noAlgorithm")
+	assert.Error(t, err)
+}

From 1cafa743c259f433ffdc14b552098ed3a64e0011 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano
Date: Mon, 25 Mar 2024 10:23:21 +0100
Subject: [PATCH 13/20] chunked: store file offset and length in binary format

it helps reduce the cache file size by ~25%.
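For a sense of the saving (illustrative numbers, assuming sha256): a fully textual tag was a 71-byte digest string plus the 41-byte "%.20d@%.20d" suffix, 112 bytes in total; after the previous patch's binary digests it was 39 + 41 = 80 bytes; with this change it is a 39-byte digest plus two fixed-width u64 values, 55 bytes. A standalone sketch of the encoding and decoding, using placeholder values:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	// A placeholder binary sha256 digest: "sha256" + ':' + 32 raw bytes = 39 bytes.
	binDigest := make([]byte, 39)

	// appendTag-style encoding: offset and length as little-endian u64.
	tag := binary.LittleEndian.AppendUint64(binDigest, 4096) // offset into vdata
	tag = binary.LittleEndian.AppendUint64(tag, 128)         // length of the location data

	// Decoding reads the two fixed-width fields back from behind the digest.
	off := binary.LittleEndian.Uint64(tag[39:47])
	l := binary.LittleEndian.Uint64(tag[47:55])
	fmt.Println(len(tag), off, l) // 55 4096 128
}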
Signed-off-by: Giuseppe Scrivano (cherry picked from commit e6793e394c134ac68c3796894de273ae69ad7f57) --- pkg/chunked/cache_linux.go | 55 ++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/pkg/chunked/cache_linux.go b/pkg/chunked/cache_linux.go index 05d17c36b..11d88a303 100644 --- a/pkg/chunked/cache_linux.go +++ b/pkg/chunked/cache_linux.go @@ -325,12 +325,13 @@ func generateFileLocation(path string, offset, len uint64) []byte { return []byte(fmt.Sprintf("%d:%d:%s", offset, len, path)) } -// generateTag generates a tag in the form $DIGEST$OFFSET@LEN. -// the [OFFSET; LEN] points to the variable length data where the file locations +// appendTag appends the $OFFSET$LEN information to the provided $DIGEST. +// The [OFFSET; LEN] points to the variable length data where the file locations // are stored. $DIGEST has length digestLen stored in the cache file file header. -func generateTag(digest []byte, offset, len uint64) []byte { - tag := append(digest[:], []byte(fmt.Sprintf("%.20d@%.20d", offset, len))...) - return tag +func appendTag(digest []byte, offset, len uint64) ([]byte, error) { + digest = binary.LittleEndian.AppendUint64(digest, offset) + digest = binary.LittleEndian.AppendUint64(digest, len) + return digest, nil } type setBigData interface { @@ -415,14 +416,17 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin off := uint64(vdata.Len()) l := uint64(len(location)) - d := generateTag(digest, off, l) - if tagLen == 0 { - tagLen = len(d) + tag, err := appendTag(digest, off, l) + if err != nil { + return nil, err } - if tagLen != len(d) { + if tagLen == 0 { + tagLen = len(tag) + } + if tagLen != len(tag) { return nil, errors.New("digest with different length found") } - tags = append(tags, d) + tags = append(tags, tag) fp, err := calculateHardLinkFingerprint(k) if err != nil { @@ -432,11 +436,14 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin if err != nil { return nil, err } - d = generateTag(digestHardLink, off, l) - if tagLen != len(d) { + tag, err = appendTag(digestHardLink, off, l) + if err != nil { + return nil, err + } + if tagLen != len(tag) { return nil, errors.New("digest with different length found") } - tags = append(tags, d) + tags = append(tags, tag) if _, err := vdata.Write(location); err != nil { return nil, err @@ -452,8 +459,10 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin if err != nil { return nil, err } - - d := generateTag(digest, off, l) + d, err := appendTag(digest, off, l) + if err != nil { + return nil, err + } if tagLen == 0 { tagLen = len(d) } @@ -619,15 +628,21 @@ func findTag(digest string, cacheFile *cacheFile) (string, uint64, uint64) { return bytes.Compare(d, binaryDigest) >= 0 }) if i < nElements { - d := cacheFile.tags[i*cacheFile.tagLen : i*cacheFile.tagLen+len(binaryDigest)] + d := cacheFile.tags[i*cacheFile.tagLen : i*cacheFile.tagLen+cacheFile.digestLen] if bytes.Equal(binaryDigest, d) { startOff := i*cacheFile.tagLen + cacheFile.digestLen - parts := strings.Split(string(cacheFile.tags[startOff:(i+1)*cacheFile.tagLen]), "@") - off, _ := strconv.ParseInt(parts[0], 10, 64) + // check for corrupted data, there must be 2 u64 (off and len) after the digest. 
+ if cacheFile.tagLen < cacheFile.digestLen+16 { + return "", 0, 0 + } - len, _ := strconv.ParseInt(parts[1], 10, 64) - return digest, uint64(off), uint64(len) + offsetAndLen := cacheFile.tags[startOff : (i+1)*cacheFile.tagLen] + + off := binary.LittleEndian.Uint64(offsetAndLen[:8]) + len := binary.LittleEndian.Uint64(offsetAndLen[8:16]) + + return digest, off, len } } return "", 0, 0 From d029fe88620a249e2b8a3391e77a523f6df762c8 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Wed, 20 Mar 2024 20:43:34 +0100 Subject: [PATCH 14/20] chunked: add implementation for a bloom filter Signed-off-by: Giuseppe Scrivano (cherry picked from commit 6668761b3dababa7914ad2d4571012d648186bce) --- pkg/chunked/bloom_filter.go | 84 +++++++++++++++++++++ pkg/chunked/bloom_filter_test.go | 126 +++++++++++++++++++++++++++++++ 2 files changed, 210 insertions(+) create mode 100644 pkg/chunked/bloom_filter.go create mode 100644 pkg/chunked/bloom_filter_test.go diff --git a/pkg/chunked/bloom_filter.go b/pkg/chunked/bloom_filter.go new file mode 100644 index 000000000..696a52f8e --- /dev/null +++ b/pkg/chunked/bloom_filter.go @@ -0,0 +1,84 @@ +package chunked + +import ( + "encoding/binary" + "hash/crc32" + "io" +) + +type bloomFilter struct { + bitArray []uint64 + k uint32 +} + +func newBloomFilter(size int, k uint32) *bloomFilter { + numElements := (size + 63) / 64 + return &bloomFilter{ + bitArray: make([]uint64, numElements), + k: k, + } +} + +func newBloomFilterFromArray(bitArray []uint64, k uint32) *bloomFilter { + return &bloomFilter{ + bitArray: bitArray, + k: k, + } +} + +func (bf *bloomFilter) hashFn(item []byte, seed uint32) (uint64, uint64) { + if len(item) == 0 { + return 0, 0 + } + mod := uint32(len(bf.bitArray) * 64) + seedSplit := seed % uint32(len(item)) + hash := (crc32.ChecksumIEEE(item[:seedSplit]) ^ crc32.ChecksumIEEE(item[seedSplit:])) % mod + return uint64(hash / 64), uint64(1 << (hash % 64)) +} + +func (bf *bloomFilter) add(item []byte) { + for i := uint32(0); i < bf.k; i++ { + index, mask := bf.hashFn(item, i) + bf.bitArray[index] |= mask + } +} + +func (bf *bloomFilter) maybeContains(item []byte) bool { + for i := uint32(0); i < bf.k; i++ { + index, mask := bf.hashFn(item, i) + if bf.bitArray[index]&mask == 0 { + return false + } + } + return true +} + +func (bf *bloomFilter) writeTo(writer io.Writer) error { + if err := binary.Write(writer, binary.LittleEndian, uint64(len(bf.bitArray))); err != nil { + return err + } + if err := binary.Write(writer, binary.LittleEndian, uint32(bf.k)); err != nil { + return err + } + if err := binary.Write(writer, binary.LittleEndian, bf.bitArray); err != nil { + return err + } + return nil +} + +func readBloomFilter(reader io.Reader) (*bloomFilter, error) { + var bloomFilterLen uint64 + var k uint32 + + if err := binary.Read(reader, binary.LittleEndian, &bloomFilterLen); err != nil { + return nil, err + } + if err := binary.Read(reader, binary.LittleEndian, &k); err != nil { + return nil, err + } + bloomFilterArray := make([]uint64, bloomFilterLen) + if err := binary.Read(reader, binary.LittleEndian, &bloomFilterArray); err != nil { + return nil, err + } + return newBloomFilterFromArray(bloomFilterArray, k), nil +} diff --git a/pkg/chunked/bloom_filter_test.go b/pkg/chunked/bloom_filter_test.go new file mode 100644 index 000000000..d267e6005 --- /dev/null +++ b/pkg/chunked/bloom_filter_test.go @@ -0,0 +1,126 @@ +package chunked + +import ( + "bytes" + "io" + "testing" + + digest "github.com/opencontainers/go-digest" + 
"github.com/stretchr/testify/assert" +) + +var ( + presentDigestInCache string + notPresentDigestInCache string + presentDigestInCacheBinary []byte + notPresentDigestInCacheBinary []byte + preloadedCache *cacheFile + preloadedbloomFilter *bloomFilter + benchmarkN int = 100000 +) + +// Using 3 hashes functions and n/m = 10 gives a false positive rate of ~1.7%: +// https://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html +var ( + factorNM int = 10 + numberHashes uint32 = 3 +) + +func initCache(sizeCache int) (*cacheFile, string, string, *bloomFilter) { + var tagsBuffer bytes.Buffer + var vdata bytes.Buffer + tags := [][]byte{} + tagLen := 0 + digestLen := 64 + var presentDigest, notPresentDigest string + + bloomFilter := newBloomFilter(sizeCache*factorNM, numberHashes) + + digester := digest.Canonical.Digester() + hash := digester.Hash() + for i := 0; i < sizeCache; i++ { + hash.Write([]byte("1")) + d := digester.Digest().String() + digestLen = len(d) + presentDigest = d + tag, err := appendTag([]byte(d), 0, 0) + if err != nil { + panic(err) + } + tagLen = len(tag) + tags = append(tags, tag) + bd, err := makeBinaryDigest(d) + if err != nil { + panic(err) + } + bloomFilter.add(bd) + } + + hash.Write([]byte("1")) + notPresentDigest = digester.Digest().String() + + writeCacheFileToWriter(io.Discard, tags, tagLen, digestLen, &vdata, &tagsBuffer) + + cache := &cacheFile{ + digestLen: digestLen, + tagLen: tagLen, + tags: tagsBuffer.Bytes(), + vdata: vdata.Bytes(), + } + return cache, presentDigest, notPresentDigest, bloomFilter +} + +func init() { + var err error + preloadedCache, presentDigestInCache, notPresentDigestInCache, preloadedbloomFilter = initCache(10000) + presentDigestInCacheBinary, err = makeBinaryDigest(presentDigestInCache) + if err != nil { + panic(err) + } + notPresentDigestInCacheBinary, err = makeBinaryDigest(notPresentDigestInCache) + if err != nil { + panic(err) + } +} + +func BenchmarkLookupBloomFilter(b *testing.B) { + for i := 0; i < benchmarkN; i++ { + if preloadedbloomFilter.maybeContains(notPresentDigestInCacheBinary) { + findTag(notPresentDigestInCache, preloadedCache) + } + if preloadedbloomFilter.maybeContains(presentDigestInCacheBinary) { + findTag(presentDigestInCache, preloadedCache) + } + } +} + +func BenchmarkLookupBloomRaw(b *testing.B) { + for i := 0; i < benchmarkN; i++ { + findTag(notPresentDigestInCache, preloadedCache) + findTag(presentDigestInCache, preloadedCache) + } +} + +func TestBloomFilter(t *testing.T) { + bloomFilter := newBloomFilter(1000, 1) + digester := digest.Canonical.Digester() + hash := digester.Hash() + for i := 0; i < 1000; i++ { + hash.Write([]byte("1")) + d := digester.Digest().String() + bd, err := makeBinaryDigest(d) + assert.NoError(t, err) + contains := bloomFilter.maybeContains(bd) + assert.False(t, contains) + } + for i := 0; i < 1000; i++ { + hash.Write([]byte("1")) + d := digester.Digest().String() + bd, err := makeBinaryDigest(d) + assert.NoError(t, err) + bloomFilter.add(bd) + + contains := bloomFilter.maybeContains(bd) + assert.True(t, contains) + } +} From 98836f26476479036d36d3a791f0cd0be0ad27bf Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Thu, 21 Mar 2024 22:20:04 +0100 Subject: [PATCH 15/20] chunked: use a bloom filter to speedup lookup use a bloom filter to speed up lookup of digests in a cache file. The biggest advantage is that it reduces page faults with the mmap'ed cache file. 
Signed-off-by: Giuseppe Scrivano
(cherry picked from commit e9a96e022d5fa672cf47bf78d06a121ba8645d5e)
---
 pkg/chunked/bloom_filter_test.go |  2 +-
 pkg/chunked/cache_linux.go       | 87 +++++++++++++++++++++-----------
 pkg/chunked/cache_linux_test.go  | 15 ++++++
 3 files changed, 74 insertions(+), 30 deletions(-)

diff --git a/pkg/chunked/bloom_filter_test.go b/pkg/chunked/bloom_filter_test.go
index d267e6005..c51b76c56 100644
--- a/pkg/chunked/bloom_filter_test.go
+++ b/pkg/chunked/bloom_filter_test.go
@@ -59,7 +59,7 @@ func initCache(sizeCache int) (*cacheFile, string, string, *bloomFilter) {
 	hash.Write([]byte("1"))
 	notPresentDigest = digester.Digest().String()
 
-	writeCacheFileToWriter(io.Discard, tags, tagLen, digestLen, &vdata, &tagsBuffer)
+	writeCacheFileToWriter(io.Discard, bloomFilter, tags, tagLen, digestLen, vdata, &tagsBuffer)
 
 	cache := &cacheFile{
 		digestLen: digestLen,
diff --git a/pkg/chunked/cache_linux.go b/pkg/chunked/cache_linux.go
index 11d88a303..21da67fad 100644
--- a/pkg/chunked/cache_linux.go
+++ b/pkg/chunked/cache_linux.go
@@ -30,13 +30,19 @@ const (
 	cacheVersion = 2
 
 	digestSha256Empty = "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
+
+	// Using 3 hash functions and n/m = 10 gives a false positive rate of ~1.7%:
+	// https://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
+	bloomFilterScale  = 10 // how much bigger is the bloom filter than the number of entries
+	bloomFilterHashes = 3  // number of hash functions for the bloom filter
 )
 
 type cacheFile struct {
-	tagLen    int
-	digestLen int
-	tags      []byte
-	vdata     []byte
+	tagLen      int
+	digestLen   int
+	tags        []byte
+	vdata       []byte
+	bloomFilter *bloomFilter
 }
 
 type layer struct {
@@ -339,10 +345,19 @@ type setBigData interface {
 	SetLayerBigData(id, key string, data io.Reader) error
 }
 
-func writeCacheFileToWriter(writer io.Writer, tags [][]byte, tagLen, digestLen int, vdata, tagsBuffer *bytes.Buffer) error {
+func bloomFilterFromTags(tags [][]byte, digestLen int) *bloomFilter {
+	bloomFilter := newBloomFilter(len(tags)*bloomFilterScale, bloomFilterHashes)
+	for _, t := range tags {
+		bloomFilter.add(t[:digestLen])
+	}
+	return bloomFilter
+}
+
+func writeCacheFileToWriter(writer io.Writer, bloomFilter *bloomFilter, tags [][]byte, tagLen, digestLen int, vdata bytes.Buffer, tagsBuffer *bytes.Buffer) error {
 	sort.Slice(tags, func(i, j int) bool {
 		return bytes.Compare(tags[i], tags[j]) == -1
 	})
+
 	for _, t := range tags {
 		if _, err := tagsBuffer.Write(t); err != nil {
 			return err
@@ -364,6 +379,11 @@ func writeCacheFileToWriter(writer io.Writer, tags [][]byte, tagLen, digestLen i
 		return err
 	}
 
+	// bloom filter
+	if err := bloomFilter.writeTo(writer); err != nil {
+		return err
+	}
+
 	// tags length
 	if err := binary.Write(writer, binary.LittleEndian, uint64(tagsBuffer.Len())); err != nil {
 		return err
@@ -478,13 +498,15 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin
 		}
 	}
 
+	bloomFilter := bloomFilterFromTags(tags, digestLen)
+
 	pipeReader, pipeWriter := io.Pipe()
 	errChan := make(chan error, 1)
 	go func() {
 		defer pipeWriter.Close()
 		defer close(errChan)
 
-		errChan <- writeCacheFileToWriter(pipeWriter, tags, tagLen, digestLen, &vdata, &tagsBuffer)
+		errChan <- writeCacheFileToWriter(pipeWriter, bloomFilter, tags, tagLen, digestLen, vdata, &tagsBuffer)
 	}()
 	defer pipeReader.Close()
@@ -503,10 +525,11 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin
 	}
 
 	logrus.Debugf("Written lookaside cache for layer %q with length %v", id, counter.Count)
 
 	return
&cacheFile{ - digestLen: digestLen, - tagLen: tagLen, - tags: tagsBuffer.Bytes(), - vdata: vdata.Bytes(), + digestLen: digestLen, + tagLen: tagLen, + tags: tagsBuffer.Bytes(), + vdata: vdata.Bytes(), + bloomFilter: bloomFilter, }, nil } @@ -526,13 +549,18 @@ func readCacheFileFromMemory(bigDataBuffer []byte) (*cacheFile, error) { if err := binary.Read(bigData, binary.LittleEndian, &digestLen); err != nil { return nil, err } + + bloomFilter, err := readBloomFilter(bigData) + if err != nil { + return nil, err + } + if err := binary.Read(bigData, binary.LittleEndian, &tagsLen); err != nil { return nil, err } if err := binary.Read(bigData, binary.LittleEndian, &vdataLen); err != nil { return nil, err } - tags := make([]byte, tagsLen) if _, err := bigData.Read(tags); err != nil { return nil, err @@ -542,10 +570,11 @@ func readCacheFileFromMemory(bigDataBuffer []byte) (*cacheFile, error) { vdata := bigDataBuffer[len(bigDataBuffer)-bigData.Len():] return &cacheFile{ - tagLen: int(tagLen), - digestLen: int(digestLen), - tags: tags, - vdata: vdata, + tagLen: int(tagLen), + digestLen: int(digestLen), + bloomFilter: bloomFilter, + tags: tags, + vdata: vdata, }, nil } @@ -612,15 +641,7 @@ func (c *layersCache) createLayer(id string, cacheFile *cacheFile, mmapBuffer [] return l, nil } -func findTag(digest string, cacheFile *cacheFile) (string, uint64, uint64) { - binaryDigest, err := makeBinaryDigest(digest) - if err != nil { - return "", 0, 0 - } - if len(binaryDigest) != cacheFile.digestLen { - return "", 0, 0 - } - +func findBinaryTag(binaryDigest []byte, cacheFile *cacheFile) (bool, uint64, uint64) { nElements := len(cacheFile.tags) / cacheFile.tagLen i := sort.Search(nElements, func(i int) bool { @@ -634,7 +655,7 @@ func findTag(digest string, cacheFile *cacheFile) (string, uint64, uint64) { // check for corrupted data, there must be 2 u64 (off and len) after the digest. 
 		if cacheFile.tagLen < cacheFile.digestLen+16 {
-			return "", 0, 0
+			return false, 0, 0
 		}
 
 		offsetAndLen := cacheFile.tags[startOff : (i+1)*cacheFile.tagLen]
 
 		off := binary.LittleEndian.Uint64(offsetAndLen[:8])
 		len := binary.LittleEndian.Uint64(offsetAndLen[8:16])
 
-		return digest, off, len
+		return true, off, len
 	}
-	return "", 0, 0
+	return false, 0, 0
 }
 
 func (c *layersCache) findDigestInternal(digest string) (string, string, int64, error) {
@@ -653,12 +674,20 @@ func (c *layersCache) findDigestInternal(digest string) (string, string, int64,
 		return "", "", -1, nil
 	}
 
+	binaryDigest, err := makeBinaryDigest(digest)
+	if err != nil {
+		return "", "", 0, err
+	}
+
 	c.mutex.RLock()
 	defer c.mutex.RUnlock()
 
 	for _, layer := range c.layers {
-		digest, off, tagLen := findTag(digest, layer.cacheFile)
-		if digest != "" {
+		if !layer.cacheFile.bloomFilter.maybeContains(binaryDigest) {
+			continue
+		}
+		found, off, tagLen := findBinaryTag(binaryDigest, layer.cacheFile)
+		if found {
 			position := string(layer.cacheFile.vdata[off : off+tagLen])
 			parts := strings.SplitN(position, ":", 3)
 			if len(parts) != 3 {
diff --git a/pkg/chunked/cache_linux_test.go b/pkg/chunked/cache_linux_test.go
index 0804a9e51..410c81aba 100644
--- a/pkg/chunked/cache_linux_test.go
+++ b/pkg/chunked/cache_linux_test.go
@@ -120,6 +120,21 @@ func (b *bigDataToBuffer) SetLayerBigData(id, key string, data io.Reader) error
 	return err
 }
 
+func findTag(digest string, cacheFile *cacheFile) (string, uint64, uint64) {
+	binaryDigest, err := makeBinaryDigest(digest)
+	if err != nil {
+		return "", 0, 0
+	}
+	if len(binaryDigest) != cacheFile.digestLen {
+		return "", 0, 0
+	}
+	found, off, len := findBinaryTag(binaryDigest, cacheFile)
+	if found {
+		return digest, off, len
+	}
+	return "", 0, 0
+}
+
 func TestWriteCache(t *testing.T) {
 	toc, err := prepareCacheFile([]byte(jsonTOC), graphdriver.DifferOutputFormatDir)
 	if err != nil {

From 950cac9339a0fa513f5ce171efae21e1531ba225 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano
Date: Fri, 22 Mar 2024 16:59:27 +0100
Subject: [PATCH 16/20] chunked: store file names separately

so that the same file path is stored only once in the cache file.

After this change, the cache file measured on the fedora:{38,39,40}
images is on average ~6% smaller.
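A standalone sketch of the interning scheme (hypothetical names; the diff below implements it with the fnamesMap/getFileNamePosition closure inside writeCache): each unique path is written once as a little-endian u32 length followed by the bytes, and every location record refers to it by offset.

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

func main() {
	var fnames bytes.Buffer
	positions := map[string]int{}

	intern := func(name string) int {
		if pos, ok := positions[name]; ok {
			return pos // already stored: reuse the existing offset
		}
		pos := fnames.Len()
		positions[name] = pos
		binary.Write(&fnames, binary.LittleEndian, uint32(len(name)))
		fnames.WriteString(name)
		return pos
	}

	a := intern("usr/bin/bash")
	b := intern("usr/bin/bash") // a second reference costs no extra bytes
	fmt.Println(a, b, fnames.Len()) // 0 0 16
}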
Signed-off-by: Giuseppe Scrivano (cherry picked from commit 59ac03970d68251414607687d0ac1ced6b410630) --- pkg/chunked/bloom_filter_test.go | 3 +- pkg/chunked/cache_linux.go | 94 +++++++++++++++++++++++++++----- pkg/chunked/cache_linux_test.go | 39 ++++++++----- 3 files changed, 108 insertions(+), 28 deletions(-) diff --git a/pkg/chunked/bloom_filter_test.go b/pkg/chunked/bloom_filter_test.go index c51b76c56..491a2ac03 100644 --- a/pkg/chunked/bloom_filter_test.go +++ b/pkg/chunked/bloom_filter_test.go @@ -29,6 +29,7 @@ var ( func initCache(sizeCache int) (*cacheFile, string, string, *bloomFilter) { var tagsBuffer bytes.Buffer var vdata bytes.Buffer + var fnames bytes.Buffer tags := [][]byte{} tagLen := 0 digestLen := 64 @@ -59,7 +60,7 @@ func initCache(sizeCache int) (*cacheFile, string, string, *bloomFilter) { hash.Write([]byte("1")) notPresentDigest = digester.Digest().String() - writeCacheFileToWriter(io.Discard, bloomFilter, tags, tagLen, digestLen, vdata, &tagsBuffer) + writeCacheFileToWriter(io.Discard, bloomFilter, tags, tagLen, digestLen, vdata, fnames, &tagsBuffer) cache := &cacheFile{ digestLen: digestLen, diff --git a/pkg/chunked/cache_linux.go b/pkg/chunked/cache_linux.go index 21da67fad..481072b1f 100644 --- a/pkg/chunked/cache_linux.go +++ b/pkg/chunked/cache_linux.go @@ -40,8 +40,10 @@ const ( type cacheFile struct { tagLen int digestLen int + fnamesLen int tags []byte vdata []byte + fnames []byte bloomFilter *bloomFilter } @@ -326,9 +328,9 @@ func calculateHardLinkFingerprint(f *fileMetadata) (string, error) { return string(digester.Digest()), nil } -// generateFileLocation generates a file location in the form $OFFSET:$LEN:$PATH -func generateFileLocation(path string, offset, len uint64) []byte { - return []byte(fmt.Sprintf("%d:%d:%s", offset, len, path)) +// generateFileLocation generates a file location in the form $OFFSET:$LEN:$PATH_POS +func generateFileLocation(pathPos int, offset, len uint64) []byte { + return []byte(fmt.Sprintf("%d:%d:%d", offset, len, pathPos)) } // appendTag appends the $OFFSET$LEN information to the provided $DIGEST. 
@@ -353,11 +355,10 @@ func bloomFilterFromTags(tags [][]byte, digestLen int) *bloomFilter { return bloomFilter } -func writeCacheFileToWriter(writer io.Writer, bloomFilter *bloomFilter, tags [][]byte, tagLen, digestLen int, vdata bytes.Buffer, tagsBuffer *bytes.Buffer) error { +func writeCacheFileToWriter(writer io.Writer, bloomFilter *bloomFilter, tags [][]byte, tagLen, digestLen int, vdata, fnames bytes.Buffer, tagsBuffer *bytes.Buffer) error { sort.Slice(tags, func(i, j int) bool { return bytes.Compare(tags[i], tags[j]) == -1 }) - for _, t := range tags { if _, err := tagsBuffer.Write(t); err != nil { return err @@ -394,6 +395,11 @@ func writeCacheFileToWriter(writer io.Writer, bloomFilter *bloomFilter, tags [][ return err } + // fnames length + if err := binary.Write(writer, binary.LittleEndian, uint64(fnames.Len())); err != nil { + return err + } + // tags if _, err := writer.Write(tagsBuffer.Bytes()); err != nil { return err @@ -404,6 +410,11 @@ func writeCacheFileToWriter(writer io.Writer, bloomFilter *bloomFilter, tags [][ return err } + // file names + if _, err := writer.Write(fnames.Bytes()); err != nil { + return err + } + return nil } @@ -415,7 +426,7 @@ func writeCacheFileToWriter(writer io.Writer, bloomFilter *bloomFilter, tags [][ // - digest(digest(file.payload) + file.UID + file.GID + file.mode + file.xattrs) // - digest(i) for each i in chunks(file payload) func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id string, dest setBigData) (*cacheFile, error) { - var vdata, tagsBuffer bytes.Buffer + var vdata, tagsBuffer, fnames bytes.Buffer tagLen := 0 digestLen := 0 @@ -424,6 +435,23 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin return nil, err } + fnamesMap := make(map[string]int) + getFileNamePosition := func(name string) (int, error) { + if pos, found := fnamesMap[name]; found { + return pos, nil + } + pos := fnames.Len() + fnamesMap[name] = pos + + if err := binary.Write(&fnames, binary.LittleEndian, uint32(len(name))); err != nil { + return 0, err + } + if _, err := fnames.WriteString(name); err != nil { + return 0, err + } + return pos, nil + } + var tags [][]byte for _, k := range toc { if k.Digest != "" { @@ -431,7 +459,11 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin if err != nil { return nil, err } - location := generateFileLocation(k.Name, 0, uint64(k.Size)) + fileNamePos, err := getFileNamePosition(k.Name) + if err != nil { + return nil, err + } + location := generateFileLocation(fileNamePos, 0, uint64(k.Size)) off := uint64(vdata.Len()) l := uint64(len(location)) @@ -471,7 +503,11 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin digestLen = len(digestHardLink) } if k.ChunkDigest != "" { - location := generateFileLocation(k.Name, uint64(k.ChunkOffset), uint64(k.ChunkSize)) + fileNamePos, err := getFileNamePosition(k.Name) + if err != nil { + return nil, err + } + location := generateFileLocation(fileNamePos, uint64(k.ChunkOffset), uint64(k.ChunkSize)) off := uint64(vdata.Len()) l := uint64(len(location)) @@ -506,7 +542,7 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin defer pipeWriter.Close() defer close(errChan) - errChan <- writeCacheFileToWriter(pipeWriter, bloomFilter, tags, tagLen, digestLen, vdata, &tagsBuffer) + errChan <- writeCacheFileToWriter(pipeWriter, bloomFilter, tags, tagLen, digestLen, vdata, fnames, &tagsBuffer) }() defer pipeReader.Close() @@ -529,6 +565,8 @@ func 
writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin tagLen: tagLen, tags: tagsBuffer.Bytes(), vdata: vdata.Bytes(), + fnames: fnames.Bytes(), + fnamesLen: len(fnames.Bytes()), bloomFilter: bloomFilter, }, nil } @@ -536,7 +574,7 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin func readCacheFileFromMemory(bigDataBuffer []byte) (*cacheFile, error) { bigData := bytes.NewReader(bigDataBuffer) - var version, tagLen, digestLen, tagsLen, vdataLen uint64 + var version, tagLen, digestLen, tagsLen, fnamesLen, vdataLen uint64 if err := binary.Read(bigData, binary.LittleEndian, &version); err != nil { return nil, err } @@ -561,18 +599,27 @@ func readCacheFileFromMemory(bigDataBuffer []byte) (*cacheFile, error) { if err := binary.Read(bigData, binary.LittleEndian, &vdataLen); err != nil { return nil, err } + + if err := binary.Read(bigData, binary.LittleEndian, &fnamesLen); err != nil { + return nil, err + } tags := make([]byte, tagsLen) if _, err := bigData.Read(tags); err != nil { return nil, err } // retrieve the unread part of the buffer. - vdata := bigDataBuffer[len(bigDataBuffer)-bigData.Len():] + remaining := bigDataBuffer[len(bigDataBuffer)-bigData.Len():] + + vdata := remaining[:vdataLen] + fnames := remaining[vdataLen:] return &cacheFile{ - tagLen: int(tagLen), - digestLen: int(digestLen), bloomFilter: bloomFilter, + digestLen: int(digestLen), + fnames: fnames, + fnamesLen: int(fnamesLen), + tagLen: int(tagLen), tags: tags, vdata: vdata, }, nil @@ -693,9 +740,28 @@ func (c *layersCache) findDigestInternal(digest string) (string, string, int64, if len(parts) != 3 { continue } + offFile, _ := strconv.ParseInt(parts[0], 10, 64) + + tmp, err := strconv.ParseUint(parts[2], 10, 32) + if err != nil { + logrus.Warningf("Invalid file name offset in the cache for layer %q, skipping: %v", layer.id, err) + continue + } + fnamePosition := int(tmp) + + if len(layer.cacheFile.fnames) <= fnamePosition+4 { + return "", "", 0, err + } + lenPath := int(binary.LittleEndian.Uint32(layer.cacheFile.fnames[fnamePosition : fnamePosition+4])) + + if len(layer.cacheFile.fnames) <= fnamePosition+lenPath+4 { + return "", "", 0, err + } + path := string(layer.cacheFile.fnames[fnamePosition+4 : fnamePosition+lenPath+4]) + // parts[1] is the chunk length, currently unused. 
- return layer.target, parts[2], offFile, nil + return layer.target, path, offFile, nil } } diff --git a/pkg/chunked/cache_linux_test.go b/pkg/chunked/cache_linux_test.go index 410c81aba..5fc7d8876 100644 --- a/pkg/chunked/cache_linux_test.go +++ b/pkg/chunked/cache_linux_test.go @@ -6,6 +6,7 @@ import ( "io" "path/filepath" "reflect" + "strconv" "strings" "testing" @@ -155,18 +156,24 @@ func TestWriteCache(t *testing.T) { for _, r := range toc { if r.Digest != "" { // find the element in the cache by the digest checksum - digest, off, len := findTag(r.Digest, cache) + digest, off, lenTag := findTag(r.Digest, cache) if digest == "" { t.Error("file tag not found") } if digest != r.Digest { t.Error("wrong file found") } - expectedLocation := generateFileLocation(r.Name, 0, uint64(r.Size)) - location := cache.vdata[off : off+len] - if !bytes.Equal(location, expectedLocation) { - t.Errorf("wrong file found %q instead of %q", location, expectedLocation) - } + location := cache.vdata[off : off+lenTag] + parts := strings.SplitN(string(location), ":", 3) + + assert.Equal(t, len(parts), 3) + offFile, err := strconv.ParseInt(parts[0], 10, 64) + assert.NoError(t, err) + fileSize, err := strconv.ParseInt(parts[1], 10, 64) + assert.NoError(t, err) + + assert.Equal(t, fileSize, int64(r.Size)) + assert.Equal(t, offFile, int64(0)) fingerprint, err := calculateHardLinkFingerprint(r) if err != nil { @@ -174,18 +181,24 @@ func TestWriteCache(t *testing.T) { } // find the element in the cache by the hardlink fingerprint - digest, off, len = findTag(fingerprint, cache) + digest, off, lenTag = findTag(fingerprint, cache) if digest == "" { t.Error("file tag not found") } if digest != fingerprint { t.Error("wrong file found") } - expectedLocation = generateFileLocation(r.Name, 0, uint64(r.Size)) - location = cache.vdata[off : off+len] - if !bytes.Equal(location, expectedLocation) { - t.Errorf("wrong file found %q instead of %q", location, expectedLocation) - } + location = cache.vdata[off : off+lenTag] + parts = strings.SplitN(string(location), ":", 3) + + assert.Equal(t, len(parts), 3) + offFile, err = strconv.ParseInt(parts[0], 10, 64) + assert.NoError(t, err) + fileSize, err = strconv.ParseInt(parts[1], 10, 64) + assert.NoError(t, err) + + assert.Equal(t, fileSize, int64(r.Size)) + assert.Equal(t, offFile, int64(0)) } if r.ChunkDigest != "" { // find the element in the cache by the chunk digest checksum @@ -196,7 +209,7 @@ func TestWriteCache(t *testing.T) { if digest != r.ChunkDigest { t.Error("wrong digest found") } - expectedLocation := generateFileLocation(r.Name, uint64(r.ChunkOffset), uint64(r.ChunkSize)) + expectedLocation := generateFileLocation(0, uint64(r.ChunkOffset), uint64(r.ChunkSize)) location := cache.vdata[off : off+len] if !bytes.Equal(location, expectedLocation) { t.Errorf("wrong file found %q instead of %q", location, expectedLocation) From 53cb4adf9e8987385af18908ccda23cb7a6a142f Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Mon, 25 Mar 2024 11:48:09 +0100 Subject: [PATCH 17/20] chunked: store file locations as binary it reduces the cache file size by ~3%. 
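The varint encoding is where the saving comes from: binary.AppendUvarint spends one byte per value below 128 and grows only as needed, whereas the previous "%d:%d:%d" text form spent a byte per decimal digit plus separators. A standalone round-trip sketch with placeholder values:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

func main() {
	// Encode (pathPos, offset, len) as three unsigned varints, as
	// generateFileLocation does after this patch.
	var loc []byte
	loc = binary.AppendUvarint(loc, 42)    // pathPos: 1 byte
	loc = binary.AppendUvarint(loc, 1<<20) // offset: 3 bytes
	loc = binary.AppendUvarint(loc, 4096)  // len: 2 bytes

	r := bytes.NewReader(loc)
	pathPos, _ := binary.ReadUvarint(r)
	offset, _ := binary.ReadUvarint(r)
	length, _ := binary.ReadUvarint(r)

	fmt.Println(len(loc), pathPos, offset, length) // 6 42 1048576 4096
}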
Signed-off-by: Giuseppe Scrivano
(cherry picked from commit 9619a53b9196d7e9b5fa135b72b477c0fa8ac156)
---
 pkg/chunked/cache_linux.go      | 59 ++++++++++++++++++++++-----------
 pkg/chunked/cache_linux_test.go | 23 ++++---------
 2 files changed, 46 insertions(+), 36 deletions(-)

diff --git a/pkg/chunked/cache_linux.go b/pkg/chunked/cache_linux.go
index 481072b1f..29e5e6d22 100644
--- a/pkg/chunked/cache_linux.go
+++ b/pkg/chunked/cache_linux.go
@@ -10,7 +10,6 @@ import (
 	"os"
 	"runtime"
 	"sort"
-	"strconv"
 	"strings"
 	"sync"
 	"time"
@@ -328,9 +327,37 @@ func calculateHardLinkFingerprint(f *fileMetadata) (string, error) {
 	return string(digester.Digest()), nil
 }
 
-// generateFileLocation generates a file location in the form $OFFSET:$LEN:$PATH_POS
+// generateFileLocation generates a file location in the form $PATH_POS$OFFSET$LEN
 func generateFileLocation(pathPos int, offset, len uint64) []byte {
-	return []byte(fmt.Sprintf("%d:%d:%d", offset, len, pathPos))
+	var buf []byte
+
+	buf = binary.AppendUvarint(buf, uint64(pathPos))
+	buf = binary.AppendUvarint(buf, offset)
+	buf = binary.AppendUvarint(buf, len)
+
+	return buf
+}
+
+// parseFileLocation reads what was written by generateFileLocation.
+func parseFileLocation(locationData []byte) (int, uint64, uint64, error) {
+	reader := bytes.NewReader(locationData)
+
+	pathPos, err := binary.ReadUvarint(reader)
+	if err != nil {
+		return 0, 0, 0, err
+	}
+
+	offset, err := binary.ReadUvarint(reader)
+	if err != nil {
+		return 0, 0, 0, err
+	}
+
+	len, err := binary.ReadUvarint(reader)
+	if err != nil {
+		return 0, 0, 0, err
+	}
+
+	return int(pathPos), offset, len, nil
 }
 
 // appendTag appends the $OFFSET$LEN information to the provided $DIGEST.
@@ -464,7 +491,6 @@ func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id strin
 			return nil, err
 		}
 		location := generateFileLocation(fileNamePos, 0, uint64(k.Size))
-
 		off := uint64(vdata.Len())
 		l := uint64(len(location))
@@ -735,33 +761,28 @@ func (c *layersCache) findDigestInternal(digest string) (string, string, int64,
 		}
 		found, off, tagLen := findBinaryTag(binaryDigest, layer.cacheFile)
 		if found {
-			position := string(layer.cacheFile.vdata[off : off+tagLen])
-			parts := strings.SplitN(position, ":", 3)
-			if len(parts) != 3 {
-				continue
+			if uint64(len(layer.cacheFile.vdata)) < off+tagLen {
+				return "", "", 0, fmt.Errorf("corrupted cache file for layer %q", layer.id)
 			}
+			fileLocationData := layer.cacheFile.vdata[off : off+tagLen]
 
-			offFile, _ := strconv.ParseInt(parts[0], 10, 64)
-
-			tmp, err := strconv.ParseUint(parts[2], 10, 32)
+			fnamePosition, offFile, _, err := parseFileLocation(fileLocationData)
 			if err != nil {
-				logrus.Warningf("Invalid file name offset in the cache for layer %q, skipping: %v", layer.id, err)
-				continue
+				return "", "", 0, fmt.Errorf("corrupted cache file for layer %q", layer.id)
 			}
-			fnamePosition := int(tmp)
 
-			if len(layer.cacheFile.fnames) <= fnamePosition+4 {
-				return "", "", 0, err
+			if len(layer.cacheFile.fnames) < fnamePosition+4 {
+				return "", "", 0, fmt.Errorf("corrupted cache file for layer %q", layer.id)
 			}
 			lenPath := int(binary.LittleEndian.Uint32(layer.cacheFile.fnames[fnamePosition : fnamePosition+4]))
 
-			if len(layer.cacheFile.fnames) <= fnamePosition+lenPath+4 {
-				return "", "", 0, err
+			if len(layer.cacheFile.fnames) < fnamePosition+lenPath+4 {
+				return "", "", 0, fmt.Errorf("corrupted cache file for layer %q", layer.id)
 			}
 			path := string(layer.cacheFile.fnames[fnamePosition+4 : fnamePosition+lenPath+4])
 
 			// parts[1] is the chunk length, currently unused.
- return layer.target, path, offFile, nil + return layer.target, path, int64(offFile), nil } } diff --git a/pkg/chunked/cache_linux_test.go b/pkg/chunked/cache_linux_test.go index 5fc7d8876..426f44f0e 100644 --- a/pkg/chunked/cache_linux_test.go +++ b/pkg/chunked/cache_linux_test.go @@ -6,7 +6,6 @@ import ( "io" "path/filepath" "reflect" - "strconv" "strings" "testing" @@ -164,16 +163,11 @@ func TestWriteCache(t *testing.T) { t.Error("wrong file found") } location := cache.vdata[off : off+lenTag] - parts := strings.SplitN(string(location), ":", 3) - - assert.Equal(t, len(parts), 3) - offFile, err := strconv.ParseInt(parts[0], 10, 64) - assert.NoError(t, err) - fileSize, err := strconv.ParseInt(parts[1], 10, 64) + _, offFile, fileSize, err := parseFileLocation(location) assert.NoError(t, err) - assert.Equal(t, fileSize, int64(r.Size)) - assert.Equal(t, offFile, int64(0)) + assert.Equal(t, fileSize, uint64(r.Size)) + assert.Equal(t, offFile, uint64(0)) fingerprint, err := calculateHardLinkFingerprint(r) if err != nil { @@ -189,16 +183,11 @@ func TestWriteCache(t *testing.T) { t.Error("wrong file found") } location = cache.vdata[off : off+lenTag] - parts = strings.SplitN(string(location), ":", 3) - - assert.Equal(t, len(parts), 3) - offFile, err = strconv.ParseInt(parts[0], 10, 64) - assert.NoError(t, err) - fileSize, err = strconv.ParseInt(parts[1], 10, 64) + _, offFile, fileSize, err = parseFileLocation(location) assert.NoError(t, err) - assert.Equal(t, fileSize, int64(r.Size)) - assert.Equal(t, offFile, int64(0)) + assert.Equal(t, fileSize, uint64(r.Size)) + assert.Equal(t, offFile, uint64(0)) } if r.ChunkDigest != "" { // find the element in the cache by the chunk digest checksum From e6a34f1f885c2b3ed933fd59e35897ce325f2760 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 22 Mar 2024 16:59:59 +0100 Subject: [PATCH 18/20] chunked: bump version number for cache file Signed-off-by: Giuseppe Scrivano (cherry picked from commit 065a2f33217b8ba368e4d379b3046f5fd1f26218) --- pkg/chunked/cache_linux.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/chunked/cache_linux.go b/pkg/chunked/cache_linux.go index 29e5e6d22..6dcaa668b 100644 --- a/pkg/chunked/cache_linux.go +++ b/pkg/chunked/cache_linux.go @@ -26,7 +26,7 @@ import ( const ( cacheKey = "chunked-manifest-cache" - cacheVersion = 2 + cacheVersion = 3 digestSha256Empty = "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" From 8c3e6d7e3ef4580a0434d52ee9d99cb2ecb782be Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 19 Apr 2024 15:49:27 +0200 Subject: [PATCH 19/20] chunked: store original tar size for converted layers if the layer was converted from an existing one, store the original layer size. 
Closes: https://github.com/containers/storage/issues/1892 Signed-off-by: Giuseppe Scrivano (cherry picked from commit 639f1a62f9823d878ba379f4d34d77ef90f69eb7) --- pkg/chunked/storage_linux.go | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/pkg/chunked/storage_linux.go b/pkg/chunked/storage_linux.go index 2c49967c7..82966b8a5 100644 --- a/pkg/chunked/storage_linux.go +++ b/pkg/chunked/storage_linux.go @@ -217,15 +217,15 @@ func (f *seekableFile) GetBlobAt(chunks []ImageSourceChunk) (chan io.ReadCloser, return streams, errs, nil } -func convertTarToZstdChunked(destDirectory string, payload *os.File) (*seekableFile, digest.Digest, map[string]string, error) { +func convertTarToZstdChunked(destDirectory string, payload *os.File) (int64, *seekableFile, digest.Digest, map[string]string, error) { diff, err := archive.DecompressStream(payload) if err != nil { - return nil, "", nil, err + return 0, nil, "", nil, err } fd, err := unix.Open(destDirectory, unix.O_TMPFILE|unix.O_RDWR|unix.O_CLOEXEC, 0o600) if err != nil { - return nil, "", nil, err + return 0, nil, "", nil, err } f := os.NewFile(uintptr(fd), destDirectory) @@ -235,23 +235,24 @@ func convertTarToZstdChunked(destDirectory string, payload *os.File) (*seekableF chunked, err := compressor.ZstdCompressor(f, newAnnotations, &level) if err != nil { f.Close() - return nil, "", nil, err + return 0, nil, "", nil, err } convertedOutputDigester := digest.Canonical.Digester() - if _, err := io.Copy(io.MultiWriter(chunked, convertedOutputDigester.Hash()), diff); err != nil { + copied, err := io.Copy(io.MultiWriter(chunked, convertedOutputDigester.Hash()), diff) + if err != nil { f.Close() - return nil, "", nil, err + return 0, nil, "", nil, err } if err := chunked.Close(); err != nil { f.Close() - return nil, "", nil, err + return 0, nil, "", nil, err } is := seekableFile{ file: f, } - return &is, convertedOutputDigester.Digest(), newAnnotations, nil + return copied, &is, convertedOutputDigester.Digest(), newAnnotations, nil } // GetDiffer returns a differ than can be used with ApplyDiffWithDiffer. @@ -1652,6 +1653,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff stream := c.stream var uncompressedDigest digest.Digest + var convertedBlobSize int64 if c.convertToZstdChunked { fd, err := unix.Open(dest, unix.O_TMPFILE|unix.O_RDWR|unix.O_CLOEXEC, 0o600) @@ -1679,10 +1681,11 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff return graphdriver.DriverWithDifferOutput{}, err } - fileSource, diffID, annotations, err := convertTarToZstdChunked(dest, blobFile) + tarSize, fileSource, diffID, annotations, err := convertTarToZstdChunked(dest, blobFile) if err != nil { return graphdriver.DriverWithDifferOutput{}, err } + convertedBlobSize = tarSize // fileSource is a O_TMPFILE file descriptor, so we // need to keep it open until the entire file is processed. defer fileSource.Close() @@ -1759,13 +1762,19 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff var missingParts []missingPart - mergedEntries, totalSize, err := c.mergeTocEntries(c.fileType, toc.Entries) + mergedEntries, totalSizeFromTOC, err := c.mergeTocEntries(c.fileType, toc.Entries) if err != nil { return output, err } output.UIDs, output.GIDs = collectIDs(mergedEntries) - output.Size = totalSize + if convertedBlobSize > 0 { + // if the image was converted, store the original tar size, so that + // it can be recreated correctly. 
+ output.Size = convertedBlobSize + } else { + output.Size = totalSizeFromTOC + } if err := maybeDoIDRemap(mergedEntries, options); err != nil { return output, err From c8291eee40a8fe1151bb1ac789a3609dc7e5f5c5 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 19 Apr 2024 21:43:08 +0200 Subject: [PATCH 20/20] chunked: fix escape of space the code was copied from the composefs C version: if (noescape_space) hex_escape = !isprint(c); else hex_escape = !isgraph(c); but unicode.IsGraphic() seems to behave differently and includes the space: isgraph(' ') -> 0 unicode.IsGraphic(' ') -> true Signed-off-by: Giuseppe Scrivano (cherry picked from commit 839beda40e43c9223504ad2a2bc89f10ab063a31) --- pkg/chunked/dump/dump.go | 2 +- pkg/chunked/dump/dump_test.go | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/chunked/dump/dump.go b/pkg/chunked/dump/dump.go index d3c105c4d..701b6aa53 100644 --- a/pkg/chunked/dump/dump.go +++ b/pkg/chunked/dump/dump.go @@ -52,7 +52,7 @@ func escaped(val string, escape int) string { if noescapeSpace { hexEscape = !unicode.IsPrint(rune(c)) } else { - hexEscape = !unicode.IsGraphic(rune(c)) + hexEscape = !unicode.IsPrint(rune(c)) || unicode.IsSpace(rune(c)) } } diff --git a/pkg/chunked/dump/dump_test.go b/pkg/chunked/dump/dump_test.go index 740b8fb9a..ee45424fe 100644 --- a/pkg/chunked/dump/dump_test.go +++ b/pkg/chunked/dump/dump_test.go @@ -14,7 +14,6 @@ func TestEscaped(t *testing.T) { escape int want string }{ - {"Hello, World!", 0, "Hello, World!"}, {"12345", 0, "12345"}, {"", 0, ""}, {"\n", 0, "\\n"}, @@ -25,9 +24,12 @@ func TestEscaped(t *testing.T) { {"foo=bar", ESCAPE_EQUAL, "foo\\x3dbar"}, {"-", ESCAPE_LONE_DASH, "\\x2d"}, {"\n", NOESCAPE_SPACE, "\\n"}, + {" ", 0, "\\x20"}, {" ", NOESCAPE_SPACE, " "}, {"\t", NOESCAPE_SPACE, "\\t"}, {"\n\t", NOESCAPE_SPACE, "\\n\\t"}, + {"Hello World!", 0, "Hello\\x20World!"}, + {"Hello World!", NOESCAPE_SPACE, "Hello World!"}, } for _, test := range tests {