From a50bb95770a9471bd2d1d11d19048d96245d372f Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Fri, 26 May 2023 12:20:56 +0200 Subject: [PATCH] chunked: support writing files in a flat dir format so that they can be stored by their digest Signed-off-by: Giuseppe Scrivano --- drivers/driver.go | 13 +++++++ pkg/chunked/cache_linux.go | 40 ++++++++++++++++--- pkg/chunked/cache_linux_test.go | 27 +++++++++++-- pkg/chunked/storage_linux.go | 69 ++++++++++++++++++++++++++++++++- 4 files changed, 138 insertions(+), 11 deletions(-) diff --git a/drivers/driver.go b/drivers/driver.go index 663173a29..1fb04dc3e 100644 --- a/drivers/driver.go +++ b/drivers/driver.go @@ -191,8 +191,21 @@ type DriverWithDifferOutput struct { TOCDigest digest.Digest } +type DifferOutputFormat int + +const ( + // DifferOutputFormatDir means the output is a directory and it will + // keep the original layout. + DifferOutputFormatDir = iota + // DifferOutputFormatFlat will store the files by their checksum, in the form + // checksum[0:2]/checksum[2:] + DifferOutputFormatFlat +) + // DifferOptions overrides how the differ work type DifferOptions struct { + // Format defines the destination directory layout format + Format DifferOutputFormat } // Differ defines the interface for using a custom differ. diff --git a/pkg/chunked/cache_linux.go b/pkg/chunked/cache_linux.go index cd13212e6..56c30e267 100644 --- a/pkg/chunked/cache_linux.go +++ b/pkg/chunked/cache_linux.go @@ -15,6 +15,7 @@ import ( "unsafe" storage "github.com/containers/storage" + graphdriver "github.com/containers/storage/drivers" "github.com/containers/storage/pkg/chunked/internal" "github.com/containers/storage/pkg/ioutils" jsoniter "github.com/json-iterator/go" @@ -109,7 +110,7 @@ func (c *layersCache) load() error { } bigData, err := c.store.LayerBigData(r.ID, cacheKey) - // if the cache areadly exists, read and use it + // if the cache already exists, read and use it if err == nil { defer bigData.Close() metadata, err := readMetadataFromCache(bigData) @@ -122,6 +123,23 @@ func (c *layersCache) load() error { return err } + var lcd chunkedLayerData + + clFile, err := c.store.LayerBigData(r.ID, chunkedLayerDataKey) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return err + } + if clFile != nil { + cl, err := io.ReadAll(clFile) + if err != nil { + return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err) + } + json := jsoniter.ConfigCompatibleWithStandardLibrary + if err := json.Unmarshal(cl, &lcd); err != nil { + return err + } + } + // otherwise create it from the layer TOC. manifestReader, err := c.store.LayerBigData(r.ID, bigDataKey) if err != nil { @@ -134,7 +152,7 @@ func (c *layersCache) load() error { return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err) } - metadata, err := writeCache(manifest, r.ID, c.store) + metadata, err := writeCache(manifest, lcd.Format, r.ID, c.store) if err == nil { c.addLayer(r.ID, metadata) } @@ -211,13 +229,13 @@ type setBigData interface { // - digest(file.payload)) // - digest(digest(file.payload) + file.UID + file.GID + file.mode + file.xattrs) // - digest(i) for each i in chunks(file payload) -func writeCache(manifest []byte, id string, dest setBigData) (*metadata, error) { +func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id string, dest setBigData) (*metadata, error) { var vdata bytes.Buffer tagLen := 0 digestLen := 0 var tagsBuffer bytes.Buffer - toc, err := prepareMetadata(manifest) + toc, err := prepareMetadata(manifest, format) if err != nil { return nil, err } @@ -396,7 +414,7 @@ func readMetadataFromCache(bigData io.Reader) (*metadata, error) { }, nil } -func prepareMetadata(manifest []byte) ([]*internal.FileMetadata, error) { +func prepareMetadata(manifest []byte, format graphdriver.DifferOutputFormat) ([]*internal.FileMetadata, error) { toc, err := unmarshalToc(manifest) if err != nil { // ignore errors here. They might be caused by a different manifest format. @@ -404,6 +422,17 @@ func prepareMetadata(manifest []byte) ([]*internal.FileMetadata, error) { return nil, nil //nolint: nilnil } + switch format { + case graphdriver.DifferOutputFormatDir: + case graphdriver.DifferOutputFormatFlat: + toc.Entries, err = makeEntriesFlat(toc.Entries) + if err != nil { + return nil, err + } + default: + return nil, fmt.Errorf("unknown format %q", format) + } + var r []*internal.FileMetadata chunkSeen := make(map[string]bool) for i := range toc.Entries { @@ -420,6 +449,7 @@ func prepareMetadata(manifest []byte) ([]*internal.FileMetadata, error) { chunkSeen[cd] = true } } + return r, nil } diff --git a/pkg/chunked/cache_linux_test.go b/pkg/chunked/cache_linux_test.go index 598fd63e9..2ce18d936 100644 --- a/pkg/chunked/cache_linux_test.go +++ b/pkg/chunked/cache_linux_test.go @@ -4,8 +4,12 @@ import ( "bytes" "fmt" "io" + "path/filepath" "reflect" + "strings" "testing" + + graphdriver "github.com/containers/storage/drivers" ) const jsonTOC = ` @@ -55,7 +59,7 @@ const jsonTOC = ` ` func TestPrepareMetadata(t *testing.T) { - toc, err := prepareMetadata([]byte(jsonTOC)) + toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatDir) if err != nil { t.Errorf("got error from prepareMetadata: %v", err) } @@ -64,6 +68,21 @@ func TestPrepareMetadata(t *testing.T) { } } +func TestPrepareMetadataFlat(t *testing.T) { + toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatFlat) + if err != nil { + t.Errorf("got error from prepareMetadata: %v", err) + } + for _, e := range toc { + if len(strings.Split(e.Name, "/")) != 2 { + t.Error("prepareMetadata returns the wrong number of path elements for flat directories") + } + if len(filepath.Dir(e.Name)) != 2 { + t.Error("prepareMetadata returns the wrong path for flat directories") + } + } +} + type bigDataToBuffer struct { buf *bytes.Buffer id string @@ -83,7 +102,7 @@ func (b *bigDataToBuffer) SetLayerBigData(id, key string, data io.Reader) error } func TestWriteCache(t *testing.T) { - toc, err := prepareMetadata([]byte(jsonTOC)) + toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatDir) if err != nil { t.Errorf("got error from prepareMetadata: %v", err) } @@ -91,7 +110,7 @@ func TestWriteCache(t *testing.T) { dest := bigDataToBuffer{ buf: bytes.NewBuffer(nil), } - cache, err := writeCache([]byte(jsonTOC), "foobar", &dest) + cache, err := writeCache([]byte(jsonTOC), graphdriver.DifferOutputFormatDir, "foobar", &dest) if err != nil { t.Errorf("got error from writeCache: %v", err) } @@ -156,7 +175,7 @@ func TestReadCache(t *testing.T) { dest := bigDataToBuffer{ buf: bytes.NewBuffer(nil), } - cache, err := writeCache([]byte(jsonTOC), "foobar", &dest) + cache, err := writeCache([]byte(jsonTOC), graphdriver.DifferOutputFormatDir, "foobar", &dest) if err != nil { t.Errorf("got error from writeCache: %v", err) } diff --git a/pkg/chunked/storage_linux.go b/pkg/chunked/storage_linux.go index 254ce9c73..f13056082 100644 --- a/pkg/chunked/storage_linux.go +++ b/pkg/chunked/storage_linux.go @@ -28,6 +28,7 @@ import ( "github.com/containers/storage/pkg/system" "github.com/containers/storage/types" securejoin "github.com/cyphar/filepath-securejoin" + jsoniter "github.com/json-iterator/go" "github.com/klauspost/compress/zstd" "github.com/klauspost/pgzip" digest "github.com/opencontainers/go-digest" @@ -41,6 +42,8 @@ const ( newFileFlags = (unix.O_CREAT | unix.O_TRUNC | unix.O_EXCL | unix.O_WRONLY) containersOverrideXattr = "user.containers.override_stat" bigDataKey = "zstd-chunked-manifest" + chunkedData = "zstd-chunked-data" + chunkedLayerDataKey = "zstd-chunked-layer-data" fileTypeZstdChunked = iota fileTypeEstargz @@ -73,6 +76,11 @@ var xattrsToIgnore = map[string]interface{}{ "security.selinux": true, } +// chunkedLayerData is used to store additional information about the layer +type chunkedLayerData struct { + Format graphdriver.DifferOutputFormat `json:"format"` +} + func timeToTimespec(time *time.Time) (ts unix.Timespec) { if time == nil || time.IsZero() { // Return UTIME_OMIT special value @@ -241,7 +249,7 @@ func copyFileFromOtherLayer(file *internal.FileMetadata, source string, name str srcFile, err := openFileUnderRoot(name, srcDirfd, unix.O_RDONLY, 0) if err != nil { - return false, nil, 0, fmt.Errorf("open source file under target rootfs: %w", err) + return false, nil, 0, fmt.Errorf("open source file under target rootfs (%s): %w", name, err) } defer srcFile.Close() @@ -1324,6 +1332,38 @@ func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *internal.FileMetadata, cop return false, nil } +func makeEntriesFlat(mergedEntries []internal.FileMetadata) ([]internal.FileMetadata, error) { + var new []internal.FileMetadata + + hashes := make(map[string]string) + for i := range mergedEntries { + if mergedEntries[i].Type != TypeReg { + continue + } + if mergedEntries[i].Digest == "" { + if mergedEntries[i].Size != 0 { + return nil, fmt.Errorf("missing digest for %q", mergedEntries[i].Name) + } + continue + } + digest, err := digest.Parse(mergedEntries[i].Digest) + if err != nil { + return nil, err + } + d := digest.Encoded() + + if hashes[d] != "" { + continue + } + hashes[d] = d + + mergedEntries[i].Name = fmt.Sprintf("%s/%s", d[0:2], d[2:]) + + new = append(new, mergedEntries[i]) + } + return new, nil +} + func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, differOpts *graphdriver.DifferOptions) (graphdriver.DriverWithDifferOutput, error) { defer c.layersCache.release() defer func() { @@ -1332,11 +1372,21 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff } }() + lcd := chunkedLayerData{ + Format: differOpts.Format, + } + + json := jsoniter.ConfigCompatibleWithStandardLibrary + lcdBigData, err := json.Marshal(lcd) + if err != nil { + return graphdriver.DriverWithDifferOutput{}, err + } output := graphdriver.DriverWithDifferOutput{ Differ: c, TarSplit: c.tarSplit, BigData: map[string][]byte{ - bigDataKey: c.manifest, + bigDataKey: c.manifest, + chunkedLayerDataKey: lcdBigData, }, TOCDigest: c.tocDigest, } @@ -1396,6 +1446,21 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff } defer unix.Close(dirfd) + if differOpts != nil && differOpts.Format == graphdriver.DifferOutputFormatFlat { + mergedEntries, err = makeEntriesFlat(mergedEntries) + if err != nil { + return output, err + } + createdDirs := make(map[string]struct{}) + for _, e := range mergedEntries { + d := e.Name[0:2] + if _, found := createdDirs[d]; !found { + unix.Mkdirat(dirfd, d, 0o755) + createdDirs[d] = struct{}{} + } + } + } + // hardlinks can point to missing files. So create them after all files // are retrieved var hardLinks []hardLinkToCreate