chunked: support writing files in a flat dir format

so that they can be stored by their digest

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
Giuseppe Scrivano 2023-05-26 12:20:56 +02:00
parent 8bb5a087ab
commit a50bb95770
No known key found for this signature in database
GPG Key ID: 67E38F7A8BA21772
4 changed files with 138 additions and 11 deletions

View File

@ -191,8 +191,21 @@ type DriverWithDifferOutput struct {
TOCDigest digest.Digest
}
// DifferOutputFormat selects the layout of the destination directory
// written by a differ.
type DifferOutputFormat int

const (
	// DifferOutputFormatDir means the output is a directory and it will
	// keep the original layout.
	//
	// The constants are explicitly typed so that they cannot be mixed up
	// with plain integers; DifferOutputFormatDir is the zero value.
	DifferOutputFormatDir DifferOutputFormat = iota
	// DifferOutputFormatFlat will store the files by their checksum, in the form
	// checksum[0:2]/checksum[2:]
	DifferOutputFormatFlat
)
// DifferOptions overrides how the differ works.
type DifferOptions struct {
// Format defines the destination directory layout format.
Format DifferOutputFormat
}
// Differ defines the interface for using a custom differ.

View File

@ -15,6 +15,7 @@ import (
"unsafe"
storage "github.com/containers/storage"
graphdriver "github.com/containers/storage/drivers"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/containers/storage/pkg/ioutils"
jsoniter "github.com/json-iterator/go"
@ -109,7 +110,7 @@ func (c *layersCache) load() error {
}
bigData, err := c.store.LayerBigData(r.ID, cacheKey)
// if the cache areadly exists, read and use it
// if the cache already exists, read and use it
if err == nil {
defer bigData.Close()
metadata, err := readMetadataFromCache(bigData)
@ -122,6 +123,23 @@ func (c *layersCache) load() error {
return err
}
var lcd chunkedLayerData
clFile, err := c.store.LayerBigData(r.ID, chunkedLayerDataKey)
if err != nil && !errors.Is(err, os.ErrNotExist) {
return err
}
if clFile != nil {
cl, err := io.ReadAll(clFile)
if err != nil {
return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err)
}
json := jsoniter.ConfigCompatibleWithStandardLibrary
if err := json.Unmarshal(cl, &lcd); err != nil {
return err
}
}
// otherwise create it from the layer TOC.
manifestReader, err := c.store.LayerBigData(r.ID, bigDataKey)
if err != nil {
@ -134,7 +152,7 @@ func (c *layersCache) load() error {
return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err)
}
metadata, err := writeCache(manifest, r.ID, c.store)
metadata, err := writeCache(manifest, lcd.Format, r.ID, c.store)
if err == nil {
c.addLayer(r.ID, metadata)
}
@ -211,13 +229,13 @@ type setBigData interface {
// - digest(file.payload))
// - digest(digest(file.payload) + file.UID + file.GID + file.mode + file.xattrs)
// - digest(i) for each i in chunks(file payload)
func writeCache(manifest []byte, id string, dest setBigData) (*metadata, error) {
func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id string, dest setBigData) (*metadata, error) {
var vdata bytes.Buffer
tagLen := 0
digestLen := 0
var tagsBuffer bytes.Buffer
toc, err := prepareMetadata(manifest)
toc, err := prepareMetadata(manifest, format)
if err != nil {
return nil, err
}
@ -396,7 +414,7 @@ func readMetadataFromCache(bigData io.Reader) (*metadata, error) {
}, nil
}
func prepareMetadata(manifest []byte) ([]*internal.FileMetadata, error) {
func prepareMetadata(manifest []byte, format graphdriver.DifferOutputFormat) ([]*internal.FileMetadata, error) {
toc, err := unmarshalToc(manifest)
if err != nil {
// ignore errors here. They might be caused by a different manifest format.
@ -404,6 +422,17 @@ func prepareMetadata(manifest []byte) ([]*internal.FileMetadata, error) {
return nil, nil //nolint: nilnil
}
switch format {
case graphdriver.DifferOutputFormatDir:
case graphdriver.DifferOutputFormatFlat:
toc.Entries, err = makeEntriesFlat(toc.Entries)
if err != nil {
return nil, err
}
default:
return nil, fmt.Errorf("unknown format %q", format)
}
var r []*internal.FileMetadata
chunkSeen := make(map[string]bool)
for i := range toc.Entries {
@ -420,6 +449,7 @@ func prepareMetadata(manifest []byte) ([]*internal.FileMetadata, error) {
chunkSeen[cd] = true
}
}
return r, nil
}

View File

@ -4,8 +4,12 @@ import (
"bytes"
"fmt"
"io"
"path/filepath"
"reflect"
"strings"
"testing"
graphdriver "github.com/containers/storage/drivers"
)
const jsonTOC = `
@ -55,7 +59,7 @@ const jsonTOC = `
`
func TestPrepareMetadata(t *testing.T) {
toc, err := prepareMetadata([]byte(jsonTOC))
toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatDir)
if err != nil {
t.Errorf("got error from prepareMetadata: %v", err)
}
@ -64,6 +68,21 @@ func TestPrepareMetadata(t *testing.T) {
}
}
// TestPrepareMetadataFlat checks that, with the flat output format, every
// entry returned by prepareMetadata is named "<dir>/<file>" where the
// directory component is two characters long.
func TestPrepareMetadataFlat(t *testing.T) {
	entries, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatFlat)
	if err != nil {
		t.Errorf("got error from prepareMetadata: %v", err)
	}

	for _, entry := range entries {
		name := entry.Name
		// Exactly one separator is the same as exactly two path elements.
		if strings.Count(name, "/") != 1 {
			t.Error("prepareMetadata returns the wrong number of path elements for flat directories")
		}
		if dir := filepath.Dir(name); len(dir) != 2 {
			t.Error("prepareMetadata returns the wrong path for flat directories")
		}
	}
}
type bigDataToBuffer struct {
buf *bytes.Buffer
id string
@ -83,7 +102,7 @@ func (b *bigDataToBuffer) SetLayerBigData(id, key string, data io.Reader) error
}
func TestWriteCache(t *testing.T) {
toc, err := prepareMetadata([]byte(jsonTOC))
toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatDir)
if err != nil {
t.Errorf("got error from prepareMetadata: %v", err)
}
@ -91,7 +110,7 @@ func TestWriteCache(t *testing.T) {
dest := bigDataToBuffer{
buf: bytes.NewBuffer(nil),
}
cache, err := writeCache([]byte(jsonTOC), "foobar", &dest)
cache, err := writeCache([]byte(jsonTOC), graphdriver.DifferOutputFormatDir, "foobar", &dest)
if err != nil {
t.Errorf("got error from writeCache: %v", err)
}
@ -156,7 +175,7 @@ func TestReadCache(t *testing.T) {
dest := bigDataToBuffer{
buf: bytes.NewBuffer(nil),
}
cache, err := writeCache([]byte(jsonTOC), "foobar", &dest)
cache, err := writeCache([]byte(jsonTOC), graphdriver.DifferOutputFormatDir, "foobar", &dest)
if err != nil {
t.Errorf("got error from writeCache: %v", err)
}

View File

@ -28,6 +28,7 @@ import (
"github.com/containers/storage/pkg/system"
"github.com/containers/storage/types"
securejoin "github.com/cyphar/filepath-securejoin"
jsoniter "github.com/json-iterator/go"
"github.com/klauspost/compress/zstd"
"github.com/klauspost/pgzip"
digest "github.com/opencontainers/go-digest"
@ -41,6 +42,8 @@ const (
newFileFlags = (unix.O_CREAT | unix.O_TRUNC | unix.O_EXCL | unix.O_WRONLY)
containersOverrideXattr = "user.containers.override_stat"
bigDataKey = "zstd-chunked-manifest"
chunkedData = "zstd-chunked-data"
chunkedLayerDataKey = "zstd-chunked-layer-data"
fileTypeZstdChunked = iota
fileTypeEstargz
@ -73,6 +76,11 @@ var xattrsToIgnore = map[string]interface{}{
"security.selinux": true,
}
// chunkedLayerData is used to store additional information about the layer.
// It is marshalled to JSON and saved as layer big data under
// chunkedLayerDataKey.
type chunkedLayerData struct {
// Format is the output format that was requested from the differ when the
// layer was applied.
Format graphdriver.DifferOutputFormat `json:"format"`
}
func timeToTimespec(time *time.Time) (ts unix.Timespec) {
if time == nil || time.IsZero() {
// Return UTIME_OMIT special value
@ -241,7 +249,7 @@ func copyFileFromOtherLayer(file *internal.FileMetadata, source string, name str
srcFile, err := openFileUnderRoot(name, srcDirfd, unix.O_RDONLY, 0)
if err != nil {
return false, nil, 0, fmt.Errorf("open source file under target rootfs: %w", err)
return false, nil, 0, fmt.Errorf("open source file under target rootfs (%s): %w", name, err)
}
defer srcFile.Close()
@ -1324,6 +1332,38 @@ func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *internal.FileMetadata, cop
return false, nil
}
// makeEntriesFlat converts TOC entries to the flat layout, where every
// regular file is stored once under a name derived from its digest, in the
// form checksum[0:2]/checksum[2:].
//
// Entries that are not regular files are dropped, as are regular files that
// have no digest and a size of zero.  When several entries share the same
// digest, only the first one is kept.  The elements of mergedEntries are not
// modified; the returned slice holds renamed copies.
//
// An error is returned when a non-empty regular file has no digest, or when
// a digest cannot be parsed.
func makeEntriesFlat(mergedEntries []internal.FileMetadata) ([]internal.FileMetadata, error) {
	var flat []internal.FileMetadata

	// seen records the encoded digests that already have an entry in flat.
	seen := make(map[string]struct{})
	for i := range mergedEntries {
		entry := &mergedEntries[i]
		if entry.Type != TypeReg {
			continue
		}
		if entry.Digest == "" {
			if entry.Size != 0 {
				return nil, fmt.Errorf("missing digest for %q", entry.Name)
			}
			continue
		}
		parsed, err := digest.Parse(entry.Digest)
		if err != nil {
			return nil, fmt.Errorf("invalid digest for %q: %w", entry.Name, err)
		}
		encoded := parsed.Encoded()
		// Guard the slicing below; a shorter value would otherwise panic.
		if len(encoded) < 2 {
			return nil, fmt.Errorf("digest too short for %q", entry.Name)
		}
		if _, found := seen[encoded]; found {
			continue
		}
		seen[encoded] = struct{}{}

		// Rename a copy so the caller's slice is left untouched.
		renamed := *entry
		renamed.Name = fmt.Sprintf("%s/%s", encoded[0:2], encoded[2:])
		flat = append(flat, renamed)
	}
	return flat, nil
}
func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, differOpts *graphdriver.DifferOptions) (graphdriver.DriverWithDifferOutput, error) {
defer c.layersCache.release()
defer func() {
@ -1332,11 +1372,21 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
}
}()
lcd := chunkedLayerData{
Format: differOpts.Format,
}
json := jsoniter.ConfigCompatibleWithStandardLibrary
lcdBigData, err := json.Marshal(lcd)
if err != nil {
return graphdriver.DriverWithDifferOutput{}, err
}
output := graphdriver.DriverWithDifferOutput{
Differ: c,
TarSplit: c.tarSplit,
BigData: map[string][]byte{
bigDataKey: c.manifest,
bigDataKey: c.manifest,
chunkedLayerDataKey: lcdBigData,
},
TOCDigest: c.tocDigest,
}
@ -1396,6 +1446,21 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
}
defer unix.Close(dirfd)
if differOpts != nil && differOpts.Format == graphdriver.DifferOutputFormatFlat {
mergedEntries, err = makeEntriesFlat(mergedEntries)
if err != nil {
return output, err
}
createdDirs := make(map[string]struct{})
for _, e := range mergedEntries {
d := e.Name[0:2]
if _, found := createdDirs[d]; !found {
unix.Mkdirat(dirfd, d, 0o755)
createdDirs[d] = struct{}{}
}
}
}
// hardlinks can point to missing files. So create them after all files
// are retrieved
var hardLinks []hardLinkToCreate