chunked: support writing files in a flat dir format
so that they can be stored by their digest

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>

parent 8bb5a087ab
commit a50bb95770
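
The flat layout stores each regular file once, addressed by its content digest. A minimal sketch (not part of this commit, using github.com/opencontainers/go-digest) of the checksum[0:2]/checksum[2:] path mapping introduced here:

package main

import (
	"fmt"

	digest "github.com/opencontainers/go-digest"
)

func main() {
	// The encoded digest (without the "sha256:" prefix) is split into a
	// two-character prefix directory and the remainder of the checksum.
	d := digest.FromString("hello world")
	enc := d.Encoded()
	fmt.Printf("%s/%s\n", enc[0:2], enc[2:]) // e.g. b9/4d27b9...
}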

@@ -191,8 +191,21 @@ type DriverWithDifferOutput struct {
 	TOCDigest digest.Digest
 }
 
+type DifferOutputFormat int
+
+const (
+	// DifferOutputFormatDir means the output is a directory and it will
+	// keep the original layout.
+	DifferOutputFormatDir = iota
+	// DifferOutputFormatFlat will store the files by their checksum, in the form
+	// checksum[0:2]/checksum[2:]
+	DifferOutputFormatFlat
+)
+
+// DifferOptions overrides how the differ work
+type DifferOptions struct {
+	// Format defines the destination directory layout format
+	Format DifferOutputFormat
+}
+
 // Differ defines the interface for using a custom differ.
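
The DifferOutputFormat and DifferOptions types above are what callers use to request the layout. A minimal caller-side sketch (the wiring is assumed; per this diff, the differ receives the options as the differOpts argument of its ApplyDiff method):

package main

import (
	graphdriver "github.com/containers/storage/drivers"
)

func main() {
	// Request the flat, digest-addressed layout instead of preserving the
	// original directory tree of the layer.
	opts := graphdriver.DifferOptions{Format: graphdriver.DifferOutputFormatFlat}
	_ = opts // passed along to the Differ's ApplyDiff by the graph driver (sketch only)
}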

@@ -15,6 +15,7 @@ import (
 	"unsafe"
 
 	storage "github.com/containers/storage"
+	graphdriver "github.com/containers/storage/drivers"
 	"github.com/containers/storage/pkg/chunked/internal"
 	"github.com/containers/storage/pkg/ioutils"
 	jsoniter "github.com/json-iterator/go"

@@ -109,7 +110,7 @@ func (c *layersCache) load() error {
 		}
 
 		bigData, err := c.store.LayerBigData(r.ID, cacheKey)
-		// if the cache areadly exists, read and use it
+		// if the cache already exists, read and use it
 		if err == nil {
 			defer bigData.Close()
 			metadata, err := readMetadataFromCache(bigData)

@@ -122,6 +123,23 @@ func (c *layersCache) load() error {
 			return err
 		}
 
+		var lcd chunkedLayerData
+
+		clFile, err := c.store.LayerBigData(r.ID, chunkedLayerDataKey)
+		if err != nil && !errors.Is(err, os.ErrNotExist) {
+			return err
+		}
+		if clFile != nil {
+			cl, err := io.ReadAll(clFile)
+			if err != nil {
+				return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err)
+			}
+			json := jsoniter.ConfigCompatibleWithStandardLibrary
+			if err := json.Unmarshal(cl, &lcd); err != nil {
+				return err
+			}
+		}
+
 		// otherwise create it from the layer TOC.
 		manifestReader, err := c.store.LayerBigData(r.ID, bigDataKey)
 		if err != nil {
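
One consequence of the block above: layers written before this change have no "zstd-chunked-layer-data" big data, so lcd keeps its zero value and the cache is rebuilt as if the directory format had been used. A small sketch of that default (illustrative, written outside the package):

package main

import (
	"fmt"

	graphdriver "github.com/containers/storage/drivers"
)

func main() {
	// DifferOutputFormatDir is the first iota value, so the zero value of
	// DifferOutputFormat selects the original (non-flat) layout.
	var format graphdriver.DifferOutputFormat
	fmt.Println(format == graphdriver.DifferOutputFormatDir) // true
}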

@@ -134,7 +152,7 @@ func (c *layersCache) load() error {
 			return fmt.Errorf("open manifest file for layer %q: %w", r.ID, err)
 		}
 
-		metadata, err := writeCache(manifest, r.ID, c.store)
+		metadata, err := writeCache(manifest, lcd.Format, r.ID, c.store)
 		if err == nil {
 			c.addLayer(r.ID, metadata)
 		}

@@ -211,13 +229,13 @@ type setBigData interface {
 // - digest(file.payload))
 // - digest(digest(file.payload) + file.UID + file.GID + file.mode + file.xattrs)
 // - digest(i) for each i in chunks(file payload)
-func writeCache(manifest []byte, id string, dest setBigData) (*metadata, error) {
+func writeCache(manifest []byte, format graphdriver.DifferOutputFormat, id string, dest setBigData) (*metadata, error) {
 	var vdata bytes.Buffer
 	tagLen := 0
 	digestLen := 0
 	var tagsBuffer bytes.Buffer
 
-	toc, err := prepareMetadata(manifest)
+	toc, err := prepareMetadata(manifest, format)
 	if err != nil {
 		return nil, err
 	}

@@ -396,7 +414,7 @@ func readMetadataFromCache(bigData io.Reader) (*metadata, error) {
 	}, nil
 }
 
-func prepareMetadata(manifest []byte) ([]*internal.FileMetadata, error) {
+func prepareMetadata(manifest []byte, format graphdriver.DifferOutputFormat) ([]*internal.FileMetadata, error) {
 	toc, err := unmarshalToc(manifest)
 	if err != nil {
 		// ignore errors here. They might be caused by a different manifest format.

@@ -404,6 +422,17 @@ func prepareMetadata(manifest []byte) ([]*internal.FileMetadata, error) {
 		return nil, nil //nolint: nilnil
 	}
 
+	switch format {
+	case graphdriver.DifferOutputFormatDir:
+	case graphdriver.DifferOutputFormatFlat:
+		toc.Entries, err = makeEntriesFlat(toc.Entries)
+		if err != nil {
+			return nil, err
+		}
+	default:
+		return nil, fmt.Errorf("unknown format %q", format)
+	}
+
 	var r []*internal.FileMetadata
 	chunkSeen := make(map[string]bool)
 	for i := range toc.Entries {

@@ -420,6 +449,7 @@ func prepareMetadata(manifest []byte) ([]*internal.FileMetadata, error) {
 			chunkSeen[cd] = true
 		}
 	}
+
 	return r, nil
 }
 

@@ -4,8 +4,12 @@ import (
 	"bytes"
 	"fmt"
 	"io"
+	"path/filepath"
 	"reflect"
+	"strings"
 	"testing"
+
+	graphdriver "github.com/containers/storage/drivers"
 )
 
 const jsonTOC = `

@@ -55,7 +59,7 @@ const jsonTOC = `
 `
 
 func TestPrepareMetadata(t *testing.T) {
-	toc, err := prepareMetadata([]byte(jsonTOC))
+	toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatDir)
 	if err != nil {
 		t.Errorf("got error from prepareMetadata: %v", err)
 	}

@@ -64,6 +68,21 @@ func TestPrepareMetadata(t *testing.T) {
 	}
 }
 
+func TestPrepareMetadataFlat(t *testing.T) {
+	toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatFlat)
+	if err != nil {
+		t.Errorf("got error from prepareMetadata: %v", err)
+	}
+	for _, e := range toc {
+		if len(strings.Split(e.Name, "/")) != 2 {
+			t.Error("prepareMetadata returns the wrong number of path elements for flat directories")
+		}
+		if len(filepath.Dir(e.Name)) != 2 {
+			t.Error("prepareMetadata returns the wrong path for flat directories")
+		}
+	}
+}
+
 type bigDataToBuffer struct {
 	buf *bytes.Buffer
 	id  string

@@ -83,7 +102,7 @@ func (b *bigDataToBuffer) SetLayerBigData(id, key string, data io.Reader) error
 }
 
 func TestWriteCache(t *testing.T) {
-	toc, err := prepareMetadata([]byte(jsonTOC))
+	toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatDir)
 	if err != nil {
 		t.Errorf("got error from prepareMetadata: %v", err)
 	}

@@ -91,7 +110,7 @@ func TestWriteCache(t *testing.T) {
 	dest := bigDataToBuffer{
 		buf: bytes.NewBuffer(nil),
 	}
-	cache, err := writeCache([]byte(jsonTOC), "foobar", &dest)
+	cache, err := writeCache([]byte(jsonTOC), graphdriver.DifferOutputFormatDir, "foobar", &dest)
 	if err != nil {
 		t.Errorf("got error from writeCache: %v", err)
 	}

@@ -156,7 +175,7 @@ func TestReadCache(t *testing.T) {
 	dest := bigDataToBuffer{
 		buf: bytes.NewBuffer(nil),
 	}
-	cache, err := writeCache([]byte(jsonTOC), "foobar", &dest)
+	cache, err := writeCache([]byte(jsonTOC), graphdriver.DifferOutputFormatDir, "foobar", &dest)
 	if err != nil {
 		t.Errorf("got error from writeCache: %v", err)
 	}

@@ -28,6 +28,7 @@ import (
 	"github.com/containers/storage/pkg/system"
 	"github.com/containers/storage/types"
 	securejoin "github.com/cyphar/filepath-securejoin"
+	jsoniter "github.com/json-iterator/go"
 	"github.com/klauspost/compress/zstd"
 	"github.com/klauspost/pgzip"
 	digest "github.com/opencontainers/go-digest"

@@ -41,6 +42,8 @@ const (
 	newFileFlags            = (unix.O_CREAT | unix.O_TRUNC | unix.O_EXCL | unix.O_WRONLY)
 	containersOverrideXattr = "user.containers.override_stat"
 	bigDataKey              = "zstd-chunked-manifest"
+	chunkedData             = "zstd-chunked-data"
+	chunkedLayerDataKey     = "zstd-chunked-layer-data"
 
 	fileTypeZstdChunked = iota
 	fileTypeEstargz

@@ -73,6 +76,11 @@ var xattrsToIgnore = map[string]interface{}{
 	"security.selinux": true,
 }
 
+// chunkedLayerData is used to store additional information about the layer
+type chunkedLayerData struct {
+	Format graphdriver.DifferOutputFormat `json:"format"`
+}
+
 func timeToTimespec(time *time.Time) (ts unix.Timespec) {
 	if time == nil || time.IsZero() {
 		// Return UTIME_OMIT special value

@@ -241,7 +249,7 @@ func copyFileFromOtherLayer(file *internal.FileMetadata, source string, name str
 
 	srcFile, err := openFileUnderRoot(name, srcDirfd, unix.O_RDONLY, 0)
 	if err != nil {
-		return false, nil, 0, fmt.Errorf("open source file under target rootfs: %w", err)
+		return false, nil, 0, fmt.Errorf("open source file under target rootfs (%s): %w", name, err)
 	}
 	defer srcFile.Close()
 

@@ -1324,6 +1332,38 @@ func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *internal.FileMetadata, cop
 	return false, nil
 }
 
+func makeEntriesFlat(mergedEntries []internal.FileMetadata) ([]internal.FileMetadata, error) {
+	var new []internal.FileMetadata
+
+	hashes := make(map[string]string)
+	for i := range mergedEntries {
+		if mergedEntries[i].Type != TypeReg {
+			continue
+		}
+		if mergedEntries[i].Digest == "" {
+			if mergedEntries[i].Size != 0 {
+				return nil, fmt.Errorf("missing digest for %q", mergedEntries[i].Name)
+			}
+			continue
+		}
+		digest, err := digest.Parse(mergedEntries[i].Digest)
+		if err != nil {
+			return nil, err
+		}
+		d := digest.Encoded()
+
+		if hashes[d] != "" {
+			continue
+		}
+		hashes[d] = d
+
+		mergedEntries[i].Name = fmt.Sprintf("%s/%s", d[0:2], d[2:])
+
+		new = append(new, mergedEntries[i])
+	}
+	return new, nil
+}
+
 func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, differOpts *graphdriver.DifferOptions) (graphdriver.DriverWithDifferOutput, error) {
 	defer c.layersCache.release()
 	defer func() {

@@ -1332,11 +1372,21 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 		}
 	}()
 
+	lcd := chunkedLayerData{
+		Format: differOpts.Format,
+	}
+
+	json := jsoniter.ConfigCompatibleWithStandardLibrary
+	lcdBigData, err := json.Marshal(lcd)
+	if err != nil {
+		return graphdriver.DriverWithDifferOutput{}, err
+	}
 	output := graphdriver.DriverWithDifferOutput{
 		Differ:   c,
 		TarSplit: c.tarSplit,
 		BigData: map[string][]byte{
-			bigDataKey: c.manifest,
+			bigDataKey:          c.manifest,
+			chunkedLayerDataKey: lcdBigData,
 		},
 		TOCDigest: c.tocDigest,
 	}
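
The lcd value marshalled above is what ends up under the "zstd-chunked-layer-data" big-data key and is later read back by the layers cache. An illustrative sketch of the stored payload (chunkedLayerData is unexported, so a local mirror struct is used; encoding/json stands in for jsoniter's ConfigCompatibleWithStandardLibrary, which produces the same output):

package main

import (
	"encoding/json"
	"fmt"

	graphdriver "github.com/containers/storage/drivers"
)

// layerData mirrors the unexported chunkedLayerData type for illustration.
type layerData struct {
	Format graphdriver.DifferOutputFormat `json:"format"`
}

func main() {
	b, _ := json.Marshal(layerData{Format: graphdriver.DifferOutputFormatFlat})
	fmt.Println(string(b)) // {"format":1}
}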

@@ -1396,6 +1446,21 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 	}
 	defer unix.Close(dirfd)
 
+	if differOpts != nil && differOpts.Format == graphdriver.DifferOutputFormatFlat {
+		mergedEntries, err = makeEntriesFlat(mergedEntries)
+		if err != nil {
+			return output, err
+		}
+		createdDirs := make(map[string]struct{})
+		for _, e := range mergedEntries {
+			d := e.Name[0:2]
+			if _, found := createdDirs[d]; !found {
+				unix.Mkdirat(dirfd, d, 0o755)
+				createdDirs[d] = struct{}{}
+			}
+		}
+	}
+
 	// hardlinks can point to missing files. So create them after all files
 	// are retrieved
 	var hardLinks []hardLinkToCreate
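
After the Mkdirat loop above, a layer applied with DifferOutputFormatFlat contains only two-character prefix directories with digest-named files underneath. A sketch of listing such a layer (the root path is purely illustrative):

package main

import (
	"fmt"
	"io/fs"
	"path/filepath"
)

func main() {
	root := "/path/to/layer/diff" // illustrative; wherever the differ output was written
	_ = filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
		if err != nil || d.IsDir() {
			return nil
		}
		rel, _ := filepath.Rel(root, path)
		fmt.Println(rel) // e.g. "b9/4d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
		return nil
	})
}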