Merge pull request #1896 from giuseppe/release-1.53-fixes-for-composefs

[release-1.53] backport fixes for partial pulls and composefs
Daniel J Walsh 2024-04-22 06:38:40 -04:00 committed by GitHub
commit 5554c8f94a
11 changed files with 894 additions and 381 deletions


@ -0,0 +1,84 @@
package chunked
import (
"encoding/binary"
"hash/crc32"
"io"
)
type bloomFilter struct {
bitArray []uint64
k uint32
}
func newBloomFilter(size int, k uint32) *bloomFilter {
numElements := (size + 63) / 64
return &bloomFilter{
bitArray: make([]uint64, numElements),
k: k,
}
}
func newBloomFilterFromArray(bitArray []uint64, k uint32) *bloomFilter {
return &bloomFilter{
bitArray: bitArray,
k: k,
}
}
func (bf *bloomFilter) hashFn(item []byte, seed uint32) (uint64, uint64) {
if len(item) == 0 {
return 0, 0
}
mod := uint32(len(bf.bitArray) * 64)
seedSplit := seed % uint32(len(item))
hash := (crc32.ChecksumIEEE(item[:seedSplit]) ^ crc32.ChecksumIEEE(item[seedSplit:])) % mod
return uint64(hash / 64), uint64(1 << (hash % 64))
}
func (bf *bloomFilter) add(item []byte) {
for i := uint32(0); i < bf.k; i++ {
index, mask := bf.hashFn(item, i)
bf.bitArray[index] |= mask
}
}
func (bf *bloomFilter) maybeContains(item []byte) bool {
for i := uint32(0); i < bf.k; i++ {
index, mask := bf.hashFn(item, i)
if bf.bitArray[index]&mask == 0 {
return false
}
}
return true
}
func (bf *bloomFilter) writeTo(writer io.Writer) error {
if err := binary.Write(writer, binary.LittleEndian, uint64(len(bf.bitArray))); err != nil {
return err
}
if err := binary.Write(writer, binary.LittleEndian, uint32(bf.k)); err != nil {
return err
}
if err := binary.Write(writer, binary.LittleEndian, bf.bitArray); err != nil {
return err
}
return nil
}
func readBloomFilter(reader io.Reader) (*bloomFilter, error) {
var bloomFilterLen uint64
var k uint32
if err := binary.Read(reader, binary.LittleEndian, &bloomFilterLen); err != nil {
return nil, err
}
if err := binary.Read(reader, binary.LittleEndian, &k); err != nil {
return nil, err
}
bloomFilterArray := make([]uint64, bloomFilterLen)
if err := binary.Read(reader, binary.LittleEndian, &bloomFilterArray); err != nil {
return nil, err
}
return newBloomFilterFromArray(bloomFilterArray, k), nil
}
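
The filter above is write-once in memory and serializes to a simple little-endian layout (word count, k, then the raw bit words). A minimal round-trip sketch, assuming it sits in the same chunked package since all identifiers are unexported (exampleRoundTrip is an illustrative name, not part of the PR; it additionally needs the bytes and errors imports):

func exampleRoundTrip() error {
	bf := newBloomFilter(1024, 3) // 1024 bits, 3 hash functions
	item := []byte("sha256:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03")
	bf.add(item)

	// Serialize and deserialize: header (length, k) plus the bit array.
	var buf bytes.Buffer
	if err := bf.writeTo(&buf); err != nil {
		return err
	}
	restored, err := readBloomFilter(&buf)
	if err != nil {
		return err
	}
	// A Bloom filter can return false positives but never false negatives:
	// anything that was added must report true.
	if !restored.maybeContains(item) {
		return errors.New("unexpected false negative")
	}
	return nil
}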


@ -0,0 +1,127 @@
package chunked
import (
"bytes"
"io"
"testing"
digest "github.com/opencontainers/go-digest"
"github.com/stretchr/testify/assert"
)
var (
presentDigestInCache string
notPresentDigestInCache string
presentDigestInCacheBinary []byte
notPresentDigestInCacheBinary []byte
preloadedCache *cacheFile
preloadedbloomFilter *bloomFilter
benchmarkN int = 100000
)
// Using 3 hash functions and 10 bits per element (m/n = 10) gives a false positive rate of ~1.7%:
// https://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
var (
factorNM int = 10
numberHashes uint32 = 3
)
func initCache(sizeCache int) (*cacheFile, string, string, *bloomFilter) {
var tagsBuffer bytes.Buffer
var vdata bytes.Buffer
var fnames bytes.Buffer
tags := [][]byte{}
tagLen := 0
digestLen := 64
var presentDigest, notPresentDigest string
bloomFilter := newBloomFilter(sizeCache*factorNM, numberHashes)
digester := digest.Canonical.Digester()
hash := digester.Hash()
for i := 0; i < sizeCache; i++ {
hash.Write([]byte("1"))
d := digester.Digest().String()
digestLen = len(d)
presentDigest = d
tag, err := appendTag([]byte(d), 0, 0)
if err != nil {
panic(err)
}
tagLen = len(tag)
tags = append(tags, tag)
bd, err := makeBinaryDigest(d)
if err != nil {
panic(err)
}
bloomFilter.add(bd)
}
hash.Write([]byte("1"))
notPresentDigest = digester.Digest().String()
writeCacheFileToWriter(io.Discard, bloomFilter, tags, tagLen, digestLen, vdata, fnames, &tagsBuffer)
cache := &cacheFile{
digestLen: digestLen,
tagLen: tagLen,
tags: tagsBuffer.Bytes(),
vdata: vdata.Bytes(),
}
return cache, presentDigest, notPresentDigest, bloomFilter
}
func init() {
var err error
preloadedCache, presentDigestInCache, notPresentDigestInCache, preloadedbloomFilter = initCache(10000)
presentDigestInCacheBinary, err = makeBinaryDigest(presentDigestInCache)
if err != nil {
panic(err)
}
notPresentDigestInCacheBinary, err = makeBinaryDigest(notPresentDigestInCache)
if err != nil {
panic(err)
}
}
func BenchmarkLookupBloomFilter(b *testing.B) {
for i := 0; i < benchmarkN; i++ {
if preloadedbloomFilter.maybeContains(notPresentDigestInCacheBinary) {
findTag(notPresentDigestInCache, preloadedCache)
}
if preloadedbloomFilter.maybeContains(presentDigestInCacheBinary) {
findTag(presentDigestInCache, preloadedCache)
}
}
}
func BenchmarkLookupBloomRaw(b *testing.B) {
for i := 0; i < benchmarkN; i++ {
findTag(notPresentDigestInCache, preloadedCache)
findTag(presentDigestInCache, preloadedCache)
}
}
func TestBloomFilter(t *testing.T) {
bloomFilter := newBloomFilter(1000, 1)
digester := digest.Canonical.Digester()
hash := digester.Hash()
for i := 0; i < 1000; i++ {
hash.Write([]byte("1"))
d := digester.Digest().String()
bd, err := makeBinaryDigest(d)
assert.NoError(t, err)
contains := bloomFilter.maybeContains(bd)
assert.False(t, contains)
}
for i := 0; i < 1000; i++ {
hash.Write([]byte("1"))
d := digester.Digest().String()
bd, err := makeBinaryDigest(d)
assert.NoError(t, err)
bloomFilter.add(bd)
contains := bloomFilter.maybeContains(bd)
assert.True(t, contains)
}
}
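
The ~1.7% figure in the comment above follows from the standard Bloom filter approximation p ≈ (1 - e^(-k/r))^k, where r is bits per element (factorNM) and k the number of hash functions (numberHashes). A standalone check:

package main

import (
	"fmt"
	"math"
)

func main() {
	k := 3.0  // numberHashes
	r := 10.0 // factorNM, bits per element
	p := math.Pow(1-math.Exp(-k/r), k)
	fmt.Printf("expected false-positive rate: %.4f\n", p) // prints 0.0174
}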

File diff suppressed because it is too large.


@ -55,32 +55,49 @@ const jsonTOC = `
"chunkSize": 86252,
"chunkOffset": 17615,
"chunkDigest": "sha256:2a9d3f1b6b37abc8bb35eb8fa98b893a2a2447bcb01184c3bafc8c6b40da099d"
},
{
"type": "reg",
"name": "usr/lib/systemd/system/system-systemd\\x2dcryptsetup.slice",
"mode": 420,
"size": 468,
"modtime": "2024-03-03T18:04:57+01:00",
"accesstime": "0001-01-01T00:00:00Z",
"changetime": "0001-01-01T00:00:00Z",
"digest": "sha256:68dc6e85631e077f2bc751352459823844911b93b7ba2afd95d96c893222bb50",
"offset": 148185424,
"endOffset": 148185753
},
{
"type": "reg",
"name": "usr/lib/systemd/system/system-systemd\\x2dcryptsetup-hardlink.slice",
"linkName": "usr/lib/systemd/system/system-systemd\\x2dcryptsetup.slice"
}
]
}
`
func TestPrepareMetadata(t *testing.T) {
toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatDir)
toc, err := prepareCacheFile([]byte(jsonTOC), graphdriver.DifferOutputFormatDir)
if err != nil {
t.Errorf("got error from prepareMetadata: %v", err)
t.Errorf("got error from prepareCacheFile: %v", err)
}
if len(toc) != 2 {
t.Error("prepareMetadata returns the wrong length")
if len(toc) != 4 {
t.Error("prepareCacheFile returns the wrong length")
}
}
func TestPrepareMetadataFlat(t *testing.T) {
toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatFlat)
toc, err := prepareCacheFile([]byte(jsonTOC), graphdriver.DifferOutputFormatFlat)
if err != nil {
t.Errorf("got error from prepareMetadata: %v", err)
t.Errorf("got error from prepareCacheFile: %v", err)
}
for _, e := range toc {
if len(strings.Split(e.Name, "/")) != 2 {
t.Error("prepareMetadata returns the wrong number of path elements for flat directories")
t.Error("prepareCacheFile returns the wrong number of path elements for flat directories")
}
if len(filepath.Dir(e.Name)) != 2 {
t.Error("prepareMetadata returns the wrong path for flat directories")
t.Error("prepareCacheFile returns the wrong path for flat directories")
}
}
}
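
In the flat layout every regular file is stored under its content digest, split into a two-character directory plus the remainder, which is why the assertions above expect exactly two path elements and a directory name of length two. The transformation itself (mirroring what makeEntriesFlat does later in this PR; d is a placeholder digest):

d := "5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03"
flatName := fmt.Sprintf("%s/%s", d[0:2], d[2:])
// filepath.Dir(flatName) == "58", and strings.Split(flatName, "/") has exactly 2 elements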
@ -103,10 +120,25 @@ func (b *bigDataToBuffer) SetLayerBigData(id, key string, data io.Reader) error
return err
}
func TestWriteCache(t *testing.T) {
toc, err := prepareMetadata([]byte(jsonTOC), graphdriver.DifferOutputFormatDir)
func findTag(digest string, cacheFile *cacheFile) (string, uint64, uint64) {
binaryDigest, err := makeBinaryDigest(digest)
if err != nil {
t.Errorf("got error from prepareMetadata: %v", err)
return "", 0, 0
}
if len(binaryDigest) != cacheFile.digestLen {
return "", 0, 0
}
found, off, len := findBinaryTag(binaryDigest, cacheFile)
if found {
return digest, off, len
}
return "", 0, 0
}
func TestWriteCache(t *testing.T) {
toc, err := prepareCacheFile([]byte(jsonTOC), graphdriver.DifferOutputFormatDir)
if err != nil {
t.Errorf("got error from prepareCacheFile: %v", err)
}
dest := bigDataToBuffer{
@ -116,25 +148,26 @@ func TestWriteCache(t *testing.T) {
if err != nil {
t.Errorf("got error from writeCache: %v", err)
}
if digest, _, _ := findTag("foobar", cache); digest != "" {
t.Error("found invalid tag")
if digest, _, _ := findTag("sha256:99fe908c699dc068438b23e28319cadff1f2153c3043bafb8e83a430bba0a2c2", cache); digest != "" {
t.Error("a present tag was not found")
}
for _, r := range toc {
if r.Digest != "" {
// find the element in the cache by the digest checksum
digest, off, len := findTag(r.Digest, cache)
digest, off, lenTag := findTag(r.Digest, cache)
if digest == "" {
t.Error("file tag not found")
}
if digest != r.Digest {
t.Error("wrong file found")
}
expectedLocation := generateFileLocation(r.Name, 0, uint64(r.Size))
location := cache.vdata[off : off+len]
if !bytes.Equal(location, expectedLocation) {
t.Errorf("wrong file found %q instead of %q", location, expectedLocation)
}
location := cache.vdata[off : off+lenTag]
_, offFile, fileSize, err := parseFileLocation(location)
assert.NoError(t, err)
assert.Equal(t, fileSize, uint64(r.Size))
assert.Equal(t, offFile, uint64(0))
fingerprint, err := calculateHardLinkFingerprint(r)
if err != nil {
@ -142,18 +175,19 @@ func TestWriteCache(t *testing.T) {
}
// find the element in the cache by the hardlink fingerprint
digest, off, len = findTag(fingerprint, cache)
digest, off, lenTag = findTag(fingerprint, cache)
if digest == "" {
t.Error("file tag not found")
}
if digest != fingerprint {
t.Error("wrong file found")
}
expectedLocation = generateFileLocation(r.Name, 0, uint64(r.Size))
location = cache.vdata[off : off+len]
if !bytes.Equal(location, expectedLocation) {
t.Errorf("wrong file found %q instead of %q", location, expectedLocation)
}
location = cache.vdata[off : off+lenTag]
_, offFile, fileSize, err = parseFileLocation(location)
assert.NoError(t, err)
assert.Equal(t, fileSize, uint64(r.Size))
assert.Equal(t, offFile, uint64(0))
}
if r.ChunkDigest != "" {
// find the element in the cache by the chunk digest checksum
@ -164,7 +198,7 @@ func TestWriteCache(t *testing.T) {
if digest != r.ChunkDigest {
t.Error("wrong digest found")
}
expectedLocation := generateFileLocation(r.Name, uint64(r.ChunkOffset), uint64(r.ChunkSize))
expectedLocation := generateFileLocation(0, uint64(r.ChunkOffset), uint64(r.ChunkSize))
location := cache.vdata[off : off+len]
if !bytes.Equal(location, expectedLocation) {
t.Errorf("wrong file found %q instead of %q", location, expectedLocation)
@ -182,7 +216,7 @@ func TestReadCache(t *testing.T) {
t.Errorf("got error from writeCache: %v", err)
}
cacheRead, err := readMetadataFromCache(dest.buf)
cacheRead, err := readCacheFileFromMemory(dest.buf.Bytes())
if err != nil {
t.Errorf("got error from readMetadataFromCache: %v", err)
}
@ -194,7 +228,7 @@ func TestReadCache(t *testing.T) {
func TestUnmarshalToc(t *testing.T) {
toc, err := unmarshalToc([]byte(jsonTOC))
assert.NoError(t, err)
assert.Equal(t, 4, len(toc.Entries))
assert.Equal(t, 6, len(toc.Entries))
_, err = unmarshalToc([]byte(jsonTOC + " \n\n\n\n "))
assert.NoError(t, err)
@ -210,4 +244,20 @@ func TestUnmarshalToc(t *testing.T) {
assert.Error(t, err)
_, err = unmarshalToc([]byte(jsonTOC + "123"))
assert.Error(t, err)
assert.Equal(t, toc.Entries[4].Name, "usr/lib/systemd/system/system-systemd\\x2dcryptsetup.slice", "invalid name escaped")
assert.Equal(t, toc.Entries[5].Name, "usr/lib/systemd/system/system-systemd\\x2dcryptsetup-hardlink.slice", "invalid name escaped")
assert.Equal(t, toc.Entries[5].Linkname, "usr/lib/systemd/system/system-systemd\\x2dcryptsetup.slice", "invalid link name escaped")
}
func TestMakeBinaryDigest(t *testing.T) {
binDigest, err := makeBinaryDigest("sha256:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03")
assert.NoError(t, err)
expected := []byte{0x73, 0x68, 0x61, 0x32, 0x35, 0x36, 0x3a, 0x58, 0x91, 0xb5, 0xb5, 0x22, 0xd5, 0xdf, 0x8, 0x6d, 0xf, 0xf0, 0xb1, 0x10, 0xfb, 0xd9, 0xd2, 0x1b, 0xb4, 0xfc, 0x71, 0x63, 0xaf, 0x34, 0xd0, 0x82, 0x86, 0xa2, 0xe8, 0x46, 0xf6, 0xbe, 0x3}
assert.Equal(t, expected, binDigest)
_, err = makeBinaryDigest("sha256:foo")
assert.Error(t, err)
_, err = makeBinaryDigest("noAlgorithm")
assert.Error(t, err)
}
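
The expected bytes in TestMakeBinaryDigest are the ASCII prefix "sha256:" (0x73 0x68 … 0x3a) followed by the hex-decoded digest value, so makeBinaryDigest plausibly halves the textual digest along these lines (a hedged reconstruction for illustration, not the PR's actual implementation; makeBinaryDigestSketch is a made-up name, using encoding/hex and github.com/opencontainers/go-digest):

func makeBinaryDigestSketch(s string) ([]byte, error) {
	d, err := digest.Parse(s) // rejects both "sha256:foo" and "noAlgorithm"
	if err != nil {
		return nil, err
	}
	raw, err := hex.DecodeString(d.Encoded()) // 64 hex chars -> 32 bytes
	if err != nil {
		return nil, err
	}
	out := []byte(d.Algorithm().String() + ":")
	return append(out, raw...), nil
}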


@ -7,7 +7,6 @@ import (
"io"
"strconv"
"github.com/containerd/stargz-snapshotter/estargz"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/klauspost/compress/zstd"
"github.com/klauspost/pgzip"
@ -33,7 +32,7 @@ func typeToTarType(t string) (byte, error) {
return r, nil
}
func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, annotations map[string]string) ([]byte, int64, error) {
func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, tocDigest digest.Digest) ([]byte, int64, error) {
// information on the format here https://github.com/containerd/stargz-snapshotter/blob/main/docs/stargz-estargz.md
footerSize := int64(51)
if blobSize <= footerSize {
@ -126,11 +125,7 @@ func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64,
return nil, 0, err
}
d, err := digest.Parse(annotations[estargz.TOCJSONDigestAnnotation])
if err != nil {
return nil, 0, err
}
if manifestDigester.Digest() != d {
if manifestDigester.Digest() != tocDigest {
return nil, 0, errors.New("invalid manifest checksum")
}
@ -140,7 +135,7 @@ func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64,
// readZstdChunkedManifest reads the zstd:chunked manifest from the seekable stream blobStream. The blob total size must
// be specified.
// This function uses the io.github.containers.zstd-chunked. annotations when specified.
func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, annotations map[string]string) ([]byte, []byte, int64, error) {
func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, tocDigest digest.Digest, annotations map[string]string) ([]byte, []byte, int64, error) {
footerSize := int64(internal.FooterSizeSupported)
if blobSize <= footerSize {
return nil, nil, 0, errors.New("blob too small")
@ -238,7 +233,7 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, ann
return nil, nil, 0, err
}
decodedBlob, err := decodeAndValidateBlob(manifest, footerData.LengthUncompressed, footerData.ChecksumAnnotation)
decodedBlob, err := decodeAndValidateBlob(manifest, footerData.LengthUncompressed, tocDigest.String())
if err != nil {
return nil, nil, 0, err
}
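
Passing tocDigest explicitly moves digest parsing to the caller (GetDiffer, below) and leaves both readers with a plain comparison against the digest of the bytes they fetched. With the go-digest API the check reduces to roughly this (validateTOC is an illustrative name, not part of the PR):

func validateTOC(manifest []byte, tocDigest digest.Digest) error {
	verifier := tocDigest.Verifier()
	if _, err := verifier.Write(manifest); err != nil {
		return err
	}
	if !verifier.Verified() {
		return errors.New("invalid manifest checksum")
	}
	return nil
}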


@ -52,7 +52,7 @@ func escaped(val string, escape int) string {
if noescapeSpace {
hexEscape = !unicode.IsPrint(rune(c))
} else {
hexEscape = !unicode.IsGraphic(rune(c))
hexEscape = !unicode.IsPrint(rune(c)) || unicode.IsSpace(rune(c))
}
}
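
The behavioral difference is confined to whitespace: unicode.IsGraphic treats the ASCII space (and other Zs characters) as graphic, so the old predicate never escaped it; the new predicate escapes every space unless NOESCAPE_SPACE is set. A standalone comparison of the two predicates:

package main

import (
	"fmt"
	"unicode"
)

func main() {
	c := ' '
	oldEscapes := !unicode.IsGraphic(c)                     // false: space was left literal
	newEscapes := !unicode.IsPrint(c) || unicode.IsSpace(c) // true: space becomes \x20
	fmt.Println(oldEscapes, newEscapes)
}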


@ -14,7 +14,6 @@ func TestEscaped(t *testing.T) {
escape int
want string
}{
{"Hello, World!", 0, "Hello, World!"},
{"12345", 0, "12345"},
{"", 0, ""},
{"\n", 0, "\\n"},
@ -25,9 +24,12 @@ func TestEscaped(t *testing.T) {
{"foo=bar", ESCAPE_EQUAL, "foo\\x3dbar"},
{"-", ESCAPE_LONE_DASH, "\\x2d"},
{"\n", NOESCAPE_SPACE, "\\n"},
{" ", 0, "\\x20"},
{" ", NOESCAPE_SPACE, " "},
{"\t", NOESCAPE_SPACE, "\\t"},
{"\n\t", NOESCAPE_SPACE, "\\n\\t"},
{"Hello World!", 0, "Hello\\x20World!"},
{"Hello World!", NOESCAPE_SPACE, "Hello World!"},
}
for _, test := range tests {


@ -21,9 +21,6 @@ import (
type TOC struct {
Version int `json:"version"`
Entries []FileMetadata `json:"entries"`
// internal: used by unmarshalToc
StringsBuf bytes.Buffer `json:"-"`
}
type FileMetadata struct {
@ -48,9 +45,6 @@ type FileMetadata struct {
ChunkOffset int64 `json:"chunkOffset,omitempty"`
ChunkDigest string `json:"chunkDigest,omitempty"`
ChunkType string `json:"chunkType,omitempty"`
// internal: computed by mergeTOCEntries.
Chunks []*FileMetadata `json:"-"`
}
const (
@ -189,7 +183,6 @@ func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, off
Offset: manifestOffset,
LengthCompressed: uint64(len(compressedManifest)),
LengthUncompressed: uint64(len(manifest)),
ChecksumAnnotation: "", // unused
OffsetTarSplit: uint64(tarSplitOffset),
LengthCompressedTarSplit: uint64(len(tarSplitData.Data)),
LengthUncompressedTarSplit: uint64(tarSplitData.UncompressedSize),
@ -213,7 +206,6 @@ type ZstdChunkedFooterData struct {
Offset uint64
LengthCompressed uint64
LengthUncompressed uint64
ChecksumAnnotation string // Only used when reading a layer, not when creating it
OffsetTarSplit uint64
LengthCompressedTarSplit uint64
@ -240,11 +232,6 @@ func footerDataToBlob(footer ZstdChunkedFooterData) []byte {
func ReadFooterDataFromAnnotations(annotations map[string]string) (ZstdChunkedFooterData, error) {
var footerData ZstdChunkedFooterData
footerData.ChecksumAnnotation = annotations[ManifestChecksumKey]
if footerData.ChecksumAnnotation == "" {
return footerData, fmt.Errorf("manifest checksum annotation %q not found", ManifestChecksumKey)
}
offsetMetadata := annotations[ManifestInfoKey]
if _, err := fmt.Sscanf(offsetMetadata, "%d:%d:%d:%d", &footerData.Offset, &footerData.LengthCompressed, &footerData.LengthUncompressed, &footerData.ManifestType); err != nil {
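
With the checksum annotation gone, the only thing recovered from annotations is the ManifestInfoKey quadruple: a colon-separated string of offset, compressed length, uncompressed length and manifest type. A round-trip sketch of that format (the literal values are placeholders):

meta := fmt.Sprintf("%d:%d:%d:%d", 1024, 512, 2048, 1)
var offset, lengthCompressed, lengthUncompressed, manifestType uint64
if _, err := fmt.Sscanf(meta, "%d:%d:%d:%d",
	&offset, &lengthCompressed, &lengthUncompressed, &manifestType); err != nil {
	// malformed annotation
}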


@ -15,7 +15,6 @@ func TestGenerateAndReadFooter(t *testing.T) {
Offset: 2,
LengthCompressed: 3,
LengthUncompressed: 4,
ChecksumAnnotation: "", // unused
OffsetTarSplit: 5,
LengthCompressedTarSplit: 6,
LengthUncompressedTarSplit: 7,


@ -25,6 +25,7 @@ import (
"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/chunked/compressor"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/containers/storage/pkg/chunked/toc"
"github.com/containers/storage/pkg/fsverity"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/system"
@ -58,6 +59,21 @@ const (
copyGoRoutines = 32
)
// fileMetadata is a wrapper around internal.FileMetadata with additional private fields that
// are not part of the TOC document.
// TypeChunk entries are stored in the chunks field; the primary [fileMetadata] entries never use TypeChunk.
type fileMetadata struct {
internal.FileMetadata
// chunks stores the TypeChunk entries relevant to this entry when FileMetadata.Type == TypeReg.
chunks []*internal.FileMetadata
// skipSetAttrs is set when the file attributes must not be
// modified, e.g. it is a hard link from a different source,
// or a composefs file.
skipSetAttrs bool
}
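
Call sites construct the wrapper by embedding the TOC entry and setting the private flag where needed, e.g. (an illustrative literal; the real ones appear in canDedupFileWithHardLink and makeEntriesFlat below):

entry := fileMetadata{
	FileMetadata: internal.FileMetadata{Name: "etc/hostname"},
	skipSetAttrs: true, // e.g. a composefs-backed file whose attrs are managed elsewhere
}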
type compressedFileType int
type chunkedDiffer struct {
@ -138,7 +154,8 @@ func doHardLink(srcFd int, destDirFd int, destBase string) error {
return err
}
func copyFileContent(srcFd int, destFile string, dirfd int, mode os.FileMode, useHardLinks bool) (*os.File, int64, error) {
func copyFileContent(srcFd int, fileMetadata *fileMetadata, dirfd int, mode os.FileMode, useHardLinks bool) (*os.File, int64, error) {
destFile := fileMetadata.Name
src := fmt.Sprintf("/proc/self/fd/%d", srcFd)
st, err := os.Stat(src)
if err != nil {
@ -156,6 +173,8 @@ func copyFileContent(srcFd int, destFile string, dirfd int, mode os.FileMode, us
err := doHardLink(srcFd, int(destDir.Fd()), destBase)
if err == nil {
// if the file was deduplicated with a hard link, skip overriding file metadata.
fileMetadata.skipSetAttrs = true
return nil, st.Size(), nil
}
}
@ -198,15 +217,15 @@ func (f *seekableFile) GetBlobAt(chunks []ImageSourceChunk) (chan io.ReadCloser,
return streams, errs, nil
}
func convertTarToZstdChunked(destDirectory string, payload *os.File) (*seekableFile, digest.Digest, map[string]string, error) {
func convertTarToZstdChunked(destDirectory string, payload *os.File) (int64, *seekableFile, digest.Digest, map[string]string, error) {
diff, err := archive.DecompressStream(payload)
if err != nil {
return nil, "", nil, err
return 0, nil, "", nil, err
}
fd, err := unix.Open(destDirectory, unix.O_TMPFILE|unix.O_RDWR|unix.O_CLOEXEC, 0o600)
if err != nil {
return nil, "", nil, err
return 0, nil, "", nil, err
}
f := os.NewFile(uintptr(fd), destDirectory)
@ -216,23 +235,24 @@ func convertTarToZstdChunked(destDirectory string, payload *os.File) (*seekableF
chunked, err := compressor.ZstdCompressor(f, newAnnotations, &level)
if err != nil {
f.Close()
return nil, "", nil, err
return 0, nil, "", nil, err
}
convertedOutputDigester := digest.Canonical.Digester()
if _, err := io.Copy(io.MultiWriter(chunked, convertedOutputDigester.Hash()), diff); err != nil {
copied, err := io.Copy(io.MultiWriter(chunked, convertedOutputDigester.Hash()), diff)
if err != nil {
f.Close()
return nil, "", nil, err
return 0, nil, "", nil, err
}
if err := chunked.Close(); err != nil {
f.Close()
return nil, "", nil, err
return 0, nil, "", nil, err
}
is := seekableFile{
file: f,
}
return &is, convertedOutputDigester.Digest(), newAnnotations, nil
return copied, &is, convertedOutputDigester.Digest(), newAnnotations, nil
}
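
The new first return value is the count of uncompressed tar bytes that flowed through the compressor. Since the same io.Copy already feeds the digester, counting comes for free; the pattern in isolation (compressAndCount is an illustrative name):

// One io.Copy both compresses and digests the stream; its return value
// is the uncompressed tar size that ApplyDiff later records.
func compressAndCount(compressor io.Writer, tarStream io.Reader) (int64, digest.Digest, error) {
	digester := digest.Canonical.Digester()
	copied, err := io.Copy(io.MultiWriter(compressor, digester.Hash()), tarStream)
	if err != nil {
		return 0, "", err
	}
	return copied, digester.Digest(), nil
}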
// GetDiffer returns a differ that can be used with ApplyDiffWithDiffer.
@ -246,18 +266,26 @@ func GetDiffer(ctx context.Context, store storage.Store, blobDigest digest.Diges
return nil, errors.New("enable_partial_images not configured")
}
_, hasZstdChunkedTOC := annotations[internal.ManifestChecksumKey]
_, hasEstargzTOC := annotations[estargz.TOCJSONDigestAnnotation]
zstdChunkedTOCDigestString, hasZstdChunkedTOC := annotations[internal.ManifestChecksumKey]
estargzTOCDigestString, hasEstargzTOC := annotations[estargz.TOCJSONDigestAnnotation]
if hasZstdChunkedTOC && hasEstargzTOC {
return nil, errors.New("both zstd:chunked and eStargz TOC found")
}
if hasZstdChunkedTOC {
return makeZstdChunkedDiffer(ctx, store, blobSize, annotations, iss, &storeOpts)
zstdChunkedTOCDigest, err := digest.Parse(zstdChunkedTOCDigestString)
if err != nil {
return nil, fmt.Errorf("parsing zstd:chunked TOC digest %q: %w", zstdChunkedTOCDigestString, err)
}
return makeZstdChunkedDiffer(ctx, store, blobSize, zstdChunkedTOCDigest, annotations, iss, &storeOpts)
}
if hasEstargzTOC {
return makeEstargzChunkedDiffer(ctx, store, blobSize, annotations, iss, &storeOpts)
estargzTOCDigest, err := digest.Parse(estargzTOCDigestString)
if err != nil {
return nil, fmt.Errorf("parsing estargz TOC digest %q: %w", estargzTOCDigestString, err)
}
return makeEstargzChunkedDiffer(ctx, store, blobSize, estargzTOCDigest, iss, &storeOpts)
}
return makeConvertFromRawDiffer(ctx, store, blobDigest, blobSize, annotations, iss, &storeOpts)
@ -285,8 +313,8 @@ func makeConvertFromRawDiffer(ctx context.Context, store storage.Store, blobDige
}, nil
}
func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) {
manifest, tarSplit, tocOffset, err := readZstdChunkedManifest(iss, blobSize, annotations)
func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, tocDigest digest.Digest, annotations map[string]string, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) {
manifest, tarSplit, tocOffset, err := readZstdChunkedManifest(iss, blobSize, tocDigest, annotations)
if err != nil {
return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
}
@ -295,11 +323,6 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in
return nil, err
}
tocDigest, err := digest.Parse(annotations[internal.ManifestChecksumKey])
if err != nil {
return nil, fmt.Errorf("parse TOC digest %q: %w", annotations[internal.ManifestChecksumKey], err)
}
return &chunkedDiffer{
fsVerityDigests: make(map[string]string),
blobSize: blobSize,
@ -315,8 +338,8 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in
}, nil
}
func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) {
manifest, tocOffset, err := readEstargzChunkedManifest(iss, blobSize, annotations)
func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, tocDigest digest.Digest, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) {
manifest, tocOffset, err := readEstargzChunkedManifest(iss, blobSize, tocDigest)
if err != nil {
return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
}
@ -325,11 +348,6 @@ func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize
return nil, err
}
tocDigest, err := digest.Parse(annotations[estargz.TOCJSONDigestAnnotation])
if err != nil {
return nil, fmt.Errorf("parse TOC digest %q: %w", annotations[estargz.TOCJSONDigestAnnotation], err)
}
return &chunkedDiffer{
fsVerityDigests: make(map[string]string),
blobSize: blobSize,
@ -354,7 +372,7 @@ func makeCopyBuffer() []byte {
// name is the path to the file to copy in source.
// dirfd is an open file descriptor to the destination root directory.
// useHardLinks defines whether the deduplication can be performed using hard links.
func copyFileFromOtherLayer(file *internal.FileMetadata, source string, name string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
func copyFileFromOtherLayer(file *fileMetadata, source string, name string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
srcDirfd, err := unix.Open(source, unix.O_RDONLY, 0)
if err != nil {
return false, nil, 0, fmt.Errorf("open source file: %w", err)
@ -367,7 +385,7 @@ func copyFileFromOtherLayer(file *internal.FileMetadata, source string, name str
}
defer srcFile.Close()
dstFile, written, err := copyFileContent(int(srcFile.Fd()), file.Name, dirfd, 0, useHardLinks)
dstFile, written, err := copyFileContent(int(srcFile.Fd()), file, dirfd, 0, useHardLinks)
if err != nil {
return false, nil, 0, fmt.Errorf("copy content to %q: %w", file.Name, err)
}
@ -376,7 +394,7 @@ func copyFileFromOtherLayer(file *internal.FileMetadata, source string, name str
// canDedupMetadataWithHardLink says whether it is possible to deduplicate file with otherFile.
// It checks that the two files have the same UID, GID, file mode and xattrs.
func canDedupMetadataWithHardLink(file *internal.FileMetadata, otherFile *internal.FileMetadata) bool {
func canDedupMetadataWithHardLink(file *fileMetadata, otherFile *fileMetadata) bool {
if file.UID != otherFile.UID {
return false
}
@ -394,7 +412,7 @@ func canDedupMetadataWithHardLink(file *internal.FileMetadata, otherFile *intern
// canDedupFileWithHardLink checks if the specified file can be deduplicated by an
// open file, given its descriptor and stat data.
func canDedupFileWithHardLink(file *internal.FileMetadata, fd int, s os.FileInfo) bool {
func canDedupFileWithHardLink(file *fileMetadata, fd int, s os.FileInfo) bool {
st, ok := s.Sys().(*syscall.Stat_t)
if !ok {
return false
@ -420,11 +438,13 @@ func canDedupFileWithHardLink(file *internal.FileMetadata, fd int, s os.FileInfo
xattrs[x] = string(v)
}
// fill only the attributes used by canDedupMetadataWithHardLink.
otherFile := internal.FileMetadata{
UID: int(st.Uid),
GID: int(st.Gid),
Mode: int64(st.Mode),
Xattrs: xattrs,
otherFile := fileMetadata{
FileMetadata: internal.FileMetadata{
UID: int(st.Uid),
GID: int(st.Gid),
Mode: int64(st.Mode),
Xattrs: xattrs,
},
}
return canDedupMetadataWithHardLink(file, &otherFile)
}
@ -434,7 +454,7 @@ func canDedupFileWithHardLink(file *internal.FileMetadata, fd int, s os.FileInfo
// ostreeRepos is a list of OSTree repos.
// dirfd is an open fd to the destination checkout.
// useHardLinks defines whether the deduplication can be performed using hard links.
func findFileInOSTreeRepos(file *internal.FileMetadata, ostreeRepos []string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
func findFileInOSTreeRepos(file *fileMetadata, ostreeRepos []string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
digest, err := digest.Parse(file.Digest)
if err != nil {
logrus.Debugf("could not parse digest: %v", err)
@ -467,7 +487,7 @@ func findFileInOSTreeRepos(file *internal.FileMetadata, ostreeRepos []string, di
continue
}
dstFile, written, err := copyFileContent(fd, file.Name, dirfd, 0, useHardLinks)
dstFile, written, err := copyFileContent(fd, file, dirfd, 0, useHardLinks)
if err != nil {
logrus.Debugf("could not copyFileContent: %v", err)
return false, nil, 0, nil
@ -487,7 +507,7 @@ func findFileInOSTreeRepos(file *internal.FileMetadata, ostreeRepos []string, di
// file is the file to look for.
// dirfd is an open file descriptor to the checkout root directory.
// useHardLinks defines whether the deduplication can be performed using hard links.
func findFileInOtherLayers(cache *layersCache, file *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
func findFileInOtherLayers(cache *layersCache, file *fileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
target, name, err := cache.findFileInOtherLayers(file, useHardLinks)
if err != nil || name == "" {
return false, nil, 0, err
@ -495,7 +515,7 @@ func findFileInOtherLayers(cache *layersCache, file *internal.FileMetadata, dirf
return copyFileFromOtherLayer(file, target, name, dirfd, useHardLinks)
}
func maybeDoIDRemap(manifest []internal.FileMetadata, options *archive.TarOptions) error {
func maybeDoIDRemap(manifest []fileMetadata, options *archive.TarOptions) error {
if options.ChownOpts == nil && len(options.UIDMaps) == 0 || len(options.GIDMaps) == 0 {
return nil
}
@ -529,7 +549,7 @@ func mapToSlice(inputMap map[uint32]struct{}) []uint32 {
return out
}
func collectIDs(entries []internal.FileMetadata) ([]uint32, []uint32) {
func collectIDs(entries []fileMetadata) ([]uint32, []uint32) {
uids := make(map[uint32]struct{})
gids := make(map[uint32]struct{})
for _, entry := range entries {
@ -549,7 +569,7 @@ type missingFileChunk struct {
Gap int64
Hole bool
File *internal.FileMetadata
File *fileMetadata
CompressedSize int64
UncompressedSize int64
@ -582,7 +602,10 @@ func (o *originFile) OpenFile() (io.ReadCloser, error) {
}
// setFileAttrs sets the file attributes for file given metadata
func setFileAttrs(dirfd int, file *os.File, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions, usePath bool) error {
func setFileAttrs(dirfd int, file *os.File, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions, usePath bool) error {
if metadata.skipSetAttrs {
return nil
}
if file == nil || file.Fd() < 0 {
return errors.New("invalid file")
}
@ -944,14 +967,14 @@ type destinationFile struct {
dirfd int
file *os.File
hash hash.Hash
metadata *internal.FileMetadata
metadata *fileMetadata
options *archive.TarOptions
skipValidation bool
to io.Writer
recordFsVerity recordFsVerityFunc
}
func openDestinationFile(dirfd int, metadata *internal.FileMetadata, options *archive.TarOptions, skipValidation bool, recordFsVerity recordFsVerityFunc) (*destinationFile, error) {
func openDestinationFile(dirfd int, metadata *fileMetadata, options *archive.TarOptions, skipValidation bool, recordFsVerity recordFsVerityFunc) (*destinationFile, error) {
file, err := openFileUnderRoot(metadata.Name, dirfd, newFileFlags, 0)
if err != nil {
return nil, err
@ -1314,7 +1337,7 @@ func (c *chunkedDiffer) retrieveMissingFiles(stream ImageSourceSeekable, dest st
return nil
}
func safeMkdir(dirfd int, mode os.FileMode, name string, metadata *internal.FileMetadata, options *archive.TarOptions) error {
func safeMkdir(dirfd int, mode os.FileMode, name string, metadata *fileMetadata, options *archive.TarOptions) error {
parent := filepath.Dir(name)
base := filepath.Base(name)
@ -1343,7 +1366,7 @@ func safeMkdir(dirfd int, mode os.FileMode, name string, metadata *internal.File
return setFileAttrs(dirfd, file, mode, metadata, options, false)
}
func safeLink(dirfd int, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) error {
func safeLink(dirfd int, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions) error {
sourceFile, err := openFileUnderRoot(metadata.Linkname, dirfd, unix.O_PATH|unix.O_RDONLY|unix.O_NOFOLLOW, 0)
if err != nil {
return err
@ -1385,7 +1408,7 @@ func safeLink(dirfd int, mode os.FileMode, metadata *internal.FileMetadata, opti
return setFileAttrs(dirfd, newFile, mode, metadata, options, false)
}
func safeSymlink(dirfd int, mode os.FileMode, metadata *internal.FileMetadata, options *archive.TarOptions) error {
func safeSymlink(dirfd int, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions) error {
destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name)
destDirFd := dirfd
if destDir != "." {
@ -1473,7 +1496,7 @@ type hardLinkToCreate struct {
dest string
dirfd int
mode os.FileMode
metadata *internal.FileMetadata
metadata *fileMetadata
}
func parseBooleanPullOption(storeOpts *storage.StoreOptions, name string, def bool) bool {
@ -1498,7 +1521,7 @@ func reopenFileReadOnly(f *os.File) (*os.File, error) {
return os.NewFile(uintptr(fd), f.Name()), nil
}
func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *internal.FileMetadata, copyOptions *findAndCopyFileOptions, mode os.FileMode) (bool, error) {
func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *fileMetadata, copyOptions *findAndCopyFileOptions, mode os.FileMode) (bool, error) {
finalizeFile := func(dstFile *os.File) error {
if dstFile == nil {
return nil
@ -1549,8 +1572,8 @@ func (c *chunkedDiffer) findAndCopyFile(dirfd int, r *internal.FileMetadata, cop
return false, nil
}
func makeEntriesFlat(mergedEntries []internal.FileMetadata) ([]internal.FileMetadata, error) {
var new []internal.FileMetadata
func makeEntriesFlat(mergedEntries []fileMetadata) ([]fileMetadata, error) {
var new []fileMetadata
hashes := make(map[string]string)
for i := range mergedEntries {
@ -1572,6 +1595,7 @@ func makeEntriesFlat(mergedEntries []internal.FileMetadata) ([]internal.FileMeta
hashes[d] = d
mergedEntries[i].Name = fmt.Sprintf("%s/%s", d[0:2], d[2:])
mergedEntries[i].skipSetAttrs = true
new = append(new, mergedEntries[i])
}
@ -1629,6 +1653,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
stream := c.stream
var uncompressedDigest digest.Digest
var convertedBlobSize int64
if c.convertToZstdChunked {
fd, err := unix.Open(dest, unix.O_TMPFILE|unix.O_RDWR|unix.O_CLOEXEC, 0o600)
@ -1656,10 +1681,11 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
return graphdriver.DriverWithDifferOutput{}, err
}
fileSource, diffID, annotations, err := convertTarToZstdChunked(dest, blobFile)
tarSize, fileSource, diffID, annotations, err := convertTarToZstdChunked(dest, blobFile)
if err != nil {
return graphdriver.DriverWithDifferOutput{}, err
}
convertedBlobSize = tarSize
// fileSource is an O_TMPFILE file descriptor, so we
// need to keep it open until the entire file is processed.
defer fileSource.Close()
@ -1668,7 +1694,14 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
blobFile.Close()
blobFile = nil
manifest, tarSplit, tocOffset, err := readZstdChunkedManifest(fileSource, c.blobSize, annotations)
tocDigest, err := toc.GetTOCDigest(annotations)
if err != nil {
return graphdriver.DriverWithDifferOutput{}, fmt.Errorf("internal error: parsing just-created zstd:chunked TOC digest: %w", err)
}
if tocDigest == nil {
return graphdriver.DriverWithDifferOutput{}, fmt.Errorf("internal error: just-created zstd:chunked missing TOC digest")
}
manifest, tarSplit, tocOffset, err := readZstdChunkedManifest(fileSource, c.blobSize, *tocDigest, annotations)
if err != nil {
return graphdriver.DriverWithDifferOutput{}, fmt.Errorf("read zstd:chunked manifest: %w", err)
}
@ -1729,14 +1762,19 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
var missingParts []missingPart
output.UIDs, output.GIDs = collectIDs(toc.Entries)
mergedEntries, totalSize, err := c.mergeTocEntries(c.fileType, toc.Entries)
mergedEntries, totalSizeFromTOC, err := c.mergeTocEntries(c.fileType, toc.Entries)
if err != nil {
return output, err
}
output.Size = totalSize
output.UIDs, output.GIDs = collectIDs(mergedEntries)
if convertedBlobSize > 0 {
// if the image was converted, store the original tar size, so that
// it can be recreated correctly.
output.Size = convertedBlobSize
} else {
output.Size = totalSizeFromTOC
}
if err := maybeDoIDRemap(mergedEntries, options); err != nil {
return output, err
@ -1789,7 +1827,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
njob int
index int
mode os.FileMode
metadata *internal.FileMetadata
metadata *fileMetadata
found bool
err error
@ -1961,7 +1999,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
remainingSize := r.Size
// the file is missing, attempt to find individual chunks.
for _, chunk := range r.Chunks {
for _, chunk := range r.chunks {
compressedSize := int64(chunk.EndOffset - chunk.Offset)
size := remainingSize
if chunk.ChunkSize > 0 {
@ -2045,7 +2083,7 @@ func mustSkipFile(fileType compressedFileType, e internal.FileMetadata) bool {
return false
}
func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]internal.FileMetadata, int64, error) {
func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]fileMetadata, int64, error) {
var totalFilesSize int64
countNextChunks := func(start int) int {
@ -2069,11 +2107,11 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
}
}
mergedEntries := make([]internal.FileMetadata, size)
mergedEntries := make([]fileMetadata, size)
m := 0
for i := 0; i < len(entries); i++ {
e := entries[i]
if mustSkipFile(fileType, e) {
e := fileMetadata{FileMetadata: entries[i]}
if mustSkipFile(fileType, entries[i]) {
continue
}
@ -2086,12 +2124,12 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
if e.Type == TypeReg {
nChunks := countNextChunks(i + 1)
e.Chunks = make([]*internal.FileMetadata, nChunks+1)
e.chunks = make([]*internal.FileMetadata, nChunks+1)
for j := 0; j <= nChunks; j++ {
// we need a copy here, otherwise we override the
// .Size later
copy := entries[i+j]
e.Chunks[j] = &copy
e.chunks[j] = &copy
e.EndOffset = entries[i+j].EndOffset
}
i += nChunks
@ -2110,10 +2148,10 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
}
lastChunkOffset := mergedEntries[i].EndOffset
for j := len(mergedEntries[i].Chunks) - 1; j >= 0; j-- {
mergedEntries[i].Chunks[j].EndOffset = lastChunkOffset
mergedEntries[i].Chunks[j].Size = mergedEntries[i].Chunks[j].EndOffset - mergedEntries[i].Chunks[j].Offset
lastChunkOffset = mergedEntries[i].Chunks[j].Offset
for j := len(mergedEntries[i].chunks) - 1; j >= 0; j-- {
mergedEntries[i].chunks[j].EndOffset = lastChunkOffset
mergedEntries[i].chunks[j].Size = mergedEntries[i].chunks[j].EndOffset - mergedEntries[i].chunks[j].Offset
lastChunkOffset = mergedEntries[i].chunks[j].Offset
}
}
return mergedEntries, totalFilesSize, nil
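
The back-fill loop above runs right to left because the TOC records only offsets: each chunk's EndOffset is the next chunk's Offset (or the merged file's EndOffset for the last chunk), and Size falls out as the difference. A worked example with two chunks and a file ending at blob offset 350:

chunks := []*internal.FileMetadata{{Offset: 100}, {Offset: 200}}
lastChunkOffset := int64(350) // the merged entry's EndOffset
for j := len(chunks) - 1; j >= 0; j-- {
	chunks[j].EndOffset = lastChunkOffset
	chunks[j].Size = chunks[j].EndOffset - chunks[j].Offset
	lastChunkOffset = chunks[j].Offset
}
// chunks[0]: Offset 100, EndOffset 200, Size 100
// chunks[1]: Offset 200, EndOffset 350, Size 150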


@ -12,8 +12,10 @@ import (
"testing"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/containers/storage/pkg/chunked/toc"
"github.com/klauspost/compress/zstd"
"github.com/opencontainers/go-digest"
"github.com/stretchr/testify/require"
)
type seekable struct {
@ -148,7 +150,10 @@ func TestGenerateAndParseManifest(t *testing.T) {
t: t,
}
manifest, _, _, err := readZstdChunkedManifest(s, 8192, annotations)
tocDigest, err := toc.GetTOCDigest(annotations)
require.NoError(t, err)
require.NotNil(t, tocDigest)
manifest, _, _, err := readZstdChunkedManifest(s, 8192, *tocDigest, annotations)
if err != nil {
t.Error(err)
}