Merge pull request #2130 from mtrmac/chunked-size

Correctly compute UncompressedSize on zstd:chunked pull, don’t set it on estargz
openshift-merge-bot[bot] 2024-10-15 22:02:51 +00:00 committed by GitHub
commit 14d1fce267
6 changed files with 130 additions and 51 deletions


@@ -189,14 +189,14 @@ type Driver interface {
 type DriverWithDifferOutput struct {
 	Differ             Differ
 	Target             string
-	Size               int64
+	Size               int64 // Size of the uncompressed layer, -1 if unknown. Must be known if UncompressedDigest is set.
 	UIDs               []uint32
 	GIDs               []uint32
 	UncompressedDigest digest.Digest
 	CompressedDigest   digest.Digest
 	Metadata           string
 	BigData            map[string][]byte
-	TarSplit           []byte
+	TarSplit           []byte // nil if not available
 	TOCDigest          digest.Digest
 	// RootDirMode is the mode of the root directory of the layer, if specified.
 	RootDirMode *os.FileMode


@@ -136,9 +136,12 @@ type Layer struct {
 	TOCDigest digest.Digest `json:"toc-digest,omitempty"`
 	// UncompressedSize is the length of the blob that was last passed to
-	// ApplyDiff() or create(), after we decompressed it. If
-	// UncompressedDigest is not set, this should be treated as if it were
-	// an uninitialized value.
+	// ApplyDiff() or create(), after we decompressed it.
+	//
+	// - If UncompressedDigest is set, this must be set to a valid value.
+	// - Otherwise, if TOCDigest is set, this is either valid or -1.
+	// - If neither of these digests is set, this should be treated as if it were
+	//   an uninitialized value.
 	UncompressedSize int64 `json:"diff-size,omitempty"`
 	// CompressionType is the type of compression which we detected on the blob
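The three documented cases translate into a simple consumer-side rule. A minimal sketch of that rule (the helper name and shape are illustrative, not part of this change):

	// usableUncompressedSize is a hypothetical helper illustrating the
	// documented contract for Layer.UncompressedSize.
	func usableUncompressedSize(layer *Layer) (int64, bool) {
		switch {
		case layer.UncompressedDigest != "":
			return layer.UncompressedSize, true // must be valid in this case
		case layer.TOCDigest != "":
			// Either valid or an explicit -1 ("known unknown").
			return layer.UncompressedSize, layer.UncompressedSize != -1
		default:
			return -1, false // uninitialized; do not trust the stored value
		}
	}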
@@ -1214,8 +1217,8 @@ func (r *layerStore) Size(name string) (int64, error) {
 	// We use the presence of a non-empty digest as an indicator that the size value was intentionally set, and that
 	// a zero value is not just present because it was never set to anything else (which can happen if the layer was
 	// created by a version of this library that didn't keep track of digest and size information).
-	if layer.TOCDigest != "" || layer.UncompressedDigest != "" {
-		return layer.UncompressedSize, nil
+	if layer.UncompressedDigest != "" || layer.TOCDigest != "" {
+		return layer.UncompressedSize, nil // This may return -1 if only TOCDigest is set
 	}
 	return -1, nil
 }
@@ -2510,7 +2513,7 @@ func (r *layerStore) applyDiffFromStagingDirectory(id string, diffOutput *driver
 		return err
 	}
-	if len(diffOutput.TarSplit) != 0 {
+	if diffOutput.TarSplit != nil {
 		tsdata := bytes.Buffer{}
 		compressor, err := pgzip.NewWriterLevel(&tsdata, pgzip.BestSpeed)
 		if err != nil {


@@ -139,7 +139,7 @@ func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64,
 }
 // readZstdChunkedManifest reads the zstd:chunked manifest from the seekable stream blobStream.
-// Returns (manifest blob, parsed manifest, tar-split blob, manifest offset).
+// Returns (manifest blob, parsed manifest, tar-split blob or nil, manifest offset).
 func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Digest, annotations map[string]string) ([]byte, *internal.TOC, []byte, int64, error) {
 	offsetMetadata := annotations[internal.ManifestInfoKey]
 	if offsetMetadata == "" {
@@ -214,7 +214,7 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Di
 		return nil, nil, nil, 0, fmt.Errorf("unmarshaling TOC: %w", err)
 	}
-	decodedTarSplit := []byte{}
+	var decodedTarSplit []byte = nil
 	if toc.TarSplitDigest != "" {
 		if tarSplitChunk.Offset <= 0 {
 			return nil, nil, nil, 0, fmt.Errorf("TOC requires a tar-split, but the %s annotation does not describe a position", internal.TarSplitInfoKey)
@@ -288,6 +288,36 @@ func ensureTOCMatchesTarSplit(toc *internal.TOC, tarSplit []byte) error {
 	return nil
 }
+
+// tarSizeFromTarSplit computes the total tarball size, using only the tarSplit metadata
+func tarSizeFromTarSplit(tarSplit []byte) (int64, error) {
+	var res int64 = 0
+	unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit))
+	for {
+		entry, err := unpacker.Next()
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			return -1, fmt.Errorf("reading tar-split entries: %w", err)
+		}
+		switch entry.Type {
+		case storage.SegmentType:
+			res += int64(len(entry.Payload))
+		case storage.FileType:
+			// entry.Size is the “logical size”, which might not be the physical size for sparse entries;
+			// but the way tar-split/tar/asm.WriteOutputTarStream combines FileType entries and returned file contents,
+			// sparse files are not supported.
+			// Also https://github.com/opencontainers/image-spec/blob/main/layer.md says
+			// > Sparse files SHOULD NOT be used because they lack consistent support across tar implementations.
+			res += entry.Size
+		default:
+			return -1, fmt.Errorf("unexpected tar-split entry type %q", entry.Type)
+		}
+	}
+	return res, nil
+}
+
 // ensureTimePointersMatch ensures that a and b are equal
 func ensureTimePointersMatch(a, b *time.Time) error {
 	// We didn't always use “timeIfNotZero” when creating the TOC, so treat time.IsZero the same as nil.
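To see what tarSizeFromTarSplit adds up, consider a tarball holding a single 5-byte regular file, as produced by Go's archive/tar with 512-byte blocks (the segment lengths below are an illustrative assumption about that layout, not taken from this change):

	SegmentType  512-byte header block                       res += 512
	FileType     Size = 5 (file payload)                     res += 5
	SegmentType  507 bytes of padding to the block boundary,
	             plus two 512-byte end-of-archive blocks     res += 1531

	total: 512 + 5 + 507 + 1024 = 2048 bytes

That total is the exact length of the uncompressed tar stream, which is what UncompressedSize needs; summing only the FileType payloads would yield just 5 bytes.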


@@ -0,0 +1,45 @@
+package chunked
+
+import (
+	"bytes"
+	"io"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"github.com/vbatts/tar-split/archive/tar"
+	"github.com/vbatts/tar-split/tar/asm"
+	"github.com/vbatts/tar-split/tar/storage"
+)
+
+func TestTarSizeFromTarSplit(t *testing.T) {
+	var tarball bytes.Buffer
+	tarWriter := tar.NewWriter(&tarball)
+	for _, e := range someFiles {
+		tf, err := typeToTarType(e.Type)
+		require.NoError(t, err)
+		err = tarWriter.WriteHeader(&tar.Header{
+			Typeflag: tf,
+			Name:     e.Name,
+			Size:     e.Size,
+			Mode:     e.Mode,
+		})
+		require.NoError(t, err)
+		data := make([]byte, e.Size)
+		_, err = tarWriter.Write(data)
+		require.NoError(t, err)
+	}
+	err := tarWriter.Close()
+	require.NoError(t, err)
+	expectedTarSize := int64(tarball.Len())
+
+	var tarSplit bytes.Buffer
+	tsReader, err := asm.NewInputTarStream(&tarball, storage.NewJSONPacker(&tarSplit), storage.NewDiscardFilePutter())
+	require.NoError(t, err)
+	_, err = io.Copy(io.Discard, tsReader)
+	require.NoError(t, err)
+
+	res, err := tarSizeFromTarSplit(tarSplit.Bytes())
+	require.NoError(t, err)
+	assert.Equal(t, expectedTarSize, res)
+}


@@ -89,7 +89,8 @@ type chunkedDiffer struct {
 	// is no TOC referenced by the manifest.
 	blobDigest digest.Digest

-	blobSize int64
+	blobSize            int64
+	uncompressedTarSize int64 // -1 if unknown

 	pullOptions map[string]string
@@ -216,6 +217,7 @@ func makeConvertFromRawDiffer(store storage.Store, blobDigest digest.Digest, blo
 		fsVerityDigests:      make(map[string]string),
 		blobDigest:           blobDigest,
 		blobSize:             blobSize,
+		uncompressedTarSize:  -1, // Will be computed later
 		convertToZstdChunked: true,
 		copyBuffer:           makeCopyBuffer(),
 		layersCache:          layersCache,
@@ -229,24 +231,33 @@ func makeZstdChunkedDiffer(store storage.Store, blobSize int64, tocDigest digest
 	if err != nil {
 		return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
 	}
+	var uncompressedTarSize int64 = -1
+	if tarSplit != nil {
+		uncompressedTarSize, err = tarSizeFromTarSplit(tarSplit)
+		if err != nil {
+			return nil, fmt.Errorf("computing size from tar-split: %w", err)
+		}
+	}
 	layersCache, err := getLayersCache(store)
 	if err != nil {
 		return nil, err
 	}
 	return &chunkedDiffer{
-		fsVerityDigests: make(map[string]string),
-		blobSize:        blobSize,
-		tocDigest:       tocDigest,
-		copyBuffer:      makeCopyBuffer(),
-		fileType:        fileTypeZstdChunked,
-		layersCache:     layersCache,
-		manifest:        manifest,
-		toc:             toc,
-		pullOptions:     pullOptions,
-		stream:          iss,
-		tarSplit:        tarSplit,
-		tocOffset:       tocOffset,
+		fsVerityDigests:     make(map[string]string),
+		blobSize:            blobSize,
+		uncompressedTarSize: uncompressedTarSize,
+		tocDigest:           tocDigest,
+		copyBuffer:          makeCopyBuffer(),
+		fileType:            fileTypeZstdChunked,
+		layersCache:         layersCache,
+		manifest:            manifest,
+		toc:                 toc,
+		pullOptions:         pullOptions,
+		stream:              iss,
+		tarSplit:            tarSplit,
+		tocOffset:           tocOffset,
 	}, nil
 }
@@ -261,16 +272,17 @@ func makeEstargzChunkedDiffer(store storage.Store, blobSize int64, tocDigest dig
 	}
 	return &chunkedDiffer{
-		fsVerityDigests: make(map[string]string),
-		blobSize:        blobSize,
-		tocDigest:       tocDigest,
-		copyBuffer:      makeCopyBuffer(),
-		fileType:        fileTypeEstargz,
-		layersCache:     layersCache,
-		manifest:        manifest,
-		pullOptions:     pullOptions,
-		stream:          iss,
-		tocOffset:       tocOffset,
+		fsVerityDigests:     make(map[string]string),
+		blobSize:            blobSize,
+		uncompressedTarSize: -1, // We would have to read and decompress the whole layer
+		tocDigest:           tocDigest,
+		copyBuffer:          makeCopyBuffer(),
+		fileType:            fileTypeEstargz,
+		layersCache:         layersCache,
+		manifest:            manifest,
+		pullOptions:         pullOptions,
+		stream:              iss,
+		tocOffset:           tocOffset,
 	}, nil
 }
@@ -1153,7 +1165,6 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 	var compressedDigest digest.Digest
 	var uncompressedDigest digest.Digest
-	var convertedBlobSize int64

 	if c.convertToZstdChunked {
 		fd, err := unix.Open(dest, unix.O_TMPFILE|unix.O_RDWR|unix.O_CLOEXEC, 0o600)
@@ -1185,7 +1196,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 		if err != nil {
 			return graphdriver.DriverWithDifferOutput{}, err
 		}
-		convertedBlobSize = tarSize
+		c.uncompressedTarSize = tarSize
 		// fileSource is a O_TMPFILE file descriptor, so we
 		// need to keep it open until the entire file is processed.
 		defer fileSource.Close()
@@ -1255,6 +1266,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 		TOCDigest:          c.tocDigest,
 		UncompressedDigest: uncompressedDigest,
 		CompressedDigest:   compressedDigest,
+		Size:               c.uncompressedTarSize,
 	}
 	// When the hard links deduplication is used, file attributes are ignored because setting them
@@ -1268,19 +1280,12 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 	var missingParts []missingPart

-	mergedEntries, totalSizeFromTOC, err := c.mergeTocEntries(c.fileType, toc.Entries)
+	mergedEntries, err := c.mergeTocEntries(c.fileType, toc.Entries)
 	if err != nil {
 		return output, err
 	}
 	output.UIDs, output.GIDs = collectIDs(mergedEntries)
-	if convertedBlobSize > 0 {
-		// if the image was converted, store the original tar size, so that
-		// it can be recreated correctly.
-		output.Size = convertedBlobSize
-	} else {
-		output.Size = totalSizeFromTOC
-	}

 	if err := maybeDoIDRemap(mergedEntries, options); err != nil {
 		return output, err
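The deleted fallback above is the heart of the fix: totalSizeFromTOC summed only the file payload sizes recorded in the TOC, omitting all tar framing (512-byte headers, padding to block boundaries, the end-of-archive blocks). For the illustrative one-file tarball shown earlier, that path would have stored 5 bytes where the real uncompressed tar stream is 2048. Size is now set once, from c.uncompressedTarSize, which is either an exact figure (from tar-split or from conversion) or an honest -1.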
@@ -1597,9 +1602,7 @@ func mustSkipFile(fileType compressedFileType, e internal.FileMetadata) bool {
 	return false
 }
-func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]fileMetadata, int64, error) {
-	var totalFilesSize int64
+func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]fileMetadata, error) {
 	countNextChunks := func(start int) int {
 		count := 0
 		for _, e := range entries[start:] {
@@ -1629,10 +1632,8 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
 			continue
 		}
-		totalFilesSize += e.Size
 		if e.Type == TypeChunk {
-			return nil, -1, fmt.Errorf("chunk type without a regular file")
+			return nil, fmt.Errorf("chunk type without a regular file")
 		}
 		if e.Type == TypeReg {
@@ -1668,7 +1669,7 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
 				lastChunkOffset = mergedEntries[i].chunks[j].Offset
 			}
 		}
 	}
-	return mergedEntries, totalFilesSize, nil
+	return mergedEntries, nil
 }
 // validateChunkChecksum checks if the file at $root/$path[offset:chunk.ChunkSize] has the


@@ -2201,7 +2201,7 @@ func (s *store) ImageSize(id string) (int64, error) {
 		}
 		// The UncompressedSize is only valid if there's a digest to go with it.
 		n := layer.UncompressedSize
-		if layer.UncompressedDigest == "" {
+		if layer.UncompressedDigest == "" || n == -1 {
 			// Compute the size.
 			n, err = layerStore.DiffSize("", layer.ID)
 			if err != nil {
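With -1 now a possible stored size for TOC-only layers, callers that need an exact number must be prepared to fall back to recomputing, as ImageSize does above. A minimal sketch of that consumer pattern, assuming the layerStore methods shown in this diff (layer and name stand in for the caller's own values):

	size, err := layerStore.Size(name) // may be -1 when only TOCDigest is set
	if err == nil && size == -1 {
		// Known-unknown: recompute by regenerating and measuring the layer diff.
		size, err = layerStore.DiffSize("", layer.ID)
	}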