Merge pull request #2130 from mtrmac/chunked-size
Correctly compute UncompressedSize on zstd:chunked pull, don’t set it on estargz
Commit 14d1fce267
@@ -189,14 +189,14 @@ type Driver interface {
 type DriverWithDifferOutput struct {
 	Differ             Differ
 	Target             string
-	Size               int64
+	Size               int64 // Size of the uncompressed layer, -1 if unknown. Must be known if UncompressedDigest is set.
 	UIDs               []uint32
 	GIDs               []uint32
 	UncompressedDigest digest.Digest
 	CompressedDigest   digest.Digest
 	Metadata           string
 	BigData            map[string][]byte
-	TarSplit           []byte
+	TarSplit           []byte // nil if not available
 	TOCDigest          digest.Digest
 	// RootDirMode is the mode of the root directory of the layer, if specified.
 	RootDirMode *os.FileMode
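The new Size contract (known whenever UncompressedDigest is set, -1 otherwise) is easiest to see from the consumer side. Below is a minimal, self-contained sketch; the type and function names are illustrative stand-ins, not part of this change:

package main

import "fmt"

// differOutput is a stand-in for graphdriver.DriverWithDifferOutput.
type differOutput struct {
	uncompressedDigest string
	size               int64 // -1 if unknown; must be known if uncompressedDigest is set
}

func describe(o differOutput) string {
	switch {
	case o.uncompressedDigest != "" && o.size == -1:
		return "invalid: digest recorded without a size"
	case o.size == -1:
		return "size unknown; callers must be prepared to recompute it"
	default:
		return fmt.Sprintf("uncompressed size: %d bytes", o.size)
	}
}

func main() {
	fmt.Println(describe(differOutput{uncompressedDigest: "sha256:abc", size: 2048}))
	fmt.Println(describe(differOutput{size: -1}))
}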
layers.go (13 changes)
@@ -136,8 +136,11 @@ type Layer struct {
 	TOCDigest digest.Digest `json:"toc-digest,omitempty"`

 	// UncompressedSize is the length of the blob that was last passed to
-	// ApplyDiff() or create(), after we decompressed it. If
-	// UncompressedDigest is not set, this should be treated as if it were
+	// ApplyDiff() or create(), after we decompressed it.
+	//
+	// - If UncompressedDigest is set, this must be set to a valid value.
+	// - Otherwise, if TOCDigest is set, this is either valid or -1.
+	// - If neither of these digests is set, this should be treated as if it were
 	// an uninitialized value.
 	UncompressedSize int64 `json:"diff-size,omitempty"`
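The three cases in the rewritten comment can be stated as a small predicate. A sketch assuming hypothetical names (checkSizeInvariant is not part of the patch):

package main

import (
	"errors"
	"fmt"
)

// checkSizeInvariant mirrors the documented cases for Layer.UncompressedSize.
func checkSizeInvariant(uncompressedDigest, tocDigest string, size int64) error {
	switch {
	case uncompressedDigest != "" && size < 0:
		return errors.New("violation: UncompressedDigest is set, so size must be valid")
	case uncompressedDigest != "":
		return nil // size is required to be valid, and is
	case tocDigest != "":
		return nil // size is either valid or the -1 sentinel
	default:
		return errors.New("no digest: treat size as uninitialized")
	}
}

func main() {
	fmt.Println(checkSizeInvariant("sha256:abc", "", 1024)) // <nil>
	fmt.Println(checkSizeInvariant("", "sha256:toc", -1))   // <nil>
	fmt.Println(checkSizeInvariant("", "", 0))              // no digest: treat size as uninitialized
}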
@@ -1214,8 +1217,8 @@ func (r *layerStore) Size(name string) (int64, error) {
 	// We use the presence of a non-empty digest as an indicator that the size value was intentionally set, and that
 	// a zero value is not just present because it was never set to anything else (which can happen if the layer was
 	// created by a version of this library that didn't keep track of digest and size information).
-	if layer.TOCDigest != "" || layer.UncompressedDigest != "" {
-		return layer.UncompressedSize, nil
+	if layer.UncompressedDigest != "" || layer.TOCDigest != "" {
+		return layer.UncompressedSize, nil // This may return -1 if only TOCDigest is set
 	}
 	return -1, nil
 }
@@ -2510,7 +2513,7 @@ func (r *layerStore) applyDiffFromStagingDirectory(id string, diffOutput *driver
 		return err
 	}

-	if len(diffOutput.TarSplit) != 0 {
+	if diffOutput.TarSplit != nil {
 		tsdata := bytes.Buffer{}
 		compressor, err := pgzip.NewWriterLevel(&tsdata, pgzip.BestSpeed)
 		if err != nil {
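For context, the tar-split blob guarded by this check is stored gzip-compressed with the same pgzip calls that appear above. A minimal round-trip sketch, assuming github.com/klauspost/pgzip (the payload literal is made up):

package main

import (
	"bytes"
	"fmt"
	"io"

	"github.com/klauspost/pgzip"
)

func main() {
	tarSplitJSON := []byte(`{"type":2,"payload":"..."}`) // stand-in for real tar-split data

	// Compress, as applyDiffFromStagingDirectory does with diffOutput.TarSplit.
	var tsdata bytes.Buffer
	compressor, err := pgzip.NewWriterLevel(&tsdata, pgzip.BestSpeed)
	if err != nil {
		panic(err)
	}
	if _, err := compressor.Write(tarSplitJSON); err != nil {
		panic(err)
	}
	if err := compressor.Close(); err != nil {
		panic(err)
	}

	// Decompress on the way back.
	decompressor, err := pgzip.NewReader(&tsdata)
	if err != nil {
		panic(err)
	}
	roundTripped, err := io.ReadAll(decompressor)
	if err != nil {
		panic(err)
	}
	fmt.Println(bytes.Equal(tarSplitJSON, roundTripped)) // true
}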
@@ -139,7 +139,7 @@ func readEstargzChunkedManifest(blobStream ImageSourceSeekable, blobSize int64,
 }

 // readZstdChunkedManifest reads the zstd:chunked manifest from the seekable stream blobStream.
-// Returns (manifest blob, parsed manifest, tar-split blob, manifest offset).
+// Returns (manifest blob, parsed manifest, tar-split blob or nil, manifest offset).
 func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Digest, annotations map[string]string) ([]byte, *internal.TOC, []byte, int64, error) {
 	offsetMetadata := annotations[internal.ManifestInfoKey]
 	if offsetMetadata == "" {
@@ -214,7 +214,7 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Di
 		return nil, nil, nil, 0, fmt.Errorf("unmarshaling TOC: %w", err)
 	}

-	decodedTarSplit := []byte{}
+	var decodedTarSplit []byte = nil
 	if toc.TarSplitDigest != "" {
 		if tarSplitChunk.Offset <= 0 {
 			return nil, nil, nil, 0, fmt.Errorf("TOC requires a tar-split, but the %s annotation does not describe a position", internal.TarSplitInfoKey)
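The switch from []byte{} to nil is load-bearing: the layer store now tests diffOutput.TarSplit != nil, and in Go an empty non-nil slice would still count as "present". A minimal demonstration of the distinction:

package main

import "fmt"

func main() {
	empty := []byte{} // non-nil, length 0: what was previously returned when no tar-split existed
	var absent []byte // nil: what readZstdChunkedManifest now returns in that case

	fmt.Println(empty == nil, len(empty))   // false 0
	fmt.Println(absent == nil, len(absent)) // true 0

	// The layer store's check is now on presence, not length:
	for _, ts := range [][]byte{empty, absent} {
		if ts != nil {
			fmt.Println("would compress and store tar-split")
		} else {
			fmt.Println("no tar-split available")
		}
	}
}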
@@ -288,6 +288,36 @@ func ensureTOCMatchesTarSplit(toc *internal.TOC, tarSplit []byte) error {
 	return nil
 }

+// tarSizeFromTarSplit computes the total tarball size, using only the tarSplit metadata
+func tarSizeFromTarSplit(tarSplit []byte) (int64, error) {
+	var res int64 = 0
+
+	unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit))
+	for {
+		entry, err := unpacker.Next()
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			return -1, fmt.Errorf("reading tar-split entries: %w", err)
+		}
+		switch entry.Type {
+		case storage.SegmentType:
+			res += int64(len(entry.Payload))
+		case storage.FileType:
+			// entry.Size is the “logical size”, which might not be the physical size for sparse entries;
+			// but the way tar-split/tar/asm.WriteOutputTarStream combines FileType entries and returned file contents,
+			// sparse files are not supported.
+			// Also https://github.com/opencontainers/image-spec/blob/main/layer.md says
+			// > Sparse files SHOULD NOT be used because they lack consistent support across tar implementations.
+			res += entry.Size
+		default:
+			return -1, fmt.Errorf("unexpected tar-split entry type %q", entry.Type)
+		}
+	}
+	return res, nil
+}
+
 // ensureTimePointersMatch ensures that a and b are equal
 func ensureTimePointersMatch(a, b *time.Time) error {
 	// We didn’t always use “timeIfNotZero” when creating the TOC, so treat time.IsZero the same as nil.
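Why summing works: tar-split stores every byte of the tarball either verbatim (SegmentType: headers, padding, the end-of-archive marker) or by reference (FileType: payload size only), so adding the two kinds of entries reconstructs the exact tarball length. A worked example for a single 7-byte file, assuming the usual 512-byte tar block layout; the new test below exercises the same round trip end to end:

package main

import "fmt"

func main() {
	header := int64(512)   // SegmentType: raw tar header block, stored verbatim
	payload := int64(7)    // FileType: logical file size; payload bytes are not in the tar-split
	padding := int64(505)  // SegmentType: zero padding up to the next 512-byte boundary
	trailer := int64(1024) // SegmentType: two zero blocks ending the archive

	total := header + payload + padding + trailer
	fmt.Println(total) // 2048, exactly the length of the original tarball
}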
@@ -0,0 +1,45 @@
+package chunked
+
+import (
+	"bytes"
+	"io"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"github.com/vbatts/tar-split/archive/tar"
+	"github.com/vbatts/tar-split/tar/asm"
+	"github.com/vbatts/tar-split/tar/storage"
+)
+
+func TestTarSizeFromTarSplit(t *testing.T) {
+	var tarball bytes.Buffer
+	tarWriter := tar.NewWriter(&tarball)
+	for _, e := range someFiles {
+		tf, err := typeToTarType(e.Type)
+		require.NoError(t, err)
+		err = tarWriter.WriteHeader(&tar.Header{
+			Typeflag: tf,
+			Name:     e.Name,
+			Size:     e.Size,
+			Mode:     e.Mode,
+		})
+		require.NoError(t, err)
+		data := make([]byte, e.Size)
+		_, err = tarWriter.Write(data)
+		require.NoError(t, err)
+	}
+	err := tarWriter.Close()
+	require.NoError(t, err)
+	expectedTarSize := int64(tarball.Len())
+
+	var tarSplit bytes.Buffer
+	tsReader, err := asm.NewInputTarStream(&tarball, storage.NewJSONPacker(&tarSplit), storage.NewDiscardFilePutter())
+	require.NoError(t, err)
+	_, err = io.Copy(io.Discard, tsReader)
+	require.NoError(t, err)
+
+	res, err := tarSizeFromTarSplit(tarSplit.Bytes())
+	require.NoError(t, err)
+	assert.Equal(t, expectedTarSize, res)
+}
@@ -90,6 +90,7 @@ type chunkedDiffer struct {
 	blobDigest digest.Digest

 	blobSize            int64
+	uncompressedTarSize int64 // -1 if unknown

 	pullOptions map[string]string

@@ -216,6 +217,7 @@ func makeConvertFromRawDiffer(store storage.Store, blobDigest digest.Digest, blo
 		fsVerityDigests:      make(map[string]string),
 		blobDigest:           blobDigest,
 		blobSize:             blobSize,
+		uncompressedTarSize:  -1, // Will be computed later
 		convertToZstdChunked: true,
 		copyBuffer:           makeCopyBuffer(),
 		layersCache:          layersCache,
@@ -229,6 +231,14 @@ func makeZstdChunkedDiffer(store storage.Store, blobSize int64, tocDigest digest
 	if err != nil {
 		return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
 	}
+	var uncompressedTarSize int64 = -1
+	if tarSplit != nil {
+		uncompressedTarSize, err = tarSizeFromTarSplit(tarSplit)
+		if err != nil {
+			return nil, fmt.Errorf("computing size from tar-split: %w", err)
+		}
+	}
+
 	layersCache, err := getLayersCache(store)
 	if err != nil {
 		return nil, err
@@ -237,6 +247,7 @@ func makeZstdChunkedDiffer(store storage.Store, blobSize int64, tocDigest digest
 	return &chunkedDiffer{
 		fsVerityDigests:     make(map[string]string),
 		blobSize:            blobSize,
+		uncompressedTarSize: uncompressedTarSize,
 		tocDigest:           tocDigest,
 		copyBuffer:          makeCopyBuffer(),
 		fileType:            fileTypeZstdChunked,
@@ -263,6 +274,7 @@ func makeEstargzChunkedDiffer(store storage.Store, blobSize int64, tocDigest dig
 	return &chunkedDiffer{
 		fsVerityDigests:     make(map[string]string),
 		blobSize:            blobSize,
+		uncompressedTarSize: -1, // We would have to read and decompress the whole layer
 		tocDigest:           tocDigest,
 		copyBuffer:          makeCopyBuffer(),
 		fileType:            fileTypeEstargz,
@@ -1153,7 +1165,6 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff

 	var compressedDigest digest.Digest
 	var uncompressedDigest digest.Digest
-	var convertedBlobSize int64

 	if c.convertToZstdChunked {
 		fd, err := unix.Open(dest, unix.O_TMPFILE|unix.O_RDWR|unix.O_CLOEXEC, 0o600)
@@ -1185,7 +1196,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 		if err != nil {
 			return graphdriver.DriverWithDifferOutput{}, err
 		}
-		convertedBlobSize = tarSize
+		c.uncompressedTarSize = tarSize
 		// fileSource is a O_TMPFILE file descriptor, so we
 		// need to keep it open until the entire file is processed.
 		defer fileSource.Close()
@@ -1255,6 +1266,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
 		TOCDigest:          c.tocDigest,
 		UncompressedDigest: uncompressedDigest,
 		CompressedDigest:   compressedDigest,
+		Size:               c.uncompressedTarSize,
 	}

 	// When the hard links deduplication is used, file attributes are ignored because setting them
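With this, output.Size is wired to c.uncompressedTarSize on every path, and the per-file totals from the TOC (removed in the hunks below) are no longer needed. A hypothetical summary function, not part of the patch, of where that value comes from:

package main

import "fmt"

func source(convertToZstdChunked, haveTarSplit bool) string {
	switch {
	case convertToZstdChunked:
		return "measured while converting (c.uncompressedTarSize = tarSize)"
	case haveTarSplit:
		return "derived from tar-split metadata (tarSizeFromTarSplit)"
	default:
		return "-1, unknown (estargz, or zstd:chunked without a tar-split)"
	}
}

func main() {
	fmt.Println(source(true, false))
	fmt.Println(source(false, true))
	fmt.Println(source(false, false))
}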
@@ -1268,19 +1280,12 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff

 	var missingParts []missingPart

-	mergedEntries, totalSizeFromTOC, err := c.mergeTocEntries(c.fileType, toc.Entries)
+	mergedEntries, err := c.mergeTocEntries(c.fileType, toc.Entries)
 	if err != nil {
 		return output, err
 	}

 	output.UIDs, output.GIDs = collectIDs(mergedEntries)
-	if convertedBlobSize > 0 {
-		// if the image was converted, store the original tar size, so that
-		// it can be recreated correctly.
-		output.Size = convertedBlobSize
-	} else {
-		output.Size = totalSizeFromTOC
-	}

 	if err := maybeDoIDRemap(mergedEntries, options); err != nil {
 		return output, err
@@ -1597,9 +1602,7 @@ func mustSkipFile(fileType compressedFileType, e internal.FileMetadata) bool {
 	return false
 }

-func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]fileMetadata, int64, error) {
-	var totalFilesSize int64
-
+func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []internal.FileMetadata) ([]fileMetadata, error) {
 	countNextChunks := func(start int) int {
 		count := 0
 		for _, e := range entries[start:] {
@@ -1629,10 +1632,8 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
 			continue
 		}

-		totalFilesSize += e.Size
-
 		if e.Type == TypeChunk {
-			return nil, -1, fmt.Errorf("chunk type without a regular file")
+			return nil, fmt.Errorf("chunk type without a regular file")
 		}

 		if e.Type == TypeReg {
@@ -1668,7 +1669,7 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
 			lastChunkOffset = mergedEntries[i].chunks[j].Offset
 		}
 	}
-	return mergedEntries, totalFilesSize, nil
+	return mergedEntries, nil
 }

 // validateChunkChecksum checks if the file at $root/$path[offset:chunk.ChunkSize] has the
store.go (2 changes)
@@ -2201,7 +2201,7 @@ func (s *store) ImageSize(id string) (int64, error) {
 		}
 		// The UncompressedSize is only valid if there's a digest to go with it.
 		n := layer.UncompressedSize
-		if layer.UncompressedDigest == "" {
+		if layer.UncompressedDigest == "" || n == -1 {
 			// Compute the size.
 			n, err = layerStore.DiffSize("", layer.ID)
 			if err != nil {
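A minimal sketch of the amended fallback: a stored size is trusted only when a digest accompanies it and it is not the -1 sentinel. computeDiffSize stands in for layerStore.DiffSize, and the function name is hypothetical:

package main

import "fmt"

func layerSize(uncompressedDigest string, uncompressedSize int64, computeDiffSize func() int64) int64 {
	if uncompressedDigest == "" || uncompressedSize == -1 {
		return computeDiffSize() // fall back to walking the layer diff
	}
	return uncompressedSize
}

func main() {
	expensive := func() int64 { fmt.Println("computing via DiffSize"); return 4096 }
	fmt.Println(layerSize("sha256:abc", 1024, expensive)) // uses the stored size
	fmt.Println(layerSize("sha256:abc", -1, expensive))   // -1 sentinel forces recomputation
	fmt.Println(layerSize("", 0, expensive))              // no digest: stored 0 is untrusted
}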