Use tar-split/tar/asm.IterateHeaders now that it has been accepted

... instead of our own version, which makes assumptions about internal
decisions of the tar-split project and needs heuristics to guess where
file padding ends.

Signed-off-by: Miloslav Trmač <mitr@redhat.com>
Miloslav Trmač 2024-09-27 19:47:25 +02:00
parent f45f1ed2f9
commit 39e467aa53
3 changed files with 5 additions and 173 deletions
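For reference, the new call pattern is just the upstream iterator driven by a JSON unpacker, as the first diff hunks below show. A minimal standalone sketch (not part of this diff; listTarSplitNames is illustrative only and assumes a tar-split release that includes asm.IterateHeaders):

package example

import (
	"bytes"
	"fmt"

	"github.com/vbatts/tar-split/archive/tar"
	"github.com/vbatts/tar-split/tar/asm"
	"github.com/vbatts/tar-split/tar/storage"
)

// listTarSplitNames prints the name recorded in each tar header of a
// tar-split blob; tar-split itself now decides where each header starts.
func listTarSplitNames(tarSplit []byte) error {
	unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit))
	return asm.IterateHeaders(unpacker, func(hdr *tar.Header) error {
		fmt.Println(hdr.Name)
		return nil
	})
}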


@@ -2,6 +2,7 @@ package chunked
 
 import (
 	archivetar "archive/tar"
+	"bytes"
 	"errors"
 	"fmt"
 	"io"
@@ -14,6 +15,8 @@ import (
 	"github.com/klauspost/pgzip"
 	digest "github.com/opencontainers/go-digest"
 	"github.com/vbatts/tar-split/archive/tar"
+	"github.com/vbatts/tar-split/tar/asm"
+	"github.com/vbatts/tar-split/tar/storage"
 	expMaps "golang.org/x/exp/maps"
 )
 
@@ -256,7 +259,8 @@ func ensureTOCMatchesTarSplit(toc *internal.TOC, tarSplit []byte) error {
 		}
 	}
 
-	if err := iterateTarSplit(tarSplit, func(hdr *tar.Header) error {
+	unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit))
+	if err := asm.IterateHeaders(unpacker, func(hdr *tar.Header) error {
 		e, ok := pendingFiles[hdr.Name]
 		if !ok {
 			return fmt.Errorf("tar-split contains an entry for %q missing in TOC", hdr.Name)


@@ -1,68 +0,0 @@
-package chunked
-
-import (
-	"bytes"
-	"fmt"
-	"io"
-
-	"github.com/vbatts/tar-split/archive/tar"
-	"github.com/vbatts/tar-split/tar/storage"
-)
-
-// iterateTarSplit calls handler for each tar header in tarSplit
-func iterateTarSplit(tarSplit []byte, handler func(hdr *tar.Header) error) error {
-	// This, strictly speaking, hard-codes undocumented assumptions about how github.com/vbatts/tar-split/tar/asm.NewInputTarStream
-	// forms the tar-split contents. Pragmatically, NewInputTarStream should always produce storage.FileType entries at least
-	// for every non-empty file, which constrains it basically to the output we expect.
-	//
-	// Specifically, we assume:
-	// - There is a separate SegmentType entry for every tar header, but only one SegmentType entry for the full header incl. any extensions
-	// - (There is a FileType entry for every tar header, we ignore it)
-	// - Trailing padding of a file, if any, is included in the next SegmentType entry
-	// - At the end, there may be SegmentType entries just for the terminating zero blocks.
-
-	unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit))
-	for {
-		tsEntry, err := unpacker.Next()
-		if err != nil {
-			if err == io.EOF {
-				return nil
-			}
-			return fmt.Errorf("reading tar-split entries: %w", err)
-		}
-		switch tsEntry.Type {
-		case storage.SegmentType:
-			payload := tsEntry.Payload
-			// This is horrible, but we don't know how much padding to skip. (It can be computed from the previous hdr.Size for non-sparse
-			// files, but for sparse files that is set to the logical size.)
-			//
-			// First, assume that all padding is zero bytes.
-			// A tar header starts with a file name, which might in principle be empty, but
-			// at least https://github.com/opencontainers/image-spec/blob/main/layer.md#populate-initial-filesystem suggests that
-			// the tar name should never be empty (it should be ".", or maybe "./").
-			//
-			// This will cause us to skip all zero bytes in the trailing blocks, but that's fine.
-			i := 0
-			for i < len(payload) && payload[i] == 0 {
-				i++
-			}
-			payload = payload[i:]
-			tr := tar.NewReader(bytes.NewReader(payload))
-			hdr, err := tr.Next()
-			if err != nil {
-				if err == io.EOF { // Probably the last entry, but let's let the unpacker drive that.
-					break
-				}
-				return fmt.Errorf("decoding a tar header from a tar-split entry: %w", err)
-			}
-			if err := handler(hdr); err != nil {
-				return err
-			}
-
-		case storage.FileType:
-			// Nothing
-		default:
-			return fmt.Errorf("unexpected tar-split entry type %q", tsEntry.Type)
-		}
-	}
-}
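Context for the heuristic the deleted comment describes: for a non-sparse entry, the trailing padding is computable from the header alone, but for sparse files hdr.Size is the logical size, so it is not. A hedged sketch of the computable case (tarDataPadding is a hypothetical helper, not code from this repository):

package example

import "github.com/vbatts/tar-split/archive/tar"

// tarDataPadding returns how many zero bytes follow a non-sparse entry's
// data, keeping the archive aligned to tar's 512-byte blocks. For a sparse
// file, hdr.Size is the logical size, so the result is not the on-archive
// padding; that is exactly the gap the deleted heuristic had to guess at.
func tarDataPadding(hdr *tar.Header) int64 {
	return (512 - hdr.Size%512) % 512
}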


@@ -1,104 +0,0 @@
-package chunked
-
-import (
-	"bytes"
-	"fmt"
-	"io"
-	"testing"
-	"time"
-
-	"github.com/containers/storage/pkg/chunked/internal"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-	"github.com/vbatts/tar-split/archive/tar"
-	"github.com/vbatts/tar-split/tar/asm"
-	"github.com/vbatts/tar-split/tar/storage"
-)
-
-func createTestTarheader(index int, typeFlag byte, size int64) tar.Header {
-	n := (index + 1) * 100 // Use predictable, but distinct, values for all headers
-	res := tar.Header{
-		Typeflag:   typeFlag,
-		Name:       fmt.Sprintf("name%d", n),
-		Size:       size,
-		Mode:       int64(n + 1),
-		Uid:        n + 2,
-		Gid:        n + 3,
-		Uname:      fmt.Sprintf("user%d", n),
-		Gname:      fmt.Sprintf("group%d", n),
-		ModTime:    time.Unix(int64(n+4), 0),
-		AccessTime: time.Unix(int64(n+5), 0),
-		ChangeTime: time.Unix(int64(n+6), 0),
-		PAXRecords: map[string]string{fmt.Sprintf("key%d", n): fmt.Sprintf("value%d", n)},
-		Format:     tar.FormatPAX, // We must set a format; in the default one, AccessTime and ChangeTime are discarded.
-	}
-	switch res.Typeflag {
-	case tar.TypeLink, tar.TypeSymlink:
-		res.Linkname = fmt.Sprintf("link%d", n)
-	case tar.TypeChar, tar.TypeBlock:
-		res.Devmajor = int64(n + 7)
-		res.Devminor = int64(n + 8)
-	}
-	return res
-}
-
-func TestIterateTarSplit(t *testing.T) {
-	entries := []struct {
-		typeFlag byte
-		size     int64
-	}{
-		{tar.TypeReg, 0},
-		{tar.TypeReg, 1},
-		{tar.TypeReg, 511},
-		{tar.TypeReg, 512},
-		{tar.TypeReg, 513},
-		{tar.TypeLink, 0},
-		{tar.TypeSymlink, 0},
-		{tar.TypeChar, 0},
-		{tar.TypeBlock, 0},
-		{tar.TypeDir, 0},
-		{tar.TypeFifo, 0},
-	}
-
-	var tarball bytes.Buffer
-	var expected []tar.Header
-	w := tar.NewWriter(&tarball)
-	for i, e := range entries {
-		hdr := createTestTarheader(i, e.typeFlag, e.size)
-		err := w.WriteHeader(&hdr)
-		require.NoError(t, err)
-		data := make([]byte, e.size)
-		_, err = w.Write(data)
-		require.NoError(t, err)
-		expected = append(expected, hdr)
-	}
-	err := w.Close()
-	require.NoError(t, err)
-
-	var tarSplit bytes.Buffer
-	tsReader, err := asm.NewInputTarStream(&tarball, storage.NewJSONPacker(&tarSplit), storage.NewDiscardFilePutter())
-	require.NoError(t, err)
-	_, err = io.Copy(io.Discard, tsReader)
-	require.NoError(t, err)
-
-	var actual []tar.Header
-	err = iterateTarSplit(tarSplit.Bytes(), func(hdr *tar.Header) error {
-		actual = append(actual, *hdr)
-		return nil
-	})
-	require.NoError(t, err)
-
-	assert.Equal(t, len(expected), len(actual))
-	for i := range expected {
-		// We would have to open-code an equality comparison of time.Time values; instead, convert to FileMetadata,
-		// because we already have that implemented for that type — and because it provides a tiny bit of code coverage
-		// testing for ensureFileMetadataAttributesMatch.
-		expected1, err := internal.NewFileMetadata(&expected[i])
-		require.NoError(t, err, i)
-		actual1, err := internal.NewFileMetadata(&actual[i])
-		require.NoError(t, err, i)
-		err = ensureFileMetadataAttributesMatch(&expected1, &actual1)
-		assert.NoError(t, err, i)
-	}
-}