Use tar-split/tar/asm.IterateHeaders now that it has been accepted
... instead of our version which makes assumptions on the internal decisions of the tar-split project, and needs heuristics to guess where file padding ends. Signed-off-by: Miloslav Trmač <mitr@redhat.com>
This commit is contained in:
parent
f45f1ed2f9
commit
39e467aa53
|
|
@ -2,6 +2,7 @@ package chunked
|
||||||
|
|
||||||
import (
|
import (
|
||||||
archivetar "archive/tar"
|
archivetar "archive/tar"
|
||||||
|
"bytes"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
|
@ -14,6 +15,8 @@ import (
|
||||||
"github.com/klauspost/pgzip"
|
"github.com/klauspost/pgzip"
|
||||||
digest "github.com/opencontainers/go-digest"
|
digest "github.com/opencontainers/go-digest"
|
||||||
"github.com/vbatts/tar-split/archive/tar"
|
"github.com/vbatts/tar-split/archive/tar"
|
||||||
|
"github.com/vbatts/tar-split/tar/asm"
|
||||||
|
"github.com/vbatts/tar-split/tar/storage"
|
||||||
expMaps "golang.org/x/exp/maps"
|
expMaps "golang.org/x/exp/maps"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -256,7 +259,8 @@ func ensureTOCMatchesTarSplit(toc *internal.TOC, tarSplit []byte) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := iterateTarSplit(tarSplit, func(hdr *tar.Header) error {
|
unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit))
|
||||||
|
if err := asm.IterateHeaders(unpacker, func(hdr *tar.Header) error {
|
||||||
e, ok := pendingFiles[hdr.Name]
|
e, ok := pendingFiles[hdr.Name]
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("tar-split contains an entry for %q missing in TOC", hdr.Name)
|
return fmt.Errorf("tar-split contains an entry for %q missing in TOC", hdr.Name)
|
||||||
|
|
|
||||||
|
|
@ -1,68 +0,0 @@
|
||||||
package chunked
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
|
|
||||||
"github.com/vbatts/tar-split/archive/tar"
|
|
||||||
"github.com/vbatts/tar-split/tar/storage"
|
|
||||||
)
|
|
||||||
|
|
||||||
// iterateTarSplit calls handler for each tar header in tarSplit
|
|
||||||
func iterateTarSplit(tarSplit []byte, handler func(hdr *tar.Header) error) error {
|
|
||||||
// This, strictly speaking, hard-codes undocumented assumptions about how github.com/vbatts/tar-split/tar/asm.NewInputTarStream
|
|
||||||
// forms the tar-split contents. Pragmatically, NewInputTarStream should always produce storage.FileType entries at least
|
|
||||||
// for every non-empty file, which constraints it basically to the output we expect.
|
|
||||||
//
|
|
||||||
// Specifically, we assume:
|
|
||||||
// - There is a separate SegmentType entry for every tar header, but only one SegmentType entry for the full header incl. any extensions
|
|
||||||
// - (There is a FileType entry for every tar header, we ignore it)
|
|
||||||
// - Trailing padding of a file, if any, is included in the next SegmentType entry
|
|
||||||
// - At the end, there may be SegmentType entries just for the terminating zero blocks.
|
|
||||||
|
|
||||||
unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit))
|
|
||||||
for {
|
|
||||||
tsEntry, err := unpacker.Next()
|
|
||||||
if err != nil {
|
|
||||||
if err == io.EOF {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return fmt.Errorf("reading tar-split entries: %w", err)
|
|
||||||
}
|
|
||||||
switch tsEntry.Type {
|
|
||||||
case storage.SegmentType:
|
|
||||||
payload := tsEntry.Payload
|
|
||||||
// This is horrible, but we don’t know how much padding to skip. (It can be computed from the previous hdr.Size for non-sparse
|
|
||||||
// files, but for sparse files that is set to the logical size.)
|
|
||||||
//
|
|
||||||
// First, assume that all padding is zero bytes.
|
|
||||||
// A tar header starts with a file name, which might in principle be empty, but
|
|
||||||
// at least https://github.com/opencontainers/image-spec/blob/main/layer.md#populate-initial-filesystem suggests that
|
|
||||||
// the tar name should never be empty (it should be ".", or maybe "./").
|
|
||||||
//
|
|
||||||
// This will cause us to skip all zero bytes in the trailing blocks, but that’s fine.
|
|
||||||
i := 0
|
|
||||||
for i < len(payload) && payload[i] == 0 {
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
payload = payload[i:]
|
|
||||||
tr := tar.NewReader(bytes.NewReader(payload))
|
|
||||||
hdr, err := tr.Next()
|
|
||||||
if err != nil {
|
|
||||||
if err == io.EOF { // Probably the last entry, but let’s let the unpacker drive that.
|
|
||||||
break
|
|
||||||
}
|
|
||||||
return fmt.Errorf("decoding a tar header from a tar-split entry: %w", err)
|
|
||||||
}
|
|
||||||
if err := handler(hdr); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
case storage.FileType:
|
|
||||||
// Nothing
|
|
||||||
default:
|
|
||||||
return fmt.Errorf("unexpected tar-split entry type %q", tsEntry.Type)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,104 +0,0 @@
|
||||||
package chunked
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"testing"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/containers/storage/pkg/chunked/internal"
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
"github.com/vbatts/tar-split/archive/tar"
|
|
||||||
"github.com/vbatts/tar-split/tar/asm"
|
|
||||||
"github.com/vbatts/tar-split/tar/storage"
|
|
||||||
)
|
|
||||||
|
|
||||||
func createTestTarheader(index int, typeFlag byte, size int64) tar.Header {
|
|
||||||
n := (index + 1) * 100 // Use predictable, but distinct, values for all headers
|
|
||||||
|
|
||||||
res := tar.Header{
|
|
||||||
Typeflag: typeFlag,
|
|
||||||
Name: fmt.Sprintf("name%d", n),
|
|
||||||
Size: size,
|
|
||||||
Mode: int64(n + 1),
|
|
||||||
Uid: n + 2,
|
|
||||||
Gid: n + 3,
|
|
||||||
Uname: fmt.Sprintf("user%d", n),
|
|
||||||
Gname: fmt.Sprintf("group%d", n),
|
|
||||||
ModTime: time.Unix(int64(n+4), 0),
|
|
||||||
AccessTime: time.Unix(int64(n+5), 0),
|
|
||||||
ChangeTime: time.Unix(int64(n+6), 0),
|
|
||||||
PAXRecords: map[string]string{fmt.Sprintf("key%d", n): fmt.Sprintf("value%d", n)},
|
|
||||||
Format: tar.FormatPAX, // We must set a format, in the default one AccessTime and ChangeTime are discarded.
|
|
||||||
}
|
|
||||||
switch res.Typeflag {
|
|
||||||
case tar.TypeLink, tar.TypeSymlink:
|
|
||||||
res.Linkname = fmt.Sprintf("link%d", n)
|
|
||||||
case tar.TypeChar, tar.TypeBlock:
|
|
||||||
res.Devmajor = int64(n + 7)
|
|
||||||
res.Devminor = int64(n + 8)
|
|
||||||
}
|
|
||||||
return res
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestIterateTarSplit(t *testing.T) {
|
|
||||||
entries := []struct {
|
|
||||||
typeFlag byte
|
|
||||||
size int64
|
|
||||||
}{
|
|
||||||
{tar.TypeReg, 0},
|
|
||||||
{tar.TypeReg, 1},
|
|
||||||
{tar.TypeReg, 511},
|
|
||||||
{tar.TypeReg, 512},
|
|
||||||
{tar.TypeReg, 513},
|
|
||||||
{tar.TypeLink, 0},
|
|
||||||
{tar.TypeSymlink, 0},
|
|
||||||
{tar.TypeChar, 0},
|
|
||||||
{tar.TypeBlock, 0},
|
|
||||||
{tar.TypeDir, 0},
|
|
||||||
{tar.TypeFifo, 0},
|
|
||||||
}
|
|
||||||
|
|
||||||
var tarball bytes.Buffer
|
|
||||||
var expected []tar.Header
|
|
||||||
w := tar.NewWriter(&tarball)
|
|
||||||
for i, e := range entries {
|
|
||||||
hdr := createTestTarheader(i, e.typeFlag, e.size)
|
|
||||||
err := w.WriteHeader(&hdr)
|
|
||||||
require.NoError(t, err)
|
|
||||||
data := make([]byte, e.size)
|
|
||||||
_, err = w.Write(data)
|
|
||||||
require.NoError(t, err)
|
|
||||||
expected = append(expected, hdr)
|
|
||||||
}
|
|
||||||
err := w.Close()
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
var tarSplit bytes.Buffer
|
|
||||||
tsReader, err := asm.NewInputTarStream(&tarball, storage.NewJSONPacker(&tarSplit), storage.NewDiscardFilePutter())
|
|
||||||
require.NoError(t, err)
|
|
||||||
_, err = io.Copy(io.Discard, tsReader)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
var actual []tar.Header
|
|
||||||
err = iterateTarSplit(tarSplit.Bytes(), func(hdr *tar.Header) error {
|
|
||||||
actual = append(actual, *hdr)
|
|
||||||
return nil
|
|
||||||
})
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
assert.Equal(t, len(expected), len(actual))
|
|
||||||
for i := range expected {
|
|
||||||
// We would have to open-code an equality comparison of time.Time values; instead, convert to FileMetadata,
|
|
||||||
// because we already have that implemented for that type — and because it provides a tiny bit of code coverage
|
|
||||||
// testing for ensureFileMetadataAttributesMatch.
|
|
||||||
expected1, err := internal.NewFileMetadata(&expected[i])
|
|
||||||
require.NoError(t, err, i)
|
|
||||||
actual1, err := internal.NewFileMetadata(&actual[i])
|
|
||||||
require.NoError(t, err, i)
|
|
||||||
err = ensureFileMetadataAttributesMatch(&expected1, &actual1)
|
|
||||||
assert.NoError(t, err, i)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Loading…
Reference in New Issue