Split pkg/chunked.ZstdCompressor into a separate subpackage

containers/image/pkg/compression depends on that function, so it's
highly undesirable for it to drag in all of containers/storage.

So, move it into a separate subpackage, along with its exclusive
dependency tree.

Code that is called both from the new pkg/chunked/compressor
and the original pkg/chunked has been moved into pkg/chunked/internal ,
so that we don't expose any new public API. That move was made
purely mechanically without any concern for conceptual consistency,
to minimize the size of the dependency (and not to spend time on
that until the concept is proven to be useful).

Signed-off-by: Miloslav Trmač <mitr@redhat.com>
This commit is contained in:
Miloslav Trmač 2021-07-23 21:04:01 +02:00
parent 87fdb6d0eb
commit 41e2fb4e9b
5 changed files with 444 additions and 393 deletions

View File

@ -2,72 +2,29 @@ package chunked
import (
"bytes"
"encoding/base64"
"encoding/binary"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"time"
"github.com/containers/storage/pkg/ioutils"
"github.com/containers/storage/pkg/chunked/compressor"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/klauspost/compress/zstd"
digest "github.com/opencontainers/go-digest"
"github.com/pkg/errors"
"github.com/vbatts/tar-split/archive/tar"
)
type zstdTOC struct {
Version int `json:"version"`
Entries []zstdFileMetadata `json:"entries"`
}
type zstdFileMetadata struct {
Type string `json:"type"`
Name string `json:"name"`
Linkname string `json:"linkName,omitempty"`
Mode int64 `json:"mode,omitempty"`
Size int64 `json:"size"`
UID int `json:"uid"`
GID int `json:"gid"`
ModTime time.Time `json:"modtime"`
AccessTime time.Time `json:"accesstime"`
ChangeTime time.Time `json:"changetime"`
Devmajor int64 `json:"devMajor"`
Devminor int64 `json:"devMinor"`
Xattrs map[string]string `json:"xattrs,omitempty"`
Digest string `json:"digest,omitempty"`
Offset int64 `json:"offset,omitempty"`
EndOffset int64 `json:"endOffset,omitempty"`
// Currently chunking is not supported.
ChunkSize int64 `json:"chunkSize,omitempty"`
ChunkOffset int64 `json:"chunkOffset,omitempty"`
ChunkDigest string `json:"chunkDigest,omitempty"`
}
const (
TypeReg = "reg"
TypeChunk = "chunk"
TypeLink = "hardlink"
TypeChar = "char"
TypeBlock = "block"
TypeDir = "dir"
TypeFifo = "fifo"
TypeSymlink = "symlink"
TypeReg = internal.TypeReg
TypeChunk = internal.TypeChunk
TypeLink = internal.TypeLink
TypeChar = internal.TypeChar
TypeBlock = internal.TypeBlock
TypeDir = internal.TypeDir
TypeFifo = internal.TypeFifo
TypeSymlink = internal.TypeSymlink
)
var tarTypes = map[byte]string{
tar.TypeReg: TypeReg,
tar.TypeRegA: TypeReg,
tar.TypeLink: TypeLink,
tar.TypeChar: TypeChar,
tar.TypeBlock: TypeBlock,
tar.TypeDir: TypeDir,
tar.TypeFifo: TypeFifo,
tar.TypeSymlink: TypeSymlink,
}
var typesToTar = map[string]byte{
TypeReg: tar.TypeReg,
TypeLink: tar.TypeLink,
@ -78,14 +35,6 @@ var typesToTar = map[string]byte{
TypeSymlink: tar.TypeSymlink,
}
func getType(t byte) (string, error) {
r, found := tarTypes[t]
if !found {
return "", fmt.Errorf("unknown tarball type: %v", t)
}
return r, nil
}
func typeToTarType(t string) (byte, error) {
r, found := typesToTar[t]
if !found {
@ -94,52 +43,30 @@ func typeToTarType(t string) (byte, error) {
return r, nil
}
const (
manifestChecksumKey = "io.containers.zstd-chunked.manifest-checksum"
manifestInfoKey = "io.containers.zstd-chunked.manifest-position"
// manifestTypeCRFS is a manifest file compatible with the CRFS TOC file.
manifestTypeCRFS = 1
// footerSizeSupported is the footer size supported by this implementation.
// Newer versions of the image format might increase this value, so reject
// any version that is not supported.
footerSizeSupported = 40
)
var (
// when the zstd decoder encounters a skippable frame + 1 byte for the size, it
// will ignore it.
// https://tools.ietf.org/html/rfc8478#section-3.1.2
skippableFrameMagic = []byte{0x50, 0x2a, 0x4d, 0x18}
zstdChunkedFrameMagic = []byte{0x47, 0x6e, 0x55, 0x6c, 0x49, 0x6e, 0x55, 0x78}
)
func isZstdChunkedFrameMagic(data []byte) bool {
if len(data) < 8 {
return false
}
return bytes.Equal(zstdChunkedFrameMagic, data[:8])
return bytes.Equal(internal.ZstdChunkedFrameMagic, data[:8])
}
// readZstdChunkedManifest reads the zstd:chunked manifest from the seekable stream blobStream. The blob total size must
// be specified.
// This function uses the io.containers.zstd-chunked. annotations when specified.
func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, annotations map[string]string) ([]byte, error) {
footerSize := int64(footerSizeSupported)
footerSize := int64(internal.FooterSizeSupported)
if blobSize <= footerSize {
return nil, errors.New("blob too small")
}
manifestChecksumAnnotation := annotations[manifestChecksumKey]
manifestChecksumAnnotation := annotations[internal.ManifestChecksumKey]
if manifestChecksumAnnotation == "" {
return nil, fmt.Errorf("manifest checksum annotation %q not found", manifestChecksumKey)
return nil, fmt.Errorf("manifest checksum annotation %q not found", internal.ManifestChecksumKey)
}
var offset, length, lengthUncompressed, manifestType uint64
if offsetMetadata := annotations[manifestInfoKey]; offsetMetadata != "" {
if offsetMetadata := annotations[internal.ManifestInfoKey]; offsetMetadata != "" {
if _, err := fmt.Sscanf(offsetMetadata, "%d:%d:%d:%d", &offset, &length, &lengthUncompressed, &manifestType); err != nil {
return nil, err
}
@ -173,7 +100,7 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, ann
}
}
if manifestType != manifestTypeCRFS {
if manifestType != internal.ManifestTypeCRFS {
return nil, errors.New("invalid manifest type")
}
@ -235,279 +162,8 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, ann
return manifest, nil
}
func appendZstdSkippableFrame(dest io.Writer, data []byte) error {
if _, err := dest.Write(skippableFrameMagic); err != nil {
return err
}
var size []byte = make([]byte, 4)
binary.LittleEndian.PutUint32(size, uint32(len(data)))
if _, err := dest.Write(size); err != nil {
return err
}
if _, err := dest.Write(data); err != nil {
return err
}
return nil
}
func writeZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, offset uint64, metadata []zstdFileMetadata, level int) error {
// 8 is the size of the zstd skippable frame header + the frame size
manifestOffset := offset + 8
toc := zstdTOC{
Version: 1,
Entries: metadata,
}
// Generate the manifest
manifest, err := json.Marshal(toc)
if err != nil {
return err
}
var compressedBuffer bytes.Buffer
zstdWriter, err := zstdWriterWithLevel(&compressedBuffer, level)
if err != nil {
return err
}
if _, err := zstdWriter.Write(manifest); err != nil {
zstdWriter.Close()
return err
}
if err := zstdWriter.Close(); err != nil {
return err
}
compressedManifest := compressedBuffer.Bytes()
manifestDigester := digest.Canonical.Digester()
manifestChecksum := manifestDigester.Hash()
if _, err := manifestChecksum.Write(compressedManifest); err != nil {
return err
}
outMetadata[manifestChecksumKey] = manifestDigester.Digest().String()
outMetadata[manifestInfoKey] = fmt.Sprintf("%d:%d:%d:%d", manifestOffset, len(compressedManifest), len(manifest), manifestTypeCRFS)
if err := appendZstdSkippableFrame(dest, compressedManifest); err != nil {
return err
}
// Store the offset to the manifest and its size in LE order
var manifestDataLE []byte = make([]byte, footerSizeSupported)
binary.LittleEndian.PutUint64(manifestDataLE, manifestOffset)
binary.LittleEndian.PutUint64(manifestDataLE[8:], uint64(len(compressedManifest)))
binary.LittleEndian.PutUint64(manifestDataLE[16:], uint64(len(manifest)))
binary.LittleEndian.PutUint64(manifestDataLE[24:], uint64(manifestTypeCRFS))
copy(manifestDataLE[32:], zstdChunkedFrameMagic)
return appendZstdSkippableFrame(dest, manifestDataLE)
}
func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, reader io.Reader, level int) error {
// total written so far. Used to retrieve partial offsets in the file
dest := ioutils.NewWriteCounter(destFile)
tr := tar.NewReader(reader)
tr.RawAccounting = true
buf := make([]byte, 4096)
zstdWriter, err := zstdWriterWithLevel(dest, level)
if err != nil {
return err
}
defer func() {
if zstdWriter != nil {
zstdWriter.Close()
zstdWriter.Flush()
}
}()
restartCompression := func() (int64, error) {
var offset int64
if zstdWriter != nil {
if err := zstdWriter.Close(); err != nil {
return 0, err
}
if err := zstdWriter.Flush(); err != nil {
return 0, err
}
offset = dest.Count
zstdWriter.Reset(dest)
}
return offset, nil
}
var metadata []zstdFileMetadata
for {
hdr, err := tr.Next()
if err != nil {
if err == io.EOF {
break
}
return err
}
rawBytes := tr.RawBytes()
if _, err := zstdWriter.Write(rawBytes); err != nil {
return err
}
payloadDigester := digest.Canonical.Digester()
payloadChecksum := payloadDigester.Hash()
payloadDest := io.MultiWriter(payloadChecksum, zstdWriter)
// Now handle the payload, if any
var startOffset, endOffset int64
checksum := ""
for {
read, errRead := tr.Read(buf)
if errRead != nil && errRead != io.EOF {
return err
}
// restart the compression only if there is
// a payload.
if read > 0 {
if startOffset == 0 {
startOffset, err = restartCompression()
if err != nil {
return err
}
}
_, err := payloadDest.Write(buf[:read])
if err != nil {
return err
}
}
if errRead == io.EOF {
if startOffset > 0 {
endOffset, err = restartCompression()
if err != nil {
return err
}
checksum = payloadDigester.Digest().String()
}
break
}
}
typ, err := getType(hdr.Typeflag)
if err != nil {
return err
}
xattrs := make(map[string]string)
for k, v := range hdr.Xattrs {
xattrs[k] = base64.StdEncoding.EncodeToString([]byte(v))
}
m := zstdFileMetadata{
Type: typ,
Name: hdr.Name,
Linkname: hdr.Linkname,
Mode: hdr.Mode,
Size: hdr.Size,
UID: hdr.Uid,
GID: hdr.Gid,
ModTime: hdr.ModTime,
AccessTime: hdr.AccessTime,
ChangeTime: hdr.ChangeTime,
Devmajor: hdr.Devmajor,
Devminor: hdr.Devminor,
Xattrs: xattrs,
Digest: checksum,
Offset: startOffset,
EndOffset: endOffset,
// ChunkSize is 0 for the last chunk
ChunkSize: 0,
ChunkOffset: 0,
ChunkDigest: checksum,
}
metadata = append(metadata, m)
}
rawBytes := tr.RawBytes()
if _, err := zstdWriter.Write(rawBytes); err != nil {
return err
}
if err := zstdWriter.Flush(); err != nil {
return err
}
if err := zstdWriter.Close(); err != nil {
return err
}
zstdWriter = nil
return writeZstdChunkedManifest(dest, outMetadata, uint64(dest.Count), metadata, level)
}
type zstdChunkedWriter struct {
tarSplitOut *io.PipeWriter
tarSplitErr chan error
}
func (w zstdChunkedWriter) Close() error {
err := <-w.tarSplitErr
if err != nil {
w.tarSplitOut.Close()
return err
}
return w.tarSplitOut.Close()
}
func (w zstdChunkedWriter) Write(p []byte) (int, error) {
select {
case err := <-w.tarSplitErr:
w.tarSplitOut.Close()
return 0, err
default:
return w.tarSplitOut.Write(p)
}
}
// zstdChunkedWriterWithLevel writes a zstd compressed tarball where each file is
// compressed separately so it can be addressed separately. Idea based on CRFS:
// https://github.com/google/crfs
// The difference with CRFS is that the zstd compression is used instead of gzip.
// The reason for it is that zstd supports embedding metadata ignored by the decoder
// as part of the compressed stream.
// A manifest json file with all the metadata is appended at the end of the tarball
// stream, using zstd skippable frames.
// The final file will look like:
// [FILE_1][FILE_2]..[FILE_N][SKIPPABLE FRAME 1][SKIPPABLE FRAME 2]
// Where:
// [FILE_N]: [ZSTD HEADER][TAR HEADER][PAYLOAD FILE_N][ZSTD FOOTER]
// [SKIPPABLE FRAME 1]: [ZSTD SKIPPABLE FRAME, SIZE=MANIFEST LENGTH][MANIFEST]
// [SKIPPABLE FRAME 2]: [ZSTD SKIPPABLE FRAME, SIZE=16][MANIFEST_OFFSET][MANIFEST_LENGTH][MANIFEST_LENGTH_UNCOMPRESSED][MANIFEST_TYPE][CHUNKED_ZSTD_MAGIC_NUMBER]
// MANIFEST_OFFSET, MANIFEST_LENGTH, MANIFEST_LENGTH_UNCOMPRESSED and CHUNKED_ZSTD_MAGIC_NUMBER are 64 bits unsigned in little endian format.
func zstdChunkedWriterWithLevel(out io.Writer, metadata map[string]string, level int) (io.WriteCloser, error) {
ch := make(chan error, 1)
r, w := io.Pipe()
go func() {
ch <- writeZstdChunkedStream(out, metadata, r, level)
io.Copy(ioutil.Discard, r)
r.Close()
close(ch)
}()
return zstdChunkedWriter{
tarSplitOut: w,
tarSplitErr: ch,
}, nil
}
func zstdWriterWithLevel(dest io.Writer, level int) (*zstd.Encoder, error) {
el := zstd.EncoderLevelFromZstd(level)
return zstd.NewWriter(dest, zstd.WithEncoderLevel(el))
}
// ZstdCompressor is a CompressorFunc for the zstd compression algorithm.
// Deprecated: Use pkg/chunked/compressor.ZstdCompressor.
func ZstdCompressor(r io.Writer, metadata map[string]string, level *int) (io.WriteCloser, error) {
if level == nil {
l := 3
level = &l
}
return zstdChunkedWriterWithLevel(r, metadata, *level)
return compressor.ZstdCompressor(r, metadata, level)
}

View File

@ -0,0 +1,220 @@
package compressor
// NOTE: This is used from github.com/containers/image by callers that
// don't otherwise use containers/storage, so don't make this depend on any
// larger software like the graph drivers.
import (
"encoding/base64"
"io"
"io/ioutil"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/containers/storage/pkg/ioutils"
"github.com/opencontainers/go-digest"
"github.com/vbatts/tar-split/archive/tar"
)
func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, reader io.Reader, level int) error {
// total written so far. Used to retrieve partial offsets in the file
dest := ioutils.NewWriteCounter(destFile)
tr := tar.NewReader(reader)
tr.RawAccounting = true
buf := make([]byte, 4096)
zstdWriter, err := internal.ZstdWriterWithLevel(dest, level)
if err != nil {
return err
}
defer func() {
if zstdWriter != nil {
zstdWriter.Close()
zstdWriter.Flush()
}
}()
restartCompression := func() (int64, error) {
var offset int64
if zstdWriter != nil {
if err := zstdWriter.Close(); err != nil {
return 0, err
}
if err := zstdWriter.Flush(); err != nil {
return 0, err
}
offset = dest.Count
zstdWriter.Reset(dest)
}
return offset, nil
}
var metadata []internal.ZstdFileMetadata
for {
hdr, err := tr.Next()
if err != nil {
if err == io.EOF {
break
}
return err
}
rawBytes := tr.RawBytes()
if _, err := zstdWriter.Write(rawBytes); err != nil {
return err
}
payloadDigester := digest.Canonical.Digester()
payloadChecksum := payloadDigester.Hash()
payloadDest := io.MultiWriter(payloadChecksum, zstdWriter)
// Now handle the payload, if any
var startOffset, endOffset int64
checksum := ""
for {
read, errRead := tr.Read(buf)
if errRead != nil && errRead != io.EOF {
return err
}
// restart the compression only if there is
// a payload.
if read > 0 {
if startOffset == 0 {
startOffset, err = restartCompression()
if err != nil {
return err
}
}
_, err := payloadDest.Write(buf[:read])
if err != nil {
return err
}
}
if errRead == io.EOF {
if startOffset > 0 {
endOffset, err = restartCompression()
if err != nil {
return err
}
checksum = payloadDigester.Digest().String()
}
break
}
}
typ, err := internal.GetType(hdr.Typeflag)
if err != nil {
return err
}
xattrs := make(map[string]string)
for k, v := range hdr.Xattrs {
xattrs[k] = base64.StdEncoding.EncodeToString([]byte(v))
}
m := internal.ZstdFileMetadata{
Type: typ,
Name: hdr.Name,
Linkname: hdr.Linkname,
Mode: hdr.Mode,
Size: hdr.Size,
UID: hdr.Uid,
GID: hdr.Gid,
ModTime: hdr.ModTime,
AccessTime: hdr.AccessTime,
ChangeTime: hdr.ChangeTime,
Devmajor: hdr.Devmajor,
Devminor: hdr.Devminor,
Xattrs: xattrs,
Digest: checksum,
Offset: startOffset,
EndOffset: endOffset,
// ChunkSize is 0 for the last chunk
ChunkSize: 0,
ChunkOffset: 0,
ChunkDigest: checksum,
}
metadata = append(metadata, m)
}
rawBytes := tr.RawBytes()
if _, err := zstdWriter.Write(rawBytes); err != nil {
return err
}
if err := zstdWriter.Flush(); err != nil {
return err
}
if err := zstdWriter.Close(); err != nil {
return err
}
zstdWriter = nil
return internal.WriteZstdChunkedManifest(dest, outMetadata, uint64(dest.Count), metadata, level)
}
type zstdChunkedWriter struct {
tarSplitOut *io.PipeWriter
tarSplitErr chan error
}
func (w zstdChunkedWriter) Close() error {
err := <-w.tarSplitErr
if err != nil {
w.tarSplitOut.Close()
return err
}
return w.tarSplitOut.Close()
}
func (w zstdChunkedWriter) Write(p []byte) (int, error) {
select {
case err := <-w.tarSplitErr:
w.tarSplitOut.Close()
return 0, err
default:
return w.tarSplitOut.Write(p)
}
}
// zstdChunkedWriterWithLevel writes a zstd compressed tarball where each file is
// compressed separately so it can be addressed separately. Idea based on CRFS:
// https://github.com/google/crfs
// The difference with CRFS is that the zstd compression is used instead of gzip.
// The reason for it is that zstd supports embedding metadata ignored by the decoder
// as part of the compressed stream.
// A manifest json file with all the metadata is appended at the end of the tarball
// stream, using zstd skippable frames.
// The final file will look like:
// [FILE_1][FILE_2]..[FILE_N][SKIPPABLE FRAME 1][SKIPPABLE FRAME 2]
// Where:
// [FILE_N]: [ZSTD HEADER][TAR HEADER][PAYLOAD FILE_N][ZSTD FOOTER]
// [SKIPPABLE FRAME 1]: [ZSTD SKIPPABLE FRAME, SIZE=MANIFEST LENGTH][MANIFEST]
// [SKIPPABLE FRAME 2]: [ZSTD SKIPPABLE FRAME, SIZE=16][MANIFEST_OFFSET][MANIFEST_LENGTH][MANIFEST_LENGTH_UNCOMPRESSED][MANIFEST_TYPE][CHUNKED_ZSTD_MAGIC_NUMBER]
// MANIFEST_OFFSET, MANIFEST_LENGTH, MANIFEST_LENGTH_UNCOMPRESSED and CHUNKED_ZSTD_MAGIC_NUMBER are 64 bits unsigned in little endian format.
func zstdChunkedWriterWithLevel(out io.Writer, metadata map[string]string, level int) (io.WriteCloser, error) {
ch := make(chan error, 1)
r, w := io.Pipe()
go func() {
ch <- writeZstdChunkedStream(out, metadata, r, level)
io.Copy(ioutil.Discard, r)
r.Close()
close(ch)
}()
return zstdChunkedWriter{
tarSplitOut: w,
tarSplitErr: ch,
}, nil
}
// ZstdCompressor is a CompressorFunc for the zstd compression algorithm.
func ZstdCompressor(r io.Writer, metadata map[string]string, level *int) (io.WriteCloser, error) {
if level == nil {
l := 3
level = &l
}
return zstdChunkedWriterWithLevel(r, metadata, *level)
}

View File

@ -0,0 +1,172 @@
package internal
// NOTE: This is used from github.com/containers/image by callers that
// don't otherwise use containers/storage, so don't make this depend on any
// larger software like the graph drivers.
import (
"archive/tar"
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"io"
"time"
"github.com/klauspost/compress/zstd"
"github.com/opencontainers/go-digest"
)
type ZstdTOC struct {
Version int `json:"version"`
Entries []ZstdFileMetadata `json:"entries"`
}
type ZstdFileMetadata struct {
Type string `json:"type"`
Name string `json:"name"`
Linkname string `json:"linkName,omitempty"`
Mode int64 `json:"mode,omitempty"`
Size int64 `json:"size"`
UID int `json:"uid"`
GID int `json:"gid"`
ModTime time.Time `json:"modtime"`
AccessTime time.Time `json:"accesstime"`
ChangeTime time.Time `json:"changetime"`
Devmajor int64 `json:"devMajor"`
Devminor int64 `json:"devMinor"`
Xattrs map[string]string `json:"xattrs,omitempty"`
Digest string `json:"digest,omitempty"`
Offset int64 `json:"offset,omitempty"`
EndOffset int64 `json:"endOffset,omitempty"`
// Currently chunking is not supported.
ChunkSize int64 `json:"chunkSize,omitempty"`
ChunkOffset int64 `json:"chunkOffset,omitempty"`
ChunkDigest string `json:"chunkDigest,omitempty"`
}
const (
TypeReg = "reg"
TypeChunk = "chunk"
TypeLink = "hardlink"
TypeChar = "char"
TypeBlock = "block"
TypeDir = "dir"
TypeFifo = "fifo"
TypeSymlink = "symlink"
)
var TarTypes = map[byte]string{
tar.TypeReg: TypeReg,
tar.TypeRegA: TypeReg,
tar.TypeLink: TypeLink,
tar.TypeChar: TypeChar,
tar.TypeBlock: TypeBlock,
tar.TypeDir: TypeDir,
tar.TypeFifo: TypeFifo,
tar.TypeSymlink: TypeSymlink,
}
func GetType(t byte) (string, error) {
r, found := TarTypes[t]
if !found {
return "", fmt.Errorf("unknown tarball type: %v", t)
}
return r, nil
}
const (
ManifestChecksumKey = "io.containers.zstd-chunked.manifest-checksum"
ManifestInfoKey = "io.containers.zstd-chunked.manifest-position"
// ManifestTypeCRFS is a manifest file compatible with the CRFS TOC file.
ManifestTypeCRFS = 1
// FooterSizeSupported is the footer size supported by this implementation.
// Newer versions of the image format might increase this value, so reject
// any version that is not supported.
FooterSizeSupported = 40
)
var (
// when the zstd decoder encounters a skippable frame + 1 byte for the size, it
// will ignore it.
// https://tools.ietf.org/html/rfc8478#section-3.1.2
skippableFrameMagic = []byte{0x50, 0x2a, 0x4d, 0x18}
ZstdChunkedFrameMagic = []byte{0x47, 0x6e, 0x55, 0x6c, 0x49, 0x6e, 0x55, 0x78}
)
func appendZstdSkippableFrame(dest io.Writer, data []byte) error {
if _, err := dest.Write(skippableFrameMagic); err != nil {
return err
}
var size []byte = make([]byte, 4)
binary.LittleEndian.PutUint32(size, uint32(len(data)))
if _, err := dest.Write(size); err != nil {
return err
}
if _, err := dest.Write(data); err != nil {
return err
}
return nil
}
func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, offset uint64, metadata []ZstdFileMetadata, level int) error {
// 8 is the size of the zstd skippable frame header + the frame size
manifestOffset := offset + 8
toc := ZstdTOC{
Version: 1,
Entries: metadata,
}
// Generate the manifest
manifest, err := json.Marshal(toc)
if err != nil {
return err
}
var compressedBuffer bytes.Buffer
zstdWriter, err := ZstdWriterWithLevel(&compressedBuffer, level)
if err != nil {
return err
}
if _, err := zstdWriter.Write(manifest); err != nil {
zstdWriter.Close()
return err
}
if err := zstdWriter.Close(); err != nil {
return err
}
compressedManifest := compressedBuffer.Bytes()
manifestDigester := digest.Canonical.Digester()
manifestChecksum := manifestDigester.Hash()
if _, err := manifestChecksum.Write(compressedManifest); err != nil {
return err
}
outMetadata[ManifestChecksumKey] = manifestDigester.Digest().String()
outMetadata[ManifestInfoKey] = fmt.Sprintf("%d:%d:%d:%d", manifestOffset, len(compressedManifest), len(manifest), ManifestTypeCRFS)
if err := appendZstdSkippableFrame(dest, compressedManifest); err != nil {
return err
}
// Store the offset to the manifest and its size in LE order
var manifestDataLE []byte = make([]byte, FooterSizeSupported)
binary.LittleEndian.PutUint64(manifestDataLE, manifestOffset)
binary.LittleEndian.PutUint64(manifestDataLE[8:], uint64(len(compressedManifest)))
binary.LittleEndian.PutUint64(manifestDataLE[16:], uint64(len(manifest)))
binary.LittleEndian.PutUint64(manifestDataLE[24:], uint64(ManifestTypeCRFS))
copy(manifestDataLE[32:], ZstdChunkedFrameMagic)
return appendZstdSkippableFrame(dest, manifestDataLE)
}
func ZstdWriterWithLevel(dest io.Writer, level int) (*zstd.Encoder, error) {
el := zstd.EncoderLevelFromZstd(level)
return zstd.NewWriter(dest, zstd.WithEncoderLevel(el))
}

View File

@ -19,6 +19,7 @@ import (
graphdriver "github.com/containers/storage/drivers"
driversCopy "github.com/containers/storage/drivers/copy"
"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/types"
"github.com/klauspost/compress/zstd"
@ -39,7 +40,7 @@ const (
type chunkedZstdDiffer struct {
stream ImageSourceSeekable
manifest []byte
layersMetadata map[string][]zstdFileMetadata
layersMetadata map[string][]internal.ZstdFileMetadata
layersTarget map[string]string
}
@ -75,11 +76,11 @@ func copyFileContent(src, destFile, root string, dirfd int, missingDirsMode, mod
return dstFile, st.Size(), err
}
func prepareOtherLayersCache(layersMetadata map[string][]zstdFileMetadata) map[string]map[string]*zstdFileMetadata {
maps := make(map[string]map[string]*zstdFileMetadata)
func prepareOtherLayersCache(layersMetadata map[string][]internal.ZstdFileMetadata) map[string]map[string]*internal.ZstdFileMetadata {
maps := make(map[string]map[string]*internal.ZstdFileMetadata)
for layerID, v := range layersMetadata {
r := make(map[string]*zstdFileMetadata)
r := make(map[string]*internal.ZstdFileMetadata)
for i := range v {
r[v[i].Digest] = &v[i]
}
@ -88,13 +89,13 @@ func prepareOtherLayersCache(layersMetadata map[string][]zstdFileMetadata) map[s
return maps
}
func getLayersCache(store storage.Store) (map[string][]zstdFileMetadata, map[string]string, error) {
func getLayersCache(store storage.Store) (map[string][]internal.ZstdFileMetadata, map[string]string, error) {
allLayers, err := store.Layers()
if err != nil {
return nil, nil, err
}
layersMetadata := make(map[string][]zstdFileMetadata)
layersMetadata := make(map[string][]internal.ZstdFileMetadata)
layersTarget := make(map[string]string)
for _, r := range allLayers {
manifestReader, err := store.LayerBigData(r.ID, bigDataKey)
@ -106,7 +107,7 @@ func getLayersCache(store storage.Store) (map[string][]zstdFileMetadata, map[str
if err != nil {
return nil, nil, err
}
var toc zstdTOC
var toc internal.ZstdTOC
if err := json.Unmarshal(manifest, &toc); err != nil {
continue
}
@ -123,7 +124,7 @@ func getLayersCache(store storage.Store) (map[string][]zstdFileMetadata, map[str
// GetDiffer returns a differ than can be used with ApplyDiffWithDiffer.
func GetDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (graphdriver.Differ, error) {
if _, ok := annotations[manifestChecksumKey]; ok {
if _, ok := annotations[internal.ManifestChecksumKey]; ok {
return makeZstdChunkedDiffer(ctx, store, blobSize, annotations, iss)
}
return nil, errors.New("blob type not supported for partial retrieval")
@ -147,7 +148,7 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in
}, nil
}
func findFileInOtherLayers(file zstdFileMetadata, root string, dirfd int, layersMetadata map[string]map[string]*zstdFileMetadata, layersTarget map[string]string, missingDirsMode os.FileMode) (*os.File, int64, error) {
func findFileInOtherLayers(file internal.ZstdFileMetadata, root string, dirfd int, layersMetadata map[string]map[string]*internal.ZstdFileMetadata, layersTarget map[string]string, missingDirsMode os.FileMode) (*os.File, int64, error) {
// this is ugly, needs to be indexed
for layerID, checksums := range layersMetadata {
m, found := checksums[file.Digest]
@ -194,7 +195,7 @@ func getFileDigest(f *os.File) (digest.Digest, error) {
// findFileOnTheHost checks whether the requested file already exist on the host and copies the file content from there if possible.
// It is currently implemented to look only at the file with the same path. Ideally it can detect the same content also at different
// paths.
func findFileOnTheHost(file zstdFileMetadata, root string, dirfd int, missingDirsMode os.FileMode) (*os.File, int64, error) {
func findFileOnTheHost(file internal.ZstdFileMetadata, root string, dirfd int, missingDirsMode os.FileMode) (*os.File, int64, error) {
sourceFile := filepath.Clean(filepath.Join("/", file.Name))
if !strings.HasPrefix(sourceFile, "/usr/") {
// limit host deduplication to files under /usr.
@ -251,7 +252,7 @@ func findFileOnTheHost(file zstdFileMetadata, root string, dirfd int, missingDir
return dstFile, written, nil
}
func maybeDoIDRemap(manifest []zstdFileMetadata, options *archive.TarOptions) error {
func maybeDoIDRemap(manifest []internal.ZstdFileMetadata, options *archive.TarOptions) error {
if options.ChownOpts == nil && len(options.UIDMaps) == 0 || len(options.GIDMaps) == 0 {
return nil
}
@ -278,7 +279,7 @@ func maybeDoIDRemap(manifest []zstdFileMetadata, options *archive.TarOptions) er
}
type missingFile struct {
File *zstdFileMetadata
File *internal.ZstdFileMetadata
Gap int64
}
@ -291,7 +292,7 @@ type missingChunk struct {
Files []missingFile
}
func setFileAttrs(file *os.File, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error {
func setFileAttrs(file *os.File, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) error {
if file == nil || file.Fd() < 0 {
return errors.Errorf("invalid file")
}
@ -346,7 +347,7 @@ func openFileUnderRoot(name, root string, dirfd int, flags uint64, mode os.FileM
return os.NewFile(uintptr(fd), name), nil
}
func createFileFromZstdStream(dest string, dirfd int, reader io.Reader, missingDirsMode, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) (err error) {
func createFileFromZstdStream(dest string, dirfd int, reader io.Reader, missingDirsMode, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) (err error) {
file, err := openFileUnderRoot(metadata.Name, dest, dirfd, newFileFlags, 0)
if err != nil {
return err
@ -497,7 +498,7 @@ func retrieveMissingFiles(input *chunkedZstdDiffer, dest string, dirfd int, miss
return nil
}
func safeMkdir(target string, dirfd int, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error {
func safeMkdir(target string, dirfd int, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) error {
parent := filepath.Dir(metadata.Name)
base := filepath.Base(metadata.Name)
@ -526,7 +527,7 @@ func safeMkdir(target string, dirfd int, mode os.FileMode, metadata *zstdFileMet
return setFileAttrs(file, mode, metadata, options)
}
func safeLink(target string, dirfd int, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error {
func safeLink(target string, dirfd int, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) error {
sourceFile, err := openFileUnderRoot(metadata.Linkname, target, dirfd, unix.O_RDONLY, 0)
if err != nil {
return err
@ -558,7 +559,7 @@ func safeLink(target string, dirfd int, mode os.FileMode, metadata *zstdFileMeta
return setFileAttrs(newFile, mode, metadata, options)
}
func safeSymlink(target string, dirfd int, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error {
func safeSymlink(target string, dirfd int, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) error {
destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name)
destDirFd := dirfd
if destDir != "." {
@ -636,7 +637,7 @@ type hardLinkToCreate struct {
dest string
dirfd int
mode os.FileMode
metadata *zstdFileMetadata
metadata *internal.ZstdFileMetadata
}
func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions) (graphdriver.DriverWithDifferOutput, error) {
@ -659,7 +660,7 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions)
}
// Generate the manifest
var toc zstdTOC
var toc internal.ZstdTOC
if err := json.Unmarshal(d.manifest, &toc); err != nil {
return output, err
}
@ -667,7 +668,7 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions)
whiteoutConverter := archive.GetWhiteoutConverter(options.WhiteoutFormat, options.WhiteoutData)
var missingChunks []missingChunk
var mergedEntries []zstdFileMetadata
var mergedEntries []internal.ZstdFileMetadata
if err := maybeDoIDRemap(toc.Entries, options); err != nil {
return output, err

View File

@ -10,10 +10,12 @@ import (
"io"
"io/ioutil"
"testing"
"github.com/containers/storage/pkg/chunked/internal"
)
func TestIsZstdChunkedFrameMagic(t *testing.T) {
b := append(zstdChunkedFrameMagic[:], make([]byte, 200)...)
b := append(internal.ZstdChunkedFrameMagic[:], make([]byte, 200)...)
if !isZstdChunkedFrameMagic(b) {
t.Fatal("Chunked frame magic not found")
}
@ -54,7 +56,7 @@ func (s seekable) GetBlobAt(req []ImageSourceChunk) (chan io.ReadCloser, chan er
return m, e, nil
}
var someFiles = []zstdFileMetadata{
var someFiles = []internal.ZstdFileMetadata{
{
Type: "dir",
Name: "/foo",
@ -93,14 +95,14 @@ func TestGenerateAndParseManifest(t *testing.T) {
var b bytes.Buffer
writer := bufio.NewWriter(&b)
if err := writeZstdChunkedManifest(writer, annotations, offsetManifest, someFiles[:], 9); err != nil {
if err := internal.WriteZstdChunkedManifest(writer, annotations, offsetManifest, someFiles[:], 9); err != nil {
t.Error(err)
}
if err := writer.Flush(); err != nil {
t.Error(err)
}
offsetMetadata := annotations[manifestInfoKey]
offsetMetadata := annotations[internal.ManifestInfoKey]
if offsetMetadata == "" {
t.Fatal("Annotation not found")
}
@ -113,7 +115,7 @@ func TestGenerateAndParseManifest(t *testing.T) {
if offset != offsetManifest+8 {
t.Fatalf("Invalid offset %d", offset)
}
if manifestType != manifestTypeCRFS {
if manifestType != internal.ManifestTypeCRFS {
t.Fatalf("Invalid manifest type %d", manifestType)
}
if b.Len() == 0 {
@ -133,7 +135,7 @@ func TestGenerateAndParseManifest(t *testing.T) {
t.Error(err)
}
var toc zstdTOC
var toc internal.ZstdTOC
if err := json.Unmarshal(manifest, &toc); err != nil {
t.Error(err)
}
@ -159,16 +161,16 @@ func TestGetTarType(t *testing.T) {
if _, err := typeToTarType("FOO"); err == nil {
t.Fatal("Invalid typeToTarType conversion")
}
for k, v := range tarTypes {
r, err := getType(k)
for k, v := range internal.TarTypes {
r, err := internal.GetType(k)
if err != nil {
t.Error(err)
}
if r != v {
t.Fatal("Invalid getType conversion")
t.Fatal("Invalid GetType conversion")
}
}
if _, err := getType(byte('Z')); err == nil {
t.Fatal("Invalid getType conversion")
if _, err := internal.GetType(byte('Z')); err == nil {
t.Fatal("Invalid GetType conversion")
}
}