From 41e2fb4e9b8454c20334c97e1512e4d412c016d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Fri, 23 Jul 2021 21:04:01 +0200 Subject: [PATCH] Split pkg/chunked.ZstdCompressor into a separate subpackage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit containers/image/pkg/compression depends on that function, so it's highly undesirable for it to drag in all of containers/storage. So, move it into a separate subpackage, along with its exclusive dependency tree. Code that is called both from the new pkg/chunked/compressor and the original pkg/chunked has been moved into pkg/chunked/internal , so that we don't expose any new public API. That move was made purely mechanically without any concern for conceptual consistency, to minimize the size of the dependency (and not to spend time on that until the concept is proven to be useful). Signed-off-by: Miloslav Trmač --- storage/pkg/chunked/compression.go | 380 +------------------ storage/pkg/chunked/compressor/compressor.go | 220 +++++++++++ storage/pkg/chunked/internal/compression.go | 172 +++++++++ storage/pkg/chunked/storage_linux.go | 41 +- storage/pkg/chunked/zstdchunked_test.go | 24 +- 5 files changed, 444 insertions(+), 393 deletions(-) create mode 100644 storage/pkg/chunked/compressor/compressor.go create mode 100644 storage/pkg/chunked/internal/compression.go diff --git a/storage/pkg/chunked/compression.go b/storage/pkg/chunked/compression.go index 605be4b8f8..f2811fb9a9 100644 --- a/storage/pkg/chunked/compression.go +++ b/storage/pkg/chunked/compression.go @@ -2,72 +2,29 @@ package chunked import ( "bytes" - "encoding/base64" "encoding/binary" - "encoding/json" "fmt" "io" - "io/ioutil" - "time" - "github.com/containers/storage/pkg/ioutils" + "github.com/containers/storage/pkg/chunked/compressor" + "github.com/containers/storage/pkg/chunked/internal" "github.com/klauspost/compress/zstd" digest "github.com/opencontainers/go-digest" "github.com/pkg/errors" "github.com/vbatts/tar-split/archive/tar" ) -type zstdTOC struct { - Version int `json:"version"` - Entries []zstdFileMetadata `json:"entries"` -} - -type zstdFileMetadata struct { - Type string `json:"type"` - Name string `json:"name"` - Linkname string `json:"linkName,omitempty"` - Mode int64 `json:"mode,omitempty"` - Size int64 `json:"size"` - UID int `json:"uid"` - GID int `json:"gid"` - ModTime time.Time `json:"modtime"` - AccessTime time.Time `json:"accesstime"` - ChangeTime time.Time `json:"changetime"` - Devmajor int64 `json:"devMajor"` - Devminor int64 `json:"devMinor"` - Xattrs map[string]string `json:"xattrs,omitempty"` - Digest string `json:"digest,omitempty"` - Offset int64 `json:"offset,omitempty"` - EndOffset int64 `json:"endOffset,omitempty"` - - // Currently chunking is not supported. - ChunkSize int64 `json:"chunkSize,omitempty"` - ChunkOffset int64 `json:"chunkOffset,omitempty"` - ChunkDigest string `json:"chunkDigest,omitempty"` -} - const ( - TypeReg = "reg" - TypeChunk = "chunk" - TypeLink = "hardlink" - TypeChar = "char" - TypeBlock = "block" - TypeDir = "dir" - TypeFifo = "fifo" - TypeSymlink = "symlink" + TypeReg = internal.TypeReg + TypeChunk = internal.TypeChunk + TypeLink = internal.TypeLink + TypeChar = internal.TypeChar + TypeBlock = internal.TypeBlock + TypeDir = internal.TypeDir + TypeFifo = internal.TypeFifo + TypeSymlink = internal.TypeSymlink ) -var tarTypes = map[byte]string{ - tar.TypeReg: TypeReg, - tar.TypeRegA: TypeReg, - tar.TypeLink: TypeLink, - tar.TypeChar: TypeChar, - tar.TypeBlock: TypeBlock, - tar.TypeDir: TypeDir, - tar.TypeFifo: TypeFifo, - tar.TypeSymlink: TypeSymlink, -} - var typesToTar = map[string]byte{ TypeReg: tar.TypeReg, TypeLink: tar.TypeLink, @@ -78,14 +35,6 @@ var typesToTar = map[string]byte{ TypeSymlink: tar.TypeSymlink, } -func getType(t byte) (string, error) { - r, found := tarTypes[t] - if !found { - return "", fmt.Errorf("unknown tarball type: %v", t) - } - return r, nil -} - func typeToTarType(t string) (byte, error) { r, found := typesToTar[t] if !found { @@ -94,52 +43,30 @@ func typeToTarType(t string) (byte, error) { return r, nil } -const ( - manifestChecksumKey = "io.containers.zstd-chunked.manifest-checksum" - manifestInfoKey = "io.containers.zstd-chunked.manifest-position" - - // manifestTypeCRFS is a manifest file compatible with the CRFS TOC file. - manifestTypeCRFS = 1 - - // footerSizeSupported is the footer size supported by this implementation. - // Newer versions of the image format might increase this value, so reject - // any version that is not supported. - footerSizeSupported = 40 -) - -var ( - // when the zstd decoder encounters a skippable frame + 1 byte for the size, it - // will ignore it. - // https://tools.ietf.org/html/rfc8478#section-3.1.2 - skippableFrameMagic = []byte{0x50, 0x2a, 0x4d, 0x18} - - zstdChunkedFrameMagic = []byte{0x47, 0x6e, 0x55, 0x6c, 0x49, 0x6e, 0x55, 0x78} -) - func isZstdChunkedFrameMagic(data []byte) bool { if len(data) < 8 { return false } - return bytes.Equal(zstdChunkedFrameMagic, data[:8]) + return bytes.Equal(internal.ZstdChunkedFrameMagic, data[:8]) } // readZstdChunkedManifest reads the zstd:chunked manifest from the seekable stream blobStream. The blob total size must // be specified. // This function uses the io.containers.zstd-chunked. annotations when specified. func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, annotations map[string]string) ([]byte, error) { - footerSize := int64(footerSizeSupported) + footerSize := int64(internal.FooterSizeSupported) if blobSize <= footerSize { return nil, errors.New("blob too small") } - manifestChecksumAnnotation := annotations[manifestChecksumKey] + manifestChecksumAnnotation := annotations[internal.ManifestChecksumKey] if manifestChecksumAnnotation == "" { - return nil, fmt.Errorf("manifest checksum annotation %q not found", manifestChecksumKey) + return nil, fmt.Errorf("manifest checksum annotation %q not found", internal.ManifestChecksumKey) } var offset, length, lengthUncompressed, manifestType uint64 - if offsetMetadata := annotations[manifestInfoKey]; offsetMetadata != "" { + if offsetMetadata := annotations[internal.ManifestInfoKey]; offsetMetadata != "" { if _, err := fmt.Sscanf(offsetMetadata, "%d:%d:%d:%d", &offset, &length, &lengthUncompressed, &manifestType); err != nil { return nil, err } @@ -173,7 +100,7 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, ann } } - if manifestType != manifestTypeCRFS { + if manifestType != internal.ManifestTypeCRFS { return nil, errors.New("invalid manifest type") } @@ -235,279 +162,8 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, ann return manifest, nil } -func appendZstdSkippableFrame(dest io.Writer, data []byte) error { - if _, err := dest.Write(skippableFrameMagic); err != nil { - return err - } - - var size []byte = make([]byte, 4) - binary.LittleEndian.PutUint32(size, uint32(len(data))) - if _, err := dest.Write(size); err != nil { - return err - } - if _, err := dest.Write(data); err != nil { - return err - } - return nil -} - -func writeZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, offset uint64, metadata []zstdFileMetadata, level int) error { - // 8 is the size of the zstd skippable frame header + the frame size - manifestOffset := offset + 8 - - toc := zstdTOC{ - Version: 1, - Entries: metadata, - } - - // Generate the manifest - manifest, err := json.Marshal(toc) - if err != nil { - return err - } - - var compressedBuffer bytes.Buffer - zstdWriter, err := zstdWriterWithLevel(&compressedBuffer, level) - if err != nil { - return err - } - if _, err := zstdWriter.Write(manifest); err != nil { - zstdWriter.Close() - return err - } - if err := zstdWriter.Close(); err != nil { - return err - } - compressedManifest := compressedBuffer.Bytes() - - manifestDigester := digest.Canonical.Digester() - manifestChecksum := manifestDigester.Hash() - if _, err := manifestChecksum.Write(compressedManifest); err != nil { - return err - } - - outMetadata[manifestChecksumKey] = manifestDigester.Digest().String() - outMetadata[manifestInfoKey] = fmt.Sprintf("%d:%d:%d:%d", manifestOffset, len(compressedManifest), len(manifest), manifestTypeCRFS) - if err := appendZstdSkippableFrame(dest, compressedManifest); err != nil { - return err - } - - // Store the offset to the manifest and its size in LE order - var manifestDataLE []byte = make([]byte, footerSizeSupported) - binary.LittleEndian.PutUint64(manifestDataLE, manifestOffset) - binary.LittleEndian.PutUint64(manifestDataLE[8:], uint64(len(compressedManifest))) - binary.LittleEndian.PutUint64(manifestDataLE[16:], uint64(len(manifest))) - binary.LittleEndian.PutUint64(manifestDataLE[24:], uint64(manifestTypeCRFS)) - copy(manifestDataLE[32:], zstdChunkedFrameMagic) - - return appendZstdSkippableFrame(dest, manifestDataLE) -} - -func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, reader io.Reader, level int) error { - // total written so far. Used to retrieve partial offsets in the file - dest := ioutils.NewWriteCounter(destFile) - - tr := tar.NewReader(reader) - tr.RawAccounting = true - - buf := make([]byte, 4096) - - zstdWriter, err := zstdWriterWithLevel(dest, level) - if err != nil { - return err - } - defer func() { - if zstdWriter != nil { - zstdWriter.Close() - zstdWriter.Flush() - } - }() - - restartCompression := func() (int64, error) { - var offset int64 - if zstdWriter != nil { - if err := zstdWriter.Close(); err != nil { - return 0, err - } - if err := zstdWriter.Flush(); err != nil { - return 0, err - } - offset = dest.Count - zstdWriter.Reset(dest) - } - return offset, nil - } - - var metadata []zstdFileMetadata - for { - hdr, err := tr.Next() - if err != nil { - if err == io.EOF { - break - } - return err - } - - rawBytes := tr.RawBytes() - if _, err := zstdWriter.Write(rawBytes); err != nil { - return err - } - payloadDigester := digest.Canonical.Digester() - payloadChecksum := payloadDigester.Hash() - - payloadDest := io.MultiWriter(payloadChecksum, zstdWriter) - - // Now handle the payload, if any - var startOffset, endOffset int64 - checksum := "" - for { - read, errRead := tr.Read(buf) - if errRead != nil && errRead != io.EOF { - return err - } - - // restart the compression only if there is - // a payload. - if read > 0 { - if startOffset == 0 { - startOffset, err = restartCompression() - if err != nil { - return err - } - } - _, err := payloadDest.Write(buf[:read]) - if err != nil { - return err - } - } - if errRead == io.EOF { - if startOffset > 0 { - endOffset, err = restartCompression() - if err != nil { - return err - } - checksum = payloadDigester.Digest().String() - } - break - } - } - - typ, err := getType(hdr.Typeflag) - if err != nil { - return err - } - xattrs := make(map[string]string) - for k, v := range hdr.Xattrs { - xattrs[k] = base64.StdEncoding.EncodeToString([]byte(v)) - } - m := zstdFileMetadata{ - Type: typ, - Name: hdr.Name, - Linkname: hdr.Linkname, - Mode: hdr.Mode, - Size: hdr.Size, - UID: hdr.Uid, - GID: hdr.Gid, - ModTime: hdr.ModTime, - AccessTime: hdr.AccessTime, - ChangeTime: hdr.ChangeTime, - Devmajor: hdr.Devmajor, - Devminor: hdr.Devminor, - Xattrs: xattrs, - Digest: checksum, - Offset: startOffset, - EndOffset: endOffset, - - // ChunkSize is 0 for the last chunk - ChunkSize: 0, - ChunkOffset: 0, - ChunkDigest: checksum, - } - metadata = append(metadata, m) - } - - rawBytes := tr.RawBytes() - if _, err := zstdWriter.Write(rawBytes); err != nil { - return err - } - if err := zstdWriter.Flush(); err != nil { - return err - } - if err := zstdWriter.Close(); err != nil { - return err - } - zstdWriter = nil - - return writeZstdChunkedManifest(dest, outMetadata, uint64(dest.Count), metadata, level) -} - -type zstdChunkedWriter struct { - tarSplitOut *io.PipeWriter - tarSplitErr chan error -} - -func (w zstdChunkedWriter) Close() error { - err := <-w.tarSplitErr - if err != nil { - w.tarSplitOut.Close() - return err - } - return w.tarSplitOut.Close() -} - -func (w zstdChunkedWriter) Write(p []byte) (int, error) { - select { - case err := <-w.tarSplitErr: - w.tarSplitOut.Close() - return 0, err - default: - return w.tarSplitOut.Write(p) - } -} - -// zstdChunkedWriterWithLevel writes a zstd compressed tarball where each file is -// compressed separately so it can be addressed separately. Idea based on CRFS: -// https://github.com/google/crfs -// The difference with CRFS is that the zstd compression is used instead of gzip. -// The reason for it is that zstd supports embedding metadata ignored by the decoder -// as part of the compressed stream. -// A manifest json file with all the metadata is appended at the end of the tarball -// stream, using zstd skippable frames. -// The final file will look like: -// [FILE_1][FILE_2]..[FILE_N][SKIPPABLE FRAME 1][SKIPPABLE FRAME 2] -// Where: -// [FILE_N]: [ZSTD HEADER][TAR HEADER][PAYLOAD FILE_N][ZSTD FOOTER] -// [SKIPPABLE FRAME 1]: [ZSTD SKIPPABLE FRAME, SIZE=MANIFEST LENGTH][MANIFEST] -// [SKIPPABLE FRAME 2]: [ZSTD SKIPPABLE FRAME, SIZE=16][MANIFEST_OFFSET][MANIFEST_LENGTH][MANIFEST_LENGTH_UNCOMPRESSED][MANIFEST_TYPE][CHUNKED_ZSTD_MAGIC_NUMBER] -// MANIFEST_OFFSET, MANIFEST_LENGTH, MANIFEST_LENGTH_UNCOMPRESSED and CHUNKED_ZSTD_MAGIC_NUMBER are 64 bits unsigned in little endian format. -func zstdChunkedWriterWithLevel(out io.Writer, metadata map[string]string, level int) (io.WriteCloser, error) { - ch := make(chan error, 1) - r, w := io.Pipe() - - go func() { - ch <- writeZstdChunkedStream(out, metadata, r, level) - io.Copy(ioutil.Discard, r) - r.Close() - close(ch) - }() - - return zstdChunkedWriter{ - tarSplitOut: w, - tarSplitErr: ch, - }, nil -} - -func zstdWriterWithLevel(dest io.Writer, level int) (*zstd.Encoder, error) { - el := zstd.EncoderLevelFromZstd(level) - return zstd.NewWriter(dest, zstd.WithEncoderLevel(el)) -} - // ZstdCompressor is a CompressorFunc for the zstd compression algorithm. +// Deprecated: Use pkg/chunked/compressor.ZstdCompressor. func ZstdCompressor(r io.Writer, metadata map[string]string, level *int) (io.WriteCloser, error) { - if level == nil { - l := 3 - level = &l - } - - return zstdChunkedWriterWithLevel(r, metadata, *level) + return compressor.ZstdCompressor(r, metadata, level) } diff --git a/storage/pkg/chunked/compressor/compressor.go b/storage/pkg/chunked/compressor/compressor.go new file mode 100644 index 0000000000..a205b73fdb --- /dev/null +++ b/storage/pkg/chunked/compressor/compressor.go @@ -0,0 +1,220 @@ +package compressor + +// NOTE: This is used from github.com/containers/image by callers that +// don't otherwise use containers/storage, so don't make this depend on any +// larger software like the graph drivers. + +import ( + "encoding/base64" + "io" + "io/ioutil" + + "github.com/containers/storage/pkg/chunked/internal" + "github.com/containers/storage/pkg/ioutils" + "github.com/opencontainers/go-digest" + "github.com/vbatts/tar-split/archive/tar" +) + +func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, reader io.Reader, level int) error { + // total written so far. Used to retrieve partial offsets in the file + dest := ioutils.NewWriteCounter(destFile) + + tr := tar.NewReader(reader) + tr.RawAccounting = true + + buf := make([]byte, 4096) + + zstdWriter, err := internal.ZstdWriterWithLevel(dest, level) + if err != nil { + return err + } + defer func() { + if zstdWriter != nil { + zstdWriter.Close() + zstdWriter.Flush() + } + }() + + restartCompression := func() (int64, error) { + var offset int64 + if zstdWriter != nil { + if err := zstdWriter.Close(); err != nil { + return 0, err + } + if err := zstdWriter.Flush(); err != nil { + return 0, err + } + offset = dest.Count + zstdWriter.Reset(dest) + } + return offset, nil + } + + var metadata []internal.ZstdFileMetadata + for { + hdr, err := tr.Next() + if err != nil { + if err == io.EOF { + break + } + return err + } + + rawBytes := tr.RawBytes() + if _, err := zstdWriter.Write(rawBytes); err != nil { + return err + } + payloadDigester := digest.Canonical.Digester() + payloadChecksum := payloadDigester.Hash() + + payloadDest := io.MultiWriter(payloadChecksum, zstdWriter) + + // Now handle the payload, if any + var startOffset, endOffset int64 + checksum := "" + for { + read, errRead := tr.Read(buf) + if errRead != nil && errRead != io.EOF { + return err + } + + // restart the compression only if there is + // a payload. + if read > 0 { + if startOffset == 0 { + startOffset, err = restartCompression() + if err != nil { + return err + } + } + _, err := payloadDest.Write(buf[:read]) + if err != nil { + return err + } + } + if errRead == io.EOF { + if startOffset > 0 { + endOffset, err = restartCompression() + if err != nil { + return err + } + checksum = payloadDigester.Digest().String() + } + break + } + } + + typ, err := internal.GetType(hdr.Typeflag) + if err != nil { + return err + } + xattrs := make(map[string]string) + for k, v := range hdr.Xattrs { + xattrs[k] = base64.StdEncoding.EncodeToString([]byte(v)) + } + m := internal.ZstdFileMetadata{ + Type: typ, + Name: hdr.Name, + Linkname: hdr.Linkname, + Mode: hdr.Mode, + Size: hdr.Size, + UID: hdr.Uid, + GID: hdr.Gid, + ModTime: hdr.ModTime, + AccessTime: hdr.AccessTime, + ChangeTime: hdr.ChangeTime, + Devmajor: hdr.Devmajor, + Devminor: hdr.Devminor, + Xattrs: xattrs, + Digest: checksum, + Offset: startOffset, + EndOffset: endOffset, + + // ChunkSize is 0 for the last chunk + ChunkSize: 0, + ChunkOffset: 0, + ChunkDigest: checksum, + } + metadata = append(metadata, m) + } + + rawBytes := tr.RawBytes() + if _, err := zstdWriter.Write(rawBytes); err != nil { + return err + } + if err := zstdWriter.Flush(); err != nil { + return err + } + if err := zstdWriter.Close(); err != nil { + return err + } + zstdWriter = nil + + return internal.WriteZstdChunkedManifest(dest, outMetadata, uint64(dest.Count), metadata, level) +} + +type zstdChunkedWriter struct { + tarSplitOut *io.PipeWriter + tarSplitErr chan error +} + +func (w zstdChunkedWriter) Close() error { + err := <-w.tarSplitErr + if err != nil { + w.tarSplitOut.Close() + return err + } + return w.tarSplitOut.Close() +} + +func (w zstdChunkedWriter) Write(p []byte) (int, error) { + select { + case err := <-w.tarSplitErr: + w.tarSplitOut.Close() + return 0, err + default: + return w.tarSplitOut.Write(p) + } +} + +// zstdChunkedWriterWithLevel writes a zstd compressed tarball where each file is +// compressed separately so it can be addressed separately. Idea based on CRFS: +// https://github.com/google/crfs +// The difference with CRFS is that the zstd compression is used instead of gzip. +// The reason for it is that zstd supports embedding metadata ignored by the decoder +// as part of the compressed stream. +// A manifest json file with all the metadata is appended at the end of the tarball +// stream, using zstd skippable frames. +// The final file will look like: +// [FILE_1][FILE_2]..[FILE_N][SKIPPABLE FRAME 1][SKIPPABLE FRAME 2] +// Where: +// [FILE_N]: [ZSTD HEADER][TAR HEADER][PAYLOAD FILE_N][ZSTD FOOTER] +// [SKIPPABLE FRAME 1]: [ZSTD SKIPPABLE FRAME, SIZE=MANIFEST LENGTH][MANIFEST] +// [SKIPPABLE FRAME 2]: [ZSTD SKIPPABLE FRAME, SIZE=16][MANIFEST_OFFSET][MANIFEST_LENGTH][MANIFEST_LENGTH_UNCOMPRESSED][MANIFEST_TYPE][CHUNKED_ZSTD_MAGIC_NUMBER] +// MANIFEST_OFFSET, MANIFEST_LENGTH, MANIFEST_LENGTH_UNCOMPRESSED and CHUNKED_ZSTD_MAGIC_NUMBER are 64 bits unsigned in little endian format. +func zstdChunkedWriterWithLevel(out io.Writer, metadata map[string]string, level int) (io.WriteCloser, error) { + ch := make(chan error, 1) + r, w := io.Pipe() + + go func() { + ch <- writeZstdChunkedStream(out, metadata, r, level) + io.Copy(ioutil.Discard, r) + r.Close() + close(ch) + }() + + return zstdChunkedWriter{ + tarSplitOut: w, + tarSplitErr: ch, + }, nil +} + +// ZstdCompressor is a CompressorFunc for the zstd compression algorithm. +func ZstdCompressor(r io.Writer, metadata map[string]string, level *int) (io.WriteCloser, error) { + if level == nil { + l := 3 + level = &l + } + + return zstdChunkedWriterWithLevel(r, metadata, *level) +} diff --git a/storage/pkg/chunked/internal/compression.go b/storage/pkg/chunked/internal/compression.go new file mode 100644 index 0000000000..af0025c206 --- /dev/null +++ b/storage/pkg/chunked/internal/compression.go @@ -0,0 +1,172 @@ +package internal + +// NOTE: This is used from github.com/containers/image by callers that +// don't otherwise use containers/storage, so don't make this depend on any +// larger software like the graph drivers. + +import ( + "archive/tar" + "bytes" + "encoding/binary" + "encoding/json" + "fmt" + "io" + "time" + + "github.com/klauspost/compress/zstd" + "github.com/opencontainers/go-digest" +) + +type ZstdTOC struct { + Version int `json:"version"` + Entries []ZstdFileMetadata `json:"entries"` +} + +type ZstdFileMetadata struct { + Type string `json:"type"` + Name string `json:"name"` + Linkname string `json:"linkName,omitempty"` + Mode int64 `json:"mode,omitempty"` + Size int64 `json:"size"` + UID int `json:"uid"` + GID int `json:"gid"` + ModTime time.Time `json:"modtime"` + AccessTime time.Time `json:"accesstime"` + ChangeTime time.Time `json:"changetime"` + Devmajor int64 `json:"devMajor"` + Devminor int64 `json:"devMinor"` + Xattrs map[string]string `json:"xattrs,omitempty"` + Digest string `json:"digest,omitempty"` + Offset int64 `json:"offset,omitempty"` + EndOffset int64 `json:"endOffset,omitempty"` + + // Currently chunking is not supported. + ChunkSize int64 `json:"chunkSize,omitempty"` + ChunkOffset int64 `json:"chunkOffset,omitempty"` + ChunkDigest string `json:"chunkDigest,omitempty"` +} + +const ( + TypeReg = "reg" + TypeChunk = "chunk" + TypeLink = "hardlink" + TypeChar = "char" + TypeBlock = "block" + TypeDir = "dir" + TypeFifo = "fifo" + TypeSymlink = "symlink" +) + +var TarTypes = map[byte]string{ + tar.TypeReg: TypeReg, + tar.TypeRegA: TypeReg, + tar.TypeLink: TypeLink, + tar.TypeChar: TypeChar, + tar.TypeBlock: TypeBlock, + tar.TypeDir: TypeDir, + tar.TypeFifo: TypeFifo, + tar.TypeSymlink: TypeSymlink, +} + +func GetType(t byte) (string, error) { + r, found := TarTypes[t] + if !found { + return "", fmt.Errorf("unknown tarball type: %v", t) + } + return r, nil +} + +const ( + ManifestChecksumKey = "io.containers.zstd-chunked.manifest-checksum" + ManifestInfoKey = "io.containers.zstd-chunked.manifest-position" + + // ManifestTypeCRFS is a manifest file compatible with the CRFS TOC file. + ManifestTypeCRFS = 1 + + // FooterSizeSupported is the footer size supported by this implementation. + // Newer versions of the image format might increase this value, so reject + // any version that is not supported. + FooterSizeSupported = 40 +) + +var ( + // when the zstd decoder encounters a skippable frame + 1 byte for the size, it + // will ignore it. + // https://tools.ietf.org/html/rfc8478#section-3.1.2 + skippableFrameMagic = []byte{0x50, 0x2a, 0x4d, 0x18} + + ZstdChunkedFrameMagic = []byte{0x47, 0x6e, 0x55, 0x6c, 0x49, 0x6e, 0x55, 0x78} +) + +func appendZstdSkippableFrame(dest io.Writer, data []byte) error { + if _, err := dest.Write(skippableFrameMagic); err != nil { + return err + } + + var size []byte = make([]byte, 4) + binary.LittleEndian.PutUint32(size, uint32(len(data))) + if _, err := dest.Write(size); err != nil { + return err + } + if _, err := dest.Write(data); err != nil { + return err + } + return nil +} + +func WriteZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, offset uint64, metadata []ZstdFileMetadata, level int) error { + // 8 is the size of the zstd skippable frame header + the frame size + manifestOffset := offset + 8 + + toc := ZstdTOC{ + Version: 1, + Entries: metadata, + } + + // Generate the manifest + manifest, err := json.Marshal(toc) + if err != nil { + return err + } + + var compressedBuffer bytes.Buffer + zstdWriter, err := ZstdWriterWithLevel(&compressedBuffer, level) + if err != nil { + return err + } + if _, err := zstdWriter.Write(manifest); err != nil { + zstdWriter.Close() + return err + } + if err := zstdWriter.Close(); err != nil { + return err + } + compressedManifest := compressedBuffer.Bytes() + + manifestDigester := digest.Canonical.Digester() + manifestChecksum := manifestDigester.Hash() + if _, err := manifestChecksum.Write(compressedManifest); err != nil { + return err + } + + outMetadata[ManifestChecksumKey] = manifestDigester.Digest().String() + outMetadata[ManifestInfoKey] = fmt.Sprintf("%d:%d:%d:%d", manifestOffset, len(compressedManifest), len(manifest), ManifestTypeCRFS) + if err := appendZstdSkippableFrame(dest, compressedManifest); err != nil { + return err + } + + // Store the offset to the manifest and its size in LE order + var manifestDataLE []byte = make([]byte, FooterSizeSupported) + binary.LittleEndian.PutUint64(manifestDataLE, manifestOffset) + binary.LittleEndian.PutUint64(manifestDataLE[8:], uint64(len(compressedManifest))) + binary.LittleEndian.PutUint64(manifestDataLE[16:], uint64(len(manifest))) + binary.LittleEndian.PutUint64(manifestDataLE[24:], uint64(ManifestTypeCRFS)) + copy(manifestDataLE[32:], ZstdChunkedFrameMagic) + + return appendZstdSkippableFrame(dest, manifestDataLE) +} + +func ZstdWriterWithLevel(dest io.Writer, level int) (*zstd.Encoder, error) { + el := zstd.EncoderLevelFromZstd(level) + return zstd.NewWriter(dest, zstd.WithEncoderLevel(el)) +} diff --git a/storage/pkg/chunked/storage_linux.go b/storage/pkg/chunked/storage_linux.go index 8c5062475c..0f14d8af99 100644 --- a/storage/pkg/chunked/storage_linux.go +++ b/storage/pkg/chunked/storage_linux.go @@ -19,6 +19,7 @@ import ( graphdriver "github.com/containers/storage/drivers" driversCopy "github.com/containers/storage/drivers/copy" "github.com/containers/storage/pkg/archive" + "github.com/containers/storage/pkg/chunked/internal" "github.com/containers/storage/pkg/idtools" "github.com/containers/storage/types" "github.com/klauspost/compress/zstd" @@ -39,7 +40,7 @@ const ( type chunkedZstdDiffer struct { stream ImageSourceSeekable manifest []byte - layersMetadata map[string][]zstdFileMetadata + layersMetadata map[string][]internal.ZstdFileMetadata layersTarget map[string]string } @@ -75,11 +76,11 @@ func copyFileContent(src, destFile, root string, dirfd int, missingDirsMode, mod return dstFile, st.Size(), err } -func prepareOtherLayersCache(layersMetadata map[string][]zstdFileMetadata) map[string]map[string]*zstdFileMetadata { - maps := make(map[string]map[string]*zstdFileMetadata) +func prepareOtherLayersCache(layersMetadata map[string][]internal.ZstdFileMetadata) map[string]map[string]*internal.ZstdFileMetadata { + maps := make(map[string]map[string]*internal.ZstdFileMetadata) for layerID, v := range layersMetadata { - r := make(map[string]*zstdFileMetadata) + r := make(map[string]*internal.ZstdFileMetadata) for i := range v { r[v[i].Digest] = &v[i] } @@ -88,13 +89,13 @@ func prepareOtherLayersCache(layersMetadata map[string][]zstdFileMetadata) map[s return maps } -func getLayersCache(store storage.Store) (map[string][]zstdFileMetadata, map[string]string, error) { +func getLayersCache(store storage.Store) (map[string][]internal.ZstdFileMetadata, map[string]string, error) { allLayers, err := store.Layers() if err != nil { return nil, nil, err } - layersMetadata := make(map[string][]zstdFileMetadata) + layersMetadata := make(map[string][]internal.ZstdFileMetadata) layersTarget := make(map[string]string) for _, r := range allLayers { manifestReader, err := store.LayerBigData(r.ID, bigDataKey) @@ -106,7 +107,7 @@ func getLayersCache(store storage.Store) (map[string][]zstdFileMetadata, map[str if err != nil { return nil, nil, err } - var toc zstdTOC + var toc internal.ZstdTOC if err := json.Unmarshal(manifest, &toc); err != nil { continue } @@ -123,7 +124,7 @@ func getLayersCache(store storage.Store) (map[string][]zstdFileMetadata, map[str // GetDiffer returns a differ than can be used with ApplyDiffWithDiffer. func GetDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (graphdriver.Differ, error) { - if _, ok := annotations[manifestChecksumKey]; ok { + if _, ok := annotations[internal.ManifestChecksumKey]; ok { return makeZstdChunkedDiffer(ctx, store, blobSize, annotations, iss) } return nil, errors.New("blob type not supported for partial retrieval") @@ -147,7 +148,7 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in }, nil } -func findFileInOtherLayers(file zstdFileMetadata, root string, dirfd int, layersMetadata map[string]map[string]*zstdFileMetadata, layersTarget map[string]string, missingDirsMode os.FileMode) (*os.File, int64, error) { +func findFileInOtherLayers(file internal.ZstdFileMetadata, root string, dirfd int, layersMetadata map[string]map[string]*internal.ZstdFileMetadata, layersTarget map[string]string, missingDirsMode os.FileMode) (*os.File, int64, error) { // this is ugly, needs to be indexed for layerID, checksums := range layersMetadata { m, found := checksums[file.Digest] @@ -194,7 +195,7 @@ func getFileDigest(f *os.File) (digest.Digest, error) { // findFileOnTheHost checks whether the requested file already exist on the host and copies the file content from there if possible. // It is currently implemented to look only at the file with the same path. Ideally it can detect the same content also at different // paths. -func findFileOnTheHost(file zstdFileMetadata, root string, dirfd int, missingDirsMode os.FileMode) (*os.File, int64, error) { +func findFileOnTheHost(file internal.ZstdFileMetadata, root string, dirfd int, missingDirsMode os.FileMode) (*os.File, int64, error) { sourceFile := filepath.Clean(filepath.Join("/", file.Name)) if !strings.HasPrefix(sourceFile, "/usr/") { // limit host deduplication to files under /usr. @@ -251,7 +252,7 @@ func findFileOnTheHost(file zstdFileMetadata, root string, dirfd int, missingDir return dstFile, written, nil } -func maybeDoIDRemap(manifest []zstdFileMetadata, options *archive.TarOptions) error { +func maybeDoIDRemap(manifest []internal.ZstdFileMetadata, options *archive.TarOptions) error { if options.ChownOpts == nil && len(options.UIDMaps) == 0 || len(options.GIDMaps) == 0 { return nil } @@ -278,7 +279,7 @@ func maybeDoIDRemap(manifest []zstdFileMetadata, options *archive.TarOptions) er } type missingFile struct { - File *zstdFileMetadata + File *internal.ZstdFileMetadata Gap int64 } @@ -291,7 +292,7 @@ type missingChunk struct { Files []missingFile } -func setFileAttrs(file *os.File, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error { +func setFileAttrs(file *os.File, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) error { if file == nil || file.Fd() < 0 { return errors.Errorf("invalid file") } @@ -346,7 +347,7 @@ func openFileUnderRoot(name, root string, dirfd int, flags uint64, mode os.FileM return os.NewFile(uintptr(fd), name), nil } -func createFileFromZstdStream(dest string, dirfd int, reader io.Reader, missingDirsMode, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) (err error) { +func createFileFromZstdStream(dest string, dirfd int, reader io.Reader, missingDirsMode, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) (err error) { file, err := openFileUnderRoot(metadata.Name, dest, dirfd, newFileFlags, 0) if err != nil { return err @@ -497,7 +498,7 @@ func retrieveMissingFiles(input *chunkedZstdDiffer, dest string, dirfd int, miss return nil } -func safeMkdir(target string, dirfd int, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error { +func safeMkdir(target string, dirfd int, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) error { parent := filepath.Dir(metadata.Name) base := filepath.Base(metadata.Name) @@ -526,7 +527,7 @@ func safeMkdir(target string, dirfd int, mode os.FileMode, metadata *zstdFileMet return setFileAttrs(file, mode, metadata, options) } -func safeLink(target string, dirfd int, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error { +func safeLink(target string, dirfd int, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) error { sourceFile, err := openFileUnderRoot(metadata.Linkname, target, dirfd, unix.O_RDONLY, 0) if err != nil { return err @@ -558,7 +559,7 @@ func safeLink(target string, dirfd int, mode os.FileMode, metadata *zstdFileMeta return setFileAttrs(newFile, mode, metadata, options) } -func safeSymlink(target string, dirfd int, mode os.FileMode, metadata *zstdFileMetadata, options *archive.TarOptions) error { +func safeSymlink(target string, dirfd int, mode os.FileMode, metadata *internal.ZstdFileMetadata, options *archive.TarOptions) error { destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name) destDirFd := dirfd if destDir != "." { @@ -636,7 +637,7 @@ type hardLinkToCreate struct { dest string dirfd int mode os.FileMode - metadata *zstdFileMetadata + metadata *internal.ZstdFileMetadata } func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions) (graphdriver.DriverWithDifferOutput, error) { @@ -659,7 +660,7 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions) } // Generate the manifest - var toc zstdTOC + var toc internal.ZstdTOC if err := json.Unmarshal(d.manifest, &toc); err != nil { return output, err } @@ -667,7 +668,7 @@ func (d *chunkedZstdDiffer) ApplyDiff(dest string, options *archive.TarOptions) whiteoutConverter := archive.GetWhiteoutConverter(options.WhiteoutFormat, options.WhiteoutData) var missingChunks []missingChunk - var mergedEntries []zstdFileMetadata + var mergedEntries []internal.ZstdFileMetadata if err := maybeDoIDRemap(toc.Entries, options); err != nil { return output, err diff --git a/storage/pkg/chunked/zstdchunked_test.go b/storage/pkg/chunked/zstdchunked_test.go index a26eb19f06..bfc9279b8b 100644 --- a/storage/pkg/chunked/zstdchunked_test.go +++ b/storage/pkg/chunked/zstdchunked_test.go @@ -10,10 +10,12 @@ import ( "io" "io/ioutil" "testing" + + "github.com/containers/storage/pkg/chunked/internal" ) func TestIsZstdChunkedFrameMagic(t *testing.T) { - b := append(zstdChunkedFrameMagic[:], make([]byte, 200)...) + b := append(internal.ZstdChunkedFrameMagic[:], make([]byte, 200)...) if !isZstdChunkedFrameMagic(b) { t.Fatal("Chunked frame magic not found") } @@ -54,7 +56,7 @@ func (s seekable) GetBlobAt(req []ImageSourceChunk) (chan io.ReadCloser, chan er return m, e, nil } -var someFiles = []zstdFileMetadata{ +var someFiles = []internal.ZstdFileMetadata{ { Type: "dir", Name: "/foo", @@ -93,14 +95,14 @@ func TestGenerateAndParseManifest(t *testing.T) { var b bytes.Buffer writer := bufio.NewWriter(&b) - if err := writeZstdChunkedManifest(writer, annotations, offsetManifest, someFiles[:], 9); err != nil { + if err := internal.WriteZstdChunkedManifest(writer, annotations, offsetManifest, someFiles[:], 9); err != nil { t.Error(err) } if err := writer.Flush(); err != nil { t.Error(err) } - offsetMetadata := annotations[manifestInfoKey] + offsetMetadata := annotations[internal.ManifestInfoKey] if offsetMetadata == "" { t.Fatal("Annotation not found") } @@ -113,7 +115,7 @@ func TestGenerateAndParseManifest(t *testing.T) { if offset != offsetManifest+8 { t.Fatalf("Invalid offset %d", offset) } - if manifestType != manifestTypeCRFS { + if manifestType != internal.ManifestTypeCRFS { t.Fatalf("Invalid manifest type %d", manifestType) } if b.Len() == 0 { @@ -133,7 +135,7 @@ func TestGenerateAndParseManifest(t *testing.T) { t.Error(err) } - var toc zstdTOC + var toc internal.ZstdTOC if err := json.Unmarshal(manifest, &toc); err != nil { t.Error(err) } @@ -159,16 +161,16 @@ func TestGetTarType(t *testing.T) { if _, err := typeToTarType("FOO"); err == nil { t.Fatal("Invalid typeToTarType conversion") } - for k, v := range tarTypes { - r, err := getType(k) + for k, v := range internal.TarTypes { + r, err := internal.GetType(k) if err != nil { t.Error(err) } if r != v { - t.Fatal("Invalid getType conversion") + t.Fatal("Invalid GetType conversion") } } - if _, err := getType(byte('Z')); err == nil { - t.Fatal("Invalid getType conversion") + if _, err := internal.GetType(byte('Z')); err == nil { + t.Fatal("Invalid GetType conversion") } }