This resolves the "signing ambiguity" by requiring that images
must have a DiffID entry, and it must match, in partial pulls.

Signed-off-by: Miloslav Trmač <mitr@redhat.com>
This commit is contained in:
Miloslav Trmač 2025-01-21 22:57:45 +01:00
parent f20d9bd637
commit 6639c20278
8 changed files with 284 additions and 97 deletions

4
go.mod
View File

@ -17,7 +17,7 @@ require (
github.com/containers/common v0.61.1-0.20250120135258-06628cb958e9
github.com/containers/conmon v2.0.20+incompatible
github.com/containers/gvisor-tap-vsock v0.8.2
github.com/containers/image/v5 v5.33.1-0.20250116221711-317a9885aed9
github.com/containers/image/v5 v5.33.2-0.20250122201336-16f7e1e0e1fd
github.com/containers/libhvee v0.9.0
github.com/containers/ocicrypt v1.2.1
github.com/containers/psgo v1.9.0
@ -211,7 +211,7 @@ require (
github.com/u-root/uio v0.0.0-20240224005618-d2acac8f3701 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
github.com/ulikunitz/xz v0.5.12 // indirect
github.com/vbatts/tar-split v0.11.6 // indirect
github.com/vbatts/tar-split v0.11.7 // indirect
github.com/vishvananda/netns v0.0.4 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
go.mongodb.org/mongo-driver v1.14.0 // indirect

12
go.sum
View File

@ -84,8 +84,8 @@ github.com/containers/conmon v2.0.20+incompatible h1:YbCVSFSCqFjjVwHTPINGdMX1F6J
github.com/containers/conmon v2.0.20+incompatible/go.mod h1:hgwZ2mtuDrppv78a/cOBNiCm6O0UMWGx1mu7P00nu5I=
github.com/containers/gvisor-tap-vsock v0.8.2 h1:uQMBCCHlIIj62fPjbvgm6AL5EzsP6TP5eviByOJEsOg=
github.com/containers/gvisor-tap-vsock v0.8.2/go.mod h1:EMRe2o63ddq2zxcP0hTysmxCf/5JlaNEg8/gpzP0ox4=
github.com/containers/image/v5 v5.33.1-0.20250116221711-317a9885aed9 h1:oAYA8USA2AL4LS+SK9RGw3e6Lv/BEFI7+2Z7B9Bcjs0=
github.com/containers/image/v5 v5.33.1-0.20250116221711-317a9885aed9/go.mod h1:eWqddLtRxT+IYU/063W8rk2HzSS7LotGkROGNJ243CA=
github.com/containers/image/v5 v5.33.2-0.20250122201336-16f7e1e0e1fd h1:+3Rikfp8UWWAVihJwK8x4zrfwAA3R/uwdtsX33QyeSA=
github.com/containers/image/v5 v5.33.2-0.20250122201336-16f7e1e0e1fd/go.mod h1:wTYNEK3AOQnVlBaQLmq7AmXXWqDvRizeZlT8fE/kaA4=
github.com/containers/libhvee v0.9.0 h1:5UxJMka1lDfxTeITA25Pd8QVVttJAG43eQS1Getw1tc=
github.com/containers/libhvee v0.9.0/go.mod h1:p44VJd8jMIx3SRN1eM6PxfCEwXQE0lJ0dQppCAlzjPQ=
github.com/containers/libtrust v0.0.0-20230121012942-c1716e8a8d01 h1:Qzk5C6cYglewc+UyGf6lc8Mj2UaPTHy/iF2De0/77CA=
@ -129,8 +129,8 @@ github.com/disiqueira/gotree/v3 v3.0.2 h1:ik5iuLQQoufZBNPY518dXhiO5056hyNBIK9lWh
github.com/disiqueira/gotree/v3 v3.0.2/go.mod h1:ZuyjE4+mUQZlbpkI24AmruZKhg3VHEgPLDY8Qk+uUu8=
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
github.com/docker/cli v27.5.0+incompatible h1:aMphQkcGtpHixwwhAXJT1rrK/detk2JIvDaFkLctbGM=
github.com/docker/cli v27.5.0+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
github.com/docker/cli v27.5.1+incompatible h1:JB9cieUT9YNiMITtIsguaN55PLOHhBSz3LKVc6cqWaY=
github.com/docker/cli v27.5.1+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk=
github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
github.com/docker/docker v27.5.0+incompatible h1:um++2NcQtGRTz5eEgO6aJimo6/JxrTXC941hd05JO6U=
@ -513,8 +513,8 @@ github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65E
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc=
github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/vbatts/tar-split v0.11.6 h1:4SjTW5+PU11n6fZenf2IPoV8/tz3AaYHMWjf23envGs=
github.com/vbatts/tar-split v0.11.6/go.mod h1:dqKNtesIOr2j2Qv3W/cHjnvk9I8+G7oAkFDFN6TCBEI=
github.com/vbatts/tar-split v0.11.7 h1:ixZ93pO/GmvaZw4Vq9OwmfZK/kc2zKdPfu0B+gYqs3U=
github.com/vbatts/tar-split v0.11.7/go.mod h1:eF6B6i6ftWQcDqEn3/iGFRFRo8cBIMSJVOpnNdfTMFA=
github.com/vbauerster/mpb/v8 v8.9.1 h1:LH5R3lXPfE2e3lIGxN7WNWv3Hl5nWO6LRi2B0L0ERHw=
github.com/vbauerster/mpb/v8 v8.9.1/go.mod h1:4XMvznPh8nfe2NpnDo1QTPvW9MVkUhbG90mPWvmOzcQ=
github.com/vishvananda/netlink v1.3.0 h1:X7l42GfcV4S6E4vHTsw48qbrV+9PVojNfIhZcwQdrZk=

View File

@ -1056,6 +1056,15 @@ func (c *dockerClient) getBlob(ctx context.Context, ref dockerReference, info ty
func (c *dockerClient) getOCIDescriptorContents(ctx context.Context, ref dockerReference, desc imgspecv1.Descriptor, maxSize int, cache types.BlobInfoCache) ([]byte, error) {
// Note that this copies all kinds of attachments: attestations, and whatever else is there,
// not just signatures. We leave the signature consumers to decide based on the MIME type.
if err := desc.Digest.Validate(); err != nil { // .Algorithm() might panic without this check
return nil, fmt.Errorf("invalid digest %q: %w", desc.Digest.String(), err)
}
digestAlgorithm := desc.Digest.Algorithm()
if !digestAlgorithm.Available() {
return nil, fmt.Errorf("invalid digest %q: unsupported digest algorithm %q", desc.Digest.String(), digestAlgorithm.String())
}
reader, _, err := c.getBlob(ctx, ref, manifest.BlobInfoFromOCI1Descriptor(desc), cache)
if err != nil {
return nil, err
@ -1065,6 +1074,10 @@ func (c *dockerClient) getOCIDescriptorContents(ctx context.Context, ref dockerR
if err != nil {
return nil, fmt.Errorf("reading blob %s in %s: %w", desc.Digest.String(), ref.ref.Name(), err)
}
actualDigest := digestAlgorithm.FromBytes(payload)
if actualDigest != desc.Digest {
return nil, fmt.Errorf("digest mismatch, expected %q, got %q", desc.Digest.String(), actualDigest.String())
}
return payload, nil
}

View File

@ -33,6 +33,7 @@ import (
graphdriver "github.com/containers/storage/drivers"
"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/chunked"
"github.com/containers/storage/pkg/chunked/toc"
"github.com/containers/storage/pkg/ioutils"
digest "github.com/opencontainers/go-digest"
imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1"
@ -111,8 +112,10 @@ type storageImageDestinationLockProtected struct {
//
// Ideally we wouldnt have blobDiffIDs, and we would just keep records by index, but the public API does not require the caller
// to provide layer indices; and configs dont have layer indices. blobDiffIDs needs to exist for those cases.
indexToDiffID map[int]digest.Digest // Mapping from layer index to DiffID
indexToTOCDigest map[int]digest.Digest // Mapping from layer index to a TOC Digest
indexToDiffID map[int]digest.Digest // Mapping from layer index to DiffID
// Mapping from layer index to a TOC Digest.
// If this is set, then either c/storage/pkg/chunked/toc.GetTOCDigest must have returned a value, or indexToDiffID must be set as well.
indexToTOCDigest map[int]digest.Digest
blobDiffIDs map[digest.Digest]digest.Digest // Mapping from layer blobsums to their corresponding DiffIDs. CAREFUL: See the WARNING above.
// Layer data: Before commitLayer is called, either at least one of (diffOutputs, indexToAdditionalLayer, filenames)
@ -343,6 +346,56 @@ func (f *zstdFetcher) GetBlobAt(chunks []chunked.ImageSourceChunk) (chan io.Read
// If the call fails with ErrFallbackToOrdinaryLayerDownload, the caller can fall back to PutBlobWithOptions.
// The fallback _must not_ be done otherwise.
func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAccessor private.BlobChunkAccessor, srcInfo types.BlobInfo, options private.PutBlobPartialOptions) (_ private.UploadedBlob, retErr error) {
inputTOCDigest, err := toc.GetTOCDigest(srcInfo.Annotations)
if err != nil {
return private.UploadedBlob{}, err
}
// The identity of partially-pulled layers is, as long as we keep compatibility with tar-like consumers,
// unfixably ambiguous: there are two possible “views” of the same file (same compressed digest),
// the traditional “view” that decompresses the primary stream and consumes a tar file,
// and the partial-pull “view” that starts with the TOC.
// The two “views” have two separate metadata sets and may refer to different parts of the blob for file contents;
// the direct way to ensure they are consistent would be to read the full primary stream (and authenticate it against
// the compressed digest), and ensure the metadata and layer contents exactly match the partially-pulled contents -
// making the partial pull completely pointless.
//
// Instead, for partial-pull-capable layers (with inputTOCDigest set), we require the image to “commit”
// to uncompressed layer digest values via the config's RootFS.DiffIDs array:
// they are already naturally computed for traditionally-pulled layers, and for partially-pulled layers we
// do the optimal partial pull, and then reconstruct the uncompressed tar stream just to (expensively) compute this digest.
//
// Layers which dont support partial pulls (inputTOCDigest == "", incl. all schema1 layers) can be let through:
// the partial pull code will either not engage, or consume the full layer; and the rules of indexToTOCDigest / layerIdentifiedByTOC
// ensure the layer is identified by DiffID, i.e. using the traditional “view”.
//
// But if inputTOCDigest is set and the input image doesn't have RootFS.DiffIDs (the config is invalid for schema2/OCI),
// don't allow a partial pull, and fall back to PutBlobWithOptions.
//
// (The user can opt out of the DiffID commitment checking by a c/storage option, giving up security for performance,
// but we will still trigger the fall back here, and we will still enforce a DiffID match, so that the set of accepted images
// is the same in both cases, and so that users are not tempted to set the c/storage option to allow accepting some invalid images.)
var untrustedDiffID digest.Digest // "" if unknown
udid, err := s.untrustedLayerDiffID(options.LayerIndex)
if err != nil {
var diffIDUnknownErr untrustedLayerDiffIDUnknownError
switch {
case errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable):
// PutBlobPartial is a private API, so all callers are within c/image, and should have called
// NoteOriginalOCIConfig first.
return private.UploadedBlob{}, fmt.Errorf("internal error: in PutBlobPartial, untrustedLayerDiffID returned errUntrustedLayerDiffIDNotYetAvailable")
case errors.As(err, &diffIDUnknownErr):
if inputTOCDigest != nil {
return private.UploadedBlob{}, private.NewErrFallbackToOrdinaryLayerDownload(err)
}
untrustedDiffID = "" // A schema1 image or a non-TOC layer with no ambiguity, let it through
default:
return private.UploadedBlob{}, err
}
} else {
untrustedDiffID = udid
}
fetcher := zstdFetcher{
chunkAccessor: chunkAccessor,
ctx: ctx,
@ -382,7 +435,18 @@ func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAcces
if err := func() error { // A scope for defer
defer s.lock.Unlock()
// For true partial pulls, c/storage decides whether to compute the uncompressed digest based on an option in storage.conf
// (defaults to true, to avoid ambiguity.)
// c/storage can also be configured, to consume a layer not prepared for partial pulls (primarily to allow composefs conversion),
// and in that case it always consumes the full blob and always computes the uncompressed digest.
if out.UncompressedDigest != "" {
// This is centrally enforced later, in commitLayer, but because we have the value available,
// we might just as well check immediately.
if untrustedDiffID != "" && out.UncompressedDigest != untrustedDiffID {
return fmt.Errorf("uncompressed digest of layer %q is %q, config claims %q", srcInfo.Digest.String(),
out.UncompressedDigest.String(), untrustedDiffID.String())
}
s.lockProtected.indexToDiffID[options.LayerIndex] = out.UncompressedDigest
if out.TOCDigest != "" {
s.lockProtected.indexToTOCDigest[options.LayerIndex] = out.TOCDigest
@ -401,6 +465,11 @@ func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAcces
options.Cache.RecordDigestUncompressedPair(out.CompressedDigest, out.UncompressedDigest)
}
} else {
// Sanity-check the defined rules for indexToTOCDigest.
if inputTOCDigest == nil {
return fmt.Errorf("internal error: PrepareStagedLayer returned a TOC-only identity for layer %q with no TOC digest", srcInfo.Digest.String())
}
// Use diffID for layer identity if it is known.
if uncompressedDigest := options.Cache.UncompressedDigestForTOC(out.TOCDigest); uncompressedDigest != "" {
s.lockProtected.indexToDiffID[options.LayerIndex] = uncompressedDigest
@ -449,22 +518,43 @@ func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Dige
if err := blobDigest.Validate(); err != nil {
return false, private.ReusedBlob{}, fmt.Errorf("Can not check for a blob with invalid digest: %w", err)
}
if options.TOCDigest != "" {
useTOCDigest := false // If set, (options.TOCDigest != "" && options.LayerIndex != nil) AND we can use options.TOCDigest safely.
if options.TOCDigest != "" && options.LayerIndex != nil {
if err := options.TOCDigest.Validate(); err != nil {
return false, private.ReusedBlob{}, fmt.Errorf("Can not check for a blob with invalid digest: %w", err)
}
// Only consider using TOCDigest if we can avoid ambiguous image “views”, see the detailed comment in PutBlobPartial.
_, err := s.untrustedLayerDiffID(*options.LayerIndex)
if err != nil {
var diffIDUnknownErr untrustedLayerDiffIDUnknownError
switch {
case errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable):
// options.TOCDigest is a private API, so all callers are within c/image, and should have called
// NoteOriginalOCIConfig first.
return false, private.ReusedBlob{}, fmt.Errorf("internal error: in TryReusingBlobWithOptions, untrustedLayerDiffID returned errUntrustedLayerDiffIDNotYetAvailable")
case errors.As(err, &diffIDUnknownErr):
logrus.Debugf("Not using TOC %q to look for layer reuse: %v", options.TOCDigest, err)
// But dont abort entirely, keep useTOCDigest = false, try a blobDigest match.
default:
return false, private.ReusedBlob{}, err
}
} else {
useTOCDigest = true
}
}
// lock the entire method as it executes fairly quickly
s.lock.Lock()
defer s.lock.Unlock()
if options.SrcRef != nil && options.TOCDigest != "" && options.LayerIndex != nil {
if options.SrcRef != nil && useTOCDigest {
// Check if we have the layer in the underlying additional layer store.
aLayer, err := s.imageRef.transport.store.LookupAdditionalLayer(options.TOCDigest, options.SrcRef.String())
if err != nil && !errors.Is(err, storage.ErrLayerUnknown) {
return false, private.ReusedBlob{}, fmt.Errorf(`looking for compressed layers with digest %q and labels: %w`, blobDigest, err)
} else if err == nil {
// Compare the long comment in PutBlobPartial. We assume that the Additional Layer Store will, somehow,
// avoid layer “view” ambiguity.
alsTOCDigest := aLayer.TOCDigest()
if alsTOCDigest != options.TOCDigest {
// FIXME: If alsTOCDigest is "", the Additional Layer Store FUSE server is probably just too old, and we could
@ -537,13 +627,13 @@ func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Dige
return false, private.ReusedBlob{}, fmt.Errorf(`looking for layers with digest %q: %w`, uncompressedDigest, err)
}
if found, reused := reusedBlobFromLayerLookup(layers, blobDigest, size, options); found {
s.lockProtected.blobDiffIDs[blobDigest] = uncompressedDigest
s.lockProtected.blobDiffIDs[reused.Digest] = uncompressedDigest
return true, reused, nil
}
}
}
if options.TOCDigest != "" && options.LayerIndex != nil {
if useTOCDigest {
// Check if we know which which UncompressedDigest the TOC digest resolves to, and we have a match for that.
// Prefer this over LayersByTOCDigest because we can identify the layer using UncompressedDigest, maximizing reuse.
uncompressedDigest := options.Cache.UncompressedDigestForTOC(options.TOCDigest)
@ -605,13 +695,22 @@ func reusedBlobFromLayerLookup(layers []storage.Layer, blobDigest digest.Digest,
// trustedLayerIdentityData is a _consistent_ set of information known about a single layer.
type trustedLayerIdentityData struct {
layerIdentifiedByTOC bool // true if we decided the layer should be identified by tocDigest, false if by diffID
// true if we decided the layer should be identified by tocDigest, false if by diffID
// This can only be true if c/storage/pkg/chunked/toc.GetTOCDigest returns a value.
layerIdentifiedByTOC bool
diffID digest.Digest // A digest of the uncompressed full contents of the layer, or "" if unknown; must be set if !layerIdentifiedByTOC
tocDigest digest.Digest // A digest of the TOC digest, or "" if unknown; must be set if layerIdentifiedByTOC
blobDigest digest.Digest // A digest of the (possibly-compressed) layer as presented, or "" if unknown/untrusted.
}
// logString() prints a representation of trusted suitable identifying a layer in logs and errors.
// The string is already quoted to expose malicious input and does not need to be quoted again.
// Note that it does not include _all_ of the contents.
func (trusted trustedLayerIdentityData) logString() string {
return fmt.Sprintf("%q/%q/%q", trusted.blobDigest, trusted.tocDigest, trusted.diffID)
}
// trustedLayerIdentityDataLocked returns a _consistent_ set of information for a layer with (layerIndex, blobDigest).
// blobDigest is the (possibly-compressed) layer digest referenced in the manifest.
// It returns (trusted, true) if the layer was found, or (_, false) if insufficient data is available.
@ -822,23 +921,6 @@ func (s *storageImageDestination) queueOrCommit(index int, info addedLayerInfo)
return nil
}
// singleLayerIDComponent returns a single layers the input to computing a layer (chain) ID,
// and an indication whether the input already has the shape of a layer ID.
// It returns ("", false) if the layer is not found at all (which should never happen)
func (s *storageImageDestination) singleLayerIDComponent(layerIndex int, blobDigest digest.Digest) (string, bool) {
s.lock.Lock()
defer s.lock.Unlock()
trusted, ok := s.trustedLayerIdentityDataLocked(layerIndex, blobDigest)
if !ok {
return "", false
}
if trusted.layerIdentifiedByTOC {
return "@TOC=" + trusted.tocDigest.Encoded(), false // "@" is not a valid start of a digest.Digest, so this is unambiguous.
}
return trusted.diffID.Encoded(), true // This looks like chain IDs, and it uses the traditional value.
}
// commitLayer commits the specified layer with the given index to the storage.
// size can usually be -1; it can be provided if the layer is not known to be already present in blobDiffIDs.
//
@ -850,16 +932,15 @@ func (s *storageImageDestination) singleLayerIDComponent(layerIndex int, blobDig
// must guarantee that, at any given time, at most one goroutine may execute
// `commitLayer()`.
func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, size int64) (bool, error) {
// Already committed? Return early.
if _, alreadyCommitted := s.indexToStorageID[index]; alreadyCommitted {
return false, nil
}
// Start with an empty string or the previous layer ID. Note that
// `s.indexToStorageID` can only be accessed by *one* goroutine at any
// given time. Hence, we don't need to lock accesses.
var parentLayer string
var parentLayer string // "" if no parent
if index != 0 {
// s.indexToStorageID can only be written by this function, and our caller
// is responsible for ensuring it can be only be called by *one* goroutine at any
// given time. Hence, we don't need to lock accesses.
prev, ok := s.indexToStorageID[index-1]
if !ok {
return false, fmt.Errorf("Internal error: commitLayer called with previous layer %d not committed yet", index-1)
@ -867,18 +948,17 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si
parentLayer = prev
}
// Carry over the previous ID for empty non-base layers.
if info.emptyLayer {
s.indexToStorageID[index] = parentLayer
return false, nil
}
// Check if there's already a layer with the ID that we'd give to the result of applying
// this layer blob to its parent, if it has one, or the blob's hex value otherwise.
// The layerID refers either to the DiffID or the digest of the TOC.
layerIDComponent, layerIDComponentStandalone := s.singleLayerIDComponent(index, info.digest)
if layerIDComponent == "" {
// Check if it's elsewhere and the caller just forgot to pass it to us in a PutBlob() / TryReusingBlob() / …
// Collect trusted parameters of the layer.
s.lock.Lock()
trusted, ok := s.trustedLayerIdentityDataLocked(index, info.digest)
s.lock.Unlock()
if !ok {
// Check if the layer exists already and the caller just (incorrectly) forgot to pass it to us in a PutBlob() / TryReusingBlob() / …
//
// Use none.NoCache to avoid a repeated DiffID lookup in the BlobInfoCache: a caller
// that relies on using a blob digest that has never been seen by the store had better call
@ -902,23 +982,54 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si
return false, fmt.Errorf("error determining uncompressed digest for blob %q", info.digest.String())
}
layerIDComponent, layerIDComponentStandalone = s.singleLayerIDComponent(index, info.digest)
if layerIDComponent == "" {
s.lock.Lock()
trusted, ok = s.trustedLayerIdentityDataLocked(index, info.digest)
s.lock.Unlock()
if !ok {
return false, fmt.Errorf("we have blob %q, but don't know its layer ID", info.digest.String())
}
}
id := layerIDComponent
if !layerIDComponentStandalone || parentLayer != "" {
id = digest.Canonical.FromString(parentLayer + "+" + layerIDComponent).Encoded()
// Ensure that we always see the same “view” of a layer, as identified by the layers uncompressed digest,
// unless the user has explicitly opted out of this in storage.conf: see the more detailed explanation in PutBlobPartial.
if trusted.diffID != "" {
untrustedDiffID, err := s.untrustedLayerDiffID(index)
if err != nil {
var diffIDUnknownErr untrustedLayerDiffIDUnknownError
switch {
case errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable):
logrus.Debugf("Skipping commit for layer %q, manifest not yet available for DiffID check", index)
return true, nil
case errors.As(err, &diffIDUnknownErr):
// If untrustedLayerDiffIDUnknownError, the input image is schema1, has no TOC annotations,
// so we could not have reused a TOC-identified layer nor have done a TOC-identified partial pull,
// i.e. there is no other “view” to worry about. Sanity-check that we really see the only expected view.
//
// Or, maybe, the input image is OCI, and has invalid/missing DiffID values in config. In that case
// we _must_ fail if we used a TOC-identified layer - but PutBlobPartial should have already
// refused to do a partial pull, so we are in an inconsistent state.
if trusted.layerIdentifiedByTOC {
return false, fmt.Errorf("internal error: layer %d for blob %s was identified by TOC, but we don't have a DiffID in config",
index, trusted.logString())
}
// else a schema1 image or a non-TOC layer with no ambiguity, let it through
default:
return false, err
}
} else if trusted.diffID != untrustedDiffID {
return false, fmt.Errorf("layer %d (blob %s) does not match config's DiffID %q", index, trusted.logString(), untrustedDiffID)
}
}
id := layerID(parentLayer, trusted)
if layer, err2 := s.imageRef.transport.store.Layer(id); layer != nil && err2 == nil {
// There's already a layer that should have the right contents, just reuse it.
s.indexToStorageID[index] = layer.ID
return false, nil
}
layer, err := s.createNewLayer(index, info.digest, parentLayer, id)
layer, err := s.createNewLayer(index, trusted, parentLayer, id)
if err != nil {
return false, err
}
@ -929,32 +1040,62 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si
return false, nil
}
// createNewLayer creates a new layer newLayerID for (index, layerDigest) on top of parentLayer (which may be "").
// layerID computes a layer (“chain”) ID for (a possibly-empty parentID, trusted)
func layerID(parentID string, trusted trustedLayerIdentityData) string {
var component string
mustHash := false
if trusted.layerIdentifiedByTOC {
// "@" is not a valid start of a digest.Digest.Encoded(), so this is unambiguous with the !layerIdentifiedByTOC case.
// But we _must_ hash this below to get a Digest.Encoded()-formatted value.
component = "@TOC=" + trusted.tocDigest.Encoded()
mustHash = true
} else {
component = trusted.diffID.Encoded() // This looks like chain IDs, and it uses the traditional value.
}
if parentID == "" && !mustHash {
return component
}
return digest.Canonical.FromString(parentID + "+" + component).Encoded()
}
// createNewLayer creates a new layer newLayerID for (index, trusted) on top of parentLayer (which may be "").
// If the layer cannot be committed yet, the function returns (nil, nil).
func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.Digest, parentLayer, newLayerID string) (*storage.Layer, error) {
func (s *storageImageDestination) createNewLayer(index int, trusted trustedLayerIdentityData, parentLayer, newLayerID string) (*storage.Layer, error) {
s.lock.Lock()
diffOutput, ok := s.lockProtected.diffOutputs[index]
s.lock.Unlock()
if ok {
// If we know a trusted DiffID value (e.g. from a BlobInfoCache), set it in diffOutput.
// Typically, we compute a trusted DiffID value to authenticate the layer contents, see the detailed explanation
// in PutBlobPartial. If the user has opted out of that, but we know a trusted DiffID value
// (e.g. from a BlobInfoCache), set it in diffOutput.
// That way it will be persisted in storage even if the cache is deleted; also
// we can use the value below to avoid the untrustedUncompressedDigest logic (and notably
// the costly commit delay until a manifest is available).
s.lock.Lock()
if d, ok := s.lockProtected.indexToDiffID[index]; ok {
diffOutput.UncompressedDigest = d
// we can use the value below to avoid the untrustedUncompressedDigest logic.
if diffOutput.UncompressedDigest == "" && trusted.diffID != "" {
diffOutput.UncompressedDigest = trusted.diffID
}
s.lock.Unlock()
var untrustedUncompressedDigest digest.Digest
if diffOutput.UncompressedDigest == "" {
d, err := s.untrustedLayerDiffID(index)
if err != nil {
return nil, err
}
if d == "" {
logrus.Debugf("Skipping commit for layer %q, manifest not yet available", newLayerID)
return nil, nil
var diffIDUnknownErr untrustedLayerDiffIDUnknownError
switch {
case errors.Is(err, errUntrustedLayerDiffIDNotYetAvailable):
logrus.Debugf("Skipping commit for layer %q, manifest not yet available", newLayerID)
return nil, nil
case errors.As(err, &diffIDUnknownErr):
// If untrustedLayerDiffIDUnknownError, the input image is schema1, has no TOC annotations,
// so we should have !trusted.layerIdentifiedByTOC, i.e. we should have set
// diffOutput.UncompressedDigest above in this function, at the very latest.
//
// Or, maybe, the input image is OCI, and has invalid/missing DiffID values in config. In that case
// commitLayer should have already refused this image when dealing with the “view” ambiguity.
return nil, fmt.Errorf("internal error: layer %d for blob %s was partially-pulled with unknown UncompressedDigest, but we don't have a DiffID in config",
index, trusted.logString())
default:
return nil, err
}
}
untrustedUncompressedDigest = d
@ -1002,14 +1143,10 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D
// then we need to read the desired contents from a layer.
var filename string
var gotFilename bool
s.lock.Lock()
trusted, ok := s.trustedLayerIdentityDataLocked(index, layerDigest)
if ok && trusted.blobDigest != "" {
if trusted.blobDigest != "" {
s.lock.Lock()
filename, gotFilename = s.lockProtected.filenames[trusted.blobDigest]
}
s.lock.Unlock()
if !ok { // We have already determined newLayerID, so the data must have been available.
return nil, fmt.Errorf("internal inconsistency: layer (%d, %q) not found", index, layerDigest)
s.lock.Unlock()
}
var trustedOriginalDigest digest.Digest // For storage.LayerOptions
var trustedOriginalSize *int64
@ -1036,7 +1173,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D
}
}
if layer == nil {
return nil, fmt.Errorf("layer for blob %q/%q/%q not found", trusted.blobDigest, trusted.tocDigest, trusted.diffID)
return nil, fmt.Errorf("layer for blob %s not found", trusted.logString())
}
// Read the layer's contents.
@ -1046,7 +1183,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D
}
diff, err2 := s.imageRef.transport.store.Diff("", layer.ID, diffOptions)
if err2 != nil {
return nil, fmt.Errorf("reading layer %q for blob %q/%q/%q: %w", layer.ID, trusted.blobDigest, trusted.tocDigest, trusted.diffID, err2)
return nil, fmt.Errorf("reading layer %q for blob %s: %w", layer.ID, trusted.logString(), err2)
}
// Copy the layer diff to a file. Diff() takes a lock that it holds
// until the ReadCloser that it returns is closed, and PutLayer() wants
@ -1125,7 +1262,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D
UncompressedDigest: trusted.diffID,
}, file)
if err != nil && !errors.Is(err, storage.ErrDuplicateID) {
return nil, fmt.Errorf("adding layer with blob %q/%q/%q: %w", trusted.blobDigest, trusted.tocDigest, trusted.diffID, err)
return nil, fmt.Errorf("adding layer with blob %s: %w", trusted.logString(), err)
}
return layer, nil
}
@ -1175,9 +1312,29 @@ func (u *uncommittedImageSource) GetBlob(ctx context.Context, info types.BlobInf
return io.NopCloser(bytes.NewReader(blob)), int64(len(blob)), nil
}
// errUntrustedLayerDiffIDNotYetAvailable is returned by untrustedLayerDiffID
// if the value is not yet available (but it can be available after s.manifests is set).
// This should only happen for external callers of the transport, not for c/image/copy.
//
// Callers of untrustedLayerDiffID before PutManifest must handle this error specially;
// callers after PutManifest can use the default, reporting an internal error.
var errUntrustedLayerDiffIDNotYetAvailable = errors.New("internal error: untrustedLayerDiffID has no value available and fallback was not implemented")
// untrustedLayerDiffIDUnknownError is returned by untrustedLayerDiffID
// if the images format does not provide DiffIDs.
type untrustedLayerDiffIDUnknownError struct {
layerIndex int
}
func (e untrustedLayerDiffIDUnknownError) Error() string {
return fmt.Sprintf("DiffID value for layer %d is unknown or explicitly empty", e.layerIndex)
}
// untrustedLayerDiffID returns a DiffID value for layerIndex from the images config.
// If the value is not yet available (but it can be available after s.manifets is set), it returns ("", nil).
// WARNING: We dont validate the DiffID value against the layer contents; it must not be used for any deduplication.
// It may return two special errors, errUntrustedLayerDiffIDNotYetAvailable or untrustedLayerDiffIDUnknownError.
//
// WARNING: This function does not even validate that the returned digest has a valid format.
// WARNING: We dont _always_ validate this DiffID value against the layer contents; it must not be used for any deduplication.
func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.Digest, error) {
// At this point, we are either inside the multi-threaded scope of HasThreadSafePutBlob,
// nothing is writing to s.manifest yet, and s.untrustedDiffIDValues might have been set
@ -1190,7 +1347,7 @@ func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.D
// via NoteOriginalOCIConfig; this is a compatibility fallback for external callers
// of the public types.ImageDestination.
if s.manifest == nil {
return "", nil
return "", errUntrustedLayerDiffIDNotYetAvailable
}
ctx := context.Background() // This is all happening in memory, no need to worry about cancellation.
@ -1207,24 +1364,27 @@ func (s *storageImageDestination) untrustedLayerDiffID(layerIndex int) (digest.D
s.setUntrustedDiffIDValuesFromOCIConfig(configOCI)
}
// Let entirely empty / missing diffIDs through; but if the array does exist, expect it to contain an entry for every layer,
// and fail hard on missing entries. This tries to account for completely naive image producers who just dont fill DiffID,
// while still detecting incorrectly-built / confused images.
//
// schema1 images dont have DiffID values in the config.
// Our schema1.OCIConfig code produces non-empty DiffID arrays of empty values, so treat arrays of all-empty
// values as “DiffID unknown”.
// For schema 1, it is important to exit here, before the layerIndex >= len(s.untrustedDiffIDValues)
// check, because the format conversion from schema1 to OCI used to compute untrustedDiffIDValues
// changes the number of layres (drops items with Schema1V1Compatibility.ThrowAway).
if !slices.ContainsFunc(s.untrustedDiffIDValues, func(d digest.Digest) bool {
return d != ""
}) {
return "", untrustedLayerDiffIDUnknownError{
layerIndex: layerIndex,
}
}
if layerIndex >= len(s.untrustedDiffIDValues) {
return "", fmt.Errorf("image config has only %d DiffID values, but a layer with index %d exists", len(s.untrustedDiffIDValues), layerIndex)
}
res := s.untrustedDiffIDValues[layerIndex]
if res == "" {
// In practice, this should, right now, only matter for pulls of OCI images
// (this code path implies that we did a partial pull because a layer has an annotation),
// So, DiffIDs should always be set.
//
// It is, though, reachable by pulling an OCI image while converting to schema1,
// using a manual (skopeo copy) or something similar, not (podman pull).
//
// Our schema1.OCIConfig code produces non-empty DiffID arrays of empty values.
// The current semantics of this function are that ("", nil) means "try again later",
// which is not what we want to happen; for now, turn that into an explicit error.
return "", fmt.Errorf("DiffID value for layer %d is unknown or explicitly empty", layerIndex)
}
return res, nil
return s.untrustedDiffIDValues[layerIndex], nil
}
// setUntrustedDiffIDValuesFromOCIConfig updates s.untrustedDiffIDvalues from config.

View File

@ -8,7 +8,7 @@ const (
// VersionMinor is for functionality in a backwards-compatible manner
VersionMinor = 34
// VersionPatch is for backwards-compatible bug fixes
VersionPatch = 0
VersionPatch = 1
// VersionDev indicates development branch. Releases will be empty string.
VersionDev = "-dev"

View File

@ -143,6 +143,10 @@ const (
blockSize = 512 // Size of each block in a tar stream
nameSize = 100 // Max length of the name field in USTAR format
prefixSize = 155 // Max length of the prefix field in USTAR format
// Max length of a special file (PAX header, GNU long name or link).
// This matches the limit used by libarchive.
maxSpecialFileSize = 1 << 20
)
// blockPadding computes the number of bytes needed to pad offset up to the

View File

@ -144,7 +144,7 @@ func (tr *Reader) next() (*Header, error) {
continue // This is a meta header affecting the next header
case TypeGNULongName, TypeGNULongLink:
format.mayOnlyBe(FormatGNU)
realname, err := io.ReadAll(tr)
realname, err := readSpecialFile(tr)
if err != nil {
return nil, err
}
@ -338,7 +338,7 @@ func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) {
// parsePAX parses PAX headers.
// If an extended header (type 'x') is invalid, ErrHeader is returned
func parsePAX(r io.Reader) (map[string]string, error) {
buf, err := io.ReadAll(r)
buf, err := readSpecialFile(r)
if err != nil {
return nil, err
}
@ -889,6 +889,16 @@ func tryReadFull(r io.Reader, b []byte) (n int, err error) {
return n, err
}
// readSpecialFile is like io.ReadAll except it returns
// ErrFieldTooLong if more than maxSpecialFileSize is read.
func readSpecialFile(r io.Reader) ([]byte, error) {
buf, err := io.ReadAll(io.LimitReader(r, maxSpecialFileSize+1))
if len(buf) > maxSpecialFileSize {
return nil, ErrFieldTooLong
}
return buf, err
}
// discard skips n bytes in r, reporting an error if unable to do so.
func discard(tr *Reader, n int64) error {
var seekSkipped, copySkipped int64

4
vendor/modules.txt vendored
View File

@ -251,7 +251,7 @@ github.com/containers/conmon/runner/config
# github.com/containers/gvisor-tap-vsock v0.8.2
## explicit; go 1.22.0
github.com/containers/gvisor-tap-vsock/pkg/types
# github.com/containers/image/v5 v5.33.1-0.20250116221711-317a9885aed9
# github.com/containers/image/v5 v5.33.2-0.20250122201336-16f7e1e0e1fd
## explicit; go 1.22.8
github.com/containers/image/v5/copy
github.com/containers/image/v5/directory
@ -1104,7 +1104,7 @@ github.com/ulikunitz/xz
github.com/ulikunitz/xz/internal/hash
github.com/ulikunitz/xz/internal/xlog
github.com/ulikunitz/xz/lzma
# github.com/vbatts/tar-split v0.11.6
# github.com/vbatts/tar-split v0.11.7
## explicit; go 1.17
github.com/vbatts/tar-split/archive/tar
github.com/vbatts/tar-split/tar/asm