From f6bda786edb4b85981ace7004d2817a81eb39ce6 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Fri, 20 Sep 2024 13:17:45 +0200 Subject: [PATCH] vendor latest c/common To include the pkg/netns changes. Signed-off-by: Paul Holzinger --- go.mod | 2 +- go.sum | 4 +- .../containers/common/libimage/copier.go | 34 +++- .../containers/common/libimage/import.go | 4 +- .../common/libimage/manifest_list.go | 164 +++++++++++++++++- .../common/libimage/manifests/manifests.go | 28 +-- .../containers/common/libimage/pull.go | 12 +- .../containers/common/libimage/push.go | 4 +- .../containers/common/libimage/save.go | 8 +- .../common/pkg/netns/netns_linux.go | 164 +++++++++++++----- vendor/modules.txt | 2 +- 11 files changed, 343 insertions(+), 83 deletions(-) diff --git a/go.mod b/go.mod index fc44452dcc..53406c6247 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/checkpoint-restore/go-criu/v7 v7.2.0 github.com/containernetworking/plugins v1.5.1 github.com/containers/buildah v1.37.0 - github.com/containers/common v0.60.1-0.20240918122915-db8145750e1d + github.com/containers/common v0.60.1-0.20240920125326-ff6611ae40ad github.com/containers/conmon v2.0.20+incompatible github.com/containers/gvisor-tap-vsock v0.7.5 github.com/containers/image/v5 v5.32.1-0.20240806084436-e3e9287ca8e6 diff --git a/go.sum b/go.sum index fd9edeb3fb..c2c60a7eb5 100644 --- a/go.sum +++ b/go.sum @@ -81,8 +81,8 @@ github.com/containernetworking/plugins v1.5.1 h1:T5ji+LPYjjgW0QM+KyrigZbLsZ8jaX+ github.com/containernetworking/plugins v1.5.1/go.mod h1:MIQfgMayGuHYs0XdNudf31cLLAC+i242hNm6KuDGqCM= github.com/containers/buildah v1.37.0 h1:jvHwu1vIwIqnHyOSg9eef9Apdpry+5oWLrm43gdf8Rk= github.com/containers/buildah v1.37.0/go.mod h1:MKd79tkluMf6vtH06SedhBQK5OB7E0pFVIuiTTw3dJk= -github.com/containers/common v0.60.1-0.20240918122915-db8145750e1d h1:AAEZbfeh92xKohiQoEk6sx+e/8OLIXzIElJ7H69cxVg= -github.com/containers/common v0.60.1-0.20240918122915-db8145750e1d/go.mod h1:CPKbz94MP7eKS5LdkBZbcDbQgAHncjogq/hYY9r4Spw= +github.com/containers/common v0.60.1-0.20240920125326-ff6611ae40ad h1:Ida4yFcnk+xGPynWR267zGGUddWTfpAVMSzo6PhjPFQ= +github.com/containers/common v0.60.1-0.20240920125326-ff6611ae40ad/go.mod h1:UjxkwBehRqlASg/duCPlXbsc2hu5y+iYwUt+8/N4w+8= github.com/containers/conmon v2.0.20+incompatible h1:YbCVSFSCqFjjVwHTPINGdMX1F6JXHGTUje2ZYobNrkg= github.com/containers/conmon v2.0.20+incompatible/go.mod h1:hgwZ2mtuDrppv78a/cOBNiCm6O0UMWGx1mu7P00nu5I= github.com/containers/gvisor-tap-vsock v0.7.5 h1:bTy4u3DOmmUPwurL6me2rsgfypAFDhyeJleUcQmBR/E= diff --git a/vendor/github.com/containers/common/libimage/copier.go b/vendor/github.com/containers/common/libimage/copier.go index 1f4f925fe5..5151343ed6 100644 --- a/vendor/github.com/containers/common/libimage/copier.go +++ b/vendor/github.com/containers/common/libimage/copier.go @@ -60,6 +60,13 @@ type CopyOptions struct { CertDirPath string // Force layer compression when copying to a `dir` transport destination. DirForceCompress bool + + // ImageListSelection is one of CopySystemImage, CopyAllImages, or + // CopySpecificImages, to control whether, when the source reference is a list, + // copy.Image() copies only an image which matches the current runtime + // environment, or all images which match the supplied reference, or only + // specific images from the source reference. + ImageListSelection copy.ImageListSelection // Allow contacting registries over HTTP, or HTTPS with failed TLS // verification. Note that this does not affect other TLS connections. InsecureSkipTLSVerify types.OptionalBool @@ -206,13 +213,17 @@ func getDockerAuthConfig(name, passwd, creds, idToken string) (*types.DockerAuth } } +// NewCopier is a simple, exported wrapper for newCopier +func NewCopier(options *CopyOptions, sc *types.SystemContext) (*copier, error) { + return newCopier(options, sc) +} + // newCopier creates a copier. Note that fields in options *may* overwrite the // counterparts of the specified system context. Please make sure to call // `(*copier).close()`. -func (r *Runtime) newCopier(options *CopyOptions) (*copier, error) { +func newCopier(options *CopyOptions, sc *types.SystemContext) (*copier, error) { c := copier{extendTimeoutSocket: options.extendTimeoutSocket} - c.systemContext = r.systemContextCopy() - + c.systemContext = sc if options.SourceLookupReferenceFunc != nil { c.sourceLookup = options.SourceLookupReferenceFunc } @@ -300,6 +311,7 @@ func (r *Runtime) newCopier(options *CopyOptions) (*copier, error) { c.imageCopyOptions.ProgressInterval = time.Second } + c.imageCopyOptions.ImageListSelection = options.ImageListSelection c.imageCopyOptions.ForceCompressionFormat = options.ForceCompressionFormat c.imageCopyOptions.ForceManifestMIMEType = options.ManifestMIMEType c.imageCopyOptions.SourceCtx = c.systemContext @@ -325,14 +337,22 @@ func (r *Runtime) newCopier(options *CopyOptions) (*copier, error) { return &c, nil } -// close open resources. -func (c *copier) close() error { +// newCopier creates a copier. Note that fields in options *may* overwrite the +// counterparts of the specified system context. Please make sure to call +// `(*copier).close()`. +func (r *Runtime) newCopier(options *CopyOptions) (*copier, error) { + sc := r.systemContextCopy() + return newCopier(options, sc) +} + +// Close open resources. +func (c *copier) Close() error { return c.policyContext.Destroy() } -// copy the source to the destination. Returns the bytes of the copied +// Copy the source to the destination. Returns the bytes of the copied // manifest which may be used for digest computation. -func (c *copier) copy(ctx context.Context, source, destination types.ImageReference) ([]byte, error) { +func (c *copier) Copy(ctx context.Context, source, destination types.ImageReference) ([]byte, error) { logrus.Debugf("Copying source image %s to destination image %s", source.StringWithinTransport(), destination.StringWithinTransport()) // Avoid running out of time when running inside a systemd unit by diff --git a/vendor/github.com/containers/common/libimage/import.go b/vendor/github.com/containers/common/libimage/import.go index 552c48eae0..a03f288533 100644 --- a/vendor/github.com/containers/common/libimage/import.go +++ b/vendor/github.com/containers/common/libimage/import.go @@ -108,9 +108,9 @@ func (r *Runtime) Import(ctx context.Context, path string, options *ImportOption if err != nil { return "", err } - defer c.close() + defer c.Close() - if _, err := c.copy(ctx, srcRef, destRef); err != nil { + if _, err := c.Copy(ctx, srcRef, destRef); err != nil { return "", err } diff --git a/vendor/github.com/containers/common/libimage/manifest_list.go b/vendor/github.com/containers/common/libimage/manifest_list.go index 1db0cf4dfd..a23315da3c 100644 --- a/vendor/github.com/containers/common/libimage/manifest_list.go +++ b/vendor/github.com/containers/common/libimage/manifest_list.go @@ -7,20 +7,28 @@ import ( "errors" "fmt" "maps" + "os" + "path/filepath" "slices" "time" "github.com/containers/common/libimage/define" "github.com/containers/common/libimage/manifests" + manifesterrors "github.com/containers/common/pkg/manifests" + "github.com/containers/common/pkg/supplemented" imageCopy "github.com/containers/image/v5/copy" "github.com/containers/image/v5/docker" "github.com/containers/image/v5/manifest" + "github.com/containers/image/v5/oci/layout" + "github.com/containers/image/v5/signature" "github.com/containers/image/v5/transports/alltransports" "github.com/containers/image/v5/types" "github.com/containers/storage" structcopier "github.com/jinzhu/copier" "github.com/opencontainers/go-digest" + imgspec "github.com/opencontainers/image-spec/specs-go" imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/sirupsen/logrus" ) // NOTE: the abstractions and APIs here are a first step to further merge @@ -101,8 +109,157 @@ func (r *Runtime) lookupManifestList(name string) (*Image, manifests.List, error return image, list, nil } +// ConvertToManifestList converts the image into a manifest list if it is not +// already also a list. An error is returned if the conversion fails. +func (i *Image) ConvertToManifestList(ctx context.Context) (*ManifestList, error) { + // If we don't need to do anything, don't do anything. + if list, err := i.ToManifestList(); err == nil || !errors.Is(err, ErrNotAManifestList) { + return list, err + } + + // Determine which type we prefer for the new manifest list or image index. + _, imageManifestType, err := i.Manifest(ctx) + if err != nil { + return nil, fmt.Errorf("reading the image's manifest: %w", err) + } + var preferredListType string + switch imageManifestType { + case manifest.DockerV2Schema2MediaType, + manifest.DockerV2Schema1SignedMediaType, + manifest.DockerV2Schema1MediaType, + manifest.DockerV2ListMediaType: + preferredListType = manifest.DockerV2ListMediaType + case imgspecv1.MediaTypeImageManifest, imgspecv1.MediaTypeImageIndex: + preferredListType = imgspecv1.MediaTypeImageIndex + default: + preferredListType = "" + } + + // Create a list and add the image's manifest to it. Use OCI format + // for now. If we need to convert it to Docker format, we'll do that + // while copying it. + list := manifests.Create() + if _, err := list.Add(ctx, &i.runtime.systemContext, i.storageReference, false); err != nil { + return nil, fmt.Errorf("generating new image index: %w", err) + } + listBytes, err := list.Serialize(imgspecv1.MediaTypeImageIndex) + if err != nil { + return nil, fmt.Errorf("serializing image index: %w", err) + } + listDigest, err := manifest.Digest(listBytes) + if err != nil { + return nil, fmt.Errorf("digesting image index: %w", err) + } + + // Build an OCI layout containing the image index as the only item. + tmp, err := os.MkdirTemp("", "") + if err != nil { + return nil, fmt.Errorf("serializing initial list: %w", err) + } + defer os.RemoveAll(tmp) + + // Drop our image index in there. + if err := os.Mkdir(filepath.Join(tmp, imgspecv1.ImageBlobsDir), 0o755); err != nil { + return nil, fmt.Errorf("creating directory for blobs: %w", err) + } + if err := os.Mkdir(filepath.Join(tmp, imgspecv1.ImageBlobsDir, listDigest.Algorithm().String()), 0o755); err != nil { + return nil, fmt.Errorf("creating directory for %s blobs: %w", listDigest.Algorithm().String(), err) + } + listFile := filepath.Join(tmp, imgspecv1.ImageBlobsDir, listDigest.Algorithm().String(), listDigest.Encoded()) + if err := os.WriteFile(listFile, listBytes, 0o644); err != nil { + return nil, fmt.Errorf("writing image index for OCI layout: %w", err) + } + + // Build the index for the layout. + index := imgspecv1.Index{ + Versioned: imgspec.Versioned{ + SchemaVersion: 2, + }, + MediaType: imgspecv1.MediaTypeImageIndex, + Manifests: []imgspecv1.Descriptor{{ + MediaType: imgspecv1.MediaTypeImageIndex, + Digest: listDigest, + Size: int64(len(listBytes)), + }}, + } + indexBytes, err := json.Marshal(&index) + if err != nil { + return nil, fmt.Errorf("encoding image index for OCI layout: %w", err) + } + + // Write the index for the layout. + indexFile := filepath.Join(tmp, imgspecv1.ImageIndexFile) + if err := os.WriteFile(indexFile, indexBytes, 0o644); err != nil { + return nil, fmt.Errorf("writing top-level index for OCI layout: %w", err) + } + + // Write the "why yes, this is an OCI layout" file. + layoutFile := filepath.Join(tmp, imgspecv1.ImageLayoutFile) + layoutBytes, err := json.Marshal(imgspecv1.ImageLayout{Version: imgspecv1.ImageLayoutVersion}) + if err != nil { + return nil, fmt.Errorf("encoding image layout structure for OCI layout: %w", err) + } + if err := os.WriteFile(layoutFile, layoutBytes, 0o644); err != nil { + return nil, fmt.Errorf("writing oci-layout file: %w", err) + } + + // Build an OCI layout reference to use as a source. + tmpRef, err := layout.NewReference(tmp, "") + if err != nil { + return nil, fmt.Errorf("creating reference to directory: %w", err) + } + bundle := supplemented.Reference(tmpRef, []types.ImageReference{i.storageReference}, imageCopy.CopySystemImage, nil) + + // Build a policy that ensures we don't prevent ourselves from reading + // this reference. + signaturePolicy, err := signature.DefaultPolicy(&i.runtime.systemContext) + if err != nil { + return nil, fmt.Errorf("obtaining default signature policy: %w", err) + } + acceptAnything := signature.PolicyTransportScopes{ + "": []signature.PolicyRequirement{signature.NewPRInsecureAcceptAnything()}, + } + signaturePolicy.Transports[i.storageReference.Transport().Name()] = acceptAnything + signaturePolicy.Transports[tmpRef.Transport().Name()] = acceptAnything + policyContext, err := signature.NewPolicyContext(signaturePolicy) + if err != nil { + return nil, fmt.Errorf("creating new signature policy context: %w", err) + } + defer func() { + if err2 := policyContext.Destroy(); err2 != nil { + logrus.Errorf("Destroying signature policy context: %v", err2) + } + }() + + // Copy from the OCI layout into the same image record, so that it gets + // both its own manifest and the image index. + copyOptions := imageCopy.Options{ + ForceManifestMIMEType: imageManifestType, + } + if _, err := imageCopy.Image(ctx, policyContext, i.storageReference, bundle, ©Options); err != nil { + return nil, fmt.Errorf("writing updates to image: %w", err) + } + + // Now explicitly write the list's manifest to the image as its "main" + // manifest. + if _, err := list.SaveToImage(i.runtime.store, i.ID(), i.storageImage.Names, preferredListType); err != nil { + return nil, fmt.Errorf("saving image index: %w", err) + } + + // Reload the record. + if err = i.reload(); err != nil { + return nil, fmt.Errorf("reloading image record: %w", err) + } + mList, err := i.runtime.LookupManifestList(i.storageImage.ID) + if err != nil { + return nil, fmt.Errorf("looking up new manifest list: %w", err) + } + + return mList, nil +} + // ToManifestList converts the image into a manifest list. An error is thrown -// if the image is no manifest list. +// if the image is not a manifest list. func (i *Image) ToManifestList() (*ManifestList, error) { list, err := i.getManifestList() if err != nil { @@ -194,6 +351,9 @@ func (m *ManifestList) reload() error { // getManifestList is a helper to obtain a manifest list func (i *Image) getManifestList() (manifests.List, error) { _, list, err := manifests.LoadFromImage(i.runtime.store, i.ID()) + if errors.Is(err, manifesterrors.ErrManifestTypeNotSupported) { + err = fmt.Errorf("%s: %w", err.Error(), ErrNotAManifestList) + } return list, err } @@ -636,7 +796,7 @@ func (m *ManifestList) Push(ctx context.Context, destination string, options *Ma if err != nil { return "", err } - defer copier.close() + defer copier.Close() pushOptions := manifests.PushOptions{ AddCompression: options.AddCompression, diff --git a/vendor/github.com/containers/common/libimage/manifests/manifests.go b/vendor/github.com/containers/common/libimage/manifests/manifests.go index a1625bb1bf..50c32569ac 100644 --- a/vendor/github.com/containers/common/libimage/manifests/manifests.go +++ b/vendor/github.com/containers/common/libimage/manifests/manifests.go @@ -342,8 +342,7 @@ func (l *list) Reference(store storage.Store, multiple cp.ImageListSelection, in } } // write the index that refers to this one artifact image - tag := "latest" - indexFile := filepath.Join(tmp, "index.json") + indexFile := filepath.Join(tmp, v1.ImageIndexFile) index := v1.Index{ Versioned: imgspec.Versioned{ SchemaVersion: 2, @@ -353,9 +352,6 @@ func (l *list) Reference(store storage.Store, multiple cp.ImageListSelection, in MediaType: v1.MediaTypeImageManifest, Digest: artifactManifestDigest, Size: int64(len(contents)), - Annotations: map[string]string{ - v1.AnnotationRefName: tag, - }, }}, } indexBytes, err := json.Marshal(&index) @@ -366,12 +362,16 @@ func (l *list) Reference(store storage.Store, multiple cp.ImageListSelection, in return nil, fmt.Errorf("writing image index for OCI layout: %w", err) } // write the layout file - layoutFile := filepath.Join(tmp, "oci-layout") - if err := os.WriteFile(layoutFile, []byte(`{"imageLayoutVersion": "1.0.0"}`), 0o644); err != nil { + layoutFile := filepath.Join(tmp, v1.ImageLayoutFile) + layoutBytes, err := json.Marshal(v1.ImageLayout{Version: v1.ImageLayoutVersion}) + if err != nil { + return nil, fmt.Errorf("encoding image layout for OCI layout: %w", err) + } + if err := os.WriteFile(layoutFile, layoutBytes, 0o644); err != nil { return nil, fmt.Errorf("writing oci-layout file: %w", err) } // build the reference to this artifact image's oci layout - ref, err := ocilayout.NewReference(tmp, tag) + ref, err := ocilayout.NewReference(tmp, "") if err != nil { return nil, fmt.Errorf("creating ImageReference for artifact with files %q: %w", symlinkedFiles, err) } @@ -676,14 +676,14 @@ func (l *list) Add(ctx context.Context, sys *types.SystemContext, ref types.Imag // This should provide for all of the ways to construct a manifest outlined in // https://github.com/opencontainers/image-spec/blob/main/manifest.md#guidelines-for-artifact-usage -// * no blobs → set ManifestArtifactType -// * blobs, no configuration → set ManifestArtifactType and possibly LayerMediaType, and provide file names -// * blobs and configuration → set ManifestArtifactType, possibly LayerMediaType, and ConfigDescriptor, and provide file names +// - no blobs → set ManifestArtifactType +// - blobs, no configuration → set ManifestArtifactType and possibly LayerMediaType, and provide file names +// - blobs and configuration → set ManifestArtifactType, possibly LayerMediaType, and ConfigDescriptor, and provide file names // // The older style of describing artifacts: -// * leave ManifestArtifactType blank -// * specify a zero-length application/vnd.oci.image.config.v1+json config blob -// * set LayerMediaType to a custom type +// - leave ManifestArtifactType blank +// - specify a zero-length application/vnd.oci.image.config.v1+json config blob +// - set LayerMediaType to a custom type // // When reading data produced elsewhere, note that newer tooling will produce // manifests with ArtifactType set. If the manifest's ArtifactType is not set, diff --git a/vendor/github.com/containers/common/libimage/pull.go b/vendor/github.com/containers/common/libimage/pull.go index 3db1b2992b..c4ad5df0c7 100644 --- a/vendor/github.com/containers/common/libimage/pull.go +++ b/vendor/github.com/containers/common/libimage/pull.go @@ -235,7 +235,7 @@ func (r *Runtime) copyFromDefault(ctx context.Context, ref types.ImageReference, if err != nil { return nil, err } - defer c.close() + defer c.Close() // Figure out a name for the storage destination. var storageName, imageName string @@ -321,7 +321,7 @@ func (r *Runtime) copyFromDefault(ctx context.Context, ref types.ImageReference, return nil, fmt.Errorf("parsing %q: %w", storageName, err) } - _, err = c.copy(ctx, ref, destRef) + _, err = c.Copy(ctx, ref, destRef) return []string{imageName}, err } @@ -391,7 +391,7 @@ func (r *Runtime) copyFromDockerArchiveReaderReference(ctx context.Context, read if err != nil { return nil, err } - defer c.close() + defer c.Close() // Get a slice of storage references we can copy. references, destNames, err := r.storageReferencesReferencesFromArchiveReader(ctx, readerRef, reader) @@ -401,7 +401,7 @@ func (r *Runtime) copyFromDockerArchiveReaderReference(ctx context.Context, read // Now copy all of the images. Use readerRef for performance. for _, destRef := range references { - if _, err := c.copy(ctx, readerRef, destRef); err != nil { + if _, err := c.Copy(ctx, readerRef, destRef); err != nil { return nil, err } } @@ -640,7 +640,7 @@ func (r *Runtime) copySingleImageFromRegistry(ctx context.Context, imageName str if err != nil { return nil, err } - defer c.close() + defer c.Close() var pullErrors []error for _, candidate := range resolved.PullCandidates { @@ -678,7 +678,7 @@ func (r *Runtime) copySingleImageFromRegistry(ctx context.Context, imageName str } } var manifestBytes []byte - if manifestBytes, err = c.copy(ctx, srcRef, destRef); err != nil { + if manifestBytes, err = c.Copy(ctx, srcRef, destRef); err != nil { logrus.Debugf("Error pulling candidate %s: %v", candidateString, err) pullErrors = append(pullErrors, err) continue diff --git a/vendor/github.com/containers/common/libimage/push.go b/vendor/github.com/containers/common/libimage/push.go index f89b8fc070..5db6cfbcfe 100644 --- a/vendor/github.com/containers/common/libimage/push.go +++ b/vendor/github.com/containers/common/libimage/push.go @@ -114,7 +114,7 @@ func (r *Runtime) Push(ctx context.Context, source, destination string, options return nil, err } - defer c.close() + defer c.Close() - return c.copy(ctx, srcRef, destRef) + return c.Copy(ctx, srcRef, destRef) } diff --git a/vendor/github.com/containers/common/libimage/save.go b/vendor/github.com/containers/common/libimage/save.go index 62cad3288d..46529d10f3 100644 --- a/vendor/github.com/containers/common/libimage/save.go +++ b/vendor/github.com/containers/common/libimage/save.go @@ -123,9 +123,9 @@ func (r *Runtime) saveSingleImage(ctx context.Context, name, format, path string if err != nil { return err } - defer c.close() + defer c.Close() - _, err = c.copy(ctx, srcRef, destRef) + _, err = c.Copy(ctx, srcRef, destRef) return err } @@ -208,7 +208,7 @@ func (r *Runtime) saveDockerArchive(ctx context.Context, names []string, path st if err != nil { return err } - defer c.close() + defer c.Close() destRef, err := writer.NewReference(nil) if err != nil { @@ -220,7 +220,7 @@ func (r *Runtime) saveDockerArchive(ctx context.Context, names []string, path st return err } - if _, err := c.copy(ctx, srcRef, destRef); err != nil { + if _, err := c.Copy(ctx, srcRef, destRef); err != nil { return err } } diff --git a/vendor/github.com/containers/common/pkg/netns/netns_linux.go b/vendor/github.com/containers/common/pkg/netns/netns_linux.go index db35fd15a1..5461b05f75 100644 --- a/vendor/github.com/containers/common/pkg/netns/netns_linux.go +++ b/vendor/github.com/containers/common/pkg/netns/netns_linux.go @@ -40,6 +40,8 @@ import ( // threadNsPath is the /proc path to the current netns handle for the current thread const threadNsPath = "/proc/thread-self/ns/net" +var errNoFreeName = errors.New("failed to find free netns path name") + // GetNSRunDir returns the dir of where to create the netNS. When running // rootless, it needs to be at a location writable by user. func GetNSRunDir() (string, error) { @@ -60,14 +62,26 @@ func NewNSAtPath(nsPath string) (ns.NetNS, error) { // NewNS creates a new persistent (bind-mounted) network namespace and returns // an object representing that namespace, without switching to it. func NewNS() (ns.NetNS, error) { + nsRunDir, err := GetNSRunDir() + if err != nil { + return nil, err + } + + // Create the directory for mounting network namespaces + // This needs to be a shared mountpoint in case it is mounted in to + // other namespaces (containers) + err = makeNetnsDir(nsRunDir) + if err != nil { + return nil, err + } + for range 10000 { - b := make([]byte, 16) - _, err := rand.Reader.Read(b) + nsName, err := getRandomNetnsName() if err != nil { - return nil, fmt.Errorf("failed to generate random netns name: %v", err) + return nil, err } - nsName := fmt.Sprintf("netns-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:]) - ns, err := NewNSWithName(nsName) + nsPath := path.Join(nsRunDir, nsName) + ns, err := newNSPath(nsPath) if err == nil { return ns, nil } @@ -77,62 +91,128 @@ func NewNS() (ns.NetNS, error) { } return nil, err } - return nil, errors.New("failed to find free netns path name") + return nil, errNoFreeName } -// NewNSWithName creates a new persistent (bind-mounted) network namespace and returns -// an object representing that namespace, without switching to it. -func NewNSWithName(name string) (ns.NetNS, error) { +// NewNSFrom creates a persistent (bind-mounted) network namespace from the +// given netns path, i.e. /proc//ns/net, and returns the new full path to +// the bind mounted file in the netns run dir. +func NewNSFrom(fromNetns string) (string, error) { nsRunDir, err := GetNSRunDir() if err != nil { - return nil, err + return "", err } - // Create the directory for mounting network namespaces - // This needs to be a shared mountpoint in case it is mounted in to - // other namespaces (containers) - err = os.MkdirAll(nsRunDir, 0o755) + err = makeNetnsDir(nsRunDir) if err != nil { - return nil, err + return "", err } - // Remount the namespace directory shared. This will fail if it is not - // already a mountpoint, so bind-mount it on to itself to "upgrade" it - // to a mountpoint. + for range 10000 { + nsName, err := getRandomNetnsName() + if err != nil { + return "", err + } + nsPath := filepath.Join(nsRunDir, nsName) + + // create an empty file to use as at the mount point + err = createNetnsFile(nsPath) + if err != nil { + // retry when the name already exists + if errors.Is(err, os.ErrExist) { + continue + } + return "", err + } + + err = unix.Mount(fromNetns, nsPath, "none", unix.MS_BIND|unix.MS_SHARED|unix.MS_REC, "") + if err != nil { + // Do not leak the ns on errors + _ = os.RemoveAll(nsPath) + return "", fmt.Errorf("failed to bind mount ns at %s: %v", nsPath, err) + } + return nsPath, nil + } + + return "", errNoFreeName +} + +func getRandomNetnsName() (string, error) { + b := make([]byte, 16) + _, err := rand.Reader.Read(b) + if err != nil { + return "", fmt.Errorf("failed to generate random netns name: %v", err) + } + return fmt.Sprintf("netns-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:]), nil +} + +func makeNetnsDir(nsRunDir string) error { + err := os.MkdirAll(nsRunDir, 0o755) + if err != nil { + return err + } + // Important, the bind mount setup is racy if two process try to set it up in parallel. + // This can have very bad consequences because we end up with two duplicated mounts + // for the netns file that then might have a different parent mounts. + // Also because as root netns dir is also created by ip netns we should not race against them. + // Use a lock on the netns dir like they do, compare the iproute2 ip netns add code. + // https://github.com/iproute2/iproute2/blob/8b9d9ea42759c91d950356ca43930a975d0c352b/ip/ipnetns.c#L806-L815 + + dirFD, err := unix.Open(nsRunDir, unix.O_RDONLY|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + if err != nil { + return &os.PathError{Op: "open", Path: nsRunDir, Err: err} + } + // closing the fd will also unlock so we do not have to call flock(fd,LOCK_UN) + defer unix.Close(dirFD) + + err = unix.Flock(dirFD, unix.LOCK_EX) + if err != nil { + return fmt.Errorf("failed to lock %s dir: %w", nsRunDir, err) + } + + // Remount the namespace directory shared. This will fail with EINVAL + // if it is not already a mountpoint, so bind-mount it on to itself + // to "upgrade" it to a mountpoint. + err = unix.Mount("", nsRunDir, "none", unix.MS_SHARED|unix.MS_REC, "") + if err == nil { + return nil + } + if err != unix.EINVAL { + return fmt.Errorf("mount --make-rshared %s failed: %q", nsRunDir, err) + } + + // Recursively remount /run/netns on itself. The recursive flag is + // so that any existing netns bindmounts are carried over. + err = unix.Mount(nsRunDir, nsRunDir, "none", unix.MS_BIND|unix.MS_REC, "") + if err != nil { + return fmt.Errorf("mount --rbind %s %s failed: %q", nsRunDir, nsRunDir, err) + } + + // Now we can make it shared err = unix.Mount("", nsRunDir, "none", unix.MS_SHARED|unix.MS_REC, "") if err != nil { - if err != unix.EINVAL { - return nil, fmt.Errorf("mount --make-rshared %s failed: %q", nsRunDir, err) - } - - // Recursively remount /run/netns on itself. The recursive flag is - // so that any existing netns bindmounts are carried over. - err = unix.Mount(nsRunDir, nsRunDir, "none", unix.MS_BIND|unix.MS_REC, "") - if err != nil { - return nil, fmt.Errorf("mount --rbind %s %s failed: %q", nsRunDir, nsRunDir, err) - } - - // Now we can make it shared - err = unix.Mount("", nsRunDir, "none", unix.MS_SHARED|unix.MS_REC, "") - if err != nil { - return nil, fmt.Errorf("mount --make-rshared %s failed: %q", nsRunDir, err) - } + return fmt.Errorf("mount --make-rshared %s failed: %q", nsRunDir, err) } - nsPath := path.Join(nsRunDir, name) - return newNSPath(nsPath) + return nil +} + +// createNetnsFile created the file with O_EXCL to ensure there are no conflicts with others +// Callers should check for ErrExist and loop over it to find a free file. +func createNetnsFile(path string) error { + mountPointFd, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o600) + if err != nil { + return err + } + return mountPointFd.Close() } func newNSPath(nsPath string) (ns.NetNS, error) { - // create an empty file at the mount point - mountPointFd, err := os.OpenFile(nsPath, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o600) + // create an empty file to use as at the mount point + err := createNetnsFile(nsPath) if err != nil { return nil, err } - if err := mountPointFd.Close(); err != nil { - return nil, err - } - // Ensure the mount point is cleaned up on errors; if the namespace // was successfully mounted this will have no effect because the file // is in-use diff --git a/vendor/modules.txt b/vendor/modules.txt index b0961ceba5..e9baaeab6e 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -171,7 +171,7 @@ github.com/containers/buildah/pkg/sshagent github.com/containers/buildah/pkg/util github.com/containers/buildah/pkg/volumes github.com/containers/buildah/util -# github.com/containers/common v0.60.1-0.20240918122915-db8145750e1d +# github.com/containers/common v0.60.1-0.20240920125326-ff6611ae40ad ## explicit; go 1.22.0 github.com/containers/common/internal github.com/containers/common/internal/attributedstring