From f6bda786edb4b85981ace7004d2817a81eb39ce6 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Fri, 20 Sep 2024 13:17:45 +0200 Subject: [PATCH 1/2] vendor latest c/common To include the pkg/netns changes. Signed-off-by: Paul Holzinger --- go.mod | 2 +- go.sum | 4 +- .../containers/common/libimage/copier.go | 34 +++- .../containers/common/libimage/import.go | 4 +- .../common/libimage/manifest_list.go | 164 +++++++++++++++++- .../common/libimage/manifests/manifests.go | 28 +-- .../containers/common/libimage/pull.go | 12 +- .../containers/common/libimage/push.go | 4 +- .../containers/common/libimage/save.go | 8 +- .../common/pkg/netns/netns_linux.go | 164 +++++++++++++----- vendor/modules.txt | 2 +- 11 files changed, 343 insertions(+), 83 deletions(-) diff --git a/go.mod b/go.mod index fc44452dcc..53406c6247 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/checkpoint-restore/go-criu/v7 v7.2.0 github.com/containernetworking/plugins v1.5.1 github.com/containers/buildah v1.37.0 - github.com/containers/common v0.60.1-0.20240918122915-db8145750e1d + github.com/containers/common v0.60.1-0.20240920125326-ff6611ae40ad github.com/containers/conmon v2.0.20+incompatible github.com/containers/gvisor-tap-vsock v0.7.5 github.com/containers/image/v5 v5.32.1-0.20240806084436-e3e9287ca8e6 diff --git a/go.sum b/go.sum index fd9edeb3fb..c2c60a7eb5 100644 --- a/go.sum +++ b/go.sum @@ -81,8 +81,8 @@ github.com/containernetworking/plugins v1.5.1 h1:T5ji+LPYjjgW0QM+KyrigZbLsZ8jaX+ github.com/containernetworking/plugins v1.5.1/go.mod h1:MIQfgMayGuHYs0XdNudf31cLLAC+i242hNm6KuDGqCM= github.com/containers/buildah v1.37.0 h1:jvHwu1vIwIqnHyOSg9eef9Apdpry+5oWLrm43gdf8Rk= github.com/containers/buildah v1.37.0/go.mod h1:MKd79tkluMf6vtH06SedhBQK5OB7E0pFVIuiTTw3dJk= -github.com/containers/common v0.60.1-0.20240918122915-db8145750e1d h1:AAEZbfeh92xKohiQoEk6sx+e/8OLIXzIElJ7H69cxVg= -github.com/containers/common v0.60.1-0.20240918122915-db8145750e1d/go.mod h1:CPKbz94MP7eKS5LdkBZbcDbQgAHncjogq/hYY9r4Spw= +github.com/containers/common v0.60.1-0.20240920125326-ff6611ae40ad h1:Ida4yFcnk+xGPynWR267zGGUddWTfpAVMSzo6PhjPFQ= +github.com/containers/common v0.60.1-0.20240920125326-ff6611ae40ad/go.mod h1:UjxkwBehRqlASg/duCPlXbsc2hu5y+iYwUt+8/N4w+8= github.com/containers/conmon v2.0.20+incompatible h1:YbCVSFSCqFjjVwHTPINGdMX1F6JXHGTUje2ZYobNrkg= github.com/containers/conmon v2.0.20+incompatible/go.mod h1:hgwZ2mtuDrppv78a/cOBNiCm6O0UMWGx1mu7P00nu5I= github.com/containers/gvisor-tap-vsock v0.7.5 h1:bTy4u3DOmmUPwurL6me2rsgfypAFDhyeJleUcQmBR/E= diff --git a/vendor/github.com/containers/common/libimage/copier.go b/vendor/github.com/containers/common/libimage/copier.go index 1f4f925fe5..5151343ed6 100644 --- a/vendor/github.com/containers/common/libimage/copier.go +++ b/vendor/github.com/containers/common/libimage/copier.go @@ -60,6 +60,13 @@ type CopyOptions struct { CertDirPath string // Force layer compression when copying to a `dir` transport destination. DirForceCompress bool + + // ImageListSelection is one of CopySystemImage, CopyAllImages, or + // CopySpecificImages, to control whether, when the source reference is a list, + // copy.Image() copies only an image which matches the current runtime + // environment, or all images which match the supplied reference, or only + // specific images from the source reference. + ImageListSelection copy.ImageListSelection // Allow contacting registries over HTTP, or HTTPS with failed TLS // verification. Note that this does not affect other TLS connections. InsecureSkipTLSVerify types.OptionalBool @@ -206,13 +213,17 @@ func getDockerAuthConfig(name, passwd, creds, idToken string) (*types.DockerAuth } } +// NewCopier is a simple, exported wrapper for newCopier +func NewCopier(options *CopyOptions, sc *types.SystemContext) (*copier, error) { + return newCopier(options, sc) +} + // newCopier creates a copier. Note that fields in options *may* overwrite the // counterparts of the specified system context. Please make sure to call // `(*copier).close()`. -func (r *Runtime) newCopier(options *CopyOptions) (*copier, error) { +func newCopier(options *CopyOptions, sc *types.SystemContext) (*copier, error) { c := copier{extendTimeoutSocket: options.extendTimeoutSocket} - c.systemContext = r.systemContextCopy() - + c.systemContext = sc if options.SourceLookupReferenceFunc != nil { c.sourceLookup = options.SourceLookupReferenceFunc } @@ -300,6 +311,7 @@ func (r *Runtime) newCopier(options *CopyOptions) (*copier, error) { c.imageCopyOptions.ProgressInterval = time.Second } + c.imageCopyOptions.ImageListSelection = options.ImageListSelection c.imageCopyOptions.ForceCompressionFormat = options.ForceCompressionFormat c.imageCopyOptions.ForceManifestMIMEType = options.ManifestMIMEType c.imageCopyOptions.SourceCtx = c.systemContext @@ -325,14 +337,22 @@ func (r *Runtime) newCopier(options *CopyOptions) (*copier, error) { return &c, nil } -// close open resources. -func (c *copier) close() error { +// newCopier creates a copier. Note that fields in options *may* overwrite the +// counterparts of the specified system context. Please make sure to call +// `(*copier).close()`. +func (r *Runtime) newCopier(options *CopyOptions) (*copier, error) { + sc := r.systemContextCopy() + return newCopier(options, sc) +} + +// Close open resources. +func (c *copier) Close() error { return c.policyContext.Destroy() } -// copy the source to the destination. Returns the bytes of the copied +// Copy the source to the destination. Returns the bytes of the copied // manifest which may be used for digest computation. -func (c *copier) copy(ctx context.Context, source, destination types.ImageReference) ([]byte, error) { +func (c *copier) Copy(ctx context.Context, source, destination types.ImageReference) ([]byte, error) { logrus.Debugf("Copying source image %s to destination image %s", source.StringWithinTransport(), destination.StringWithinTransport()) // Avoid running out of time when running inside a systemd unit by diff --git a/vendor/github.com/containers/common/libimage/import.go b/vendor/github.com/containers/common/libimage/import.go index 552c48eae0..a03f288533 100644 --- a/vendor/github.com/containers/common/libimage/import.go +++ b/vendor/github.com/containers/common/libimage/import.go @@ -108,9 +108,9 @@ func (r *Runtime) Import(ctx context.Context, path string, options *ImportOption if err != nil { return "", err } - defer c.close() + defer c.Close() - if _, err := c.copy(ctx, srcRef, destRef); err != nil { + if _, err := c.Copy(ctx, srcRef, destRef); err != nil { return "", err } diff --git a/vendor/github.com/containers/common/libimage/manifest_list.go b/vendor/github.com/containers/common/libimage/manifest_list.go index 1db0cf4dfd..a23315da3c 100644 --- a/vendor/github.com/containers/common/libimage/manifest_list.go +++ b/vendor/github.com/containers/common/libimage/manifest_list.go @@ -7,20 +7,28 @@ import ( "errors" "fmt" "maps" + "os" + "path/filepath" "slices" "time" "github.com/containers/common/libimage/define" "github.com/containers/common/libimage/manifests" + manifesterrors "github.com/containers/common/pkg/manifests" + "github.com/containers/common/pkg/supplemented" imageCopy "github.com/containers/image/v5/copy" "github.com/containers/image/v5/docker" "github.com/containers/image/v5/manifest" + "github.com/containers/image/v5/oci/layout" + "github.com/containers/image/v5/signature" "github.com/containers/image/v5/transports/alltransports" "github.com/containers/image/v5/types" "github.com/containers/storage" structcopier "github.com/jinzhu/copier" "github.com/opencontainers/go-digest" + imgspec "github.com/opencontainers/image-spec/specs-go" imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/sirupsen/logrus" ) // NOTE: the abstractions and APIs here are a first step to further merge @@ -101,8 +109,157 @@ func (r *Runtime) lookupManifestList(name string) (*Image, manifests.List, error return image, list, nil } +// ConvertToManifestList converts the image into a manifest list if it is not +// already also a list. An error is returned if the conversion fails. +func (i *Image) ConvertToManifestList(ctx context.Context) (*ManifestList, error) { + // If we don't need to do anything, don't do anything. + if list, err := i.ToManifestList(); err == nil || !errors.Is(err, ErrNotAManifestList) { + return list, err + } + + // Determine which type we prefer for the new manifest list or image index. + _, imageManifestType, err := i.Manifest(ctx) + if err != nil { + return nil, fmt.Errorf("reading the image's manifest: %w", err) + } + var preferredListType string + switch imageManifestType { + case manifest.DockerV2Schema2MediaType, + manifest.DockerV2Schema1SignedMediaType, + manifest.DockerV2Schema1MediaType, + manifest.DockerV2ListMediaType: + preferredListType = manifest.DockerV2ListMediaType + case imgspecv1.MediaTypeImageManifest, imgspecv1.MediaTypeImageIndex: + preferredListType = imgspecv1.MediaTypeImageIndex + default: + preferredListType = "" + } + + // Create a list and add the image's manifest to it. Use OCI format + // for now. If we need to convert it to Docker format, we'll do that + // while copying it. + list := manifests.Create() + if _, err := list.Add(ctx, &i.runtime.systemContext, i.storageReference, false); err != nil { + return nil, fmt.Errorf("generating new image index: %w", err) + } + listBytes, err := list.Serialize(imgspecv1.MediaTypeImageIndex) + if err != nil { + return nil, fmt.Errorf("serializing image index: %w", err) + } + listDigest, err := manifest.Digest(listBytes) + if err != nil { + return nil, fmt.Errorf("digesting image index: %w", err) + } + + // Build an OCI layout containing the image index as the only item. + tmp, err := os.MkdirTemp("", "") + if err != nil { + return nil, fmt.Errorf("serializing initial list: %w", err) + } + defer os.RemoveAll(tmp) + + // Drop our image index in there. + if err := os.Mkdir(filepath.Join(tmp, imgspecv1.ImageBlobsDir), 0o755); err != nil { + return nil, fmt.Errorf("creating directory for blobs: %w", err) + } + if err := os.Mkdir(filepath.Join(tmp, imgspecv1.ImageBlobsDir, listDigest.Algorithm().String()), 0o755); err != nil { + return nil, fmt.Errorf("creating directory for %s blobs: %w", listDigest.Algorithm().String(), err) + } + listFile := filepath.Join(tmp, imgspecv1.ImageBlobsDir, listDigest.Algorithm().String(), listDigest.Encoded()) + if err := os.WriteFile(listFile, listBytes, 0o644); err != nil { + return nil, fmt.Errorf("writing image index for OCI layout: %w", err) + } + + // Build the index for the layout. + index := imgspecv1.Index{ + Versioned: imgspec.Versioned{ + SchemaVersion: 2, + }, + MediaType: imgspecv1.MediaTypeImageIndex, + Manifests: []imgspecv1.Descriptor{{ + MediaType: imgspecv1.MediaTypeImageIndex, + Digest: listDigest, + Size: int64(len(listBytes)), + }}, + } + indexBytes, err := json.Marshal(&index) + if err != nil { + return nil, fmt.Errorf("encoding image index for OCI layout: %w", err) + } + + // Write the index for the layout. + indexFile := filepath.Join(tmp, imgspecv1.ImageIndexFile) + if err := os.WriteFile(indexFile, indexBytes, 0o644); err != nil { + return nil, fmt.Errorf("writing top-level index for OCI layout: %w", err) + } + + // Write the "why yes, this is an OCI layout" file. + layoutFile := filepath.Join(tmp, imgspecv1.ImageLayoutFile) + layoutBytes, err := json.Marshal(imgspecv1.ImageLayout{Version: imgspecv1.ImageLayoutVersion}) + if err != nil { + return nil, fmt.Errorf("encoding image layout structure for OCI layout: %w", err) + } + if err := os.WriteFile(layoutFile, layoutBytes, 0o644); err != nil { + return nil, fmt.Errorf("writing oci-layout file: %w", err) + } + + // Build an OCI layout reference to use as a source. + tmpRef, err := layout.NewReference(tmp, "") + if err != nil { + return nil, fmt.Errorf("creating reference to directory: %w", err) + } + bundle := supplemented.Reference(tmpRef, []types.ImageReference{i.storageReference}, imageCopy.CopySystemImage, nil) + + // Build a policy that ensures we don't prevent ourselves from reading + // this reference. + signaturePolicy, err := signature.DefaultPolicy(&i.runtime.systemContext) + if err != nil { + return nil, fmt.Errorf("obtaining default signature policy: %w", err) + } + acceptAnything := signature.PolicyTransportScopes{ + "": []signature.PolicyRequirement{signature.NewPRInsecureAcceptAnything()}, + } + signaturePolicy.Transports[i.storageReference.Transport().Name()] = acceptAnything + signaturePolicy.Transports[tmpRef.Transport().Name()] = acceptAnything + policyContext, err := signature.NewPolicyContext(signaturePolicy) + if err != nil { + return nil, fmt.Errorf("creating new signature policy context: %w", err) + } + defer func() { + if err2 := policyContext.Destroy(); err2 != nil { + logrus.Errorf("Destroying signature policy context: %v", err2) + } + }() + + // Copy from the OCI layout into the same image record, so that it gets + // both its own manifest and the image index. + copyOptions := imageCopy.Options{ + ForceManifestMIMEType: imageManifestType, + } + if _, err := imageCopy.Image(ctx, policyContext, i.storageReference, bundle, ©Options); err != nil { + return nil, fmt.Errorf("writing updates to image: %w", err) + } + + // Now explicitly write the list's manifest to the image as its "main" + // manifest. + if _, err := list.SaveToImage(i.runtime.store, i.ID(), i.storageImage.Names, preferredListType); err != nil { + return nil, fmt.Errorf("saving image index: %w", err) + } + + // Reload the record. + if err = i.reload(); err != nil { + return nil, fmt.Errorf("reloading image record: %w", err) + } + mList, err := i.runtime.LookupManifestList(i.storageImage.ID) + if err != nil { + return nil, fmt.Errorf("looking up new manifest list: %w", err) + } + + return mList, nil +} + // ToManifestList converts the image into a manifest list. An error is thrown -// if the image is no manifest list. +// if the image is not a manifest list. func (i *Image) ToManifestList() (*ManifestList, error) { list, err := i.getManifestList() if err != nil { @@ -194,6 +351,9 @@ func (m *ManifestList) reload() error { // getManifestList is a helper to obtain a manifest list func (i *Image) getManifestList() (manifests.List, error) { _, list, err := manifests.LoadFromImage(i.runtime.store, i.ID()) + if errors.Is(err, manifesterrors.ErrManifestTypeNotSupported) { + err = fmt.Errorf("%s: %w", err.Error(), ErrNotAManifestList) + } return list, err } @@ -636,7 +796,7 @@ func (m *ManifestList) Push(ctx context.Context, destination string, options *Ma if err != nil { return "", err } - defer copier.close() + defer copier.Close() pushOptions := manifests.PushOptions{ AddCompression: options.AddCompression, diff --git a/vendor/github.com/containers/common/libimage/manifests/manifests.go b/vendor/github.com/containers/common/libimage/manifests/manifests.go index a1625bb1bf..50c32569ac 100644 --- a/vendor/github.com/containers/common/libimage/manifests/manifests.go +++ b/vendor/github.com/containers/common/libimage/manifests/manifests.go @@ -342,8 +342,7 @@ func (l *list) Reference(store storage.Store, multiple cp.ImageListSelection, in } } // write the index that refers to this one artifact image - tag := "latest" - indexFile := filepath.Join(tmp, "index.json") + indexFile := filepath.Join(tmp, v1.ImageIndexFile) index := v1.Index{ Versioned: imgspec.Versioned{ SchemaVersion: 2, @@ -353,9 +352,6 @@ func (l *list) Reference(store storage.Store, multiple cp.ImageListSelection, in MediaType: v1.MediaTypeImageManifest, Digest: artifactManifestDigest, Size: int64(len(contents)), - Annotations: map[string]string{ - v1.AnnotationRefName: tag, - }, }}, } indexBytes, err := json.Marshal(&index) @@ -366,12 +362,16 @@ func (l *list) Reference(store storage.Store, multiple cp.ImageListSelection, in return nil, fmt.Errorf("writing image index for OCI layout: %w", err) } // write the layout file - layoutFile := filepath.Join(tmp, "oci-layout") - if err := os.WriteFile(layoutFile, []byte(`{"imageLayoutVersion": "1.0.0"}`), 0o644); err != nil { + layoutFile := filepath.Join(tmp, v1.ImageLayoutFile) + layoutBytes, err := json.Marshal(v1.ImageLayout{Version: v1.ImageLayoutVersion}) + if err != nil { + return nil, fmt.Errorf("encoding image layout for OCI layout: %w", err) + } + if err := os.WriteFile(layoutFile, layoutBytes, 0o644); err != nil { return nil, fmt.Errorf("writing oci-layout file: %w", err) } // build the reference to this artifact image's oci layout - ref, err := ocilayout.NewReference(tmp, tag) + ref, err := ocilayout.NewReference(tmp, "") if err != nil { return nil, fmt.Errorf("creating ImageReference for artifact with files %q: %w", symlinkedFiles, err) } @@ -676,14 +676,14 @@ func (l *list) Add(ctx context.Context, sys *types.SystemContext, ref types.Imag // This should provide for all of the ways to construct a manifest outlined in // https://github.com/opencontainers/image-spec/blob/main/manifest.md#guidelines-for-artifact-usage -// * no blobs → set ManifestArtifactType -// * blobs, no configuration → set ManifestArtifactType and possibly LayerMediaType, and provide file names -// * blobs and configuration → set ManifestArtifactType, possibly LayerMediaType, and ConfigDescriptor, and provide file names +// - no blobs → set ManifestArtifactType +// - blobs, no configuration → set ManifestArtifactType and possibly LayerMediaType, and provide file names +// - blobs and configuration → set ManifestArtifactType, possibly LayerMediaType, and ConfigDescriptor, and provide file names // // The older style of describing artifacts: -// * leave ManifestArtifactType blank -// * specify a zero-length application/vnd.oci.image.config.v1+json config blob -// * set LayerMediaType to a custom type +// - leave ManifestArtifactType blank +// - specify a zero-length application/vnd.oci.image.config.v1+json config blob +// - set LayerMediaType to a custom type // // When reading data produced elsewhere, note that newer tooling will produce // manifests with ArtifactType set. If the manifest's ArtifactType is not set, diff --git a/vendor/github.com/containers/common/libimage/pull.go b/vendor/github.com/containers/common/libimage/pull.go index 3db1b2992b..c4ad5df0c7 100644 --- a/vendor/github.com/containers/common/libimage/pull.go +++ b/vendor/github.com/containers/common/libimage/pull.go @@ -235,7 +235,7 @@ func (r *Runtime) copyFromDefault(ctx context.Context, ref types.ImageReference, if err != nil { return nil, err } - defer c.close() + defer c.Close() // Figure out a name for the storage destination. var storageName, imageName string @@ -321,7 +321,7 @@ func (r *Runtime) copyFromDefault(ctx context.Context, ref types.ImageReference, return nil, fmt.Errorf("parsing %q: %w", storageName, err) } - _, err = c.copy(ctx, ref, destRef) + _, err = c.Copy(ctx, ref, destRef) return []string{imageName}, err } @@ -391,7 +391,7 @@ func (r *Runtime) copyFromDockerArchiveReaderReference(ctx context.Context, read if err != nil { return nil, err } - defer c.close() + defer c.Close() // Get a slice of storage references we can copy. references, destNames, err := r.storageReferencesReferencesFromArchiveReader(ctx, readerRef, reader) @@ -401,7 +401,7 @@ func (r *Runtime) copyFromDockerArchiveReaderReference(ctx context.Context, read // Now copy all of the images. Use readerRef for performance. for _, destRef := range references { - if _, err := c.copy(ctx, readerRef, destRef); err != nil { + if _, err := c.Copy(ctx, readerRef, destRef); err != nil { return nil, err } } @@ -640,7 +640,7 @@ func (r *Runtime) copySingleImageFromRegistry(ctx context.Context, imageName str if err != nil { return nil, err } - defer c.close() + defer c.Close() var pullErrors []error for _, candidate := range resolved.PullCandidates { @@ -678,7 +678,7 @@ func (r *Runtime) copySingleImageFromRegistry(ctx context.Context, imageName str } } var manifestBytes []byte - if manifestBytes, err = c.copy(ctx, srcRef, destRef); err != nil { + if manifestBytes, err = c.Copy(ctx, srcRef, destRef); err != nil { logrus.Debugf("Error pulling candidate %s: %v", candidateString, err) pullErrors = append(pullErrors, err) continue diff --git a/vendor/github.com/containers/common/libimage/push.go b/vendor/github.com/containers/common/libimage/push.go index f89b8fc070..5db6cfbcfe 100644 --- a/vendor/github.com/containers/common/libimage/push.go +++ b/vendor/github.com/containers/common/libimage/push.go @@ -114,7 +114,7 @@ func (r *Runtime) Push(ctx context.Context, source, destination string, options return nil, err } - defer c.close() + defer c.Close() - return c.copy(ctx, srcRef, destRef) + return c.Copy(ctx, srcRef, destRef) } diff --git a/vendor/github.com/containers/common/libimage/save.go b/vendor/github.com/containers/common/libimage/save.go index 62cad3288d..46529d10f3 100644 --- a/vendor/github.com/containers/common/libimage/save.go +++ b/vendor/github.com/containers/common/libimage/save.go @@ -123,9 +123,9 @@ func (r *Runtime) saveSingleImage(ctx context.Context, name, format, path string if err != nil { return err } - defer c.close() + defer c.Close() - _, err = c.copy(ctx, srcRef, destRef) + _, err = c.Copy(ctx, srcRef, destRef) return err } @@ -208,7 +208,7 @@ func (r *Runtime) saveDockerArchive(ctx context.Context, names []string, path st if err != nil { return err } - defer c.close() + defer c.Close() destRef, err := writer.NewReference(nil) if err != nil { @@ -220,7 +220,7 @@ func (r *Runtime) saveDockerArchive(ctx context.Context, names []string, path st return err } - if _, err := c.copy(ctx, srcRef, destRef); err != nil { + if _, err := c.Copy(ctx, srcRef, destRef); err != nil { return err } } diff --git a/vendor/github.com/containers/common/pkg/netns/netns_linux.go b/vendor/github.com/containers/common/pkg/netns/netns_linux.go index db35fd15a1..5461b05f75 100644 --- a/vendor/github.com/containers/common/pkg/netns/netns_linux.go +++ b/vendor/github.com/containers/common/pkg/netns/netns_linux.go @@ -40,6 +40,8 @@ import ( // threadNsPath is the /proc path to the current netns handle for the current thread const threadNsPath = "/proc/thread-self/ns/net" +var errNoFreeName = errors.New("failed to find free netns path name") + // GetNSRunDir returns the dir of where to create the netNS. When running // rootless, it needs to be at a location writable by user. func GetNSRunDir() (string, error) { @@ -60,14 +62,26 @@ func NewNSAtPath(nsPath string) (ns.NetNS, error) { // NewNS creates a new persistent (bind-mounted) network namespace and returns // an object representing that namespace, without switching to it. func NewNS() (ns.NetNS, error) { + nsRunDir, err := GetNSRunDir() + if err != nil { + return nil, err + } + + // Create the directory for mounting network namespaces + // This needs to be a shared mountpoint in case it is mounted in to + // other namespaces (containers) + err = makeNetnsDir(nsRunDir) + if err != nil { + return nil, err + } + for range 10000 { - b := make([]byte, 16) - _, err := rand.Reader.Read(b) + nsName, err := getRandomNetnsName() if err != nil { - return nil, fmt.Errorf("failed to generate random netns name: %v", err) + return nil, err } - nsName := fmt.Sprintf("netns-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:]) - ns, err := NewNSWithName(nsName) + nsPath := path.Join(nsRunDir, nsName) + ns, err := newNSPath(nsPath) if err == nil { return ns, nil } @@ -77,62 +91,128 @@ func NewNS() (ns.NetNS, error) { } return nil, err } - return nil, errors.New("failed to find free netns path name") + return nil, errNoFreeName } -// NewNSWithName creates a new persistent (bind-mounted) network namespace and returns -// an object representing that namespace, without switching to it. -func NewNSWithName(name string) (ns.NetNS, error) { +// NewNSFrom creates a persistent (bind-mounted) network namespace from the +// given netns path, i.e. /proc//ns/net, and returns the new full path to +// the bind mounted file in the netns run dir. +func NewNSFrom(fromNetns string) (string, error) { nsRunDir, err := GetNSRunDir() if err != nil { - return nil, err + return "", err } - // Create the directory for mounting network namespaces - // This needs to be a shared mountpoint in case it is mounted in to - // other namespaces (containers) - err = os.MkdirAll(nsRunDir, 0o755) + err = makeNetnsDir(nsRunDir) if err != nil { - return nil, err + return "", err } - // Remount the namespace directory shared. This will fail if it is not - // already a mountpoint, so bind-mount it on to itself to "upgrade" it - // to a mountpoint. + for range 10000 { + nsName, err := getRandomNetnsName() + if err != nil { + return "", err + } + nsPath := filepath.Join(nsRunDir, nsName) + + // create an empty file to use as at the mount point + err = createNetnsFile(nsPath) + if err != nil { + // retry when the name already exists + if errors.Is(err, os.ErrExist) { + continue + } + return "", err + } + + err = unix.Mount(fromNetns, nsPath, "none", unix.MS_BIND|unix.MS_SHARED|unix.MS_REC, "") + if err != nil { + // Do not leak the ns on errors + _ = os.RemoveAll(nsPath) + return "", fmt.Errorf("failed to bind mount ns at %s: %v", nsPath, err) + } + return nsPath, nil + } + + return "", errNoFreeName +} + +func getRandomNetnsName() (string, error) { + b := make([]byte, 16) + _, err := rand.Reader.Read(b) + if err != nil { + return "", fmt.Errorf("failed to generate random netns name: %v", err) + } + return fmt.Sprintf("netns-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:]), nil +} + +func makeNetnsDir(nsRunDir string) error { + err := os.MkdirAll(nsRunDir, 0o755) + if err != nil { + return err + } + // Important, the bind mount setup is racy if two process try to set it up in parallel. + // This can have very bad consequences because we end up with two duplicated mounts + // for the netns file that then might have a different parent mounts. + // Also because as root netns dir is also created by ip netns we should not race against them. + // Use a lock on the netns dir like they do, compare the iproute2 ip netns add code. + // https://github.com/iproute2/iproute2/blob/8b9d9ea42759c91d950356ca43930a975d0c352b/ip/ipnetns.c#L806-L815 + + dirFD, err := unix.Open(nsRunDir, unix.O_RDONLY|unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + if err != nil { + return &os.PathError{Op: "open", Path: nsRunDir, Err: err} + } + // closing the fd will also unlock so we do not have to call flock(fd,LOCK_UN) + defer unix.Close(dirFD) + + err = unix.Flock(dirFD, unix.LOCK_EX) + if err != nil { + return fmt.Errorf("failed to lock %s dir: %w", nsRunDir, err) + } + + // Remount the namespace directory shared. This will fail with EINVAL + // if it is not already a mountpoint, so bind-mount it on to itself + // to "upgrade" it to a mountpoint. + err = unix.Mount("", nsRunDir, "none", unix.MS_SHARED|unix.MS_REC, "") + if err == nil { + return nil + } + if err != unix.EINVAL { + return fmt.Errorf("mount --make-rshared %s failed: %q", nsRunDir, err) + } + + // Recursively remount /run/netns on itself. The recursive flag is + // so that any existing netns bindmounts are carried over. + err = unix.Mount(nsRunDir, nsRunDir, "none", unix.MS_BIND|unix.MS_REC, "") + if err != nil { + return fmt.Errorf("mount --rbind %s %s failed: %q", nsRunDir, nsRunDir, err) + } + + // Now we can make it shared err = unix.Mount("", nsRunDir, "none", unix.MS_SHARED|unix.MS_REC, "") if err != nil { - if err != unix.EINVAL { - return nil, fmt.Errorf("mount --make-rshared %s failed: %q", nsRunDir, err) - } - - // Recursively remount /run/netns on itself. The recursive flag is - // so that any existing netns bindmounts are carried over. - err = unix.Mount(nsRunDir, nsRunDir, "none", unix.MS_BIND|unix.MS_REC, "") - if err != nil { - return nil, fmt.Errorf("mount --rbind %s %s failed: %q", nsRunDir, nsRunDir, err) - } - - // Now we can make it shared - err = unix.Mount("", nsRunDir, "none", unix.MS_SHARED|unix.MS_REC, "") - if err != nil { - return nil, fmt.Errorf("mount --make-rshared %s failed: %q", nsRunDir, err) - } + return fmt.Errorf("mount --make-rshared %s failed: %q", nsRunDir, err) } - nsPath := path.Join(nsRunDir, name) - return newNSPath(nsPath) + return nil +} + +// createNetnsFile created the file with O_EXCL to ensure there are no conflicts with others +// Callers should check for ErrExist and loop over it to find a free file. +func createNetnsFile(path string) error { + mountPointFd, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o600) + if err != nil { + return err + } + return mountPointFd.Close() } func newNSPath(nsPath string) (ns.NetNS, error) { - // create an empty file at the mount point - mountPointFd, err := os.OpenFile(nsPath, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o600) + // create an empty file to use as at the mount point + err := createNetnsFile(nsPath) if err != nil { return nil, err } - if err := mountPointFd.Close(); err != nil { - return nil, err - } - // Ensure the mount point is cleaned up on errors; if the namespace // was successfully mounted this will have no effect because the file // is in-use diff --git a/vendor/modules.txt b/vendor/modules.txt index b0961ceba5..e9baaeab6e 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -171,7 +171,7 @@ github.com/containers/buildah/pkg/sshagent github.com/containers/buildah/pkg/util github.com/containers/buildah/pkg/volumes github.com/containers/buildah/util -# github.com/containers/common v0.60.1-0.20240918122915-db8145750e1d +# github.com/containers/common v0.60.1-0.20240920125326-ff6611ae40ad ## explicit; go 1.22.0 github.com/containers/common/internal github.com/containers/common/internal/attributedstring From 792796183f92946a6722c084b1ba9e7c602eefd0 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Fri, 20 Sep 2024 13:19:17 +0200 Subject: [PATCH 2/2] libpod: setupNetNS() correctly mount netns The netns dir has a special logic to bind mout itself and make itslef shared. This code here didn't which lead to catastrophic bug during netns unmounting as we were unable to unmount the netns as the mount got duplicated and had the wrong parent mount. This caused us to loop forever trying to remove the file. Fixes https://issues.redhat.com/browse/RHEL-59620 Fixes #23685 Signed-off-by: Paul Holzinger --- libpod/networking_linux.go | 29 +---------------------------- test/system/550-pause-process.bats | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 28 deletions(-) diff --git a/libpod/networking_linux.go b/libpod/networking_linux.go index bf9f635987..688985b5ec 100644 --- a/libpod/networking_linux.go +++ b/libpod/networking_linux.go @@ -3,11 +3,8 @@ package libpod import ( - "crypto/rand" "fmt" "net" - "os" - "path/filepath" "github.com/containernetworking/plugins/pkg/ns" "github.com/containers/common/libnetwork/types" @@ -17,7 +14,6 @@ import ( "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" "github.com/vishvananda/netlink" - "golang.org/x/sys/unix" ) // Create and configure a new network namespace for a container @@ -104,33 +100,10 @@ func (r *Runtime) createNetNS(ctr *Container) (n string, q map[string]types.Stat // Configure the network namespace using the container process func (r *Runtime) setupNetNS(ctr *Container) error { nsProcess := fmt.Sprintf("/proc/%d/ns/net", ctr.state.PID) - - b := make([]byte, 16) - - if _, err := rand.Reader.Read(b); err != nil { - return fmt.Errorf("failed to generate random netns name: %w", err) - } - nsPath, err := netns.GetNSRunDir() + nsPath, err := netns.NewNSFrom(nsProcess) if err != nil { return err } - nsPath = filepath.Join(nsPath, fmt.Sprintf("netns-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])) - - if err := os.MkdirAll(filepath.Dir(nsPath), 0711); err != nil { - return err - } - - mountPointFd, err := os.Create(nsPath) - if err != nil { - return err - } - if err := mountPointFd.Close(); err != nil { - return err - } - - if err := unix.Mount(nsProcess, nsPath, "none", unix.MS_BIND, ""); err != nil { - return fmt.Errorf("cannot mount %s: %w", nsPath, err) - } networkStatus, err := r.configureNetNS(ctr, nsPath) diff --git a/test/system/550-pause-process.bats b/test/system/550-pause-process.bats index cd38609306..69da5a1d8b 100644 --- a/test/system/550-pause-process.bats +++ b/test/system/550-pause-process.bats @@ -149,3 +149,22 @@ function _check_pause_process() { run_podman rm -f -t0 $cname1 } + +# regression test for https://issues.redhat.com/browse/RHEL-59620 +@test "rootless userns can unmount netns properly" { + skip_if_not_rootless "pause process is only used as rootless" + skip_if_remote "system migrate not supported via remote" + + # Use podman system migrate to stop the currently running pause process + run_podman system migrate + + # First run a container with a custom userns as this uses different netns setup logic. + local cname=c-$(safename) + run_podman run --userns keep-id --name $cname -d $IMAGE sleep 100 + + # Now run a "normal" container without userns + run_podman run --rm $IMAGE true + + # This used to hang trying to unmount the netns. + run_podman rm -f -t0 $cname +}