From 716eb000facc3b8a6085fea3d80de4ffc25790b4 Mon Sep 17 00:00:00 2001 From: Matt Heon Date: Fri, 28 Feb 2025 09:06:58 -0500 Subject: [PATCH 1/2] Bump to latest c/storage main Includes a patch for quotas that is needed for this PR. Signed-off-by: Matt Heon --- go.mod | 2 +- go.sum | 4 +- .../drivers/quota/projectquota_supported.go | 15 +++- .../github.com/containers/storage/layers.go | 70 +++++++++++++------ .../containers/storage/pkg/archive/archive.go | 17 ++++- .../containers/storage/pkg/archive/changes.go | 2 +- vendor/modules.txt | 2 +- 7 files changed, 81 insertions(+), 31 deletions(-) diff --git a/go.mod b/go.mod index 3c11ff7c4d..d629f008d0 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( github.com/containers/libhvee v0.10.0 github.com/containers/ocicrypt v1.2.1 github.com/containers/psgo v1.9.0 - github.com/containers/storage v1.57.2-0.20250214174508-b6f6fb27264f + github.com/containers/storage v1.57.2-0.20250228100055-700b765b2111 github.com/containers/winquit v1.1.0 github.com/coreos/go-systemd/v22 v22.5.1-0.20231103132048-7d375ecc2b09 github.com/crc-org/crc/v2 v2.45.0 diff --git a/go.sum b/go.sum index 5b0414fef8..6dfd7e9f35 100644 --- a/go.sum +++ b/go.sum @@ -96,8 +96,8 @@ github.com/containers/ocicrypt v1.2.1 h1:0qIOTT9DoYwcKmxSt8QJt+VzMY18onl9jUXsxpV github.com/containers/ocicrypt v1.2.1/go.mod h1:aD0AAqfMp0MtwqWgHM1bUwe1anx0VazI108CRrSKINQ= github.com/containers/psgo v1.9.0 h1:eJ74jzSaCHnWt26OlKZROSyUyRcGDf+gYBdXnxrMW4g= github.com/containers/psgo v1.9.0/go.mod h1:0YoluUm43Mz2UnBIh1P+6V6NWcbpTL5uRtXyOcH0B5A= -github.com/containers/storage v1.57.2-0.20250214174508-b6f6fb27264f h1:moo+peQbWNAZBI7+NTg+4RUOq1JuOllkfCs7ximmbg0= -github.com/containers/storage v1.57.2-0.20250214174508-b6f6fb27264f/go.mod h1:egC90qMy0fTpGjkaHj667syy1Cbr3XPZEVX/qkUPrdM= +github.com/containers/storage v1.57.2-0.20250228100055-700b765b2111 h1:NMmaECeWzq2cWAXfPnsl7oFc2jyb/YRcPbzYT8jpQUA= +github.com/containers/storage v1.57.2-0.20250228100055-700b765b2111/go.mod h1:egC90qMy0fTpGjkaHj667syy1Cbr3XPZEVX/qkUPrdM= github.com/containers/winquit v1.1.0 h1:jArun04BNDQvt2W0Y78kh9TazN2EIEMG5Im6/JY7+pE= github.com/containers/winquit v1.1.0/go.mod h1:PsPeZlnbkmGGIToMPHF1zhWjBUkd8aHjMOr/vFcPxw8= github.com/coreos/go-oidc/v3 v3.12.0 h1:sJk+8G2qq94rDI6ehZ71Bol3oUHy63qNYmkiSjrc/Jo= diff --git a/vendor/github.com/containers/storage/drivers/quota/projectquota_supported.go b/vendor/github.com/containers/storage/drivers/quota/projectquota_supported.go index 59ba5b0b21..c3b334f7cd 100644 --- a/vendor/github.com/containers/storage/drivers/quota/projectquota_supported.go +++ b/vendor/github.com/containers/storage/drivers/quota/projectquota_supported.go @@ -190,7 +190,8 @@ func NewControl(basePath string) (*Control, error) { } // SetQuota - assign a unique project id to directory and set the quota limits -// for that project id +// for that project id. +// targetPath must exist, must be a directory, and must be empty. func (q *Control) SetQuota(targetPath string, quota Quota) error { var projectID uint32 value, ok := q.quotas.Load(targetPath) @@ -200,10 +201,20 @@ func (q *Control) SetQuota(targetPath string, quota Quota) error { if !ok { projectID = q.nextProjectID + // The directory we are setting an ID on must be empty, as + // the ID will not be propagated to pre-existing subdirectories. 
+ dents, err := os.ReadDir(targetPath) + if err != nil { + return fmt.Errorf("reading directory %s: %w", targetPath, err) + } + if len(dents) > 0 { + return fmt.Errorf("can only set project ID on empty directories, %s is not empty", targetPath) + } + // // assign project id to new container directory // - err := setProjectID(targetPath, projectID) + err = setProjectID(targetPath, projectID) if err != nil { return err } diff --git a/vendor/github.com/containers/storage/layers.go b/vendor/github.com/containers/storage/layers.go index 69a4887bfb..8fbd842fbc 100644 --- a/vendor/github.com/containers/storage/layers.go +++ b/vendor/github.com/containers/storage/layers.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "maps" + "math/bits" "os" "path" "path/filepath" @@ -46,11 +47,13 @@ const ( type layerLocations uint8 -// The backing store is split in two json files, one (the volatile) -// that is written without fsync() meaning it isn't as robust to -// unclean shutdown +// The backing store is split in three json files. +// The volatile store is written without fsync() meaning it isn't as robust to unclean shutdown. +// Optionally, an image store can be configured to store RO layers. +// The stable store is used for the remaining layers that don't go into the other stores. const ( stableLayerLocation layerLocations = 1 << iota + imageStoreLayerLocation volatileLayerLocation numLayerLocationIndex = iota @@ -60,6 +63,10 @@ func layerLocationFromIndex(index int) layerLocations { return 1 << index } +func indexFromLayerLocation(location layerLocations) int { + return bits.TrailingZeros(uint(location)) +} + // A Layer is a record of a copy-on-write layer that's stored by the lower // level graph driver. type Layer struct { @@ -164,8 +171,8 @@ type Layer struct { // ReadOnly is true if this layer resides in a read-only layer store. ReadOnly bool `json:"-"` - // volatileStore is true if the container is from the volatile json file - volatileStore bool `json:"-"` + // location is the location of the store where the layer is present. + location layerLocations `json:"-"` // BigDataNames is a list of names of data items that we keep for the // convenience of the caller. They can be large, and are only in @@ -430,14 +437,6 @@ type layerStore struct { driver drivers.Driver } -// The caller must hold r.inProcessLock for reading. -func layerLocation(l *Layer) layerLocations { - if l.volatileStore { - return volatileLayerLocation - } - return stableLayerLocation -} - func copyLayer(l *Layer) *Layer { return &Layer{ ID: l.ID, @@ -455,7 +454,7 @@ func copyLayer(l *Layer) *Layer { TOCDigest: l.TOCDigest, CompressionType: l.CompressionType, ReadOnly: l.ReadOnly, - volatileStore: l.volatileStore, + location: l.location, BigDataNames: copySlicePreferringNil(l.BigDataNames), Flags: copyMapPreferringNil(l.Flags), UIDMap: copySlicePreferringNil(l.UIDMap), @@ -658,7 +657,11 @@ func (r *layerStore) layersModified() (lockfile.LastWrite, bool, error) { // modified manually, then we have to reload the storage in // any case. 
for locationIndex := 0; locationIndex < numLayerLocationIndex; locationIndex++ { - info, err := os.Stat(r.jsonPath[locationIndex]) + rpath := r.jsonPath[locationIndex] + if rpath == "" { + continue + } + info, err := os.Stat(rpath) if err != nil && !os.IsNotExist(err) { return lockfile.LastWrite{}, false, fmt.Errorf("stat layers file: %w", err) } @@ -794,6 +797,9 @@ func (r *layerStore) load(lockedForWriting bool) (bool, error) { for locationIndex := 0; locationIndex < numLayerLocationIndex; locationIndex++ { location := layerLocationFromIndex(locationIndex) rpath := r.jsonPath[locationIndex] + if rpath == "" { + continue + } info, err := os.Stat(rpath) if err != nil { if !os.IsNotExist(err) { @@ -820,9 +826,7 @@ func (r *layerStore) load(lockedForWriting bool) (bool, error) { continue // skip invalid duplicated layer } // Remember where the layer came from - if location == volatileLayerLocation { - layer.volatileStore = true - } + layer.location = location layers = append(layers, layer) ids[layer.ID] = layer } @@ -843,7 +847,7 @@ func (r *layerStore) load(lockedForWriting bool) (bool, error) { if conflict, ok := names[name]; ok { r.removeName(conflict, name) errorToResolveBySaving = ErrDuplicateLayerNames - modifiedLocations |= layerLocation(conflict) + modifiedLocations |= conflict.location } names[name] = layers[n] } @@ -939,7 +943,7 @@ func (r *layerStore) load(lockedForWriting bool) (bool, error) { incompleteDeletionErrors = errors.Join(incompleteDeletionErrors, fmt.Errorf("deleting layer %#v: %w", layer.ID, err)) } - modifiedLocations |= layerLocation(layer) + modifiedLocations |= layer.location } if err := r.saveLayers(modifiedLocations); err != nil { return false, err @@ -1008,7 +1012,7 @@ func (r *layerStore) save(saveLocations layerLocations) error { // The caller must hold r.lockfile locked for writing. // The caller must hold r.inProcessLock for WRITING. func (r *layerStore) saveFor(modifiedLayer *Layer) error { - return r.save(layerLocation(modifiedLayer)) + return r.save(modifiedLayer.location) } // The caller must hold r.lockfile locked for writing. @@ -1033,12 +1037,15 @@ func (r *layerStore) saveLayers(saveLocations layerLocations) error { continue } rpath := r.jsonPath[locationIndex] + if rpath == "" { + return fmt.Errorf("internal error: no path for location %v", location) + } if err := os.MkdirAll(filepath.Dir(rpath), 0o700); err != nil { return err } subsetLayers := make([]*Layer, 0, len(r.layers)) for _, layer := range r.layers { - if layerLocation(layer) == location { + if layer.location == location { subsetLayers = append(subsetLayers, layer) } } @@ -1138,12 +1145,17 @@ func (s *store) newLayerStore(rundir, layerdir, imagedir string, driver drivers. 
if transient { volatileDir = rundir } + layersImageDir := "" + if imagedir != "" { + layersImageDir = filepath.Join(imagedir, "layers.json") + } rlstore := layerStore{ lockfile: newMultipleLockFile(lockFiles...), mountsLockfile: mountsLockfile, rundir: rundir, jsonPath: [numLayerLocationIndex]string{ filepath.Join(layerdir, "layers.json"), + layersImageDir, filepath.Join(volatileDir, "volatile-layers.json"), }, layerdir: layerdir, @@ -1181,6 +1193,7 @@ func newROLayerStore(rundir string, layerdir string, driver drivers.Driver) (roL rundir: rundir, jsonPath: [numLayerLocationIndex]string{ filepath.Join(layerdir, "layers.json"), + "", filepath.Join(layerdir, "volatile-layers.json"), }, layerdir: layerdir, @@ -1329,6 +1342,17 @@ func (r *layerStore) PutAdditionalLayer(id string, parentLayer *Layer, names []s return copyLayer(layer), nil } +func (r *layerStore) pickStoreLocation(volatile, writeable bool) layerLocations { + switch { + case volatile: + return volatileLayerLocation + case !writeable && r.jsonPath[indexFromLayerLocation(imageStoreLayerLocation)] != "": + return imageStoreLayerLocation + default: + return stableLayerLocation + } +} + // Requires startWriting. func (r *layerStore) create(id string, parentLayer *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool, diff io.Reader, slo *stagedLayerOptions) (layer *Layer, size int64, err error) { if moreOptions == nil { @@ -1421,7 +1445,7 @@ func (r *layerStore) create(id string, parentLayer *Layer, names []string, mount UIDMap: copySlicePreferringNil(moreOptions.UIDMap), GIDMap: copySlicePreferringNil(moreOptions.GIDMap), BigDataNames: []string{}, - volatileStore: moreOptions.Volatile, + location: r.pickStoreLocation(moreOptions.Volatile, writeable), } layer.Flags[incompleteFlag] = true diff --git a/vendor/github.com/containers/storage/pkg/archive/archive.go b/vendor/github.com/containers/storage/pkg/archive/archive.go index 982d0f0f5d..ad64b4ad0d 100644 --- a/vendor/github.com/containers/storage/pkg/archive/archive.go +++ b/vendor/github.com/containers/storage/pkg/archive/archive.go @@ -16,6 +16,7 @@ import ( "strings" "sync" "syscall" + "time" "github.com/containers/storage/pkg/fileutils" "github.com/containers/storage/pkg/idtools" @@ -67,6 +68,8 @@ type ( CopyPass bool // ForceMask, if set, indicates the permission mask used for created files. ForceMask *os.FileMode + // Timestamp, if set, will be set in each header as create/mod/access time + Timestamp *time.Time } ) @@ -494,15 +497,19 @@ type tarWriter struct { // from the traditional behavior/format to get features like subsecond // precision in timestamps. 
 	CopyPass bool
+
+	// Timestamp, if set, will be set in each header as create/mod/access time
+	Timestamp *time.Time
 }
 
-func newTarWriter(idMapping *idtools.IDMappings, writer io.Writer, chownOpts *idtools.IDPair) *tarWriter {
+func newTarWriter(idMapping *idtools.IDMappings, writer io.Writer, chownOpts *idtools.IDPair, timestamp *time.Time) *tarWriter {
 	return &tarWriter{
 		SeenFiles:  make(map[uint64]string),
 		TarWriter:  tar.NewWriter(writer),
 		Buffer:     pools.BufioWriter32KPool.Get(nil),
 		IDMappings: idMapping,
 		ChownOpts:  chownOpts,
+		Timestamp:  timestamp,
 	}
 }
 
@@ -600,6 +607,13 @@ func (ta *tarWriter) addFile(path, name string) error {
 		hdr.Gname = ""
 	}
 
+	// if override timestamp set, replace all times with this
+	if ta.Timestamp != nil {
+		hdr.ModTime = *ta.Timestamp
+		hdr.AccessTime = *ta.Timestamp
+		hdr.ChangeTime = *ta.Timestamp
+	}
+
 	maybeTruncateHeaderModTime(hdr)
 
 	if ta.WhiteoutConverter != nil {
@@ -866,6 +880,7 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error)
 		idtools.NewIDMappingsFromMaps(options.UIDMaps, options.GIDMaps),
 		compressWriter,
 		options.ChownOpts,
+		options.Timestamp,
 	)
 	ta.WhiteoutConverter = GetWhiteoutConverter(options.WhiteoutFormat, options.WhiteoutData)
 	ta.CopyPass = options.CopyPass
diff --git a/vendor/github.com/containers/storage/pkg/archive/changes.go b/vendor/github.com/containers/storage/pkg/archive/changes.go
index f5eefd579e..37e9f947f5 100644
--- a/vendor/github.com/containers/storage/pkg/archive/changes.go
+++ b/vendor/github.com/containers/storage/pkg/archive/changes.go
@@ -452,7 +452,7 @@ func ChangesSize(newDir string, changes []Change) int64 {
 func ExportChanges(dir string, changes []Change, uidMaps, gidMaps []idtools.IDMap) (io.ReadCloser, error) {
 	reader, writer := io.Pipe()
 	go func() {
-		ta := newTarWriter(idtools.NewIDMappingsFromMaps(uidMaps, gidMaps), writer, nil)
+		ta := newTarWriter(idtools.NewIDMappingsFromMaps(uidMaps, gidMaps), writer, nil, nil)
 
 		// this buffer is needed for the duration of this piped stream
 		defer pools.BufioWriter32KPool.Put(ta.Buffer)
diff --git a/vendor/modules.txt b/vendor/modules.txt
index fb2d6e05cc..51a44be676 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -364,7 +364,7 @@ github.com/containers/psgo/internal/dev
 github.com/containers/psgo/internal/host
 github.com/containers/psgo/internal/proc
 github.com/containers/psgo/internal/process
-# github.com/containers/storage v1.57.2-0.20250214174508-b6f6fb27264f
+# github.com/containers/storage v1.57.2-0.20250228100055-700b765b2111
 ## explicit; go 1.22.0
 github.com/containers/storage
 github.com/containers/storage/drivers

From f71067d710b7da8b10ef88a2185c3c86245db5e5 Mon Sep 17 00:00:00 2001
From: Matt Heon
Date: Thu, 27 Feb 2025 10:38:53 -0500
Subject: [PATCH 2/2] Create quota before _data dir for volumes

This resolves an ordering issue that prevented quotas from being
applied. XFS quotas are applied recursively, but only to
subdirectories created after the quota is applied; if we create
`_data` before the quota, and then use `_data` for all data in the
volume, the quota will never apply to the volume.

Also, add a system test verifying that volume quotas work as
designed, using an XFS-formatted loop device. This should prevent
further regressions in basic quota functionality, such as quotas
being shared between volumes.
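To illustrate the corrected ordering, here is a minimal sketch (not the
actual libpod code: the directory layout and the 2 MiB limit are invented,
and it assumes a volumes directory on XFS mounted with prjquota, run as
root on Linux with cgo):

    package main

    import (
    	"os"
    	"path/filepath"

    	"github.com/containers/storage/drivers/quota"
    )

    func main() {
    	volumesDir := "/var/lib/containers/storage/volumes" // assumed XFS, prjquota
    	volRoot := filepath.Join(volumesDir, "testvol")

    	// The volume root must exist and still be empty when the
    	// project ID is assigned.
    	if err := os.MkdirAll(volRoot, 0o700); err != nil {
    		panic(err)
    	}

    	q, err := quota.NewControl(volumesDir)
    	if err != nil {
    		panic(err)
    	}
    	// Set the project ID and limits first; the ID only propagates
    	// to files and directories created after this point.
    	if err := q.SetQuota(volRoot, quota.Quota{Size: 2 * 1024 * 1024}); err != nil {
    		panic(err)
    	}

    	// Only now create _data, so it (and everything under it)
    	// inherits the project ID and counts against the quota.
    	if err := os.MkdirAll(filepath.Join(volRoot, "_data"), 0o755); err != nil {
    		panic(err)
    	}
    }

The same ordering constraint is why SetQuota in c/storage (patch 1) now
refuses to assign a project ID to a non-empty directory.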
Fixes #25368

Signed-off-by: Matt Heon
---
 libpod/runtime_volume_common.go    | 27 ++++++-----
 rpm/podman.spec                    |  1 +
 test/system/161-volume-quotas.bats | 78 ++++++++++++++++++++++++++++++
 test/system/README.md              |  1 +
 4 files changed, 96 insertions(+), 11 deletions(-)
 create mode 100644 test/system/161-volume-quotas.bats

diff --git a/libpod/runtime_volume_common.go b/libpod/runtime_volume_common.go
index f215667c16..c5cee9b997 100644
--- a/libpod/runtime_volume_common.go
+++ b/libpod/runtime_volume_common.go
@@ -168,16 +168,11 @@ func (r *Runtime) newVolume(ctx context.Context, noCreatePluginVolume bool, opti
 	if err := idtools.SafeChown(volPathRoot, volume.config.UID, volume.config.GID); err != nil {
 		return nil, fmt.Errorf("chowning volume directory %q to %d:%d: %w", volPathRoot, volume.config.UID, volume.config.GID, err)
 	}
-	fullVolPath := filepath.Join(volPathRoot, "_data")
-	if err := os.MkdirAll(fullVolPath, 0755); err != nil {
-		return nil, fmt.Errorf("creating volume directory %q: %w", fullVolPath, err)
-	}
-	if err := idtools.SafeChown(fullVolPath, volume.config.UID, volume.config.GID); err != nil {
-		return nil, fmt.Errorf("chowning volume directory %q to %d:%d: %w", fullVolPath, volume.config.UID, volume.config.GID, err)
-	}
-	if err := LabelVolumePath(fullVolPath, volume.config.MountLabel); err != nil {
-		return nil, err
-	}
+
+	// Setting quotas must happen *before* the _data inner directory
+	// is created, as the volume must be empty for the quota to be
+	// properly applied - if any subdirectories exist before the
+	// quota is applied, the quota will not be applied to them.
 	switch {
 	case volume.config.DisableQuota:
 		if volume.config.Size > 0 || volume.config.Inodes > 0 {
@@ -206,10 +201,20 @@ func (r *Runtime) newVolume(ctx context.Context, noCreatePluginVolume bool, opti
 		// subdirectory - so the quota ID assignment logic works
 		// properly.
 		if err := q.SetQuota(volPathRoot, quota); err != nil {
-			return nil, fmt.Errorf("failed to set size quota size=%d inodes=%d for volume directory %q: %w", volume.config.Size, volume.config.Inodes, fullVolPath, err)
+			return nil, fmt.Errorf("failed to set size quota size=%d inodes=%d for volume directory %q: %w", volume.config.Size, volume.config.Inodes, volPathRoot, err)
 		}
 	}
 
+	fullVolPath := filepath.Join(volPathRoot, "_data")
+	if err := os.MkdirAll(fullVolPath, 0755); err != nil {
+		return nil, fmt.Errorf("creating volume directory %q: %w", fullVolPath, err)
+	}
+	if err := idtools.SafeChown(fullVolPath, volume.config.UID, volume.config.GID); err != nil {
+		return nil, fmt.Errorf("chowning volume directory %q to %d:%d: %w", fullVolPath, volume.config.UID, volume.config.GID, err)
+	}
+	if err := LabelVolumePath(fullVolPath, volume.config.MountLabel); err != nil {
+		return nil, err
+	}
 	volume.config.MountPoint = fullVolPath
 }
 
diff --git a/rpm/podman.spec b/rpm/podman.spec
index 185a8e804c..02df5efe59 100644
--- a/rpm/podman.spec
+++ b/rpm/podman.spec
@@ -151,6 +151,7 @@ Requires: openssl
 Requires: socat
 Requires: buildah
 Requires: gnupg
+Requires: xfsprogs
 
 %description tests
 %{summary}
diff --git a/test/system/161-volume-quotas.bats b/test/system/161-volume-quotas.bats
new file mode 100644
index 0000000000..8019499509
--- /dev/null
+++ b/test/system/161-volume-quotas.bats
@@ -0,0 +1,78 @@
+#!/usr/bin/env bats -*- bats -*-
+#
+# podman volume XFS quota tests
+#
+# bats file_tags=distro-integration
+#
+
+load helpers
+
+function setup() {
+    basic_setup
+
+    run_podman '?' volume rm -a
+}
+
+function teardown() {
+    run_podman '?' rm -af -t 0
+    run_podman '?' volume rm -a
+
+    loop=$PODMAN_TMPDIR/disk.img
+    vol_path=$PODMAN_TMPDIR/volpath
+    if [ -f ${loop} ]; then
+        if [ -d ${vol_path} ]; then
+            if mountpoint ${vol_path}; then
+                umount "$vol_path"
+            fi
+            rm -rf "$vol_path"
+        fi
+
+        while read path dev; do
+            if [[ "$path" == "$loop" ]]; then
+                losetup -d $dev
+            fi
+        done < <(losetup -l --noheadings --output BACK-FILE,NAME)
+        rm -f $loop
+    fi
+
+    basic_teardown
+}
+
+@test "podman volumes with XFS quotas" {
+    skip_if_rootless "Quotas are only possible with root"
+    skip_if_remote "Requires --root flag, not possible w/ remote"
+
+    # Minimum XFS filesystem size is 300mb
+    loop=$PODMAN_TMPDIR/disk.img
+    fallocate -l 300m ${loop}
+    run -0 losetup -f --show $loop
+    loop_dev="$output"
+    mkfs.xfs $loop_dev
+
+    safe_opts=$(podman_isolation_opts ${PODMAN_TMPDIR})
+    vol_path=$PODMAN_TMPDIR/volpath
+    mkdir -p $vol_path
+    safe_opts="$safe_opts --volumepath=$vol_path"
+    mount -t xfs -o defaults,pquota $loop_dev $vol_path
+
+    vol_one="testvol1"
+    run_podman $safe_opts volume create --opt o=size=2m $vol_one
+
+    vol_two="testvol2"
+    run_podman $safe_opts volume create --opt o=size=4m $vol_two
+
+    ctrname="testctr"
+    run_podman $safe_opts run -d --name=$ctrname -i -v $vol_one:/one -v $vol_two:/two $IMAGE top
+
+    run_podman $safe_opts exec $ctrname dd if=/dev/zero of=/one/oneMB bs=1M count=1
+    run_podman 1 $safe_opts exec $ctrname dd if=/dev/zero of=/one/twoMB bs=1M count=1
+    assert "$output" =~ "No space left on device"
+    run_podman $safe_opts exec $ctrname dd if=/dev/zero of=/two/threeMB bs=1M count=3
+    run_podman 1 $safe_opts exec $ctrname dd if=/dev/zero of=/two/oneMB bs=1M count=1
+    assert "$output" =~ "No space left on device"
+
+    run_podman $safe_opts rm -f -t 0 $ctrname
+    run_podman $safe_opts volume rm -af
+}
+
+# vim: filetype=sh
diff --git a/test/system/README.md b/test/system/README.md
index 1800a1fd17..fdd873a655 100644
--- a/test/system/README.md
+++ b/test/system/README.md
@@ -92,6 +92,7 @@ Requirements
 - socat
 - buildah
 - gnupg
+- xfsprogs
 
 Further Details