vendor: update c/storage

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
Giuseppe Scrivano 2024-07-01 11:35:39 +02:00
parent 5cc57427f3
commit 5e156c424f
No known key found for this signature in database
GPG Key ID: 67E38F7A8BA21772
54 changed files with 1272 additions and 1170 deletions

4
go.mod
View File

@ -21,7 +21,7 @@ require (
github.com/containers/libhvee v0.7.1
github.com/containers/ocicrypt v1.1.10
github.com/containers/psgo v1.9.0
github.com/containers/storage v1.54.0
github.com/containers/storage v1.54.1-0.20240627145511-52b643e1ff51
github.com/containers/winquit v1.1.0
github.com/coreos/go-systemd/v22 v22.5.1-0.20231103132048-7d375ecc2b09
github.com/coreos/stream-metadata-go v0.4.4
@ -88,7 +88,7 @@ require (
require (
dario.cat/mergo v1.0.0 // indirect
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect
github.com/Microsoft/hcsshim v0.12.3 // indirect
github.com/Microsoft/hcsshim v0.12.4 // indirect
github.com/VividCortex/ewma v1.2.0 // indirect
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d // indirect
github.com/aead/serpent v0.0.0-20160714141033-fba169763ea6 // indirect

8
go.sum
View File

@ -12,8 +12,8 @@ github.com/BurntSushi/toml v1.4.0 h1:kuoIxZQy2WRRk1pttg9asf+WVv6tWQuBNVmK8+nqPr0
github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/Microsoft/hcsshim v0.12.3 h1:LS9NXqXhMoqNCplK1ApmVSfB4UnVLRDWRapB6EIlxE0=
github.com/Microsoft/hcsshim v0.12.3/go.mod h1:Iyl1WVpZzr+UkzjekHZbV8o5Z9ZkxNGx6CtY2Qg/JVQ=
github.com/Microsoft/hcsshim v0.12.4 h1:Ev7YUMHAHoWNm+aDSPzc5W9s6E2jyL1szpVDJeZ/Rr4=
github.com/Microsoft/hcsshim v0.12.4/go.mod h1:Iyl1WVpZzr+UkzjekHZbV8o5Z9ZkxNGx6CtY2Qg/JVQ=
github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2 h1:+vx7roKuyA63nhn5WAunQHLTznkw5W8b1Xc0dNjp83s=
github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2/go.mod h1:HBCaDeC1lPdgDeDbhX8XFpy1jqjK0IBG8W5K+xYqA0w=
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
@ -97,8 +97,8 @@ github.com/containers/ocicrypt v1.1.10 h1:r7UR6o8+lyhkEywetubUUgcKFjOWOaWz8cEBrC
github.com/containers/ocicrypt v1.1.10/go.mod h1:YfzSSr06PTHQwSTUKqDSjish9BeW1E4HUmreluQcMd8=
github.com/containers/psgo v1.9.0 h1:eJ74jzSaCHnWt26OlKZROSyUyRcGDf+gYBdXnxrMW4g=
github.com/containers/psgo v1.9.0/go.mod h1:0YoluUm43Mz2UnBIh1P+6V6NWcbpTL5uRtXyOcH0B5A=
github.com/containers/storage v1.54.0 h1:xwYAlf6n9OnIlURQLLg3FYHbO74fQ/2W2N6EtQEUM4I=
github.com/containers/storage v1.54.0/go.mod h1:PlMOoinRrBSnhYODLxt4EXl0nmJt+X0kjG0Xdt9fMTw=
github.com/containers/storage v1.54.1-0.20240627145511-52b643e1ff51 h1:0ipwtt1iNX4gSje0iQHHtnvqnU45uUyGO1LVGBkpoSE=
github.com/containers/storage v1.54.1-0.20240627145511-52b643e1ff51/go.mod h1:y1CGloHDYq9uK3Og/zLkrJ8vpSuFwNaIWOyB8IX076w=
github.com/containers/winquit v1.1.0 h1:jArun04BNDQvt2W0Y78kh9TazN2EIEMG5Im6/JY7+pE=
github.com/containers/winquit v1.1.0/go.mod h1:PsPeZlnbkmGGIToMPHF1zhWjBUkd8aHjMOr/vFcPxw8=
github.com/coreos/go-oidc/v3 v3.10.0 h1:tDnXHnLyiTVyT/2zLDGj09pFPkhND8Gl8lnTRhoEaJU=

View File

@ -57,9 +57,10 @@ type PaPolicy struct {
type OutboundNatPolicy struct {
Policy
VIP string `json:"VIP,omitempty"`
Exceptions []string `json:"ExceptionList,omitempty"`
Destinations []string `json:",omitempty"`
VIP string `json:"VIP,omitempty"`
Exceptions []string `json:"ExceptionList,omitempty"`
Destinations []string `json:",omitempty"`
MaxPortPoolUsage uint16 `json:",omitempty"`
}
type ProxyPolicy struct {

View File

@ -23,7 +23,7 @@ env:
# GCE project where images live
IMAGE_PROJECT: "libpod-218412"
# VM Image built in containers/automation_images
IMAGE_SUFFIX: "c20240513t140131z-f40f39d13"
IMAGE_SUFFIX: "c20240529t141726z-f40f39d13"
FEDORA_CACHE_IMAGE_NAME: "fedora-${IMAGE_SUFFIX}"
DEBIAN_CACHE_IMAGE_NAME: "debian-${IMAGE_SUFFIX}"

View File

@ -53,6 +53,8 @@ local-cross cross: ## cross build the binaries for arm, darwin, and freebsd
os=`echo $${target} | cut -f1 -d/` ; \
arch=`echo $${target} | cut -f2 -d/` ; \
suffix=$${os}.$${arch} ; \
echo env CGO_ENABLED=0 GOOS=$${os} GOARCH=$${arch} $(GO) build -compiler gc -tags \"$(NATIVETAGS) $(TAGS)\" $(FLAGS) ./... ; \
env CGO_ENABLED=0 GOOS=$${os} GOARCH=$${arch} $(GO) build -compiler gc -tags "$(NATIVETAGS) $(TAGS)" $(FLAGS) ./... || exit 1 ; \
echo env CGO_ENABLED=0 GOOS=$${os} GOARCH=$${arch} $(GO) build -compiler gc -tags \"$(NATIVETAGS) $(TAGS)\" $(FLAGS) -o containers-storage.$${suffix} ./cmd/containers-storage ; \
env CGO_ENABLED=0 GOOS=$${os} GOARCH=$${arch} $(GO) build -compiler gc -tags "$(NATIVETAGS) $(TAGS)" $(FLAGS) -o containers-storage.$${suffix} ./cmd/containers-storage || exit 1 ; \
done

View File

@ -1 +1 @@
1.54.0
1.55.0-dev

View File

@ -75,8 +75,6 @@ func init() {
type Driver struct {
sync.Mutex
root string
uidMaps []idtools.IDMap
gidMaps []idtools.IDMap
ctr *graphdriver.RefCounter
pathCacheLock sync.Mutex
pathCache map[string]string
@ -129,22 +127,16 @@ func Init(home string, options graphdriver.Options) (graphdriver.Driver, error)
a := &Driver{
root: home,
uidMaps: options.UIDMaps,
gidMaps: options.GIDMaps,
pathCache: make(map[string]string),
ctr: graphdriver.NewRefCounter(graphdriver.NewFsChecker(graphdriver.FsMagicAufs)),
locker: locker.New(),
mountOptions: mountOptions,
}
rootUID, rootGID, err := idtools.GetRootUIDGID(options.UIDMaps, options.GIDMaps)
if err != nil {
return nil, err
}
// Create the root aufs driver dir and return
// if it already exists
// If not populate the dir structure
if err := idtools.MkdirAllAs(home, 0o700, rootUID, rootGID); err != nil {
if err := os.MkdirAll(home, 0o700); err != nil {
if os.IsExist(err) {
return a, nil
}
@ -157,7 +149,7 @@ func Init(home string, options graphdriver.Options) (graphdriver.Driver, error)
// Populate the dir structure
for _, p := range paths {
if err := idtools.MkdirAllAs(path.Join(home, p), 0o700, rootUID, rootGID); err != nil {
if err := os.MkdirAll(path.Join(home, p), 0o700); err != nil {
return nil, err
}
}
@ -334,7 +326,7 @@ func (a *Driver) createDirsFor(id, parent string) error {
// The path of directories are <aufs_root_path>/mnt/<image_id>
// and <aufs_root_path>/diff/<image_id>
for _, p := range paths {
rootPair := idtools.NewIDMappingsFromMaps(a.uidMaps, a.gidMaps).RootPair()
rootPair := idtools.IDPair{UID: 0, GID: 0}
rootPerms := defaultPerms
if parent != "" {
st, err := system.Stat(path.Join(a.rootPath(), p, parent))

View File

@ -66,11 +66,7 @@ func Init(home string, options graphdriver.Options) (graphdriver.Driver, error)
return nil, fmt.Errorf("%q is not on a btrfs filesystem: %w", home, graphdriver.ErrPrerequisites)
}
rootUID, rootGID, err := idtools.GetRootUIDGID(options.UIDMaps, options.GIDMaps)
if err != nil {
return nil, err
}
if err := idtools.MkdirAllAs(filepath.Join(home, "subvolumes"), 0o700, rootUID, rootGID); err != nil {
if err := os.MkdirAll(filepath.Join(home, "subvolumes"), 0o700); err != nil {
return nil, err
}
@ -85,8 +81,6 @@ func Init(home string, options graphdriver.Options) (graphdriver.Driver, error)
driver := &Driver{
home: home,
uidMaps: options.UIDMaps,
gidMaps: options.GIDMaps,
options: opt,
}
@ -129,8 +123,6 @@ func parseOptions(opt []string) (btrfsOptions, bool, error) {
type Driver struct {
// root of the file system
home string
uidMaps []idtools.IDMap
gidMaps []idtools.IDMap
options btrfsOptions
quotaEnabled bool
once sync.Once
@ -481,11 +473,7 @@ func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts
func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
quotas := d.quotasDir()
subvolumes := d.subvolumesDir()
rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
if err != nil {
return err
}
if err := idtools.MkdirAllAs(subvolumes, 0o700, rootUID, rootGID); err != nil {
if err := os.MkdirAll(subvolumes, 0o700); err != nil {
return err
}
if parent == "" {
@ -523,7 +511,7 @@ func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
if err := d.setStorageSize(path.Join(subvolumes, id), driver); err != nil {
return err
}
if err := idtools.MkdirAllAs(quotas, 0o700, rootUID, rootGID); err != nil {
if err := os.MkdirAll(quotas, 0o700); err != nil {
return err
}
if err := os.WriteFile(path.Join(quotas, id), []byte(fmt.Sprint(driver.options.size)), 0o644); err != nil {
@ -531,14 +519,6 @@ func (d *Driver) Create(id, parent string, opts *graphdriver.CreateOpts) error {
}
}
// if we have a remapped root (user namespaces enabled), change the created snapshot
// dir ownership to match
if rootUID != 0 || rootGID != 0 {
if err := os.Chown(path.Join(subvolumes, id), rootUID, rootGID); err != nil {
return err
}
}
mountLabel := ""
if opts != nil {
mountLabel = opts.MountLabel

View File

@ -4,11 +4,12 @@ import (
"bytes"
"errors"
"fmt"
"io/fs"
"os"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/reexec"
"github.com/opencontainers/selinux/pkg/pwalk"
"github.com/opencontainers/selinux/pkg/pwalkdir"
)
const (
@ -54,13 +55,14 @@ func chownByMapsMain() {
chowner := newLChowner()
chown := func(path string, info os.FileInfo, _ error) error {
if path == "." {
var chown fs.WalkDirFunc = func(path string, d fs.DirEntry, _ error) error {
info, err := d.Info()
if path == "." || err != nil {
return nil
}
return chowner.LChown(path, info, toHost, toContainer)
}
if err := pwalk.Walk(".", chown); err != nil {
if err := pwalkdir.Walk(".", chown); err != nil {
fmt.Fprintf(os.Stderr, "error during chown: %v", err)
os.Exit(1)
}

View File

@ -1,5 +1,5 @@
//go:build linux || darwin || freebsd || solaris
// +build linux darwin freebsd solaris
//go:build !windows
// +build !windows
package graphdriver

View File

@ -50,13 +50,13 @@ func CopyRegularToFile(srcPath string, dstFile *os.File, fileinfo os.FileInfo, c
defer srcFile.Close()
if *copyWithFileClone {
_, _, err = unix.Syscall(unix.SYS_IOCTL, dstFile.Fd(), C.FICLONE, srcFile.Fd())
if err == nil {
_, _, errno := unix.Syscall(unix.SYS_IOCTL, dstFile.Fd(), C.FICLONE, srcFile.Fd())
if errno == 0 {
return nil
}
*copyWithFileClone = false
if err == unix.EXDEV {
if errno == unix.EXDEV {
*copyWithFileRange = false
}
}

View File

@ -215,20 +215,25 @@ const (
DifferOutputFormatFlat
)
// DifferFsVerity is a part of the experimental Differ interface and should not be used from outside of c/storage.
// It configures the fsverity requirement.
type DifferFsVerity int
const (
// DifferFsVerityDisabled means no fs-verity is used
DifferFsVerityDisabled = iota
// DifferFsVerityEnabled means fs-verity is used when supported
DifferFsVerityEnabled
// DifferFsVerityIfAvailable means fs-verity is used when supported by
// the underlying kernel and filesystem.
DifferFsVerityIfAvailable
// DifferFsVerityRequired means fs-verity is required
// DifferFsVerityRequired means fs-verity is required. Note this is not
// currently set or exposed by the overlay driver.
DifferFsVerityRequired
)
// DifferOptions overrides how the differ work
// DifferOptions is a part of the experimental Differ interface and should not be used from outside of c/storage.
// It overrides how the differ works.
type DifferOptions struct {
// Format defines the destination directory layout format
Format DifferOutputFormat
@ -377,8 +382,6 @@ type Options struct {
ImageStore string
DriverPriority []string
DriverOptions []string
UIDMaps []idtools.IDMap
GIDMaps []idtools.IDMap
ExperimentalEnabled bool
}

View File

@ -9,6 +9,7 @@ import (
"errors"
"fmt"
"io"
"io/fs"
"os"
"os/exec"
"path"
@ -119,8 +120,6 @@ type Driver struct {
home string
runhome string
imageStore string
uidMaps []idtools.IDMap
gidMaps []idtools.IDMap
ctr *graphdriver.RefCounter
quotaCtl *quota.Control
options overlayOptions
@ -332,13 +331,9 @@ func Init(home string, options graphdriver.Options) (graphdriver.Driver, error)
backingFs = fsName
runhome := filepath.Join(options.RunRoot, filepath.Base(home))
rootUID, rootGID, err := idtools.GetRootUIDGID(options.UIDMaps, options.GIDMaps)
if err != nil {
return nil, err
}
// Create the driver home dir
if err := idtools.MkdirAllAs(path.Join(home, linkDir), 0o755, 0, 0); err != nil {
if err := os.MkdirAll(path.Join(home, linkDir), 0o755); err != nil {
return nil, err
}
@ -348,7 +343,7 @@ func Init(home string, options graphdriver.Options) (graphdriver.Driver, error)
}
}
if err := idtools.MkdirAllAs(runhome, 0o700, rootUID, rootGID); err != nil {
if err := os.MkdirAll(runhome, 0o700); err != nil {
return nil, err
}
@ -373,9 +368,6 @@ func Init(home string, options graphdriver.Options) (graphdriver.Driver, error)
return nil, err
}
} else {
if opts.forceMask != nil {
return nil, errors.New("'force_mask' is supported only with 'mount_program'")
}
// check if they are running over btrfs, aufs, overlay, or ecryptfs
switch fsMagic {
case graphdriver.FsMagicAufs, graphdriver.FsMagicOverlay, graphdriver.FsMagicEcryptfs:
@ -457,8 +449,6 @@ func Init(home string, options graphdriver.Options) (graphdriver.Driver, error)
home: home,
imageStore: options.ImageStore,
runhome: runhome,
uidMaps: options.UIDMaps,
gidMaps: options.GIDMaps,
ctr: graphdriver.NewRefCounter(graphdriver.NewFsChecker(fileSystemType)),
supportsDType: supportsDType,
usingMetacopy: usingMetacopy,
@ -983,6 +973,10 @@ func (d *Driver) CreateReadWrite(id, parent string, opts *graphdriver.CreateOpts
}
}
if d.options.forceMask != nil && d.options.mountProgram == "" {
return fmt.Errorf("overlay: force_mask option for writeable layers is only supported with a mount_program")
}
if _, ok := opts.StorageOpt["size"]; !ok {
if opts.StorageOpt == nil {
opts.StorageOpt = map[string]string{}
@ -1021,8 +1015,8 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, readOnl
disableQuota := readOnly
uidMaps := d.uidMaps
gidMaps := d.gidMaps
var uidMaps []idtools.IDMap
var gidMaps []idtools.IDMap
if opts != nil && opts.IDMappings != nil {
uidMaps = opts.IDMappings.UIDs()
@ -1047,14 +1041,23 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, readOnl
if err := idtools.MkdirAllAndChownNew(path.Dir(dir), 0o755, idPair); err != nil {
return err
}
st := idtools.Stat{IDs: idPair, Mode: defaultPerms}
if parent != "" {
parentBase := d.dir(parent)
st, err := system.Stat(filepath.Join(parentBase, "diff"))
if err != nil {
return err
parentDiff := filepath.Join(parentBase, "diff")
if xSt, err := idtools.GetContainersOverrideXattr(parentDiff); err == nil {
st = xSt
} else {
systemSt, err := system.Stat(parentDiff)
if err != nil {
return err
}
st.IDs.UID = int(systemSt.UID())
st.IDs.GID = int(systemSt.GID())
st.Mode = os.FileMode(systemSt.Mode())
}
rootUID = int(st.UID())
rootGID = int(st.GID())
}
if err := fileutils.Lexists(dir); err == nil {
@ -1100,22 +1103,21 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, readOnl
}
}
perms := defaultPerms
forcedSt := st
if d.options.forceMask != nil {
perms = *d.options.forceMask
forcedSt.IDs = idPair
forcedSt.Mode = *d.options.forceMask
}
if parent != "" {
parentBase := d.dir(parent)
st, err := system.Stat(filepath.Join(parentBase, "diff"))
if err != nil {
diff := path.Join(dir, "diff")
if err := idtools.MkdirAs(diff, forcedSt.Mode, forcedSt.IDs.UID, forcedSt.IDs.GID); err != nil {
return err
}
if d.options.forceMask != nil {
if err := idtools.SetContainersOverrideXattr(diff, st); err != nil {
return err
}
perms = os.FileMode(st.Mode())
}
if err := idtools.MkdirAs(path.Join(dir, "diff"), perms, rootUID, rootGID); err != nil {
return err
}
lid := generateID(idLength)
@ -1130,16 +1132,16 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, readOnl
return err
}
if err := idtools.MkdirAs(path.Join(dir, "work"), 0o700, rootUID, rootGID); err != nil {
if err := idtools.MkdirAs(path.Join(dir, "work"), 0o700, forcedSt.IDs.UID, forcedSt.IDs.GID); err != nil {
return err
}
if err := idtools.MkdirAs(path.Join(dir, "merged"), 0o700, rootUID, rootGID); err != nil {
if err := idtools.MkdirAs(path.Join(dir, "merged"), 0o700, forcedSt.IDs.UID, forcedSt.IDs.GID); err != nil {
return err
}
// if no parent directory, create a dummy lower directory and skip writing a "lowers" file
if parent == "" {
return idtools.MkdirAs(path.Join(dir, "empty"), 0o700, rootUID, rootGID)
return idtools.MkdirAs(path.Join(dir, "empty"), 0o700, forcedSt.IDs.UID, forcedSt.IDs.GID)
}
lower, err := d.getLower(parent)
@ -1283,12 +1285,6 @@ func (d *Driver) getLowerDirs(id string) ([]string, error) {
}
func (d *Driver) optsAppendMappings(opts string, uidMaps, gidMaps []idtools.IDMap) string {
if uidMaps == nil {
uidMaps = d.uidMaps
}
if gidMaps == nil {
gidMaps = d.gidMaps
}
if uidMaps != nil {
var uids, gids bytes.Buffer
if len(uidMaps) == 1 && uidMaps[0].Size == 1 {
@ -1539,11 +1535,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
for err == nil {
absLowers = append(absLowers, filepath.Join(dir, nameWithSuffix("diff", diffN)))
diffN++
st, err = os.Stat(filepath.Join(dir, nameWithSuffix("diff", diffN)))
if err == nil && !permsKnown {
perms = os.FileMode(st.Mode())
permsKnown = true
}
err = fileutils.Exists(filepath.Join(dir, nameWithSuffix("diff", diffN)))
}
idmappedMountProcessPid := -1
@ -2024,11 +2016,27 @@ func (d *Driver) getWhiteoutFormat() archive.WhiteoutFormat {
}
type overlayFileGetter struct {
diffDirs []string
diffDirs []string
composefsMounts map[string]*os.File // map from diff dir to the directory with the composefs blob mounted
}
func (g *overlayFileGetter) Get(path string) (io.ReadCloser, error) {
buf := make([]byte, unix.PathMax)
for _, d := range g.diffDirs {
if f, found := g.composefsMounts[d]; found {
// there is no *at equivalent for getxattr, but it can be emulated by opening the file under /proc/self/fd/$FD/$PATH
len, err := unix.Getxattr(fmt.Sprintf("/proc/self/fd/%d/%s", int(f.Fd()), path), "trusted.overlay.redirect", buf)
if err != nil {
if errors.Is(err, unix.ENODATA) {
continue
}
return nil, &fs.PathError{Op: "getxattr", Path: path, Err: err}
}
// the xattr value is the path to the file in the composefs layer diff directory
return os.Open(filepath.Join(d, string(buf[:len])))
}
f, err := os.Open(filepath.Join(d, path))
if err == nil {
return f, nil
@ -2041,7 +2049,16 @@ func (g *overlayFileGetter) Get(path string) (io.ReadCloser, error) {
}
func (g *overlayFileGetter) Close() error {
return nil
var errs *multierror.Error
for _, f := range g.composefsMounts {
if err := f.Close(); err != nil {
errs = multierror.Append(errs, err)
}
if err := unix.Rmdir(f.Name()); err != nil {
errs = multierror.Append(errs, err)
}
}
return errs.ErrorOrNil()
}
func (d *Driver) getStagingDir(id string) string {
@ -2052,10 +2069,7 @@ func (d *Driver) getStagingDir(id string) string {
// DiffGetter returns a FileGetCloser that can read files from the directory that
// contains files for the layer differences, either for this layer, or one of our
// lowers if we're just a template directory. Used for direct access for tar-split.
func (d *Driver) DiffGetter(id string) (graphdriver.FileGetCloser, error) {
if d.usingComposefs {
return nil, nil
}
func (d *Driver) DiffGetter(id string) (_ graphdriver.FileGetCloser, Err error) {
p, err := d.getDiffPath(id)
if err != nil {
return nil, err
@ -2064,7 +2078,41 @@ func (d *Driver) DiffGetter(id string) (graphdriver.FileGetCloser, error) {
if err != nil {
return nil, err
}
return &overlayFileGetter{diffDirs: append([]string{p}, paths...)}, nil
// map from diff dir to the directory with the composefs blob mounted
composefsMounts := make(map[string]*os.File)
defer func() {
if Err != nil {
for _, f := range composefsMounts {
f.Close()
unix.Rmdir(f.Name())
}
}
}()
diffDirs := append([]string{p}, paths...)
for _, diffDir := range diffDirs {
// diffDir has the form $GRAPH_ROOT/overlay/$ID/diff, so grab the $ID from the parent directory
id := path.Base(path.Dir(diffDir))
composefsBlob := d.getComposefsData(id)
if fileutils.Exists(composefsBlob) != nil {
// not a composefs layer, ignore it
continue
}
dir, err := os.MkdirTemp(d.runhome, "composefs-mnt")
if err != nil {
return nil, err
}
if err := mountComposefsBlob(composefsBlob, dir); err != nil {
return nil, err
}
fd, err := os.Open(dir)
if err != nil {
return nil, err
}
composefsMounts[diffDir] = fd
_ = unix.Unmount(dir, unix.MNT_DETACH)
}
return &overlayFileGetter{diffDirs: diffDirs, composefsMounts: composefsMounts}, nil
}
// CleanupStagingDirectory cleanups the staging directory.
@ -2100,9 +2148,16 @@ func supportsDataOnlyLayersCached(home, runhome string) (bool, error) {
// ApplyDiffWithDiffer applies the changes in the new layer using the specified function
func (d *Driver) ApplyDiffWithDiffer(id, parent string, options *graphdriver.ApplyDiffWithDifferOpts, differ graphdriver.Differ) (output graphdriver.DriverWithDifferOutput, errRet error) {
var idMappings *idtools.IDMappings
var forceMask *os.FileMode
if options != nil {
idMappings = options.Mappings
forceMask = options.ForceMask
}
if d.options.forceMask != nil {
forceMask = d.options.forceMask
}
if idMappings == nil {
idMappings = &idtools.IDMappings{}
}
@ -2120,8 +2175,8 @@ func (d *Driver) ApplyDiffWithDiffer(id, parent string, options *graphdriver.App
return graphdriver.DriverWithDifferOutput{}, err
}
perms := defaultPerms
if d.options.forceMask != nil {
perms = *d.options.forceMask
if forceMask != nil {
perms = *forceMask
}
applyDir = filepath.Join(layerDir, "dir")
if err := os.Mkdir(applyDir, perms); err != nil {
@ -2155,7 +2210,7 @@ func (d *Driver) ApplyDiffWithDiffer(id, parent string, options *graphdriver.App
}
if d.usingComposefs {
differOptions.Format = graphdriver.DifferOutputFormatFlat
differOptions.UseFsVerity = graphdriver.DifferFsVerityEnabled
differOptions.UseFsVerity = graphdriver.DifferFsVerityIfAvailable
}
out, err := differ.ApplyDiff(applyDir, &archive.TarOptions{
UIDMaps: idMappings.UIDs(),
@ -2163,6 +2218,7 @@ func (d *Driver) ApplyDiffWithDiffer(id, parent string, options *graphdriver.App
IgnoreChownErrors: d.options.ignoreChownErrors,
WhiteoutFormat: d.getWhiteoutFormat(),
InUserNS: unshare.IsRootless(),
ForceMask: forceMask,
}, &differOptions)
out.Target = applyDir
@ -2476,6 +2532,19 @@ func nameWithSuffix(name string, number int) string {
return fmt.Sprintf("%s%d", name, number)
}
func validateOneAdditionalLayerPath(target string) error {
for _, p := range []string{
filepath.Join(target, "diff"),
filepath.Join(target, "info"),
filepath.Join(target, "blob"),
} {
if err := fileutils.Exists(p); err != nil {
return err
}
}
return nil
}
func (d *Driver) getAdditionalLayerPath(tocDigest digest.Digest, ref string) (string, error) {
refElem := base64.StdEncoding.EncodeToString([]byte(ref))
for _, ls := range d.options.layerStores {
@ -2484,18 +2553,11 @@ func (d *Driver) getAdditionalLayerPath(tocDigest digest.Digest, ref string) (st
ref = refElem
}
target := path.Join(ls.path, ref, tocDigest.String())
// Check if all necessary files exist
for _, p := range []string{
filepath.Join(target, "diff"),
filepath.Join(target, "info"),
filepath.Join(target, "blob"),
} {
if err := fileutils.Exists(p); err != nil {
wrapped := fmt.Errorf("failed to stat additional layer %q: %w", p, err)
return "", fmt.Errorf("%v: %w", wrapped, graphdriver.ErrLayerUnknown)
}
err := validateOneAdditionalLayerPath(target)
if err == nil {
return target, nil
}
return target, nil
logrus.Debugf("additional Layer Store %v failed to stat additional layer: %v", ls, err)
}
return "", fmt.Errorf("additional layer (%q, %q) not found: %w", tocDigest, ref, graphdriver.ErrLayerUnknown)

View File

@ -1,5 +1,5 @@
//go:build linux && cgo
// +build linux,cgo
//go:build linux && cgo && !exclude_disk_quota
// +build linux,cgo,!exclude_disk_quota
package overlay

View File

@ -0,0 +1,18 @@
//go:build linux && (!cgo || exclude_disk_quota)
// +build linux
// +build !cgo exclude_disk_quota
package overlay
import (
"path"
"github.com/containers/storage/pkg/directory"
)
// ReadWriteDiskUsage returns the disk usage of the writable directory for the ID.
// For Overlay, it attempts to check the XFS quota for size, and falls back to
// finding the size of the "diff" directory.
func (d *Driver) ReadWriteDiskUsage(id string) (*directory.DiskUsage, error) {
return directory.Usage(path.Join(d.dir(id), "diff"))
}

View File

@ -5,18 +5,8 @@ package overlay
import (
"fmt"
"path"
"github.com/containers/storage/pkg/directory"
)
// ReadWriteDiskUsage returns the disk usage of the writable directory for the ID.
// For Overlay, it attempts to check the XFS quota for size, and falls back to
// finding the size of the "diff" directory.
func (d *Driver) ReadWriteDiskUsage(id string) (*directory.DiskUsage, error) {
return directory.Usage(path.Join(d.dir(id), "diff"))
}
func getComposeFsHelper() (string, error) {
return "", fmt.Errorf("composefs not supported on this build")
}

View File

@ -19,16 +19,6 @@ package quota
#include <linux/quota.h>
#include <linux/dqblk_xfs.h>
#ifndef FS_XFLAG_PROJINHERIT
struct fsxattr {
__u32 fsx_xflags;
__u32 fsx_extsize;
__u32 fsx_nextents;
__u32 fsx_projid;
unsigned char fsx_pad[12];
};
#define FS_XFLAG_PROJINHERIT 0x00000200
#endif
#ifndef FS_IOC_FSGETXATTR
#define FS_IOC_FSGETXATTR _IOR ('X', 31, struct fsxattr)
#endif
@ -357,7 +347,6 @@ func setProjectID(targetPath string, projectID uint32) error {
return fmt.Errorf("failed to get projid for %s: %w", targetPath, errno)
}
fsx.fsx_projid = C.__u32(projectID)
fsx.fsx_xflags |= C.FS_XFLAG_PROJINHERIT
_, _, errno = unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.FS_IOC_FSSETXATTR,
uintptr(unsafe.Pointer(&fsx)))
if errno != 0 {

View File

@ -33,12 +33,10 @@ func Init(home string, options graphdriver.Options) (graphdriver.Driver, error)
d := &Driver{
name: "vfs",
home: home,
idMappings: idtools.NewIDMappingsFromMaps(options.UIDMaps, options.GIDMaps),
imageStore: options.ImageStore,
}
rootIDs := d.idMappings.RootPair()
if err := idtools.MkdirAllAndChown(filepath.Join(home, "dir"), 0o700, rootIDs); err != nil {
if err := os.MkdirAll(filepath.Join(home, "dir"), 0o700); err != nil {
return nil, err
}
for _, option := range options.DriverOptions {
@ -79,7 +77,6 @@ type Driver struct {
name string
home string
additionalHomes []string
idMappings *idtools.IDMappings
ignoreChownErrors bool
naiveDiff graphdriver.DiffDriver
updater graphdriver.LayerIDMapUpdater
@ -152,14 +149,21 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, ro bool
return fmt.Errorf("--storage-opt is not supported for vfs")
}
idMappings := d.idMappings
var uidMaps []idtools.IDMap
var gidMaps []idtools.IDMap
if opts != nil && opts.IDMappings != nil {
idMappings = opts.IDMappings
uidMaps = opts.IDMappings.UIDs()
gidMaps = opts.IDMappings.GIDs()
}
rootUID, rootGID, err := idtools.GetRootUIDGID(uidMaps, gidMaps)
if err != nil {
return err
}
dir := d.dir2(id, ro)
rootIDs := idMappings.RootPair()
if err := idtools.MkdirAllAndChown(filepath.Dir(dir), 0o700, rootIDs); err != nil {
if err := os.MkdirAll(filepath.Dir(dir), 0o700); err != nil {
return err
}
@ -174,16 +178,17 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, ro bool
rootPerms = os.FileMode(0o700)
}
idPair := idtools.IDPair{UID: rootUID, GID: rootGID}
if parent != "" {
st, err := system.Stat(d.dir(parent))
if err != nil {
return err
}
rootPerms = os.FileMode(st.Mode())
rootIDs.UID = int(st.UID())
rootIDs.GID = int(st.GID())
idPair.UID = int(st.UID())
idPair.GID = int(st.GID())
}
if err := idtools.MkdirAndChown(dir, rootPerms, rootIDs); err != nil {
if err := idtools.MkdirAllAndChownNew(dir, rootPerms, idPair); err != nil {
return err
}
labelOpts := []string{"level:s0"}

View File

@ -106,11 +106,7 @@ func Init(base string, opt graphdriver.Options) (graphdriver.Driver, error) {
return nil, fmt.Errorf("zfs get all -t filesystem -rHp '%s' should contain '%s'", options.fsName, options.fsName)
}
rootUID, rootGID, err := idtools.GetRootUIDGID(opt.UIDMaps, opt.GIDMaps)
if err != nil {
return nil, fmt.Errorf("failed to get root uid/gid: %w", err)
}
if err := idtools.MkdirAllAs(base, 0o700, rootUID, rootGID); err != nil {
if err := os.MkdirAll(base, 0o700); err != nil {
return nil, fmt.Errorf("failed to create '%s': %w", base, err)
}
@ -118,8 +114,6 @@ func Init(base string, opt graphdriver.Options) (graphdriver.Driver, error) {
dataset: rootDataset,
options: options,
filesystemsCache: filesystemsCache,
uidMaps: opt.UIDMaps,
gidMaps: opt.GIDMaps,
ctr: graphdriver.NewRefCounter(graphdriver.NewDefaultChecker()),
}
return graphdriver.NewNaiveDiffDriver(d, graphdriver.NewNaiveLayerIDMapUpdater(d)), nil
@ -177,8 +171,6 @@ type Driver struct {
options zfsOptions
sync.Mutex // protects filesystem cache against concurrent access
filesystemsCache map[string]bool
uidMaps []idtools.IDMap
gidMaps []idtools.IDMap
ctr *graphdriver.RefCounter
}
@ -448,12 +440,8 @@ func (d *Driver) Get(id string, options graphdriver.MountOpts) (_ string, retErr
opts := label.FormatMountLabel(mountOptions, options.MountLabel)
logrus.WithField("storage-driver", "zfs").Debugf(`mount("%s", "%s", "%s")`, filesystem, mountpoint, opts)
rootUID, rootGID, err := idtools.GetRootUIDGID(d.uidMaps, d.gidMaps)
if err != nil {
return "", err
}
// Create the target directories if they don't exist
if err := idtools.MkdirAllAs(mountpoint, 0o755, rootUID, rootGID); err != nil {
if err := os.MkdirAll(mountpoint, 0o755); err != nil {
return "", err
}

View File

@ -5,7 +5,7 @@ import (
)
// Deprecated: Use lockfile.*LockFile.
type Locker = lockfile.Locker //lint:ignore SA1019 // lockfile.Locker is deprecated
type Locker = lockfile.Locker //nolint:staticcheck // SA1019 lockfile.Locker is deprecated
// Deprecated: Use lockfile.GetLockFile.
func GetLockfile(path string) (lockfile.Locker, error) {

View File

@ -70,6 +70,8 @@ type (
}
)
const PaxSchilyXattr = "SCHILY.xattr."
const (
tarExt = "tar"
solaris = "solaris"
@ -169,10 +171,17 @@ func DetectCompression(source []byte) Compression {
}
// DecompressStream decompresses the archive and returns a ReadCloser with the decompressed archive.
func DecompressStream(archive io.Reader) (io.ReadCloser, error) {
func DecompressStream(archive io.Reader) (_ io.ReadCloser, Err error) {
p := pools.BufioReader32KPool
buf := p.Get(archive)
bs, err := buf.Peek(10)
defer func() {
if Err != nil {
p.Put(buf)
}
}()
if err != nil && err != io.EOF {
// Note: we'll ignore any io.EOF error because there are some odd
// cases where the layer.tar file will be empty (zero bytes) and
@ -189,6 +198,12 @@ func DecompressStream(archive io.Reader) (io.ReadCloser, error) {
readBufWrapper := p.NewReadCloserWrapper(buf, buf)
return readBufWrapper, nil
case Gzip:
cleanup := func() {
p.Put(buf)
}
if rc, canUse := tryProcFilter([]string{"pigz", "-d"}, buf, cleanup); canUse {
return rc, nil
}
gzReader, err := gzip.NewReader(buf)
if err != nil {
return nil, err
@ -207,6 +222,12 @@ func DecompressStream(archive io.Reader) (io.ReadCloser, error) {
readBufWrapper := p.NewReadCloserWrapper(buf, xzReader)
return readBufWrapper, nil
case Zstd:
cleanup := func() {
p.Put(buf)
}
if rc, canUse := tryProcFilter([]string{"zstd", "-d"}, buf, cleanup); canUse {
return rc, nil
}
return zstdReader(buf)
default:
return nil, fmt.Errorf("unsupported compression format %s", (&compression).Extension())
@ -214,9 +235,16 @@ func DecompressStream(archive io.Reader) (io.ReadCloser, error) {
}
// CompressStream compresses the dest with specified compression algorithm.
func CompressStream(dest io.Writer, compression Compression) (io.WriteCloser, error) {
func CompressStream(dest io.Writer, compression Compression) (_ io.WriteCloser, Err error) {
p := pools.BufioWriter32KPool
buf := p.Get(dest)
defer func() {
if Err != nil {
p.Put(buf)
}
}()
switch compression {
case Uncompressed:
writeBufWrapper := p.NewWriteCloserWrapper(buf, buf)
@ -391,11 +419,11 @@ func FileInfoHeader(name string, fi os.FileInfo, link string) (*tar.Header, erro
return hdr, nil
}
// ReadSecurityXattrToTarHeader reads security.capability, security,image
// readSecurityXattrToTarHeader reads security.capability, security.ima
// xattrs from filesystem to a tar header
func ReadSecurityXattrToTarHeader(path string, hdr *tar.Header) error {
if hdr.Xattrs == nil {
hdr.Xattrs = make(map[string]string)
func readSecurityXattrToTarHeader(path string, hdr *tar.Header) error {
if hdr.PAXRecords == nil {
hdr.PAXRecords = make(map[string]string)
}
for _, xattr := range []string{"security.capability", "security.ima"} {
capability, err := system.Lgetxattr(path, xattr)
@ -403,14 +431,14 @@ func ReadSecurityXattrToTarHeader(path string, hdr *tar.Header) error {
return fmt.Errorf("failed to read %q attribute from %q: %w", xattr, path, err)
}
if capability != nil {
hdr.Xattrs[xattr] = string(capability)
hdr.PAXRecords[PaxSchilyXattr+xattr] = string(capability)
}
}
return nil
}
// ReadUserXattrToTarHeader reads user.* xattr from filesystem to a tar header
func ReadUserXattrToTarHeader(path string, hdr *tar.Header) error {
// readUserXattrToTarHeader reads user.* xattr from filesystem to a tar header
func readUserXattrToTarHeader(path string, hdr *tar.Header) error {
xattrs, err := system.Llistxattr(path)
if err != nil && !errors.Is(err, system.EOPNOTSUPP) && err != system.ErrNotSupportedPlatform {
return err
@ -425,10 +453,10 @@ func ReadUserXattrToTarHeader(path string, hdr *tar.Header) error {
}
return err
}
if hdr.Xattrs == nil {
hdr.Xattrs = make(map[string]string)
if hdr.PAXRecords == nil {
hdr.PAXRecords = make(map[string]string)
}
hdr.Xattrs[key] = string(value)
hdr.PAXRecords[PaxSchilyXattr+key] = string(value)
}
}
return nil
@ -516,10 +544,10 @@ func (ta *tarAppender) addTarFile(path, name string) error {
if err != nil {
return err
}
if err := ReadSecurityXattrToTarHeader(path, hdr); err != nil {
if err := readSecurityXattrToTarHeader(path, hdr); err != nil {
return err
}
if err := ReadUserXattrToTarHeader(path, hdr); err != nil {
if err := readUserXattrToTarHeader(path, hdr); err != nil {
return err
}
if err := ReadFileFlagsToTarHeader(path, hdr); err != nil {
@ -642,7 +670,7 @@ func createTarFile(path, extractDir string, hdr *tar.Header, reader io.Reader, L
}
}
case tar.TypeReg, tar.TypeRegA:
case tar.TypeReg:
// Source is regular file. We use system.OpenFileSequential to use sequential
// file access to avoid depleting the standby list on Windows.
// On Linux, this equates to a regular os.OpenFile
@ -701,8 +729,11 @@ func createTarFile(path, extractDir string, hdr *tar.Header, reader io.Reader, L
}
if forceMask != nil && (hdr.Typeflag != tar.TypeSymlink || runtime.GOOS == "darwin") {
value := fmt.Sprintf("%d:%d:0%o", hdr.Uid, hdr.Gid, hdrInfo.Mode()&0o7777)
if err := system.Lsetxattr(path, idtools.ContainersOverrideXattr, []byte(value), 0); err != nil {
value := idtools.Stat{
IDs: idtools.IDPair{UID: hdr.Uid, GID: hdr.Gid},
Mode: hdrInfo.Mode() & 0o7777,
}
if err := idtools.SetContainersOverrideXattr(path, value); err != nil {
return err
}
}
@ -753,11 +784,15 @@ func createTarFile(path, extractDir string, hdr *tar.Header, reader io.Reader, L
}
var errs []string
for key, value := range hdr.Xattrs {
if _, found := xattrsToIgnore[key]; found {
for key, value := range hdr.PAXRecords {
xattrKey, ok := strings.CutPrefix(key, PaxSchilyXattr)
if !ok {
continue
}
if err := system.Lsetxattr(path, key, []byte(value), 0); err != nil {
if _, found := xattrsToIgnore[xattrKey]; found {
continue
}
if err := system.Lsetxattr(path, xattrKey, []byte(value), 0); err != nil {
if errors.Is(err, syscall.ENOTSUP) || (inUserns && errors.Is(err, syscall.EPERM)) {
// We ignore errors here because not all graphdrivers support
// xattrs *cough* old versions of AUFS *cough*. However only
@ -1113,9 +1148,14 @@ loop:
}
}
if options.ForceMask != nil && rootHdr != nil {
value := fmt.Sprintf("%d:%d:0%o", rootHdr.Uid, rootHdr.Gid, rootHdr.Mode)
if err := system.Lsetxattr(dest, idtools.ContainersOverrideXattr, []byte(value), 0); err != nil {
if options.ForceMask != nil {
value := idtools.Stat{Mode: 0o755}
if rootHdr != nil {
value.IDs.UID = rootHdr.Uid
value.IDs.GID = rootHdr.Gid
value.Mode = os.FileMode(rootHdr.Mode)
}
if err := idtools.SetContainersOverrideXattr(dest, value); err != nil {
return err
}
}
@ -1337,7 +1377,7 @@ func remapIDs(readIDMappings, writeIDMappings *idtools.IDMappings, chownOpts *id
}
} else if runtime.GOOS == darwin {
uid, gid = hdr.Uid, hdr.Gid
if xstat, ok := hdr.Xattrs[idtools.ContainersOverrideXattr]; ok {
if xstat, ok := hdr.PAXRecords[PaxSchilyXattr+idtools.ContainersOverrideXattr]; ok {
attrs := strings.Split(string(xstat), ":")
if len(attrs) == 3 {
val, err := strconv.ParseUint(attrs[0], 10, 32)

View File

@ -1,5 +1,5 @@
//go:build freebsd || darwin
// +build freebsd darwin
//go:build netbsd || freebsd || darwin
// +build netbsd freebsd darwin
package archive

View File

@ -48,8 +48,8 @@ func (o overlayWhiteoutConverter) ConvertWrite(hdr *tar.Header, path string, fi
return nil, err
}
if len(opaque) == 1 && opaque[0] == 'y' {
if hdr.Xattrs != nil {
delete(hdr.Xattrs, getOverlayOpaqueXattrName())
if hdr.PAXRecords != nil {
delete(hdr.PAXRecords, PaxSchilyXattr+getOverlayOpaqueXattrName())
}
// If there are no lower layers, then it can't have been deleted in this layer.
if len(o.rolayers) == 0 {

View File

@ -31,9 +31,9 @@ func statDifferent(oldStat *system.StatT, oldInfo *FileInfo, newStat *system.Sta
ownerChanged ||
oldStat.Rdev() != newStat.Rdev() ||
oldStat.Flags() != newStat.Flags() ||
!sameFsTimeSpec(oldStat.Mtim(), newStat.Mtim()) ||
// Don't look at size for dirs, its not a good measure of change
(oldStat.Mode()&unix.S_IFDIR != unix.S_IFDIR &&
(!sameFsTimeSpec(oldStat.Mtim(), newStat.Mtim()) || (oldStat.Size() != newStat.Size()))) {
((oldStat.Mode()&unix.S_IFDIR != unix.S_IFDIR) && (oldStat.Size() != newStat.Size())) {
return true
}
return false

View File

@ -0,0 +1,55 @@
package archive
import (
"bytes"
"fmt"
"io"
"os/exec"
"strings"
"sync"
)
// filterPath caches the result of exec.LookPath per filter name; a failed
// lookup is cached as the empty string.
var filterPath sync.Map

// getFilterPath resolves the absolute path of the named helper binary,
// consulting (and populating) the filterPath cache.  It returns "" when the
// binary cannot be found in PATH.
func getFilterPath(name string) string {
	if cached, ok := filterPath.Load(name); ok {
		return cached.(string)
	}

	resolved, err := exec.LookPath(name)
	if err != nil {
		resolved = ""
	}
	filterPath.Store(name, resolved)
	return resolved
}
// tryProcFilter tries to run the command specified in args, passing input to its stdin and returning its stdout.
// cleanup() is a caller provided function that will be called when the command finishes running, regardless of
// whether it succeeds or fails.
// If the command is not found, it returns (nil, false) and the cleanup function is not called.
func tryProcFilter(args []string, input io.Reader, cleanup func()) (io.ReadCloser, bool) {
	binary := getFilterPath(args[0])
	if binary == "" {
		return nil, false
	}

	pr, pw := io.Pipe()

	var errBuf bytes.Buffer
	cmd := exec.Command(binary, args[1:]...)
	cmd.Stdin = input
	cmd.Stdout = pw
	cmd.Stderr = &errBuf

	go func() {
		runErr := cmd.Run()
		if runErr != nil && errBuf.Len() > 0 {
			runErr = fmt.Errorf("%s: %w", strings.TrimRight(errBuf.String(), "\n"), runErr)
		}
		// CloseWithError(nil) is equivalent to Close(), so readers see EOF
		// on success and the command's error otherwise.
		pw.CloseWithError(runErr)
		cleanup()
	}()
	return pr, true
}

View File

@ -19,10 +19,13 @@ import (
// Old root is removed after the call to pivot_root so it is no longer available under the new root.
// This is similar to how libcontainer sets up a container's rootfs
func chroot(path string) (err error) {
caps, err := capability.NewPid(0)
caps, err := capability.NewPid2(0)
if err != nil {
return err
}
if err := caps.Load(); err != nil {
return err
}
// initialize nss libraries in Glibc so that the dynamic libraries are loaded in the host
// environment not in the chroot from untrusted files.

View File

@ -2,10 +2,15 @@ package chunked
import (
"encoding/binary"
"fmt"
"hash/crc32"
"io"
"github.com/docker/go-units"
)
const bloomFilterMaxLength = 100 * units.MB // max size for bloom filter
type bloomFilter struct {
bitArray []uint64
k uint32
@ -79,6 +84,10 @@ func readBloomFilter(reader io.Reader) (*bloomFilter, error) {
if err := binary.Read(reader, binary.LittleEndian, &k); err != nil {
return nil, err
}
// sanity check
if bloomFilterLen > bloomFilterMaxLength {
return nil, fmt.Errorf("bloom filter length %d exceeds max length %d", bloomFilterLen, bloomFilterMaxLength)
}
bloomFilterArray := make([]uint64, bloomFilterLen)
if err := binary.Read(reader, binary.LittleEndian, &bloomFilterArray); err != nil {
return nil, err

View File

@ -18,6 +18,7 @@ import (
graphdriver "github.com/containers/storage/drivers"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/containers/storage/pkg/ioutils"
"github.com/docker/go-units"
jsoniter "github.com/json-iterator/go"
digest "github.com/opencontainers/go-digest"
"github.com/sirupsen/logrus"
@ -34,6 +35,8 @@ const (
// https://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
bloomFilterScale = 10 // how much bigger is the bloom filter than the number of entries
bloomFilterHashes = 3 // number of hash functions for the bloom filter
maxTagsLen = 100 * units.MB // max size for tags len
)
type cacheFile struct {
@ -635,6 +638,14 @@ func readCacheFileFromMemory(bigDataBuffer []byte) (*cacheFile, error) {
if err := binary.Read(bigData, binary.LittleEndian, &fnamesLen); err != nil {
return nil, err
}
if tagsLen > maxTagsLen {
return nil, fmt.Errorf("tags len %d exceeds the maximum allowed size %d", tagsLen, maxTagsLen)
}
if digestLen > tagLen {
return nil, fmt.Errorf("digest len %d exceeds the tag len %d", digestLen, tagLen)
}
tags := make([]byte, tagsLen)
if _, err := bigData.Read(tags); err != nil {
return nil, err
@ -643,6 +654,10 @@ func readCacheFileFromMemory(bigDataBuffer []byte) (*cacheFile, error) {
// retrieve the unread part of the buffer.
remaining := bigDataBuffer[len(bigDataBuffer)-bigData.Len():]
if vdataLen >= uint64(len(remaining)) {
return nil, fmt.Errorf("vdata len %d exceeds the remaining buffer size %d", vdataLen, len(remaining))
}
vdata := remaining[:vdataLen]
fnames := remaining[vdataLen:]
@ -901,7 +916,7 @@ func unmarshalToc(manifest []byte) (*internal.TOC, error) {
s := iter.ReadString()
d, err := digest.Parse(s)
if err != nil {
return nil, fmt.Errorf("Invalid tarSplitDigest %q: %w", s, err)
return nil, fmt.Errorf("invalid tarSplitDigest %q: %w", s, err)
}
toc.TarSplitDigest = d

View File

@ -209,16 +209,27 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Di
}
decodedTarSplit := []byte{}
if tarSplitChunk.Offset > 0 {
if toc.TarSplitDigest != "" {
if tarSplitChunk.Offset <= 0 {
return nil, nil, nil, 0, fmt.Errorf("TOC requires a tar-split, but the %s annotation does not describe a position", internal.TarSplitInfoKey)
}
tarSplit, err := readBlob(tarSplitChunk.Length)
if err != nil {
return nil, nil, nil, 0, err
}
decodedTarSplit, err = decodeAndValidateBlob(tarSplit, tarSplitLengthUncompressed, toc.TarSplitDigest.String())
if err != nil {
return nil, nil, nil, 0, fmt.Errorf("validating and decompressing tar-split: %w", err)
}
} else if tarSplitChunk.Offset > 0 {
// We must ignore the tar-split when the digest is not present in the TOC, because we can't authenticate it.
//
// But if we asked for the chunk, now we must consume the data to not block the producer.
// Ideally the GetBlobAt API should be changed so that this is not necessary.
_, err := readBlob(tarSplitChunk.Length)
if err != nil {
return nil, nil, nil, 0, err
}
}
return decodedBlob, toc, decodedTarSplit, int64(manifestChunk.Offset), err
}

View File

@ -9,7 +9,9 @@ import (
"bytes"
"encoding/base64"
"io"
"strings"
"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/chunked/internal"
"github.com/containers/storage/pkg/ioutils"
"github.com/klauspost/compress/zstd"
@ -374,8 +376,12 @@ func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, r
return err
}
xattrs := make(map[string]string)
for k, v := range hdr.Xattrs {
xattrs[k] = base64.StdEncoding.EncodeToString([]byte(v))
for k, v := range hdr.PAXRecords {
xattrKey, ok := strings.CutPrefix(k, archive.PaxSchilyXattr)
if !ok {
continue
}
xattrs[xattrKey] = base64.StdEncoding.EncodeToString([]byte(v))
}
entries := []internal.FileMetadata{
{

View File

@ -1,3 +1,5 @@
//go:build unix
package dump
import (
@ -5,9 +7,9 @@ import (
"fmt"
"io"
"path/filepath"
"reflect"
"strings"
"time"
"unicode"
"github.com/containers/storage/pkg/chunked/internal"
"golang.org/x/sys/unix"
@ -25,15 +27,21 @@ func escaped(val string, escape int) string {
escapeEqual := escape&ESCAPE_EQUAL != 0
escapeLoneDash := escape&ESCAPE_LONE_DASH != 0
length := len(val)
if escapeLoneDash && val == "-" {
return fmt.Sprintf("\\x%.2x", val[0])
}
// This is intended to match the C isprint API with LC_CTYPE=C
isprint := func(c byte) bool {
return c >= 32 && c < 127
}
// This is intended to match the C isgraph API with LC_CTYPE=C
isgraph := func(c byte) bool {
return c > 32 && c < 127
}
var result string
for i := 0; i < length; i++ {
c := val[i]
for _, c := range []byte(val) {
hexEscape := false
var special string
@ -50,9 +58,9 @@ func escaped(val string, escape int) string {
hexEscape = escapeEqual
default:
if noescapeSpace {
hexEscape = !unicode.IsPrint(rune(c))
hexEscape = !isprint(c)
} else {
hexEscape = !unicode.IsPrint(rune(c)) || unicode.IsSpace(rune(c))
hexEscape = !isgraph(c)
}
}
@ -104,9 +112,30 @@ func sanitizeName(name string) string {
return path
}
func dumpNode(out io.Writer, links map[string]int, verityDigests map[string]string, entry *internal.FileMetadata) error {
func dumpNode(out io.Writer, added map[string]*internal.FileMetadata, links map[string]int, verityDigests map[string]string, entry *internal.FileMetadata) error {
path := sanitizeName(entry.Name)
parent := filepath.Dir(path)
if _, found := added[parent]; !found && path != "/" {
parentEntry := &internal.FileMetadata{
Name: parent,
Type: internal.TypeDir,
Mode: 0o755,
}
if err := dumpNode(out, added, links, verityDigests, parentEntry); err != nil {
return err
}
}
if e, found := added[path]; found {
// if the entry was already added, make sure it has the same data
if !reflect.DeepEqual(*e, *entry) {
return fmt.Errorf("entry %q already added with different data", path)
}
return nil
}
added[path] = entry
if _, err := fmt.Fprint(out, escaped(path, ESCAPE_STANDARD)); err != nil {
return err
}
@ -151,7 +180,7 @@ func dumpNode(out io.Writer, links map[string]int, verityDigests map[string]stri
}
}
if _, err := fmt.Fprintf(out, escapedOptional(payload, ESCAPE_LONE_DASH)); err != nil {
if _, err := fmt.Fprint(out, escapedOptional(payload, ESCAPE_LONE_DASH)); err != nil {
return err
}
@ -165,7 +194,7 @@ func dumpNode(out io.Writer, links map[string]int, verityDigests map[string]stri
return err
}
digest := verityDigests[payload]
if _, err := fmt.Fprintf(out, escapedOptional(digest, ESCAPE_LONE_DASH)); err != nil {
if _, err := fmt.Fprint(out, escapedOptional(digest, ESCAPE_LONE_DASH)); err != nil {
return err
}
@ -201,6 +230,7 @@ func GenerateDump(tocI interface{}, verityDigests map[string]string) (io.Reader,
}()
links := make(map[string]int)
added := make(map[string]*internal.FileMetadata)
for _, e := range toc.Entries {
if e.Linkname == "" {
continue
@ -211,14 +241,14 @@ func GenerateDump(tocI interface{}, verityDigests map[string]string) (io.Reader,
links[e.Linkname] = links[e.Linkname] + 1
}
if len(toc.Entries) == 0 || (sanitizeName(toc.Entries[0].Name) != "/") {
if len(toc.Entries) == 0 {
root := &internal.FileMetadata{
Name: "/",
Type: internal.TypeDir,
Mode: 0o755,
}
if err := dumpNode(w, links, verityDigests, root); err != nil {
if err := dumpNode(w, added, links, verityDigests, root); err != nil {
pipeW.CloseWithError(err)
closed = true
return
@ -229,7 +259,7 @@ func GenerateDump(tocI interface{}, verityDigests map[string]string) (io.Reader,
if e.Type == internal.TypeChunk {
continue
}
if err := dumpNode(w, links, verityDigests, &e); err != nil {
if err := dumpNode(w, added, links, verityDigests, &e); err != nil {
pipeW.CloseWithError(err)
closed = true
return

View File

@ -0,0 +1,603 @@
package chunked
import (
"encoding/base64"
"errors"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"strings"
"sync/atomic"
"syscall"
"time"
driversCopy "github.com/containers/storage/drivers/copy"
"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/chunked/internal"
securejoin "github.com/cyphar/filepath-securejoin"
"github.com/vbatts/tar-split/archive/tar"
"golang.org/x/sys/unix"
)
// procPathForFd returns the magic /proc/self/fd path that refers to the
// given file descriptor; this makes it possible to hand an open fd to APIs
// that only accept a path name.
func procPathForFd(fd int) string {
	return fmt.Sprintf("/proc/self/fd/%d", fd)
}

// procPathForFile is the *os.File convenience wrapper around procPathForFd.
func procPathForFile(f *os.File) string {
	return procPathForFd(int(f.Fd()))
}
// fileMetadata is a wrapper around internal.FileMetadata with additional private fields that
// are not part of the TOC document.
// Type: TypeChunk entries are stored in Chunks, the primary [fileMetadata] entries never use TypeChunk.
type fileMetadata struct {
	internal.FileMetadata

	// chunks stores the TypeChunk entries relevant to this entry when FileMetadata.Type == TypeReg.
	chunks []*internal.FileMetadata

	// skipSetAttrs is set when the file attributes must not be
	// modified, e.g. it is a hard link from a different source,
	// or a composefs file.  setFileAttrs becomes a no-op when this
	// flag is true.
	skipSetAttrs bool
}
// doHardLink creates a hard link at destFile (a path relative to dirfd)
// pointing at the file already open at srcFd.  If the destination exists,
// it is unlinked and the link is attempted once more.
func doHardLink(dirfd, srcFd int, destFile string) error {
	destDir, destBase := filepath.Split(destFile)
	destDirFd := dirfd
	if destDir != "" && destDir != "." {
		f, err := openOrCreateDirUnderRoot(dirfd, destDir, 0)
		if err != nil {
			return err
		}
		defer f.Close()
		destDirFd = int(f.Fd())
	}

	doLink := func() error {
		// Using unix.AT_EMPTY_PATH requires CAP_DAC_READ_SEARCH while this variant that uses
		// /proc/self/fd doesn't and can be used with rootless.
		srcPath := procPathForFd(srcFd)
		err := unix.Linkat(unix.AT_FDCWD, srcPath, destDirFd, destBase, unix.AT_SYMLINK_FOLLOW)
		if err != nil {
			return &fs.PathError{Op: "linkat", Path: destFile, Err: err}
		}
		return nil
	}

	err := doLink()

	// if the destination exists, unlink it first and try again
	if err != nil && os.IsExist(err) {
		// the unlink error is deliberately ignored: if removal failed, the
		// retried link below reports the real problem.
		unix.Unlinkat(destDirFd, destBase, 0)
		return doLink()
	}
	return err
}
// copyFileContent copies the contents of the file open at srcFd to destFile
// (fileMetadata.Name, relative to dirfd).  It returns the open destination
// file and the source size.  When useHardLinks is set, it first attempts to
// deduplicate via a hard link; on success it returns a nil *os.File and marks
// the metadata so that later attribute fixups are skipped.  A failed hard
// link silently falls through to a regular copy.
func copyFileContent(srcFd int, fileMetadata *fileMetadata, dirfd int, mode os.FileMode, useHardLinks bool) (*os.File, int64, error) {
	destFile := fileMetadata.Name
	src := procPathForFd(srcFd)
	st, err := os.Stat(src)
	if err != nil {
		return nil, -1, fmt.Errorf("copy file content for %q: %w", destFile, err)
	}

	copyWithFileRange, copyWithFileClone := true, true

	if useHardLinks {
		err := doHardLink(dirfd, srcFd, destFile)
		if err == nil {
			// if the file was deduplicated with a hard link, skip overriding file metadata.
			fileMetadata.skipSetAttrs = true
			return nil, st.Size(), nil
		}
	}

	// If the destination file already exists, we shouldn't blow it away
	dstFile, err := openFileUnderRoot(dirfd, destFile, newFileFlags, mode)
	if err != nil {
		return nil, -1, fmt.Errorf("open file %q under rootfs for copy: %w", destFile, err)
	}

	err = driversCopy.CopyRegularToFile(src, dstFile, st, &copyWithFileRange, &copyWithFileClone)
	if err != nil {
		dstFile.Close()
		return nil, -1, fmt.Errorf("copy to file %q under rootfs: %w", destFile, err)
	}
	return dstFile, st.Size(), nil
}
// timeToTimespec converts a *time.Time into a unix.Timespec suitable for
// utimensat(2).  A nil or zero time maps to the UTIME_OMIT sentinel, which
// tells the kernel to leave the corresponding timestamp unchanged.
func timeToTimespec(time *time.Time) (ts unix.Timespec) {
	if time == nil || time.IsZero() {
		// Return UTIME_OMIT special value; use the named constant instead
		// of the raw ((1 << 30) - 2) magic number.
		ts.Sec = 0
		ts.Nsec = unix.UTIME_OMIT
		return
	}
	return unix.NsecToTimespec(time.UnixNano())
}
// chown changes the owner and group of the file at the specified path under the directory
// pointed by dirfd.
// If nofollow is true, the function will not follow symlinks.
// If path is empty, the function will change the owner and group of the file descriptor.
// absolutePath is the absolute path of the file, used only for error messages.
func chown(dirfd int, path string, uid, gid int, nofollow bool, absolutePath string) error {
	flags := 0
	switch {
	case nofollow:
		flags |= unix.AT_SYMLINK_NOFOLLOW
	case path == "":
		flags |= unix.AT_EMPTY_PATH
	}

	if err := unix.Fchownat(dirfd, path, uid, gid, flags); err != nil {
		if errors.Is(err, syscall.EINVAL) {
			// EINVAL from fchownat typically means the requested IDs are not
			// mapped in the current user namespace.
			return fmt.Errorf(`potentially insufficient UIDs or GIDs available in the user namespace (requested %d:%d for %s): Check /etc/subuid and /etc/subgid if configured locally and run "podman system migrate": %w`, uid, gid, path, err)
		}
		return &fs.PathError{Op: "fchownat", Path: absolutePath, Err: err}
	}
	return nil
}
// setFileAttrs sets the file attributes for file given metadata.
// It applies ownership, Schily xattrs, timestamps and mode, in that order.
// When usePath is true (forced for symlinks) the *at syscalls operate on the
// path relative to dirfd; otherwise they operate on file's descriptor.
// It is a no-op when metadata.skipSetAttrs is set.
func setFileAttrs(dirfd int, file *os.File, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions, usePath bool) error {
	if metadata.skipSetAttrs {
		return nil
	}
	if file == nil {
		return errors.New("invalid file")
	}
	fd := int(file.Fd())

	t, err := typeToTarType(metadata.Type)
	if err != nil {
		return err
	}

	// If it is a symlink, force to use the path
	if t == tar.TypeSymlink {
		usePath = true
	}

	baseName := ""
	if usePath {
		dirName := filepath.Dir(metadata.Name)
		if dirName != "" {
			parentFd, err := openFileUnderRoot(dirfd, dirName, unix.O_PATH|unix.O_DIRECTORY, 0)
			if err != nil {
				return err
			}
			defer parentFd.Close()

			// From here on, dirfd refers to the immediate parent directory.
			dirfd = int(parentFd.Fd())
		}
		baseName = filepath.Base(metadata.Name)
	}

	doChown := func() error {
		var err error
		if usePath {
			err = chown(dirfd, baseName, metadata.UID, metadata.GID, true, metadata.Name)
		} else {
			err = chown(fd, "", metadata.UID, metadata.GID, false, metadata.Name)
		}
		if options.IgnoreChownErrors {
			return nil
		}
		return err
	}

	doSetXattr := func(k string, v []byte) error {
		err := unix.Fsetxattr(fd, k, v, 0)
		if err != nil {
			return &fs.PathError{Op: "fsetxattr", Path: metadata.Name, Err: err}
		}
		return nil
	}

	doUtimes := func() error {
		ts := []unix.Timespec{timeToTimespec(metadata.AccessTime), timeToTimespec(metadata.ModTime)}
		var err error
		if usePath {
			err = unix.UtimesNanoAt(dirfd, baseName, ts, unix.AT_SYMLINK_NOFOLLOW)
		} else {
			// Going through /proc/self/fd lets us update times on the open
			// descriptor without needing a path under dirfd.
			err = unix.UtimesNanoAt(unix.AT_FDCWD, procPathForFd(fd), ts, 0)
		}
		if err != nil {
			return &fs.PathError{Op: "utimensat", Path: metadata.Name, Err: err}
		}
		return nil
	}

	doChmod := func() error {
		var err error
		op := ""
		if usePath {
			err = unix.Fchmodat(dirfd, baseName, uint32(mode), unix.AT_SYMLINK_NOFOLLOW)
			op = "fchmodat"
		} else {
			err = unix.Fchmod(fd, uint32(mode))
			op = "fchmod"
		}
		if err != nil {
			return &fs.PathError{Op: op, Path: metadata.Name, Err: err}
		}
		return nil
	}

	// Ownership errors are fatal (unless IgnoreChownErrors), while the
	// remaining attributes tolerate ENOSYS/ENOTSUP from the filesystem.
	if err := doChown(); err != nil {
		return err
	}

	canIgnore := func(err error) bool {
		return err == nil || errors.Is(err, unix.ENOSYS) || errors.Is(err, unix.ENOTSUP)
	}

	for k, v := range metadata.Xattrs {
		if _, found := xattrsToIgnore[k]; found {
			continue
		}
		// Xattr values are base64-encoded in the TOC.
		data, err := base64.StdEncoding.DecodeString(v)
		if err != nil {
			return fmt.Errorf("decode xattr %q: %w", v, err)
		}
		if err := doSetXattr(k, data); !canIgnore(err) {
			return fmt.Errorf("set xattr %s=%q for %q: %w", k, data, metadata.Name, err)
		}
	}

	if err := doUtimes(); !canIgnore(err) {
		return err
	}

	if err := doChmod(); !canIgnore(err) {
		return err
	}
	return nil
}
// openFileUnderRootFallback opens name relative to dirfd without openat2(2):
// the path is resolved in userspace with securejoin, and the opened fd is
// verified via /proc to still resolve inside the root.  It returns the open
// file descriptor, or -1 and an error.
func openFileUnderRootFallback(dirfd int, name string, flags uint64, mode os.FileMode) (int, error) {
	root := procPathForFd(dirfd)

	// targetRoot is the real path of the root directory; used below to
	// verify the final fd did not escape it.
	targetRoot, err := os.Readlink(root)
	if err != nil {
		return -1, err
	}

	hasNoFollow := (flags & unix.O_NOFOLLOW) != 0

	var fd int
	// If O_NOFOLLOW is specified in the flags, then resolve only the parent directory and use the
	// last component as the path to openat().
	if hasNoFollow {
		dirName, baseName := filepath.Split(name)
		if dirName != "" && dirName != "." {
			newRoot, err := securejoin.SecureJoin(root, dirName)
			if err != nil {
				return -1, err
			}
			root = newRoot
		}

		parentDirfd, err := unix.Open(root, unix.O_PATH|unix.O_CLOEXEC, 0)
		if err != nil {
			return -1, &fs.PathError{Op: "open", Path: root, Err: err}
		}
		defer unix.Close(parentDirfd)

		fd, err = unix.Openat(parentDirfd, baseName, int(flags), uint32(mode))
		if err != nil {
			return -1, &fs.PathError{Op: "openat", Path: name, Err: err}
		}
	} else {
		newPath, err := securejoin.SecureJoin(root, name)
		if err != nil {
			return -1, err
		}
		fd, err = unix.Openat(dirfd, newPath, int(flags), uint32(mode))
		if err != nil {
			return -1, &fs.PathError{Op: "openat", Path: newPath, Err: err}
		}
	}

	target, err := os.Readlink(procPathForFd(fd))
	if err != nil {
		unix.Close(fd)
		return -1, err
	}

	// Add an additional check to make sure the opened fd is inside the rootfs
	if !strings.HasPrefix(target, targetRoot) {
		unix.Close(fd)
		return -1, fmt.Errorf("while resolving %q. It resolves outside the root directory", name)
	}

	return fd, err
}
// openFileUnderRootOpenat2 opens name relative to dirfd using openat2(2)
// with RESOLVE_IN_ROOT, so the kernel itself confines the path lookup to
// the tree rooted at dirfd.  It returns the open fd, or -1 and an error.
func openFileUnderRootOpenat2(dirfd int, name string, flags uint64, mode os.FileMode) (int, error) {
	fd, err := unix.Openat2(dirfd, name, &unix.OpenHow{
		Flags:   flags,
		Mode:    uint64(mode & 0o7777),
		Resolve: unix.RESOLVE_IN_ROOT,
	})
	if err != nil {
		return -1, &fs.PathError{Op: "openat2", Path: name, Err: err}
	}
	return fd, nil
}
// skipOpenat2 becomes non-zero once openat2 is found to be unsupported by
// the running kernel, so we never try it again.
var skipOpenat2 int32

// openFileUnderRootRaw tries to open a file using openat2 and if it is not supported fallbacks to a
// userspace lookup.
func openFileUnderRootRaw(dirfd int, name string, flags uint64, mode os.FileMode) (int, error) {
	// An empty name means "the directory itself": duplicate dirfd.
	if name == "" {
		dupFd, err := unix.Dup(dirfd)
		if err != nil {
			return -1, fmt.Errorf("failed to duplicate file descriptor %d: %w", dirfd, err)
		}
		return dupFd, nil
	}

	if atomic.LoadInt32(&skipOpenat2) > 0 {
		return openFileUnderRootFallback(dirfd, name, flags, mode)
	}

	fd, err := openFileUnderRootOpenat2(dirfd, name, flags, mode)
	if err != nil && errors.Is(err, unix.ENOSYS) {
		// The kernel lacks openat2: remember that and use the userspace
		// (securejoin-based) fallback from now on.
		atomic.StoreInt32(&skipOpenat2, 1)
		return openFileUnderRootFallback(dirfd, name, flags, mode)
	}
	return fd, err
}
// openFileUnderRoot safely opens a file under the specified root directory using openat2.
// dirfd is an open file descriptor to the target checkout directory.
// name is the path to open relative to dirfd.
// flags are the flags to pass to the open syscall.
// mode specifies the mode to use for newly created files.
// If the open fails with ENOENT and O_CREAT was requested, the missing
// parent directories are created and the open is retried once.
func openFileUnderRoot(dirfd int, name string, flags uint64, mode os.FileMode) (*os.File, error) {
	fd, err := openFileUnderRootRaw(dirfd, name, flags, mode)
	if err == nil {
		return os.NewFile(uintptr(fd), name), nil
	}

	hasCreate := (flags & unix.O_CREAT) != 0
	if errors.Is(err, unix.ENOENT) && hasCreate {
		parent := filepath.Dir(name)
		if parent != "" {
			newDirfd, err2 := openOrCreateDirUnderRoot(dirfd, parent, 0)
			if err2 == nil {
				defer newDirfd.Close()
				// The retry deliberately shadows err: if it also fails we
				// fall through and report the original error below.
				fd, err := openFileUnderRootRaw(int(newDirfd.Fd()), filepath.Base(name), flags, mode)
				if err == nil {
					return os.NewFile(uintptr(fd), name), nil
				}
			}
		}
	}
	return nil, fmt.Errorf("open %q under the rootfs: %w", name, err)
}
// openOrCreateDirUnderRoot safely opens a directory or create it if it is missing.
// dirfd is an open file descriptor to the target checkout directory.
// name is the path to open relative to dirfd.
// mode specifies the mode to use for newly created files.
// Missing parent directories are created recursively with the same mode.
func openOrCreateDirUnderRoot(dirfd int, name string, mode os.FileMode) (*os.File, error) {
	fd, err := openFileUnderRootRaw(dirfd, name, unix.O_DIRECTORY|unix.O_RDONLY, 0)
	if err == nil {
		return os.NewFile(uintptr(fd), name), nil
	}

	if errors.Is(err, unix.ENOENT) {
		parent := filepath.Dir(name)
		if parent != "" {
			// Recursively create the parent first; on failure report the
			// original open error (err), not the recursive one.
			pDir, err2 := openOrCreateDirUnderRoot(dirfd, parent, mode)
			if err2 != nil {
				return nil, err
			}
			defer pDir.Close()

			baseName := filepath.Base(name)

			if err2 := unix.Mkdirat(int(pDir.Fd()), baseName, uint32(mode)); err2 != nil {
				return nil, &fs.PathError{Op: "mkdirat", Path: name, Err: err2}
			}

			fd, err = openFileUnderRootRaw(int(pDir.Fd()), baseName, unix.O_DIRECTORY|unix.O_RDONLY, 0)
			if err == nil {
				return os.NewFile(uintptr(fd), name), nil
			}
		}
	}
	return nil, err
}
// appendHole creates a hole with the specified size at the open fd.
// fd is the open file descriptor.
// name is the path to use for error messages.
// size is the size of the hole to create.
func appendHole(fd int, name string, size int64) error {
	newEnd, err := unix.Seek(fd, size, unix.SEEK_CUR)
	if err != nil {
		return &fs.PathError{Op: "seek", Path: name, Err: err}
	}
	// Truncate so the file size actually grows: this may be the trailing
	// hole, with no further data written after it.
	if err := unix.Ftruncate(fd, newEnd); err != nil {
		return &fs.PathError{Op: "ftruncate", Path: name, Err: err}
	}

	return nil
}
// safeMkdir creates the directory name (relative to dirfd) if it does not
// already exist, then applies the entry's attributes to it.
func safeMkdir(dirfd int, mode os.FileMode, name string, metadata *fileMetadata, options *archive.TarOptions) error {
	parentPath, base := filepath.Split(name)
	parentFd := dirfd
	if parentPath != "" && parentPath != "." {
		parentFile, err := openOrCreateDirUnderRoot(dirfd, parentPath, 0)
		if err != nil {
			return err
		}
		defer parentFile.Close()
		parentFd = int(parentFile.Fd())
	}

	// An already-existing directory is fine; we still (re)apply attributes.
	if err := unix.Mkdirat(parentFd, base, uint32(mode)); err != nil && !os.IsExist(err) {
		return &fs.PathError{Op: "mkdirat", Path: name, Err: err}
	}

	file, err := openFileUnderRoot(parentFd, base, unix.O_DIRECTORY|unix.O_RDONLY, 0)
	if err != nil {
		return err
	}
	defer file.Close()

	return setFileAttrs(dirfd, file, mode, metadata, options, false)
}
// safeLink creates a hard link at metadata.Name pointing at metadata.Linkname
// (both relative to dirfd), then applies the entry's attributes to the new
// link.
func safeLink(dirfd int, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions) error {
	sourceFile, err := openFileUnderRoot(dirfd, metadata.Linkname, unix.O_PATH|unix.O_RDONLY|unix.O_NOFOLLOW, 0)
	if err != nil {
		return err
	}
	defer sourceFile.Close()

	err = doHardLink(dirfd, int(sourceFile.Fd()), metadata.Name)
	if err != nil {
		return err
	}

	newFile, err := openFileUnderRoot(dirfd, metadata.Name, unix.O_WRONLY|unix.O_NOFOLLOW, 0)
	if err != nil {
		// If the target is a symlink, open the file with O_PATH.
		// (O_WRONLY|O_NOFOLLOW on a symlink fails with ELOOP.)
		if errors.Is(err, unix.ELOOP) {
			newFile, err := openFileUnderRoot(dirfd, metadata.Name, unix.O_PATH|unix.O_NOFOLLOW, 0)
			if err != nil {
				return err
			}
			defer newFile.Close()
			// usePath=true: attribute syscalls cannot act on an O_PATH fd directly.
			return setFileAttrs(dirfd, newFile, mode, metadata, options, true)
		}

		return err
	}
	defer newFile.Close()

	return setFileAttrs(dirfd, newFile, mode, metadata, options, false)
}
// safeSymlink creates the symlink described by metadata (Name -> Linkname)
// under dirfd.  mode and options are accepted for signature parity with the
// other safe* helpers; symlink attributes are not set here.
func safeSymlink(dirfd int, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions) error {
	parentPath, base := filepath.Split(metadata.Name)
	parentFd := dirfd
	if parentPath != "" && parentPath != "." {
		parentFile, err := openOrCreateDirUnderRoot(dirfd, parentPath, 0)
		if err != nil {
			return err
		}
		defer parentFile.Close()
		parentFd = int(parentFile.Fd())
	}

	if err := unix.Symlinkat(metadata.Linkname, parentFd, base); err != nil {
		return &fs.PathError{Op: "symlinkat", Path: metadata.Name, Err: err}
	}
	return nil
}
// whiteoutHandler implements the callbacks used to materialize whiteout
// entries (xattrs, device nodes, ownership) inside the checkout rooted at
// Dirfd.
type whiteoutHandler struct {
	Dirfd int
	Root  string
}
// Setxattr sets the extended attribute name=value on the directory at path
// (relative to d.Dirfd), creating the directory if necessary.
func (d whiteoutHandler) Setxattr(path, name string, value []byte) error {
	dir, err := openOrCreateDirUnderRoot(d.Dirfd, path, 0)
	if err != nil {
		return err
	}
	defer dir.Close()

	if err := unix.Fsetxattr(int(dir.Fd()), name, value, 0); err != nil {
		return &fs.PathError{Op: "fsetxattr", Path: path, Err: err}
	}
	return nil
}
// Mknod creates a device node at path (relative to d.Dirfd), creating the
// parent directory first when needed.
func (d whiteoutHandler) Mknod(path string, mode uint32, dev int) error {
	parentPath, base := filepath.Split(path)
	parentFd := d.Dirfd
	if parentPath != "" && parentPath != "." {
		parentFile, err := openOrCreateDirUnderRoot(d.Dirfd, parentPath, 0)
		if err != nil {
			return err
		}
		defer parentFile.Close()
		parentFd = int(parentFile.Fd())
	}

	if err := unix.Mknodat(parentFd, base, mode, dev); err != nil {
		return &fs.PathError{Op: "mknodat", Path: path, Err: err}
	}

	return nil
}
// Chown changes the owner and group of the file at path relative to d.Dirfd.
// The file is opened O_PATH and the change is applied through its descriptor.
func (d whiteoutHandler) Chown(path string, uid, gid int) error {
	target, err := openFileUnderRoot(d.Dirfd, path, unix.O_PATH, 0)
	if err != nil {
		return err
	}
	defer target.Close()

	return chown(int(target.Fd()), "", uid, gid, false, path)
}
// readerAtCloser groups io.ReaderAt with io.Closer; it is the minimal
// contract seekableFile needs from its backing file.
type readerAtCloser interface {
	io.ReaderAt
	io.Closer
}

// seekableFile is a struct that wraps an *os.File to provide an ImageSourceSeekable.
type seekableFile struct {
	reader readerAtCloser
}
// Close closes the underlying reader.
func (f *seekableFile) Close() error {
	return f.reader.Close()
}
// GetBlobAt returns one io.ReadCloser per requested chunk, each a
// section view over the underlying reader. A goroutine feeds the
// unbuffered streams channel in order and closes both channels (streams
// first, then errs) when all chunks have been emitted; no errors are
// ever sent on errs by this implementation.
func (f *seekableFile) GetBlobAt(chunks []ImageSourceChunk) (chan io.ReadCloser, chan error, error) {
	streams := make(chan io.ReadCloser)
	errs := make(chan error)
	go func() {
		for _, c := range chunks {
			section := io.NewSectionReader(f.reader, int64(c.Offset), int64(c.Length))
			streams <- io.NopCloser(section)
		}
		close(streams)
		close(errs)
	}()
	return streams, errs, nil
}
// newSeekableFile returns a seekableFile backed by reader.
func newSeekableFile(reader readerAtCloser) *seekableFile {
	f := seekableFile{reader: reader}
	return &f
}

View File

@ -17,13 +17,30 @@ import (
"github.com/opencontainers/go-digest"
)
// TOC is short for Table of Contents and is used by the zstd:chunked
// file format to effectively add an overall index into the contents
// of a tarball; it also includes file metadata.
type TOC struct {
Version int `json:"version"`
Entries []FileMetadata `json:"entries"`
TarSplitDigest digest.Digest `json:"tarSplitDigest,omitempty"`
// Version is currently expected to be 1
Version int `json:"version"`
// Entries is the list of file metadata in this TOC.
// The ordering in this array currently defaults to being the same
// as that of the tar stream; however, this should not be relied on.
Entries []FileMetadata `json:"entries"`
// TarSplitDigest is the checksum of the "tar-split" data which
// is included as a distinct skippable zstd frame before the TOC.
TarSplitDigest digest.Digest `json:"tarSplitDigest,omitempty"`
}
// FileMetadata is an entry in the TOC that includes both generic file metadata
// that duplicates what can found in the tar header (and should match), but
// also special/custom content (see below).
//
// Note that the metadata here, when fetched by a zstd:chunked aware client,
// is used instead of that in the tar stream. The contents of the tar stream
// are not used in this scenario.
type FileMetadata struct {
// The metadata below largely duplicates that in the tar headers.
Type string `json:"type"`
Name string `json:"name"`
Linkname string `json:"linkName,omitempty"`
@ -37,9 +54,11 @@ type FileMetadata struct {
Devmajor int64 `json:"devMajor,omitempty"`
Devminor int64 `json:"devMinor,omitempty"`
Xattrs map[string]string `json:"xattrs,omitempty"`
Digest string `json:"digest,omitempty"`
Offset int64 `json:"offset,omitempty"`
EndOffset int64 `json:"endOffset,omitempty"`
// Digest is a hexadecimal sha256 checksum of the file contents; it
// is empty for empty files
Digest string `json:"digest,omitempty"`
Offset int64 `json:"offset,omitempty"`
EndOffset int64 `json:"endOffset,omitempty"`
ChunkSize int64 `json:"chunkSize,omitempty"`
ChunkOffset int64 `json:"chunkOffset,omitempty"`
@ -53,19 +72,23 @@ const (
)
const (
// The following types correspond to regular types of entries that can
// appear in a tar archive.
TypeReg = "reg"
TypeChunk = "chunk"
TypeLink = "hardlink"
TypeChar = "char"
TypeBlock = "block"
TypeDir = "dir"
TypeFifo = "fifo"
TypeSymlink = "symlink"
// TypeChunk is special; in zstd:chunked not only are files individually
// compressed and indexable, there is a "rolling checksum" used to compute
// "chunks" of individual file contents, that are also added to the TOC
TypeChunk = "chunk"
)
var TarTypes = map[byte]string{
tar.TypeReg: TypeReg,
tar.TypeRegA: TypeReg,
tar.TypeLink: TypeLink,
tar.TypeChar: TypeChar,
tar.TypeBlock: TypeBlock,
@ -83,11 +106,23 @@ func GetType(t byte) (string, error) {
}
const (
// ManifestChecksumKey is a hexadecimal sha256 digest of the compressed manifest digest.
ManifestChecksumKey = "io.github.containers.zstd-chunked.manifest-checksum"
ManifestInfoKey = "io.github.containers.zstd-chunked.manifest-position"
TarSplitInfoKey = "io.github.containers.zstd-chunked.tarsplit-position"
// ManifestInfoKey is an annotation that signals the start of the TOC (manifest)
// contents which are embedded as a skippable zstd frame. It has a format of
// four decimal integers separated by `:` as follows:
// <offset>:<length>:<uncompressed length>:<type>
// The <type> is ManifestTypeCRFS which should have the value `1`.
ManifestInfoKey = "io.github.containers.zstd-chunked.manifest-position"
// TarSplitInfoKey is an annotation that signals the start of the "tar-split" metadata
// contents which are embedded as a skippable zstd frame. It has a format of
// three decimal integers separated by `:` as follows:
// <offset>:<length>:<uncompressed length>
TarSplitInfoKey = "io.github.containers.zstd-chunked.tarsplit-position"
TarSplitChecksumKey = "io.github.containers.zstd-chunked.tarsplit-checksum" // Deprecated: Use the TOC.TarSplitDigest field instead, this annotation is no longer read nor written.
// TarSplitChecksumKey is no longer used and is replaced by the TOC.TarSplitDigest field instead.
// The value is retained here as a constant as a historical reference for older zstd:chunked images.
// TarSplitChecksumKey = "io.github.containers.zstd-chunked.tarsplit-checksum"
// ManifestTypeCRFS is a manifest file compatible with the CRFS TOC file.
ManifestTypeCRFS = 1

View File

@ -8,20 +8,18 @@ import (
"fmt"
"hash"
"io"
"io/fs"
"os"
"path/filepath"
"reflect"
"sort"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
"github.com/containerd/stargz-snapshotter/estargz"
storage "github.com/containers/storage"
graphdriver "github.com/containers/storage/drivers"
driversCopy "github.com/containers/storage/drivers/copy"
"github.com/containers/storage/pkg/archive"
"github.com/containers/storage/pkg/chunked/compressor"
"github.com/containers/storage/pkg/chunked/internal"
@ -29,8 +27,6 @@ import (
"github.com/containers/storage/pkg/fsverity"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/system"
"github.com/containers/storage/types"
securejoin "github.com/cyphar/filepath-securejoin"
jsoniter "github.com/json-iterator/go"
"github.com/klauspost/compress/zstd"
"github.com/klauspost/pgzip"
@ -42,9 +38,8 @@ import (
const (
maxNumberMissingChunks = 1024
autoMergePartsThreshold = 128 // if the gap between two ranges is below this threshold, automatically merge them.
autoMergePartsThreshold = 1024 // if the gap between two ranges is below this threshold, automatically merge them.
newFileFlags = (unix.O_CREAT | unix.O_TRUNC | unix.O_EXCL | unix.O_WRONLY)
containersOverrideXattr = "user.containers.override_stat"
bigDataKey = "zstd-chunked-manifest"
chunkedData = "zstd-chunked-data"
chunkedLayerDataKey = "zstd-chunked-layer-data"
@ -59,21 +54,6 @@ const (
copyGoRoutines = 32
)
// fileMetadata is a wrapper around internal.FileMetadata with additional private fields that
// are not part of the TOC document.
// Type: TypeChunk entries are stored in Chunks, the primary [fileMetadata] entries never use TypeChunk.
type fileMetadata struct {
internal.FileMetadata
// chunks stores the TypeChunk entries relevant to this entry when FileMetadata.Type == TypeReg.
chunks []*internal.FileMetadata
// skipSetAttrs is set when the file attributes must not be
// modified, e.g. it is a hard link from a different source,
// or a composefs file.
skipSetAttrs bool
}
type compressedFileType int
type chunkedDiffer struct {
@ -111,7 +91,7 @@ type chunkedDiffer struct {
blobSize int64
storeOpts *types.StoreOptions
pullOptions map[string]string
useFsVerity graphdriver.DifferFsVerity
fsVerityDigests map[string]string
@ -127,98 +107,7 @@ type chunkedLayerData struct {
Format graphdriver.DifferOutputFormat `json:"format"`
}
func timeToTimespec(time *time.Time) (ts unix.Timespec) {
if time == nil || time.IsZero() {
// Return UTIME_OMIT special value
ts.Sec = 0
ts.Nsec = ((1 << 30) - 2)
return
}
return unix.NsecToTimespec(time.UnixNano())
}
func doHardLink(srcFd int, destDirFd int, destBase string) error {
doLink := func() error {
// Using unix.AT_EMPTY_PATH requires CAP_DAC_READ_SEARCH while this variant that uses
// /proc/self/fd doesn't and can be used with rootless.
srcPath := fmt.Sprintf("/proc/self/fd/%d", srcFd)
return unix.Linkat(unix.AT_FDCWD, srcPath, destDirFd, destBase, unix.AT_SYMLINK_FOLLOW)
}
err := doLink()
// if the destination exists, unlink it first and try again
if err != nil && os.IsExist(err) {
unix.Unlinkat(destDirFd, destBase, 0)
return doLink()
}
return err
}
func copyFileContent(srcFd int, fileMetadata *fileMetadata, dirfd int, mode os.FileMode, useHardLinks bool) (*os.File, int64, error) {
destFile := fileMetadata.Name
src := fmt.Sprintf("/proc/self/fd/%d", srcFd)
st, err := os.Stat(src)
if err != nil {
return nil, -1, fmt.Errorf("copy file content for %q: %w", destFile, err)
}
copyWithFileRange, copyWithFileClone := true, true
if useHardLinks {
destDirPath := filepath.Dir(destFile)
destBase := filepath.Base(destFile)
destDir, err := openFileUnderRoot(destDirPath, dirfd, 0, mode)
if err == nil {
defer destDir.Close()
err := doHardLink(srcFd, int(destDir.Fd()), destBase)
if err == nil {
// if the file was deduplicated with a hard link, skip overriding file metadata.
fileMetadata.skipSetAttrs = true
return nil, st.Size(), nil
}
}
}
// If the destination file already exists, we shouldn't blow it away
dstFile, err := openFileUnderRoot(destFile, dirfd, newFileFlags, mode)
if err != nil {
return nil, -1, fmt.Errorf("open file %q under rootfs for copy: %w", destFile, err)
}
err = driversCopy.CopyRegularToFile(src, dstFile, st, &copyWithFileRange, &copyWithFileClone)
if err != nil {
dstFile.Close()
return nil, -1, fmt.Errorf("copy to file %q under rootfs: %w", destFile, err)
}
return dstFile, st.Size(), nil
}
type seekableFile struct {
file *os.File
}
func (f *seekableFile) Close() error {
return f.file.Close()
}
func (f *seekableFile) GetBlobAt(chunks []ImageSourceChunk) (chan io.ReadCloser, chan error, error) {
streams := make(chan io.ReadCloser)
errs := make(chan error)
go func() {
for _, chunk := range chunks {
streams <- io.NopCloser(io.NewSectionReader(f.file, int64(chunk.Offset), int64(chunk.Length)))
}
close(streams)
close(errs)
}()
return streams, errs, nil
}
func convertTarToZstdChunked(destDirectory string, payload *os.File) (int64, *seekableFile, digest.Digest, map[string]string, error) {
func (c *chunkedDiffer) convertTarToZstdChunked(destDirectory string, payload *os.File) (int64, *seekableFile, digest.Digest, map[string]string, error) {
diff, err := archive.DecompressStream(payload)
if err != nil {
return 0, nil, "", nil, err
@ -226,7 +115,7 @@ func convertTarToZstdChunked(destDirectory string, payload *os.File) (int64, *se
fd, err := unix.Open(destDirectory, unix.O_TMPFILE|unix.O_RDWR|unix.O_CLOEXEC, 0o600)
if err != nil {
return 0, nil, "", nil, err
return 0, nil, "", nil, &fs.PathError{Op: "open", Path: destDirectory, Err: err}
}
f := os.NewFile(uintptr(fd), destDirectory)
@ -240,7 +129,7 @@ func convertTarToZstdChunked(destDirectory string, payload *os.File) (int64, *se
}
convertedOutputDigester := digest.Canonical.Digester()
copied, err := io.Copy(io.MultiWriter(chunked, convertedOutputDigester.Hash()), diff)
copied, err := io.CopyBuffer(io.MultiWriter(chunked, convertedOutputDigester.Hash()), diff, c.copyBuffer)
if err != nil {
f.Close()
return 0, nil, "", nil, err
@ -249,21 +138,15 @@ func convertTarToZstdChunked(destDirectory string, payload *os.File) (int64, *se
f.Close()
return 0, nil, "", nil, err
}
is := seekableFile{
file: f,
}
return copied, &is, convertedOutputDigester.Digest(), newAnnotations, nil
return copied, newSeekableFile(f), convertedOutputDigester.Digest(), newAnnotations, nil
}
// GetDiffer returns a differ than can be used with ApplyDiffWithDiffer.
func GetDiffer(ctx context.Context, store storage.Store, blobDigest digest.Digest, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (graphdriver.Differ, error) {
storeOpts, err := types.DefaultStoreOptions()
if err != nil {
return nil, err
}
pullOptions := store.PullOptions()
if !parseBooleanPullOption(&storeOpts, "enable_partial_images", true) {
if !parseBooleanPullOption(pullOptions, "enable_partial_images", true) {
return nil, errors.New("enable_partial_images not configured")
}
@ -279,21 +162,21 @@ func GetDiffer(ctx context.Context, store storage.Store, blobDigest digest.Diges
if err != nil {
return nil, fmt.Errorf("parsing zstd:chunked TOC digest %q: %w", zstdChunkedTOCDigestString, err)
}
return makeZstdChunkedDiffer(ctx, store, blobSize, zstdChunkedTOCDigest, annotations, iss, &storeOpts)
return makeZstdChunkedDiffer(ctx, store, blobSize, zstdChunkedTOCDigest, annotations, iss, pullOptions)
}
if hasEstargzTOC {
estargzTOCDigest, err := digest.Parse(estargzTOCDigestString)
if err != nil {
return nil, fmt.Errorf("parsing estargz TOC digest %q: %w", estargzTOCDigestString, err)
}
return makeEstargzChunkedDiffer(ctx, store, blobSize, estargzTOCDigest, iss, &storeOpts)
return makeEstargzChunkedDiffer(ctx, store, blobSize, estargzTOCDigest, iss, pullOptions)
}
return makeConvertFromRawDiffer(ctx, store, blobDigest, blobSize, annotations, iss, &storeOpts)
return makeConvertFromRawDiffer(ctx, store, blobDigest, blobSize, annotations, iss, pullOptions)
}
func makeConvertFromRawDiffer(ctx context.Context, store storage.Store, blobDigest digest.Digest, blobSize int64, annotations map[string]string, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) {
if !parseBooleanPullOption(storeOpts, "convert_images", false) {
func makeConvertFromRawDiffer(ctx context.Context, store storage.Store, blobDigest digest.Digest, blobSize int64, annotations map[string]string, iss ImageSourceSeekable, pullOptions map[string]string) (*chunkedDiffer, error) {
if !parseBooleanPullOption(pullOptions, "convert_images", false) {
return nil, errors.New("convert_images not configured")
}
@ -309,12 +192,12 @@ func makeConvertFromRawDiffer(ctx context.Context, store storage.Store, blobDige
convertToZstdChunked: true,
copyBuffer: makeCopyBuffer(),
layersCache: layersCache,
storeOpts: storeOpts,
pullOptions: pullOptions,
stream: iss,
}, nil
}
func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, tocDigest digest.Digest, annotations map[string]string, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) {
func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, tocDigest digest.Digest, annotations map[string]string, iss ImageSourceSeekable, pullOptions map[string]string) (*chunkedDiffer, error) {
manifest, toc, tarSplit, tocOffset, err := readZstdChunkedManifest(iss, tocDigest, annotations)
if err != nil {
return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
@ -333,14 +216,14 @@ func makeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize in
layersCache: layersCache,
manifest: manifest,
toc: toc,
storeOpts: storeOpts,
pullOptions: pullOptions,
stream: iss,
tarSplit: tarSplit,
tocOffset: tocOffset,
}, nil
}
func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, tocDigest digest.Digest, iss ImageSourceSeekable, storeOpts *types.StoreOptions) (*chunkedDiffer, error) {
func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, tocDigest digest.Digest, iss ImageSourceSeekable, pullOptions map[string]string) (*chunkedDiffer, error) {
manifest, tocOffset, err := readEstargzChunkedManifest(iss, blobSize, tocDigest)
if err != nil {
return nil, fmt.Errorf("read zstd:chunked manifest: %w", err)
@ -358,7 +241,7 @@ func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize
fileType: fileTypeEstargz,
layersCache: layersCache,
manifest: manifest,
storeOpts: storeOpts,
pullOptions: pullOptions,
stream: iss,
tocOffset: tocOffset,
}, nil
@ -375,15 +258,15 @@ func makeCopyBuffer() []byte {
// dirfd is an open file descriptor to the destination root directory.
// useHardLinks defines whether the deduplication can be performed using hard links.
func copyFileFromOtherLayer(file *fileMetadata, source string, name string, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
srcDirfd, err := unix.Open(source, unix.O_RDONLY, 0)
srcDirfd, err := unix.Open(source, unix.O_RDONLY|unix.O_CLOEXEC, 0)
if err != nil {
return false, nil, 0, fmt.Errorf("open source file: %w", err)
return false, nil, 0, &fs.PathError{Op: "open", Path: source, Err: err}
}
defer unix.Close(srcDirfd)
srcFile, err := openFileUnderRoot(name, srcDirfd, unix.O_RDONLY, 0)
srcFile, err := openFileUnderRoot(srcDirfd, name, unix.O_RDONLY|syscall.O_CLOEXEC, 0)
if err != nil {
return false, nil, 0, fmt.Errorf("open source file under target rootfs (%s): %w", name, err)
return false, nil, 0, err
}
defer srcFile.Close()
@ -420,7 +303,7 @@ func canDedupFileWithHardLink(file *fileMetadata, fd int, s os.FileInfo) bool {
return false
}
path := fmt.Sprintf("/proc/self/fd/%d", fd)
path := procPathForFd(fd)
listXattrs, err := system.Llistxattr(path)
if err != nil {
@ -476,7 +359,7 @@ func findFileInOSTreeRepos(file *fileMetadata, ostreeRepos []string, dirfd int,
if st.Size() != file.Size {
continue
}
fd, err := unix.Open(sourceFile, unix.O_RDONLY|unix.O_NONBLOCK, 0)
fd, err := unix.Open(sourceFile, unix.O_RDONLY|unix.O_NONBLOCK|unix.O_CLOEXEC, 0)
if err != nil {
logrus.Debugf("could not open sourceFile %s: %v", sourceFile, err)
return false, nil, 0, nil
@ -585,15 +468,15 @@ type missingPart struct {
}
func (o *originFile) OpenFile() (io.ReadCloser, error) {
srcDirfd, err := unix.Open(o.Root, unix.O_RDONLY, 0)
srcDirfd, err := unix.Open(o.Root, unix.O_RDONLY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, fmt.Errorf("open source file: %w", err)
return nil, &fs.PathError{Op: "open", Path: o.Root, Err: err}
}
defer unix.Close(srcDirfd)
srcFile, err := openFileUnderRoot(o.Path, srcDirfd, unix.O_RDONLY, 0)
srcFile, err := openFileUnderRoot(srcDirfd, o.Path, unix.O_RDONLY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, fmt.Errorf("open source file under target rootfs: %w", err)
return nil, err
}
if _, err := srcFile.Seek(o.Offset, 0); err != nil {
@ -603,253 +486,6 @@ func (o *originFile) OpenFile() (io.ReadCloser, error) {
return srcFile, nil
}
// setFileAttrs sets the file attributes for file given metadata
func setFileAttrs(dirfd int, file *os.File, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions, usePath bool) error {
if metadata.skipSetAttrs {
return nil
}
if file == nil || file.Fd() < 0 {
return errors.New("invalid file")
}
fd := int(file.Fd())
t, err := typeToTarType(metadata.Type)
if err != nil {
return err
}
// If it is a symlink, force to use the path
if t == tar.TypeSymlink {
usePath = true
}
baseName := ""
if usePath {
dirName := filepath.Dir(metadata.Name)
if dirName != "" {
parentFd, err := openFileUnderRoot(dirName, dirfd, unix.O_PATH|unix.O_DIRECTORY, 0)
if err != nil {
return err
}
defer parentFd.Close()
dirfd = int(parentFd.Fd())
}
baseName = filepath.Base(metadata.Name)
}
doChown := func() error {
if usePath {
return unix.Fchownat(dirfd, baseName, metadata.UID, metadata.GID, unix.AT_SYMLINK_NOFOLLOW)
}
return unix.Fchown(fd, metadata.UID, metadata.GID)
}
doSetXattr := func(k string, v []byte) error {
return unix.Fsetxattr(fd, k, v, 0)
}
doUtimes := func() error {
ts := []unix.Timespec{timeToTimespec(metadata.AccessTime), timeToTimespec(metadata.ModTime)}
if usePath {
return unix.UtimesNanoAt(dirfd, baseName, ts, unix.AT_SYMLINK_NOFOLLOW)
}
return unix.UtimesNanoAt(unix.AT_FDCWD, fmt.Sprintf("/proc/self/fd/%d", fd), ts, 0)
}
doChmod := func() error {
if usePath {
return unix.Fchmodat(dirfd, baseName, uint32(mode), unix.AT_SYMLINK_NOFOLLOW)
}
return unix.Fchmod(fd, uint32(mode))
}
if err := doChown(); err != nil {
if !options.IgnoreChownErrors {
return fmt.Errorf("chown %q to %d:%d: %w", metadata.Name, metadata.UID, metadata.GID, err)
}
}
canIgnore := func(err error) bool {
return err == nil || errors.Is(err, unix.ENOSYS) || errors.Is(err, unix.ENOTSUP)
}
for k, v := range metadata.Xattrs {
if _, found := xattrsToIgnore[k]; found {
continue
}
data, err := base64.StdEncoding.DecodeString(v)
if err != nil {
return fmt.Errorf("decode xattr %q: %w", v, err)
}
if err := doSetXattr(k, data); !canIgnore(err) {
return fmt.Errorf("set xattr %s=%q for %q: %w", k, data, metadata.Name, err)
}
}
if err := doUtimes(); !canIgnore(err) {
return fmt.Errorf("set utimes for %q: %w", metadata.Name, err)
}
if err := doChmod(); !canIgnore(err) {
return fmt.Errorf("chmod %q: %w", metadata.Name, err)
}
return nil
}
func openFileUnderRootFallback(dirfd int, name string, flags uint64, mode os.FileMode) (int, error) {
root := fmt.Sprintf("/proc/self/fd/%d", dirfd)
targetRoot, err := os.Readlink(root)
if err != nil {
return -1, err
}
hasNoFollow := (flags & unix.O_NOFOLLOW) != 0
var fd int
// If O_NOFOLLOW is specified in the flags, then resolve only the parent directory and use the
// last component as the path to openat().
if hasNoFollow {
dirName := filepath.Dir(name)
if dirName != "" {
newRoot, err := securejoin.SecureJoin(root, filepath.Dir(name))
if err != nil {
return -1, err
}
root = newRoot
}
parentDirfd, err := unix.Open(root, unix.O_PATH, 0)
if err != nil {
return -1, err
}
defer unix.Close(parentDirfd)
fd, err = unix.Openat(parentDirfd, filepath.Base(name), int(flags), uint32(mode))
if err != nil {
return -1, err
}
} else {
newPath, err := securejoin.SecureJoin(root, name)
if err != nil {
return -1, err
}
fd, err = unix.Openat(dirfd, newPath, int(flags), uint32(mode))
if err != nil {
return -1, err
}
}
target, err := os.Readlink(fmt.Sprintf("/proc/self/fd/%d", fd))
if err != nil {
unix.Close(fd)
return -1, err
}
// Add an additional check to make sure the opened fd is inside the rootfs
if !strings.HasPrefix(target, targetRoot) {
unix.Close(fd)
return -1, fmt.Errorf("while resolving %q. It resolves outside the root directory", name)
}
return fd, err
}
func openFileUnderRootOpenat2(dirfd int, name string, flags uint64, mode os.FileMode) (int, error) {
how := unix.OpenHow{
Flags: flags,
Mode: uint64(mode & 0o7777),
Resolve: unix.RESOLVE_IN_ROOT,
}
return unix.Openat2(dirfd, name, &how)
}
// skipOpenat2 is set when openat2 is not supported by the underlying kernel and avoid
// using it again.
var skipOpenat2 int32
// openFileUnderRootRaw tries to open a file using openat2 and if it is not supported fallbacks to a
// userspace lookup.
func openFileUnderRootRaw(dirfd int, name string, flags uint64, mode os.FileMode) (int, error) {
var fd int
var err error
if atomic.LoadInt32(&skipOpenat2) > 0 {
fd, err = openFileUnderRootFallback(dirfd, name, flags, mode)
} else {
fd, err = openFileUnderRootOpenat2(dirfd, name, flags, mode)
// If the function failed with ENOSYS, switch off the support for openat2
// and fallback to using safejoin.
if err != nil && errors.Is(err, unix.ENOSYS) {
atomic.StoreInt32(&skipOpenat2, 1)
fd, err = openFileUnderRootFallback(dirfd, name, flags, mode)
}
}
return fd, err
}
// openFileUnderRoot safely opens a file under the specified root directory using openat2
// name is the path to open relative to dirfd.
// dirfd is an open file descriptor to the target checkout directory.
// flags are the flags to pass to the open syscall.
// mode specifies the mode to use for newly created files.
func openFileUnderRoot(name string, dirfd int, flags uint64, mode os.FileMode) (*os.File, error) {
fd, err := openFileUnderRootRaw(dirfd, name, flags, mode)
if err == nil {
return os.NewFile(uintptr(fd), name), nil
}
hasCreate := (flags & unix.O_CREAT) != 0
if errors.Is(err, unix.ENOENT) && hasCreate {
parent := filepath.Dir(name)
if parent != "" {
newDirfd, err2 := openOrCreateDirUnderRoot(parent, dirfd, 0)
if err2 == nil {
defer newDirfd.Close()
fd, err := openFileUnderRootRaw(int(newDirfd.Fd()), filepath.Base(name), flags, mode)
if err == nil {
return os.NewFile(uintptr(fd), name), nil
}
}
}
}
return nil, fmt.Errorf("open %q under the rootfs: %w", name, err)
}
// openOrCreateDirUnderRoot safely opens a directory or create it if it is missing.
// name is the path to open relative to dirfd.
// dirfd is an open file descriptor to the target checkout directory.
// mode specifies the mode to use for newly created files.
func openOrCreateDirUnderRoot(name string, dirfd int, mode os.FileMode) (*os.File, error) {
fd, err := openFileUnderRootRaw(dirfd, name, unix.O_DIRECTORY|unix.O_RDONLY, mode)
if err == nil {
return os.NewFile(uintptr(fd), name), nil
}
if errors.Is(err, unix.ENOENT) {
parent := filepath.Dir(name)
if parent != "" {
pDir, err2 := openOrCreateDirUnderRoot(parent, dirfd, mode)
if err2 != nil {
return nil, err
}
defer pDir.Close()
baseName := filepath.Base(name)
if err2 := unix.Mkdirat(int(pDir.Fd()), baseName, 0o755); err2 != nil {
return nil, err
}
fd, err = openFileUnderRootRaw(int(pDir.Fd()), baseName, unix.O_DIRECTORY|unix.O_RDONLY, mode)
if err == nil {
return os.NewFile(uintptr(fd), name), nil
}
}
}
return nil, err
}
func (c *chunkedDiffer) prepareCompressedStreamToFile(partCompression compressedFileType, from io.Reader, mf *missingFileChunk) (compressedFileType, error) {
switch {
case partCompression == fileTypeHole:
@ -918,19 +554,6 @@ func hashHole(h hash.Hash, size int64, copyBuffer []byte) error {
return nil
}
// appendHole creates a hole with the specified size at the open fd.
func appendHole(fd int, size int64) error {
off, err := unix.Seek(fd, size, unix.SEEK_CUR)
if err != nil {
return err
}
// Make sure the file size is changed. It might be the last hole and no other data written afterwards.
if err := unix.Ftruncate(fd, off); err != nil {
return err
}
return nil
}
func (c *chunkedDiffer) appendCompressedStreamToFile(compression compressedFileType, destFile *destinationFile, size int64) error {
switch compression {
case fileTypeZstdChunked:
@ -948,7 +571,7 @@ func (c *chunkedDiffer) appendCompressedStreamToFile(compression compressedFileT
return err
}
case fileTypeHole:
if err := appendHole(int(destFile.file.Fd()), size); err != nil {
if err := appendHole(int(destFile.file.Fd()), destFile.metadata.Name, size); err != nil {
return err
}
if destFile.hash != nil {
@ -977,7 +600,7 @@ type destinationFile struct {
}
func openDestinationFile(dirfd int, metadata *fileMetadata, options *archive.TarOptions, skipValidation bool, recordFsVerity recordFsVerityFunc) (*destinationFile, error) {
file, err := openFileUnderRoot(metadata.Name, dirfd, newFileFlags, 0)
file, err := openFileUnderRoot(dirfd, metadata.Name, newFileFlags, 0)
if err != nil {
return nil, err
}
@ -1339,161 +962,6 @@ func (c *chunkedDiffer) retrieveMissingFiles(stream ImageSourceSeekable, dest st
return nil
}
func safeMkdir(dirfd int, mode os.FileMode, name string, metadata *fileMetadata, options *archive.TarOptions) error {
parent := filepath.Dir(name)
base := filepath.Base(name)
parentFd := dirfd
if parent != "." {
parentFile, err := openOrCreateDirUnderRoot(parent, dirfd, 0)
if err != nil {
return err
}
defer parentFile.Close()
parentFd = int(parentFile.Fd())
}
if err := unix.Mkdirat(parentFd, base, uint32(mode)); err != nil {
if !os.IsExist(err) {
return fmt.Errorf("mkdir %q: %w", name, err)
}
}
file, err := openFileUnderRoot(base, parentFd, unix.O_DIRECTORY|unix.O_RDONLY, 0)
if err != nil {
return err
}
defer file.Close()
return setFileAttrs(dirfd, file, mode, metadata, options, false)
}
func safeLink(dirfd int, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions) error {
sourceFile, err := openFileUnderRoot(metadata.Linkname, dirfd, unix.O_PATH|unix.O_RDONLY|unix.O_NOFOLLOW, 0)
if err != nil {
return err
}
defer sourceFile.Close()
destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name)
destDirFd := dirfd
if destDir != "." {
f, err := openOrCreateDirUnderRoot(destDir, dirfd, 0)
if err != nil {
return err
}
defer f.Close()
destDirFd = int(f.Fd())
}
err = doHardLink(int(sourceFile.Fd()), destDirFd, destBase)
if err != nil {
return fmt.Errorf("create hardlink %q pointing to %q: %w", metadata.Name, metadata.Linkname, err)
}
newFile, err := openFileUnderRoot(metadata.Name, dirfd, unix.O_WRONLY|unix.O_NOFOLLOW, 0)
if err != nil {
// If the target is a symlink, open the file with O_PATH.
if errors.Is(err, unix.ELOOP) {
newFile, err := openFileUnderRoot(metadata.Name, dirfd, unix.O_PATH|unix.O_NOFOLLOW, 0)
if err != nil {
return err
}
defer newFile.Close()
return setFileAttrs(dirfd, newFile, mode, metadata, options, true)
}
return err
}
defer newFile.Close()
return setFileAttrs(dirfd, newFile, mode, metadata, options, false)
}
func safeSymlink(dirfd int, mode os.FileMode, metadata *fileMetadata, options *archive.TarOptions) error {
destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name)
destDirFd := dirfd
if destDir != "." {
f, err := openOrCreateDirUnderRoot(destDir, dirfd, 0)
if err != nil {
return err
}
defer f.Close()
destDirFd = int(f.Fd())
}
if err := unix.Symlinkat(metadata.Linkname, destDirFd, destBase); err != nil {
return fmt.Errorf("create symlink %q pointing to %q: %w", metadata.Name, metadata.Linkname, err)
}
return nil
}
type whiteoutHandler struct {
Dirfd int
Root string
}
func (d whiteoutHandler) Setxattr(path, name string, value []byte) error {
file, err := openOrCreateDirUnderRoot(path, d.Dirfd, 0)
if err != nil {
return err
}
defer file.Close()
if err := unix.Fsetxattr(int(file.Fd()), name, value, 0); err != nil {
return fmt.Errorf("set xattr %s=%q for %q: %w", name, value, path, err)
}
return nil
}
func (d whiteoutHandler) Mknod(path string, mode uint32, dev int) error {
dir := filepath.Dir(path)
base := filepath.Base(path)
dirfd := d.Dirfd
if dir != "" {
dir, err := openOrCreateDirUnderRoot(dir, d.Dirfd, 0)
if err != nil {
return err
}
defer dir.Close()
dirfd = int(dir.Fd())
}
if err := unix.Mknodat(dirfd, base, mode, dev); err != nil {
return fmt.Errorf("mknod %q: %w", path, err)
}
return nil
}
func checkChownErr(err error, name string, uid, gid int) error {
if errors.Is(err, syscall.EINVAL) {
return fmt.Errorf(`potentially insufficient UIDs or GIDs available in user namespace (requested %d:%d for %s): Check /etc/subuid and /etc/subgid if configured locally and run "podman system migrate": %w`, uid, gid, name, err)
}
return err
}
func (d whiteoutHandler) Chown(path string, uid, gid int) error {
file, err := openFileUnderRoot(path, d.Dirfd, unix.O_PATH, 0)
if err != nil {
return err
}
defer file.Close()
if err := unix.Fchownat(int(file.Fd()), "", uid, gid, unix.AT_EMPTY_PATH); err != nil {
var stat unix.Stat_t
if unix.Fstat(int(file.Fd()), &stat) == nil {
if stat.Uid == uint32(uid) && stat.Gid == uint32(gid) {
return nil
}
}
return checkChownErr(err, path, uid, gid)
}
return nil
}
type hardLinkToCreate struct {
dest string
dirfd int
@ -1501,8 +969,8 @@ type hardLinkToCreate struct {
metadata *fileMetadata
}
func parseBooleanPullOption(storeOpts *storage.StoreOptions, name string, def bool) bool {
if value, ok := storeOpts.PullOptions[name]; ok {
func parseBooleanPullOption(pullOptions map[string]string, name string, def bool) bool {
if value, ok := pullOptions[name]; ok {
return strings.ToLower(value) == "true"
}
return def
@ -1515,10 +983,10 @@ type findAndCopyFileOptions struct {
}
func reopenFileReadOnly(f *os.File) (*os.File, error) {
path := fmt.Sprintf("/proc/self/fd/%d", f.Fd())
path := procPathForFile(f)
fd, err := unix.Open(path, unix.O_RDONLY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, err
return nil, &fs.PathError{Op: "open", Path: path, Err: err}
}
return os.NewFile(uintptr(fd), f.Name()), nil
}
@ -1636,7 +1104,7 @@ func (c *chunkedDiffer) copyAllBlobToFile(destination *os.File) (digest.Digest,
r := io.TeeReader(payload, originalRawDigester.Hash())
// copy the entire tarball and compute its digest
_, err = io.Copy(destination, r)
_, err = io.CopyBuffer(destination, r, c.copyBuffer)
return originalRawDigester.Digest(), err
}
@ -1660,7 +1128,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
if c.convertToZstdChunked {
fd, err := unix.Open(dest, unix.O_TMPFILE|unix.O_RDWR|unix.O_CLOEXEC, 0o600)
if err != nil {
return graphdriver.DriverWithDifferOutput{}, err
return graphdriver.DriverWithDifferOutput{}, &fs.PathError{Op: "open", Path: dest, Err: err}
}
blobFile := os.NewFile(uintptr(fd), "blob-file")
defer func() {
@ -1683,7 +1151,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
return graphdriver.DriverWithDifferOutput{}, err
}
tarSize, fileSource, diffID, annotations, err := convertTarToZstdChunked(dest, blobFile)
tarSize, fileSource, diffID, annotations, err := c.convertTarToZstdChunked(dest, blobFile)
if err != nil {
return graphdriver.DriverWithDifferOutput{}, err
}
@ -1760,10 +1228,10 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
// When the hard links deduplication is used, file attributes are ignored because setting them
// modifies the source file as well.
useHardLinks := parseBooleanPullOption(c.storeOpts, "use_hard_links", false)
useHardLinks := parseBooleanPullOption(c.pullOptions, "use_hard_links", false)
// List of OSTree repositories to use for deduplication
ostreeRepos := strings.Split(c.storeOpts.PullOptions["ostree_repos"], ":")
ostreeRepos := strings.Split(c.pullOptions["ostree_repos"], ":")
whiteoutConverter := archive.GetWhiteoutConverter(options.WhiteoutFormat, options.WhiteoutData)
@ -1790,16 +1258,19 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
if options.ForceMask != nil {
uid, gid, mode, err := archive.GetFileOwner(dest)
if err == nil {
value := fmt.Sprintf("%d:%d:0%o", uid, gid, mode)
if err := unix.Setxattr(dest, containersOverrideXattr, []byte(value), 0); err != nil {
value := idtools.Stat{
IDs: idtools.IDPair{UID: int(uid), GID: int(gid)},
Mode: os.FileMode(mode),
}
if err := idtools.SetContainersOverrideXattr(dest, value); err != nil {
return output, err
}
}
}
dirfd, err := unix.Open(dest, unix.O_RDONLY|unix.O_PATH, 0)
dirfd, err := unix.Open(dest, unix.O_RDONLY|unix.O_PATH|unix.O_CLOEXEC, 0)
if err != nil {
return output, fmt.Errorf("cannot open %q: %w", dest, err)
return output, &fs.PathError{Op: "open", Path: dest, Err: err}
}
defer unix.Close(dirfd)
@ -1868,11 +1339,14 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
}
filesToWaitFor := 0
for i, r := range mergedEntries {
for i := range mergedEntries {
r := &mergedEntries[i]
if options.ForceMask != nil {
value := fmt.Sprintf("%d:%d:0%o", r.UID, r.GID, r.Mode&0o7777)
r.Xattrs[containersOverrideXattr] = base64.StdEncoding.EncodeToString([]byte(value))
r.Mode = int64(*options.ForceMask)
value := idtools.FormatContainersOverrideXattr(r.UID, r.GID, int(r.Mode))
if r.Xattrs == nil {
r.Xattrs = make(map[string]string)
}
r.Xattrs[idtools.ContainersOverrideXattr] = base64.StdEncoding.EncodeToString([]byte(value))
}
mode := os.FileMode(r.Mode)
@ -1916,12 +1390,12 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
if r.Size == 0 {
// Used to have a scope for cleanup.
createEmptyFile := func() error {
file, err := openFileUnderRoot(r.Name, dirfd, newFileFlags, 0)
file, err := openFileUnderRoot(dirfd, r.Name, newFileFlags, 0)
if err != nil {
return err
}
defer file.Close()
if err := setFileAttrs(dirfd, file, mode, &r, options, false); err != nil {
if err := setFileAttrs(dirfd, file, mode, r, options, false); err != nil {
return err
}
return nil
@ -1936,7 +1410,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
if r.Name == "" || r.Name == "." {
output.RootDirMode = &mode
}
if err := safeMkdir(dirfd, mode, r.Name, &r, options); err != nil {
if err := safeMkdir(dirfd, mode, r.Name, r, options); err != nil {
return output, err
}
continue
@ -1950,12 +1424,12 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff
dest: dest,
dirfd: dirfd,
mode: mode,
metadata: &r,
metadata: r,
})
continue
case tar.TypeSymlink:
if err := safeSymlink(dirfd, mode, &r, options); err != nil {
if err := safeSymlink(dirfd, mode, r, options); err != nil {
return output, err
}
continue
@ -2167,13 +1641,13 @@ func (c *chunkedDiffer) mergeTocEntries(fileType compressedFileType, entries []i
// validateChunkChecksum checks if the file at $root/$path[offset:chunk.ChunkSize] has the
// same digest as chunk.ChunkDigest
func validateChunkChecksum(chunk *internal.FileMetadata, root, path string, offset int64, copyBuffer []byte) bool {
parentDirfd, err := unix.Open(root, unix.O_PATH, 0)
parentDirfd, err := unix.Open(root, unix.O_PATH|unix.O_CLOEXEC, 0)
if err != nil {
return false
}
defer unix.Close(parentDirfd)
fd, err := openFileUnderRoot(path, parentDirfd, unix.O_RDONLY, 0)
fd, err := openFileUnderRoot(parentDirfd, path, unix.O_RDONLY|unix.O_CLOEXEC, 0)
if err != nil {
return false
}

View File

@ -75,10 +75,6 @@ type OptionsConfig struct {
// Size
Size string `toml:"size,omitempty"`
// RemapUIDs is a list of default UID mappings to use for layers.
RemapUIDs string `toml:"remap-uids,omitempty"`
// RemapGIDs is a list of default GID mappings to use for layers.
RemapGIDs string `toml:"remap-gids,omitempty"`
// IgnoreChownErrors is a flag for whether chown errors should be
// ignored when building an image.
IgnoreChownErrors string `toml:"ignore_chown_errors,omitempty"`
@ -90,13 +86,6 @@ type OptionsConfig struct {
// files and directories.
ForceMask os.FileMode `toml:"force_mask,omitempty"`
// RemapUser is the name of one or more entries in /etc/subuid which
// should be used to set up default UID mappings.
RemapUser string `toml:"remap-user,omitempty"`
// RemapGroup is the name of one or more entries in /etc/subgid which
// should be used to set up default GID mappings.
RemapGroup string `toml:"remap-group,omitempty"`
// RootAutoUsernsUser is the name of one or more entries in /etc/subuid and
// /etc/subgid which should be used to set up automatically a userns.
RootAutoUsernsUser string `toml:"root-auto-userns-user,omitempty"`

View File

@ -1,5 +1,5 @@
//go:build linux || darwin || freebsd || solaris
// +build linux darwin freebsd solaris
//go:build !windows
// +build !windows
package directory

View File

@ -1,38 +0,0 @@
//go:build !linux && !darwin && !freebsd && !windows
// +build !linux,!darwin,!freebsd,!windows
package homedir
// Copyright 2013-2018 Docker, Inc.
// NOTE: this package has originally been copied from github.com/docker/docker.
import (
"errors"
"os"
"path/filepath"
)
// GetRuntimeDir is unsupported on non-linux system.
func GetRuntimeDir() (string, error) {
return "", errors.New("homedir.GetRuntimeDir() is not supported on this system")
}
// StickRuntimeDirContents is unsupported on non-linux system.
func StickRuntimeDirContents(files []string) ([]string, error) {
return nil, errors.New("homedir.StickRuntimeDirContents() is not supported on this system")
}
// GetConfigHome returns XDG_CONFIG_HOME.
// GetConfigHome returns $HOME/.config and nil error if XDG_CONFIG_HOME is not set.
//
// See also https://standards.freedesktop.org/basedir-spec/latest/ar01s03.html
func GetConfigHome() (string, error) {
if xdgConfigHome := os.Getenv("XDG_CONFIG_HOME"); xdgConfigHome != "" {
return xdgConfigHome, nil
}
home := Get()
if home == "" {
return "", errors.New("could not get either XDG_CONFIG_HOME or HOME")
}
return filepath.Join(home, ".config"), nil
}

View File

@ -367,21 +367,77 @@ func checkChownErr(err error, name string, uid, gid int) error {
return err
}
// Stat contains the file ownership and permission attributes that can be
// overridden with the ContainersOverrideXattr extended attribute.
type Stat struct {
	IDs  IDPair      // overriding UID/GID pair
	Mode os.FileMode // overriding permission bits
}
// FormatContainersOverrideXattr encodes the given uid, gid, and mode as a
// "uid:gid:0mode" string suitable as the value of the ContainersOverrideXattr
// extended attribute. Only the permission bits (0o7777) of mode are kept.
func FormatContainersOverrideXattr(uid, gid, mode int) string {
	permBits := mode & 0o7777
	return fmt.Sprintf("%d:%d:0%o", uid, gid, permBits)
}
// GetContainersOverrideXattr reads the ContainersOverrideXattr extended
// attribute from path and decodes its "uid:gid:0mode" value into a Stat.
// It returns an error if the attribute is missing, malformed, or any of the
// three colon-separated fields cannot be parsed.
func GetContainersOverrideXattr(path string) (Stat, error) {
	var stat Stat
	xstat, err := system.Lgetxattr(path, ContainersOverrideXattr)
	if err != nil {
		return stat, err
	}

	// The value is expected to be "uid:gid:0mode".
	attrs := strings.Split(string(xstat), ":")
	if len(attrs) != 3 {
		return stat, fmt.Errorf("malformed %s: expected 3 colon-separated fields, got %d",
			ContainersOverrideXattr, len(attrs))
	}

	value, err := strconv.ParseUint(attrs[0], 10, 32)
	if err != nil {
		return stat, fmt.Errorf("parsing UID: %w", err)
	}
	stat.IDs.UID = int(value)

	// BUGFIX: the GID is the second field; the previous code re-parsed
	// attrs[0] and silently set the GID to the UID value.
	value, err = strconv.ParseUint(attrs[1], 10, 32)
	if err != nil {
		return stat, fmt.Errorf("parsing GID: %w", err)
	}
	stat.IDs.GID = int(value)

	// The mode field is octal, matching FormatContainersOverrideXattr's "0%o".
	value, err = strconv.ParseUint(attrs[2], 8, 32)
	if err != nil {
		return stat, fmt.Errorf("parsing mode: %w", err)
	}
	stat.Mode = os.FileMode(value)

	return stat, nil
}
// SetContainersOverrideXattr encodes stat with FormatContainersOverrideXattr
// and stores it on path as the ContainersOverrideXattr extended attribute.
func SetContainersOverrideXattr(path string, stat Stat) error {
	encoded := FormatContainersOverrideXattr(stat.IDs.UID, stat.IDs.GID, int(stat.Mode))
	return system.Lsetxattr(path, ContainersOverrideXattr, []byte(encoded), 0)
}
func SafeChown(name string, uid, gid int) error {
if runtime.GOOS == "darwin" {
var mode uint64 = 0o0700
var mode os.FileMode = 0o0700
xstat, err := system.Lgetxattr(name, ContainersOverrideXattr)
if err == nil {
attrs := strings.Split(string(xstat), ":")
if len(attrs) == 3 {
val, err := strconv.ParseUint(attrs[2], 8, 32)
if err == nil {
mode = val
mode = os.FileMode(val)
}
}
}
value := fmt.Sprintf("%d:%d:0%o", uid, gid, mode)
if err = system.Lsetxattr(name, ContainersOverrideXattr, []byte(value), 0); err != nil {
value := Stat{IDPair{uid, gid}, mode}
if err = SetContainersOverrideXattr(name, value); err != nil {
return err
}
uid = os.Getuid()
@ -397,19 +453,19 @@ func SafeChown(name string, uid, gid int) error {
func SafeLchown(name string, uid, gid int) error {
if runtime.GOOS == "darwin" {
var mode uint64 = 0o0700
var mode os.FileMode = 0o0700
xstat, err := system.Lgetxattr(name, ContainersOverrideXattr)
if err == nil {
attrs := strings.Split(string(xstat), ":")
if len(attrs) == 3 {
val, err := strconv.ParseUint(attrs[2], 8, 32)
if err == nil {
mode = val
mode = os.FileMode(val)
}
}
}
value := fmt.Sprintf("%d:%d:0%o", uid, gid, mode)
if err = system.Lsetxattr(name, ContainersOverrideXattr, []byte(value), 0); err != nil {
value := Stat{IDPair{uid, gid}, mode}
if err = SetContainersOverrideXattr(name, value); err != nil {
return err
}
uid = os.Getuid()

View File

@ -426,10 +426,13 @@ func (l *LockFile) lock(lType lockType) {
// command.
func (l *LockFile) tryLock(lType lockType) error {
var success bool
var rwMutexUnlocker func()
if lType == readLock {
success = l.rwMutex.TryRLock()
rwMutexUnlocker = l.rwMutex.RUnlock
} else {
success = l.rwMutex.TryLock()
rwMutexUnlocker = l.rwMutex.Unlock
}
if !success {
return fmt.Errorf("resource temporarily unavailable")
@ -440,7 +443,7 @@ func (l *LockFile) tryLock(lType lockType) error {
// If we're the first reference on the lock, we need to open the file again.
fd, err := openLock(l.file, l.ro)
if err != nil {
l.rwMutex.Unlock()
rwMutexUnlocker()
return err
}
l.fd = fd
@ -450,7 +453,7 @@ func (l *LockFile) tryLock(lType lockType) error {
// reader lock or a writer lock.
if err = lockHandle(l.fd, lType, true); err != nil {
closeHandle(fd)
l.rwMutex.Unlock()
rwMutexUnlocker()
return err
}
}

View File

@ -1,5 +1,5 @@
//go:build linux || solaris || darwin || freebsd
// +build linux solaris darwin freebsd
//go:build !windows
// +build !windows
package lockfile

View File

@ -1,5 +1,18 @@
package mount
import "github.com/moby/sys/mountinfo"
import (
"fmt"
"os"
var PidMountInfo = mountinfo.PidMountInfo
"github.com/moby/sys/mountinfo"
)
// PidMountInfo returns the mount table visible to the process with the given
// pid, parsed from /proc/<pid>/mountinfo. It is Linux-specific (relies on
// procfs) and returns an error if the file cannot be opened or parsed.
func PidMountInfo(pid int) ([]*Info, error) {
	f, err := os.Open(fmt.Sprintf("/proc/%d/mountinfo", pid))
	if err != nil {
		return nil, err
	}
	defer f.Close()
	return mountinfo.GetMountsFromReader(f, nil)
}

View File

@ -8,6 +8,8 @@ package kernel
import (
"errors"
"fmt"
"github.com/sirupsen/logrus"
)
// VersionInfo holds information about the kernel.
@ -46,6 +48,19 @@ func CompareKernelVersion(a, b VersionInfo) int {
return 0
}
// CheckKernelVersion checks if current kernel is newer than (or equal to)
// the given version.
func CheckKernelVersion(k, major, minor int) bool {
	v, err := GetKernelVersion()
	if err != nil {
		// If the running kernel version cannot be determined, log a warning
		// and optimistically treat the kernel as new enough.
		logrus.Warnf("Error getting kernel version: %s", err)
		return true
	}
	wanted := VersionInfo{Kernel: k, Major: major, Minor: minor}
	return CompareKernelVersion(*v, wanted) >= 0
}
// ParseRelease parses a string and creates a VersionInfo based on it.
func ParseRelease(release string) (*VersionInfo, error) {
var (

View File

@ -1,9 +1,10 @@
//go:build unix && !darwin
// Package kernel provides helper function to get, parse and compare kernel
// versions for different platforms.
package kernel
import (
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
@ -17,16 +18,3 @@ func GetKernelVersion() (*VersionInfo, error) {
return ParseRelease(unix.ByteSliceToString(uts.Release[:]))
}
// CheckKernelVersion checks if current kernel is newer than (or equal to)
// the given version.
func CheckKernelVersion(k, major, minor int) bool {
if v, err := GetKernelVersion(); err != nil {
logrus.Warnf("Error getting kernel version: %s", err)
} else {
if CompareKernelVersion(*v, VersionInfo{Kernel: k, Major: major, Minor: minor}) < 0 {
return false
}
}
return true
}

View File

@ -1,5 +1,5 @@
//go:build linux || freebsd || darwin
// +build linux freebsd darwin
//go:build !windows
// +build !windows
package system

View File

@ -526,8 +526,11 @@ func MaybeReexecUsingUserNamespace(evenForRoot bool) {
} else {
// If we have CAP_SYS_ADMIN, then we don't need to create a new namespace in order to be able
// to use unshare(), so don't bother creating a new user namespace at this point.
capabilities, err := capability.NewPid(0)
capabilities, err := capability.NewPid2(0)
bailOnError(err, "Initializing a new Capabilities object of pid 0")
err = capabilities.Load()
bailOnError(err, "Reading the current capabilities sets")
if capabilities.Get(capability.EFFECTIVE, capability.CAP_SYS_ADMIN) {
return
}

View File

@ -19,6 +19,10 @@ driver = "overlay"
# Temporary storage location
runroot = "/run/containers/storage"
# Priority list for the storage drivers that will be tested one
# after the other to pick the storage driver if it is not defined.
# driver_priority = ["overlay", "btrfs"]
# Primary Read/Write location of container storage
# When changing the graphroot location on an SELINUX system, you must
# ensure the labeling matches the default locations labels with the
@ -77,28 +81,6 @@ additionalimagestores = [
# operation so it is not enabled by default.
pull_options = {enable_partial_images = "true", use_hard_links = "false", ostree_repos=""}
# Remap-UIDs/GIDs is the mapping from UIDs/GIDs as they should appear inside of
# a container, to the UIDs/GIDs as they should appear outside of the container,
# and the length of the range of UIDs/GIDs. Additional mapped sets can be
# listed and will be heeded by libraries, but there are limits to the number of
# mappings which the kernel will allow when you later attempt to run a
# container.
#
# remap-uids = "0:1668442479:65536"
# remap-gids = "0:1668442479:65536"
# Remap-User/Group is a user name which can be used to look up one or more UID/GID
# ranges in the /etc/subuid or /etc/subgid file. Mappings are set up starting
# with an in-container ID of 0 and then a host-level ID taken from the lowest
# range that matches the specified name, and using the length of that range.
# Additional ranges are then assigned, using the ranges which specify the
# lowest host-level IDs first, to the lowest not-yet-mapped in-container ID,
# until all of the entries have been used for maps. This setting overrides the
# Remap-UIDs/GIDs setting.
#
# remap-user = "containers"
# remap-group = "containers"
# Root-auto-userns-user is a user name which can be used to look up one or more UID/GID
# ranges in the /etc/subuid and /etc/subgid file. These ranges will be partitioned
# to containers configured to create automatically a user namespace. Containers

View File

@ -39,27 +39,6 @@ graphroot = "/var/db/containers/storage"
additionalimagestores = [
]
# Remap-UIDs/GIDs is the mapping from UIDs/GIDs as they should appear inside of
# a container, to the UIDs/GIDs as they should appear outside of the container,
# and the length of the range of UIDs/GIDs. Additional mapped sets can be
# listed and will be heeded by libraries, but there are limits to the number of
# mappings which the kernel will allow when you later attempt to run a
# container.
#
# remap-uids = 0:1668442479:65536
# remap-gids = 0:1668442479:65536
# Remap-User/Group is a user name which can be used to look up one or more UID/GID
# ranges in the /etc/subuid or /etc/subgid file. Mappings are set up starting
# with an in-container ID of 0 and then a host-level ID taken from the lowest
# range that matches the specified name, and using the length of that range.
# Additional ranges are then assigned, using the ranges which specify the
# lowest host-level IDs first, to the lowest not-yet-mapped in-container ID,
# until all of the entries have been used for maps.
#
# remap-user = "containers"
# remap-group = "containers"
# Root-auto-userns-user is a user name which can be used to look up one or more UID/GID
# ranges in the /etc/subuid and /etc/subgid file. These ranges will be partitioned
# to containers configured to create automatically a user namespace. Containers

View File

@ -1088,8 +1088,6 @@ func (s *store) createGraphDriverLocked() (drivers.Driver, error) {
RunRoot: s.runRoot,
DriverPriority: s.graphDriverPriority,
DriverOptions: s.graphOptions,
UIDMaps: s.uidMap,
GIDMaps: s.gidMap,
}
return drivers.New(s.graphDriverName, config)
}
@ -1437,7 +1435,9 @@ func (s *store) canUseShifting(uidmap, gidmap []idtools.IDMap) bool {
return true
}
// putLayer requires the rlstore, rlstores, as well as s.containerStore (even if not an argument to this function) to be locked for write.
// On entry:
// - rlstore must be locked for writing
// - rlstores MUST NOT be locked
func (s *store) putLayer(rlstore rwLayerStore, rlstores []roLayerStore, id, parent string, names []string, mountLabel string, writeable bool, lOptions *LayerOptions, diff io.Reader, slo *stagedLayerOptions) (*Layer, int64, error) {
var parentLayer *Layer
var options LayerOptions
@ -1474,6 +1474,11 @@ func (s *store) putLayer(rlstore rwLayerStore, rlstores []roLayerStore, id, pare
return nil, -1, ErrLayerUnknown
}
parentLayer = ilayer
if err := s.containerStore.startWriting(); err != nil {
return nil, -1, err
}
defer s.containerStore.stopWriting()
containers, err := s.containerStore.Containers()
if err != nil {
return nil, -1, err
@ -1490,6 +1495,13 @@ func (s *store) putLayer(rlstore rwLayerStore, rlstores []roLayerStore, id, pare
gidMap = ilayer.GIDMap
}
} else {
// FIXME? It's unclear why we are holding containerStore locked here at all
// (and because we are not modifying it, why it is a write lock, not a read lock).
if err := s.containerStore.startWriting(); err != nil {
return nil, -1, err
}
defer s.containerStore.stopWriting()
if !options.HostUIDMapping && len(options.UIDMap) == 0 {
uidMap = s.uidMap
}
@ -1525,10 +1537,6 @@ func (s *store) PutLayer(id, parent string, names []string, mountLabel string, w
return nil, -1, err
}
defer rlstore.stopWriting()
if err := s.containerStore.startWriting(); err != nil {
return nil, -1, err
}
defer s.containerStore.stopWriting()
return s.putLayer(rlstore, rlstores, id, parent, names, mountLabel, writeable, lOptions, diff, nil)
}
@ -3009,16 +3017,14 @@ func (s *store) ApplyStagedLayer(args ApplyStagedLayerOptions) (*Layer, error) {
return layer, err
}
if err == nil {
// This code path exists only for cmd/containers/storage.applyDiffUsingStagingDirectory; we have tests that
// assume layer creation and applying a staged layer are separate steps. Production pull code always uses the
// other path, where layer creation is atomic.
return layer, rlstore.applyDiffFromStagingDirectory(args.ID, args.DiffOutput, args.DiffOptions)
}
// if the layer doesn't exist yet, try to create it.
if err := s.containerStore.startWriting(); err != nil {
return nil, err
}
defer s.containerStore.stopWriting()
slo := stagedLayerOptions{
DiffOutput: args.DiffOutput,
DiffOptions: args.DiffOptions,

View File

@ -481,33 +481,6 @@ func ReloadConfigurationFile(configFile string, storeOptions *StoreOptions) erro
if config.Storage.Options.MountOpt != "" {
storeOptions.GraphDriverOptions = append(storeOptions.GraphDriverOptions, fmt.Sprintf("%s.mountopt=%s", config.Storage.Driver, config.Storage.Options.MountOpt))
}
uidmap, err := idtools.ParseIDMap([]string{config.Storage.Options.RemapUIDs}, "remap-uids")
if err != nil {
return err
}
gidmap, err := idtools.ParseIDMap([]string{config.Storage.Options.RemapGIDs}, "remap-gids")
if err != nil {
return err
}
if config.Storage.Options.RemapUser != "" && config.Storage.Options.RemapGroup == "" {
config.Storage.Options.RemapGroup = config.Storage.Options.RemapUser
}
if config.Storage.Options.RemapGroup != "" && config.Storage.Options.RemapUser == "" {
config.Storage.Options.RemapUser = config.Storage.Options.RemapGroup
}
if config.Storage.Options.RemapUser != "" && config.Storage.Options.RemapGroup != "" {
mappings, err := idtools.NewIDMappings(config.Storage.Options.RemapUser, config.Storage.Options.RemapGroup)
if err != nil {
logrus.Warningf("Error initializing ID mappings for %s:%s %v\n", config.Storage.Options.RemapUser, config.Storage.Options.RemapGroup, err)
return err
}
uidmap = mappings.UIDs()
gidmap = mappings.GIDs()
}
storeOptions.UIDMap = uidmap
storeOptions.GIDMap = gidmap
storeOptions.RootAutoNsUser = config.Storage.Options.RootAutoUsernsUser
if config.Storage.Options.AutoUsernsMinSize > 0 {
storeOptions.AutoNsMinSize = config.Storage.Options.AutoUsernsMinSize

View File

@ -1,3 +1,5 @@
//go:build freebsd || netbsd
package types
const (

View File

@ -25,16 +25,6 @@ rootless_storage_path = "$HOME/$UID/containers/storage"
additionalimagestores = [
]
# Remap-UIDs/GIDs is the mapping from UIDs/GIDs as they should appear inside of
# a container, to the UIDs/GIDs as they should appear outside of the container,
# and the length of the range of UIDs/GIDs. Additional mapped sets can be
# listed and will be heeded by libraries, but there are limits to the number of
# mappings which the kernel will allow when you later attempt to run a
# container.
#
remap-uids = "0:1000000000:30000"
remap-gids = "0:1500000000:60000"
[storage.options.overlay]
# mountopt specifies comma separated list of extra mount options

View File

@ -1,48 +0,0 @@
## pwalk: parallel implementation of filepath.Walk
This is a wrapper for [filepath.Walk](https://pkg.go.dev/path/filepath?tab=doc#Walk)
which may speed it up by calling multiple callback functions (WalkFunc) in parallel,
utilizing goroutines.
By default, it utilizes 2\*runtime.NumCPU() goroutines for callbacks.
This can be changed by using WalkN function which has the additional
parameter, specifying the number of goroutines (concurrency).
### pwalk vs pwalkdir
This package is deprecated in favor of
[pwalkdir](https://pkg.go.dev/github.com/opencontainers/selinux/pkg/pwalkdir),
which is faster, but requires at least Go 1.16.
### Caveats
Please note the following limitations of this code:
* Unlike filepath.Walk, the order of calls is non-deterministic;
* Only primitive error handling is supported:
* filepath.SkipDir is not supported;
* no errors are ever passed to WalkFunc;
* once any error is returned from any WalkFunc instance, no more new calls
to WalkFunc are made, and the error is returned to the caller of Walk;
* if more than one walkFunc instance will return an error, only one
of such errors will be propagated and returned by Walk, others
will be silently discarded.
### Documentation
For the official documentation, see
https://pkg.go.dev/github.com/opencontainers/selinux/pkg/pwalk?tab=doc
### Benchmarks
For a WalkFunc that consists solely of the return statement, this
implementation is about 10% slower than the standard library's
filepath.Walk.
Otherwise (if a WalkFunc is doing something) this is usually faster,
except when the WalkN(..., 1) is used.

View File

@ -1,123 +0,0 @@
package pwalk
import (
"fmt"
"os"
"path/filepath"
"runtime"
"sync"
)
// WalkFunc is the type of the function called by Walk to visit each
// file or directory. It is an alias for [filepath.WalkFunc].
//
// Deprecated: use [github.com/opencontainers/selinux/pkg/pwalkdir] and [fs.WalkDirFunc].
type WalkFunc = filepath.WalkFunc
// Walk is a wrapper for filepath.Walk which can call multiple walkFn
// in parallel, allowing to handle each item concurrently. A maximum of
// twice the runtime.NumCPU() walkFn will be called at any one time.
// If you want to change the maximum, use WalkN instead.
//
// The order of calls is non-deterministic.
//
// Note that this implementation only supports primitive error handling:
//
// - no errors are ever passed to walkFn;
//
// - once a walkFn returns any error, all further processing stops
// and the error is returned to the caller of Walk;
//
// - filepath.SkipDir is not supported;
//
// - if more than one walkFn instance will return an error, only one
// of such errors will be propagated and returned by Walk, others
// will be silently discarded.
//
// Deprecated: use [github.com/opencontainers/selinux/pkg/pwalkdir.Walk]
func Walk(root string, walkFn WalkFunc) error {
return WalkN(root, walkFn, runtime.NumCPU()*2)
}
// WalkN is a wrapper for filepath.Walk which can call multiple walkFn
// in parallel, allowing to handle each item concurrently. A maximum of
// num walkFn will be called at any one time.
//
// Please see Walk documentation for caveats of using this function.
//
// Deprecated: use [github.com/opencontainers/selinux/pkg/pwalkdir.WalkN]
func WalkN(root string, walkFn WalkFunc, num int) error {
// make sure limit is sensible
if num < 1 {
return fmt.Errorf("walk(%q): num must be > 0", root)
}
files := make(chan *walkArgs, 2*num)
errCh := make(chan error, 1) // get the first error, ignore others
// Start walking a tree asap
var (
err error
wg sync.WaitGroup
rootLen = len(root)
rootEntry *walkArgs
)
wg.Add(1)
go func() {
err = filepath.Walk(root, func(p string, info os.FileInfo, err error) error {
if err != nil {
close(files)
return err
}
if len(p) == rootLen {
// Root entry is processed separately below.
rootEntry = &walkArgs{path: p, info: &info}
return nil
}
// add a file to the queue unless a callback sent an error
select {
case e := <-errCh:
close(files)
return e
default:
files <- &walkArgs{path: p, info: &info}
return nil
}
})
if err == nil {
close(files)
}
wg.Done()
}()
wg.Add(num)
for i := 0; i < num; i++ {
go func() {
for file := range files {
if e := walkFn(file.path, *file.info, nil); e != nil {
select {
case errCh <- e: // sent ok
default: // buffer full
}
}
}
wg.Done()
}()
}
wg.Wait()
if err == nil {
err = walkFn(rootEntry.path, *rootEntry.info, nil)
}
return err
}
// walkArgs holds the arguments that were passed to the Walk or WalkN
// functions.
type walkArgs struct {
info *os.FileInfo
path string
}

5
vendor/modules.txt vendored
View File

@ -18,7 +18,7 @@ github.com/Microsoft/go-winio/internal/socket
github.com/Microsoft/go-winio/internal/stringbuffer
github.com/Microsoft/go-winio/pkg/guid
github.com/Microsoft/go-winio/vhd
# github.com/Microsoft/hcsshim v0.12.3
# github.com/Microsoft/hcsshim v0.12.4
## explicit; go 1.21
github.com/Microsoft/hcsshim
github.com/Microsoft/hcsshim/computestorage
@ -355,7 +355,7 @@ github.com/containers/psgo/internal/dev
github.com/containers/psgo/internal/host
github.com/containers/psgo/internal/proc
github.com/containers/psgo/internal/process
# github.com/containers/storage v1.54.0
# github.com/containers/storage v1.54.1-0.20240627145511-52b643e1ff51
## explicit; go 1.21
github.com/containers/storage
github.com/containers/storage/drivers
@ -909,7 +909,6 @@ github.com/opencontainers/runtime-tools/validate/capabilities
## explicit; go 1.19
github.com/opencontainers/selinux/go-selinux
github.com/opencontainers/selinux/go-selinux/label
github.com/opencontainers/selinux/pkg/pwalk
github.com/opencontainers/selinux/pkg/pwalkdir
# github.com/openshift/imagebuilder v1.2.10
## explicit; go 1.19