overlay: try harder with more layers
If we have enough layers that even using relative paths for symlinks to the diff directories won't make the list of lower directories and options fit in a single page, * open the various lowers for reading * use the descriptor names in the lowerdir= list This is a bit slower, so don't take these extra steps unless we know we have to. Signed-off-by: Nalin Dahyabhai <nalin@redhat.com>
This commit is contained in:
parent
ebf857fda6
commit
7c5964df95
|
|
@ -8,14 +8,16 @@ import (
|
|||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"github.com/containers/storage/pkg/reexec"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func init() {
|
||||
reexec.Register("storage-mountfrom", mountFromMain)
|
||||
reexec.Register("storage-mountfrom", mountOverlayFromMain)
|
||||
}
|
||||
|
||||
func fatal(err error) {
|
||||
|
|
@ -31,7 +33,7 @@ type mountOptions struct {
|
|||
Flag uint32
|
||||
}
|
||||
|
||||
func mountFrom(dir, device, target, mType string, flags uintptr, label string) error {
|
||||
func mountOverlayFrom(dir, device, target, mType string, flags uintptr, label string) error {
|
||||
options := &mountOptions{
|
||||
Device: device,
|
||||
Target: target,
|
||||
|
|
@ -67,7 +69,7 @@ func mountFrom(dir, device, target, mType string, flags uintptr, label string) e
|
|||
}
|
||||
|
||||
// mountfromMain is the entry-point for storage-mountfrom on re-exec.
|
||||
func mountFromMain() {
|
||||
func mountOverlayFromMain() {
|
||||
runtime.LockOSThread()
|
||||
flag.Parse()
|
||||
|
||||
|
|
@ -77,11 +79,96 @@ func mountFromMain() {
|
|||
fatal(err)
|
||||
}
|
||||
|
||||
if err := os.Chdir(flag.Arg(0)); err != nil {
|
||||
// Mount the arguments passed from the specified directory. Some of the
|
||||
// paths mentioned in the values we pass to the kernel are relative to
|
||||
// the specified directory.
|
||||
homedir := flag.Arg(0)
|
||||
if err := os.Chdir(homedir); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
if err := unix.Mount(options.Device, options.Target, options.Type, uintptr(options.Flag), options.Label); err != nil {
|
||||
pageSize := unix.Getpagesize()
|
||||
if len(options.Label) < pageSize {
|
||||
if err := unix.Mount(options.Device, options.Target, options.Type, uintptr(options.Flag), options.Label); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
// Those arguments still took up too much space. Open the diff
|
||||
// directories and use their descriptor numbers as lowers, using
|
||||
// /proc/self/fd as the current directory.
|
||||
|
||||
// Split out the various options, since we need to manipulate the
|
||||
// paths, but we don't want to mess with other options.
|
||||
var upperk, upperv, workk, workv, lowerk, lowerv, labelk, labelv, others string
|
||||
for _, arg := range strings.Split(options.Label, ",") {
|
||||
kv := strings.SplitN(arg, "=", 2)
|
||||
switch kv[0] {
|
||||
case "upperdir":
|
||||
upperk = "upperdir="
|
||||
upperv = kv[1]
|
||||
case "workdir":
|
||||
workk = "workdir="
|
||||
workv = kv[1]
|
||||
case "lowerdir":
|
||||
lowerk = "lowerdir="
|
||||
lowerv = kv[1]
|
||||
case "label":
|
||||
labelk = "label="
|
||||
labelv = kv[1]
|
||||
default:
|
||||
if others == "" {
|
||||
others = arg
|
||||
} else {
|
||||
others = others + "," + arg
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure upperdir, workdir, and the target are absolute paths.
|
||||
if upperv != "" && !filepath.IsAbs(upperv) {
|
||||
upperv = filepath.Join(homedir, upperv)
|
||||
}
|
||||
if workv != "" && !filepath.IsAbs(workv) {
|
||||
workv = filepath.Join(homedir, workv)
|
||||
}
|
||||
if !filepath.IsAbs(options.Target) {
|
||||
options.Target = filepath.Join(homedir, options.Target)
|
||||
}
|
||||
|
||||
// Get a descriptor for each lower, and use that descriptor's name as
|
||||
// the new value for the list of lowers, because it's shorter.
|
||||
if lowerv != "" {
|
||||
lowers := strings.Split(lowerv, ":")
|
||||
for i := range lowers {
|
||||
lowerFd, err := unix.Open(lowers[i], unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
lowers[i] = fmt.Sprintf("%d", lowerFd)
|
||||
}
|
||||
lowerv = strings.Join(lowers, ":")
|
||||
}
|
||||
|
||||
// Reconstruct the Label field.
|
||||
options.Label = upperk + upperv + "," + workk + workv + "," + lowerk + lowerv + "," + labelk + labelv + "," + others
|
||||
options.Label = strings.ReplaceAll(options.Label, ",,", ",")
|
||||
|
||||
// Okay, try this, if we managed to make the arguments fit.
|
||||
var err error
|
||||
if len(options.Label) < pageSize {
|
||||
if err := os.Chdir("/proc/self/fd"); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
err = unix.Mount(options.Device, options.Target, options.Type, uintptr(options.Flag), options.Label)
|
||||
} else {
|
||||
err = fmt.Errorf("cannot mount layer, mount data %q too large %d >= page size %d", options.Label, len(options.Label), pageSize)
|
||||
}
|
||||
|
||||
// Clean up.
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "creating overlay mount to %s, mount_data=%q\n", options.Target, options.Label)
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -73,20 +73,22 @@ const (
|
|||
// or root directory. Mounts are always done relative to root and
|
||||
// referencing the symbolic links in order to ensure the number of
|
||||
// lower directories can fit in a single page for making the mount
|
||||
// syscall. A hard upper limit of 128 lower layers is enforced to ensure
|
||||
// syscall. A hard upper limit of 500 lower layers is enforced to ensure
|
||||
// that mounts do not fail due to length.
|
||||
|
||||
const (
|
||||
linkDir = "l"
|
||||
lowerFile = "lower"
|
||||
maxDepth = 128
|
||||
maxDepth = 500
|
||||
|
||||
// idLength represents the number of random characters
|
||||
// which can be used to create the unique link identifier
|
||||
// for every layer. If this value is too long then the
|
||||
// page size limit for the mount command may be exceeded.
|
||||
// The idLength should be selected such that following equation
|
||||
// is true (512 is a buffer for label metadata).
|
||||
// is true (512 is a buffer for label metadata, 128 is the
|
||||
// number of lowers we want to be able to use without having
|
||||
// to use bind mounts to get all the way to the kernel limit).
|
||||
// ((idLength + len(linkDir) + 1) * maxDepth) <= (pageSize - 512)
|
||||
idLength = 26
|
||||
)
|
||||
|
|
@ -1378,7 +1380,8 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
|
|||
|
||||
// absLowers is the list of lowers as absolute paths, which works well with additional stores.
|
||||
absLowers := []string{}
|
||||
// relLowers is the list of lowers as paths relative to the driver's home directory.
|
||||
// relLowers is the list of lowers as paths relative to the driver's home directory. If
|
||||
// additional stores are being used, some of these will still be absolute paths.
|
||||
relLowers := []string{}
|
||||
|
||||
// Check if $link/../diff{1-*} exist. If they do, add them, in order, as the front of the lowers
|
||||
|
|
@ -1435,6 +1438,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
|
|||
perms = os.FileMode(st2.Mode())
|
||||
permsKnown = true
|
||||
}
|
||||
l = lower
|
||||
break
|
||||
}
|
||||
lower = ""
|
||||
|
|
@ -1606,13 +1610,12 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
|
|||
return nil
|
||||
}
|
||||
} else if len(mountData) >= pageSize {
|
||||
// Use relative paths and mountFrom when the mount data has exceeded
|
||||
// Use (possibly) relative paths and mountFrom when the mount data has exceeded
|
||||
// the page size. The mount syscall fails if the mount data cannot
|
||||
// fit within a page and relative links make the mount data much
|
||||
// smaller at the expense of requiring a fork exec to chroot.
|
||||
// smaller at the expense of requiring a fork exec to chdir().
|
||||
|
||||
workdir = path.Join(id, "work")
|
||||
//FIXME: We need to figure out to get this to work with additional stores
|
||||
if readWrite {
|
||||
diffDir := path.Join(id, "diff")
|
||||
opts = fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", strings.Join(relLowers, ":"), diffDir, workdir)
|
||||
|
|
@ -1623,11 +1626,8 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
|
|||
opts = fmt.Sprintf("%s,%s", opts, strings.Join(optsList, ","))
|
||||
}
|
||||
mountData = label.FormatMountLabel(opts, options.MountLabel)
|
||||
if len(mountData) >= pageSize {
|
||||
return "", fmt.Errorf("cannot mount layer, mount label %q too large %d >= page size %d", options.MountLabel, len(mountData), pageSize)
|
||||
}
|
||||
mountFunc = func(source string, target string, mType string, flags uintptr, label string) error {
|
||||
return mountFrom(d.home, source, target, mType, flags, label)
|
||||
return mountOverlayFrom(d.home, source, target, mType, flags, label)
|
||||
}
|
||||
mountTarget = path.Join(id, "merged")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,18 +5,54 @@ load helpers
|
|||
|
||||
@test "allow storing images with more than 127 layers" {
|
||||
LAYER=""
|
||||
for i in {0..150}; do
|
||||
LIMIT=300
|
||||
for i in $(seq 0 ${LIMIT}); do
|
||||
echo "Layer: $i"
|
||||
|
||||
# Create a layer.
|
||||
run storage --debug=false create-layer "$LAYER"
|
||||
echo "$output"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "$output" != "" ]
|
||||
LAYER="$output"
|
||||
run storage --debug=false mount "$LAYER"
|
||||
echo "$output"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "$output" != "" ]
|
||||
ROOTFS="$output"
|
||||
touch "${ROOTFS}"/$i
|
||||
run storage --debug=false unmount "$LAYER"
|
||||
echo "$output"
|
||||
[ "$status" -eq 0 ]
|
||||
done
|
||||
|
||||
# Create the image
|
||||
run storage --debug=false create-image --name test-image "$LAYER"
|
||||
run storage --debug=false create-image "$LAYER"
|
||||
echo "$output"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "$output" != "" ]
|
||||
IMAGE="$output"
|
||||
|
||||
# Make sure the image has all of the content.
|
||||
run storage --debug=false create-container "$IMAGE"
|
||||
echo "$output"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "$output" != "" ]
|
||||
CONTAINER="$output"
|
||||
|
||||
run storage --debug=false mount "$CONTAINER"
|
||||
echo "$output"
|
||||
[ "$status" -eq 0 ]
|
||||
[ "$output" != "" ]
|
||||
ROOTFS="$output"
|
||||
for i in $(seq 0 ${LIMIT}); do
|
||||
if ! test -r "${ROOTFS}"/$i ; then
|
||||
echo File from layer $i of ${LIMIT} was not visible after mounting
|
||||
false
|
||||
fi
|
||||
done
|
||||
|
||||
run storage --debug=false unmount "$CONTAINER"
|
||||
echo "$output"
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue