chunked: restrict dedup with hard links
before deduplicating with hard links make sure the two files share the same UID, GID, file mode and extended attributes. Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
parent
d00974a9aa
commit
4ef5ee00ab
|
|
@ -10,6 +10,7 @@ import (
|
|||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
|
@ -22,6 +23,7 @@ import (
|
|||
"github.com/containers/storage/pkg/archive"
|
||||
"github.com/containers/storage/pkg/chunked/internal"
|
||||
"github.com/containers/storage/pkg/idtools"
|
||||
"github.com/containers/storage/pkg/system"
|
||||
"github.com/containers/storage/types"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
"github.com/klauspost/pgzip"
|
||||
|
|
@ -217,7 +219,7 @@ func makeEstargzChunkedDiffer(ctx context.Context, store storage.Store, blobSize
|
|||
// otherFile contains the metadata for the file.
|
||||
// dirfd is an open file descriptor to the destination root directory.
|
||||
// useHardLinks defines whether the deduplication can be performed using hard links.
|
||||
func copyFileFromOtherLayer(file internal.FileMetadata, source string, otherFile *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
|
||||
func copyFileFromOtherLayer(file *internal.FileMetadata, source string, otherFile *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
|
||||
srcDirfd, err := unix.Open(source, unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return false, nil, 0, err
|
||||
|
|
@ -237,31 +239,98 @@ func copyFileFromOtherLayer(file internal.FileMetadata, source string, otherFile
|
|||
return true, dstFile, written, err
|
||||
}
|
||||
|
||||
// canDedupMetadataWithHardLink says whether it is possible to deduplicate file with otherFile.
|
||||
// It checks that the two files have the same UID, GID, file mode and xattrs.
|
||||
func canDedupMetadataWithHardLink(file *internal.FileMetadata, otherFile *internal.FileMetadata) bool {
|
||||
if file.UID != otherFile.UID {
|
||||
return false
|
||||
}
|
||||
if file.GID != otherFile.GID {
|
||||
return false
|
||||
}
|
||||
if file.Mode != otherFile.Mode {
|
||||
return false
|
||||
}
|
||||
if !reflect.DeepEqual(file.Xattrs, otherFile.Xattrs) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// canDedupFileWithHardLink checks if the specified file can be deduplicated by an
|
||||
// open file, given its descriptor and stat data.
|
||||
func canDedupFileWithHardLink(file *internal.FileMetadata, fd int, s os.FileInfo) bool {
|
||||
st, ok := s.Sys().(*syscall.Stat_t)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
path := fmt.Sprintf("/proc/self/fd/%d", fd)
|
||||
|
||||
listXattrs, err := system.Llistxattr(path)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
xattrsToIgnore := map[string]interface{}{
|
||||
"security.selinux": true,
|
||||
}
|
||||
|
||||
xattrs := make(map[string]string)
|
||||
for _, x := range listXattrs {
|
||||
v, err := system.Lgetxattr(path, x)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if _, found := xattrsToIgnore[x]; found {
|
||||
continue
|
||||
}
|
||||
xattrs[x] = string(v)
|
||||
}
|
||||
// fill only the attributes used by canDedupMetadataWithHardLink.
|
||||
otherFile := internal.FileMetadata{
|
||||
UID: int(st.Uid),
|
||||
GID: int(st.Gid),
|
||||
Mode: int64(st.Mode),
|
||||
Xattrs: xattrs,
|
||||
}
|
||||
return canDedupMetadataWithHardLink(file, &otherFile)
|
||||
}
|
||||
|
||||
// findFileInOtherLayers finds the specified file in other layers.
|
||||
// file is the file to look for.
|
||||
// dirfd is an open file descriptor to the checkout root directory.
|
||||
// layersMetadata contains the metadata for each layer in the storage.
|
||||
// layersTarget maps each layer to its checkout on disk.
|
||||
// useHardLinks defines whether the deduplication can be performed using hard links.
|
||||
func findFileInOtherLayers(file internal.FileMetadata, dirfd int, layersMetadata map[string]map[string][]*internal.FileMetadata, layersTarget map[string]string, useHardLinks bool) (bool, *os.File, int64, error) {
|
||||
func findFileInOtherLayers(file *internal.FileMetadata, dirfd int, layersMetadata map[string]map[string][]*internal.FileMetadata, layersTarget map[string]string, useHardLinks bool) (bool, *os.File, int64, error) {
|
||||
// this is ugly, needs to be indexed
|
||||
for layerID, checksums := range layersMetadata {
|
||||
m, found := checksums[file.Digest]
|
||||
source, ok := layersTarget[layerID]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
files, found := checksums[file.Digest]
|
||||
if !found {
|
||||
continue
|
||||
}
|
||||
source, ok := layersTarget[layerID]
|
||||
if !ok || len(source) == 0 {
|
||||
continue
|
||||
}
|
||||
for _, candidate := range files {
|
||||
// check if it is a valid candidate to dedup file
|
||||
if useHardLinks && !canDedupMetadataWithHardLink(file, candidate) {
|
||||
continue
|
||||
}
|
||||
|
||||
otherFile := m[0]
|
||||
|
||||
found, dstFile, written, err := copyFileFromOtherLayer(file, source, otherFile, dirfd, useHardLinks)
|
||||
if found && err == nil {
|
||||
return found, dstFile, written, err
|
||||
found, dstFile, written, err := copyFileFromOtherLayer(file, source, candidate, dirfd, useHardLinks)
|
||||
if found && err == nil {
|
||||
return found, dstFile, written, err
|
||||
}
|
||||
}
|
||||
}
|
||||
// If hard links deduplication was used and it has failed, try again without hard links.
|
||||
if useHardLinks {
|
||||
return findFileInOtherLayers(file, dirfd, layersMetadata, layersTarget, false)
|
||||
}
|
||||
return false, nil, 0, nil
|
||||
}
|
||||
|
||||
|
|
@ -279,7 +348,7 @@ func getFileDigest(f *os.File) (digest.Digest, error) {
|
|||
// file is the file to look for.
|
||||
// dirfd is an open fd to the destination checkout.
|
||||
// useHardLinks defines whether the deduplication can be performed using hard links.
|
||||
func findFileOnTheHost(file internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
|
||||
func findFileOnTheHost(file *internal.FileMetadata, dirfd int, useHardLinks bool) (bool, *os.File, int64, error) {
|
||||
sourceFile := filepath.Clean(filepath.Join("/", file.Name))
|
||||
if !strings.HasPrefix(sourceFile, "/usr/") {
|
||||
// limit host deduplication to files under /usr.
|
||||
|
|
@ -317,6 +386,9 @@ func findFileOnTheHost(file internal.FileMetadata, dirfd int, useHardLinks bool)
|
|||
return false, nil, 0, nil
|
||||
}
|
||||
|
||||
// check if the open file can be deduplicated with hard links
|
||||
useHardLinks = useHardLinks && canDedupFileWithHardLink(file, fd, st)
|
||||
|
||||
dstFile, written, err := copyFileContent(fd, file.Name, dirfd, 0, useHardLinks)
|
||||
if err != nil {
|
||||
return false, nil, 0, nil
|
||||
|
|
@ -934,7 +1006,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (gra
|
|||
|
||||
totalChunksSize += r.Size
|
||||
|
||||
found, dstFile, _, err := findFileInOtherLayers(r, dirfd, otherLayersCache, c.layersTarget, useHardLinks)
|
||||
found, dstFile, _, err := findFileInOtherLayers(&r, dirfd, otherLayersCache, c.layersTarget, useHardLinks)
|
||||
if err != nil {
|
||||
return output, err
|
||||
}
|
||||
|
|
@ -950,7 +1022,7 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (gra
|
|||
}
|
||||
|
||||
if enableHostDedup {
|
||||
found, dstFile, _, err = findFileOnTheHost(r, dirfd, useHardLinks)
|
||||
found, dstFile, _, err = findFileOnTheHost(&r, dirfd, useHardLinks)
|
||||
if err != nil {
|
||||
return output, err
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue