mirror of https://github.com/docker/docker-py.git
Merge pull request #1914 from mefyl/master
Improve .dockerignore compliance
This commit is contained in:
commit
cc6e1b1249
|
@ -1,20 +1,24 @@
|
|||
import os
|
||||
import re
|
||||
|
||||
from ..constants import IS_WINDOWS_PLATFORM
|
||||
from .fnmatch import fnmatch
|
||||
from fnmatch import fnmatch
|
||||
from itertools import chain
|
||||
from .utils import create_archive
|
||||
|
||||
|
||||
def tar(path, exclude=None, dockerfile=None, fileobj=None, gzip=False):
    """
    Archive the directory found at ``path``.

    The ``exclude`` patterns (none when omitted) are resolved against the
    absolute form of ``path`` by ``exclude_paths``; the surviving paths are
    sorted and handed to ``create_archive`` together with ``fileobj`` and
    ``gzip``, whose result is returned.
    """
    context_root = os.path.abspath(path)
    ignore_patterns = exclude if exclude else []
    selected = exclude_paths(
        context_root, ignore_patterns, dockerfile=dockerfile
    )
    return create_archive(
        files=sorted(selected), root=context_root, fileobj=fileobj, gzip=gzip
    )
|
||||
|
||||
|
||||
_SEP = re.compile('/|\\\\') if IS_WINDOWS_PLATFORM else re.compile('/')
|
||||
|
||||
|
||||
def exclude_paths(root, patterns, dockerfile=None):
|
||||
"""
|
||||
Given a root directory path and a list of .dockerignore patterns, return
|
||||
|
@ -23,127 +27,90 @@ def exclude_paths(root, patterns, dockerfile=None):
|
|||
|
||||
All paths returned are relative to the root.
|
||||
"""
|
||||
|
||||
if dockerfile is None:
|
||||
dockerfile = 'Dockerfile'
|
||||
|
||||
patterns = [p.lstrip('/') for p in patterns]
|
||||
exceptions = [p for p in patterns if p.startswith('!')]
|
||||
def normalize(p):
    # Break the pattern into its path components. Leading and trailing
    # slashes are not relevant (yes, "foo.py/" must exclude the "foo.py"
    # regular file), and neither are "." components, even if the whole
    # pattern is only ".": the Docker reference states "For historical
    # reasons, the pattern . is ignored."
    components = []
    for part in re.split(_SEP, p):
        if not part or part == '.':
            continue
        if part == '..':
            # ".." clears the previous component, if there is one,
            # regardless of whether it exists: "A preprocessing step [...]
            # eliminates . and .. elements using Go's filepath.".
            if components:
                components.pop()
            continue
        components.append(part)
    return components
|
||||
|
||||
include_patterns = [p[1:] for p in exceptions]
|
||||
include_patterns += [dockerfile, '.dockerignore']
|
||||
|
||||
exclude_patterns = list(set(patterns) - set(exceptions))
|
||||
|
||||
paths = get_paths(root, exclude_patterns, include_patterns,
|
||||
has_exceptions=len(exceptions) > 0)
|
||||
|
||||
return set(paths).union(
|
||||
# If the Dockerfile is in a subdirectory that is excluded, get_paths
|
||||
# will not descend into it and the file will be skipped. This ensures
|
||||
# it doesn't happen.
|
||||
set([dockerfile.replace('/', os.path.sep)])
|
||||
if os.path.exists(os.path.join(root, dockerfile)) else set()
|
||||
)
|
||||
patterns = (
|
||||
(True, normalize(p[1:]))
|
||||
if p.startswith('!') else
|
||||
(False, normalize(p))
|
||||
for p in patterns)
|
||||
patterns = list(reversed(list(chain(
|
||||
# Exclude empty patterns such as "." or the empty string.
|
||||
filter(lambda p: p[1], patterns),
|
||||
# Always include the Dockerfile and .dockerignore
|
||||
[(True, dockerfile.split('/')), (True, ['.dockerignore'])]))))
|
||||
return set(walk(root, patterns))
|
||||
|
||||
|
||||
def should_include(path, exclude_patterns, include_patterns, root):
|
||||
def walk(root, patterns, default=True):
|
||||
"""
|
||||
Given a path, a list of exclude patterns, and a list of inclusion patterns:
|
||||
|
||||
1. Returns True if the path doesn't match any exclusion pattern
|
||||
2. Returns False if the path matches an exclusion pattern and doesn't match
|
||||
an inclusion pattern
|
||||
3. Returns true if the path matches an exclusion pattern and matches an
|
||||
inclusion pattern
|
||||
"""
|
||||
for pattern in exclude_patterns:
|
||||
if match_path(path, pattern):
|
||||
for pattern in include_patterns:
|
||||
if match_path(path, pattern):
|
||||
return True
|
||||
if os.path.isabs(pattern) and match_path(
|
||||
os.path.join(root, path), pattern):
|
||||
return True
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def should_check_directory(directory_path, exclude_patterns, include_patterns,
|
||||
root):
|
||||
"""
|
||||
Given a directory path, a list of exclude patterns, and a list of inclusion
|
||||
patterns:
|
||||
|
||||
1. Returns True if the directory path should be included according to
|
||||
should_include.
|
||||
2. Returns True if the directory path is the prefix for an inclusion
|
||||
pattern
|
||||
3. Returns False otherwise
|
||||
A collection of files lying below root that should be included according to
|
||||
patterns.
|
||||
"""
|
||||
|
||||
# To account for exception rules, check directories if their path is a
|
||||
# prefix to an inclusion pattern. This logic conforms with the current
|
||||
# docker logic (2016-10-27):
|
||||
# https://github.com/docker/docker/blob/bc52939b0455116ab8e0da67869ec81c1a1c3e2c/pkg/archive/archive.go#L640-L671
|
||||
def match(p):
    # ``p`` is an (include flag, remaining pattern components) pair and
    # ``f`` is the directory entry of the enclosing loop. The result lists
    # the pairs still applicable once ``f`` has consumed a component.
    flag, parts = p[0], p[1]
    remainder = (flag, parts[1:])
    if parts[0] == '**':
        # "**" stays active for deeper levels (``p`` is kept) and may also
        # match nothing, in which case the rest of the pattern is tried
        # against ``f`` right away.
        following = match(remainder) if remainder[1] else [remainder]
        return [p] + following
    if fnmatch(f, parts[0]):
        return [remainder]
    return []
|
||||
|
||||
def normalize_path(path):
|
||||
return path.replace(os.path.sep, '/')
|
||||
|
||||
path_with_slash = normalize_path(directory_path) + '/'
|
||||
possible_child_patterns = [
|
||||
pattern for pattern in map(normalize_path, include_patterns)
|
||||
if (pattern + '/').startswith(path_with_slash)
|
||||
]
|
||||
directory_included = should_include(
|
||||
directory_path, exclude_patterns, include_patterns, root
|
||||
)
|
||||
return directory_included or len(possible_child_patterns) > 0
|
||||
|
||||
|
||||
def get_paths(root, exclude_patterns, include_patterns, has_exceptions=False):
    """
    Walk ``root`` top-down, without following symlinks, and return the list
    of paths (relative to ``root``) accepted by ``should_include``.

    Directories rejected by ``should_check_directory`` are not descended
    into. ``has_exceptions`` is accepted but not used by this function.
    """
    def accepted(candidate):
        return should_include(
            candidate, exclude_patterns, include_patterns, root
        )

    collected = []
    walker = os.walk(root, topdown=True, followlinks=False)
    for current, subdirs, filenames in walker:
        rel_parent = os.path.relpath(current, root)
        if rel_parent == '.':
            rel_parent = ''

        # Prune the traversal by mutating the list os.walk handed us; this
        # is the documented way to skip directories. See
        # https://docs.python.org/2/library/os.html#os.walk
        kept = []
        for name in subdirs:
            if should_check_directory(
                os.path.join(rel_parent, name), exclude_patterns,
                include_patterns, root
            ):
                kept.append(name)
        subdirs[:] = kept

        # Remaining directories first, then files, as in a single pass.
        for name in subdirs + filenames:
            candidate = os.path.join(rel_parent, name)
            if accepted(candidate):
                collected.append(candidate)

    return collected
|
||||
|
||||
|
||||
def match_path(path, pattern):
    """
    Return the ``fnmatch`` result of ``path`` against ``pattern``.

    Trailing separators are stripped from the pattern and a non-empty
    relative pattern is normalized with ``os.path.relpath``. Unless the
    pattern contains ``**``, only as many leading components of ``path`` as
    the pattern has are compared.
    """
    sep = os.path.sep
    pattern = pattern.rstrip('/' + sep)
    if pattern and not os.path.isabs(pattern):
        pattern = os.path.relpath(pattern)

    pattern_parts = pattern.split(sep)
    if IS_WINDOWS_PLATFORM and len(pattern_parts) == 1:
        # Nothing was split on the native separator: retry with "/".
        pattern_parts = pattern.split('/')

    path_parts = path.split(sep)
    if '**' not in pattern:
        path_parts = path_parts[:len(pattern_parts)]
    return fnmatch('/'.join(path_parts), '/'.join(pattern_parts))
|
||||
for f in os.listdir(root):
|
||||
cur = os.path.join(root, f)
|
||||
# The patterns if recursing in that directory.
|
||||
sub = list(chain(*(match(p) for p in patterns)))
|
||||
# Whether this file is explicitly included / excluded.
|
||||
hit = next((p[0] for p in sub if not p[1]), None)
|
||||
# Whether this file is implicitly included / excluded.
|
||||
matched = default if hit is None else hit
|
||||
sub = list(filter(lambda p: p[1], sub))
|
||||
if os.path.isdir(cur):
|
||||
# Entirely skip directories if there is no chance any subfile will
|
||||
# be included.
|
||||
if all(not p[0] for p in sub) and not matched:
|
||||
continue
|
||||
# I think this would greatly speed up dockerignore handling by not
|
||||
# recursing into directories we are sure would be entirely
|
||||
# included, and only yielding the directory itself, which will be
|
||||
# recursively archived anyway. However the current unit test expect
|
||||
# the full list of subfiles and I'm not 100% sure it would make no
|
||||
# difference yet.
|
||||
# if all(p[0] for p in sub) and matched:
|
||||
# yield f
|
||||
# continue
|
||||
children = False
|
||||
for r in (os.path.join(f, p) for p in walk(cur, sub, matched)):
|
||||
yield r
|
||||
children = True
|
||||
# The current unit tests expect directories only under those
|
||||
# conditions. It might be simplifiable though.
|
||||
if (not sub or not children) and hit or hit is None and default:
|
||||
yield f
|
||||
elif matched:
|
||||
yield f
|
||||
|
|
|
@ -23,7 +23,6 @@ from docker.utils import (
|
|||
decode_json_header, tar, split_command, parse_devices, update_headers,
|
||||
)
|
||||
|
||||
from docker.utils.build import should_check_directory
|
||||
from docker.utils.ports import build_port_bindings, split_port
|
||||
from docker.utils.utils import format_environment
|
||||
|
||||
|
@ -758,6 +757,13 @@ class ExcludePathsTest(unittest.TestCase):
|
|||
self.all_paths - set(['foo/a.py'])
|
||||
)
|
||||
|
||||
def test_exclude_include_absolute_path(self):
    # Patterns written with a leading slash: everything is excluded, then
    # the *.py files are re-included.
    base = make_tree([], ['a.py', 'b.py'])
    patterns = ['/*', '!/*.py']
    kept = exclude_paths(base, patterns)
    assert kept == {'a.py', 'b.py'}
|
||||
|
||||
def test_single_subdir_with_path_traversal(self):
|
||||
assert self.exclude(['foo/whoops/../a.py']) == convert_paths(
|
||||
self.all_paths - set(['foo/a.py'])
|
||||
|
@ -876,12 +882,25 @@ class ExcludePathsTest(unittest.TestCase):
|
|||
)
|
||||
)
|
||||
|
||||
def test_exclude_include_absolute_path(self):
|
||||
base = make_tree([], ['a.py', 'b.py'])
|
||||
def test_include_wildcard(self):
|
||||
base = make_tree(['a'], ['a/b.py'])
|
||||
assert exclude_paths(
|
||||
base,
|
||||
['/*', '!' + os.path.join(base, '*.py')]
|
||||
) == set(['a.py', 'b.py'])
|
||||
['*', '!*/b.py']
|
||||
) == convert_paths(['a/b.py'])
|
||||
|
||||
def test_last_line_precedence(self):
    # A later pattern overrides an earlier one: README-secret.md is
    # re-excluded after the "!README*.md" exception.
    files = [
        'garbage.md',
        'thrash.md',
        'README.md',
        'README-bis.md',
        'README-secret.md',
    ]
    base = make_tree([], files)
    patterns = ['*.md', '!README*.md', 'README-secret.md']
    kept = exclude_paths(base, patterns)
    assert kept == {'README.md', 'README-bis.md'}
|
||||
|
||||
|
||||
class TarTest(unittest.TestCase):
|
||||
|
@ -1019,69 +1038,6 @@ class TarTest(unittest.TestCase):
|
|||
assert tar_data.getmember('th.txt').mtime == -3600
|
||||
|
||||
|
||||
class ShouldCheckDirectoryTest(unittest.TestCase):
    """Checks which directories ``should_check_directory`` asks to visit."""

    # Patterns passed as exclusions to every call.
    exclude_patterns = [
        'exclude_rather_large_directory',
        'dir/with/subdir_excluded',
        'dir/with/exceptions'
    ]

    # Patterns passed as inclusions (exceptions) to every call.
    include_patterns = [
        'dir/with/exceptions/like_this_one',
        'dir/with/exceptions/in/descendents'
    ]

    def _should_check(self, directory_path):
        # Shared call: the class-level patterns with '.' as root.
        return should_check_directory(
            directory_path, self.exclude_patterns, self.include_patterns, '.'
        )

    def test_should_check_directory_not_excluded(self):
        assert self._should_check('not_excluded')
        assert self._should_check(convert_path('dir/with'))

    def test_shoud_check_parent_directories_of_excluded(self):
        assert self._should_check('dir')
        assert self._should_check(convert_path('dir/with'))

    def test_should_not_check_excluded_directories_with_no_exceptions(self):
        assert not self._should_check('exclude_rather_large_directory')
        assert not self._should_check(
            convert_path('dir/with/subdir_excluded')
        )

    def test_should_check_excluded_directory_with_exceptions(self):
        assert self._should_check(convert_path('dir/with/exceptions'))
        assert self._should_check(convert_path('dir/with/exceptions/in'))

    def test_should_not_check_siblings_of_exceptions(self):
        assert not self._should_check(
            convert_path('dir/with/exceptions/but_not_here')
        )

    def test_should_check_subdirectories_of_exceptions(self):
        assert self._should_check(
            convert_path('dir/with/exceptions/like_this_one/subdir')
        )
|
||||
|
||||
|
||||
class FormatEnvironmentTest(unittest.TestCase):
|
||||
def test_format_env_binary_unicode_value(self):
|
||||
env_dict = {
|
||||
|
|
Loading…
Reference in New Issue