mirror of https://github.com/docker/docker-py.git
Merge pull request #1914 from mefyl/master
Improve .dockerignore compliance
This commit is contained in:
commit
cc6e1b1249
|
@ -1,20 +1,24 @@
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
from ..constants import IS_WINDOWS_PLATFORM
|
from ..constants import IS_WINDOWS_PLATFORM
|
||||||
from .fnmatch import fnmatch
|
from fnmatch import fnmatch
|
||||||
|
from itertools import chain
|
||||||
from .utils import create_archive
|
from .utils import create_archive
|
||||||
|
|
||||||
|
|
||||||
def tar(path, exclude=None, dockerfile=None, fileobj=None, gzip=False):
|
def tar(path, exclude=None, dockerfile=None, fileobj=None, gzip=False):
|
||||||
root = os.path.abspath(path)
|
root = os.path.abspath(path)
|
||||||
exclude = exclude or []
|
exclude = exclude or []
|
||||||
|
|
||||||
return create_archive(
|
return create_archive(
|
||||||
files=sorted(exclude_paths(root, exclude, dockerfile=dockerfile)),
|
files=sorted(exclude_paths(root, exclude, dockerfile=dockerfile)),
|
||||||
root=root, fileobj=fileobj, gzip=gzip
|
root=root, fileobj=fileobj, gzip=gzip
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
_SEP = re.compile('/|\\\\') if IS_WINDOWS_PLATFORM else re.compile('/')
|
||||||
|
|
||||||
|
|
||||||
def exclude_paths(root, patterns, dockerfile=None):
|
def exclude_paths(root, patterns, dockerfile=None):
|
||||||
"""
|
"""
|
||||||
Given a root directory path and a list of .dockerignore patterns, return
|
Given a root directory path and a list of .dockerignore patterns, return
|
||||||
|
@ -23,127 +27,90 @@ def exclude_paths(root, patterns, dockerfile=None):
|
||||||
|
|
||||||
All paths returned are relative to the root.
|
All paths returned are relative to the root.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if dockerfile is None:
|
if dockerfile is None:
|
||||||
dockerfile = 'Dockerfile'
|
dockerfile = 'Dockerfile'
|
||||||
|
|
||||||
patterns = [p.lstrip('/') for p in patterns]
|
def normalize(p):
|
||||||
exceptions = [p for p in patterns if p.startswith('!')]
|
# Leading and trailing slashes are not relevant. Yes,
|
||||||
|
# "foo.py/" must exclude the "foo.py" regular file. "."
|
||||||
|
# components are not relevant either, even if the whole
|
||||||
|
# pattern is only ".", as the Docker reference states: "For
|
||||||
|
# historical reasons, the pattern . is ignored."
|
||||||
|
split = [pt for pt in re.split(_SEP, p) if pt and pt != '.']
|
||||||
|
# ".." component must be cleared with the potential previous
|
||||||
|
# component, regardless of whether it exists: "A preprocessing
|
||||||
|
# step [...] eliminates . and .. elements using Go's
|
||||||
|
# filepath.Clean.".
|
||||||
|
i = 0
|
||||||
|
while i < len(split):
|
||||||
|
if split[i] == '..':
|
||||||
|
del split[i]
|
||||||
|
if i > 0:
|
||||||
|
del split[i - 1]
|
||||||
|
i -= 1
|
||||||
|
else:
|
||||||
|
i += 1
|
||||||
|
return split
|
||||||
|
|
||||||
include_patterns = [p[1:] for p in exceptions]
|
patterns = (
|
||||||
include_patterns += [dockerfile, '.dockerignore']
|
(True, normalize(p[1:]))
|
||||||
|
if p.startswith('!') else
|
||||||
exclude_patterns = list(set(patterns) - set(exceptions))
|
(False, normalize(p))
|
||||||
|
for p in patterns)
|
||||||
paths = get_paths(root, exclude_patterns, include_patterns,
|
patterns = list(reversed(list(chain(
|
||||||
has_exceptions=len(exceptions) > 0)
|
# Exclude empty patterns such as "." or the empty string.
|
||||||
|
filter(lambda p: p[1], patterns),
|
||||||
return set(paths).union(
|
# Always include the Dockerfile and .dockerignore
|
||||||
# If the Dockerfile is in a subdirectory that is excluded, get_paths
|
[(True, dockerfile.split('/')), (True, ['.dockerignore'])]))))
|
||||||
# will not descend into it and the file will be skipped. This ensures
|
return set(walk(root, patterns))
|
||||||
# it doesn't happen.
|
|
||||||
set([dockerfile.replace('/', os.path.sep)])
|
|
||||||
if os.path.exists(os.path.join(root, dockerfile)) else set()
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def should_include(path, exclude_patterns, include_patterns, root):
|
def walk(root, patterns, default=True):
|
||||||
"""
|
"""
|
||||||
Given a path, a list of exclude patterns, and a list of inclusion patterns:
|
A collection of files lying below root that should be included according to
|
||||||
|
patterns.
|
||||||
1. Returns True if the path doesn't match any exclusion pattern
|
|
||||||
2. Returns False if the path matches an exclusion pattern and doesn't match
|
|
||||||
an inclusion pattern
|
|
||||||
3. Returns true if the path matches an exclusion pattern and matches an
|
|
||||||
inclusion pattern
|
|
||||||
"""
|
|
||||||
for pattern in exclude_patterns:
|
|
||||||
if match_path(path, pattern):
|
|
||||||
for pattern in include_patterns:
|
|
||||||
if match_path(path, pattern):
|
|
||||||
return True
|
|
||||||
if os.path.isabs(pattern) and match_path(
|
|
||||||
os.path.join(root, path), pattern):
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def should_check_directory(directory_path, exclude_patterns, include_patterns,
|
|
||||||
root):
|
|
||||||
"""
|
|
||||||
Given a directory path, a list of exclude patterns, and a list of inclusion
|
|
||||||
patterns:
|
|
||||||
|
|
||||||
1. Returns True if the directory path should be included according to
|
|
||||||
should_include.
|
|
||||||
2. Returns True if the directory path is the prefix for an inclusion
|
|
||||||
pattern
|
|
||||||
3. Returns False otherwise
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# To account for exception rules, check directories if their path is a
|
def match(p):
|
||||||
# a prefix to an inclusion pattern. This logic conforms with the current
|
if p[1][0] == '**':
|
||||||
# docker logic (2016-10-27):
|
rec = (p[0], p[1][1:])
|
||||||
# https://github.com/docker/docker/blob/bc52939b0455116ab8e0da67869ec81c1a1c3e2c/pkg/archive/archive.go#L640-L671
|
return [p] + (match(rec) if rec[1] else [rec])
|
||||||
|
elif fnmatch(f, p[1][0]):
|
||||||
|
return [(p[0], p[1][1:])]
|
||||||
|
else:
|
||||||
|
return []
|
||||||
|
|
||||||
def normalize_path(path):
|
for f in os.listdir(root):
|
||||||
return path.replace(os.path.sep, '/')
|
cur = os.path.join(root, f)
|
||||||
|
# The patterns if recursing in that directory.
|
||||||
path_with_slash = normalize_path(directory_path) + '/'
|
sub = list(chain(*(match(p) for p in patterns)))
|
||||||
possible_child_patterns = [
|
# Whether this file is explicitly included / excluded.
|
||||||
pattern for pattern in map(normalize_path, include_patterns)
|
hit = next((p[0] for p in sub if not p[1]), None)
|
||||||
if (pattern + '/').startswith(path_with_slash)
|
# Whether this file is implicitly included / excluded.
|
||||||
]
|
matched = default if hit is None else hit
|
||||||
directory_included = should_include(
|
sub = list(filter(lambda p: p[1], sub))
|
||||||
directory_path, exclude_patterns, include_patterns, root
|
if os.path.isdir(cur):
|
||||||
)
|
# Entirely skip directories if there is no chance any subfile will
|
||||||
return directory_included or len(possible_child_patterns) > 0
|
# be included.
|
||||||
|
if all(not p[0] for p in sub) and not matched:
|
||||||
|
continue
|
||||||
def get_paths(root, exclude_patterns, include_patterns, has_exceptions=False):
|
# I think this would greatly speed up dockerignore handling by not
|
||||||
paths = []
|
# recursing into directories we are sure would be entirely
|
||||||
|
# included, and only yielding the directory itself, which will be
|
||||||
for parent, dirs, files in os.walk(root, topdown=True, followlinks=False):
|
# recursively archived anyway. However the current unit tests expect
|
||||||
parent = os.path.relpath(parent, root)
|
# the full list of subfiles and I'm not 100% sure it would make no
|
||||||
if parent == '.':
|
# difference yet.
|
||||||
parent = ''
|
# if all(p[0] for p in sub) and matched:
|
||||||
|
# yield f
|
||||||
# Remove excluded patterns from the list of directories to traverse
|
# continue
|
||||||
# by mutating the dirs we're iterating over.
|
children = False
|
||||||
# This looks strange, but is considered the correct way to skip
|
for r in (os.path.join(f, p) for p in walk(cur, sub, matched)):
|
||||||
# traversal. See https://docs.python.org/2/library/os.html#os.walk
|
yield r
|
||||||
dirs[:] = [
|
children = True
|
||||||
d for d in dirs if should_check_directory(
|
# The current unit tests expect directories only under those
|
||||||
os.path.join(parent, d), exclude_patterns, include_patterns,
|
# conditions. It might be simplifiable though.
|
||||||
root
|
if (not sub or not children) and hit or hit is None and default:
|
||||||
)
|
yield f
|
||||||
]
|
elif matched:
|
||||||
|
yield f
|
||||||
for path in dirs:
|
|
||||||
if should_include(os.path.join(parent, path),
|
|
||||||
exclude_patterns, include_patterns, root):
|
|
||||||
paths.append(os.path.join(parent, path))
|
|
||||||
|
|
||||||
for path in files:
|
|
||||||
if should_include(os.path.join(parent, path),
|
|
||||||
exclude_patterns, include_patterns, root):
|
|
||||||
paths.append(os.path.join(parent, path))
|
|
||||||
|
|
||||||
return paths
|
|
||||||
|
|
||||||
|
|
||||||
def match_path(path, pattern):
|
|
||||||
|
|
||||||
pattern = pattern.rstrip('/' + os.path.sep)
|
|
||||||
if pattern and not os.path.isabs(pattern):
|
|
||||||
pattern = os.path.relpath(pattern)
|
|
||||||
|
|
||||||
pattern_components = pattern.split(os.path.sep)
|
|
||||||
if len(pattern_components) == 1 and IS_WINDOWS_PLATFORM:
|
|
||||||
pattern_components = pattern.split('/')
|
|
||||||
|
|
||||||
if '**' not in pattern:
|
|
||||||
path_components = path.split(os.path.sep)[:len(pattern_components)]
|
|
||||||
else:
|
|
||||||
path_components = path.split(os.path.sep)
|
|
||||||
return fnmatch('/'.join(path_components), '/'.join(pattern_components))
|
|
||||||
|
|
|
@ -23,7 +23,6 @@ from docker.utils import (
|
||||||
decode_json_header, tar, split_command, parse_devices, update_headers,
|
decode_json_header, tar, split_command, parse_devices, update_headers,
|
||||||
)
|
)
|
||||||
|
|
||||||
from docker.utils.build import should_check_directory
|
|
||||||
from docker.utils.ports import build_port_bindings, split_port
|
from docker.utils.ports import build_port_bindings, split_port
|
||||||
from docker.utils.utils import format_environment
|
from docker.utils.utils import format_environment
|
||||||
|
|
||||||
|
@ -758,6 +757,13 @@ class ExcludePathsTest(unittest.TestCase):
|
||||||
self.all_paths - set(['foo/a.py'])
|
self.all_paths - set(['foo/a.py'])
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_exclude_include_absolute_path(self):
|
||||||
|
base = make_tree([], ['a.py', 'b.py'])
|
||||||
|
assert exclude_paths(
|
||||||
|
base,
|
||||||
|
['/*', '!/*.py']
|
||||||
|
) == set(['a.py', 'b.py'])
|
||||||
|
|
||||||
def test_single_subdir_with_path_traversal(self):
|
def test_single_subdir_with_path_traversal(self):
|
||||||
assert self.exclude(['foo/whoops/../a.py']) == convert_paths(
|
assert self.exclude(['foo/whoops/../a.py']) == convert_paths(
|
||||||
self.all_paths - set(['foo/a.py'])
|
self.all_paths - set(['foo/a.py'])
|
||||||
|
@ -876,12 +882,25 @@ class ExcludePathsTest(unittest.TestCase):
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_exclude_include_absolute_path(self):
|
def test_include_wildcard(self):
|
||||||
base = make_tree([], ['a.py', 'b.py'])
|
base = make_tree(['a'], ['a/b.py'])
|
||||||
assert exclude_paths(
|
assert exclude_paths(
|
||||||
base,
|
base,
|
||||||
['/*', '!' + os.path.join(base, '*.py')]
|
['*', '!*/b.py']
|
||||||
) == set(['a.py', 'b.py'])
|
) == convert_paths(['a/b.py'])
|
||||||
|
|
||||||
|
def test_last_line_precedence(self):
|
||||||
|
base = make_tree(
|
||||||
|
[],
|
||||||
|
['garbage.md',
|
||||||
|
'thrash.md',
|
||||||
|
'README.md',
|
||||||
|
'README-bis.md',
|
||||||
|
'README-secret.md'])
|
||||||
|
assert exclude_paths(
|
||||||
|
base,
|
||||||
|
['*.md', '!README*.md', 'README-secret.md']
|
||||||
|
) == set(['README.md', 'README-bis.md'])
|
||||||
|
|
||||||
|
|
||||||
class TarTest(unittest.TestCase):
|
class TarTest(unittest.TestCase):
|
||||||
|
@ -1019,69 +1038,6 @@ class TarTest(unittest.TestCase):
|
||||||
assert tar_data.getmember('th.txt').mtime == -3600
|
assert tar_data.getmember('th.txt').mtime == -3600
|
||||||
|
|
||||||
|
|
||||||
class ShouldCheckDirectoryTest(unittest.TestCase):
|
|
||||||
exclude_patterns = [
|
|
||||||
'exclude_rather_large_directory',
|
|
||||||
'dir/with/subdir_excluded',
|
|
||||||
'dir/with/exceptions'
|
|
||||||
]
|
|
||||||
|
|
||||||
include_patterns = [
|
|
||||||
'dir/with/exceptions/like_this_one',
|
|
||||||
'dir/with/exceptions/in/descendents'
|
|
||||||
]
|
|
||||||
|
|
||||||
def test_should_check_directory_not_excluded(self):
|
|
||||||
assert should_check_directory(
|
|
||||||
'not_excluded', self.exclude_patterns, self.include_patterns, '.'
|
|
||||||
)
|
|
||||||
assert should_check_directory(
|
|
||||||
convert_path('dir/with'), self.exclude_patterns,
|
|
||||||
self.include_patterns, '.'
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_shoud_check_parent_directories_of_excluded(self):
|
|
||||||
assert should_check_directory(
|
|
||||||
'dir', self.exclude_patterns, self.include_patterns, '.'
|
|
||||||
)
|
|
||||||
assert should_check_directory(
|
|
||||||
convert_path('dir/with'), self.exclude_patterns,
|
|
||||||
self.include_patterns, '.'
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_should_not_check_excluded_directories_with_no_exceptions(self):
|
|
||||||
assert not should_check_directory(
|
|
||||||
'exclude_rather_large_directory', self.exclude_patterns,
|
|
||||||
self.include_patterns, '.'
|
|
||||||
)
|
|
||||||
assert not should_check_directory(
|
|
||||||
convert_path('dir/with/subdir_excluded'), self.exclude_patterns,
|
|
||||||
self.include_patterns, '.'
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_should_check_excluded_directory_with_exceptions(self):
|
|
||||||
assert should_check_directory(
|
|
||||||
convert_path('dir/with/exceptions'), self.exclude_patterns,
|
|
||||||
self.include_patterns, '.'
|
|
||||||
)
|
|
||||||
assert should_check_directory(
|
|
||||||
convert_path('dir/with/exceptions/in'), self.exclude_patterns,
|
|
||||||
self.include_patterns, '.'
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_should_not_check_siblings_of_exceptions(self):
|
|
||||||
assert not should_check_directory(
|
|
||||||
convert_path('dir/with/exceptions/but_not_here'),
|
|
||||||
self.exclude_patterns, self.include_patterns, '.'
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_should_check_subdirectories_of_exceptions(self):
|
|
||||||
assert should_check_directory(
|
|
||||||
convert_path('dir/with/exceptions/like_this_one/subdir'),
|
|
||||||
self.exclude_patterns, self.include_patterns, '.'
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class FormatEnvironmentTest(unittest.TestCase):
|
class FormatEnvironmentTest(unittest.TestCase):
|
||||||
def test_format_env_binary_unicode_value(self):
|
def test_format_env_binary_unicode_value(self):
|
||||||
env_dict = {
|
env_dict = {
|
||||||
|
|
Loading…
Reference in New Issue