From 20c6fe31e067f647af52b05ef44ba36aad887882 Mon Sep 17 00:00:00 2001
From: Joffrey F
Date: Tue, 14 Feb 2017 18:24:23 -0800
Subject: [PATCH] Add support for recursive wildcard pattern in .dockerignore

Signed-off-by: Joffrey F
---
 docker/utils/__init__.py |   5 +-
 docker/utils/build.py    | 156 +++++++++++++++++++++++++++++++++++++++
 docker/utils/fnmatch.py  | 111 ++++++++++++++++++++++++++++++
 docker/utils/utils.py    | 132 -------------------------------------
 tests/unit/utils_test.py |  16 ++++-
 5 files changed, 283 insertions(+), 137 deletions(-)
 create mode 100644 docker/utils/build.py
 create mode 100644 docker/utils/fnmatch.py

diff --git a/docker/utils/__init__.py b/docker/utils/__init__.py
index 8f8eb270..b758cbd4 100644
--- a/docker/utils/__init__.py
+++ b/docker/utils/__init__.py
@@ -1,7 +1,9 @@
 # flake8: noqa
+from .build import tar, exclude_paths
+from .decorators import check_resource, minimum_version, update_headers
 from .utils import (
     compare_version, convert_port_bindings, convert_volume_binds,
-    mkbuildcontext, tar, exclude_paths, parse_repository_tag, parse_host,
+    mkbuildcontext, parse_repository_tag, parse_host,
     kwargs_from_env, convert_filters, datetime_to_timestamp,
     create_host_config, parse_bytes, ping_registry, parse_env_file, version_lt,
     version_gte, decode_json_header, split_command, create_ipam_config,
@@ -9,4 +11,3 @@ from .utils import (
     format_environment, create_archive
 )
 
-from .decorators import check_resource, minimum_version, update_headers
diff --git a/docker/utils/build.py b/docker/utils/build.py
new file mode 100644
index 00000000..6ba47b39
--- /dev/null
+++ b/docker/utils/build.py
@@ -0,0 +1,156 @@
+import os
+
+from .fnmatch import fnmatch
+from .utils import create_archive
+
+
+def tar(path, exclude=None, dockerfile=None, fileobj=None, gzip=False):
+    """
+    Build an archive of the directory ``path`` containing only the entries
+    that exclude_paths keeps for the ``exclude`` patterns.
+    """
+    root = os.path.abspath(path)
+    exclude = exclude or []
+
+    return create_archive(
+        files=sorted(exclude_paths(root, exclude, dockerfile=dockerfile)),
+        root=root, fileobj=fileobj, gzip=gzip
+    )
+
+
+def exclude_paths(root, patterns, dockerfile=None):
+    """
+    Given a root directory path and a list of .dockerignore patterns, return
+    an iterator of all paths (both regular files and directories) in the root
+    directory that do *not* match any of the patterns.
+
+    All paths returned are relative to the root.
+    """
+    if dockerfile is None:
+        dockerfile = 'Dockerfile'
+
+    exceptions = [p for p in patterns if p.startswith('!')]
+
+    include_patterns = [p[1:] for p in exceptions]
+    include_patterns += [dockerfile, '.dockerignore']
+
+    exclude_patterns = list(set(patterns) - set(exceptions))
+
+    paths = get_paths(root, exclude_patterns, include_patterns,
+                      has_exceptions=len(exceptions) > 0)
+
+    return set(paths).union(
+        # If the Dockerfile is in a subdirectory that is excluded, get_paths
+        # will not descend into it and the file will be skipped. This ensures
+        # it doesn't happen.
+        set([dockerfile])
+        if os.path.exists(os.path.join(root, dockerfile)) else set()
+    )
+
+
+def should_include(path, exclude_patterns, include_patterns):
+    """
+    Given a path, a list of exclude patterns, and a list of inclusion patterns:
+
+    1. Returns True if the path doesn't match any exclusion pattern
+    2. Returns False if the path matches an exclusion pattern and doesn't match
+       an inclusion pattern
+    3. Returns True if the path matches an exclusion pattern and matches an
+       inclusion pattern
+    """
+    for pattern in exclude_patterns:
+        if match_path(path, pattern):
+            for pattern in include_patterns:
+                if match_path(path, pattern):
+                    return True
+            return False
+    return True
+
+
+def should_check_directory(directory_path, exclude_patterns, include_patterns):
+    """
+    Given a directory path, a list of exclude patterns, and a list of inclusion
+    patterns:
+
+    1. Returns True if the directory path should be included according to
+       should_include.
+    2. Returns True if the directory path is the prefix for an inclusion
+       pattern
+    3. Returns False otherwise
+    """
+
+    # To account for exception rules, check directories if their path is
+    # a prefix to an inclusion pattern. This logic conforms with the current
+    # docker logic (2016-10-27):
+    # https://github.com/docker/docker/blob/bc52939b0455116ab8e0da67869ec81c1a1c3e2c/pkg/archive/archive.go#L640-L671
+
+    def normalize_path(path):
+        return path.replace(os.path.sep, '/')
+
+    path_with_slash = normalize_path(directory_path) + '/'
+    possible_child_patterns = [
+        pattern for pattern in map(normalize_path, include_patterns)
+        if (pattern + '/').startswith(path_with_slash)
+    ]
+    directory_included = should_include(
+        directory_path, exclude_patterns, include_patterns
+    )
+    return directory_included or len(possible_child_patterns) > 0
+
+
+def get_paths(root, exclude_patterns, include_patterns, has_exceptions=False):
+    """
+    Walk ``root`` top-down and return the root-relative paths of every file
+    and directory accepted by should_include. Directories rejected by
+    should_check_directory are pruned from the walk. ``has_exceptions`` is
+    not used by this function.
+    """
+    paths = []
+
+    for parent, dirs, files in os.walk(root, topdown=True, followlinks=False):
+        parent = os.path.relpath(parent, root)
+        if parent == '.':
+            parent = ''
+
+        # Remove excluded patterns from the list of directories to traverse
+        # by mutating the dirs we're iterating over.
+        # This looks strange, but is considered the correct way to skip
+        # traversal. See https://docs.python.org/2/library/os.html#os.walk
+        dirs[:] = [
+            d for d in dirs if should_check_directory(
+                os.path.join(parent, d), exclude_patterns, include_patterns
+            )
+        ]
+
+        for path in dirs:
+            if should_include(os.path.join(parent, path),
+                              exclude_patterns, include_patterns):
+                paths.append(os.path.join(parent, path))
+
+        for path in files:
+            if should_include(os.path.join(parent, path),
+                              exclude_patterns, include_patterns):
+                paths.append(os.path.join(parent, path))
+
+    return paths
+
+
+def match_path(path, pattern):
+    """
+    Return True if ``path`` (relative to the build root) matches ``pattern``.
+
+    Without a ``**`` in the pattern, only as many leading components of
+    ``path`` as the pattern has are compared, so a pattern matching a
+    directory also matches everything below it. With ``**``, the whole path
+    is compared.
+    """
+    pattern = pattern.rstrip('/' + os.path.sep)
+    if pattern:
+        pattern = os.path.relpath(pattern)
+
+    if '**' not in pattern:
+        pattern_components = pattern.split(os.path.sep)
+        path_components = path.split(os.path.sep)[:len(pattern_components)]
+    else:
+        path_components = path.split(os.path.sep)
+    return fnmatch('/'.join(path_components), pattern)
diff --git a/docker/utils/fnmatch.py b/docker/utils/fnmatch.py
new file mode 100644
index 00000000..80bdf773
--- /dev/null
+++ b/docker/utils/fnmatch.py
@@ -0,0 +1,111 @@
+"""Filename matching with shell patterns.
+
+fnmatch(FILENAME, PATTERN) matches according to the local convention.
+fnmatchcase(FILENAME, PATTERN) always takes case in account.
+
+The functions operate by translating the pattern into a regular
+expression. They cache the compiled regular expressions for speed.
+
+The function translate(PATTERN) returns a regular expression
+corresponding to PATTERN. (It does not compile it.)
+"""
+
+import re
+
+__all__ = ["fnmatch", "fnmatchcase", "translate"]
+
+_cache = {}
+_MAXCACHE = 100
+
+
+def _purge():
+    """Clear the pattern cache"""
+    _cache.clear()
+
+
+def fnmatch(name, pat):
+    """Test whether FILENAME matches PATTERN.
+
+    Patterns are Unix shell style:
+
+    *       matches everything
+    ?       matches any single character
+    [seq]   matches any character in seq
+    [!seq]  matches any char not in seq
+
+    An initial period in FILENAME is not special.
+    Both FILENAME and PATTERN are first case-normalized
+    if the operating system requires it.
+    If you don't want this, use fnmatchcase(FILENAME, PATTERN).
+    """
+
+    import os
+    name = os.path.normcase(name)
+    pat = os.path.normcase(pat)
+    return fnmatchcase(name, pat)
+
+
+def fnmatchcase(name, pat):
+    """Test whether FILENAME matches PATTERN, including case.
+
+    This is a version of fnmatch() which doesn't case-normalize
+    its arguments.
+    """
+
+    try:
+        re_pat = _cache[pat]
+    except KeyError:
+        res = translate(pat)
+        if len(_cache) >= _MAXCACHE:
+            _cache.clear()
+        _cache[pat] = re_pat = re.compile(res)
+    return re_pat.match(name) is not None
+
+
+def translate(pat):
+    """Translate a shell PATTERN to a regular expression.
+
+    There is no way to quote meta-characters.
+
+    A ``**`` matches like ``*`` and additionally makes every ``/`` that
+    follows it in PATTERN optional, so ``**/a.py`` also matches ``a.py``.
+    """
+
+    recursive_mode = False
+    i, n = 0, len(pat)
+    res = ''
+    while i < n:
+        c = pat[i]
+        i = i + 1
+        if c == '*':
+            if i < n and pat[i] == '*':
+                recursive_mode = True
+                i = i + 1
+            res = res + '.*'
+        elif c == '?':
+            res = res + '.'
+        elif c == '[':
+            j = i
+            if j < n and pat[j] == '!':
+                j = j + 1
+            if j < n and pat[j] == ']':
+                j = j + 1
+            while j < n and pat[j] != ']':
+                j = j + 1
+            if j >= n:
+                res = res + '\\['
+            else:
+                stuff = pat[i:j].replace('\\', '\\\\')
+                i = j + 1
+                if stuff[0] == '!':
+                    stuff = '^' + stuff[1:]
+                elif stuff[0] == '^':
+                    stuff = '\\' + stuff
+                res = '%s[%s]' % (res, stuff)
+        elif recursive_mode and c == '/':
+            res = res + '/?'
+        else:
+            res = res + re.escape(c)
+    # Inline flags have to open the pattern: trailing global flags are
+    # deprecated since Python 3.6 and rejected by re.compile() in 3.11+.
+    return '(?ms)' + res + r'\Z'
diff --git a/docker/utils/utils.py b/docker/utils/utils.py
index 01eb16c3..d9a6d7c1 100644
--- a/docker/utils/utils.py
+++ b/docker/utils/utils.py
@@ -9,7 +9,6 @@ import tempfile
 import warnings
 from distutils.version import StrictVersion
 from datetime import datetime
-from fnmatch import fnmatch
 
 import requests
 import six
@@ -79,16 +78,6 @@ def decode_json_header(header):
     return json.loads(data)
 
 
-def tar(path, exclude=None, dockerfile=None, fileobj=None, gzip=False):
-    root = os.path.abspath(path)
-    exclude = exclude or []
-
-    return create_archive(
-        files=sorted(exclude_paths(root, exclude, dockerfile=dockerfile)),
-        root=root, fileobj=fileobj, gzip=gzip
-    )
-
-
 def build_file_list(root):
     files = []
     for dirname, dirnames, fnames in os.walk(root):
@@ -131,127 +120,6 @@ def create_archive(root, files=None, fileobj=None, gzip=False):
     return fileobj
 
 
-def exclude_paths(root, patterns, dockerfile=None):
-    """
-    Given a root directory path and a list of .dockerignore patterns, return
-    an iterator of all paths (both regular files and directories) in the root
-    directory that do *not* match any of the patterns.
-
-    All paths returned are relative to the root.
-    """
-    if dockerfile is None:
-        dockerfile = 'Dockerfile'
-
-    exceptions = [p for p in patterns if p.startswith('!')]
-
-    include_patterns = [p[1:] for p in exceptions]
-    include_patterns += [dockerfile, '.dockerignore']
-
-    exclude_patterns = list(set(patterns) - set(exceptions))
-
-    paths = get_paths(root, exclude_patterns, include_patterns,
-                      has_exceptions=len(exceptions) > 0)
-
-    return set(paths).union(
-        # If the Dockerfile is in a subdirectory that is excluded, get_paths
-        # will not descend into it and the file will be skipped. This ensures
-        # it doesn't happen.
-        set([dockerfile])
-        if os.path.exists(os.path.join(root, dockerfile)) else set()
-    )
-
-
-def should_include(path, exclude_patterns, include_patterns):
-    """
-    Given a path, a list of exclude patterns, and a list of inclusion patterns:
-
-    1. Returns True if the path doesn't match any exclusion pattern
-    2. Returns False if the path matches an exclusion pattern and doesn't match
-       an inclusion pattern
-    3. Returns true if the path matches an exclusion pattern and matches an
-       inclusion pattern
-    """
-    for pattern in exclude_patterns:
-        if match_path(path, pattern):
-            for pattern in include_patterns:
-                if match_path(path, pattern):
-                    return True
-            return False
-    return True
-
-
-def should_check_directory(directory_path, exclude_patterns, include_patterns):
-    """
-    Given a directory path, a list of exclude patterns, and a list of inclusion
-    patterns:
-
-    1. Returns True if the directory path should be included according to
-       should_include.
-    2. Returns True if the directory path is the prefix for an inclusion
-       pattern
-    3. Returns False otherwise
-    """
-
-    # To account for exception rules, check directories if their path is a
-    # a prefix to an inclusion pattern. This logic conforms with the current
-    # docker logic (2016-10-27):
-    # https://github.com/docker/docker/blob/bc52939b0455116ab8e0da67869ec81c1a1c3e2c/pkg/archive/archive.go#L640-L671
-
-    def normalize_path(path):
-        return path.replace(os.path.sep, '/')
-
-    path_with_slash = normalize_path(directory_path) + '/'
-    possible_child_patterns = [
-        pattern for pattern in map(normalize_path, include_patterns)
-        if (pattern + '/').startswith(path_with_slash)
-    ]
-    directory_included = should_include(
-        directory_path, exclude_patterns, include_patterns
-    )
-    return directory_included or len(possible_child_patterns) > 0
-
-
-def get_paths(root, exclude_patterns, include_patterns, has_exceptions=False):
-    paths = []
-
-    for parent, dirs, files in os.walk(root, topdown=True, followlinks=False):
-        parent = os.path.relpath(parent, root)
-        if parent == '.':
-            parent = ''
-
-        # Remove excluded patterns from the list of directories to traverse
-        # by mutating the dirs we're iterating over.
-        # This looks strange, but is considered the correct way to skip
-        # traversal. See https://docs.python.org/2/library/os.html#os.walk
-        dirs[:] = [
-            d for d in dirs if should_check_directory(
-                os.path.join(parent, d), exclude_patterns, include_patterns
-            )
-        ]
-
-        for path in dirs:
-            if should_include(os.path.join(parent, path),
-                              exclude_patterns, include_patterns):
-                paths.append(os.path.join(parent, path))
-
-        for path in files:
-            if should_include(os.path.join(parent, path),
-                              exclude_patterns, include_patterns):
-                paths.append(os.path.join(parent, path))
-
-    return paths
-
-
-def match_path(path, pattern):
-    pattern = pattern.rstrip('/' + os.path.sep)
-    if pattern:
-        pattern = os.path.relpath(pattern)
-
-    pattern_components = pattern.split(os.path.sep)
-    path_components = path.split(os.path.sep)[:len(pattern_components)]
-    return fnmatch('/'.join(path_components), pattern)
-
-
 def compare_version(v1, v2):
     """Compare docker versions
 
diff --git a/tests/unit/utils_test.py b/tests/unit/utils_test.py
index 71a8cc70..854d0ef2 100644
--- a/tests/unit/utils_test.py
+++ b/tests/unit/utils_test.py
@@ -23,10 +23,9 @@ from docker.utils import (
     decode_json_header, tar, split_command, parse_devices, update_headers,
 )
 
+from docker.utils.build import should_check_directory
 from docker.utils.ports import build_port_bindings, split_port
-from docker.utils.utils import (
-    format_environment, should_check_directory
-)
+from docker.utils.utils import format_environment
 
 from ..helpers import make_tree
 
@@ -811,6 +810,17 @@ class ExcludePathsTest(unittest.TestCase):
             self.all_paths - set(['foo/bar', 'foo/bar/a.py'])
         )
 
+    def test_double_wildcard(self):
+        assert self.exclude(['**/a.py']) == convert_paths(
+            self.all_paths - set(
+                ['a.py', 'foo/a.py', 'foo/bar/a.py', 'bar/a.py']
+            )
+        )
+
+        assert self.exclude(['foo/**/bar']) == convert_paths(
+            self.all_paths - set(['foo/bar', 'foo/bar/a.py'])
+        )
+
 
 class TarTest(unittest.TestCase):
     def test_tar_with_excludes(self):