mirror of https://github.com/docker/docker-py.git
Improve get_paths performance by not descending into ignored directories
Signed-off-by: Thomas Boyt <thomas@ledgerx.com>
This commit is contained in:
parent
28864df27b
commit
a49166abf2
|
@ -107,37 +107,67 @@ def exclude_paths(root, patterns, dockerfile=None):
|
|||
|
||||
exclude_patterns = list(set(patterns) - set(exceptions))
|
||||
|
||||
all_paths = get_paths(root)
|
||||
|
||||
# Remove all paths that are matched by any exclusion pattern
|
||||
paths = [
|
||||
p for p in all_paths
|
||||
if not any(match_path(p, pattern) for pattern in exclude_patterns)
|
||||
]
|
||||
|
||||
# Add back the set of paths that are matched by any inclusion pattern.
|
||||
# Include parent dirs - if we add back 'foo/bar', add 'foo' as well
|
||||
for p in all_paths:
|
||||
if any(match_path(p, pattern) for pattern in include_patterns):
|
||||
components = p.split('/')
|
||||
paths += [
|
||||
'/'.join(components[:end])
|
||||
for end in range(1, len(components) + 1)
|
||||
]
|
||||
paths = get_paths(root, exclude_patterns, include_patterns,
|
||||
has_exceptions=len(exceptions) > 0)
|
||||
|
||||
return set(paths)
|
||||
|
||||
|
||||
def get_paths(root):
|
||||
def should_include(path, exclude_patterns, include_patterns):
|
||||
"""
|
||||
Given a path, a list of exclude patterns, and a list of inclusion patterns:
|
||||
|
||||
1. Returns True if the path doesn't match any exclusion pattern
|
||||
2. Returns False if the path matches an exclusion pattern and doesn't match
|
||||
an inclusion pattern
|
||||
3. Returns true if the path matches an exclusion pattern and matches an
|
||||
inclusion pattern
|
||||
"""
|
||||
for pattern in exclude_patterns:
|
||||
if match_path(path, pattern):
|
||||
for pattern in include_patterns:
|
||||
if match_path(path, pattern):
|
||||
return True
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def get_paths(root, exclude_patterns, include_patterns, has_exceptions=False):
|
||||
paths = []
|
||||
|
||||
for parent, dirs, files in os.walk(root, followlinks=False):
|
||||
for parent, dirs, files in os.walk(root, topdown=True, followlinks=False):
|
||||
parent = os.path.relpath(parent, root)
|
||||
if parent == '.':
|
||||
parent = ''
|
||||
|
||||
# If exception rules exist, we can't skip recursing into ignored
|
||||
# directories, as we need to look for exceptions in them.
|
||||
#
|
||||
# It may be possible to optimize this further for exception patterns
|
||||
# that *couldn't* match within ignored directores.
|
||||
#
|
||||
# This matches the current docker logic (as of 2015-11-24):
|
||||
# https://github.com/docker/docker/blob/37ba67bf636b34dc5c0c0265d62a089d0492088f/pkg/archive/archive.go#L555-L557
|
||||
|
||||
if not has_exceptions:
|
||||
|
||||
# Remove excluded patterns from the list of directories to traverse
|
||||
# by mutating the dirs we're iterating over.
|
||||
# This looks strange, but is considered the correct way to skip
|
||||
# traversal. See https://docs.python.org/2/library/os.html#os.walk
|
||||
|
||||
dirs[:] = [d for d in dirs if
|
||||
should_include(os.path.join(parent, d),
|
||||
exclude_patterns, include_patterns)]
|
||||
|
||||
for path in dirs:
|
||||
if should_include(os.path.join(parent, path),
|
||||
exclude_patterns, include_patterns):
|
||||
paths.append(os.path.join(parent, path))
|
||||
|
||||
for path in files:
|
||||
if should_include(os.path.join(parent, path),
|
||||
exclude_patterns, include_patterns):
|
||||
paths.append(os.path.join(parent, path))
|
||||
|
||||
return paths
|
||||
|
|
|
@ -65,6 +65,7 @@ class BuildTest(helpers.BaseTestCase):
|
|||
'ignored',
|
||||
'Dockerfile',
|
||||
'.dockerignore',
|
||||
'!ignored/subdir/excepted-file',
|
||||
'', # empty line
|
||||
]))
|
||||
|
||||
|
@ -76,6 +77,9 @@ class BuildTest(helpers.BaseTestCase):
|
|||
with open(os.path.join(subdir, 'file'), 'w') as f:
|
||||
f.write("this file should be ignored")
|
||||
|
||||
with open(os.path.join(subdir, 'excepted-file'), 'w') as f:
|
||||
f.write("this file should not be ignored")
|
||||
|
||||
tag = 'docker-py-test-build-with-dockerignore'
|
||||
stream = self.client.build(
|
||||
path=base_dir,
|
||||
|
@ -84,7 +88,7 @@ class BuildTest(helpers.BaseTestCase):
|
|||
for chunk in stream:
|
||||
pass
|
||||
|
||||
c = self.client.create_container(tag, ['ls', '-1A', '/test'])
|
||||
c = self.client.create_container(tag, ['find', '/test', '-type', 'f'])
|
||||
self.client.start(c)
|
||||
self.client.wait(c)
|
||||
logs = self.client.logs(c)
|
||||
|
@ -93,8 +97,9 @@ class BuildTest(helpers.BaseTestCase):
|
|||
logs = logs.decode('utf-8')
|
||||
|
||||
self.assertEqual(
|
||||
list(filter(None, logs.split('\n'))),
|
||||
['not-ignored'],
|
||||
sorted(list(filter(None, logs.split('\n')))),
|
||||
sorted(['/test/ignored/subdir/excepted-file',
|
||||
'/test/not-ignored']),
|
||||
)
|
||||
|
||||
@requires_api_version('1.21')
|
||||
|
|
|
@ -671,17 +671,17 @@ class ExcludePathsTest(base.BaseTestCase):
|
|||
|
||||
def test_directory_with_single_exception(self):
|
||||
assert self.exclude(['foo', '!foo/bar/a.py']) == self.all_paths - set([
|
||||
'foo/a.py', 'foo/b.py',
|
||||
'foo/a.py', 'foo/b.py', 'foo', 'foo/bar'
|
||||
])
|
||||
|
||||
def test_directory_with_subdir_exception(self):
|
||||
assert self.exclude(['foo', '!foo/bar']) == self.all_paths - set([
|
||||
'foo/a.py', 'foo/b.py',
|
||||
'foo/a.py', 'foo/b.py', 'foo'
|
||||
])
|
||||
|
||||
def test_directory_with_wildcard_exception(self):
|
||||
assert self.exclude(['foo', '!foo/*.py']) == self.all_paths - set([
|
||||
'foo/bar', 'foo/bar/a.py',
|
||||
'foo/bar', 'foo/bar/a.py', 'foo'
|
||||
])
|
||||
|
||||
def test_subdirectory(self):
|
||||
|
|
Loading…
Reference in New Issue