mirror of https://github.com/docker/docker-py.git
Improve get_paths performance by not descending into ignored directories
Signed-off-by: Thomas Boyt <thomas@ledgerx.com>
This commit is contained in:
parent
28864df27b
commit
a49166abf2
|
@ -107,38 +107,68 @@ def exclude_paths(root, patterns, dockerfile=None):
|
||||||
|
|
||||||
exclude_patterns = list(set(patterns) - set(exceptions))
|
exclude_patterns = list(set(patterns) - set(exceptions))
|
||||||
|
|
||||||
all_paths = get_paths(root)
|
paths = get_paths(root, exclude_patterns, include_patterns,
|
||||||
|
has_exceptions=len(exceptions) > 0)
|
||||||
# Remove all paths that are matched by any exclusion pattern
|
|
||||||
paths = [
|
|
||||||
p for p in all_paths
|
|
||||||
if not any(match_path(p, pattern) for pattern in exclude_patterns)
|
|
||||||
]
|
|
||||||
|
|
||||||
# Add back the set of paths that are matched by any inclusion pattern.
|
|
||||||
# Include parent dirs - if we add back 'foo/bar', add 'foo' as well
|
|
||||||
for p in all_paths:
|
|
||||||
if any(match_path(p, pattern) for pattern in include_patterns):
|
|
||||||
components = p.split('/')
|
|
||||||
paths += [
|
|
||||||
'/'.join(components[:end])
|
|
||||||
for end in range(1, len(components) + 1)
|
|
||||||
]
|
|
||||||
|
|
||||||
return set(paths)
|
return set(paths)
|
||||||
|
|
||||||
|
|
||||||
def get_paths(root):
|
def should_include(path, exclude_patterns, include_patterns):
|
||||||
|
"""
|
||||||
|
Given a path, a list of exclude patterns, and a list of inclusion patterns:
|
||||||
|
|
||||||
|
1. Returns True if the path doesn't match any exclusion pattern
|
||||||
|
2. Returns False if the path matches an exclusion pattern and doesn't match
|
||||||
|
an inclusion pattern
|
||||||
|
3. Returns True if the path matches an exclusion pattern and matches an
|
||||||
|
inclusion pattern
|
||||||
|
"""
|
||||||
|
for pattern in exclude_patterns:
|
||||||
|
if match_path(path, pattern):
|
||||||
|
for pattern in include_patterns:
|
||||||
|
if match_path(path, pattern):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def get_paths(root, exclude_patterns, include_patterns, has_exceptions=False):
|
||||||
paths = []
|
paths = []
|
||||||
|
|
||||||
for parent, dirs, files in os.walk(root, followlinks=False):
|
for parent, dirs, files in os.walk(root, topdown=True, followlinks=False):
|
||||||
parent = os.path.relpath(parent, root)
|
parent = os.path.relpath(parent, root)
|
||||||
if parent == '.':
|
if parent == '.':
|
||||||
parent = ''
|
parent = ''
|
||||||
|
|
||||||
|
# If exception rules exist, we can't skip recursing into ignored
|
||||||
|
# directories, as we need to look for exceptions in them.
|
||||||
|
#
|
||||||
|
# It may be possible to optimize this further for exception patterns
|
||||||
|
# that *couldn't* match within ignored directories.
|
||||||
|
#
|
||||||
|
# This matches the current docker logic (as of 2015-11-24):
|
||||||
|
# https://github.com/docker/docker/blob/37ba67bf636b34dc5c0c0265d62a089d0492088f/pkg/archive/archive.go#L555-L557
|
||||||
|
|
||||||
|
if not has_exceptions:
|
||||||
|
|
||||||
|
# Remove excluded patterns from the list of directories to traverse
|
||||||
|
# by mutating the dirs we're iterating over.
|
||||||
|
# This looks strange, but is considered the correct way to skip
|
||||||
|
# traversal. See https://docs.python.org/2/library/os.html#os.walk
|
||||||
|
|
||||||
|
dirs[:] = [d for d in dirs if
|
||||||
|
should_include(os.path.join(parent, d),
|
||||||
|
exclude_patterns, include_patterns)]
|
||||||
|
|
||||||
for path in dirs:
|
for path in dirs:
|
||||||
paths.append(os.path.join(parent, path))
|
if should_include(os.path.join(parent, path),
|
||||||
|
exclude_patterns, include_patterns):
|
||||||
|
paths.append(os.path.join(parent, path))
|
||||||
|
|
||||||
for path in files:
|
for path in files:
|
||||||
paths.append(os.path.join(parent, path))
|
if should_include(os.path.join(parent, path),
|
||||||
|
exclude_patterns, include_patterns):
|
||||||
|
paths.append(os.path.join(parent, path))
|
||||||
|
|
||||||
return paths
|
return paths
|
||||||
|
|
||||||
|
|
|
@ -65,6 +65,7 @@ class BuildTest(helpers.BaseTestCase):
|
||||||
'ignored',
|
'ignored',
|
||||||
'Dockerfile',
|
'Dockerfile',
|
||||||
'.dockerignore',
|
'.dockerignore',
|
||||||
|
'!ignored/subdir/excepted-file',
|
||||||
'', # empty line
|
'', # empty line
|
||||||
]))
|
]))
|
||||||
|
|
||||||
|
@ -76,6 +77,9 @@ class BuildTest(helpers.BaseTestCase):
|
||||||
with open(os.path.join(subdir, 'file'), 'w') as f:
|
with open(os.path.join(subdir, 'file'), 'w') as f:
|
||||||
f.write("this file should be ignored")
|
f.write("this file should be ignored")
|
||||||
|
|
||||||
|
with open(os.path.join(subdir, 'excepted-file'), 'w') as f:
|
||||||
|
f.write("this file should not be ignored")
|
||||||
|
|
||||||
tag = 'docker-py-test-build-with-dockerignore'
|
tag = 'docker-py-test-build-with-dockerignore'
|
||||||
stream = self.client.build(
|
stream = self.client.build(
|
||||||
path=base_dir,
|
path=base_dir,
|
||||||
|
@ -84,7 +88,7 @@ class BuildTest(helpers.BaseTestCase):
|
||||||
for chunk in stream:
|
for chunk in stream:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
c = self.client.create_container(tag, ['ls', '-1A', '/test'])
|
c = self.client.create_container(tag, ['find', '/test', '-type', 'f'])
|
||||||
self.client.start(c)
|
self.client.start(c)
|
||||||
self.client.wait(c)
|
self.client.wait(c)
|
||||||
logs = self.client.logs(c)
|
logs = self.client.logs(c)
|
||||||
|
@ -93,8 +97,9 @@ class BuildTest(helpers.BaseTestCase):
|
||||||
logs = logs.decode('utf-8')
|
logs = logs.decode('utf-8')
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
list(filter(None, logs.split('\n'))),
|
sorted(list(filter(None, logs.split('\n')))),
|
||||||
['not-ignored'],
|
sorted(['/test/ignored/subdir/excepted-file',
|
||||||
|
'/test/not-ignored']),
|
||||||
)
|
)
|
||||||
|
|
||||||
@requires_api_version('1.21')
|
@requires_api_version('1.21')
|
||||||
|
|
|
@ -671,17 +671,17 @@ class ExcludePathsTest(base.BaseTestCase):
|
||||||
|
|
||||||
def test_directory_with_single_exception(self):
|
def test_directory_with_single_exception(self):
|
||||||
assert self.exclude(['foo', '!foo/bar/a.py']) == self.all_paths - set([
|
assert self.exclude(['foo', '!foo/bar/a.py']) == self.all_paths - set([
|
||||||
'foo/a.py', 'foo/b.py',
|
'foo/a.py', 'foo/b.py', 'foo', 'foo/bar'
|
||||||
])
|
])
|
||||||
|
|
||||||
def test_directory_with_subdir_exception(self):
|
def test_directory_with_subdir_exception(self):
|
||||||
assert self.exclude(['foo', '!foo/bar']) == self.all_paths - set([
|
assert self.exclude(['foo', '!foo/bar']) == self.all_paths - set([
|
||||||
'foo/a.py', 'foo/b.py',
|
'foo/a.py', 'foo/b.py', 'foo'
|
||||||
])
|
])
|
||||||
|
|
||||||
def test_directory_with_wildcard_exception(self):
|
def test_directory_with_wildcard_exception(self):
|
||||||
assert self.exclude(['foo', '!foo/*.py']) == self.all_paths - set([
|
assert self.exclude(['foo', '!foo/*.py']) == self.all_paths - set([
|
||||||
'foo/bar', 'foo/bar/a.py',
|
'foo/bar', 'foo/bar/a.py', 'foo'
|
||||||
])
|
])
|
||||||
|
|
||||||
def test_subdirectory(self):
|
def test_subdirectory(self):
|
||||||
|
|
Loading…
Reference in New Issue