feat(sdk): using component's pip_index_urls for Dockerfile generation. Fixes #8816 (#8871)

* feat(sdk): using component's pip_index_urls for Dockerfile generation

* Update sdk/python/kfp/cli/component.py

Co-authored-by: Connor McCarthy <mccarthy.connor.james@gmail.com>

* feat(sdk): PR changes: removed additional space from index url options string; duplicate removal not necessary

* feat(sdk): feature mentioned in RELEASE.md

* deduplication back - more components in a file can specify same pypi url - we need it only once for docker image

* feature note moved to current development section

---------

Co-authored-by: Connor McCarthy <mccarthy.connor.james@gmail.com>
This commit is contained in:
b4sus 2023-03-01 09:32:56 -08:00 committed by GitHub
parent fde6b944b5
commit ed66ba327f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 111 additions and 14 deletions

View File

@ -1,6 +1,7 @@
# Current Version (in development)
## Features
* `pip_index_urls` is now considered also for containerized python component - the urls will be used for Dockerfile generation [\#8871](https://github.com/kubeflow/pipelines/pull/8871)
## Breaking changes

View File

@ -46,9 +46,9 @@ FROM {base_image}
WORKDIR {component_root_dir}
COPY {requirements_file} {requirements_file}
RUN pip install --no-cache-dir -r {requirements_file}
RUN pip install {index_urls}--no-cache-dir -r {requirements_file}
{maybe_copy_kfp_package}
RUN pip install --no-cache-dir {kfp_package_path}
RUN pip install {index_urls}--no-cache-dir {kfp_package_path}
COPY . .
'''
@ -156,6 +156,7 @@ class ComponentBuilder():
self._base_image = None
self._target_image = None
self._pip_index_urls = None
self._load_components()
def _load_components(self):
@ -214,6 +215,13 @@ class ComponentBuilder():
raise sys.exit(1)
logging.info(f'Using target image: {self._target_image}')
pip_index_urls = []
for comp in self._components:
if comp.pip_index_urls is not None:
pip_index_urls.extend(comp.pip_index_urls)
if pip_index_urls:
self._pip_index_urls = list(dict.fromkeys(pip_index_urls))
def _maybe_write_file(self,
filename: str,
contents: str,
@ -270,12 +278,15 @@ class ComponentBuilder():
config.save()
def maybe_generate_dockerfile(self, overwrite_dockerfile: bool = False):
index_urls_options = component_factory.make_index_url_options(
self._pip_index_urls)
dockerfile_contents = _DOCKERFILE_TEMPLATE.format(
base_image=self._base_image,
maybe_copy_kfp_package=self._maybe_copy_kfp_package,
component_root_dir=_COMPONENT_ROOT_DIR,
kfp_package_path=self._kfp_package_path,
requirements_file=_REQUIREMENTS_TXT,
index_urls=index_urls_options,
)
self._maybe_write_file(_DOCKERFILE, dockerfile_contents,

View File

@ -30,11 +30,14 @@ docker = mock.Mock()
from kfp.cli import component
def _make_component(func_name: str,
base_image: Optional[str] = None,
target_image: Optional[str] = None,
packages_to_install: Optional[List[str]] = None,
output_component_file: Optional[str] = None) -> str:
def _make_component(
func_name: str,
base_image: Optional[str] = None,
target_image: Optional[str] = None,
packages_to_install: Optional[List[str]] = None,
output_component_file: Optional[str] = None,
pip_index_urls: Optional[List[str]] = None,
) -> str:
return textwrap.dedent('''
from kfp.dsl import *
@ -42,7 +45,8 @@ def _make_component(func_name: str,
base_image={base_image},
target_image={target_image},
packages_to_install={packages_to_install},
output_component_file={output_component_file})
output_component_file={output_component_file},
pip_index_urls={pip_index_urls})
def {func_name}():
pass
''').format(
@ -50,6 +54,7 @@ def _make_component(func_name: str,
target_image=repr(target_image),
packages_to_install=repr(packages_to_install),
output_component_file=repr(output_component_file),
pip_index_urls=repr(pip_index_urls),
func_name=func_name)
@ -443,6 +448,66 @@ class Test(unittest.TestCase):
COPY . .
'''))
@mock.patch('kfp.__version__', '1.2.3')
def test_docker_file_is_created_correctly_with_one_url(self):
component = _make_component(
func_name='train',
target_image='custom-image',
pip_index_urls=['https://pypi.org/simple'])
_write_components('components.py', component)
result = self.runner.invoke(
self.cli,
['build', str(self._working_dir)],
)
self.assertEqual(result.exit_code, 0)
self._docker_client.api.build.assert_called_once()
self.assert_file_exists_and_contains(
'Dockerfile',
textwrap.dedent('''\
# Generated by KFP.
FROM python:3.7
WORKDIR /usr/local/src/kfp/components
COPY runtime-requirements.txt runtime-requirements.txt
RUN pip install --index-url https://pypi.org/simple --trusted-host https://pypi.org/simple --no-cache-dir -r runtime-requirements.txt
RUN pip install --index-url https://pypi.org/simple --trusted-host https://pypi.org/simple --no-cache-dir kfp==1.2.3
COPY . .
'''))
@mock.patch('kfp.__version__', '1.2.3')
def test_docker_file_is_created_correctly_with_two_urls(self):
component = _make_component(
func_name='train',
target_image='custom-image',
pip_index_urls=[
'https://pypi.org/simple', 'https://example.com/pypi/simple'
])
_write_components('components.py', component)
result = self.runner.invoke(
self.cli,
['build', str(self._working_dir)],
)
self.assertEqual(result.exit_code, 0)
self._docker_client.api.build.assert_called_once()
self.assert_file_exists_and_contains(
'Dockerfile',
textwrap.dedent('''\
# Generated by KFP.
FROM python:3.7
WORKDIR /usr/local/src/kfp/components
COPY runtime-requirements.txt runtime-requirements.txt
RUN pip install --index-url https://pypi.org/simple --trusted-host https://pypi.org/simple --extra-index-url https://example.com/pypi/simple --trusted-host https://example.com/pypi/simple --no-cache-dir -r runtime-requirements.txt
RUN pip install --index-url https://pypi.org/simple --trusted-host https://pypi.org/simple --extra-index-url https://example.com/pypi/simple --trusted-host https://example.com/pypi/simple --no-cache-dir kfp==1.2.3
COPY . .
'''))
def test_existing_dockerfile_is_unchanged_by_default(self):
component = _make_component(
func_name='train', target_image='custom-image')

View File

@ -50,6 +50,7 @@ class ComponentInfo():
output_component_file: Optional[str] = None
base_image: str = _DEFAULT_BASE_IMAGE
packages_to_install: Optional[List[str]] = None
pip_index_urls: Optional[List[str]] = None
# A map from function_name to components. This is always populated when a
@ -63,19 +64,37 @@ def _python_function_name_to_component_name(name):
return name_with_spaces[0].upper() + name_with_spaces[1:]
def _make_index_url_options(pip_index_urls: Optional[List[str]]) -> str:
def make_index_url_options(pip_index_urls: Optional[List[str]]) -> str:
"""Generates index url options for pip install command based on provided
pip_index_urls.
Args:
pip_index_urls: Optional list of pip index urls
Returns:
- Empty string if pip_index_urls is empty/None.
- '--index-url url --trusted-host url ' if pip_index_urls contains 1
url
- the above followed by '--extra-index-url url --trusted-host url '
for
each next url in pip_index_urls if pip_index_urls contains more than 1
url
Note: In case pip_index_urls is not empty, the returned string will
contain space at the end.
"""
if not pip_index_urls:
return ''
index_url = pip_index_urls[0]
extra_index_urls = pip_index_urls[1:]
options = [f'--index-url {index_url} --trusted-host {index_url} ']
options = [f'--index-url {index_url} --trusted-host {index_url}']
options.extend(
f'--extra-index-url {extra_index_url} --trusted-host {extra_index_url} '
f'--extra-index-url {extra_index_url} --trusted-host {extra_index_url}'
for extra_index_url in extra_index_urls)
return ' '.join(options)
return ' '.join(options) + ' '
_install_python_packages_script_template = '''
@ -97,7 +116,7 @@ def _get_packages_to_install_command(
concat_package_list = ' '.join(
[repr(str(package)) for package in package_list])
index_url_options = _make_index_url_options(pip_index_urls)
index_url_options = make_index_url_options(pip_index_urls)
install_python_packages_script = _install_python_packages_script_template.format(
index_url_options=index_url_options,
concat_package_list=concat_package_list)
@ -468,7 +487,8 @@ def create_component_from_func(
component_spec=component_spec,
output_component_file=output_component_file,
base_image=base_image,
packages_to_install=packages_to_install)
packages_to_install=packages_to_install,
pip_index_urls=pip_index_urls)
if REGISTERED_MODULES is not None:
REGISTERED_MODULES[component_name] = component_info