SDK - Containers - Build python container image based on current working directory (#1970)
* SDK - Containers - Build container image from current environment * Removed the ability to capture the active python environment (as requested by @hongye-sun) * Added the type hint and docstring for the return type. * Renamed `build_image_from_env` function to `build_image_from_working_dir` as requested by @hongye-sun * Explained the function behavior in the documentation. * Removed extra empty line * Improved caching by copying python files only after installing python packages * Made test more portable * Added support for specifying the base_image `kfp.containers.default_base_image = ...` The image can also be a callable returning the image name. * Renamed `get_python_image` to `get_python_image_for_current_version` * Switched the default base image to Google Deep Learning container image as requested by @hongye-sun The size of this image is 4.35GB which really concerns me. The GPU image size is 6.45GB. * Stopped importing kfp.containers.* into kfp.* * Fixed test * Fixed the regex string * Fixed the type annotation style * Addressed @hongye-sun feedback * Removed the container image size warning * Fixed import failure
This commit is contained in:
parent
60018e3149
commit
08104d6cf9
|
|
@ -15,4 +15,4 @@
|
|||
|
||||
from ._client import Client
|
||||
from ._config import *
|
||||
from ._runners import *
|
||||
from ._runners import *
|
||||
|
|
|
|||
|
|
@ -152,11 +152,11 @@ class ContainerBuilder(object):
|
|||
with tarfile.open(tarball_path, 'w:gz') as tarball:
|
||||
tarball.add(dir_name, arcname='')
|
||||
|
||||
def build(self, local_dir, docker_filename, target_image=None, timeout=1000):
|
||||
def build(self, local_dir, docker_filename : str = 'Dockerfile', target_image=None, timeout=1000):
|
||||
"""
|
||||
Args:
|
||||
local_dir (str): local directory that stores all the necessary build files
|
||||
docker_filename (str): the dockerfile name that is in the local_dir
|
||||
docker_filename (str): the path of the Dockerfile relative to the local_dir
|
||||
target_image (str): the target image tag to push the final image.
|
||||
timeout (int): time out in seconds. Default: 1000
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -0,0 +1,14 @@
|
|||
# Copyright 2019 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
# limitations under the License.
|
||||
|
||||
from ._build_image_api import *
|
||||
|
|
@ -0,0 +1,120 @@
|
|||
# Copyright 2019 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
# limitations under the License.
|
||||
|
||||
__all__ = [
|
||||
'build_image_from_working_dir',
|
||||
]
|
||||
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
import requests
|
||||
|
||||
from ..compiler._container_builder import ContainerBuilder
|
||||
|
||||
|
||||
# Base image used for generated Dockerfiles when the caller passes no `base_image`.
# May also be set to a callable taking no arguments that returns the image name.
default_base_image = 'gcr.io/deeplearning-platform-release/tf-cpu.1-14'
|
||||
|
||||
|
||||
# WORKDIR written into generated Dockerfiles; the build context is copied into it.
_container_work_dir = '/python_env'
|
||||
|
||||
|
||||
# Module-level cache for the default builder; stays None until
# _get_default_image_builder() creates the instance on first use.
_default_image_builder = None
|
||||
|
||||
|
||||
def _get_default_image_builder():
    '''Returns the shared default ContainerBuilder, creating it on first use.

    The instance is cached in the module-level `_default_image_builder`
    variable so that subsequent calls reuse the same builder.

    Returns:
        The cached ContainerBuilder instance.
    '''
    global _default_image_builder
    if _default_image_builder is None:
        # Function-scope import kept from the original code so the builder
        # class is only needed when the default builder is actually requested.
        from ..compiler._container_builder import ContainerBuilder
        _default_image_builder = ContainerBuilder()
    # Without this return the function always yields None and callers fail
    # with AttributeError when they invoke `.build(...)` on the result.
    return _default_image_builder
|
||||
|
||||
|
||||
def _generate_dockerfile_text(context_dir: str, dockerfile_path: str, base_image: str = None) -> str:
    '''Generates the text of a Dockerfile for the given build context.

    Args:
        context_dir: Build context directory. It is only inspected for a top-level requirements.txt file.
        dockerfile_path: Not used by this function.
        base_image: Image name, or a callable returning the image name. When falsy, the module-level `default_base_image` is used instead.
    Returns:
        The Dockerfile instructions joined with newlines (no trailing newline).
    '''
    logging.info('Generating the Dockerfile')

    requirements_rel_path = 'requirements.txt'
    has_requirements = os.path.exists(os.path.join(context_dir, requirements_rel_path))

    image = base_image if base_image else default_base_image
    if callable(image):
        # The image may be supplied lazily as a zero-argument callable.
        image = image()

    instructions = [
        'FROM {}'.format(image),
        'WORKDIR {}'.format(_container_work_dir),
    ]
    if has_requirements:
        # Install the packages before copying the remaining files.
        instructions.extend([
            'COPY {} .'.format(requirements_rel_path),
            'RUN python3 -m pip install -r {}'.format(requirements_rel_path),
        ])
    instructions.append('COPY . .')

    return '\n'.join(instructions)
|
||||
|
||||
|
||||
def build_image_from_working_dir(image_name: str = None, working_dir: str = None, file_filter_re: str = r'.*\.py', timeout: int = 1000, base_image: str = None, builder: ContainerBuilder = None) -> str:
    '''Builds a container image from the files found in a working directory.

    The working directory is walked recursively and its directory tree is mirrored into a temporary build context. The following files are copied into the context:
    * files named requirements.txt
    * files whose name matches `file_filter_re` (python files by default)

    If the root of the working directory contains a file named Dockerfile, that file is used for the build.
    Otherwise a Dockerfile is generated: it starts from the base image, installs the packages from the top-level requirements.txt (if present) and copies the whole build context into the image.

    Args:
        image_name: Optional. The image repo name where the new container image will be pushed. Passed to the builder as `target_image`.
        working_dir: Optional. The directory that will be captured. The current directory will be used if omitted.
        file_filter_re: Optional. A regular expression matched (with `re.match`) against each file name to decide which files to include in the build context.
        timeout: Optional. The image building timeout in seconds.
        base_image: Optional. The container image to use as the base for the new image. If not set, `default_base_image` is used. Cannot be combined with a custom Dockerfile.
        builder: Optional. An instance of ContainerBuilder or compatible class that will be used to build the image. The default builder is used if omitted.
    Returns:
        The value returned by the builder's `build` method. The builder is expected to return the full name of the container image including the hash digest. E.g. gcr.io/my-org/my-image@sha256:86c1...793c.
    Raises:
        ValueError: If `base_image` is given while the working directory contains a custom Dockerfile.
    '''
    source_root = working_dir or os.getcwd()

    with tempfile.TemporaryDirectory() as context_dir:
        logging.info('Creating the build context directory: {}'.format(context_dir))

        # Mirror the directory tree, copying only the selected files.
        for src_dir, _subdirs, names in os.walk(source_root):
            dst_dir = os.path.join(context_dir, os.path.relpath(src_dir, source_root))
            os.makedirs(dst_dir, exist_ok=True)
            for name in names:
                if not (re.match(file_filter_re, name) or name == 'requirements.txt'):
                    continue
                shutil.copy(os.path.join(src_dir, name), os.path.join(dst_dir, name))

        custom_dockerfile = os.path.join(source_root, 'Dockerfile')
        context_dockerfile = os.path.join(context_dir, 'Dockerfile')
        if not os.path.exists(custom_dockerfile):
            generated_text = _generate_dockerfile_text(context_dir, context_dockerfile, base_image)
            with open(context_dockerfile, 'w') as f:
                f.write(generated_text)
        elif base_image:
            raise ValueError('Cannot specify base_image when using custom Dockerfile (which already specifies the base image).')
        else:
            shutil.copy(custom_dockerfile, context_dockerfile)

        image_builder = builder if builder is not None else _get_default_image_builder()
        # The build must run while the temporary context directory still exists.
        return image_builder.build(
            local_dir=context_dir,
            target_image=image_name,
            timeout=timeout,
        )
|
||||
|
|
@ -51,6 +51,7 @@ setup(
|
|||
'kfp.components',
|
||||
'kfp.components.structures',
|
||||
'kfp.components.structures.kubernetes',
|
||||
'kfp.containers',
|
||||
'kfp.dsl',
|
||||
'kfp.notebook',
|
||||
],
|
||||
|
|
|
|||
|
|
@ -0,0 +1,84 @@
|
|||
# Copyright 2019 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
# limitations under the License.
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
|
||||
import mock
|
||||
|
||||
from kfp.containers import build_image_from_working_dir
|
||||
|
||||
|
||||
class MockImageBuilder:
    '''Test double for an image builder.

    Instead of building anything, `build` inspects the build context directory
    and hands what it finds to the optional check callbacks.
    '''

    def __init__(self, dockerfile_text_check : Callable[[str], None] = None, requirements_text_check : Callable[[str], None] = None, file_paths_check : Callable[[str], None] = None):
        '''Stores the optional callbacks.

        Args:
            dockerfile_text_check: Called with the text of `<local_dir>/Dockerfile`.
            requirements_text_check: Called with the text of `<local_dir>/requirements.txt`.
            file_paths_check: Called with the set of all file paths found under `local_dir`, relative to `local_dir`.
        '''
        self.file_paths_check = file_paths_check
        self.requirements_text_check = requirements_text_check
        self.dockerfile_text_check = dockerfile_text_check

    def build(self, local_dir = None, target_image = None, timeout = 1000):
        '''Runs the configured checks against `local_dir` and returns `target_image` unchanged.'''
        if self.dockerfile_text_check:
            dockerfile = Path(local_dir) / 'Dockerfile'
            self.dockerfile_text_check(dockerfile.read_text())

        if self.requirements_text_check:
            requirements = Path(local_dir) / 'requirements.txt'
            self.requirements_text_check(requirements.read_text())

        if self.file_paths_check:
            found_paths = set()
            for root, _subdirs, names in os.walk(local_dir):
                for name in names:
                    found_paths.add(os.path.relpath(os.path.join(root, name), local_dir))
            self.file_paths_check(found_paths)

        return target_image
|
||||
|
||||
|
||||
class BuildImageApiTests(unittest.TestCase):
    '''Tests for build_image_from_working_dir.'''

    def test_build_image_from_working_dir(self):
        '''Checks the generated Dockerfile and the files captured in the build context.

        A working directory with a requirements.txt, a python file and a shell
        script is created; only the first two (plus the generated Dockerfile)
        are expected to reach the build context.
        '''
        expected_dockerfile_text_re = '''
FROM python:3.6.5
WORKDIR /.*
COPY requirements.txt .
RUN python3 -m pip install -r requirements.txt
COPY . .
'''
        expected_image_name = 'gcr.io/test-project/test-image'

        with tempfile.TemporaryDirectory() as context_dir:
            requirements_text = 'pandas==1.24'
            requirements_txt_relpath = Path('.') / 'requirements.txt'
            file1_py_relpath = Path('.') / 'lib' / 'file1.py'
            file1_sh_relpath = Path('.') / 'lib' / 'file1.sh'

            context_path = Path(context_dir)
            (context_path / requirements_txt_relpath).write_text(requirements_text)
            (context_path / file1_py_relpath).parent.mkdir(parents=True, exist_ok=True)
            (context_path / file1_py_relpath).write_text('#py file')
            (context_path / file1_sh_relpath).parent.mkdir(parents=True, exist_ok=True)
            (context_path / file1_sh_relpath).write_text('#sh file')
            # The .sh file is intentionally absent: it does not match the default filter.
            expected_file_paths = {
                'Dockerfile',
                str(requirements_txt_relpath),
                str(file1_py_relpath),
            }

            def dockerfile_text_check(actual_dockerfile_text):
                self.assertRegex(actual_dockerfile_text.strip(), expected_dockerfile_text_re.strip())

            def requirements_text_check(actual_requirements_text):
                self.assertEqual(actual_requirements_text.strip(), requirements_text.strip())

            def file_paths_check(file_paths):
                self.assertEqual(file_paths, expected_file_paths)

            builder = MockImageBuilder(dockerfile_text_check, requirements_text_check, file_paths_check)
            result = build_image_from_working_dir(image_name=expected_image_name, working_dir=context_dir, base_image='python:3.6.5', builder=builder)

        # MockImageBuilder.build returns its target_image, so this fails if the
        # builder (and therefore the checks above) was never invoked.
        self.assertEqual(result, expected_image_name)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
Loading…
Reference in New Issue