SDK - Capturing function dependencies when creating lightweight components (#1372)

* Transitively capturing code dependencies
Using cloudpickle.

* Got rid of func_type_declarations_code variable

* Extracted the function code capturing logic into separate functions

* Improved support for capturing module-level dependencies

* Added test for capturing module-level dependencies

* Removed the _capture_function_code_using_source_copy function
As requested by Ning
This commit is contained in:
Alexey Volkov 2019-05-28 18:18:18 -07:00 committed by Kubernetes Prow Robot
parent 02313e4e5e
commit 9a1d47a185
3 changed files with 57 additions and 21 deletions

View File

@ -45,6 +45,20 @@ def _python_function_name_to_component_name(name):
return re.sub(' +', ' ', name.replace('_', ' ')).strip(' ').capitalize()
def _capture_function_code_using_cloudpickle(func) -> str:
    '''Serializes the function using cloudpickle and returns python code that re-creates it.

    The returned code imports pickle and assigns the result of unpickling the
    serialized payload to a variable named after the function.

    Args:
        func: The python function to capture.

    Returns:
        Python source code of the form
        "import pickle\\n\\n<func name> = pickle.loads(<payload bytes literal>)".

    NOTE(review): loading a cloudpickle payload presumably requires cloudpickle
    to be importable in the environment that runs the generated code - confirm
    that the base image provides it.
    '''
    import sys
    import cloudpickle
    import pickle

    module_name = func.__module__
    # Sentinel that distinguishes "module was not registered" from any real sys.modules value (including None).
    not_registered = object()

    # Hack to force cloudpickle to capture the whole function instead of just referencing the code file. See https://github.com/cloudpipe/cloudpickle/blob/74d69d759185edaeeac7bdcb7015cfc0c652f204/cloudpickle/cloudpickle.py#L490
    # The module is removed before entering the try block and a default is used,
    # so that a module missing from sys.modules cannot make the finally clause
    # fail on an unassigned variable and hide the real error.
    # NOTE: This temporarily mutates the process-wide sys.modules dictionary.
    old_module = sys.modules.pop(module_name, not_registered)
    try: # Try is needed to restore the state if something goes wrong
        func_pickle = cloudpickle.dumps(func, pickle.DEFAULT_PROTOCOL)
    finally:
        # Only restore the entry that was actually removed
        if old_module is not not_registered:
            sys.modules[module_name] = old_module

    func_code = '{func_name} = pickle.loads({func_pickle})'.format(func_name=func.__name__, func_pickle=repr(func_pickle))

    return 'import pickle' + '\n\n' + func_code
def _func_to_component_spec(func, extra_code='', base_image=_default_base_image) -> ComponentSpec:
'''Takes a self-contained python function and converts it to component
@ -127,24 +141,7 @@ def _func_to_component_spec(func, extra_code='', base_image=_default_base_image)
func_name=func.__name__
#TODO: Add support for copying the NamedTuple subclass declaration code
#Adding NamedTuple import if needed
func_type_declarations_code = ""
if hasattr(return_ann, '_fields'): #NamedTuple
func_type_declarations_code = func_type_declarations_code + '\n' + 'from typing import NamedTuple'
#Source code can include decorators line @python_op. Remove them
(func_code_lines, _) = inspect.getsourcelines(func)
while func_code_lines[0].lstrip().startswith('@'): #decorator
del func_code_lines[0]
#Function might be defined in some indented scope (e.g. in another function).
#We need to handle this and properly dedent the function source code
first_line = func_code_lines[0]
indent = len(first_line) - len(first_line.lstrip())
func_code_lines = [line[indent:] for line in func_code_lines]
func_code = ''.join(func_code_lines) #Lines retain their \n endings
func_code = _capture_function_code_using_cloudpickle(func)
extra_output_external_names = [name + '_file' for name in extra_output_names]
@ -166,8 +163,6 @@ def _func_to_component_spec(func, extra_code='', base_image=_default_base_image)
'''\
{extra_code}
{func_type_declarations_code}
{func_code}
import sys
@ -191,7 +186,6 @@ for idx, filename in enumerate(_output_files):
'''.format(
func_name=func_name,
func_code=func_code,
func_type_declarations_code=func_type_declarations_code,
extra_code=extra_code,
input_args_parsing_code='\n'.join(input_args_parsing_code_lines),
output_files_parsing_code='\n'.join(output_files_parsing_code_lines),

View File

@ -29,6 +29,7 @@ REQUIRES = [
'cryptography>=2.4.2',
'google-auth>=1.6.1',
'requests_toolbelt>=0.8.0',
'cloudpickle',
'kfp-server-api >= 0.1.18, < 0.1.19', #Update the upper version whenever a new version of the kfp-server-api package is released. Update the lower version when there is a breaking change in kfp-server-api.
'argo-models == 2.2.1a', #2.2.1a is equivalent to argo 2.2.1
]

View File

@ -34,6 +34,23 @@ def components_local_output_dir_context(output_dir: str):
finally:
comp._components._outputs_dir = old_dir
# Module-level value that ModuleLevelClass.class_method reads as a global.
module_level_variable = 10
# Module-level class used as a dependency of module_func_with_deps.
class ModuleLevelClass:
def class_method(self, x):
# Multiplies x by the module-level variable defined above the class.
return x * module_level_variable
# Module-level helper function used as a dependency of module_func_with_deps.
def module_func(a: float) -> float:
# Returns the argument multiplied by 5.
return a * 5
# Module-level function that depends on other module-level definitions
# (ModuleLevelClass and module_func).
# Used by test_func_to_container_op_call_other_func_global.
def module_func_with_deps(a: float, b: float) -> float:
# Combines the result of the class method applied to a with module_func applied to b.
return ModuleLevelClass().class_method(a) + module_func(b)
class PythonOpTestCase(unittest.TestCase):
def helper_test_2_in_1_out_component_using_local_call(self, func, op):
arg1 = float(3)
@ -95,6 +112,30 @@ class PythonOpTestCase(unittest.TestCase):
self.helper_test_2_in_1_out_component_using_local_call(func, op)
# Tests converting a function whose dependencies (a variable, a class and
# another function) are all defined locally inside the test method.
def test_func_to_container_op_call_other_func(self):
# Local value read by ExtraClass.class_method.
extra_variable = 10
# Local class that main_func instantiates.
class ExtraClass:
def class_method(self, x):
return x * extra_variable
# Local helper function that main_func calls.
def extra_func(a: float) -> float:
return a * 5
# The function being converted. It uses both ExtraClass and extra_func.
def main_func(a: float, b: float) -> float:
return ExtraClass().class_method(a) + extra_func(b)
func = main_func
# Builds the op from the function and writes the component file to 'comp.yaml'.
op = comp.func_to_container_op(func, output_component_file='comp.yaml')
# The shared helper receives both the original function and the op.
# NOTE(review): presumably it compares their results - the helper body is not fully visible here.
self.helper_test_2_in_1_out_component_using_local_call(func, op)
# Tests converting a module-level function whose dependencies
# (a class, a variable and another function) are defined at module level.
def test_func_to_container_op_call_other_func_global(self):
func = module_func_with_deps
# Builds the op from the function and writes the component file to 'comp.yaml'.
op = comp.func_to_container_op(func, output_component_file='comp.yaml')
# The shared helper receives both the original function and the op.
# NOTE(review): presumably it compares their results - the helper body is not fully visible here.
self.helper_test_2_in_1_out_component_using_local_call(func, op)
def test_func_to_container_op_multiple_named_typed_outputs(self):
from typing import NamedTuple
def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('sum', float), ('product', float)]):