SDK - Capturing function dependencies when creating lightweight components (#1372)
* Transitively capturing code dependencies using cloudpickle.
* Got rid of the func_type_declarations_code variable.
* Extracted the function code extraction functions.
* Improved support for capturing module-level dependencies.
* Added a test for capturing module-level dependencies.
* Removed the _capture_function_code_using_source_copy function, as requested by Ning.
This commit is contained in:
parent
02313e4e5e
commit
9a1d47a185
|
|
@ -45,6 +45,20 @@ def _python_function_name_to_component_name(name):
|
|||
return re.sub(' +', ' ', name.replace('_', ' ')).strip(' ').capitalize()
|
||||
|
||||
|
||||
def _capture_function_code_using_cloudpickle(func) -> str:
    '''Serializes a function with cloudpickle and returns Python code that re-creates it.

    The function's module is temporarily removed from ``sys.modules`` while
    pickling, so that cloudpickle captures the whole function instead of just
    referencing the code file. The module registration is restored afterwards,
    even when pickling fails.

    Args:
        func: The python function to capture. Its ``__name__`` is used as the
            variable name in the generated code.

    Returns:
        Source code of the form ``import pickle\\n\\n<name> = pickle.loads(<bytes literal>)``.

    NOTE(review): loading the generated pickle presumably requires cloudpickle
    to be importable in the environment that runs the code - confirm.
    '''
    import sys
    import cloudpickle
    import pickle

    # Read the module name once so that removal and restoration always use the same key.
    module_name = func.__module__
    # Sentinel distinguishes "module was not registered" from any real sys.modules value.
    not_registered = object()

    # Hack to force cloudpickle to capture the whole function instead of just referencing the code file. See https://github.com/cloudpipe/cloudpickle/blob/74d69d759185edaeeac7bdcb7015cfc0c652f204/cloudpickle/cloudpickle.py#L490
    # The pop happens before the try block and uses a default: a module missing
    # from sys.modules must not raise KeyError, nor leave the finally clause
    # referencing an unassigned variable (which would mask the original error).
    old_module = sys.modules.pop(module_name, not_registered)
    try:  # Try is needed to restore the state if something goes wrong
        func_pickle = cloudpickle.dumps(func, pickle.DEFAULT_PROTOCOL)
    finally:
        # Only put back what was actually removed.
        if old_module is not not_registered:
            sys.modules[module_name] = old_module

    func_code = '{func_name} = pickle.loads({func_pickle})'.format(func_name=func.__name__, func_pickle=repr(func_pickle))
    return 'import pickle' + '\n\n' + func_code
|
||||
|
||||
|
||||
def _func_to_component_spec(func, extra_code='', base_image=_default_base_image) -> ComponentSpec:
|
||||
'''Takes a self-contained python function and converts it to component
|
||||
|
||||
|
|
@ -127,24 +141,7 @@ def _func_to_component_spec(func, extra_code='', base_image=_default_base_image)
|
|||
|
||||
func_name=func.__name__
|
||||
|
||||
#TODO: Add support for copying the NamedTuple subclass declaration code
|
||||
#Adding NamedTuple import if needed
|
||||
func_type_declarations_code = ""
|
||||
if hasattr(return_ann, '_fields'): #NamedTuple
|
||||
func_type_declarations_code = func_type_declarations_code + '\n' + 'from typing import NamedTuple'
|
||||
|
||||
#Source code can include decorators line @python_op. Remove them
|
||||
(func_code_lines, _) = inspect.getsourcelines(func)
|
||||
while func_code_lines[0].lstrip().startswith('@'): #decorator
|
||||
del func_code_lines[0]
|
||||
|
||||
#Function might be defined in some indented scope (e.g. in another function).
|
||||
#We need to handle this and properly dedent the function source code
|
||||
first_line = func_code_lines[0]
|
||||
indent = len(first_line) - len(first_line.lstrip())
|
||||
func_code_lines = [line[indent:] for line in func_code_lines]
|
||||
|
||||
func_code = ''.join(func_code_lines) #Lines retain their \n endings
|
||||
func_code = _capture_function_code_using_cloudpickle(func)
|
||||
|
||||
extra_output_external_names = [name + '_file' for name in extra_output_names]
|
||||
|
||||
|
|
@ -166,8 +163,6 @@ def _func_to_component_spec(func, extra_code='', base_image=_default_base_image)
|
|||
'''\
|
||||
{extra_code}
|
||||
|
||||
{func_type_declarations_code}
|
||||
|
||||
{func_code}
|
||||
|
||||
import sys
|
||||
|
|
@ -191,7 +186,6 @@ for idx, filename in enumerate(_output_files):
|
|||
'''.format(
|
||||
func_name=func_name,
|
||||
func_code=func_code,
|
||||
func_type_declarations_code=func_type_declarations_code,
|
||||
extra_code=extra_code,
|
||||
input_args_parsing_code='\n'.join(input_args_parsing_code_lines),
|
||||
output_files_parsing_code='\n'.join(output_files_parsing_code_lines),
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ REQUIRES = [
|
|||
'cryptography>=2.4.2',
|
||||
'google-auth>=1.6.1',
|
||||
'requests_toolbelt>=0.8.0',
|
||||
'cloudpickle',
|
||||
'kfp-server-api >= 0.1.18, < 0.1.19', #Update the upper version whenever a new version of the kfp-server-api package is released. Update the lower version when there is a breaking change in kfp-server-api.
|
||||
'argo-models == 2.2.1a', #2.2.1a is equivalent to argo 2.2.1
|
||||
]
|
||||
|
|
|
|||
|
|
@ -34,6 +34,23 @@ def components_local_output_dir_context(output_dir: str):
|
|||
finally:
|
||||
comp._components._outputs_dir = old_dir
|
||||
|
||||
|
||||
module_level_variable = 10
|
||||
|
||||
|
||||
class ModuleLevelClass:
    '''Module-level class that module_func_with_deps depends on.'''

    def class_method(self, x):
        '''Returns x multiplied by module_level_variable.'''
        scaled = x * module_level_variable
        return scaled
|
||||
|
||||
|
||||
def module_func(a: float) -> float:
    '''Returns the argument multiplied by five.'''
    factor = 5
    return a * factor
|
||||
|
||||
|
||||
def module_func_with_deps(a: float, b: float) -> float:
    '''Combines results of the module-level class and the module-level function.

    Returns ModuleLevelClass().class_method(a) plus module_func(b).
    '''
    class_part = ModuleLevelClass().class_method(a)
    func_part = module_func(b)
    return class_part + func_part
|
||||
|
||||
|
||||
class PythonOpTestCase(unittest.TestCase):
|
||||
def helper_test_2_in_1_out_component_using_local_call(self, func, op):
|
||||
arg1 = float(3)
|
||||
|
|
@ -95,6 +112,30 @@ class PythonOpTestCase(unittest.TestCase):
|
|||
|
||||
self.helper_test_2_in_1_out_component_using_local_call(func, op)
|
||||
|
||||
def test_func_to_container_op_call_other_func(self):
    '''Converts a function that depends on a local variable, class and function, then runs the shared local-call check.'''
    extra_variable = 10

    class ExtraClass:
        def class_method(self, x):
            product = x * extra_variable
            return product

    def extra_func(a: float) -> float:
        multiplier = 5
        return a * multiplier

    def main_func(a: float, b: float) -> float:
        class_part = ExtraClass().class_method(a)
        func_part = extra_func(b)
        return class_part + func_part

    # The component is built from main_func; its dependencies are only reachable through it.
    container_op = comp.func_to_container_op(main_func, output_component_file='comp.yaml')

    self.helper_test_2_in_1_out_component_using_local_call(main_func, container_op)
|
||||
|
||||
def test_func_to_container_op_call_other_func_global(self):
    '''Converts module_func_with_deps, which uses module-level dependencies, then runs the shared local-call check.'''
    component_func = module_func_with_deps
    container_op = comp.func_to_container_op(
        component_func,
        output_component_file='comp.yaml',
    )

    self.helper_test_2_in_1_out_component_using_local_call(component_func, container_op)
|
||||
|
||||
def test_func_to_container_op_multiple_named_typed_outputs(self):
|
||||
from typing import NamedTuple
|
||||
def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('sum', float), ('product', float)]):
|
||||
|
|
|
|||
Loading…
Reference in New Issue