280 lines
11 KiB
Python
280 lines
11 KiB
Python
# Copyright 2018 Google LLC
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# Public API of this module: the component loading entry points.
__all__ = [
    'load_component',
    'load_component_from_text',
    'load_component_from_url',
    'load_component_from_file',
]
|
|
|
|
import sys
|
|
from collections import OrderedDict
|
|
from ._naming import _sanitize_file_name, _sanitize_python_function_name, generate_unique_name_conversion_table
|
|
from ._yaml_utils import load_yaml
|
|
from ._structures import ComponentSpec
|
|
from ._structures import *
|
|
from kfp.dsl import PipelineParam
|
|
from kfp.dsl.types import InconsistentTypeException, check_types
|
|
import kfp
|
|
|
|
# Fallback name used when the component spec does not provide a name.
_default_component_name = 'Component'
|
|
|
|
|
|
def load_component(filename=None, url=None, text=None):
    '''
    Loads component from text, file or URL and creates a task factory function

    Only one argument should be specified.

    Args:
        filename: Path of local file containing the component definition.
        url: The URL of the component file data
        text: A string containing the component file data.

    Returns:
        A factory function with a strongly-typed signature.
        Once called with the required arguments, the factory constructs a pipeline task instance (ContainerOp).

    Raises:
        ValueError: If zero or more than one source is specified, or if the
            single specified source is empty (falsy).
    '''
    #This function should be called load_task_factory since it returns a factory function.
    #The real load_component function should produce an object with component properties (e.g. name, description, inputs/outputs).
    #TODO: Change this function to return component spec object but it should be callable to construct tasks.

    # Counting the sources explicitly instead of inspecting locals(), which
    # silently breaks as soon as any other local variable is defined earlier.
    specified_sources_count = sum(
        source is not None for source in (filename, url, text)
    )
    if specified_sources_count != 1:
        raise ValueError('Need to specify exactly one source')

    if filename:
        return load_component_from_file(filename)
    if url:
        return load_component_from_url(url)
    if text:
        return load_component_from_text(text)
    # Exactly one source was passed, but it is falsy (e.g. an empty string).
    raise ValueError('Need to specify a source')
|
|
|
|
|
|
def load_component_from_url(url):
    '''
    Loads component from URL and creates a task factory function

    Args:
        url: The URL of the component file data

    Returns:
        A factory function with a strongly-typed signature.
        Once called with the required arguments, the factory constructs a pipeline task instance (ContainerOp).

    Raises:
        TypeError: If url is None.
        Exception: Whatever resp.raise_for_status() raises for an unsuccessful HTTP response.
    '''
    if url is None:
        raise TypeError('url must not be None')

    #Handling Google Cloud Storage URIs
    gcs_scheme_prefix = 'gs://'
    if url.startswith(gcs_scheme_prefix):
        #Replacing the gs:// URI with https:// URI (works for public objects)
        url = 'https://storage.googleapis.com/' + url[len(gcs_scheme_prefix):]

    import requests
    # NOTE(review): no timeout is passed to requests.get, so a stalled server
    # can block this call indefinitely - consider adding one.
    resp = requests.get(url)
    resp.raise_for_status()
    # The reference records the URL that was actually fetched, i.e. after the
    # gs:// -> https:// rewrite above.
    component_ref = ComponentReference(url=url)
    return _load_component_from_yaml_or_zip_bytes(resp.content, url, component_ref)
|
|
|
|
|
|
def load_component_from_file(filename):
    '''
    Loads component from file and creates a task factory function

    Args:
        filename: Path of local file containing the component definition.

    Returns:
        A factory function with a strongly-typed signature.
        Once called with the required arguments, the factory constructs a pipeline task instance (ContainerOp).

    Raises:
        TypeError: If filename is None.
    '''
    if filename is None:
        raise TypeError('filename must not be None')
    # Opened in binary mode: the stream loader checks whether the content is a
    # zip archive before parsing it.
    with open(filename, 'rb') as component_stream:
        return _load_component_from_yaml_or_zip_stream(component_stream, filename)
|
|
|
|
|
|
def load_component_from_text(text):
    '''
    Loads component from text and creates a task factory function

    Args:
        text: A string containing the component file data.

    Returns:
        A factory function with a strongly-typed signature.
        Once called with the required arguments, the factory constructs a pipeline task instance (ContainerOp).

    Raises:
        TypeError: If text is None.
    '''
    if text is None:
        raise TypeError('text must not be None')
    # There is no file name to associate with a component loaded from text.
    return _create_task_factory_from_component_text(text, None)
|
|
|
|
|
|
# Name of the component definition file that is opened inside a zip archive.
_COMPONENT_FILE_NAME_IN_ARCHIVE = 'component.yaml'
|
|
|
|
|
|
def _load_component_from_yaml_or_zip_bytes(bytes, component_filename=None, component_ref: ComponentReference = None):
    '''Wraps the raw bytes in an in-memory stream and loads the component from that stream.'''
    from io import BytesIO
    return _load_component_from_yaml_or_zip_stream(
        BytesIO(bytes),
        component_filename,
        component_ref,
    )
|
|
|
|
|
|
def _load_component_from_yaml_or_zip_stream(stream, component_filename=None, component_ref: ComponentReference = None):
    '''Creates a task factory function from a seekable component stream.

    The stream holds either the component YAML itself or a zip archive
    that contains a component.yaml file.
    '''
    import zipfile

    # Rewinding before and after the zip check so that both the check and
    # the subsequent read start from the beginning of the stream.
    stream.seek(0)
    is_archive = zipfile.is_zipfile(stream)
    stream.seek(0)

    if not is_archive:
        return _create_task_factory_from_component_text(stream, component_filename, component_ref)

    with zipfile.ZipFile(stream) as archive, archive.open(_COMPONENT_FILE_NAME_IN_ARCHIVE) as component_stream:
        return _create_task_factory_from_component_text(component_stream, component_filename, component_ref)
|
|
|
|
|
|
def _create_task_factory_from_component_text(text_or_file, component_filename=None, component_ref: ComponentReference = None):
    '''Parses the component YAML (text or file-like object) and creates a task factory function from the result.'''
    return _create_task_factory_from_component_dict(
        load_yaml(text_or_file),
        component_filename,
        component_ref,
    )
|
|
|
|
|
|
def _create_task_factory_from_component_dict(component_dict, component_filename=None, component_ref: ComponentReference = None):
    '''Converts the component dictionary to a ComponentSpec and creates a task factory function from it.'''
    return _create_task_factory_from_component_spec(
        ComponentSpec.from_dict(component_dict),
        component_filename,
        component_ref,
    )
|
|
|
|
|
|
# Base directory used when generating input file paths.
_inputs_dir = '/tmp/inputs'
|
|
# Base directory used when generating output file paths.
_outputs_dir = '/tmp/outputs'
|
|
# File name appended to every generated input/output path.
_single_io_file_name = 'data'
|
|
|
|
|
|
def _generate_input_file_name(port_name):
    '''Returns the input file path: <inputs dir>/<sanitized port name>/<single IO file name>.'''
    path_parts = [_inputs_dir, _sanitize_file_name(port_name), _single_io_file_name]
    return '/'.join(path_parts)
|
|
|
|
|
|
def _generate_output_file_name(port_name):
    '''Returns the output file path: <outputs dir>/<sanitized port name>/<single IO file name>.'''
    path_parts = [_outputs_dir, _sanitize_file_name(port_name), _single_io_file_name]
    return '/'.join(path_parts)
|
|
|
|
|
|
def _try_get_object_by_name(obj_name):
|
|
'''Locates any Python object (type, module, function, global variable) by name'''
|
|
try:
|
|
##Might be heavy since locate searches all Python modules
|
|
#from pydoc import locate
|
|
#return locate(obj_name) or obj_name
|
|
import builtins
|
|
return builtins.__dict__.get(obj_name, obj_name)
|
|
except:
|
|
pass
|
|
return obj_name
|
|
|
|
|
|
|
|
#Holds the transformation functions that are called each time a TaskSpec instance is created from a component. If there are multiple handlers, only the last one is used.
_created_task_transformation_handler = []
|
|
|
|
|
|
#TODO: Move to the dsl.Pipeline context class
|
|
from . import _dsl_bridge
|
|
# Registering the DSL bridge as a handler, so created tasks are passed through create_container_op_from_task.
_created_task_transformation_handler.append(_dsl_bridge.create_container_op_from_task)
|
|
|
|
|
|
class _DefaultValue:
|
|
def __init__(self, value):
|
|
self.value = value
|
|
|
|
def __repr__(self):
|
|
return repr(self.value)
|
|
|
|
|
|
#TODO: Refactor the function to make it shorter
|
|
def _create_task_factory_from_component_spec(component_spec:ComponentSpec, component_filename=None, component_ref: ComponentReference = None):
    '''Builds a task factory function from a component spec.

    Args:
        component_spec: The ComponentSpec to build the factory for. Its name,
            description and inputs are read here.
        component_filename: Optional file name (or URL) of the component.
            Used as a fallback name for the component reference and passed
            to the generated function as func_filename.
        component_ref: Optional ComponentReference to attach to created
            tasks. When None, a new reference is created. In both cases the
            component spec is stored on the reference.

    Returns:
        The function produced by _dynamic.create_function_from_parameters.
        Its parameters are derived from the component inputs. Calling it
        creates a TaskSpec and, if any handler is registered in
        _created_task_transformation_handler, returns the result of the
        last handler applied to that TaskSpec.
    '''
    name = component_spec.name or _default_component_name

    # The docstring of the generated function consists of the component name and description (when present).
    func_docstring_lines = []
    if component_spec.name:
        func_docstring_lines.append(component_spec.name)
    if component_spec.description:
        func_docstring_lines.append(component_spec.description)

    inputs_list = component_spec.inputs or [] #List[InputSpec]
    input_names = [input.name for input in inputs_list]

    #Creating the name translation tables : Original <-> Pythonic
    input_name_to_pythonic = generate_unique_name_conversion_table(input_names, _sanitize_python_function_name)
    pythonic_name_to_input_name = {v: k for k, v in input_name_to_pythonic.items()}

    if component_ref is None:
        component_ref = ComponentReference(name=component_spec.name or component_filename or _default_component_name)
    # The spec is attached to the reference (including a reference passed in by the caller).
    component_ref._component_spec = component_spec

    def create_task_from_component_and_arguments(pythonic_arguments):
        #Converting the pythonic argument names back to the original input names.
        #Values that are not of one of the valid argument types are converted to strings.
        valid_argument_types = (str, int, float, bool, GraphInputArgument, TaskOutputArgument, PipelineParam) #Hack for passed PipelineParams. TODO: Remove the hack once they're no longer passed here.
        arguments = {
            pythonic_name_to_input_name[k]: (v if isinstance(v, valid_argument_types) else str(v))
            for k, v in pythonic_arguments.items()
            if not isinstance(v, _DefaultValue) # Skipping passing arguments for optional values that have not been overridden.
        }
        for key in arguments:
            if isinstance(arguments[key], PipelineParam):
                # When type checking is enabled, the type of the PipelineParam is checked against the type of the input with the same name.
                if kfp.TYPE_CHECK:
                    for input_spec in component_spec.inputs:
                        if input_spec.name == key:
                            # A PipelineParam without a type is not checked. An input without a type is compared as ''.
                            if arguments[key].param_type is not None and not check_types(arguments[key].param_type.to_dict_or_str(), '' if input_spec.type is None else input_spec.type):
                                raise InconsistentTypeException('Component "' + name + '" is expecting ' + key + ' to be type(' + str(input_spec.type) + '), but the passed argument is type(' + arguments[key].param_type.serialize() + ')')
                # Replacing the PipelineParam with its string representation.
                arguments[key] = str(arguments[key])

        task = TaskSpec(
            component_ref=component_ref,
            arguments=arguments,
        )
        # Only the last registered handler is applied to the created task.
        if _created_task_transformation_handler:
            task = _created_task_transformation_handler[-1](task)
        return task

    import inspect
    from . import _dynamic

    #Reordering the inputs since in Python optional parameters must come after required parameters
    #Required inputs (no default and not optional) come first, all other inputs follow.
    reordered_input_list = [input for input in inputs_list if input.default is None and not input.optional] + [input for input in inputs_list if not (input.default is None and not input.optional)]

    def component_default_to_func_default(component_default: str, is_optional: bool):
        # Optional inputs get a wrapped default, so that non-overridden arguments can be recognized and skipped.
        if is_optional:
            return _DefaultValue(component_default)
        if component_default is not None:
            return component_default
        # No default: the parameter is required.
        return inspect.Parameter.empty

    input_parameters = [
        _dynamic.KwParameter(
            input_name_to_pythonic[port.name],
            # The annotation is the builtin object named by the type when one exists, otherwise the type string itself.
            annotation=(_try_get_object_by_name(str(port.type)) if port.type else inspect.Parameter.empty),
            default=component_default_to_func_default(port.default, port.optional),
        )
        for port in reordered_input_list
    ]
    factory_function_parameters = input_parameters #Outputs are no longer part of the task factory function signature. The paths are always generated by the system.

    return _dynamic.create_function_from_parameters(
        create_task_from_component_and_arguments,
        factory_function_parameters,
        documentation='\n'.join(func_docstring_lines),
        func_name=name,
        func_filename=component_filename
    )
|