pipelines/sdk/python/kfp/components/_components.py

280 lines
11 KiB
Python

# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#Public API of this module. Each of the listed functions loads a component definition and returns a task factory function.
__all__ = [
'load_component',
'load_component_from_text',
'load_component_from_url',
'load_component_from_file',
]
import sys
from collections import OrderedDict
from ._naming import _sanitize_file_name, _sanitize_python_function_name, generate_unique_name_conversion_table
from ._yaml_utils import load_yaml
from ._structures import ComponentSpec
from ._structures import *
from kfp.dsl import PipelineParam
from kfp.dsl.types import InconsistentTypeException, check_types
import kfp
#Fallback name that is used when the component spec does not specify a name.
_default_component_name = 'Component'
def load_component(filename=None, url=None, text=None):
    '''
    Loads component from text, file or URL and creates a task factory function

    Only one argument should be specified.

    Args:
        filename: Path of local file containing the component definition.
        url: The URL of the component file data
        text: A string containing the component file data.

    Returns:
        A factory function with a strongly-typed signature.
        Once called with the required arguments, the factory constructs a pipeline task instance (ContainerOp).

    Raises:
        ValueError: If no source or more than one source is specified, or if the only specified source is empty.
    '''
    #This function should be called load_task_factory since it returns a factory function.
    #The real load_component function should produce an object with component properties (e.g. name, description, inputs/outputs).
    #TODO: Change this function to return component spec object but it should be callable to construct tasks.

    #Counting the sources explicitly. Introspecting locals() would silently start counting any local variable that gets added above this line.
    specified_sources_count = sum(source is not None for source in (filename, url, text))
    if specified_sources_count != 1:
        raise ValueError('Need to specify exactly one source')

    if filename:
        return load_component_from_file(filename)
    elif url:
        return load_component_from_url(url)
    elif text:
        return load_component_from_text(text)
    else:
        #Reached when the single specified source is empty (e.g. text='')
        raise ValueError('Need to specify a source')
def load_component_from_url(url):
    '''
    Downloads the component definition from a URL and creates a task factory function

    Args:
        url: The URL of the component file data. A gs:// URI is rewritten to the matching https://storage.googleapis.com/ URL before downloading.

    Returns:
        A factory function with a strongly-typed signature.
        Once called with the required arguments, the factory constructs a pipeline task instance (ContainerOp).

    Raises:
        TypeError: If url is None.
    '''
    if url is None:
        raise TypeError

    #Handling Google Cloud Storage URIs
    gcs_scheme = 'gs://'
    if url.startswith(gcs_scheme):
        #The https:// form of the URI works for public objects
        object_path = url[len(gcs_scheme):]
        url = 'https://storage.googleapis.com/' + object_path

    import requests
    response = requests.get(url)
    response.raise_for_status()

    #The reference remembers the (possibly rewritten) URL the component was loaded from
    reference = ComponentReference(url=url)
    return _load_component_from_yaml_or_zip_bytes(response.content, url, reference)
def load_component_from_file(filename):
    '''
    Reads the component definition from a local file and creates a task factory function

    Args:
        filename: Path of local file containing the component definition.

    Returns:
        A factory function with a strongly-typed signature.
        Once called with the required arguments, the factory constructs a pipeline task instance (ContainerOp).

    Raises:
        TypeError: If filename is None.
    '''
    if filename is None:
        raise TypeError

    #Opening in binary mode since the file can be either YAML or a zip archive
    with open(filename, 'rb') as definition_file:
        task_factory = _load_component_from_yaml_or_zip_stream(definition_file, filename)
    return task_factory
def load_component_from_text(text):
    '''
    Parses the component definition text and creates a task factory function

    Args:
        text: A string containing the component file data.

    Returns:
        A factory function with a strongly-typed signature.
        Once called with the required arguments, the factory constructs a pipeline task instance (ContainerOp).

    Raises:
        TypeError: If text is None.
    '''
    if text is not None:
        #There is no file behind the text, so no component file name is passed
        return _create_task_factory_from_component_text(text, None)
    raise TypeError
#Name of the component definition file that is read from inside a zip archive.
_COMPONENT_FILE_NAME_IN_ARCHIVE = 'component.yaml'
def _load_component_from_yaml_or_zip_bytes(bytes, component_filename=None, component_ref: ComponentReference = None):
    '''Loads component from in-memory bytes and creates a task factory function.

    The bytes are wrapped in a seekable in-memory stream and handed to the stream loader.
    '''
    from io import BytesIO
    return _load_component_from_yaml_or_zip_stream(BytesIO(bytes), component_filename, component_ref)
def _load_component_from_yaml_or_zip_stream(stream, component_filename=None, component_ref: ComponentReference = None):
    '''Creates a task factory function from a seekable binary stream.

    The stream holds either the YAML text itself or a zip archive that has a component.yaml file inside.
    '''
    import zipfile

    stream.seek(0)
    is_archive = zipfile.is_zipfile(stream)
    #Rewinding again since the zip detection may have moved the stream position
    stream.seek(0)

    if not is_archive:
        return _create_task_factory_from_component_text(stream, component_filename, component_ref)

    with zipfile.ZipFile(stream) as archive, archive.open(_COMPONENT_FILE_NAME_IN_ARCHIVE) as component_stream:
        return _create_task_factory_from_component_text(component_stream, component_filename, component_ref)
def _create_task_factory_from_component_text(text_or_file, component_filename=None, component_ref: ComponentReference = None):
    '''Parses the component definition (text or file-like object) with load_yaml and builds the task factory from the parsed structure.'''
    return _create_task_factory_from_component_dict(
        load_yaml(text_or_file),
        component_filename,
        component_ref,
    )
def _create_task_factory_from_component_dict(component_dict, component_filename=None, component_ref: ComponentReference = None):
    '''Converts the parsed component structure to a ComponentSpec and builds the task factory from it.'''
    return _create_task_factory_from_component_spec(
        ComponentSpec.from_dict(component_dict),
        component_filename,
        component_ref,
    )
#Building blocks of the generated input/output file paths: <directory>/<sanitized port name>/<file name>
_inputs_dir = '/tmp/inputs' #Base directory of the input file paths
_outputs_dir = '/tmp/outputs' #Base directory of the output file paths
_single_io_file_name = 'data' #File name that is used inside every port directory
def _generate_input_file_name(port_name):
    '''Returns the input file path for the port: <inputs dir>/<sanitized port name>/<data file name>'''
    path_parts = [_inputs_dir, _sanitize_file_name(port_name), _single_io_file_name]
    return '/'.join(path_parts)
def _generate_output_file_name(port_name):
    '''Returns the output file path for the port: <outputs dir>/<sanitized port name>/<data file name>'''
    path_parts = [_outputs_dir, _sanitize_file_name(port_name), _single_io_file_name]
    return '/'.join(path_parts)
def _try_get_object_by_name(obj_name):
'''Locates any Python object (type, module, function, global variable) by name'''
try:
##Might be heavy since locate searches all Python modules
#from pydoc import locate
#return locate(obj_name) or obj_name
import builtins
return builtins.__dict__.get(obj_name, obj_name)
except:
pass
return obj_name
#Holds the transformation functions that are called each time a TaskSpec instance is created from a component.
#If there are multiple handlers, only the last one is used: the task factory passes the new TaskSpec to it and returns the handler's result instead of the TaskSpec.
_created_task_transformation_handler = []
#TODO: Move to the dsl.Pipeline context class
from . import _dsl_bridge
#Registering the default handler (judging by its name it converts the task to a ContainerOp - confirm in _dsl_bridge).
_created_task_transformation_handler.append(_dsl_bridge.create_container_op_from_task)
class _DefaultValue:
def __init__(self, value):
self.value = value
def __repr__(self):
return repr(self.value)
#TODO: Refactor the function to make it shorter
def _create_task_factory_from_component_spec(component_spec:ComponentSpec, component_filename=None, component_ref: ComponentReference = None):
    '''Builds the task factory function for a component spec.

    The generated function has one keyword parameter per component input. The parameter names are the input names converted with _sanitize_python_function_name.
    Calling the generated function creates a TaskSpec and, when task transformation handlers are registered, returns the result of the last registered handler instead.

    Args:
        component_spec: The component definition the factory is built from.
        component_filename: Optional file name or URL of the component. Used as a fallback name of the component reference and passed to the function generator as func_filename.
        component_ref: Optional component reference to attach to the created tasks. A new reference is created when omitted. The component spec is stored on the reference.

    Returns:
        The dynamically created factory function.
    '''
    factory_name = component_spec.name or _default_component_name

    #The docstring of the factory function consists of the component name and description (those that are present)
    docstring_lines = [line for line in (component_spec.name, component_spec.description) if line]

    input_specs = component_spec.inputs or [] #List[InputSpec]

    #Creating the name translation tables : Original <-> Pythonic
    original_to_pythonic = generate_unique_name_conversion_table(
        [input_spec.name for input_spec in input_specs],
        _sanitize_python_function_name,
    )
    pythonic_to_original = {pythonic: original for original, pythonic in original_to_pythonic.items()}

    if component_ref is None:
        component_ref = ComponentReference(name=component_spec.name or component_filename or _default_component_name)
    component_ref._component_spec = component_spec

    def create_task_from_component_and_arguments(pythonic_arguments):
        #Values of these types are passed as is. Everything else is converted to string.
        #Hack for passed PipelineParams. TODO: Remove the hack once they're no longer passed here.
        passthrough_types = (str, int, float, bool, GraphInputArgument, TaskOutputArgument, PipelineParam)

        #Converting the argument names back to the original input names
        arguments = {}
        for pythonic_name, value in pythonic_arguments.items():
            if isinstance(value, _DefaultValue):
                #Skipping passing arguments for optional values that have not been overridden.
                continue
            if not isinstance(value, passthrough_types):
                value = str(value)
            arguments[pythonic_to_original[pythonic_name]] = value

        #Only existing keys are reassigned below, so the dictionary size does not change during the iteration
        for input_name in arguments:
            argument = arguments[input_name]
            if not isinstance(argument, PipelineParam):
                continue
            if kfp.TYPE_CHECK:
                for input_spec in component_spec.inputs:
                    if input_spec.name != input_name:
                        continue
                    if argument.param_type is not None and not check_types(
                        argument.param_type.to_dict_or_str(),
                        '' if input_spec.type is None else input_spec.type,
                    ):
                        raise InconsistentTypeException('Component "' + factory_name + '" is expecting ' + input_name + ' to be type(' + str(input_spec.type) + '), but the passed argument is type(' + argument.param_type.serialize() + ')')
            #PipelineParams are passed on in their string form
            arguments[input_name] = str(argument)

        task = TaskSpec(component_ref=component_ref, arguments=arguments)
        if not _created_task_transformation_handler:
            return task
        #If there are multiple handlers, the last one is used
        latest_handler = _created_task_transformation_handler[-1]
        return latest_handler(task)

    import inspect
    from . import _dynamic

    def is_required(input_spec):
        #An input is required when it has neither a default value nor the optional flag
        return input_spec.default is None and not input_spec.optional

    #Reordering the inputs since in Python optional parameters must come after required parameters
    required_inputs = [input_spec for input_spec in input_specs if is_required(input_spec)]
    non_required_inputs = [input_spec for input_spec in input_specs if not is_required(input_spec)]

    def to_parameter_default(component_default: str, is_optional: bool):
        if is_optional:
            #Wrapped so that the task factory can tell that the value has not been overridden
            return _DefaultValue(component_default)
        return inspect.Parameter.empty if component_default is None else component_default

    #Outputs are no longer part of the task factory function signature. The paths are always generated by the system.
    factory_function_parameters = []
    for port in required_inputs + non_required_inputs:
        parameter_name = original_to_pythonic[port.name]
        parameter_annotation = _try_get_object_by_name(str(port.type)) if port.type else inspect.Parameter.empty
        parameter_default = to_parameter_default(port.default, port.optional)
        factory_function_parameters.append(
            _dynamic.KwParameter(
                parameter_name,
                annotation=parameter_annotation,
                default=parameter_default,
            )
        )

    return _dynamic.create_function_from_parameters(
        create_task_from_component_and_arguments,
        factory_function_parameters,
        documentation='\n'.join(docstring_lines),
        func_name=factory_name,
        func_filename=component_filename
    )