# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = [
    'load_component',
    'load_component_from_text',
    'load_component_from_url',
    'load_component_from_file',
]

import sys
from ._yaml_utils import load_yaml, dump_yaml
from ._structures import ComponentSpec


_default_component_name = 'Component'


def load_component(filename=None, url=None, text=None):
    '''
    Loads a component from text, a file or a URL and creates a task factory function.

    Exactly one of the arguments must be specified.

    Args:
        filename: Path of a local file containing the component definition.
        url: The URL of the component file data.
        text: A string containing the component file data.

    Returns:
        A factory function with a strongly-typed signature.
        Once called with the required arguments, the factory constructs a pipeline task instance (ContainerOp).
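
    Example (an illustrative sketch; the file name and input name below are hypothetical):

        my_op = load_component(filename='my_component.yaml')
        my_task = my_op(input_1='some value')  #the factory's parameters mirror the component's inputs and outputs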
    '''
    #This function should be called load_task_factory since it returns a factory function.
    #The real load_component function should produce an object with component properties (e.g. name, description, inputs/outputs).
    #TODO: Change this function to return a component spec object that is also callable to construct tasks.
    non_null_args_count = len([name for name, value in locals().items() if value is not None])
    if non_null_args_count != 1:
        raise ValueError('Need to specify exactly one source')
    if filename:
        return load_component_from_file(filename)
    elif url:
        return load_component_from_url(url)
    elif text:
        return load_component_from_text(text)
    else:
        raise ValueError('Need to specify a source')


def load_component_from_url(url):
    '''
    Loads a component from a URL and creates a task factory function.

    Args:
        url: The URL of the component file data.

    Returns:
        A factory function with a strongly-typed signature.
        Once called with the required arguments, the factory constructs a pipeline task instance (ContainerOp).
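
    Example (illustrative; the URL below is hypothetical):

        my_op = load_component_from_url('https://example.com/my_component.yaml')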
    '''
    if url is None:
        raise TypeError('url is required')
    import requests
    resp = requests.get(url)
    resp.raise_for_status()
    return _create_task_factory_from_component_text(resp.content, url)


def load_component_from_file(filename):
    '''
    Loads a component from a file and creates a task factory function.

    Args:
        filename: Path of a local file containing the component definition.

    Returns:
        A factory function with a strongly-typed signature.
        Once called with the required arguments, the factory constructs a pipeline task instance (ContainerOp).
    '''
    if filename is None:
        raise TypeError('filename is required')
    with open(filename, 'r') as yaml_file:
        return _create_task_factory_from_component_text(yaml_file, filename)


def load_component_from_text(text):
    '''
    Loads a component from text and creates a task factory function.

    Args:
        text: A string containing the component file data.

    Returns:
        A factory function with a strongly-typed signature.
        Once called with the required arguments, the factory constructs a pipeline task instance (ContainerOp).
    '''
    if text is None:
        raise TypeError('text is required')
    return _create_task_factory_from_component_text(text, None)


def _create_task_factory_from_component_text(text_or_file, component_filename=None):
    component_dict = load_yaml(text_or_file)
    return _create_task_factory_from_component_dict(component_dict, component_filename)


def _create_task_factory_from_component_dict(component_dict, component_filename=None):
    component_spec = ComponentSpec.from_struct(component_dict)
    return _create_task_factory_from_component_spec(component_spec, component_filename)


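#Name normalization example (illustrative): a port named '3D-Model #1' is normalized by
#_normalize_identifier_name to 'n3d model 1', which _sanitize_kubernetes_resource_name and
#_sanitize_python_function_name below turn into 'n3d-model-1' and 'n3d_model_1' respectively.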
def _normalize_identifier_name(name):
    import re
    normalized_name = name.lower()
    normalized_name = re.sub(r'[\W_]', ' ', normalized_name) #No non-word characters
    normalized_name = re.sub(' +', ' ', normalized_name).strip() #No double spaces, leading or trailing spaces
    if re.match(r'\d', normalized_name):
        normalized_name = 'n' + normalized_name #No leading digits
    return normalized_name


def _sanitize_kubernetes_resource_name(name):
    return _normalize_identifier_name(name).replace(' ', '-')


def _sanitize_python_function_name(name):
    return _normalize_identifier_name(name).replace(' ', '_')


def _sanitize_file_name(name):
    import re
    return re.sub('[^-_.0-9a-zA-Z]+', '_', name)


def _generate_unique_suffix(data):
    import time
    import hashlib
    string_data = str((data, time.time()))
    return hashlib.sha256(string_data.encode()).hexdigest()[0:8]


_inputs_dir = '/inputs'
_outputs_dir = '/outputs'


def _generate_input_file_name(port_name):
    return _inputs_dir + '/' + _sanitize_file_name(port_name)


def _generate_output_file_name(port_name):
    return _outputs_dir + '/' + _sanitize_file_name(port_name)


def _try_get_object_by_name(obj_name):
    '''Resolves a name to a built-in Python object (type, function, etc.) if possible; otherwise returns the name unchanged'''
    ##Might be heavy since locate searches all Python modules
    #from pydoc import locate
    #return locate(obj_name) or obj_name
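    #Illustrative behavior: _try_get_object_by_name('str') returns the built-in str type,
    #while a non-builtin name such as 'GcsPath' (hypothetical) is returned unchanged as a string.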
    import builtins
    return builtins.__dict__.get(obj_name, obj_name)


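#Example (illustrative): _make_name_unique_by_adding_index('output', {'output', 'output_2'}, '_') returns 'output_3'.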
def _make_name_unique_by_adding_index(name: str, collection, delimiter: str):
    unique_name = name
    if unique_name in collection:
        for i in range(2, sys.maxsize**10):
            unique_name = name + delimiter + str(i)
            if unique_name not in collection:
                break
    return unique_name


#TODO: Refactor the function to make it shorter
def _create_task_factory_from_component_spec(component_spec: ComponentSpec, component_filename=None):
    name = component_spec.name or _default_component_name
    description = component_spec.description

    inputs_list = component_spec.inputs or [] #List[InputSpec]
    outputs_list = component_spec.outputs or [] #List[OutputSpec]

    input_name_to_pythonic = {}
    output_name_to_pythonic = {}
    pythonic_name_to_original = {}

    input_name_to_kubernetes = {}
    output_name_to_kubernetes = {}
    kubernetes_name_to_input_name = {}
    kubernetes_name_to_output_name = {}

    for io_port in inputs_list:
        pythonic_name = _sanitize_python_function_name(io_port.name)
        pythonic_name = _make_name_unique_by_adding_index(pythonic_name, pythonic_name_to_original, '_')
        input_name_to_pythonic[io_port.name] = pythonic_name
        pythonic_name_to_original[pythonic_name] = io_port.name

        kubernetes_name = _sanitize_kubernetes_resource_name(io_port.name)
        kubernetes_name = _make_name_unique_by_adding_index(kubernetes_name, kubernetes_name_to_input_name, '-')
        input_name_to_kubernetes[io_port.name] = kubernetes_name
        kubernetes_name_to_input_name[kubernetes_name] = io_port.name

    for io_port in outputs_list:
        pythonic_name = _sanitize_python_function_name(io_port.name)
        pythonic_name = _make_name_unique_by_adding_index(pythonic_name, pythonic_name_to_original, '_')
        output_name_to_pythonic[io_port.name] = pythonic_name
        pythonic_name_to_original[pythonic_name] = io_port.name

        kubernetes_name = _sanitize_kubernetes_resource_name(io_port.name)
        kubernetes_name = _make_name_unique_by_adding_index(kubernetes_name, kubernetes_name_to_output_name, '-')
        output_name_to_kubernetes[io_port.name] = kubernetes_name
        kubernetes_name_to_output_name[kubernetes_name] = io_port.name

    container_spec = component_spec.implementation.docker_container
    container_image = container_spec.image

    file_inputs = {}
    file_outputs_from_def = {}
    if container_spec.file_outputs is not None:
        file_outputs_from_def = {output_name_to_kubernetes[param]: path for param, path in container_spec.file_outputs.items()}

    def create_container_op_with_expanded_arguments(pythonic_input_argument_values):
        file_outputs = file_outputs_from_def.copy()

        def expand_command_part(arg): #input values with original names
            #(Union[str,List],Mapping[str, Any]) -> str
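            #Illustrative expansions (added for clarity; the port names 'Rounds' and 'Model' are hypothetical):
            #  'echo'                                       -> 'echo' (plain strings pass through unchanged)
            #  {'value': 'Rounds'}                          -> str of the argument passed for the 'Rounds' input
            #  {'output': 'Model'}                          -> '/outputs/Model' (also recorded in file_outputs)
            #  ['concat', '--rounds=', {'value': 'Rounds'}] -> '--rounds=' followed by the 'Rounds' argument value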
            if isinstance(arg, dict) or isinstance(arg, list):
                if isinstance(arg, list):
                    (func_name, func_argument) = (arg[0], arg[1:])
                    if len(func_argument) == 1:
                        func_argument = func_argument[0]
                elif isinstance(arg, dict):
                    if len(arg) != 1:
                        raise ValueError('Failed to parse argument dict: "{}"'.format(arg))
                    (func_name, func_argument) = list(arg.items())[0]
                else:
                    raise TypeError()
                func_name = func_name.lower()

                if func_name == 'value':
                    assert isinstance(func_argument, str)
                    port_name = func_argument
                    input_value = pythonic_input_argument_values[input_name_to_pythonic[port_name]]
                    return str(input_value)
                elif func_name == 'file':
                    assert isinstance(func_argument, str)
                    port_name = func_argument
                    input_filename = _generate_input_file_name(port_name)
                    input_key = input_name_to_kubernetes[port_name]
                    input_value = pythonic_input_argument_values[input_name_to_pythonic[port_name]]
                    file_inputs[input_key] = {'local_path': input_filename, 'data_source': input_value}
                    return input_filename
                elif func_name == 'output':
                    assert isinstance(func_argument, str)
                    port_name = func_argument
                    pythonic_port_name = output_name_to_pythonic[port_name]
                    if pythonic_port_name in pythonic_input_argument_values and pythonic_input_argument_values[pythonic_port_name] is not None:
                        output_filename = str(pythonic_input_argument_values[pythonic_port_name])
                    else:
                        output_filename = _generate_output_file_name(port_name)
                    #We need to pass the file mapping to file_outputs
                    output_key = output_name_to_kubernetes[port_name]
                    if output_key in file_outputs:
                        if file_outputs[output_key] != output_filename:
                            raise ValueError('Conflicting output files specified for port {}: {} and {}'.format(port_name, file_outputs[output_key], output_filename))
                    else:
                        file_outputs[output_key] = output_filename
                    return output_filename
                elif func_name == 'concat':
                    assert isinstance(func_argument, list)
                    items_to_concatenate = func_argument
                    expanded_arguments = [expand_command_part(arg1) for arg1 in items_to_concatenate]
                    expanded_argument_strings = [str(arg2) for arg2 in expanded_arguments if arg2 is not None]
                    return ''.join(expanded_argument_strings)
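                #The 'if' placeholder chooses between 'then' and 'else' based on 'cond'; an illustrative
                #(hypothetical) usage: {'if': {'cond': {'isPresent': 'Rounds'}, 'then': ['concat', '--rounds=', {'value': 'Rounds'}]}}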
                elif func_name == 'if':
                    if isinstance(func_argument, dict):
                        condition_node = func_argument['cond']
                        then_node = func_argument['then']
                        else_node = func_argument.get('else', None)
                    elif isinstance(func_argument, list):
                        assert len(func_argument) in [2, 3]
                        condition_node = func_argument[0]
                        then_node = func_argument[1]
                        else_node = func_argument[2] if len(func_argument) == 3 else None
                    else:
                        raise TypeError()
                    condition_result = expand_command_part(condition_node)
                    if isinstance(condition_result, str):
                        #String results (e.g. from isPresent) need explicit conversion; bool('False') would be truthy.
                        condition_result = condition_result.lower() not in ('false', '')
                    condition_result = bool(condition_result)
                    result_node = then_node if condition_result else else_node
                    if isinstance(result_node, list):
                        expanded_result = [expand_command_part(arg1) for arg1 in result_node]
                    else:
                        expanded_result = expand_command_part(result_node)
                    return expanded_result
                elif func_name == 'ispresent':
                    assert isinstance(func_argument, str)
                    input_name = func_argument
                    pythonic_input_name = input_name_to_pythonic[input_name]
                    argument_is_present = pythonic_input_name in pythonic_input_argument_values
                    return str(argument_is_present)
            else:
                return arg

        expanded_command = []
        if container_spec.command is not None:
            for part in container_spec.command:
                expanded_part = expand_command_part(part)
                if expanded_part is not None:
                    if isinstance(expanded_part, list):
                        expanded_command.extend(expanded_part)
                    else:
                        expanded_command.append(str(expanded_part))

        expanded_args = []
        if container_spec.arguments is not None:
            for part in container_spec.arguments:
                expanded_part = expand_command_part(part)
                if expanded_part is not None:
                    if isinstance(expanded_part, list):
                        expanded_args.extend(expanded_part)
                    else:
                        expanded_args.append(str(expanded_part))

        #Working around Python's variable scoping. Do not write to a variable from the enclosing scope, as that would make the variable local.
        file_outputs_to_pass = file_outputs
        if file_outputs_to_pass == {}:
            file_outputs_to_pass = None

        from . import _dsl_bridge
        return _dsl_bridge._task_object_factory(
            name=name,
            container_image=container_image,
            command=expanded_command,
            arguments=expanded_args,
            file_inputs=file_inputs,
            file_outputs=file_outputs_to_pass,
        )

    import inspect
    from . import _dynamic

    #Outputs can still be set as parameters, but they are optional and are auto-generated when missing.
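    #For illustration (hypothetical component): inputs named 'Training data' and 'Rounds' plus an output named 'Model'
    #produce a factory whose signature is roughly my_op(training_data, rounds, model=None).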
    input_parameters = [_dynamic.KwParameter(input_name_to_pythonic[port.name], annotation=(_try_get_object_by_name(port.type) if port.type else inspect.Parameter.empty)) for port in inputs_list]
    output_parameters = [_dynamic.KwParameter(output_name_to_pythonic[port.name], annotation=('OutputFile[{}]'.format(port.type) if port.type else inspect.Parameter.empty), default=None) for port in outputs_list]

    factory_function_parameters = input_parameters + output_parameters

    return _dynamic.create_function_from_parameters(
        create_container_op_with_expanded_arguments,
        factory_function_parameters,
        documentation=description,
        func_name=name,
        func_filename=component_filename
    )