# Copyright 2021-2022 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Definitions for component spec."""

import ast
import collections
import functools
import itertools
from typing import Any, Dict, List, Mapping, Optional, Union
import uuid

import kfp
from kfp import dsl
from kfp.compiler import compiler
from kfp.components import base_model
from kfp.components import placeholders
from kfp.components import utils
from kfp.components import v1_components
from kfp.components import v1_structures
from kfp.components.types import type_utils
from kfp.pipeline_spec import pipeline_spec_pb2
import yaml


class InputSpec_(base_model.BaseModel):
    """Component input definitions. (Inner class).

    Attributes:
        type: The type of the input.
        default (optional): the default value for the input.
        description: Optional: the user description of the input.
    """
    type: Union[str, dict]
    default: Union[Any, None] = None


# Hack to allow access to __init__ arguments for setting _optional value
class InputSpec(InputSpec_, base_model.BaseModel):
    """Component input definitions.

    Attributes:
        type: The type of the input.
        default (optional): the default value for the input.
        _optional: Whether the input is optional. An input is optional when it
            has an explicit default value.
    """

    @functools.wraps(InputSpec_.__init__)
    def __init__(self, *args, **kwargs) -> None:
        """InputSpec constructor, which can access the arguments passed to the
        constructor for setting the ._optional value."""
        if args:
            raise ValueError('InputSpec does not accept positional arguments.')
        super().__init__(*args, **kwargs)
        self._optional = 'default' in kwargs

    @classmethod
    def from_ir_parameter_dict(
            cls, ir_parameter_dict: Dict[str, Any]) -> 'InputSpec':
        """Creates an InputSpec from a ComponentInputsSpec message in dict
        format (pipeline_spec.components.<component-
        key>.inputDefinitions.parameters.<input-key>).

        Args:
            ir_parameter_dict (Dict[str, Any]): The ComponentInputsSpec message in dict format.

        Returns:
            InputSpec: The InputSpec object.
        """
        type_ = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE.get(
            ir_parameter_dict['parameterType'])
        if type_ is None:
            raise ValueError(
                f'Unknown type {ir_parameter_dict["parameterType"]} found in IR.'
            )
        default = ir_parameter_dict.get('defaultValue')
        return InputSpec(type=type_, default=default)

    def __eq__(self, other: Any) -> bool:
        """Equality comparison for InputSpec. Robust to different type
        representations, such that it respects the maximum amount of
        information possible to encode in IR. That is, because
        `typing.List[str]` can only be represented as `List` in IR,
        'typing.List' == 'List' in this comparison.

        Args:
            other (Any): The object to compare to InputSpec.

        Returns:
            bool: True if the objects are equal, False otherwise.
        """
        if isinstance(other, InputSpec):
            return type_utils.get_canonical_name_for_outer_generic(
                self.type) == type_utils.get_canonical_name_for_outer_generic(
                    other.type) and self.default == other.default
        else:
            return False


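# Illustrative usage (kept as comments so the module stays import-only). An
# input becomes optional only when an explicit default is passed to the
# constructor; the IR strings below are assumed examples, and the
# authoritative mapping lives in type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE:
#
#   InputSpec(type='String', default='sea')._optional  # True
#   InputSpec(type='Integer')._optional                # False
#   InputSpec.from_ir_parameter_dict(
#       {'parameterType': 'STRING', 'defaultValue': 'sea'})
#   # -> InputSpec(type='String', default='sea')

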
class OutputSpec(base_model.BaseModel):
    """Component output definitions.

    Attributes:
        type: The type of the output.
    """
    type: Union[str, dict]

    @classmethod
    def from_ir_parameter_dict(
            cls, ir_parameter_dict: Dict[str, Any]) -> 'OutputSpec':
        """Creates an OutputSpec from a ComponentOutputsSpec message in dict
        format (pipeline_spec.components.<component-
        key>.outputDefinitions.parameters|artifacts.<output-key>).

        Args:
            ir_parameter_dict (Dict[str, Any]): The ComponentOutputsSpec in dict format.

        Returns:
            OutputSpec: The OutputSpec object.
        """
        type_string = ir_parameter_dict[
            'parameterType'] if 'parameterType' in ir_parameter_dict else ir_parameter_dict[
                'artifactType']['schemaTitle']
        type_ = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE.get(type_string)
        if type_ is None:
            raise ValueError(f'Unknown type {type_string} found in IR.')
        return OutputSpec(type=type_)

    def __eq__(self, other: Any) -> bool:
        """Equality comparison for OutputSpec. Robust to different type
        representations, such that it respects the maximum amount of
        information possible to encode in IR. That is, because
        `typing.List[str]` can only be represented as `List` in IR,
        'typing.List' == 'List' in this comparison.

        Args:
            other (Any): The object to compare to OutputSpec.

        Returns:
            bool: True if the objects are equal, False otherwise.
        """
        if isinstance(other, OutputSpec):
            return type_utils.get_canonical_name_for_outer_generic(
                self.type) == type_utils.get_canonical_name_for_outer_generic(
                    other.type)
        else:
            return False


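# Illustrative usage (comments only). The same constructor handles parameter
# and artifact outputs; the IR strings below are assumed examples, and the
# authoritative mapping lives in type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE:
#
#   OutputSpec.from_ir_parameter_dict({'parameterType': 'NUMBER_INTEGER'})
#   # -> OutputSpec(type='Integer')
#   OutputSpec.from_ir_parameter_dict(
#       {'artifactType': {'schemaTitle': 'system.Dataset'}})
#   # -> OutputSpec(type='Dataset')

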
class ResourceSpec(base_model.BaseModel):
    """The resource requirements of a container execution.

    Attributes:
        cpu_limit (optional): the limit of the number of vCPU cores.
        memory_limit (optional): the memory limit in GB.
        accelerator_type (optional): the type of accelerators attached to the
            container.
        accelerator_count (optional): the number of accelerators attached.
    """
    cpu_limit: Optional[float] = None
    memory_limit: Optional[float] = None
    accelerator_type: Optional[str] = None
    accelerator_count: Optional[int] = None


class ContainerSpec(base_model.BaseModel):
    """Container implementation definition.

    Attributes:
        image: The container image.
        command (optional): the container entrypoint.
        args (optional): the arguments to the container entrypoint.
        env (optional): the environment variables to be passed to the container.
        resources (optional): the specification on the resource requirements.
    """
    image: str
    command: Optional[List[placeholders.CommandLineElement]] = None
    args: Optional[List[placeholders.CommandLineElement]] = None
    env: Optional[Mapping[str, placeholders.CommandLineElement]] = None
    resources: Optional[ResourceSpec] = None

    def transform_command(self) -> None:
        """Use None instead of empty list for command."""
        self.command = None if self.command == [] else self.command

    def transform_args(self) -> None:
        """Use None instead of empty list for args."""
        self.args = None if self.args == [] else self.args

    def transform_env(self) -> None:
        """Use None instead of empty dict for env."""
        self.env = None if self.env == {} else self.env

    @classmethod
    def from_container_dict(cls,
                            container_dict: Dict[str, Any]) -> 'ContainerSpec':
        """Creates a ContainerSpec from a PipelineContainerSpec message in
        dict format (pipeline_spec.deploymentSpec.executors.<executor-
        key>.container).

        Args:
            container_dict (Dict[str, Any]): PipelineContainerSpec message in dict format.

        Returns:
            ContainerSpec: The ContainerSpec instance.
        """
        args = container_dict.get('args')
        if args is not None:
            args = [
                placeholders.maybe_convert_placeholder_string_to_placeholder(
                    arg) for arg in args
            ]
        command = container_dict.get('command')
        if command is not None:
            command = [
                placeholders.maybe_convert_placeholder_string_to_placeholder(c)
                for c in command
            ]
        return ContainerSpec(
            image=container_dict['image'],
            command=command,
            args=args,
            env=None,  # can only be set on tasks
            resources=None)  # can only be set on tasks


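# Illustrative usage (comments only). A minimal deployment-spec container dict
# is converted into a ContainerSpec; the placeholder string shown is an
# assumed example of what
# placeholders.maybe_convert_placeholder_string_to_placeholder may rewrite:
#
#   ContainerSpec.from_container_dict({
#       'image': 'python:3.7',
#       'command': ['echo'],
#       'args': ["{{$.inputs.parameters['msg']}}"],
#   })
#   # -> ContainerSpec(image='python:3.7', command=['echo'], args=[...])

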
class TaskSpec(base_model.BaseModel):
    """The spec of a pipeline task.

    Attributes:
        name: The name of the task.
        inputs: The sources of task inputs. Constant values or PipelineParams.
        dependent_tasks: The list of upstream tasks.
        component_ref: The name of a component spec this task is based on.
        trigger_condition (optional): an expression which will be evaluated into
            a boolean value. True to trigger the task to run.
        trigger_strategy (optional): when the task will be ready to be triggered.
            Valid values include: "TRIGGER_STRATEGY_UNSPECIFIED",
            "ALL_UPSTREAM_TASKS_SUCCEEDED", and "ALL_UPSTREAM_TASKS_COMPLETED".
        iterator_items (optional): the items to iterate on. A constant value or
            a PipelineParam.
        iterator_item_input (optional): the name of the input which has the item
            from the [items][] collection.
        enable_caching (optional): whether or not to enable caching for the task.
            Default is True.
        display_name (optional): the display name of the task. If not specified,
            the task name will be used as the display name.
    """
    name: str
    inputs: Mapping[str, Any]
    dependent_tasks: List[str]
    component_ref: str
    trigger_condition: Optional[str] = None
    trigger_strategy: Optional[str] = None
    iterator_items: Optional[Any] = None
    iterator_item_input: Optional[str] = None
    enable_caching: bool = True
    display_name: Optional[str] = None


class DagSpec(base_model.BaseModel):
    """DAG (graph) implementation definition.

    Attributes:
        tasks: The tasks inside the DAG.
        outputs: Defines how the outputs of the DAG are linked to the sub tasks.
    """
    tasks: Mapping[str, TaskSpec]
    outputs: Mapping[str, Any]


class ImporterSpec(base_model.BaseModel):
    """ImporterSpec definition.

    Attributes:
        artifact_uri: The URI of the artifact.
        type_schema: The type of the artifact.
        reimport: Whether or not to import an artifact regardless of whether it
            has been imported before.
        metadata (optional): the properties of the artifact.
    """
    artifact_uri: str
    type_schema: str
    reimport: bool
    metadata: Optional[Mapping[str, Any]] = None


class Implementation(base_model.BaseModel):
    """Implementation definition.

    Attributes:
        container: container implementation details.
        graph: graph implementation details.
        importer: importer implementation details.
    """
    container: Optional[ContainerSpec] = None
    graph: Optional[DagSpec] = None
    importer: Optional[ImporterSpec] = None

    @classmethod
    def from_deployment_spec_dict(cls, deployment_spec_dict: Dict[str, Any],
                                  component_name: str) -> 'Implementation':
        """Creates an Implementation object from a deployment spec message in
        dict format (pipeline_spec.deploymentSpec).

        Args:
            deployment_spec_dict (Dict[str, Any]): PipelineDeploymentConfig message in dict format.
            component_name (str): The name of the component.

        Returns:
            Implementation: An implementation object.
        """
        executor_key = utils._EXECUTOR_LABEL_PREFIX + component_name
        container = deployment_spec_dict['executors'][executor_key]['container']
        container_spec = ContainerSpec.from_container_dict(container)
        return Implementation(container=container_spec)


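# Illustrative usage (comments only). The executor key is derived from the
# component name via utils._EXECUTOR_LABEL_PREFIX; the 'exec-' prefix shown
# below is an assumption about that constant's value:
#
#   Implementation.from_deployment_spec_dict(
#       {'executors': {'exec-comp': {'container': {'image': 'alpine'}}}},
#       component_name='comp')
#   # -> Implementation(container=ContainerSpec(image='alpine'))

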
def try_to_get_dict_from_string(string: str) -> Union[dict, str]:
    """Tries to parse a dictionary from a string if possible, else returns the
    string.

    Args:
        string (str): The string to parse.

    Returns:
        Union[dict, str]: The dictionary if one was successfully parsed, else the original string.
    """
    try:
        res = ast.literal_eval(string)
    except (ValueError, SyntaxError):
        return string

    if not isinstance(res, dict):
        return string
    return res


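# Illustrative behavior (comments only):
#
#   try_to_get_dict_from_string("{'inputValue': 'x'}")  # -> {'inputValue': 'x'}
#   try_to_get_dict_from_string('[1, 2, 3]')             # -> '[1, 2, 3]' (not a dict)
#   try_to_get_dict_from_string('plain string')          # -> 'plain string'

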
def convert_str_or_dict_to_placeholder(
        element: Union[str,
                       dict]) -> Union[str, placeholders.CommandLineElement]:
    """Converts a command or args element to a placeholder type based on the
    key of the placeholder dict, else returns the input unchanged.

    Args:
        element (Union[str, dict, placeholders.CommandLineElement]): A ContainerSpec.command or ContainerSpec.args element.

    Returns:
        Union[str, placeholders.CommandLineElement]: Possibly converted placeholder or original input.
    """

    if not isinstance(element, dict):
        return element

    has_one_entry = len(element) == 1

    if not has_one_entry:
        raise ValueError(
            f'Got unexpected dictionary {element}. Expected a dictionary with one entry.'
        )

    first_key = list(element.keys())[0]
    first_value = list(element.values())[0]
    if first_key == 'inputValue':
        return placeholders.InputValuePlaceholder(
            input_name=utils.sanitize_input_name(first_value))

    elif first_key == 'inputPath':
        return placeholders.InputPathPlaceholder(
            input_name=utils.sanitize_input_name(first_value))

    elif first_key == 'inputUri':
        return placeholders.InputUriPlaceholder(
            input_name=utils.sanitize_input_name(first_value))

    elif first_key == 'outputPath':
        return placeholders.OutputPathPlaceholder(
            output_name=utils.sanitize_input_name(first_value))

    elif first_key == 'outputUri':
        return placeholders.OutputUriPlaceholder(
            output_name=utils.sanitize_input_name(first_value))

    elif first_key == 'ifPresent':
        structure_kwargs = element['ifPresent']
        structure_kwargs['input_name'] = structure_kwargs.pop('inputName')
        structure_kwargs['otherwise'] = structure_kwargs.pop('else')
        structure_kwargs['then'] = [
            convert_str_or_dict_to_placeholder(e)
            for e in structure_kwargs['then']
        ]
        structure_kwargs['otherwise'] = [
            convert_str_or_dict_to_placeholder(e)
            for e in structure_kwargs['otherwise']
        ]
        return placeholders.IfPresentPlaceholder(**structure_kwargs)

    elif first_key == 'concat':
        return placeholders.ConcatPlaceholder(items=[
            convert_str_or_dict_to_placeholder(e) for e in element['concat']
        ])

    elif first_key == 'executorInput':
        return placeholders.ExecutorInputPlaceholder()

    else:
        raise TypeError(
            f'Unexpected command/argument type: "{element}" of type "{type(element)}".'
        )


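# Illustrative behavior (comments only); the input names shown are
# hypothetical and are normalized by utils.sanitize_input_name:
#
#   convert_str_or_dict_to_placeholder('--verbose')
#   # -> '--verbose' (plain strings pass through)
#   convert_str_or_dict_to_placeholder({'inputValue': 'Model Name'})
#   # -> placeholders.InputValuePlaceholder(input_name='model_name')
#   convert_str_or_dict_to_placeholder({'outputPath': 'model'})
#   # -> placeholders.OutputPathPlaceholder(output_name='model')

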
def _check_valid_placeholder_reference(
        valid_inputs: List[str], valid_outputs: List[str],
        placeholder: placeholders.CommandLineElement) -> None:
    """Validates input/output placeholders refer to an existing input/output.

    Args:
        valid_inputs: The existing input names.
        valid_outputs: The existing output names.
        placeholder: The placeholder argument to check.

    Raises:
        ValueError: if any placeholder references a non-existing input or
            output.
        TypeError: if any argument is neither a str nor a placeholder
            instance.
    """
    if isinstance(placeholder,
                  (placeholders.InputValuePlaceholder,
                   placeholders.InputPathPlaceholder,
                   placeholders.InputUriPlaceholder)):
        if placeholder.input_name not in valid_inputs:
            raise ValueError(
                f'Argument "{placeholder}" references non-existing input.')
    elif isinstance(placeholder, (placeholders.OutputParameterPlaceholder,
                                  placeholders.OutputPathPlaceholder,
                                  placeholders.OutputUriPlaceholder)):
        if placeholder.output_name not in valid_outputs:
            raise ValueError(
                f'Argument "{placeholder}" references non-existing output.')
    elif isinstance(placeholder, placeholders.IfPresentPlaceholder):
        if placeholder.input_name not in valid_inputs:
            raise ValueError(
                f'Argument "{placeholder}" references non-existing input.')
        for placeholder in itertools.chain(placeholder.then or [],
                                           placeholder.else_ or []):
            _check_valid_placeholder_reference(valid_inputs, valid_outputs,
                                               placeholder)
    elif isinstance(placeholder, placeholders.ConcatPlaceholder):
        for placeholder in placeholder.items:
            _check_valid_placeholder_reference(valid_inputs, valid_outputs,
                                               placeholder)
    elif not isinstance(placeholder,
                        (placeholders.ExecutorInputPlaceholder, str)):
        raise TypeError(
            f'Unexpected argument "{placeholder}" of type {type(placeholder)}.')


ValidCommandArgTypes = (str, placeholders.InputValuePlaceholder,
                        placeholders.InputPathPlaceholder,
                        placeholders.InputUriPlaceholder,
                        placeholders.OutputPathPlaceholder,
                        placeholders.OutputUriPlaceholder,
                        placeholders.IfPresentPlaceholder,
                        placeholders.ConcatPlaceholder)


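# Illustrative behavior of the validator above (comments only); the component
# in this sketch has one input 'text' and no outputs, so the second call
# would raise:
#
#   _check_valid_placeholder_reference(
#       ['text'], [], placeholders.InputValuePlaceholder(input_name='text'))
#   # -> returns None (valid reference)
#   _check_valid_placeholder_reference(
#       ['text'], [], placeholders.OutputPathPlaceholder(output_name='model'))
#   # -> raises ValueError: references non-existing output

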
class ComponentSpec(base_model.BaseModel):
    """The definition of a component.

    Attributes:
        name: The name of the component.
        description (optional): the description of the component.
        inputs (optional): the input definitions of the component.
        outputs (optional): the output definitions of the component.
        implementation: The implementation of the component. Either an executor
            (container, importer) or a DAG consisting of other components.
    """
    name: str
    implementation: Implementation
    description: Optional[str] = None
    inputs: Optional[Dict[str, InputSpec]] = None
    outputs: Optional[Dict[str, OutputSpec]] = None

    def transform_name(self) -> None:
        """Converts the name to a valid name."""
        self.name = utils.maybe_rename_for_k8s(self.name)

    def transform_inputs(self) -> None:
        """Use None instead of empty dict for inputs."""
        self.inputs = None if self.inputs == {} else self.inputs

    def transform_outputs(self) -> None:
        """Use None instead of empty dict for outputs."""
        self.outputs = None if self.outputs == {} else self.outputs

    def transform_command_input_placeholders(self) -> None:
        """Converts command and args elements to a placeholder type where
        applicable."""
        if self.implementation.container is not None:

            if self.implementation.container.command is not None:
                self.implementation.container.command = [
                    convert_str_or_dict_to_placeholder(e)
                    for e in self.implementation.container.command
                ]

            if self.implementation.container.args is not None:
                self.implementation.container.args = [
                    convert_str_or_dict_to_placeholder(e)
                    for e in self.implementation.container.args
                ]

    def validate_placeholders(self):
        """Validates that input/output placeholders refer to an existing
        input/output."""
        implementation = self.implementation
        if getattr(implementation, 'container', None) is None:
            return

        containerSpec: ContainerSpec = implementation.container

        valid_inputs = [] if self.inputs is None else list(self.inputs.keys())
        valid_outputs = [] if self.outputs is None else list(
            self.outputs.keys())

        for arg in itertools.chain((containerSpec.command or []),
                                   (containerSpec.args or [])):
            _check_valid_placeholder_reference(valid_inputs, valid_outputs, arg)

    @classmethod
    def from_v1_component_spec(
            cls,
            v1_component_spec: v1_structures.ComponentSpec) -> 'ComponentSpec':
        """Converts V1 ComponentSpec to V2 ComponentSpec.

        Args:
            v1_component_spec: The V1 ComponentSpec.

        Returns:
            Component spec in the form of V2 ComponentSpec.

        Raises:
            ValueError: If implementation is not found.
            TypeError: If any argument is neither a str nor Dict.
        """
        component_dict = v1_component_spec.to_dict()
        if component_dict.get('implementation') is None:
            raise ValueError('Implementation field not found')

        if 'container' not in component_dict.get(
                'implementation'):  # type: ignore
            raise NotImplementedError('Container implementation not found.')

        def convert_v1_if_present_placholder_to_v2(
            arg: Dict[str, Any]
        ) -> Union[Dict[str, Any], placeholders.CommandLineElement]:
            if isinstance(arg, str):
                arg = try_to_get_dict_from_string(arg)
                if isinstance(arg, str):
                    return arg

            if 'if' in arg:
                if_ = arg['if']
                input_name = utils.sanitize_input_name(if_['cond']['isPresent'])
                then_ = if_['then']
                else_ = if_.get('else', [])
                return placeholders.IfPresentPlaceholder(
                    input_name=input_name,
                    then=[
                        convert_str_or_dict_to_placeholder(
                            convert_v1_if_present_placholder_to_v2(val))
                        for val in then_
                    ],
                    else_=[
                        convert_str_or_dict_to_placeholder(
                            convert_v1_if_present_placholder_to_v2(val))
                        for val in else_
                    ])

            elif 'concat' in arg:
                return placeholders.ConcatPlaceholder(items=[
                    convert_str_or_dict_to_placeholder(
                        convert_v1_if_present_placholder_to_v2(val))
                    for val in arg['concat']
                ])
            elif isinstance(arg, (ValidCommandArgTypes, dict)):
                return arg
            else:
                raise TypeError(
                    f'Unexpected argument {arg} of type {type(arg)}.')

        implementation = component_dict['implementation']['container']
        implementation['command'] = [
            convert_v1_if_present_placholder_to_v2(command)
            for command in implementation.get('command', [])
        ]
        implementation['args'] = [
            convert_v1_if_present_placholder_to_v2(command)
            for command in implementation.get('args', [])
        ]
        implementation['env'] = {
            key: convert_v1_if_present_placholder_to_v2(command)
            for key, command in implementation.get('env', {}).items()
        }
        container_spec = ContainerSpec(image=implementation['image'])
        # Must assign these after the constructor call, otherwise it won't work.
        if implementation['command']:
            container_spec.command = implementation['command']
        if implementation['args']:
            container_spec.args = implementation['args']
        if implementation['env']:
            container_spec.env = implementation['env']

        return ComponentSpec(
            name=component_dict.get('name', 'name'),
            description=component_dict.get('description'),
            implementation=Implementation(container=container_spec),
            inputs={
                utils.sanitize_input_name(spec['name']): InputSpec(
                    type=spec.get('type', 'Artifact'),
                    default=spec.get('default', None))
                for spec in component_dict.get('inputs', [])
            },
            outputs={
                utils.sanitize_input_name(spec['name']):
                    OutputSpec(type=spec.get('type', 'Artifact'))
                for spec in component_dict.get('outputs', [])
            })

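    # Illustrative usage (comments only). A hypothetical minimal V1 component
    # YAML loaded via v1_components is converted to the V2 in-memory spec;
    # input names are sanitized and untyped inputs default to 'Artifact':
    #
    #   V1 YAML:
    #     name: Add
    #     inputs: [{name: Op One, type: Integer}]
    #     implementation:
    #       container: {image: alpine, command: [echo, {inputValue: Op One}]}
    #
    #   ComponentSpec.from_v1_component_spec(v1_spec).inputs
    #   # -> {'op_one': InputSpec(type='Integer', default=None)}
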
    @classmethod
    def from_pipeline_spec_dict(
            cls, pipeline_spec_dict: Dict[str, Any]) -> 'ComponentSpec':
        """Creates a ComponentSpec from a single-component PipelineSpec
        message in dict format.

        Args:
            pipeline_spec_dict (Dict[str, Any]): PipelineSpec message in dict format.

        Returns:
            ComponentSpec: The ComponentSpec object.
        """
        raw_name = pipeline_spec_dict['pipelineInfo']['name']

        implementation = Implementation.from_deployment_spec_dict(
            pipeline_spec_dict['deploymentSpec'], raw_name)

        def inputs_dict_from_components_dict(
                components_dict: Dict[str, Any],
                component_name: str) -> Dict[str, InputSpec]:
            component_key = utils._COMPONENT_NAME_PREFIX + component_name
            parameters = components_dict[component_key].get(
                'inputDefinitions', {}).get('parameters', {})
            return {
                name: InputSpec.from_ir_parameter_dict(parameter_dict)
                for name, parameter_dict in parameters.items()
            }

        def outputs_dict_from_components_dict(
                components_dict: Dict[str, Any],
                component_name: str) -> Dict[str, OutputSpec]:
            component_key = utils._COMPONENT_NAME_PREFIX + component_name
            parameters = components_dict[component_key].get(
                'outputDefinitions', {}).get('parameters', {})
            artifacts = components_dict[component_key].get(
                'outputDefinitions', {}).get('artifacts', {})
            all_outputs = {**parameters, **artifacts}
            return {
                name: OutputSpec.from_ir_parameter_dict(parameter_dict)
                for name, parameter_dict in all_outputs.items()
            }

        def extract_description_from_command(commands: List[str]) -> str:
            for command in commands:
                if isinstance(command, str) and 'import kfp' in command:
                    for node in ast.walk(ast.parse(command)):
                        if isinstance(node, (ast.FunctionDef, ast.ClassDef,
                                             ast.Module)):
                            docstring = ast.get_docstring(node)
                            if docstring:
                                return docstring
            return ''

        inputs = inputs_dict_from_components_dict(
            pipeline_spec_dict['components'], raw_name)
        outputs = outputs_dict_from_components_dict(
            pipeline_spec_dict['components'], raw_name)

        description = extract_description_from_command(
            implementation.container.command) or None
        return ComponentSpec(
            name=raw_name,
            implementation=implementation,
            description=description,
            inputs=inputs,
            outputs=outputs)

    @classmethod
    def from_pipeline_spec_yaml(cls,
                                pipeline_spec_yaml: str) -> 'ComponentSpec':
        """Creates a ComponentSpec from a pipeline spec in YAML format.

        Args:
            pipeline_spec_yaml (str): Pipeline spec in YAML format.

        Returns:
            ComponentSpec: The component spec object.
        """
        return ComponentSpec.from_pipeline_spec_dict(
            yaml.safe_load(pipeline_spec_yaml))

    @classmethod
    def load_from_component_yaml(cls, component_yaml: str) -> 'ComponentSpec':
        """Loads V1 or V2 component yaml into ComponentSpec.

        Args:
            component_yaml: the component yaml in string format.

        Returns:
            Component spec in the form of V2 ComponentSpec.
        """

        json_component = yaml.safe_load(component_yaml)
        is_v1 = 'implementation' in set(json_component.keys())
        if is_v1:
            v1_component = v1_components._load_component_spec_from_component_text(
                component_yaml)
            return cls.from_v1_component_spec(v1_component)
        else:
            return ComponentSpec.from_pipeline_spec_dict(json_component)

    def save_to_component_yaml(self, output_file: str) -> None:
        """Saves ComponentSpec into IR YAML file.

        Args:
            output_file: File path to store the component yaml.
        """

        pipeline_spec = self.to_pipeline_spec()
        compiler.write_pipeline_spec_to_file(pipeline_spec, output_file)

    def to_pipeline_spec(self) -> pipeline_spec_pb2.PipelineSpec:
        """Creates a pipeline instance and constructs the pipeline spec for a
        single component.

        Returns:
            A PipelineSpec proto representing the compiled component.
        """
        # import here to avoid circular module dependency
        from kfp.compiler import pipeline_spec_builder as builder
        from kfp.components import pipeline_task
        from kfp.components import tasks_group
        from kfp.components.types import type_utils

        args_dict = {}
        pipeline_inputs = self.inputs or {}

        for arg_name, input_spec in pipeline_inputs.items():
            arg_type = input_spec.type
            if not type_utils.is_parameter_type(
                    arg_type) or type_utils.is_task_final_status_type(arg_type):
                raise TypeError(
                    builder.make_invalid_input_type_error_msg(
                        arg_name, arg_type))
            args_dict[arg_name] = dsl.PipelineParameterChannel(
                name=arg_name, channel_type=arg_type)

        task = pipeline_task.PipelineTask(self, args_dict)

        # instead of constructing a pipeline with pipeline_context.Pipeline,
        # just build the single task group
        group = tasks_group.TasksGroup(
            group_type=tasks_group.TasksGroupType.PIPELINE)
        group.tasks.append(task)

        # Fill in the default values.
        args_list_with_defaults = [
            dsl.PipelineParameterChannel(
                name=input_name,
                channel_type=input_spec.type,
                value=input_spec.default,
            ) for input_name, input_spec in pipeline_inputs.items()
        ]
        group.name = uuid.uuid4().hex

        pipeline_name = self.name
        pipeline_args = args_list_with_defaults
        task_group = group

        builder.validate_pipeline_name(pipeline_name)

        pipeline_spec = pipeline_spec_pb2.PipelineSpec()
        pipeline_spec.pipeline_info.name = pipeline_name
        pipeline_spec.sdk_version = f'kfp-{kfp.__version__}'
        # Schema version 2.1.0 is required for kfp-pipeline-spec>0.1.13
        pipeline_spec.schema_version = '2.1.0'
        pipeline_spec.root.CopyFrom(
            builder.build_component_spec_for_group(
                pipeline_channels=pipeline_args,
                is_root_group=True,
            ))

        deployment_config = pipeline_spec_pb2.PipelineDeploymentConfig()
        root_group = task_group

        task_name_to_parent_groups, group_name_to_parent_groups = builder.get_parent_groups(
            root_group)

        def get_inputs(task_group: tasks_group.TasksGroup,
                       task_name_to_parent_groups):
            inputs = collections.defaultdict(set)
            if len(task_group.tasks) != 1:
                raise ValueError(
                    f'Error compiling component. Expected one task in task group, got {len(task_group.tasks)}.'
                )
            only_task = task_group.tasks[0]
            if only_task.channel_inputs:
                for group_name in task_name_to_parent_groups[only_task.name]:
                    inputs[group_name].add((only_task.channel_inputs[-1], None))
            return inputs

        inputs = get_inputs(task_group, task_name_to_parent_groups)

        builder.build_spec_by_group(
            pipeline_spec=pipeline_spec,
            deployment_config=deployment_config,
            group=root_group,
            inputs=inputs,
            dependencies={},  # no dependencies for single-component pipeline
            rootgroup_name=root_group.name,
            task_name_to_parent_groups=task_name_to_parent_groups,
            group_name_to_parent_groups=group_name_to_parent_groups,
            name_to_for_loop_group={},  # no for loop for single-component pipeline
        )

        return pipeline_spec
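

# Illustrative end-to-end usage (comments only). Loading a component YAML,
# compiling it to a single-component PipelineSpec, and writing it back out;
# 'component.yaml' and 'component_ir.yaml' are hypothetical file names:
#
#   with open('component.yaml') as f:
#       spec = ComponentSpec.load_from_component_yaml(f.read())
#   pipeline_spec = spec.to_pipeline_spec()  # pipeline_spec_pb2.PipelineSpec
#   spec.save_to_component_yaml('component_ir.yaml')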