feat(sdk): add compilation logic for google artifact types (support custom artifact types pt. 2) (#8232)

* remove dependency of deprecated on v2

* use new artifact identification logic

* add custom artifact type compiler test

* compile schema_version from non-kfp artifacts

* update tests

* add importer to compiler test

* update golden snapshots

* fix type checking code

* undo executor_test diff

* remove gcpc types compiler test

* add compilation test comment

* update google artifact handling

* update compiler test

* only permit system. and google. artifact types; add tests

* update compiler test

* simplify type checking logic

* use bundled artifact types in PipelineArtifactChannel

* raise error on unknown v1 input or output

* add type utils and move functions

* add type utils tests

* remove schema_version from inputspec and outputspec

* update artifact proto creation and tests

* propagate bundled artifact type string changes throughout sdk

* update golden snapshot

* remove unused v1 compat and tests

* update type checking and tests to validate schema_version using semantic versioning

* update importer sample

* update test data config

* update test_data config

* clean up sample

* clean up sample
Connor McCarthy 2022-09-12 14:00:02 -06:00 committed by GitHub
parent 16c7d6658e
commit e371fa3795
25 changed files with 998 additions and 582 deletions
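
The thread running through these files is the new bundled artifact type string, <namespace>.<Name>@<schema_version> (for example system.Dataset@0.0.1 or google.VertexModel@0.0.1), which replaces bare type names like 'Artifact' throughout the SDK. A minimal sketch using the type_utils helpers added further down in this diff (illustrative only, not part of the commit):

# Bundled artifact type strings, as introduced by this commit.
from kfp.components.types import type_utils

bundled = type_utils.create_bundled_artifact_type('system.Dataset', '0.0.1')
assert bundled == 'system.Dataset@0.0.1'
# Omitting the version falls back to the default '0.0.1'.
assert type_utils.create_bundled_artifact_type('google.VertexModel') == 'google.VertexModel@0.0.1'

# Only the system. and google. namespaces are accepted, and the version must be
# three-part semantic versioning; anything else raises TypeError.
type_utils.validate_bundled_artifact_type(bundled)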

View File

@ -16,14 +16,15 @@
from __future__ import annotations
import unittest
from pprint import pprint
import kfp.deprecated as kfp
from kfp.samples.test.utils import KfpTask
from kfp.samples.test.utils import run_pipeline_func
from kfp.samples.test.utils import TestCase
import kfp_server_api
from ml_metadata.proto import Execution
from .pipeline_with_importer import pipeline_with_importer
from kfp.samples.test.utils import KfpTask, run_pipeline_func, TestCase
def verify(t: unittest.TestCase, run: kfp_server_api.ApiRun,
@ -60,14 +61,12 @@ def verify(t: unittest.TestCase, run: kfp_server_api.ApiRun,
'outputs': {
'artifacts': [{
'name': 'artifact',
'type': 'system.Artifact',
# 'type': 'system.Dataset',
'type': 'system.Dataset',
}],
},
'type': 'system.ImporterExecution',
'state': Execution.State.COMPLETE,
},
importer_dict)
}, importer_dict)
t.assertEqual(
{
@ -75,9 +74,7 @@ def verify(t: unittest.TestCase, run: kfp_server_api.ApiRun,
'inputs': {
'artifacts': [{
'name': 'dataset',
# TODO(chesu): compiled pipeline spec incorrectly sets importer artifact type to system.Artifact, but in the pipeline, it should be system.Dataset.
'type': 'system.Artifact',
# 'type': 'system.Dataset'
'type': 'system.Dataset'
}],
},
'outputs': {
@ -94,8 +91,7 @@ def verify(t: unittest.TestCase, run: kfp_server_api.ApiRun,
},
'type': 'system.ContainerExecution',
'state': Execution.State.COMPLETE,
},
train_dict)
}, train_dict)
if __name__ == '__main__':

View File

@ -162,7 +162,8 @@ class TestCompilePipeline(parameterized.TestCase):
with self.assertRaisesRegex(
TypeError,
' type "Model" cannot be paired with InputValuePlaceholder.'):
' type "system.Model@0.0.1" cannot be paired with InputValuePlaceholder.'
):
@dsl.pipeline(name='test-pipeline', pipeline_root='dummy_root')
def my_pipeline():
@ -281,12 +282,11 @@ class TestCompilePipeline(parameterized.TestCase):
args:
- {inputPath: some_input}
""")
with self.assertRaisesRegex(
type_utils.InconsistentTypeException,
'Incompatible argument passed to the input "some_input" of '
'component "compoent": Argument type "STRING" is incompatible '
'with the input type "Artifact"'):
'with the input type "system.Artifact@0.0.1"'):
@dsl.pipeline(name='test-pipeline', pipeline_root='gs://path')
def my_pipeline(input1: str):
@ -414,16 +414,16 @@ class TestCompilePipeline(parameterized.TestCase):
def consumer_op(input1: dsl.Input[dsl.Dataset]):
pass
with self.assertRaisesRegex(
type_utils.InconsistentTypeException,
'Incompatible argument passed to the input "input1" of component'
' "consumer-op": Argument type "SomeArbitraryType" is'
' incompatible with the input type "Dataset"'):
@dsl.pipeline(name='test-pipeline')
def my_pipeline():
consumer_op(input1=producer_op1().output)
@dsl.pipeline(name='test-pipeline')
def my_pipeline():
consumer_op(input1=producer_op1().output)
consumer_op(input1=producer_op2().output)
with tempfile.TemporaryDirectory() as tmpdir:
target_yaml_file = os.path.join(tmpdir, 'result.yaml')
compiler.Compiler().compile(
pipeline_func=my_pipeline, package_path=target_yaml_file)
self.assertTrue(os.path.exists(target_yaml_file))
def test_invalid_data_dependency_loop(self):

View File

@ -367,7 +367,8 @@ def build_component_spec_for_task(
else:
component_spec.input_definitions.artifacts[
input_name].artifact_type.CopyFrom(
type_utils.get_artifact_type_schema(input_spec.type))
type_utils.bundled_artifact_to_artifact_proto(
input_spec.type))
for output_name, output_spec in (task.component_spec.outputs or {}).items():
if type_utils.is_parameter_type(output_spec.type):
@ -377,7 +378,8 @@ def build_component_spec_for_task(
else:
component_spec.output_definitions.artifacts[
output_name].artifact_type.CopyFrom(
type_utils.get_artifact_type_schema(output_spec.type))
type_utils.bundled_artifact_to_artifact_proto(
output_spec.type))
return component_spec
@ -409,7 +411,8 @@ def _build_component_spec_from_component_spec_structure(
else:
component_spec.input_definitions.artifacts[
input_name].artifact_type.CopyFrom(
type_utils.get_artifact_type_schema(input_spec.type))
type_utils.bundled_artifact_to_artifact_proto(
input_spec.type))
for output_name, output_spec in (component_spec_struct.outputs or
{}).items():
@ -420,7 +423,8 @@ def _build_component_spec_from_component_spec_structure(
else:
component_spec.output_definitions.artifacts[
output_name].artifact_type.CopyFrom(
type_utils.get_artifact_type_schema(output_spec.type))
type_utils.bundled_artifact_to_artifact_proto(
output_spec.type))
return component_spec
@ -494,8 +498,8 @@ def build_importer_spec_for_task(
Returns:
A ImporterSpec object for the task.
"""
type_schema = type_utils.get_artifact_type_schema(
task.importer_spec.type_schema)
type_schema = type_utils.bundled_artifact_to_artifact_proto(
task.importer_spec.schema_title)
importer_spec = pipeline_spec_pb2.PipelineDeploymentConfig.ImporterSpec(
type_schema=type_schema, reimport=task.importer_spec.reimport)
@ -614,7 +618,8 @@ def build_component_spec_for_group(
if isinstance(channel, pipeline_channel.PipelineArtifactChannel):
component_spec.input_definitions.artifacts[
input_name].artifact_type.CopyFrom(
type_utils.get_artifact_type_schema(channel.channel_type))
type_utils.bundled_artifact_to_artifact_proto(
channel.channel_type))
else:
# channel is one of PipelineParameterChannel, LoopArgument, or
# LoopArgumentVariable.
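
The type_utils.bundled_artifact_to_artifact_proto helper used above (defined in the type_utils.py hunk later in this diff) splits the bundled string at '@' and fills the IR ArtifactTypeSchema proto; a quick illustration:

from kfp.components.types import type_utils

proto = type_utils.bundled_artifact_to_artifact_proto('google.VertexModel@0.0.1')
# proto.schema_title == 'google.VertexModel'
# proto.schema_version == '0.0.1'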

View File

@ -42,7 +42,10 @@ class PipelineSpecBuilderTest(parameterized.TestCase):
{
'channel':
pipeline_channel.PipelineArtifactChannel(
name='output1', task_name='task1', channel_type='Artifact'),
name='output1',
task_name='task1',
channel_type='system.Artifact@0.0.1',
),
'expected':
'pipelinechannel--task1-output1',
},

View File

@ -28,7 +28,6 @@ from kfp.components import python_component
from kfp.components import structures
from kfp.components.container_component_artifact_channel import \
ContainerComponentArtifactChannel
from kfp.components.types import artifact_types
from kfp.components.types import type_annotations
from kfp.components.types import type_utils
@ -130,39 +129,6 @@ def _get_function_source_definition(func: Callable) -> str:
return '\n'.join(func_code_lines)
def _annotation_to_type_struct(annotation):
if not annotation or annotation == inspect.Parameter.empty:
return None
if hasattr(annotation, 'to_dict'):
annotation = annotation.to_dict()
if isinstance(annotation, dict):
return annotation
if isinstance(annotation, type):
type_struct = type_utils.get_canonical_type_name_for_type(annotation)
if type_struct:
return type_struct
if issubclass(annotation, artifact_types.Artifact
) and not annotation.schema_title.startswith('system.'):
# For artifact classes not under the `system` namespace,
# use its schema_title as-is.
schema_title = annotation.schema_title
else:
schema_title = str(annotation.__name__)
elif hasattr(annotation,
'__forward_arg__'): # Handling typing.ForwardRef('Type_name')
schema_title = str(annotation.__forward_arg__)
else:
schema_title = str(annotation)
# It's also possible to get the converter by type name
type_struct = type_utils.get_canonical_type_name_for_type(schema_title)
if type_struct:
return type_struct
return schema_title
def _maybe_make_unique(name: str, names: List[str]):
if name not in names:
return name
@ -205,7 +171,7 @@ def extract_component_interface(
# parameter_type is type_annotations.Artifact or one of its subclasses.
parameter_type = type_annotations.get_io_artifact_class(
parameter_type)
if not issubclass(parameter_type, artifact_types.Artifact):
if not type_annotations.is_artifact(parameter_type):
raise ValueError(
'Input[T] and Output[T] are only supported when T is a '
'subclass of Artifact. Found `{} with type {}`'.format(
@ -227,7 +193,7 @@ def extract_component_interface(
'Path inputs only support default values of None. Default'
' values for outputs are not supported.')
type_struct = _annotation_to_type_struct(parameter_type)
type_struct = type_utils._annotation_to_type_struct(parameter_type)
if type_struct is None:
raise TypeError('Missing type annotation for argument: {}'.format(
parameter.name))
@ -237,18 +203,30 @@ def extract_component_interface(
]:
io_name = _maybe_make_unique(io_name, output_names)
output_names.add(io_name)
output_spec = structures.OutputSpec(type=type_struct)
if type_annotations.is_artifact(parameter_type):
schema_version = parameter_type.schema_version
output_spec = structures.OutputSpec(
type=type_utils.create_bundled_artifact_type(
type_struct, schema_version))
else:
output_spec = structures.OutputSpec(type=type_struct)
outputs[io_name] = output_spec
else:
io_name = _maybe_make_unique(io_name, input_names)
input_names.add(io_name)
if parameter.default is not inspect.Parameter.empty:
if type_annotations.is_artifact(parameter_type):
schema_version = parameter_type.schema_version
input_spec = structures.InputSpec(
type=type_struct,
default=parameter.default,
)
type=type_utils.create_bundled_artifact_type(
type_struct, schema_version))
else:
input_spec = structures.InputSpec(type=type_struct)
if parameter.default is not inspect.Parameter.empty:
input_spec = structures.InputSpec(
type=type_struct,
default=parameter.default,
)
else:
input_spec = structures.InputSpec(type=type_struct,)
inputs[io_name] = input_spec
@ -265,12 +243,19 @@ def extract_component_interface(
for field_name in return_ann._fields:
type_struct = None
if field_annotations:
type_struct = _annotation_to_type_struct(
type_struct = type_utils._annotation_to_type_struct(
field_annotations.get(field_name, None))
output_name = _maybe_make_unique(field_name, output_names)
output_names.add(output_name)
output_spec = structures.OutputSpec(type=type_struct)
if type_struct.lower() in type_utils._PARAMETER_TYPES_MAPPING:
output_spec = structures.OutputSpec(type=type_struct)
else:
output_spec = structures.OutputSpec(
type=type_utils.create_bundled_artifact_type(
type_struct,
field_annotations.get(field_name).schema_version))
outputs[output_name] = output_spec
# Deprecated dict-based way of declaring multiple outputs. Was only used by
# the @component decorator
@ -281,7 +266,7 @@ def extract_component_interface(
' 0.1.32. Please use typing.NamedTuple to declare multiple'
' outputs.')
for output_name, output_type_annotation in return_ann.items():
output_type_struct = _annotation_to_type_struct(
output_type_struct = type_utils._annotation_to_type_struct(
output_type_annotation)
output_spec = structures.OutputSpec(type=output_type_struct)
outputs[name] = output_spec
@ -291,9 +276,15 @@ def extract_component_interface(
# Fixes exotic, but possible collision:
# `def func(output_path: OutputPath()) -> str: ...`
output_names.add(output_name)
type_struct = _annotation_to_type_struct(
signature.return_annotation)
output_spec = structures.OutputSpec(type=type_struct)
return_ann = signature.return_annotation
if type_annotations.is_artifact(signature.return_annotation):
output_spec = structures.OutputSpec(
type=type_utils.create_bundled_artifact_type(
return_ann.schema_title, return_ann.schema_version))
else:
type_struct = type_utils._annotation_to_type_struct(return_ann)
output_spec = structures.OutputSpec(type=type_struct)
outputs[output_name] = output_spec
elif return_ann != inspect.Parameter.empty and return_ann != structures.ContainerSpec:
raise TypeError(

View File

@ -170,7 +170,7 @@ class Executor():
@classmethod
def _is_artifact(cls, annotation: Any) -> bool:
if type(annotation) == type:
return issubclass(annotation, artifact_types.Artifact)
return type_annotations.is_artifact(annotation)
return False
@classmethod

View File

@ -46,12 +46,10 @@ class GraphComponent(base_component.BaseComponent):
signature = inspect.signature(pipeline_func)
for arg_name in signature.parameters:
arg_type = component_spec.inputs[arg_name].type
input_spec = component_spec.inputs[arg_name]
args_list.append(
pipeline_channel.create_pipeline_channel(
name=arg_name,
channel_type=arg_type,
))
name=arg_name, channel_type=input_spec.type))
with pipeline_context.Pipeline(
self.component_spec.name) as dsl_pipeline:

View File

@ -21,6 +21,7 @@ from kfp.components import pipeline_task
from kfp.components import placeholders
from kfp.components import structures
from kfp.components.types import artifact_types
from kfp.components.types import type_utils
INPUT_KEY = 'uri'
OUTPUT_KEY = 'artifact'
@ -54,13 +55,6 @@ def importer(
reimport=False)
train(dataset=importer1.output)
"""
if issubclass(artifact_class, artifact_types.Artifact
) and not artifact_class.schema_title.startswith('system.'):
# For artifact classes not under the `system` namespace,
# use its schema_title as-is.
schema_title = artifact_class.schema_title
else:
schema_title = artifact_class.__name__
component_spec = structures.ComponentSpec(
name='importer',
@ -68,11 +62,19 @@ def importer(
importer=structures.ImporterSpec(
artifact_uri=placeholders.InputValuePlaceholder(
INPUT_KEY).to_placeholder_string(),
type_schema=artifact_class.schema_title,
schema_title=type_utils.create_bundled_artifact_type(
artifact_class.schema_title, artifact_class.schema_version),
schema_version=artifact_class.schema_version,
reimport=reimport,
metadata=metadata)),
inputs={INPUT_KEY: structures.InputSpec(type='String')},
outputs={OUTPUT_KEY: structures.OutputSpec(type=schema_title)},
outputs={
OUTPUT_KEY:
structures.OutputSpec(
type=type_utils.create_bundled_artifact_type(
artifact_class.schema_title,
artifact_class.schema_version))
},
)
importer = importer_component.ImporterComponent(
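
A hedged usage sketch of the updated importer, whose spec now carries the bundled schema_title and schema_version taken from the artifact class (the pipeline name and URI below are illustrative):

from kfp import dsl

@dsl.pipeline(name='importer-demo')
def my_pipeline():
    # The compiled importer spec now records the bundled type,
    # e.g. 'system.Dataset@0.0.1' for dsl.Dataset.
    importer1 = dsl.importer(
        artifact_uri='gs://my-bucket/my-dataset',
        artifact_class=dsl.Dataset,
        reimport=False)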

View File

@ -111,11 +111,11 @@ class PipelineChannelTest(parameterized.TestCase):
'pipeline_channel':
pipeline_channel.PipelineArtifactChannel(
name='channel5',
channel_type='Artifact',
channel_type='system.Artifact@0.0.1',
task_name='task5',
),
'str_repr':
'{{channel:task=task5;name=channel5;type=Artifact;}}',
'{{channel:task=task5;name=channel5;type=system.Artifact@0.0.1;}}',
},
)
def test_str_repr(self, pipeline_channel, str_repr):

View File

@ -82,7 +82,8 @@ class PipelineTask:
f'Component "{component_spec.name}" got an unexpected input:'
f' {input_name}.')
input_type = component_spec.inputs[input_name].type
input_spec = component_spec.inputs[input_name]
input_type = input_spec.type
argument_type = None
if isinstance(argument_value, pipeline_channel.PipelineChannel):

View File

@ -95,7 +95,7 @@ class PipelineTaskTest(parameterized.TestCase):
'input1': structures.InputSpec(type='String'),
},
outputs={
'output1': structures.OutputSpec(type='Artifact'),
'output1': structures.OutputSpec(type='system.Artifact@0.0.1'),
},
)
expected_task_spec = structures.TaskSpec(

View File

@ -30,6 +30,8 @@ from kfp.components import v1_components
from kfp.components import v1_structures
from kfp.components.container_component_artifact_channel import \
ContainerComponentArtifactChannel
from kfp.components.types import artifact_types
from kfp.components.types import type_annotations
from kfp.components.types import type_utils
from kfp.pipeline_spec import pipeline_spec_pb2
import yaml
@ -44,7 +46,7 @@ class InputSpec_(base_model.BaseModel):
description: Optional: the user description of the input.
"""
type: Union[str, dict]
default: Union[Any, None] = None
default: Optional[Any] = None
# Hack to allow access to __init__ arguments for setting _optional value
@ -82,16 +84,22 @@ class InputSpec(InputSpec_, base_model.BaseModel):
"""
if 'parameterType' in ir_component_inputs_dict:
type_string = ir_component_inputs_dict['parameterType']
type_ = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE.get(type_string)
if type_ is None:
raise ValueError(f'Unknown type {type_string} found in IR.')
default_value = ir_component_inputs_dict.get('defaultValue')
else:
type_string = ir_component_inputs_dict['artifactType'][
'schemaTitle']
default_value = None
type_ = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE.get(type_string)
if type_ is None:
raise ValueError(f'Unknown type {type_string} found in IR.')
return InputSpec(
type=type_,
default=default_value,
)
return InputSpec(type=type_, default=default_value)
else:
type_ = ir_component_inputs_dict['artifactType']['schemaTitle']
schema_version = ir_component_inputs_dict['artifactType'][
'schemaVersion']
return InputSpec(
type=type_utils.create_bundled_artifact_type(
type_, schema_version))
def __eq__(self, other: Any) -> bool:
"""Equality comparison for InputSpec. Robust to different type
@ -113,6 +121,16 @@ class InputSpec(InputSpec_, base_model.BaseModel):
else:
return False
def validate_type(self) -> None:
"""Type should either be a parameter or a valid bundled artifact type
by the time it gets to InputSpec.
This allows us to perform fewer checks downstream.
"""
# TODO: add transformation logic so that we don't have to transform inputs at every place they are used, including v1 back compat support
if not spec_type_is_parameter(self.type):
type_utils.validate_bundled_artifact_type(self.type)
class OutputSpec(base_model.BaseModel):
"""Component output definitions.
@ -138,13 +156,17 @@ class OutputSpec(base_model.BaseModel):
"""
if 'parameterType' in ir_component_outputs_dict:
type_string = ir_component_outputs_dict['parameterType']
type_ = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE.get(type_string)
if type_ is None:
raise ValueError(f'Unknown type {type_string} found in IR.')
return OutputSpec(type=type_,)
else:
type_string = ir_component_outputs_dict['artifactType'][
'schemaTitle']
type_ = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE.get(type_string)
if type_ is None:
raise ValueError(f'Unknown type {type_string} found in IR.')
return OutputSpec(type=type_)
type_ = ir_component_outputs_dict['artifactType']['schemaTitle']
schema_version = ir_component_outputs_dict['artifactType'][
'schemaVersion']
return OutputSpec(
type=type_utils.create_bundled_artifact_type(
type_, schema_version))
def __eq__(self, other: Any) -> bool:
"""Equality comparison for OutputSpec. Robust to different type
@ -166,6 +188,23 @@ class OutputSpec(base_model.BaseModel):
else:
return False
def validate_type(self):
"""Type should either be a parameter or a valid bundled artifact type
by the time it gets to OutputSpec.
This allows us to perform fewer checks downstream.
"""
# TODO: add transformation logic so that we don't have to transform outputs at every place they are used, including v1 back compat support
if not spec_type_is_parameter(self.type):
type_utils.validate_bundled_artifact_type(self.type)
def spec_type_is_parameter(type_: str) -> bool:
in_memory_type = type_annotations.maybe_strip_optional_from_annotation_string(
type_utils.get_canonical_name_for_outer_generic(type_))
return in_memory_type in type_utils.IN_MEMORY_SPEC_TYPE_TO_IR_TYPE or in_memory_type == 'PipelineTaskFinalStatus'
class ResourceSpec(base_model.BaseModel):
"""The resource requirements of a container execution.
@ -376,13 +415,15 @@ class ImporterSpec(base_model.BaseModel):
Attributes:
artifact_uri: The URI of the artifact.
type_schema: The type of the artifact.
schema_title: The schema_title of the artifact.
schema_version: The schema_version of the artifact.
reimport: Whether or not to import an artifact, regardless of whether it
has been imported before.
metadata (optional): the properties of the artifact.
"""
artifact_uri: str
type_schema: str
schema_title: str
schema_version: str
reimport: bool
metadata: Optional[Mapping[str, Any]] = None
@ -585,21 +626,108 @@ class ComponentSpec(base_model.BaseModel):
'env': container['env']
})
inputs = {}
for spec in component_dict.get('inputs', []):
type_ = spec.get('type')
if isinstance(type_, str) and type_ == 'PipelineTaskFinalStatus':
inputs[utils.sanitize_input_name(
spec['name'])] = InputSpec(type=type_)
continue
elif isinstance(type_, str) and type_.lower(
) in type_utils._PARAMETER_TYPES_MAPPING:
default = spec.get('default')
type_enum = type_utils._PARAMETER_TYPES_MAPPING[type_.lower()]
ir_parameter_type_name = pipeline_spec_pb2.ParameterType.ParameterTypeEnum.Name(
type_enum)
in_memory_parameter_type_name = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE[
ir_parameter_type_name]
inputs[utils.sanitize_input_name(spec['name'])] = InputSpec(
type=in_memory_parameter_type_name,
default=default,
)
continue
elif isinstance(type_, str) and re.match(
type_utils._GOOGLE_TYPES_PATTERN, type_):
schema_title = type_
schema_version = type_utils._GOOGLE_TYPES_VERSION
elif isinstance(type_, str) and type_.lower(
) in type_utils._ARTIFACT_CLASSES_MAPPING:
artifact_class = type_utils._ARTIFACT_CLASSES_MAPPING[
type_.lower()]
schema_title = artifact_class.schema_title
schema_version = artifact_class.schema_version
elif type_ is None or isinstance(type_, dict) or type_.lower(
) not in type_utils._ARTIFACT_CLASSES_MAPPING:
schema_title = artifact_types.Artifact.schema_title
schema_version = artifact_types.Artifact.schema_version
else:
raise ValueError(f'Unknown input: {type_}')
if spec.get('optional', False):
# handles a v1 edge-case where a user marks an artifact input as optional with no default value. some of these v1 component YAMLs exist.
inputs[utils.sanitize_input_name(spec['name'])] = InputSpec(
type=type_utils.create_bundled_artifact_type(
schema_title, schema_version),
default=None,
)
else:
inputs[utils.sanitize_input_name(spec['name'])] = InputSpec(
type=type_utils.create_bundled_artifact_type(
schema_title, schema_version))
outputs = {}
for spec in component_dict.get('outputs', []):
type_ = spec.get('type')
if isinstance(type_, str) and type_.lower(
) in type_utils._PARAMETER_TYPES_MAPPING:
type_enum = type_utils._PARAMETER_TYPES_MAPPING[type_.lower()]
ir_parameter_type_name = pipeline_spec_pb2.ParameterType.ParameterTypeEnum.Name(
type_enum)
in_memory_parameter_type_name = type_utils.IR_TYPE_TO_IN_MEMORY_SPEC_TYPE[
ir_parameter_type_name]
outputs[utils.sanitize_input_name(
spec['name'])] = OutputSpec(type=type_)
continue
elif isinstance(type_, str) and re.match(
type_utils._GOOGLE_TYPES_PATTERN, type_):
schema_title = type_
schema_version = type_utils._GOOGLE_TYPES_VERSION
elif isinstance(type_, str) and type_.lower(
) in type_utils._ARTIFACT_CLASSES_MAPPING:
artifact_class = type_utils._ARTIFACT_CLASSES_MAPPING[
type_.lower()]
schema_title = artifact_class.schema_title
schema_version = artifact_class.schema_version
elif type_ is None or isinstance(type_, dict) or type_.lower(
) not in type_utils._ARTIFACT_CLASSES_MAPPING:
schema_title = artifact_types.Artifact.schema_title
schema_version = artifact_types.Artifact.schema_version
else:
raise ValueError(f'Unknown output: {type_}')
outputs[utils.sanitize_input_name(spec['name'])] = OutputSpec(
type=type_utils.create_bundled_artifact_type(
schema_title, schema_version))
return ComponentSpec(
name=component_dict.get('name', 'name'),
description=component_dict.get('description'),
implementation=Implementation(container=container_spec),
inputs={
utils.sanitize_input_name(spec['name']): InputSpec(
type=spec.get('type', 'Artifact'),
default=spec.get('default', None))
for spec in component_dict.get('inputs', [])
},
outputs={
utils.sanitize_input_name(spec['name']):
OutputSpec(type=spec.get('type', 'Artifact'))
for spec in component_dict.get('outputs', [])
})
inputs=inputs,
outputs=outputs,
)
@classmethod
def from_pipeline_spec_dict(
@ -725,15 +853,13 @@ class ComponentSpec(base_model.BaseModel):
from kfp.components import pipeline_channel
from kfp.components import pipeline_task
from kfp.components import tasks_group
from kfp.components.types import type_utils
args_dict = {}
pipeline_inputs = self.inputs or {}
for arg_name, input_spec in pipeline_inputs.items():
arg_type = input_spec.type
args_dict[arg_name] = pipeline_channel.create_pipeline_channel(
name=arg_name, channel_type=arg_type)
name=arg_name, channel_type=input_spec.type)
task = pipeline_task.PipelineTask(self, args_dict)
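
Against the v1 loading path rewritten above, a hedged example of how legacy type names come out as bundled types when a v1 component YAML is loaded (the YAML and the expectations in comments are illustrative; compare structures_test.py below):

from kfp import components

v1_text = """
name: train
inputs:
- {name: text, type: String}
outputs:
- {name: model, type: Model}
implementation:
  container:
    image: alpine
    command: [sh, -c, 'echo "$0" > "$1"', {inputValue: text}, {outputPath: model}]
"""

train = components.load_component_from_text(v1_text)
# train.component_spec.inputs['text'].type   == 'String'
# train.component_spec.outputs['model'].type == 'system.Model@0.0.1'
# An output with an unrecognized or missing type falls back to
# 'system.Artifact@0.0.1'.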

View File

@ -423,12 +423,14 @@ sdkVersion: kfp-2.0.0-alpha.2
env={},
)),
inputs={
'input_parameter': structures.InputSpec(type='String'),
'input_artifact': structures.InputSpec(type='Artifact')
'input_parameter':
structures.InputSpec(type='String'),
'input_artifact':
structures.InputSpec(type='system.Artifact@0.0.1')
},
outputs={
'output_1': structures.OutputSpec(type='Artifact'),
'output_2': structures.OutputSpec(type='Artifact'),
'output_1': structures.OutputSpec(type='system.Artifact@0.0.1'),
'output_2': structures.OutputSpec(type='system.Artifact@0.0.1'),
})
self.assertEqual(generated_spec, expected_spec)
@ -557,11 +559,11 @@ class TestInputSpec(unittest.TestCase):
def test_equality(self):
self.assertEqual(
structures.InputSpec(type='str', default=None),
structures.InputSpec(type='str', default=None))
structures.InputSpec(type='String', default=None),
structures.InputSpec(type='String', default=None))
self.assertNotEqual(
structures.InputSpec(type='str', default=None),
structures.InputSpec(type='str', default='test'))
structures.InputSpec(type='String', default=None),
structures.InputSpec(type='String', default='test'))
self.assertEqual(
structures.InputSpec(type='List', default=None),
structures.InputSpec(type='typing.List', default=None))
@ -573,15 +575,15 @@ class TestInputSpec(unittest.TestCase):
structures.InputSpec(type='typing.List[typing.Dict[str, str]]'))
def test_optional(self):
input_spec = structures.InputSpec(type='str', default='test')
input_spec = structures.InputSpec(type='String', default='test')
self.assertEqual(input_spec.default, 'test')
self.assertEqual(input_spec._optional, True)
input_spec = structures.InputSpec(type='str', default=None)
input_spec = structures.InputSpec(type='String', default=None)
self.assertEqual(input_spec.default, None)
self.assertEqual(input_spec._optional, True)
input_spec = structures.InputSpec(type='str')
input_spec = structures.InputSpec(type='String')
self.assertEqual(input_spec.default, None)
self.assertEqual(input_spec._optional, False)
@ -620,7 +622,7 @@ class TestInputSpec(unittest.TestCase):
}
input_spec = structures.InputSpec.from_ir_component_inputs_dict(
artifact_dict)
self.assertEqual(input_spec.type, 'Artifact')
self.assertEqual(input_spec.type, 'system.Artifact@0.0.1')
class TestOutputSpec(parameterized.TestCase):
@ -639,7 +641,7 @@ class TestOutputSpec(parameterized.TestCase):
}
output_spec = structures.OutputSpec.from_ir_component_outputs_dict(
artifact_dict)
self.assertEqual(output_spec.type, 'Artifact')
self.assertEqual(output_spec.type, 'system.Artifact@0.0.1')
V1_YAML = textwrap.dedent("""\
@ -741,7 +743,9 @@ sdkVersion: kfp-2.0.0-alpha.2""")
inputs={
'input1': structures.InputSpec(type='String', default=None)
},
outputs={'output1': structures.OutputSpec(type='Artifact')})
outputs={
'output1': structures.OutputSpec(type='system.Artifact@0.0.1')
})
self.assertEqual(loaded_component_spec, component_spec)
def test_if_placeholder(self):

View File

@ -17,7 +17,10 @@ These are only compatible with v2 Pipelines.
"""
import re
from typing import TypeVar, Union
from typing import Type, TypeVar, Union
from kfp.components.types import type_annotations
from kfp.components.types import type_utils
try:
from typing import Annotated
@ -62,12 +65,10 @@ class OutputPath:
"""
def __init__(self, type=None):
self.type = type
self.type = construct_type_for_inputpath_or_outputpath(type)
def __eq__(self, other):
if isinstance(other, OutputPath):
return self.type == other.type
return False
return isinstance(other, OutputPath) and self.type == other.type
class InputPath:
@ -97,12 +98,25 @@ class InputPath:
"""
def __init__(self, type=None):
self.type = type
self.type = construct_type_for_inputpath_or_outputpath(type)
def __eq__(self, other):
if isinstance(other, InputPath):
return self.type == other.type
return False
return isinstance(other, InputPath) and self.type == other.type
def construct_type_for_inputpath_or_outputpath(
type_: Union[str, Type, None]) -> Union[str, None]:
if type_annotations.is_artifact(type_):
return type_utils.create_bundled_artifact_type(type_.schema_title,
type_.schema_version)
elif isinstance(
type_,
str) and type_.lower() in type_utils._ARTIFACT_CLASSES_MAPPING:
# v1 artifact backward compat
return type_utils.create_bundled_artifact_type(
type_utils._ARTIFACT_CLASSES_MAPPING[type_.lower()].schema_title)
else:
return type_
class InputAnnotation():
@ -229,6 +243,12 @@ def maybe_strip_optional_from_annotation(annotation: T) -> T:
return annotation
def maybe_strip_optional_from_annotation_string(annotation: str) -> str:
if annotation.startswith('Optional[') and annotation.endswith(']'):
return annotation.lstrip('Optional[').rstrip(']')
return annotation
def get_short_type_name(type_name: str) -> str:
"""Extracts the short form type name.
@ -252,3 +272,9 @@ def get_short_type_name(type_name: str) -> str:
return match.group('type')
else:
return type_name
def is_artifact(artifact_class_or_instance: Type) -> bool:
# we do not yet support non-pre-registered custom artifact types with instance_schema attribute
return hasattr(artifact_class_or_instance, 'schema_title') and hasattr(
artifact_class_or_instance, 'schema_version')
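
A brief sketch of the new InputPath/OutputPath and is_artifact behavior introduced above (the tests in the next file exercise is_artifact directly):

from kfp.components.types import artifact_types
from kfp.components.types import type_annotations

# Artifact classes and v1-style names are bundled; parameter types pass through.
assert type_annotations.OutputPath(artifact_types.Dataset).type == 'system.Dataset@0.0.1'
assert type_annotations.InputPath('Dataset').type == 'system.Dataset@0.0.1'
assert type_annotations.InputPath('String').type == 'String'

# is_artifact duck-types on the presence of schema_title and schema_version.
assert type_annotations.is_artifact(artifact_types.Model)
assert not type_annotations.is_artifact(str)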

View File

@ -17,6 +17,7 @@ from typing import Any, Dict, List, Optional
import unittest
from absl.testing import parameterized
from kfp.components.types import artifact_types
from kfp.components.types import type_annotations
from kfp.components.types.artifact_types import Model
from kfp.components.types.type_annotations import Input
@ -154,5 +155,38 @@ class AnnotationsTest(parameterized.TestCase):
type_annotations.get_short_type_name(original_type_name))
class TestIsArtifact(parameterized.TestCase):
@parameterized.parameters([{
'obj': obj
} for obj in artifact_types._SCHEMA_TITLE_TO_TYPE.values()])
def test_true_class(self, obj):
self.assertTrue(type_annotations.is_artifact(obj))
@parameterized.parameters([{
'obj': obj(name='name', uri='uri', metadata={})
} for obj in artifact_types._SCHEMA_TITLE_TO_TYPE.values()])
def test_true_instance(self, obj):
self.assertTrue(type_annotations.is_artifact(obj))
@parameterized.parameters([{'obj': 'string'}, {'obj': 1}, {'obj': int}])
def test_false(self, obj):
self.assertFalse(type_annotations.is_artifact(obj))
def test_false_no_schema_title(self):
class NotArtifact:
schema_version = ''
self.assertFalse(type_annotations.is_artifact(NotArtifact))
def test_false_no_schema_version(self):
class NotArtifact:
schema_title = ''
self.assertFalse(type_annotations.is_artifact(NotArtifact))
if __name__ == '__main__':
unittest.main()

View File

@ -12,18 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for component I/O type mapping."""
import inspect
import re
from typing import Any, List, Optional, Type, Union
from typing import Any, Optional, Type, Union
import warnings
import kfp
from kfp.components import task_final_status
from kfp.components import v1_structures
from kfp.components.types import artifact_types
from kfp.components.types import type_annotations
from kfp.pipeline_spec import pipeline_spec_pb2
DEFAULT_ARTIFACT_SCHEMA_VERSION = '0.0.1'
PARAMETER_TYPES = Union[str, int, float, bool, dict, list]
# ComponentSpec I/O types to DSL ontology artifact classes mapping.
@ -38,7 +38,7 @@ _ARTIFACT_CLASSES_MAPPING = {
}
_GOOGLE_TYPES_PATTERN = r'^google.[A-Za-z]+$'
_GOOGLE_TYPES_VERSION = '0.0.1'
_GOOGLE_TYPES_VERSION = DEFAULT_ARTIFACT_SCHEMA_VERSION
# ComponentSpec I/O types to (IR) PipelineTaskSpec I/O types mapping.
# The keys are normalized (lowercased). These are types viewed as Parameters.
@ -104,28 +104,15 @@ def is_parameter_type(type_name: Optional[Union[str, dict]]) -> bool:
) in _PARAMETER_TYPES_MAPPING or is_task_final_status_type(type_name)
def get_artifact_type_schema(
artifact_class_or_type_name: Optional[Union[str,
Type[artifact_types.Artifact]]]
) -> pipeline_spec_pb2.ArtifactTypeSchema:
"""Gets the IR I/O artifact type msg for the given ComponentSpec I/O
type."""
artifact_class = artifact_types.Artifact
if isinstance(artifact_class_or_type_name, str):
if re.match(_GOOGLE_TYPES_PATTERN, artifact_class_or_type_name):
return pipeline_spec_pb2.ArtifactTypeSchema(
schema_title=artifact_class_or_type_name,
schema_version=_GOOGLE_TYPES_VERSION,
)
artifact_class = _ARTIFACT_CLASSES_MAPPING.get(
artifact_class_or_type_name.lower(), artifact_types.Artifact)
elif inspect.isclass(artifact_class_or_type_name) and issubclass(
artifact_class_or_type_name, artifact_types.Artifact):
artifact_class = artifact_class_or_type_name
def bundled_artifact_to_artifact_proto(
bundled_artifact_str: str) -> pipeline_spec_pb2.ArtifactTypeSchema:
"""Gets the IR ArtifactTypeSchema proto for a bundled artifact in form
`<namespace>.<Name>@x.x.x` (e.g., system.Artifact@0.0.1)."""
bundled_artifact_str, schema_version = bundled_artifact_str.split('@')
return pipeline_spec_pb2.ArtifactTypeSchema(
schema_title=artifact_class.schema_title,
schema_version=artifact_class.schema_version)
schema_title=bundled_artifact_str,
schema_version=schema_version,
)
def get_parameter_type(
@ -143,6 +130,7 @@ def get_parameter_type(
Raises:
AttributeError: if type_name is not a string type.
"""
if type(param_type) == type:
type_name = param_type.__name__
elif isinstance(param_type, dict):
@ -159,7 +147,7 @@ def get_parameter_type_name(
get_parameter_type(param_type))
def get_parameter_type_field_name(type_name: Optional[str]) -> str:
def get_parameter_type_field_name(type_name: Optional[str]) -> Optional[str]:
"""Get the IR field name for the given primitive type.
For example: 'str' -> 'string_value', 'double' -> 'double_value', etc.
@ -177,30 +165,6 @@ def get_parameter_type_field_name(type_name: Optional[str]) -> str:
get_parameter_type(type_name))
def get_input_artifact_type_schema(
input_name: str,
inputs: List[v1_structures.InputSpec],
) -> Optional[str]:
"""Find the input artifact type by input name.
Args:
input_name: The name of the component input.
inputs: The list of InputSpec
Returns:
The artifact type schema of the input.
Raises:
AssertionError if input not found, or input found but not an artifact type.
"""
for component_input in inputs:
if component_input.name == input_name:
assert not is_parameter_type(
component_input.type), 'Input is not an artifact type.'
return get_artifact_type_schema(component_input.type)
assert False, 'Input not found.'
class InconsistentTypeException(Exception):
"""InconsistencyTypeException is raised when two types are not
consistent."""
@ -211,8 +175,8 @@ class InconsistentTypeWarning(Warning):
def verify_type_compatibility(
given_type: Union[str, dict],
expected_type: Union[str, dict],
given_type: str,
expected_type: str,
error_message_prefix: str,
) -> bool:
"""Verifies the given argument type is compatible with the expected type.
@ -229,37 +193,50 @@ def verify_type_compatibility(
InconsistentTypeException if types are incompatible and TYPE_CHECK==True.
"""
# Generic "Artifact" type is compatible with any specific artifact types.
if not is_parameter_type(
str(given_type)) and (str(given_type).lower() == 'artifact' or
str(expected_type).lower() == 'artifact'):
return True
# Special handling for PipelineTaskFinalStatus, treat it as Dict type.
if is_task_final_status_type(given_type):
given_type = 'Dict'
# Normalize parameter type names.
if is_parameter_type(given_type):
given_type = get_parameter_type_name(given_type)
if is_parameter_type(expected_type):
expected_type = get_parameter_type_name(expected_type)
types_are_compatible = False
is_parameter = is_parameter_type(str(given_type))
types_are_compatible = _check_types(given_type, expected_type)
# handle parameters
if is_parameter:
# Normalize parameter type names.
if is_parameter_type(given_type):
given_type = get_parameter_type_name(given_type)
if is_parameter_type(expected_type):
expected_type = get_parameter_type_name(expected_type)
types_are_compatible = check_parameter_type_compatibility(
given_type, expected_type)
else:
# handle artifacts
given_schema_title, given_schema_version = given_type.split('@')
expected_schema_title, expected_schema_version = expected_type.split(
'@')
if artifact_types.Artifact.schema_title in {
given_schema_title, expected_schema_title
}:
types_are_compatible = True
else:
schema_title_compatible = given_schema_title == expected_schema_title
schema_version_compatible = given_schema_version.split(
'.')[0] == expected_schema_version.split('.')[0]
types_are_compatible = schema_title_compatible and schema_version_compatible
# maybe raise, maybe warn, return bool
if not types_are_compatible:
error_text = error_message_prefix + (
'Argument type "{}" is incompatible with the input type "{}"'
).format(str(given_type), str(expected_type))
error_text = error_message_prefix + f'Argument type "{given_type}" is incompatible with the input type "{expected_type}"'
if kfp.TYPE_CHECK:
raise InconsistentTypeException(error_text)
else:
warnings.warn(InconsistentTypeWarning(error_text))
return types_are_compatible
def _check_types(
def check_parameter_type_compatibility(
given_type: Union[str, dict],
expected_type: Union[str, dict],
):
@ -349,34 +326,16 @@ class TypeCheckManager:
# for reading in IR back to in-memory data structures
IR_TYPE_TO_IN_MEMORY_SPEC_TYPE = {
'STRING':
'String',
'NUMBER_INTEGER':
'Integer',
'NUMBER_DOUBLE':
'Float',
'LIST':
'List',
'STRUCT':
'Dict',
'BOOLEAN':
'Boolean',
artifact_types.Artifact.schema_title:
'Artifact',
artifact_types.Model.schema_title:
'Model',
artifact_types.Dataset.schema_title:
'Dataset',
artifact_types.Metrics.schema_title:
'Metrics',
artifact_types.ClassificationMetrics.schema_title:
'ClassificationMetrics',
artifact_types.SlicedClassificationMetrics.schema_title:
'SlicedClassificationMetrics',
artifact_types.HTML.schema_title:
'HTML',
artifact_types.Markdown.schema_title:
'Markdown',
'STRING': 'String',
'NUMBER_INTEGER': 'Integer',
'NUMBER_DOUBLE': 'Float',
'LIST': 'List',
'STRUCT': 'Dict',
'BOOLEAN': 'Boolean',
}
IN_MEMORY_SPEC_TYPE_TO_IR_TYPE = {
v: k for k, v in IR_TYPE_TO_IN_MEMORY_SPEC_TYPE.items()
}
@ -384,9 +343,10 @@ def get_canonical_name_for_outer_generic(type_name: Any) -> str:
"""Maps a complex/nested type name back to a canonical type.
E.g.
>>> get_canonical_name_for_outer_generic('typing.List[str]')
get_canonical_name_for_outer_generic('typing.List[str]')
'List'
>>> get_canonical_name_for_outer_generic('typing.Dict[typing.List[str], str]')
get_canonical_name_for_outer_generic('typing.Dict[typing.List[str], str]')
'Dict'
Args:
@ -399,3 +359,67 @@ def get_canonical_name_for_outer_generic(type_name: Any) -> str:
return type_name
return type_name.lstrip('typing.').split('[')[0]
def create_bundled_artifact_type(schema_title: str,
schema_version: Optional[str] = None) -> str:
if not isinstance(schema_title, str):
raise ValueError
return schema_title + '@' + (
schema_version or DEFAULT_ARTIFACT_SCHEMA_VERSION)
def validate_schema_version(schema_version: str) -> None:
split_schema_version = schema_version.split('.')
if len(split_schema_version) != 3:
raise TypeError(
f'Artifact schema_version must use three-part semantic versioning. Got: {schema_version}'
)
def validate_schema_title(schema_title: str) -> None:
split_schema_title = schema_title.split('.')
if len(split_schema_title) != 2:
raise TypeError(
f'Artifact schema_title must have both a namespace and a name, separated by a `.`. Got: {schema_title}'
)
namespace, _ = split_schema_title
if namespace not in {'system', 'google'}:
raise TypeError(
f'Artifact schema_title must belong to `system` or `google` namespace. Got: {schema_title}'
)
def validate_bundled_artifact_type(type_: str) -> None:
split_type = type_.split('@')
# two parts and neither are empty strings
if len(split_type) != 2 or not all(split_type):
raise TypeError(
f'Artifacts must have both a schema_title and a schema_version, separated by `@`. Got: {type_}'
)
schema_title, schema_version = split_type
validate_schema_title(schema_title)
validate_schema_version(schema_version)
def _annotation_to_type_struct(annotation):
if not annotation or annotation == inspect.Parameter.empty:
return None
if hasattr(annotation, 'to_dict'):
annotation = annotation.to_dict()
if isinstance(annotation, dict):
return annotation
if isinstance(annotation, type):
type_struct = get_canonical_type_name_for_type(annotation)
if type_struct:
return type_struct
elif type_annotations.is_artifact(annotation):
schema_title = annotation.schema_title
else:
schema_title = str(annotation.__name__)
elif hasattr(annotation, '__forward_arg__'):
schema_title = str(annotation.__forward_arg__)
else:
schema_title = str(annotation)
type_struct = get_canonical_type_name_for_type(schema_title)
return type_struct or schema_title
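
Taken together, the new artifact compatibility rule is: schema_title must match and the major schema_version must agree, with system.Artifact acting as a wildcard on either side. A small sketch mirroring the parameterized cases in the test file below:

from kfp.components.types import type_utils

# system.Artifact on either side is compatible with anything, regardless of version.
assert type_utils.verify_type_compatibility(
    given_type='google.VertexModel@1.0.0',
    expected_type='system.Artifact@0.0.1',
    error_message_prefix='')

# Same schema_title and same major version: compatible.
assert type_utils.verify_type_compatibility(
    given_type='system.Metrics@1.0.0',
    expected_type='system.Metrics@1.2.3',
    error_message_prefix='')

# A schema_title mismatch or a major-version mismatch raises
# InconsistentTypeException when kfp.TYPE_CHECK is enabled.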

View File

@ -16,7 +16,6 @@ import unittest
from absl.testing import parameterized
import kfp
from kfp.components import v1_structures
from kfp.components.types import artifact_types
from kfp.components.types import type_utils
from kfp.components.types.type_utils import InconsistentTypeException
@ -79,137 +78,6 @@ class TypeUtilsTest(parameterized.TestCase):
self.assertEqual(expected_result,
type_utils.is_parameter_type(type_name))
@parameterized.parameters(
{
'artifact_class_or_type_name':
'Model',
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.Model', schema_version='0.0.1')
},
{
'artifact_class_or_type_name':
artifact_types.Model,
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.Model', schema_version='0.0.1')
},
{
'artifact_class_or_type_name':
'Dataset',
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.Dataset', schema_version='0.0.1')
},
{
'artifact_class_or_type_name':
artifact_types.Dataset,
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.Dataset', schema_version='0.0.1')
},
{
'artifact_class_or_type_name':
'Metrics',
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.Metrics', schema_version='0.0.1')
},
{
'artifact_class_or_type_name':
artifact_types.Metrics,
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.Metrics', schema_version='0.0.1')
},
{
'artifact_class_or_type_name':
'ClassificationMetrics',
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.ClassificationMetrics',
schema_version='0.0.1')
},
{
'artifact_class_or_type_name':
artifact_types.ClassificationMetrics,
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.ClassificationMetrics',
schema_version='0.0.1')
},
{
'artifact_class_or_type_name':
'SlicedClassificationMetrics',
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.SlicedClassificationMetrics',
schema_version='0.0.1')
},
{
'artifact_class_or_type_name':
artifact_types.SlicedClassificationMetrics,
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.SlicedClassificationMetrics',
schema_version='0.0.1')
},
{
'artifact_class_or_type_name':
'arbitrary name',
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.Artifact', schema_version='0.0.1')
},
{
'artifact_class_or_type_name':
_ArbitraryClass,
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.Artifact', schema_version='0.0.1')
},
{
'artifact_class_or_type_name':
artifact_types.HTML,
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.HTML', schema_version='0.0.1')
},
{
'artifact_class_or_type_name':
artifact_types.Markdown,
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.Markdown', schema_version='0.0.1')
},
{
'artifact_class_or_type_name':
'some-google-type',
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.Artifact', schema_version='0.0.1')
},
{
'artifact_class_or_type_name':
'google.VertexModel',
'expected_result':
pb.ArtifactTypeSchema(
schema_title='google.VertexModel', schema_version='0.0.1')
},
{
'artifact_class_or_type_name':
_VertexDummy,
'expected_result':
pb.ArtifactTypeSchema(
schema_title='google.VertexDummy', schema_version='0.0.2')
},
)
def test_get_artifact_type_schema(self, artifact_class_or_type_name,
expected_result):
self.assertEqual(
expected_result,
type_utils.get_artifact_type_schema(artifact_class_or_type_name))
@parameterized.parameters(
{
'given_type': 'Int',
@ -300,79 +168,53 @@ class TypeUtilsTest(parameterized.TestCase):
with self.assertRaises(AttributeError):
type_utils.get_parameter_type_schema(None)
def test_get_input_artifact_type_schema(self):
input_specs = [
v1_structures.InputSpec(name='input1', type='String'),
v1_structures.InputSpec(name='input2', type='Model'),
v1_structures.InputSpec(name='input3', type=None),
]
# input not found.
with self.assertRaises(AssertionError) as cm:
type_utils.get_input_artifact_type_schema('input0', input_specs)
self.assertEqual('Input not found.', str(cm))
# input found, but it doesn't map to an artifact type.
with self.assertRaises(AssertionError) as cm:
type_utils.get_input_artifact_type_schema('input1', input_specs)
self.assertEqual('Input is not an artifact type.', str(cm))
# input found, and a matching artifact type schema returned.
self.assertEqual(
'system.Model',
type_utils.get_input_artifact_type_schema('input2',
input_specs).schema_title)
# input found, and the default artifact type schema returned.
self.assertEqual(
'system.Artifact',
type_utils.get_input_artifact_type_schema('input3',
input_specs).schema_title)
@parameterized.parameters(
# param True
{
'given_type': 'String',
'expected_type': 'String',
'is_compatible': True,
},
# param False
{
'given_type': 'String',
'expected_type': 'Integer',
'is_compatible': False,
},
# param Artifact compat, irrespective of version
{
'given_type': {
'type_a': {
'property': 'property_b',
}
},
'expected_type': {
'type_a': {
'property': 'property_b',
}
},
'given_type': 'system.Artifact@1.0.0',
'expected_type': 'system.Model@0.0.1',
'is_compatible': True,
},
# param Artifact compat, irrespective of version, other way
{
'given_type': {
'type_a': {
'property': 'property_b',
}
},
'expected_type': {
'type_a': {
'property': 'property_c',
}
},
'given_type': 'system.Metrics@1.0.0',
'expected_type': 'system.Artifact@0.0.1',
'is_compatible': True,
},
# different schema_title incompat, irrespective of version
{
'given_type': 'system.Metrics@1.0.0',
'expected_type': 'system.Dataset@1.0.0',
'is_compatible': False,
},
# different major version incompat
{
'given_type': 'Artifact',
'expected_type': 'Model',
'is_compatible': True,
'given_type': 'system.Metrics@1.0.0',
'expected_type': 'system.Metrics@2.1.1',
'is_compatible': False,
},
# namespace must match
{
'given_type': 'Metrics',
'expected_type': 'Artifact',
'given_type': 'google.Model@1.0.0',
'expected_type': 'system.Model@1.0.0',
'is_compatible': False,
},
# system.Artifact compatible works across namespace
{
'given_type': 'google.Model@1.0.0',
'expected_type': 'system.Artifact@1.0.0',
'is_compatible': True,
},
)
@ -455,20 +297,182 @@ class TypeUtilsTest(parameterized.TestCase):
type_utils.is_task_final_status_type(given_type))
class TestGetArtifactTypeSchema(parameterized.TestCase):
@parameterized.parameters([
# v2 standard system types
{
'schema_title': 'system.Artifact@0.0.1',
'exp_schema_title': 'system.Artifact',
'exp_schema_version': '0.0.1',
},
{
'schema_title': 'system.Dataset@0.0.1',
'exp_schema_title': 'system.Dataset',
'exp_schema_version': '0.0.1',
},
# google type with schema_version
{
'schema_title': 'google.VertexDataset@0.0.2',
'exp_schema_title': 'google.VertexDataset',
'exp_schema_version': '0.0.2',
},
])
def test_valid(
self,
schema_title: str,
exp_schema_title: str,
exp_schema_version: str,
):
artifact_type_schema = type_utils.bundled_artifact_to_artifact_proto(
schema_title)
self.assertEqual(artifact_type_schema.schema_title, exp_schema_title)
self.assertEqual(artifact_type_schema.schema_version,
exp_schema_version)
class TestTypeCheckManager(unittest.TestCase):
def test_false_to_true(self):
kfp.TYPE_CHECK = False
with type_utils.TypeCheckManager(enable=True):
self.assertEqual(kfp.TYPE_CHECK, True)
self.assertEqual(kfp.TYPE_CHECK, False)
def test_false_to_true(self):
def test_true_to_false(self):
kfp.TYPE_CHECK = True
with type_utils.TypeCheckManager(enable=False):
self.assertEqual(kfp.TYPE_CHECK, False)
self.assertEqual(kfp.TYPE_CHECK, True)
class TestCreateBundledArtifactType(parameterized.TestCase):
@parameterized.parameters([
{
'schema_title': 'system.Artifact',
'schema_version': '0.0.2',
'expected': 'system.Artifact@0.0.2'
},
{
'schema_title': 'google.Artifact',
'schema_version': '0.0.3',
'expected': 'google.Artifact@0.0.3'
},
{
'schema_title': 'system.Artifact',
'schema_version': None,
'expected': 'system.Artifact@0.0.1'
},
{
'schema_title': 'google.Artifact',
'schema_version': None,
'expected': 'google.Artifact@0.0.1'
},
])
def test(self, schema_title: str, schema_version: Union[str, None],
expected: str):
actual = type_utils.create_bundled_artifact_type(
schema_title, schema_version)
self.assertEqual(actual, expected)
class TestValidateBundledArtifactType(parameterized.TestCase):
@parameterized.parameters([
{
'type_': 'system.Artifact@0.0.1'
},
{
'type_': 'system.Dataset@2.0.1'
},
{
'type_': 'google.Model@2.0.0'
},
])
def test_valid(self, type_: str):
type_utils.validate_bundled_artifact_type(type_)
@parameterized.parameters([
{
'type_': 'system.Artifact'
},
{
'type_': '2.0.1'
},
{
'type_': 'google.Model2.0.0'
},
{
'type_': 'google.Model2.0.0'
},
{
'type_': 'google.Model@'
},
{
'type_': 'google.Model@'
},
{
'type_': '@2.0.0'
},
])
def test_missing_part(self, type_: str):
with self.assertRaisesRegex(
TypeError,
r'Artifacts must have both a schema_title and a schema_version, separated by `@`'
):
type_utils.validate_bundled_artifact_type(type_)
@parameterized.parameters([
{
'type_': 'system@0.0.1'
},
{
'type_': 'google@0.0.1'
},
{
'type_': 'other@0.0.1'
},
{
'type_': 'Artifact@0.0.1'
},
])
def test_one_part_schema_title(self, type_: str):
with self.assertRaisesRegex(
TypeError,
r'Artifact schema_title must have both a namespace and a name'):
type_utils.validate_bundled_artifact_type(type_)
@parameterized.parameters([
{
'type_': 'other.Artifact@0.0.1'
},
])
def test_must_be_system_or_google_namespace(self, type_: str):
with self.assertRaisesRegex(
TypeError,
r'Artifact schema_title must belong to `system` or `google` namespace'
):
type_utils.validate_bundled_artifact_type(type_)
@parameterized.parameters([
{
'type_': 'system.Artifact@0'
},
{
'type_': 'system.Artifact@0.0'
},
{
'type_': 'google.Artifact@0.01'
},
])
def test_must_be_valid_semantic_version(self, type_: str):
with self.assertRaisesRegex(
TypeError,
r'Artifact schema_version must use three-part semantic versioning'
):
type_utils.validate_bundled_artifact_type(type_)
if __name__ == '__main__':
unittest.main()

View File

@ -18,7 +18,7 @@ from typing import List, Optional, Tuple, Union
from kfp.deprecated.components import _structures as structures
from kfp.deprecated.dsl import _for_loop, _pipeline_param, dsl_utils
from kfp.pipeline_spec import pipeline_spec_pb2
from kfp.components.types import type_utils
from kfp.deprecated.dsl import type_utils
def additional_input_name_for_pipelineparam(

View File

@ -17,6 +17,13 @@ This module will be removed in KFP v2.0.
"""
import warnings
from kfp.components.types import type_utils
from kfp.pipeline_spec import pipeline_spec_pb2
import inspect
from typing import Union, Type
import re
from typing import List, Optional
from kfp.deprecated.components import _structures
from kfp.components.types import artifact_types
warnings.warn(
'Module kfp.dsl.type_utils is deprecated and will be removed'
@ -24,6 +31,66 @@ warnings.warn(
category=FutureWarning)
is_parameter_type = type_utils.is_parameter_type
get_artifact_type_schema = type_utils.get_artifact_type_schema
get_parameter_type = type_utils.get_parameter_type
get_input_artifact_type_schema = type_utils.get_input_artifact_type_schema
# Copied from v2 to avoid a dependency of the deprecated package on v2, since changes to this code in v2 would otherwise break deprecated/.
_GOOGLE_TYPES_PATTERN = r'^google.[A-Za-z]+$'
_GOOGLE_TYPES_VERSION = '0.0.1'
_ARTIFACT_CLASSES_MAPPING = {
'model': artifact_types.Model,
'dataset': artifact_types.Dataset,
'metrics': artifact_types.Metrics,
'classificationmetrics': artifact_types.ClassificationMetrics,
'slicedclassificationmetrics': artifact_types.SlicedClassificationMetrics,
'html': artifact_types.HTML,
'markdown': artifact_types.Markdown,
}
def get_artifact_type_schema(
artifact_class_or_type_name: Optional[Union[str,
Type[artifact_types.Artifact]]]
) -> pipeline_spec_pb2.ArtifactTypeSchema:
"""Gets the IR I/O artifact type msg for the given ComponentSpec I/O
type."""
artifact_class = artifact_types.Artifact
if isinstance(artifact_class_or_type_name, str):
if re.match(_GOOGLE_TYPES_PATTERN, artifact_class_or_type_name):
return pipeline_spec_pb2.ArtifactTypeSchema(
schema_title=artifact_class_or_type_name,
schema_version=_GOOGLE_TYPES_VERSION,
)
artifact_class = _ARTIFACT_CLASSES_MAPPING.get(
artifact_class_or_type_name.lower(), artifact_types.Artifact)
elif inspect.isclass(artifact_class_or_type_name) and issubclass(
artifact_class_or_type_name, artifact_types.Artifact):
artifact_class = artifact_class_or_type_name
return pipeline_spec_pb2.ArtifactTypeSchema(
schema_title=artifact_class.schema_title,
schema_version=artifact_class.schema_version)
def get_input_artifact_type_schema(
input_name: str,
inputs: List[_structures.InputSpec],
) -> Optional[str]:
"""Find the input artifact type by input name.
Args:
input_name: The name of the component input.
inputs: The list of InputSpec
Returns:
The artifact type schema of the input.
Raises:
AssertionError if input not found, or input found but not an artifact type.
"""
for component_input in inputs:
if component_input.name == input_name:
assert not is_parameter_type(
component_input.type), 'Input is not an artifact type.'
return get_artifact_type_schema(component_input.type)
assert False, 'Input not found.'

View File

@ -1,53 +0,0 @@
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from kfp import compiler
from kfp import components
from kfp import dsl
from kfp.dsl import component
from kfp.dsl import Input
class VertexModel(dsl.Artifact):
schema_title = 'google.VertexModel'
producer_op = components.load_component_from_text("""
name: producer
outputs:
- {name: model, type: google.VertexModel}
implementation:
container:
image: dummy
command:
- cmd
args:
- {outputPath: model}
""")
@component
def consumer_op(model: Input[VertexModel]):
pass
@dsl.pipeline(name='pipeline-with-gcpc-types')
def my_pipeline():
consumer_op(model=producer_op().outputs['model'])
if __name__ == '__main__':
compiler.Compiler().compile(
pipeline_func=my_pipeline,
package_path=__file__.replace('.py', '.yaml'))

View File

@ -1,81 +0,0 @@
components:
comp-consumer-op:
executorLabel: exec-consumer-op
inputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
comp-producer:
executorLabel: exec-producer
outputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.1
deploymentSpec:
executors:
exec-consumer-op:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- consumer_op
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-alpha.5'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef consumer_op(model: Input[VertexModel]):\n pass\n\n"
image: python:3.7
exec-producer:
container:
args:
- '{{$.outputs.artifacts[''model''].path}}'
command:
- cmd
image: dummy
pipelineInfo:
name: pipeline-with-gcpc-types
root:
dag:
tasks:
consumer-op:
cachingOptions:
enableCache: true
componentRef:
name: comp-consumer-op
dependentTasks:
- producer
inputs:
artifacts:
model:
taskOutputArtifact:
outputArtifactKey: model
producerTask: producer
taskInfo:
name: consumer-op
producer:
cachingOptions:
enableCache: true
componentRef:
name: comp-producer
taskInfo:
name: producer
schemaVersion: 2.1.0
sdkVersion: kfp-2.0.0-alpha.5

View File

@ -0,0 +1,119 @@
# Copyright 2022 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import tempfile
# NOTE: this is a compilation test only and is not executable, since the temporary aiplatform module created below is a local stand-in that cannot be installed or imported at component runtime
def create_temporary_google_artifact_package(
temp_dir: tempfile.TemporaryDirectory) -> None:
"""Creates a fake temporary module that can be used as a Vertex SDK mock
for testing purposes."""
import inspect
import os
import textwrap
class VertexModel:
schema_title = 'google.VertexModel'
schema_version = '0.0.0'
def __init__(self, name: str, uri: str, metadata: dict) -> None:
self.name = name
self.uri = uri
self.metadata = metadata
@property
def path(self) -> str:
return self.uri.replace('gs://', '/')
class VertexDataset:
schema_title = 'google.VertexDataset'
schema_version = '0.0.0'
def __init__(self, name: str, uri: str, metadata: dict) -> None:
self.name = name
self.uri = uri
self.metadata = metadata
@property
def path(self) -> str:
return self.uri.replace('gs://', '/')
class_source = textwrap.dedent(
inspect.getsource(VertexModel)) + '\n\n' + textwrap.dedent(
inspect.getsource(VertexDataset))
with open(os.path.join(temp_dir.name, 'aiplatform.py'), 'w') as f:
f.write(class_source)
# remove try finally when a third-party package adds pre-registered custom artifact types that we can use for testing
try:
temp_dir = tempfile.TemporaryDirectory()
sys.path.append(temp_dir.name)
create_temporary_google_artifact_package(temp_dir)
import aiplatform
from aiplatform import VertexDataset
from aiplatform import VertexModel
from kfp import compiler
from kfp import dsl
from kfp.dsl import Input
from kfp.dsl import Output
PACKAGES_TO_INSTALL = ['aiplatform']
@dsl.component(packages_to_install=PACKAGES_TO_INSTALL)
def model_producer(model: Output[aiplatform.VertexModel]):
assert isinstance(model, aiplatform.VertexModel), type(model)
with open(model.path, 'w') as f:
f.write('my model')
@dsl.component(packages_to_install=PACKAGES_TO_INSTALL)
def model_consumer(model: Input[VertexModel],
dataset: Input[VertexDataset]):
print('Model')
print('artifact.type: ', type(model))
print('artifact.name: ', model.name)
print('artifact.uri: ', model.uri)
print('artifact.metadata: ', model.metadata)
print('Dataset')
print('artifact.type: ', type(dataset))
print('artifact.name: ', dataset.name)
print('artifact.uri: ', dataset.uri)
print('artifact.metadata: ', dataset.metadata)
@dsl.pipeline(name='pipeline-with-google-types')
def my_pipeline():
producer_task = model_producer()
importer = dsl.importer(
artifact_uri='gs://ml-pipeline-playground/shakespeare1.txt',
artifact_class=VertexDataset,
reimport=False,
metadata={'key': 'value'})
model_consumer(
model=producer_task.outputs['model'],
dataset=importer.output,
)
if __name__ == '__main__':
ir_file = __file__.replace('.py', '.yaml')
compiler.Compiler().compile(
pipeline_func=my_pipeline, package_path=ir_file)
finally:
sys.path.pop()
temp_dir.cleanup()
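As a quick way to sanity-check the compiled IR against the golden snapshot that follows, a hedged sketch of a spot check; it assumes PyYAML is available and that the compiled file sits next to the script, and it is not part of the test itself.

# Hedged sketch: assumes PyYAML and the field layout shown in the golden
# snapshot below (components -> outputDefinitions -> artifacts -> artifactType).
import yaml

with open('pipeline_with_google_artifact_type.yaml') as f:
    spec = yaml.safe_load(f)

model_type = spec['components']['comp-model-producer'][
    'outputDefinitions']['artifacts']['model']['artifactType']
assert model_type['schemaTitle'] == 'google.VertexModel'
assert model_type['schemaVersion'] == '0.0.0'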

View File

@ -0,0 +1,150 @@
components:
comp-importer:
executorLabel: exec-importer
inputDefinitions:
parameters:
uri:
parameterType: STRING
outputDefinitions:
artifacts:
artifact:
artifactType:
schemaTitle: google.VertexDataset
schemaVersion: 0.0.0
comp-model-consumer:
executorLabel: exec-model-consumer
inputDefinitions:
artifacts:
dataset:
artifactType:
schemaTitle: google.VertexDataset
schemaVersion: 0.0.0
model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.0
comp-model-producer:
executorLabel: exec-model-producer
outputDefinitions:
artifacts:
model:
artifactType:
schemaTitle: google.VertexModel
schemaVersion: 0.0.0
deploymentSpec:
executors:
exec-importer:
importer:
artifactUri:
constant: gs://ml-pipeline-playground/shakespeare1.txt
metadata:
key: value
typeSchema:
schemaTitle: google.VertexDataset
schemaVersion: 0.0.0
exec-model-consumer:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- model_consumer
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'aiplatform'\
\ 'kfp==2.0.0-beta.4' 'kfp==2.0.0-beta.4' && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef model_consumer(model: Input[VertexModel],\n \
\ dataset: Input[VertexDataset]):\n print('Model')\n print('artifact.type:\
\ ', type(model))\n print('artifact.name: ', model.name)\n print('artifact.uri:\
\ ', model.uri)\n print('artifact.metadata: ', model.metadata)\n\n \
\ print('Dataset')\n print('artifact.type: ', type(dataset))\n print('artifact.name:\
\ ', dataset.name)\n print('artifact.uri: ', dataset.uri)\n print('artifact.metadata:\
\ ', dataset.metadata)\n\n"
image: python:3.7
exec-model-producer:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- model_producer
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'aiplatform'\
\ 'kfp==2.0.0-beta.4' && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef model_producer(model: Output[aiplatform.VertexModel]):\n\n \
\ assert isinstance(model, aiplatform.VertexModel), type(model)\n with\
\ open(model.path, 'w') as f:\n f.write('my model')\n\n"
image: python:3.7
pipelineInfo:
name: pipeline-with-google-types
root:
dag:
tasks:
importer:
cachingOptions:
enableCache: true
componentRef:
name: comp-importer
inputs:
parameters:
uri:
runtimeValue:
constant: gs://ml-pipeline-playground/shakespeare1.txt
taskInfo:
name: importer
model-consumer:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-consumer
dependentTasks:
- importer
- model-producer
inputs:
artifacts:
dataset:
taskOutputArtifact:
outputArtifactKey: artifact
producerTask: importer
model:
taskOutputArtifact:
outputArtifactKey: model
producerTask: model-producer
taskInfo:
name: model-consumer
model-producer:
cachingOptions:
enableCache: true
componentRef:
name: comp-model-producer
taskInfo:
name: model-producer
schemaVersion: 2.1.0
sdkVersion: kfp-2.0.0-beta.4

View File

@ -102,7 +102,7 @@ deploymentSpec:
metadata:
key: value
typeSchema:
schemaTitle: system.Artifact
schemaTitle: system.Dataset
schemaVersion: 0.0.1
exec-importer-2:
importer:
@ -110,7 +110,7 @@ deploymentSpec:
runtimeParameter: uri
reimport: true
typeSchema:
schemaTitle: system.Artifact
schemaTitle: system.Dataset
schemaVersion: 0.0.1
exec-train:
container:
@ -124,7 +124,7 @@ deploymentSpec:
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-alpha.5'\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.4'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
@ -156,7 +156,7 @@ deploymentSpec:
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-alpha.5'\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.4'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
@ -231,4 +231,4 @@ root:
defaultValue: gs://ml-pipeline-playground/shakespeare2.txt
parameterType: STRING
schemaVersion: 2.1.0
sdkVersion: kfp-2.0.0-alpha.5
sdkVersion: kfp-2.0.0-beta.4
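These hunks show the importer's typeSchema now carrying the declared artifact class (system.Dataset) instead of falling back to system.Artifact. A minimal sketch of the pipeline-side call that yields such a typeSchema, assuming dsl.Dataset is the system Dataset type; the pipeline name and metadata values are illustrative:

# Hedged sketch; the URI matches the sample data in this diff, other values
# are illustrative placeholders.
from kfp import dsl

@dsl.pipeline(name='importer-example')
def importer_pipeline():
    dataset = dsl.importer(
        artifact_uri='gs://ml-pipeline-playground/shakespeare1.txt',
        artifact_class=dsl.Dataset,
        reimport=True,
        metadata={'key': 'value'})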

View File

@ -95,9 +95,6 @@ pipelines:
- module: component_with_optional_inputs
name: pipeline
execute: true
- module: pipeline_with_gcpc_types
name: my_pipeline
execute: false
- module: pipeline_with_placeholders
name: my_pipeline
execute: true
@ -138,6 +135,9 @@ pipelines:
- module: pipeline_as_exit_task
name: my_pipeline
execute: false
- module: pipeline_with_google_artifact_type
name: my_pipeline
execute: false
components:
test_data_dir: sdk/python/test_data/components
read: true