feat(sdk): support optional artifact inputs (#8623)

* add compiler tests

* support optional artifact inputs

* update executor test

* update existing golden snapshot

* add new compiler test/snapshot

* add release note

* bump kfp-pipeline-spec dependency
Connor McCarthy 2022-12-28 13:40:12 -05:00 committed by GitHub
parent 1d8272b4cd
commit 151e8a3c6e
12 changed files with 572 additions and 26 deletions

View File

@@ -8,6 +8,7 @@
## Bug fixes and other changes
* Fully support optional parameter inputs by writing `isOptional` field to IR [\#8612](https://github.com/kubeflow/pipelines/pull/8612)
* Add support for optional artifact inputs (toward feature parity with KFP SDK v1) [\#8623](https://github.com/kubeflow/pipelines/pull/8623)
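As a usage sketch of the change above (names are illustrative; assumes the kfp v2 SDK from this release), an artifact input becomes optional by annotating it with Optional[...] and a None default, and callers may then omit it:

from typing import Optional

from kfp import dsl
from kfp.dsl import Artifact, Input


@dsl.component
def maybe_print(artifact: Optional[Input[Artifact]] = None):
    # The executor resolves the input to None when no artifact is provided.
    if artifact is None:
        print('No artifact provided!')
    else:
        print(artifact.uri)


@dsl.pipeline(name='optional-artifact-demo')
def demo_pipeline():
    maybe_print()  # legal: the artifact input may now be omitted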
## Documentation updates
# 2.0.0-beta.9

View File

@@ -30,6 +30,7 @@ from kfp import dsl
from kfp.cli import cli
from kfp.compiler import compiler
from kfp.components.types import type_utils
from kfp.dsl import Artifact
from kfp.dsl import ContainerSpec
from kfp.dsl import Input
from kfp.dsl import Model
@@ -2443,6 +2444,213 @@ class TestCompileThenLoadThenUseWithOptionalInputs(unittest.TestCase):
my_pipeline.pipeline_spec.components['comp-inner-pipeline']
.input_definitions.parameters['x'].is_optional)
def test__component__artifact(self):
@dsl.component
def comp(x: Optional[Input[Artifact]] = None):
print(x)
@dsl.pipeline
def my_pipeline():
comp()
self.assertTrue(my_pipeline.pipeline_spec.components['comp-comp']
.input_definitions.artifacts['x'].is_optional)
with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, 'comp.yaml')
compiler.Compiler().compile(comp, path)
loaded_comp = components.load_component_from_file(path)
@dsl.pipeline
def my_pipeline():
loaded_comp()
self.assertTrue(my_pipeline.pipeline_spec.components['comp-comp']
.input_definitions.artifacts['x'].is_optional)
def test__pipeline__artifact(self):
@dsl.component
def comp(x: Optional[Input[Artifact]] = None):
print(x)
@dsl.pipeline
def inner_pipeline(x: Optional[Input[Artifact]] = None):
comp(x=x)
@dsl.pipeline
def my_pipeline():
inner_pipeline()
self.assertTrue(my_pipeline.pipeline_spec.components['comp-comp']
.input_definitions.artifacts['x'].is_optional)
with tempfile.TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, 'comp.yaml')
compiler.Compiler().compile(comp, path)
loaded_comp = components.load_component_from_file(path)
@dsl.pipeline
def my_pipeline():
loaded_comp()
self.assertTrue(my_pipeline.pipeline_spec.components['comp-comp']
.input_definitions.artifacts['x'].is_optional)
class TestCompileOptionalArtifacts(unittest.TestCase):
def test_python_comp(self):
@dsl.component
def comp(x: Optional[Input[Artifact]] = None):
print(x)
artifact_spec_from_root = comp.pipeline_spec.root.input_definitions.artifacts[
'x']
self.assertTrue(artifact_spec_from_root.is_optional)
artifact_spec_from_comp = comp.pipeline_spec.components[
'comp-comp'].input_definitions.artifacts['x']
self.assertTrue(artifact_spec_from_comp.is_optional)
def test_python_comp_with_model(self):
@dsl.component
def comp(x: Optional[Input[Model]] = None):
print(x)
artifact_spec_from_root = comp.pipeline_spec.root.input_definitions.artifacts[
'x']
self.assertTrue(artifact_spec_from_root.is_optional)
artifact_spec_from_comp = comp.pipeline_spec.components[
'comp-comp'].input_definitions.artifacts['x']
self.assertTrue(artifact_spec_from_comp.is_optional)
def test_python_comp_without_optional_type_modifier(self):
@dsl.component
def comp(x: Input[Model] = None):
print(x)
artifact_spec_from_root = comp.pipeline_spec.root.input_definitions.artifacts[
'x']
self.assertTrue(artifact_spec_from_root.is_optional)
artifact_spec_from_comp = comp.pipeline_spec.components[
'comp-comp'].input_definitions.artifacts['x']
self.assertTrue(artifact_spec_from_comp.is_optional)
def test_container_comp(self):
@dsl.container_component
def comp(x: Optional[Input[Artifact]] = None):
return dsl.ContainerSpec(
image='alpine',
command=[
dsl.IfPresentPlaceholder(
input_name='x',
then=['echo', x.uri],
else_=['echo', 'No artifact provided!'])
])
artifact_spec_from_root = comp.pipeline_spec.root.input_definitions.artifacts[
'x']
self.assertTrue(artifact_spec_from_root.is_optional)
artifact_spec_from_comp = comp.pipeline_spec.components[
'comp-comp'].input_definitions.artifacts['x']
self.assertTrue(artifact_spec_from_comp.is_optional)
def test_pipeline(self):
@dsl.component
def comp():
print('hello')
@dsl.pipeline
def my_pipeline(x: Optional[Input[Artifact]] = None):
comp()
artifact_spec_from_root = my_pipeline.pipeline_spec.root.input_definitions.artifacts[
'x']
self.assertTrue(artifact_spec_from_root.is_optional)
def test_pipeline_without_optional_type_modifier(self):
@dsl.component
def comp():
print('hello')
@dsl.pipeline
def my_pipeline(x: Input[Artifact] = None):
comp()
artifact_spec_from_root = my_pipeline.pipeline_spec.root.input_definitions.artifacts[
'x']
self.assertTrue(artifact_spec_from_root.is_optional)
def test_pipeline_and_inner_component_together(self):
@dsl.component
def comp(x: Optional[Input[Model]] = None):
print(x)
@dsl.pipeline
def my_pipeline(x: Optional[Input[Artifact]] = None):
comp()
artifact_spec_from_root = my_pipeline.pipeline_spec.root.input_definitions.artifacts[
'x']
self.assertTrue(artifact_spec_from_root.is_optional)
artifact_spec_from_comp = my_pipeline.pipeline_spec.components[
'comp-comp'].input_definitions.artifacts['x']
self.assertTrue(artifact_spec_from_comp.is_optional)
def test_invalid_default_comp(self):
with self.assertRaisesRegex(
ValueError,
'Optional Input artifacts may only have default value None'):
@dsl.component
def comp(x: Optional[Input[Model]] = 1):
print(x)
with self.assertRaisesRegex(
ValueError,
'Optional Input artifacts may only have default value None'):
@dsl.component
def comp(x: Optional[Input[Model]] = Model(
name='', uri='', metadata={})):
print(x)
def test_invalid_default_pipeline(self):
@dsl.component
def comp():
print('hello')
with self.assertRaisesRegex(
ValueError,
'Optional Input artifacts may only have default value None'):
@dsl.pipeline
def my_pipeline(x: Input[Artifact] = 1):
comp()
with self.assertRaisesRegex(
ValueError,
'Optional Input artifacts may only have default value None'):
@dsl.pipeline
def my_pipeline(x: Input[Artifact] = Artifact(
name='', uri='', metadata={})):
comp()
if __name__ == '__main__':
unittest.main()

View File

@@ -337,34 +337,20 @@ def build_component_spec_for_task(
f'PipelineTaskFinalStatus can only be used in an exit task. Parameter {input_name} of a non exit task has type PipelineTaskFinalStatus.'
)
-    unprovided_artifact_inputs = []
-    for input_name, input_spec in (task.component_spec.inputs or {}).items():
-        if not type_utils.is_parameter_type(
-                input_spec.type) and input_name not in task.inputs:
-            unprovided_artifact_inputs.append(input_name)
    component_spec = _build_component_spec_from_component_spec_structure(
-        task.component_spec, unprovided_artifact_inputs)
+        task.component_spec)
    component_spec.executor_label = utils.sanitize_executor_label(task.name)
    return component_spec
def _build_component_spec_from_component_spec_structure(
-    component_spec_struct: structures.ComponentSpec,
-    unprovided_artifact_inputs: Optional[List[str]] = None,
+    component_spec_struct: structures.ComponentSpec
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds ComponentSpec proto from ComponentSpec structure."""
-    # TODO: remove unprovided_artifact_inputs from interface and all downstream logic when supporting optional artifact inputs
-    unprovided_artifact_inputs = unprovided_artifact_inputs or []
    component_spec = pipeline_spec_pb2.ComponentSpec()
    for input_name, input_spec in (component_spec_struct.inputs or {}).items():
-        # skip inputs not present, as a workaround to support optional inputs.
-        if input_name in unprovided_artifact_inputs and input_spec.default is None:
-            continue
# Special handling for PipelineTaskFinalStatus first.
if type_utils.is_task_final_status_type(input_spec.type):
component_spec.input_definitions.parameters[
@@ -390,6 +376,9 @@ def _build_component_spec_from_component_spec_structure(
input_name].artifact_type.CopyFrom(
type_utils.bundled_artifact_to_artifact_proto(
input_spec.type))
if input_spec.optional:
component_spec.input_definitions.artifacts[
input_name].is_optional = True
for output_name, output_spec in (component_spec_struct.outputs or
{}).items():
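The effect of removing the workaround can be observed on the compiled proto: the artifact input is kept in the component's inputDefinitions and flagged is_optional instead of being silently dropped. A minimal sketch mirroring the compiler tests in this PR:

from typing import Optional

from kfp import dsl
from kfp.dsl import Artifact, Input


@dsl.component
def comp(x: Optional[Input[Artifact]] = None):
    print(x)

assert comp.pipeline_spec.components['comp-comp'] \
    .input_definitions.artifacts['x'].is_optional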

View File

@@ -184,9 +184,18 @@ def extract_component_interface(
)
            if parameter.default is not inspect.Parameter.empty:
-                raise ValueError(
-                    'Default values for Input/Output artifacts are not supported.'
-                )
+                if passing_style in [
+                        type_annotations.OutputAnnotation,
+                        type_annotations.OutputPath,
+                ]:
+                    raise ValueError(
+                        'Default values for Output artifacts are not supported.'
+                    )
+                elif parameter.default is not None:
+                    raise ValueError(
+                        f'Optional Input artifacts may only have default value None. Got: {parameter.default}.'
+                    )
elif isinstance(
parameter_type,
(type_annotations.InputPath, type_annotations.OutputPath)):
@@ -229,7 +238,8 @@ def extract_component_interface(
type_struct, parameter_type.schema_version
) if type_annotations.is_artifact_class(
parameter_type) else type_struct
-            default = None if parameter.default == inspect.Parameter.empty else parameter.default
+            default = None if parameter.default == inspect.Parameter.empty or type_annotations.is_artifact_class(
+                parameter_type) else parameter.default
optional = parameter.default is not inspect.Parameter.empty or type_utils.is_task_final_status_type(
type_struct)
input_spec = structures.InputSpec(
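A sketch of the new validation path: any default other than None on an Input artifact raises the error introduced above.

from typing import Optional

from kfp import dsl
from kfp.dsl import Input, Model

try:
    @dsl.component
    def comp(x: Optional[Input[Model]] = 1):  # anything but None is rejected
        print(x)
except ValueError as err:
    print(err)  # Optional Input artifacts may only have default value None. Got: 1.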

View File

@@ -849,6 +849,21 @@ class ExecutorTest(unittest.TestCase):
},
})
def test_function_with_optional_input_artifact(self):
executor_input = """\
{
"inputs": {},
"outputs": {
"outputFile": "%(test_dir)s/output_metadata.json"
}
}
"""
def test_func(a: Optional[Input[Artifact]] = None):
self.assertIsNone(a)
self.execute(test_func, executor_input)
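At runtime the executor resolves an unwired optional artifact input to None, as the test above asserts, so component code can branch on presence. A minimal sketch (component and parameter names are illustrative):

from typing import Optional

from kfp import dsl
from kfp.dsl import Input, Model


@dsl.component
def train(starting_model: Optional[Input[Model]] = None):
    # None means no artifact was passed from an upstream task or pipeline input.
    if starting_model is None:
        print('training from scratch')
    else:
        print(f'warm-starting from {starting_model.uri}')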
def test_function_with_pipeline_task_final_status(self):
executor_input = """\
{

View File

@@ -88,9 +88,13 @@ class InputSpec:
type_ = ir_component_inputs_dict['artifactType']['schemaTitle']
schema_version = ir_component_inputs_dict['artifactType'][
'schemaVersion']
+            # TODO: would be better to extract these fields from the proto
+            # message, as False default would be preserved
+            optional = ir_component_inputs_dict.get('isOptional', False)
            return InputSpec(
                type=type_utils.create_bundled_artifact_type(
-                    type_, schema_version))
+                    type_, schema_version),
+                optional=optional)
def __eq__(self, other: Any) -> bool:
"""Equality comparison for InputSpec. Robust to different type

View File

@@ -16,7 +16,7 @@ google-cloud-storage>=2.2.1,<3
# NOTE: Maintainers, please do not require google-auth>=2.x.x
# Until this issue is closed
# https://github.com/googleapis/google-cloud-python/issues/10566
-kfp-pipeline-spec>=0.1.16,<0.2.0
+kfp-pipeline-spec>=0.1.17,<0.2.0
# Update the upper version whenever a new major version of the
# kfp-server-api package is released.
# Update the lower version when kfp sdk depends on new apis/fields in

View File

@@ -1,6 +1,6 @@
#
-# This file is autogenerated by pip-compile with python 3.7
-# To update, run:
+# This file is autogenerated by pip-compile with Python 3.7
+# by the following command:
#
# pip-compile --no-emit-index-url requirements.in
#
@@ -59,7 +59,7 @@ importlib-metadata==4.12.0
# jsonschema
jsonschema==3.2.0
# via -r requirements.in
-kfp-pipeline-spec==0.1.16
+kfp-pipeline-spec==0.1.17
# via -r requirements.in
kfp-server-api==2.0.0a4
# via -r requirements.in

View File

@@ -0,0 +1,66 @@
# Copyright 2022 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Optional
from kfp import compiler
from kfp import dsl
from kfp.dsl import Artifact
from kfp.dsl import Dataset
from kfp.dsl import Input
@dsl.component
def python_artifact_printer(artifact: Optional[Input[Artifact]] = None):
if artifact is not None:
print(artifact.name)
print(artifact.uri)
print(artifact.metadata)
else:
print('No artifact provided!')
@dsl.container_component
def custom_artifact_printer(artifact: Optional[Input[Artifact]] = None):
return dsl.ContainerSpec(
image='alpine',
command=[
dsl.IfPresentPlaceholder(
input_name='artifact',
then=['echo', artifact.uri],
else_=['echo', 'No artifact provided!'])
])
@dsl.pipeline
def inner_pipeline(dataset: Optional[Input[Dataset]] = None):
python_artifact_printer(artifact=dataset)
@dsl.pipeline(name='optional-artifact-pipeline')
def pipeline(dataset1: Optional[Input[Dataset]] = None):
custom_artifact_printer(artifact=dataset1)
custom_artifact_printer()
dataset2 = dsl.importer(
artifact_uri='gs://ml-pipeline-playground/shakespeare1.txt',
artifact_class=Dataset,
)
inner_pipeline(dataset=dataset2.output)
inner_pipeline()
if __name__ == '__main__':
compiler.Compiler().compile(
pipeline_func=pipeline, package_path=__file__.replace('.py', '.yaml'))

View File

@@ -0,0 +1,240 @@
# PIPELINE DEFINITION
# Name: optional-artifact-pipeline
# Inputs:
# dataset1: system.Dataset
components:
comp-custom-artifact-printer:
executorLabel: exec-custom-artifact-printer
inputDefinitions:
artifacts:
artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
isOptional: true
comp-custom-artifact-printer-2:
executorLabel: exec-custom-artifact-printer-2
inputDefinitions:
artifacts:
artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
isOptional: true
comp-importer:
executorLabel: exec-importer
inputDefinitions:
parameters:
uri:
parameterType: STRING
outputDefinitions:
artifacts:
artifact:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
comp-inner-pipeline:
dag:
tasks:
python-artifact-printer:
cachingOptions:
enableCache: true
componentRef:
name: comp-python-artifact-printer
inputs:
artifacts:
artifact:
componentInputArtifact: dataset
taskInfo:
name: python-artifact-printer
inputDefinitions:
artifacts:
dataset:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
isOptional: true
comp-inner-pipeline-2:
dag:
tasks:
python-artifact-printer:
cachingOptions:
enableCache: true
componentRef:
name: comp-python-artifact-printer-2
inputs:
artifacts:
artifact:
componentInputArtifact: dataset
taskInfo:
name: python-artifact-printer
inputDefinitions:
artifacts:
dataset:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
isOptional: true
comp-python-artifact-printer:
executorLabel: exec-python-artifact-printer
inputDefinitions:
artifacts:
artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
isOptional: true
comp-python-artifact-printer-2:
executorLabel: exec-python-artifact-printer-2
inputDefinitions:
artifacts:
artifact:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
isOptional: true
deploymentSpec:
executors:
exec-custom-artifact-printer:
container:
command:
- '{"IfPresent": {"InputName": "artifact", "Then": ["echo", "{{$.inputs.artifacts[''artifact''].uri}}"],
"Else": ["echo", "No artifact provided!"]}}'
image: alpine
exec-custom-artifact-printer-2:
container:
command:
- '{"IfPresent": {"InputName": "artifact", "Then": ["echo", "{{$.inputs.artifacts[''artifact''].uri}}"],
"Else": ["echo", "No artifact provided!"]}}'
image: alpine
exec-importer:
importer:
artifactUri:
constant: gs://ml-pipeline-playground/shakespeare1.txt
typeSchema:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
exec-python-artifact-printer:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- python_artifact_printer
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.9'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef python_artifact_printer(artifact: Optional[Input[Artifact]] =\
\ None):\n if artifact is not None:\n print(artifact.name)\n \
\ print(artifact.uri)\n print(artifact.metadata)\n else:\n\
\ print('No artifact provided!')\n\n"
image: python:3.7
exec-python-artifact-printer-2:
container:
args:
- --executor_input
- '{{$}}'
- --function_to_execute
- python_artifact_printer
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.9'\
\ && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef python_artifact_printer(artifact: Optional[Input[Artifact]] =\
\ None):\n if artifact is not None:\n print(artifact.name)\n \
\ print(artifact.uri)\n print(artifact.metadata)\n else:\n\
\ print('No artifact provided!')\n\n"
image: python:3.7
pipelineInfo:
name: optional-artifact-pipeline
root:
dag:
tasks:
custom-artifact-printer:
cachingOptions:
enableCache: true
componentRef:
name: comp-custom-artifact-printer
inputs:
artifacts:
artifact:
componentInputArtifact: dataset1
taskInfo:
name: custom-artifact-printer
custom-artifact-printer-2:
cachingOptions:
enableCache: true
componentRef:
name: comp-custom-artifact-printer-2
taskInfo:
name: custom-artifact-printer-2
importer:
cachingOptions:
enableCache: true
componentRef:
name: comp-importer
inputs:
parameters:
uri:
runtimeValue:
constant: gs://ml-pipeline-playground/shakespeare1.txt
taskInfo:
name: importer
inner-pipeline:
cachingOptions:
enableCache: true
componentRef:
name: comp-inner-pipeline
dependentTasks:
- importer
inputs:
artifacts:
dataset:
taskOutputArtifact:
outputArtifactKey: artifact
producerTask: importer
taskInfo:
name: inner-pipeline
inner-pipeline-2:
cachingOptions:
enableCache: true
componentRef:
name: comp-inner-pipeline-2
taskInfo:
name: inner-pipeline-2
inputDefinitions:
artifacts:
dataset1:
artifactType:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
isOptional: true
schemaVersion: 2.1.0
sdkVersion: kfp-2.0.0-beta.9

View File

@@ -133,6 +133,11 @@ components:
executorLabel: exec-xgboost-train
inputDefinitions:
artifacts:
starting_model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
isOptional: true
training_data:
artifactType:
schemaTitle: system.Artifact
@@ -183,6 +188,11 @@ components:
executorLabel: exec-xgboost-train-2
inputDefinitions:
artifacts:
starting_model:
artifactType:
schemaTitle: system.Artifact
schemaVersion: 0.0.1
isOptional: true
training_data:
artifactType:
schemaTitle: system.Artifact
@@ -914,4 +924,4 @@ root:
taskInfo:
name: xgboost-train-2
schemaVersion: 2.1.0
-sdkVersion: kfp-2.0.0-beta.8
+sdkVersion: kfp-2.0.0-beta.9

View File

@@ -141,6 +141,9 @@ pipelines:
- module: pipeline_with_dynamic_importer_metadata
name: my_pipeline
execute: false
- module: components_with_optional_artifacts
name: pipeline
execute: false
components:
test_data_dir: sdk/python/test_data/components
read: true