chore(sdk): Refactor and move all v2-related code under the v2 namespace. (#6358)

* Refactor and move all v2-related code under the v2 namespace.

Most of the changes are import updates and a restructuring of the codebase
(a rough map of the import moves is sketched below, after the note). While it
looks like a lot of code was added, most of it already existed and was simply
moved or copied over to v2. The only exceptions are:
- under kfp/v2/components/component_factory.py, some helper functions were
  copied, with simplification, from _python_op.py;
- the `_path` suffix is no longer stripped from parameter names in v2
  components (see the sketch below).
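
To illustrate the second exception, here is a minimal, hypothetical sketch (not
taken from this diff) of how the preserved `_path` suffix surfaces to pipeline
authors:

from kfp.v2.dsl import OutputPath, component

@component
def preprocess(message: str, output_dataset_two_path: OutputPath('Dataset')):
    # Write the artifact to the path handed in by the launcher.
    with open(output_dataset_two_path, 'w') as f:
        f.write(message)

# In v2 the output key keeps the full parameter name, suffix included:
#   preprocess(message='hi').outputs['output_dataset_two_path']
# Previously the suffix was stripped and the key would have been
# 'output_dataset_two'.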

Note: there is still some code duplication (particularly between
component_factory.py and _python_op.py), but that is acceptable for now since
we intend to replace some of it with the v2 ComponentSpec + BaseComponent.
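
For orientation, a rough and non-exhaustive map of the import moves visible in
this diff; treat it as a reading aid rather than an authoritative API reference:

# Old location                                   New location
# kfp.dsl.type_utils                         ->  kfp.v2.components.types.type_utils
# kfp.dsl.io_types (Artifact, HTML, ...)     ->  kfp.v2.dsl (backed by kfp.v2.components.types)
# kfp.dsl.importer                           ->  kfp.v2.dsl.importer (old name kept as a deprecated alias)
# kfp.components.executor_main               ->  kfp.v2.components.executor_main
# kfp.components.create_component_from_func_v2
#                                            ->  kfp.v2.components.create_component_from_func
from kfp.v2 import compiler, components, dsl
from kfp.v2.dsl import component, Dataset, Input, Output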

* Update setup.py.

* update tests.

* revert accidental change of gcpc

* Fix component entrypoint.

* Update goldens.

* fix tests.

* fix merge conflict.

* revert gcpc change.

* fix tests.

* fix tests.

* Add type aliases for moved files.

* merge and update goldens.

Author: Ajay Gopinathan, 2021-08-17 19:25:37 -07:00 (committed by GitHub)
Parent: 7f6e11dedc
Commit: f3f383c2ff
57 changed files with 3116 additions and 2653 deletions

View File

@ -15,10 +15,8 @@
from typing import Dict, List
import os
from kfp import dsl
from kfp import components
from kfp.components import InputPath, OutputPath
from kfp.v2.dsl import Input, Output, Dataset, Model, component
from kfp.v2 import dsl
from kfp.v2.dsl import Input, InputPath, Output, OutputPath, Dataset, Model, component
import kfp.v2.compiler as compiler
# In tests, we install a KFP package from the PR under test. Users should not
@ -126,12 +124,12 @@ def train(
def pipeline(message: str = 'message'):
preprocess_task = preprocess(message=message)
train_task = train(
dataset_one=preprocess_task.outputs['output_dataset_one'],
dataset_two=preprocess_task.outputs['output_dataset_two'],
message=preprocess_task.outputs['output_parameter'],
input_bool=preprocess_task.outputs['output_bool_parameter'],
input_dict=preprocess_task.outputs['output_dict_parameter'],
input_list=preprocess_task.outputs['output_list_parameter'],
dataset_one_path=preprocess_task.outputs['output_dataset_one'],
dataset_two=preprocess_task.outputs['output_dataset_two_path'],
message=preprocess_task.outputs['output_parameter_path'],
input_bool=preprocess_task.outputs['output_bool_parameter_path'],
input_dict=preprocess_task.outputs['output_dict_parameter_path'],
input_list=preprocess_task.outputs['output_list_parameter_path'],
)

View File

@ -55,16 +55,16 @@ def verify(run: kfp_server_api.ApiRun, mlmd_connection_config, **kwargs):
'type': 'system.Dataset'
}, {
'metadata': {
'display_name': 'output_dataset_two'
'display_name': 'output_dataset_two_path'
},
'name': 'output_dataset_two',
'name': 'output_dataset_two_path',
'type': 'system.Dataset'
}],
'parameters': {
'output_bool_parameter': 'True',
'output_dict_parameter': '{"A": 1, "B": 2}',
'output_list_parameter': '["a", "b", "c"]',
'output_parameter': 'message'
'output_bool_parameter_path': 'True',
'output_dict_parameter_path': '{"A": 1, "B": 2}',
'output_list_parameter_path': '["a", "b", "c"]',
'output_parameter_path': 'message'
}
},
'type': 'system.ContainerExecution',
@ -79,11 +79,11 @@ def verify(run: kfp_server_api.ApiRun, mlmd_connection_config, **kwargs):
'metadata': {
'display_name': 'output_dataset_one'
},
'name': 'dataset_one',
'name': 'dataset_one_path',
'type': 'system.Dataset'
}, {
'metadata': {
'display_name': 'output_dataset_two'
'display_name': 'output_dataset_two_path'
},
'name': 'dataset_two',
'type': 'system.Dataset'

View File

@ -19,9 +19,9 @@ from kfp.v2.dsl import (
Output,
ClassificationMetrics,
Metrics,
HTML,
Markdown
)
from kfp.dsl.io_types import Markdown
from kfp.dsl.io_types import HTML
# In tests, we install a KFP package from the PR under test. Users should not
# normally need to specify `kfp_package_path` in their component definitions.

View File

@ -11,14 +11,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from kfp.v2 import dsl
from kfp.v2 import compiler
from kfp.v2 import components
# In tests, we install a KFP package from the PR under test. Users should not
# normally need to specify `kfp_package_path` in their component definitions.
_KFP_PACKAGE_PATH = os.getenv('KFP_PACKAGE_PATH')
@components.create_component_from_func
def hello_world(text: str):
@dsl.component(kfp_package_path=_KFP_PACKAGE_PATH)
def hello_world(text: str) -> str:
print(text)
return text

View File

@ -35,15 +35,13 @@ def verify(run: kfp_server_api.ApiRun, mlmd_connection_config, **kwargs):
t.assertEqual(
{
'hello-world':
KfpTask(
name='hello-world',
type='system.ContainerExecution',
state=Execution.State.COMPLETE,
inputs=TaskInputs(
parameters={'text': 'hi there'}, artifacts=[]
),
outputs=TaskOutputs(parameters={}, artifacts=[])
)
KfpTask(name='hello-world',
type='system.ContainerExecution',
state=Execution.State.COMPLETE,
inputs=TaskInputs(parameters={'text': 'hi there'},
artifacts=[]),
outputs=TaskOutputs(parameters={'Output': 'hi there'},
artifacts=[]))
},
tasks,
)

View File

@ -14,10 +14,9 @@
"""Sample pipeline for passing data in KFP v2."""
from typing import Dict, List
from kfp import dsl
from kfp import components
from kfp.components import InputPath, OutputPath
from kfp.v2.dsl import Input, Output, Dataset, Model, component
from kfp import dsl as v1dsl
from kfp.v2 import dsl
from kfp.v2.dsl import Input, InputPath, Output, OutputPath, Dataset, Model, component
import kfp.compiler as compiler
@ -101,7 +100,7 @@ def train(
f'input_bool: {input_bool}, type {type(input_bool)} || '
f'input_dict: {input_dict}, type {type(input_dict)} || '
f'input_list: {input_list}, type {type(input_list)} \n')
with open(model.path, 'w') as output_file:
for i in range(num_steps):
output_file.write('Step {}\n{}\n=====\n'.format(i, line))
@ -125,5 +124,5 @@ def pipeline(message: str = 'message'):
if __name__ == '__main__':
compiler.Compiler(mode=dsl.PipelineExecutionMode.V2_COMPATIBLE).compile(
compiler.Compiler(mode=v1dsl.PipelineExecutionMode.V2_COMPATIBLE).compile(
pipeline_func=pipeline, package_path=__file__.replace('.py', '.yaml'))

View File

@ -13,9 +13,10 @@
# limitations under the License.
# Simple two-step pipeline with 'producer' and 'consumer' steps
from kfp import components as v1components
from kfp.v2 import components, compiler, dsl
producer_op = components.load_component_from_text(
producer_op = v1components.load_component_from_text(
"""
name: Producer
inputs:
@ -36,7 +37,7 @@ implementation:
"""
)
consumer_op = components.load_component_from_text(
consumer_op = v1components.load_component_from_text(
"""
name: Consumer
inputs:

View File

@ -3,15 +3,15 @@ kind: Workflow
metadata:
generateName: my-test-pipeline-
annotations:
pipelines.kubeflow.org/kfp_sdk_version: 1.7.0
pipelines.kubeflow.org/pipeline_compilation_time: '2021-08-16T18:41:22.625538'
pipelines.kubeflow.org/kfp_sdk_version: 1.7.1
pipelines.kubeflow.org/pipeline_compilation_time: '2021-08-17T17:37:09.946952'
pipelines.kubeflow.org/pipeline_spec: '{"inputs": [{"default": "gs://output-directory/v2-artifacts",
"name": "pipeline-root"}, {"default": "pipeline/my-test-pipeline", "name": "pipeline-name"}],
"name": "my-test-pipeline"}'
pipelines.kubeflow.org/v2_pipeline: "true"
labels:
pipelines.kubeflow.org/v2_pipeline: "true"
pipelines.kubeflow.org/kfp_sdk_version: 1.7.0
pipelines.kubeflow.org/kfp_sdk_version: 1.7.1
spec:
entrypoint: my-test-pipeline
templates:
@ -42,42 +42,34 @@ spec:
container:
args:
- sh
- -c
- (python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1
python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.1'
|| PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'kfp==1.7.1' --user) && "$0" "$@"
- sh
- -ec
- |
program_path=$(mktemp)
printf "%s" "$0" > "$program_path"
python3 -u "$program_path" "$@"
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.v2.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
- |2+
def preprocess(uri, some_int, output_parameter_one,
output_dataset_one):
'''Dummy Preprocess Step.'''
with open(output_dataset_one, 'w') as f:
f.write('Output dataset')
with open(output_parameter_one, 'w') as f:
f.write("{}".format(1234))
from kfp.v2.dsl import *
from typing import *
import argparse
_parser = argparse.ArgumentParser(prog='Preprocess', description='Dummy Preprocess Step.')
_parser.add_argument("--uri", dest="uri", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--some-int", dest="some_int", type=int, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--output-parameter-one", dest="output_parameter_one", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--output-dataset-one", dest="output_dataset_one", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
def preprocess(uri: str, some_int: int, output_parameter_one: OutputPath(int),
output_dataset_one: OutputPath('Dataset')):
'''Dummy Preprocess Step.'''
with open(output_dataset_one, 'w') as f:
f.write('Output dataset')
with open(output_parameter_one, 'w') as f:
f.write("{}".format(1234))
_outputs = preprocess(**_parsed_args)
- --uri
- '{{$.inputs.parameters[''uri'']}}'
- --some-int
- '{{$.inputs.parameters[''some_int'']}}'
- --output-parameter-one
- '{{$.outputs.parameters[''output_parameter_one''].output_file}}'
- --output-dataset-one
- '{{$.outputs.artifacts[''output_dataset_one''].path}}'
- --executor_input
- '{{$}}'
- --function_to_execute
- preprocess
command: [/kfp-launcher/launch, --mlmd_server_address, $(METADATA_GRPC_SERVICE_HOST),
--mlmd_server_port, $(METADATA_GRPC_SERVICE_PORT), --runtime_info_json, $(KFP_V2_RUNTIME_INFO),
--container_image, $(KFP_V2_IMAGE), --task_name, preprocess, --pipeline_name,
@ -104,7 +96,7 @@ spec:
- name: ENABLE_CACHING
valueFrom:
fieldRef: {fieldPath: 'metadata.labels[''pipelines.kubeflow.org/enable_caching'']'}
- {name: KFP_V2_IMAGE, value: 'python:3.9'}
- {name: KFP_V2_IMAGE, value: 'python:3.7'}
- {name: KFP_V2_RUNTIME_INFO, value: '{"inputParameters": {"some_int": {"type":
"INT"}, "uri": {"type": "STRING"}}, "inputArtifacts": {}, "outputParameters":
{"output_parameter_one": {"type": "INT", "path": "/tmp/outputs/output_parameter_one/data"}},
@ -112,7 +104,7 @@ spec:
"instanceSchema": "", "metadataPath": "/tmp/outputs/output_dataset_one/data"}}}'}
envFrom:
- configMapRef: {name: metadata-grpc-configmap, optional: true}
image: python:3.9
image: python:3.7
volumeMounts:
- {mountPath: /kfp-launcher, name: kfp-launcher}
inputs:
@ -132,13 +124,13 @@ spec:
pipelines.kubeflow.org/component_ref: '{}'
pipelines.kubeflow.org/arguments.parameters: '{"some_int": "12", "uri": "uri-to-import"}'
labels:
pipelines.kubeflow.org/kfp_sdk_version: 1.7.0
pipelines.kubeflow.org/kfp_sdk_version: 1.7.1
pipelines.kubeflow.org/pipeline-sdk-type: kfp
pipelines.kubeflow.org/v2_component: "true"
pipelines.kubeflow.org/enable_caching: "true"
initContainers:
- command: [launcher, --copy, /kfp-launcher/launch]
image: gcr.io/ml-pipeline/kfp-launcher:1.7.0
image: gcr.io/ml-pipeline/kfp-launcher:1.7.1
name: kfp-launcher
mirrorVolumeMounts: true
volumes:
@ -147,42 +139,37 @@ spec:
container:
args:
- sh
- -c
- (python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1
python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.1'
|| PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'kfp==1.7.1' --user) && "$0" "$@"
- sh
- -ec
- |
program_path=$(mktemp)
printf "%s" "$0" > "$program_path"
python3 -u "$program_path" "$@"
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.v2.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
- |2+
def train(dataset,
model,
num_steps = 100):
'''Dummy Training Step.'''
from kfp.v2.dsl import *
from typing import *
with open(dataset, 'r') as input_file:
input_string = input_file.read()
with open(model, 'w') as output_file:
for i in range(num_steps):
output_file.write("Step {}\n{}\n=====\n".format(i, input_string))
def train(dataset: InputPath('Dataset'),
model: OutputPath('Model'),
num_steps: int = 100):
'''Dummy Training Step.'''
import argparse
_parser = argparse.ArgumentParser(prog='Train', description='Dummy Training Step.')
_parser.add_argument("--dataset", dest="dataset", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--num-steps", dest="num_steps", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--model", dest="model", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
with open(dataset, 'r') as input_file:
input_string = input_file.read()
with open(model, 'w') as output_file:
for i in range(num_steps):
output_file.write("Step {}\n{}\n=====\n".format(i, input_string))
_outputs = train(**_parsed_args)
- --dataset
- '{{$.inputs.artifacts[''dataset''].path}}'
- --num-steps
- '{{$.inputs.parameters[''num_steps'']}}'
- --model
- '{{$.outputs.artifacts[''model''].path}}'
- --executor_input
- '{{$}}'
- --function_to_execute
- train
command: [/kfp-launcher/launch, --mlmd_server_address, $(METADATA_GRPC_SERVICE_HOST),
--mlmd_server_port, $(METADATA_GRPC_SERVICE_PORT), --runtime_info_json, $(KFP_V2_RUNTIME_INFO),
--container_image, $(KFP_V2_IMAGE), --task_name, train, --pipeline_name, '{{inputs.parameters.pipeline-name}}',
@ -237,13 +224,13 @@ spec:
pipelines.kubeflow.org/component_ref: '{}'
pipelines.kubeflow.org/arguments.parameters: '{"num_steps": "{{inputs.parameters.preprocess-output_parameter_one}}"}'
labels:
pipelines.kubeflow.org/kfp_sdk_version: 1.7.0
pipelines.kubeflow.org/kfp_sdk_version: 1.7.1
pipelines.kubeflow.org/pipeline-sdk-type: kfp
pipelines.kubeflow.org/v2_component: "true"
pipelines.kubeflow.org/enable_caching: "true"
initContainers:
- command: [launcher, --copy, /kfp-launcher/launch]
image: gcr.io/ml-pipeline/kfp-launcher:1.7.0
image: gcr.io/ml-pipeline/kfp-launcher:1.7.1
name: kfp-launcher
mirrorVolumeMounts: true
volumes:

View File

@ -3,15 +3,15 @@ kind: Workflow
metadata:
generateName: my-test-pipeline-with-custom-launcher-
annotations:
pipelines.kubeflow.org/kfp_sdk_version: 1.7.0
pipelines.kubeflow.org/pipeline_compilation_time: '2021-08-16T18:41:22.156035'
pipelines.kubeflow.org/kfp_sdk_version: 1.7.1
pipelines.kubeflow.org/pipeline_compilation_time: '2021-08-17T17:37:09.475411'
pipelines.kubeflow.org/pipeline_spec: '{"inputs": [{"default": "gs://output-directory/v2-artifacts",
"name": "pipeline-root"}, {"default": "pipeline/my-test-pipeline-with-custom-launcher",
"name": "pipeline-name"}], "name": "my-test-pipeline-with-custom-launcher"}'
pipelines.kubeflow.org/v2_pipeline: "true"
labels:
pipelines.kubeflow.org/v2_pipeline: "true"
pipelines.kubeflow.org/kfp_sdk_version: 1.7.0
pipelines.kubeflow.org/kfp_sdk_version: 1.7.1
spec:
entrypoint: my-test-pipeline-with-custom-launcher
templates:
@ -42,42 +42,34 @@ spec:
container:
args:
- sh
- -c
- (python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1
python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.1'
|| PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'kfp==1.7.1' --user) && "$0" "$@"
- sh
- -ec
- |
program_path=$(mktemp)
printf "%s" "$0" > "$program_path"
python3 -u "$program_path" "$@"
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.v2.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
- |2+
def preprocess(uri, some_int, output_parameter_one,
output_dataset_one):
'''Dummy Preprocess Step.'''
with open(output_dataset_one, 'w') as f:
f.write('Output dataset')
with open(output_parameter_one, 'w') as f:
f.write("{}".format(1234))
from kfp.v2.dsl import *
from typing import *
import argparse
_parser = argparse.ArgumentParser(prog='Preprocess', description='Dummy Preprocess Step.')
_parser.add_argument("--uri", dest="uri", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--some-int", dest="some_int", type=int, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--output-parameter-one", dest="output_parameter_one", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--output-dataset-one", dest="output_dataset_one", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
def preprocess(uri: str, some_int: int, output_parameter_one: OutputPath(int),
output_dataset_one: OutputPath('Dataset')):
'''Dummy Preprocess Step.'''
with open(output_dataset_one, 'w') as f:
f.write('Output dataset')
with open(output_parameter_one, 'w') as f:
f.write("{}".format(1234))
_outputs = preprocess(**_parsed_args)
- --uri
- '{{$.inputs.parameters[''uri'']}}'
- --some-int
- '{{$.inputs.parameters[''some_int'']}}'
- --output-parameter-one
- '{{$.outputs.parameters[''output_parameter_one''].output_file}}'
- --output-dataset-one
- '{{$.outputs.artifacts[''output_dataset_one''].path}}'
- --executor_input
- '{{$}}'
- --function_to_execute
- preprocess
command: [/kfp-launcher/launch, --mlmd_server_address, $(METADATA_GRPC_SERVICE_HOST),
--mlmd_server_port, $(METADATA_GRPC_SERVICE_PORT), --runtime_info_json, $(KFP_V2_RUNTIME_INFO),
--container_image, $(KFP_V2_IMAGE), --task_name, preprocess, --pipeline_name,
@ -104,7 +96,7 @@ spec:
- name: ENABLE_CACHING
valueFrom:
fieldRef: {fieldPath: 'metadata.labels[''pipelines.kubeflow.org/enable_caching'']'}
- {name: KFP_V2_IMAGE, value: 'python:3.9'}
- {name: KFP_V2_IMAGE, value: 'python:3.7'}
- {name: KFP_V2_RUNTIME_INFO, value: '{"inputParameters": {"some_int": {"type":
"INT"}, "uri": {"type": "STRING"}}, "inputArtifacts": {}, "outputParameters":
{"output_parameter_one": {"type": "INT", "path": "/tmp/outputs/output_parameter_one/data"}},
@ -112,7 +104,7 @@ spec:
"instanceSchema": "", "metadataPath": "/tmp/outputs/output_dataset_one/data"}}}'}
envFrom:
- configMapRef: {name: metadata-grpc-configmap, optional: true}
image: python:3.9
image: python:3.7
volumeMounts:
- {mountPath: /kfp-launcher, name: kfp-launcher}
inputs:
@ -132,7 +124,7 @@ spec:
pipelines.kubeflow.org/component_ref: '{}'
pipelines.kubeflow.org/arguments.parameters: '{"some_int": "12", "uri": "uri-to-import"}'
labels:
pipelines.kubeflow.org/kfp_sdk_version: 1.7.0
pipelines.kubeflow.org/kfp_sdk_version: 1.7.1
pipelines.kubeflow.org/pipeline-sdk-type: kfp
pipelines.kubeflow.org/v2_component: "true"
pipelines.kubeflow.org/enable_caching: "true"
@ -147,42 +139,37 @@ spec:
container:
args:
- sh
- -c
- (python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1
python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.1'
|| PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
'kfp==1.7.1' --user) && "$0" "$@"
- sh
- -ec
- |
program_path=$(mktemp)
printf "%s" "$0" > "$program_path"
python3 -u "$program_path" "$@"
- |
def _make_parent_dirs_and_return_path(file_path: str):
import os
os.makedirs(os.path.dirname(file_path), exist_ok=True)
return file_path
program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.v2.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
- |2+
def train(dataset,
model,
num_steps = 100):
'''Dummy Training Step.'''
from kfp.v2.dsl import *
from typing import *
with open(dataset, 'r') as input_file:
input_string = input_file.read()
with open(model, 'w') as output_file:
for i in range(num_steps):
output_file.write("Step {}\n{}\n=====\n".format(i, input_string))
def train(dataset: InputPath('Dataset'),
model: OutputPath('Model'),
num_steps: int = 100):
'''Dummy Training Step.'''
import argparse
_parser = argparse.ArgumentParser(prog='Train', description='Dummy Training Step.')
_parser.add_argument("--dataset", dest="dataset", type=str, required=True, default=argparse.SUPPRESS)
_parser.add_argument("--num-steps", dest="num_steps", type=int, required=False, default=argparse.SUPPRESS)
_parser.add_argument("--model", dest="model", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
_parsed_args = vars(_parser.parse_args())
with open(dataset, 'r') as input_file:
input_string = input_file.read()
with open(model, 'w') as output_file:
for i in range(num_steps):
output_file.write("Step {}\n{}\n=====\n".format(i, input_string))
_outputs = train(**_parsed_args)
- --dataset
- '{{$.inputs.artifacts[''dataset''].path}}'
- --num-steps
- '{{$.inputs.parameters[''num_steps'']}}'
- --model
- '{{$.outputs.artifacts[''model''].path}}'
- --executor_input
- '{{$}}'
- --function_to_execute
- train
command: [/kfp-launcher/launch, --mlmd_server_address, $(METADATA_GRPC_SERVICE_HOST),
--mlmd_server_port, $(METADATA_GRPC_SERVICE_PORT), --runtime_info_json, $(KFP_V2_RUNTIME_INFO),
--container_image, $(KFP_V2_IMAGE), --task_name, train, --pipeline_name, '{{inputs.parameters.pipeline-name}}',
@ -237,7 +224,7 @@ spec:
pipelines.kubeflow.org/component_ref: '{}'
pipelines.kubeflow.org/arguments.parameters: '{"num_steps": "{{inputs.parameters.preprocess-output_parameter_one}}"}'
labels:
pipelines.kubeflow.org/kfp_sdk_version: 1.7.0
pipelines.kubeflow.org/kfp_sdk_version: 1.7.1
pipelines.kubeflow.org/pipeline-sdk-type: kfp
pipelines.kubeflow.org/v2_component: "true"
pipelines.kubeflow.org/enable_caching: "true"

View File

@ -13,142 +13,143 @@
# limitations under the License.
"""Tests for v2-compatible compiled pipelines."""
from kfp.v2.components.types.artifact_types import Artifact
import os
import tempfile
from typing import Callable
import unittest
import yaml
from kfp import compiler, components, dsl
from kfp.components import InputPath, OutputPath
from kfp import compiler, components
from kfp import dsl as v1dsl
from kfp.v2 import dsl
from kfp.v2.dsl import component, Artifact, InputPath, OutputPath
@component
def preprocess(uri: str, some_int: int, output_parameter_one: OutputPath(int),
output_dataset_one: OutputPath('Dataset')):
'''Dummy Preprocess Step.'''
with open(output_dataset_one, 'w') as f:
f.write('Output dataset')
with open(output_parameter_one, 'w') as f:
f.write("{}".format(1234))
'''Dummy Preprocess Step.'''
with open(output_dataset_one, 'w') as f:
f.write('Output dataset')
with open(output_parameter_one, 'w') as f:
f.write("{}".format(1234))
@component
def train(dataset: InputPath('Dataset'),
model: OutputPath('Model'),
num_steps: int = 100):
'''Dummy Training Step.'''
'''Dummy Training Step.'''
with open(dataset, 'r') as input_file:
input_string = input_file.read()
with open(model, 'w') as output_file:
for i in range(num_steps):
output_file.write("Step {}\n{}\n=====\n".format(i, input_string))
with open(dataset, 'r') as input_file:
input_string = input_file.read()
with open(model, 'w') as output_file:
for i in range(num_steps):
output_file.write("Step {}\n{}\n=====\n".format(i, input_string))
preprocess_op = components.create_component_from_func(preprocess,
base_image='python:3.9')
train_op = components.create_component_from_func(train)
class TestV2CompatibleModeCompiler(unittest.TestCase):
def _assert_compiled_pipeline_equals_golden(self,
kfp_compiler: compiler.Compiler,
pipeline_func: Callable,
golden_yaml_filename: str):
compiled_file = os.path.join(tempfile.mkdtemp(), 'workflow.yaml')
kfp_compiler.compile(pipeline_func, package_path=compiled_file)
def _assert_compiled_pipeline_equals_golden(self,
kfp_compiler: compiler.Compiler,
pipeline_func: Callable,
golden_yaml_filename: str):
compiled_file = os.path.join(tempfile.mkdtemp(), 'workflow.yaml')
kfp_compiler.compile(pipeline_func, package_path=compiled_file)
test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
golden_file = os.path.join(test_data_dir, golden_yaml_filename)
# Uncomment the following to update goldens.
# TODO: place this behind some --update_goldens flag.
# kfp_compiler.compile(pipeline_func, package_path=golden_file)
test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
golden_file = os.path.join(test_data_dir, golden_yaml_filename)
# Uncomment the following to update goldens.
# TODO: place this behind some --update_goldens flag.
# kfp_compiler.compile(pipeline_func, package_path=golden_file)
with open(golden_file, 'r') as f:
golden = yaml.safe_load(f)
with open(golden_file, 'r') as f:
golden = yaml.safe_load(f)
with open(compiled_file, 'r') as f:
compiled = yaml.safe_load(f)
with open(compiled_file, 'r') as f:
compiled = yaml.safe_load(f)
for workflow in golden, compiled:
del workflow['metadata']
for template in workflow['spec']['templates']:
template.pop('metadata', None)
for workflow in golden, compiled:
del workflow['metadata']
for template in workflow['spec']['templates']:
template.pop('metadata', None)
if 'initContainers' not in template:
continue
# Strip off the launcher image label before comparison
for initContainer in template['initContainers']:
initContainer['image'] = initContainer['image'].split(':')[0]
if 'initContainers' not in template:
continue
# Strip off the launcher image label before comparison
for initContainer in template['initContainers']:
initContainer['image'] = initContainer['image'].split(':')[0]
self.maxDiff = None
self.assertDictEqual(golden, compiled)
self.maxDiff = None
self.assertDictEqual(golden, compiled)
def test_two_step_pipeline(self):
def test_two_step_pipeline(self):
@dsl.pipeline(pipeline_root='gs://output-directory/v2-artifacts',
name='my-test-pipeline')
def v2_compatible_two_step_pipeline():
preprocess_task = preprocess_op(uri='uri-to-import', some_int=12)
train_task = train_op(
num_steps=preprocess_task.outputs['output_parameter_one'],
dataset=preprocess_task.outputs['output_dataset_one'])
@dsl.pipeline(pipeline_root='gs://output-directory/v2-artifacts',
name='my-test-pipeline')
def v2_compatible_two_step_pipeline():
preprocess_task = preprocess(uri='uri-to-import', some_int=12)
train_task = train(
num_steps=preprocess_task.outputs['output_parameter_one'],
dataset=preprocess_task.outputs['output_dataset_one'])
kfp_compiler = compiler.Compiler(
mode=dsl.PipelineExecutionMode.V2_COMPATIBLE)
self._assert_compiled_pipeline_equals_golden(
kfp_compiler, v2_compatible_two_step_pipeline,
'v2_compatible_two_step_pipeline.yaml')
kfp_compiler = compiler.Compiler(
mode=v1dsl.PipelineExecutionMode.V2_COMPATIBLE)
self._assert_compiled_pipeline_equals_golden(
kfp_compiler, v2_compatible_two_step_pipeline,
'v2_compatible_two_step_pipeline.yaml')
def test_custom_launcher(self):
def test_custom_launcher(self):
@dsl.pipeline(pipeline_root='gs://output-directory/v2-artifacts',
name='my-test-pipeline-with-custom-launcher')
def v2_compatible_two_step_pipeline():
preprocess_task = preprocess_op(uri='uri-to-import', some_int=12)
train_task = train_op(
num_steps=preprocess_task.outputs['output_parameter_one'],
dataset=preprocess_task.outputs['output_dataset_one'])
@dsl.pipeline(pipeline_root='gs://output-directory/v2-artifacts',
name='my-test-pipeline-with-custom-launcher')
def v2_compatible_two_step_pipeline():
preprocess_task = preprocess(uri='uri-to-import', some_int=12)
train_task = train(
num_steps=preprocess_task.outputs['output_parameter_one'],
dataset=preprocess_task.outputs['output_dataset_one'])
kfp_compiler = compiler.Compiler(
mode=dsl.PipelineExecutionMode.V2_COMPATIBLE,
launcher_image='my-custom-image')
self._assert_compiled_pipeline_equals_golden(
kfp_compiler, v2_compatible_two_step_pipeline,
'v2_compatible_two_step_pipeline_with_custom_launcher.yaml')
kfp_compiler = compiler.Compiler(
mode=v1dsl.PipelineExecutionMode.V2_COMPATIBLE,
launcher_image='my-custom-image')
self._assert_compiled_pipeline_equals_golden(
kfp_compiler, v2_compatible_two_step_pipeline,
'v2_compatible_two_step_pipeline_with_custom_launcher.yaml')
def test_constructing_container_op_directly_should_error(
self):
def test_constructing_container_op_directly_should_error(
self):
@dsl.pipeline(name='test-pipeline')
def my_pipeline():
dsl.ContainerOp(
name='comp1',
image='gcr.io/dummy',
command=['python', 'main.py']
)
@dsl.pipeline(name='test-pipeline')
def my_pipeline():
v1dsl.ContainerOp(
name='comp1',
image='gcr.io/dummy',
command=['python', 'main.py']
)
with self.assertRaisesRegex(
RuntimeError,
'Constructing ContainerOp instances directly is deprecated and not '
'supported when compiling to v2 \(using v2 compiler or v1 compiler '
'with V2_COMPATIBLE or V2_ENGINE mode\).'):
compiler.Compiler(mode=dsl.PipelineExecutionMode.V2_COMPATIBLE).compile(
pipeline_func=my_pipeline, package_path='result.json')
with self.assertRaisesRegex(
RuntimeError,
'Constructing ContainerOp instances directly is deprecated and not '
'supported when compiling to v2 \(using v2 compiler or v1 compiler '
'with V2_COMPATIBLE or V2_ENGINE mode\).'):
compiler.Compiler(mode=v1dsl.PipelineExecutionMode.V2_COMPATIBLE).compile(
pipeline_func=my_pipeline, package_path='result.json')
def test_use_importer_should_error(self):
def test_use_importer_should_error(self):
@dsl.pipeline(name='test-pipeline')
def my_pipeline():
dsl.importer(artifact_uri='dummy', artifact_class=dsl.io_types.Artifact)
@dsl.pipeline(name='test-pipeline')
def my_pipeline():
dsl.importer(artifact_uri='dummy', artifact_class=Artifact)
with self.assertRaisesRegex(
NotImplementedError,
'dsl.importer is not supported for Kubeflow Pipelines open source yet.',
):
compiler.Compiler(mode=dsl.PipelineExecutionMode.V2_COMPATIBLE).compile(
pipeline_func=my_pipeline, package_path='result.json')
with self.assertRaisesRegex(
NotImplementedError,
'dsl.importer is not supported for Kubeflow Pipelines open source yet.',
):
compiler.Compiler(mode=v1dsl.PipelineExecutionMode.V2_COMPATIBLE).compile(
pipeline_func=my_pipeline, package_path='result.json')
if __name__ == '__main__':
unittest.main()
unittest.main()

View File

@ -34,6 +34,7 @@ from ._data_passing import serialize_value, get_deserializer_code_for_type_struc
from ._naming import _make_name_unique_by_adding_index
from .structures import *
from . import _structures as structures
from kfp.components import type_annotation_utils
import inspect
import itertools
@ -44,9 +45,6 @@ import warnings
import docstring_parser
from kfp.components import type_annotation_utils
from kfp.dsl import io_types
T = TypeVar('T')
@ -335,22 +333,7 @@ def _extract_component_interface(func: Callable) -> ComponentSpec:
passing_style = None
io_name = parameter.name
if io_types.is_artifact_annotation(parameter_type):
# passing_style is either io_types.InputAnnotation or
# io_types.OutputAnnotation.
passing_style = io_types.get_io_artifact_annotation(parameter_type)
# parameter_type is io_types.Artifact or one of its subclasses.
parameter_type = io_types.get_io_artifact_class(parameter_type)
if not issubclass(parameter_type, io_types.Artifact):
raise ValueError(
'Input[T] and Output[T] are only supported when T is a '
'subclass of Artifact. Found `{} with type {}`'.format(
io_name, parameter_type))
if parameter.default is not inspect.Parameter.empty:
raise ValueError('Default values for Input/Output artifacts are not supported.')
elif isinstance(
if isinstance(
parameter_type,
(InputArtifact, InputPath, InputTextFile, InputBinaryFile,
OutputArtifact, OutputPath, OutputTextFile, OutputBinaryFile)):
@ -372,8 +355,7 @@ def _extract_component_interface(func: Callable) -> ComponentSpec:
type_struct = annotation_to_type_struct(parameter_type)
if passing_style in [io_types.OutputAnnotation, OutputArtifact,
OutputPath, OutputTextFile, OutputBinaryFile]:
if passing_style in [OutputArtifact, OutputPath, OutputTextFile, OutputBinaryFile]:
io_name = _make_name_unique_by_adding_index(io_name, output_names, '_')
output_names.add(io_name)
output_spec = OutputSpec(
@ -467,96 +449,6 @@ def _extract_component_interface(func: Callable) -> ComponentSpec:
return component_spec
def _get_default_kfp_package_path() -> str:
import kfp
return 'kfp=={}'.format(kfp.__version__)
def _get_packages_to_install_command(
package_list: Optional[List[str]] = None) -> List[str]:
result = []
if package_list is not None:
install_pip_command = 'python3 -m ensurepip'
install_packages_command = (
'PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \
--no-warn-script-location {}').format(
' '.join([repr(str(package)) for package in package_list]))
result = [
'sh', '-c',
'({install_pip} || {install_pip} --user) &&'
' ({install_packages} || {install_packages} --user) && "$0" "$@"'.format(
install_pip=install_pip_command,
install_packages=install_packages_command)
]
return result
def _func_to_component_spec_v2(
func: Callable,
base_image : Optional[str] = None,
packages_to_install: Optional[List[str]] = None,
install_kfp_package: bool = True,
kfp_package_path: Optional[str] = None) -> ComponentSpec:
decorator_base_image = getattr(func, '_component_base_image', None)
if decorator_base_image is not None:
if base_image is not None and decorator_base_image != base_image:
raise ValueError('base_image ({}) conflicts with the decorator-specified base image metadata ({})'.format(base_image, decorator_base_image))
else:
base_image = decorator_base_image
else:
if base_image is None:
base_image = default_base_image_or_builder
if isinstance(base_image, Callable):
base_image = base_image()
imports_source = [
"from kfp.v2.dsl import *",
"from typing import *",
]
func_source = _get_function_source_definition(func)
source = textwrap.dedent("""
{imports_source}
{func_source}\n""").format(imports_source='\n'.join(imports_source),
func_source=func_source)
packages_to_install = packages_to_install or []
if install_kfp_package:
if kfp_package_path is None:
kfp_package_path = _get_default_kfp_package_path()
packages_to_install.append(kfp_package_path)
packages_to_install_command = _get_packages_to_install_command(package_list=packages_to_install)
from kfp.components._structures import ExecutorInputPlaceholder
component_spec = _extract_component_interface(func)
component_spec.implementation=ContainerImplementation(
container=ContainerSpec(
image=base_image,
command=packages_to_install_command + [
'sh',
'-ec',
textwrap.dedent('''\
program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.components.executor_main \
--component_module_path \
"$program_path/ephemeral_component.py" \
"$@"
'''),
source,
],
args=[
"--executor_input",
ExecutorInputPlaceholder(),
"--function_to_execute", func.__name__,
]
)
)
return component_spec
def _func_to_component_spec(func, extra_code='', base_image : str = None, packages_to_install: List[str] = None, modules_to_capture: List[str] = None, use_code_pickling=False) -> ComponentSpec:
'''Takes a self-contained python function and converts it to component.
@ -980,17 +872,20 @@ def create_component_from_func_v2(
Returns:
A component task factory that can be used in pipeline definitions.
"""
component_spec = _func_to_component_spec_v2(
warnings.warn(
'create_component_from_func_v2() has been deprecated and will be'
' removed in KFP v1.9. Please use'
' kfp.v2.components.create_component_from_func() instead.',
category=FutureWarning,
)
from kfp.v2.components import component_factory
return component_factory.create_component_from_func(
func=func,
base_image=base_image,
packages_to_install=packages_to_install,
install_kfp_package=install_kfp_package,
kfp_package_path=kfp_package_path
)
if output_component_file:
component_spec.save(output_component_file)
return _create_task_factory_from_component_spec(component_spec)
def create_component_from_func(

View File

@ -1,284 +0,0 @@
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import inspect
from typing import Any, Callable, Dict, List, Optional, Union
from kfp.components._python_op import InputPath, OutputPath
from kfp.dsl.io_types import Artifact, Input, Output, create_runtime_artifact, is_artifact_annotation, is_input_artifact, is_output_artifact
class Executor():
"""Executor executes v2-based Python function components."""
def __init__(self, executor_input: Dict, function_to_execute: Callable):
self._func = function_to_execute
self._input = executor_input
self._input_artifacts: Dict[str, Artifact] = {}
self._output_artifacts: Dict[str, Artifact] = {}
for name, artifacts in self._input.get('inputs', {}).get('artifacts',
{}).items():
artifacts_list = artifacts.get('artifacts')
if artifacts_list:
self._input_artifacts[name] = self._make_input_artifact(
artifacts_list[0])
for name, artifacts in self._input.get('outputs', {}).get('artifacts',
{}).items():
artifacts_list = artifacts.get('artifacts')
if artifacts_list:
self._output_artifacts[name] = self._make_output_artifact(
artifacts_list[0])
self._return_annotation = inspect.signature(self._func).return_annotation
self._executor_output = {}
@classmethod
def _make_input_artifact(cls, runtime_artifact: Dict):
return create_runtime_artifact(runtime_artifact)
@classmethod
def _make_output_artifact(cls, runtime_artifact: Dict):
import os
artifact = create_runtime_artifact(runtime_artifact)
os.makedirs(os.path.dirname(artifact.path), exist_ok=True)
return artifact
def _get_input_artifact(self, name: str):
return self._input_artifacts.get(name)
def _get_output_artifact(self, name: str):
return self._output_artifacts.get(name)
def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):
parameter = self._input.get('inputs', {}).get('parameters',
{}).get(parameter_name, None)
if parameter is None:
return None
if parameter.get('stringValue'):
if parameter_type == str:
return parameter['stringValue']
elif parameter_type == bool:
# Use `.lower()` so it can also handle 'True' and 'False' (resulted from
# `str(True)` and `str(False)`, respectively.
return json.loads(parameter['stringValue'].lower())
else:
return json.loads(parameter['stringValue'])
elif parameter.get('intValue'):
return int(parameter['intValue'])
elif parameter.get('doubleValue'):
return float(parameter['doubleValue'])
def _get_output_parameter_path(self, parameter_name: str):
parameter_name = self._maybe_strip_path_suffix(parameter_name)
parameter = self._input.get('outputs',
{}).get('parameters',
{}).get(parameter_name, None)
if parameter is None:
return None
import os
path = parameter.get('outputFile', None)
if path:
os.makedirs(os.path.dirname(path), exist_ok=True)
return path
def _get_output_artifact_path(self, artifact_name: str):
artifact_name = self._maybe_strip_path_suffix(artifact_name)
output_artifact = self._output_artifacts.get(artifact_name)
if not output_artifact:
raise ValueError(
'Failed to get output artifact path for artifact name {}'.format(
artifact_name))
return output_artifact.path
def _get_input_artifact_path(self, artifact_name: str):
artifact_name = self._maybe_strip_path_suffix(artifact_name)
input_artifact = self._input_artifacts.get(artifact_name)
if not input_artifact:
raise ValueError(
'Failed to get input artifact path for artifact name {}'.format(
artifact_name))
return input_artifact.path
def _write_output_parameter_value(self, name: str,
value: Union[str, int, float, bool, dict,
list, Dict, List]):
if type(value) == str:
output = {'stringValue': value}
elif type(value) == int:
output = {'intValue': value}
elif type(value) == float:
output = {'doubleValue': value}
else:
# For bool, list, dict, List, Dict, json serialize the value.
output = {'stringValue': json.dumps(value)}
if not self._executor_output.get('parameters'):
self._executor_output['parameters'] = {}
self._executor_output['parameters'][name] = output
def _write_output_artifact_payload(self, name: str, value: Any):
path = self._get_output_artifact_path(name)
with open(path, 'w') as f:
f.write(str(value))
# TODO: extract to a util
@classmethod
def _get_short_type_name(cls, type_name: str) -> str:
"""Extracts the short form type name.
This method is used for looking up serializer for a given type.
For example:
typing.List -> List
typing.List[int] -> List
typing.Dict[str, str] -> Dict
List -> List
str -> str
Args:
type_name: The original type name.
Returns:
The short form type name or the original name if pattern doesn't match.
"""
import re
match = re.match('(typing\.)?(?P<type>\w+)(?:\[.+\])?', type_name)
if match:
return match.group('type')
else:
return type_name
# TODO: merge with type_utils.is_parameter_type
@classmethod
def _is_parameter(cls, annotation: Any) -> bool:
if type(annotation) == type:
return annotation in [str, int, float, bool, dict, list]
# Annotation could be, for instance `typing.Dict[str, str]`, etc.
return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']
@classmethod
def _is_artifact(cls, annotation: Any) -> bool:
if type(annotation) == type:
return issubclass(annotation, Artifact)
return False
@classmethod
def _is_named_tuple(cls, annotation: Any) -> bool:
if type(annotation) == type:
return issubclass(annotation, tuple) and hasattr(
annotation, '_fields') and hasattr(annotation, '__annotations__')
return False
def _handle_single_return_value(self, output_name: str, annotation_type: Any,
return_value: Any):
if self._is_parameter(annotation_type):
if type(return_value) != annotation_type:
raise ValueError(
'Function `{}` returned value of type {}; want type {}'.format(
self._func.__name__, type(return_value), annotation_type))
self._write_output_parameter_value(output_name, return_value)
elif self._is_artifact(annotation_type):
self._write_output_artifact_payload(output_name, return_value)
else:
raise RuntimeError(
'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'
' subclass of `Artifact`'.format(annotation_type))
def _write_executor_output(self, func_output: Optional[Any] = None):
if self._output_artifacts:
self._executor_output['artifacts'] = {}
for name, artifact in self._output_artifacts.items():
runtime_artifact = {
'name': artifact.name,
'uri': artifact.uri,
'metadata': artifact.metadata,
}
artifacts_list = {'artifacts': [runtime_artifact]}
self._executor_output['artifacts'][name] = artifacts_list
if func_output is not None:
if self._is_parameter(self._return_annotation) or self._is_artifact(
self._return_annotation):
# Note: single output is named `Output` in component.yaml.
self._handle_single_return_value('Output', self._return_annotation,
func_output)
elif self._is_named_tuple(self._return_annotation):
if len(self._return_annotation._fields) != len(func_output):
raise RuntimeError(
'Expected {} return values from function `{}`, got {}'.format(
len(self._return_annotation._fields), self._func.__name__,
len(func_output)))
for i in range(len(self._return_annotation._fields)):
field = self._return_annotation._fields[i]
field_type = self._return_annotation.__annotations__[field]
if type(func_output) == tuple:
field_value = func_output[i]
else:
field_value = getattr(func_output, field)
self._handle_single_return_value(field, field_type, field_value)
else:
raise RuntimeError(
'Unknown return type: {}. Must be one of `str`, `int`, `float`, a'
' subclass of `Artifact`, or a NamedTuple collection of these types.'
.format(self._return_annotation))
import os
os.makedirs(
os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)
with open(self._input['outputs']['outputFile'], 'w') as f:
f.write(json.dumps(self._executor_output))
def _maybe_strip_path_suffix(self, name) -> str:
if name.endswith('_path'):
name = name[0:-len('_path')]
if name.endswith('_file'):
name = name[0:-len('_file')]
return name
def execute(self):
annotations = inspect.getfullargspec(self._func).annotations
# Function arguments.
func_kwargs = {}
for k, v in annotations.items():
if k == 'return':
continue
if self._is_parameter(v):
func_kwargs[k] = self._get_input_parameter_value(k, v)
if is_artifact_annotation(v):
if is_input_artifact(v):
func_kwargs[k] = self._get_input_artifact(k)
if is_output_artifact(v):
func_kwargs[k] = self._get_output_artifact(k)
elif isinstance(v, OutputPath):
if self._is_parameter(v.type):
func_kwargs[k] = self._get_output_parameter_path(k)
else:
func_kwargs[k] = self._get_output_artifact_path(k)
elif isinstance(v, InputPath):
func_kwargs[k] = self._get_input_artifact_path(k)
result = self._func(**func_kwargs)
self._write_executor_output(result)

View File

@ -1,455 +0,0 @@
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for kfp.components.executor"""
import os
import tempfile
from typing import Callable, NamedTuple, Optional
import unittest
import json
from kfp.components import executor, InputPath, OutputPath
from kfp.dsl import io_types
from kfp.dsl.io_types import Artifact, Dataset, Input, Metrics, Model, Output
_EXECUTOR_INPUT = """\
{
"inputs": {
"parameters": {
"input_parameter": {
"stringValue": "Hello, KFP"
}
},
"artifacts": {
"input_artifact_one": {
"artifacts": [
{
"metadata": {},
"name": "input_artifact_one",
"type": {
"schemaTitle": "system.Dataset"
},
"uri": "gs://some-bucket/input_artifact_one"
}
]
}
}
},
"outputs": {
"artifacts": {
"output_artifact_one": {
"artifacts": [
{
"metadata": {},
"name": "output_artifact_one",
"type": {
"schemaTitle": "system.Model"
},
"uri": "gs://some-bucket/output_artifact_one"
}
]
},
"output_artifact_two": {
"artifacts": [
{
"metadata": {},
"name": "output_artifact_two",
"type": {
"schemaTitle": "system.Metrics"
},
"uri": "gs://some-bucket/output_artifact_two"
}
]
}
},
"parameters": {
"output_parameter": {
"outputFile": "gs://some-bucket/some_task/nested/output_parameter"
}
},
"outputFile": "%s/output_metadata.json"
}
}
"""
class ExecutorTest(unittest.TestCase):
def setUp(self):
self.maxDiff = None
self._test_dir = tempfile.mkdtemp()
io_types._GCS_LOCAL_MOUNT_PREFIX = self._test_dir + '/'
io_types._MINIO_LOCAL_MOUNT_PREFIX = self._test_dir + '/minio/'
io_types._S3_LOCAL_MOUNT_PREFIX = self._test_dir + '/s3/'
return super().setUp()
def _get_executor(self,
func: Callable,
executor_input: Optional[str] = None) -> executor.Executor:
if executor_input is None:
executor_input = _EXECUTOR_INPUT
executor_input_dict = json.loads(executor_input % self._test_dir)
return executor.Executor(executor_input=executor_input_dict,
function_to_execute=func)
def test_input_parameter(self):
def test_func(input_parameter: str):
self.assertEqual(input_parameter, "Hello, KFP")
self._get_executor(test_func).execute()
def test_input_artifact(self):
def test_func(input_artifact_one: Input[Dataset]):
self.assertEqual(input_artifact_one.uri,
'gs://some-bucket/input_artifact_one')
self.assertEqual(
input_artifact_one.path,
os.path.join(self._test_dir, 'some-bucket/input_artifact_one'))
self.assertEqual(input_artifact_one.name, 'input_artifact_one')
self._get_executor(test_func).execute()
def test_output_artifact(self):
def test_func(output_artifact_one: Output[Model]):
self.assertEqual(output_artifact_one.uri,
'gs://some-bucket/output_artifact_one')
self.assertEqual(
output_artifact_one.path,
os.path.join(self._test_dir, 'some-bucket/output_artifact_one'))
self.assertEqual(output_artifact_one.name, 'output_artifact_one')
self._get_executor(test_func).execute()
def test_output_parameter(self):
def test_func(output_parameter_path: OutputPath(str)):
# Test that output parameters just use the passed in filename.
self.assertEqual(output_parameter_path,
'gs://some-bucket/some_task/nested/output_parameter')
# Test writing to the path succeeds. This fails if parent directories
# don't exist.
with open(output_parameter_path, 'w') as f:
f.write('Hello, World!')
self._get_executor(test_func).execute()
def test_input_path_artifact(self):
def test_func(input_artifact_one_path: InputPath('Dataset')):
self.assertEqual(
input_artifact_one_path,
os.path.join(self._test_dir, 'some-bucket/input_artifact_one'))
self._get_executor(test_func).execute()
def test_output_path_artifact(self):
def test_func(output_artifact_one_path: OutputPath('Model')):
self.assertEqual(
output_artifact_one_path,
os.path.join(self._test_dir, 'some-bucket/output_artifact_one'))
self._get_executor(test_func).execute()
def test_output_metadata(self):
def test_func(output_artifact_two: Output[Metrics]):
output_artifact_two.metadata['key_1'] = 'value_1'
output_artifact_two.metadata['key_2'] = 2
output_artifact_two.uri = 'new-uri'
# log_metric works here since the schema is specified as Metrics.
output_artifact_two.log_metric('metric', 0.9)
self._get_executor(test_func).execute()
with open(os.path.join(self._test_dir, 'output_metadata.json'), 'r') as f:
output_metadata = json.loads(f.read())
self.assertDictEqual(
output_metadata, {
'artifacts': {
'output_artifact_one': {
'artifacts': [{
'name': 'output_artifact_one',
'uri': 'gs://some-bucket/output_artifact_one',
'metadata': {}
}]
},
'output_artifact_two': {
'artifacts': [{
'name': 'output_artifact_two',
'uri': 'new-uri',
'metadata': {
'key_1': 'value_1',
'key_2': 2,
'metric': 0.9
}
}]
}
}
})
def test_function_string_output(self):
executor_input = """\
{
"inputs": {
"parameters": {
"first_message": {
"stringValue": "Hello"
},
"second_message": {
"stringValue": "World"
}
}
},
"outputs": {
"parameters": {
"output": {
"outputFile": "gs://some-bucket/output"
}
},
"outputFile": "%s/output_metadata.json"
}
}
"""
def test_func(first_message: str, second_message: str) -> str:
return first_message + ", " + second_message
self._get_executor(test_func, executor_input).execute()
with open(os.path.join(self._test_dir, 'output_metadata.json'), 'r') as f:
output_metadata = json.loads(f.read())
self.assertDictEqual(output_metadata, {
"parameters": {
"Output": {
"stringValue": "Hello, World"
}
},
})
def test_function_with_int_output(self):
executor_input = """\
{
"inputs": {
"parameters": {
"first": {
"intValue": 40
},
"second": {
"intValue": 2
}
}
},
"outputs": {
"parameters": {
"output": {
"outputFile": "gs://some-bucket/output"
}
},
"outputFile": "%s/output_metadata.json"
}
}
"""
def test_func(first: int, second: int) -> int:
return first + second
self._get_executor(test_func, executor_input).execute()
with open(os.path.join(self._test_dir, 'output_metadata.json'), 'r') as f:
output_metadata = json.loads(f.read())
self.assertDictEqual(output_metadata, {
"parameters": {
"Output": {
"intValue": 42
}
},
})
def test_function_with_int_output(self):
executor_input = """\
{
"inputs": {
"parameters": {
"first_message": {
"stringValue": "Hello"
},
"second_message": {
"stringValue": "World"
}
}
},
"outputs": {
"artifacts": {
"Output": {
"outputFile": "gs://some-bucket/output"
}
},
"outputFile": "%s/output_metadata.json"
}
}
"""
def test_func(first_message: str, second_message: str) -> str:
return first_message + ", " + second_message
self._get_executor(test_func, executor_input).execute()
with open(os.path.join(self._test_dir, 'output_metadata.json'), 'r') as f:
output_metadata = json.loads(f.read())
self.assertDictEqual(output_metadata, {
"parameters": {
"Output": {
"stringValue": "Hello, World"
}
},
})
def test_artifact_output(self):
executor_input = """\
{
"inputs": {
"parameters": {
"first": {
"stringValue": "Hello"
},
"second": {
"stringValue": "World"
}
}
},
"outputs": {
"artifacts": {
"Output": {
"artifacts": [
{
"name": "output",
"type": {
"schemaTitle": "system.Artifact"
},
"uri": "gs://some-bucket/output"
}
]
}
},
"outputFile": "%s/output_metadata.json"
}
}
"""
def test_func(first: str, second: str) -> Artifact:
return first + ", " + second
self._get_executor(test_func, executor_input).execute()
with open(os.path.join(self._test_dir, 'output_metadata.json'), 'r') as f:
output_metadata = json.loads(f.read())
self.assertDictEqual(
output_metadata, {
'artifacts': {
'Output': {
'artifacts': [{
'metadata': {},
'name': 'output',
'uri': 'gs://some-bucket/output'
}]
}
}
})
with open(os.path.join(self._test_dir, 'some-bucket/output'), 'r') as f:
artifact_payload = f.read()
self.assertEqual(artifact_payload, "Hello, World")
def test_named_tuple_output(self):
executor_input = """\
{
"outputs": {
"artifacts": {
"output_dataset": {
"artifacts": [
{
"name": "output_dataset",
"type": {
"schemaTitle": "system.Dataset"
},
"uri": "gs://some-bucket/output_dataset"
}
]
}
},
"parameters": {
"output_int": {
"outputFile": "gs://some-bucket/output_int"
},
"output_string": {
"outputFile": "gs://some-bucket/output_string"
}
},
"outputFile": "%s/output_metadata.json"
}
}
"""
# Functions returning named tuples should work.
def func_returning_named_tuple() -> NamedTuple('Outputs', [
("output_dataset", Dataset),
("output_int", int),
("output_string", str),
]):
from collections import namedtuple
output = namedtuple('Outputs',
['output_dataset', 'output_int', 'output_string'])
return output("Dataset contents", 101, "Some output string")
# Functions returning plain tuples should work too.
def func_returning_plain_tuple() -> NamedTuple('Outputs', [
("output_dataset", Dataset),
("output_int", int),
("output_string", str),
]):
return ("Dataset contents", 101, "Some output string")
for test_func in [func_returning_named_tuple, func_returning_plain_tuple]:
self._get_executor(test_func, executor_input).execute()
with open(os.path.join(self._test_dir, 'output_metadata.json'), 'r') as f:
output_metadata = json.loads(f.read())
self.assertDictEqual(
output_metadata, {
'artifacts': {
'output_dataset': {
'artifacts': [{
'metadata': {},
'name': 'output_dataset',
'uri': 'gs://some-bucket/output_dataset'
}]
}
},
"parameters": {
"output_string": {
"stringValue": "Some output string"
},
"output_int": {
"intValue": 101
}
},
})
with open(os.path.join(self._test_dir, 'some-bucket/output_dataset'),
'r') as f:
artifact_payload = f.read()
self.assertEqual(artifact_payload, "Dataset contents")

View File

@ -21,7 +21,17 @@ from ._pipeline_volume import PipelineVolume
from ._volume_snapshot_op import VolumeSnapshotOp
from ._ops_group import OpsGroup, ExitHandler, Condition, ParallelFor, SubGraph
from ._component import python_component, graph_component, component
from .importer_node import importer
def importer(*args, **kwargs):
import warnings
from kfp.v2.dsl import importer as v2importer
warnings.warn(
'`kfp.dsl.importer` is a deprecated alias and will be removed'
' in KFP v2.0. Please import from `kfp.v2.dsl` instead.',
category=FutureWarning)
return v2importer(*args, **kwargs)
EXECUTION_ID_PLACEHOLDER = '{{workflow.uid}}-{{pod.name}}'
RUN_ID_PLACEHOLDER = '{{workflow.uid}}'

View File

@ -29,8 +29,8 @@ from kfp.dsl import _pipeline_param
from kfp.dsl import component_spec as dsl_component_spec
from kfp.dsl import dsl_utils
from kfp.dsl import types
from kfp.dsl import type_utils
from kfp.pipeline_spec import pipeline_spec_pb2
from kfp.v2.components.types import type_utils
# Placeholder to represent the output directory hosting all the generated URIs.

View File

@ -19,8 +19,8 @@ from kfp.components import _structures as structures
from kfp.dsl import _for_loop
from kfp.dsl import _pipeline_param
from kfp.dsl import dsl_utils
from kfp.dsl import type_utils
from kfp.pipeline_spec import pipeline_spec_pb2
from kfp.v2.components.types import type_utils
def additional_input_name_for_pipelineparam(

View File

@ -1,156 +0,0 @@
# Copyright 2020 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility function for building Importer Node spec."""
from typing import Optional, Union, Type
from kfp.dsl import _container_op
from kfp.dsl import _pipeline_param
from kfp.dsl import dsl_utils
from kfp.dsl import io_types
from kfp.dsl import type_utils
from kfp.pipeline_spec import pipeline_spec_pb2
INPUT_KEY = 'uri'
OUTPUT_KEY = 'artifact'
def _build_importer_spec(
artifact_uri: Union[_pipeline_param.PipelineParam, str],
artifact_type_schema: pipeline_spec_pb2.ArtifactTypeSchema,
) -> pipeline_spec_pb2.PipelineDeploymentConfig.ImporterSpec:
"""Builds an importer executor spec.
Args:
artifact_uri: The artifact uri to import from.
artifact_type_schema: The user specified artifact type schema of the
artifact to be imported.
Returns:
An importer spec.
"""
importer_spec = pipeline_spec_pb2.PipelineDeploymentConfig.ImporterSpec()
importer_spec.type_schema.CopyFrom(artifact_type_schema)
if isinstance(artifact_uri, _pipeline_param.PipelineParam):
importer_spec.artifact_uri.runtime_parameter = INPUT_KEY
elif isinstance(artifact_uri, str):
importer_spec.artifact_uri.constant_value.string_value = artifact_uri
return importer_spec
def _build_importer_task_spec(
importer_base_name: str,
artifact_uri: Union[_pipeline_param.PipelineParam, str],
) -> pipeline_spec_pb2.PipelineTaskSpec:
"""Builds an importer task spec.
Args:
importer_base_name: The base name of the importer node.
artifact_uri: The artifact uri to import from.
Returns:
An importer node task spec.
"""
result = pipeline_spec_pb2.PipelineTaskSpec()
result.task_info.name = dsl_utils.sanitize_task_name(importer_base_name)
result.component_ref.name = dsl_utils.sanitize_component_name(
importer_base_name)
if isinstance(artifact_uri, _pipeline_param.PipelineParam):
result.inputs.parameters[
INPUT_KEY].component_input_parameter = artifact_uri.full_name
elif isinstance(artifact_uri, str):
result.inputs.parameters[
INPUT_KEY].runtime_value.constant_value.string_value = artifact_uri
return result
def _build_importer_component_spec(
importer_base_name: str,
artifact_type_schema: pipeline_spec_pb2.ArtifactTypeSchema,
) -> pipeline_spec_pb2.ComponentSpec:
"""Builds an importer component spec.
Args:
importer_base_name: The base name of the importer node.
artifact_type_schema: The user specified artifact type schema of the
artifact to be imported.
Returns:
An importer node component spec.
"""
result = pipeline_spec_pb2.ComponentSpec()
result.executor_label = dsl_utils.sanitize_executor_label(importer_base_name)
result.input_definitions.parameters[
INPUT_KEY].type = pipeline_spec_pb2.PrimitiveType.STRING
result.output_definitions.artifacts[OUTPUT_KEY].artifact_type.CopyFrom(
artifact_type_schema)
return result
def importer(artifact_uri: Union[_pipeline_param.PipelineParam, str],
artifact_class: Type[io_types.Artifact],
reimport: bool = False) -> _container_op.ContainerOp:
"""dsl.importer for importing an existing artifact. Only for v2 pipeline.
Args:
artifact_uri: The artifact uri to import from.
artifact_class: The artifact class (a subclass of io_types.Artifact) of the
  artifact to be imported.
reimport: Whether to reimport the artifact. Defaults to False.
Returns:
A ContainerOp instance.
Raises:
ValueError if the passed in artifact_uri is neither a PipelineParam nor a
constant string value.
"""
if isinstance(artifact_uri, _pipeline_param.PipelineParam):
input_param = artifact_uri
elif isinstance(artifact_uri, str):
input_param = _pipeline_param.PipelineParam(
name='uri', value=artifact_uri, param_type='String')
else:
raise ValueError(
'Importer got unexpected artifact_uri: {} of type: {}.'.format(
artifact_uri, type(artifact_uri)))
old_warn_value = _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
_container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
task = _container_op.ContainerOp(
name='importer',
image='importer_image', # TODO: need a v1 implementation of importer.
file_outputs={
OUTPUT_KEY: "{{{{$.outputs.artifacts['{}'].uri}}}}".format(OUTPUT_KEY)
},
)
_container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value
artifact_type_schema = type_utils.get_artifact_type_schema(artifact_class)
task.importer_spec = _build_importer_spec(
artifact_uri=artifact_uri, artifact_type_schema=artifact_type_schema)
task.task_spec = _build_importer_task_spec(
importer_base_name=task.name, artifact_uri=artifact_uri)
task.component_spec = _build_importer_component_spec(
importer_base_name=task.name, artifact_type_schema=artifact_type_schema)
task.inputs = [input_param]
return task
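To make the two branches in importer() above concrete, a short sketch of a constant URI versus a URI supplied as a pipeline input (names are illustrative; after this change the same behavior is reached through the kfp.v2.dsl importer):

from kfp import dsl
from kfp.dsl.io_types import Dataset

@dsl.pipeline(name='importer-branches-demo')
def demo(uri_to_import: str):
    # Constant string: artifact_uri.constant_value.string_value is set.
    fixed = dsl.importer(
        artifact_uri='gs://some-bucket/train.csv', artifact_class=Dataset)
    # PipelineParam (pipeline input): artifact_uri.runtime_parameter is used and
    # the task reads the 'uri' input parameter at runtime.
    dynamic = dsl.importer(
        artifact_uri=uri_to_import, artifact_class=Dataset, reimport=True)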

View File

@ -1,165 +0,0 @@
# Copyright 2020 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from absl.testing import parameterized
import unittest
from kfp.dsl import _pipeline_param
from kfp.dsl import importer_node
from kfp.pipeline_spec import pipeline_spec_pb2 as pb
from google.protobuf import json_format
class ImporterNodeTest(parameterized.TestCase):
@parameterized.parameters(
{
# artifact_uri is a constant value
'input_uri':
'gs://artifact',
'artifact_type_schema':
pb.ArtifactTypeSchema(schema_title='system.Dataset'),
'expected_result': {
'artifactUri': {
'constantValue': {
'stringValue': 'gs://artifact'
}
},
'typeSchema': {
'schemaTitle': 'system.Dataset'
}
}
},
{
# artifact_uri is from PipelineParam
'input_uri':
_pipeline_param.PipelineParam(name='uri_to_import'),
'artifact_type_schema':
pb.ArtifactTypeSchema(schema_title='system.Model'),
'expected_result': {
'artifactUri': {
'runtimeParameter': 'uri'
},
'typeSchema': {
'schemaTitle': 'system.Model'
}
},
})
def test_build_importer_spec(self, input_uri, artifact_type_schema,
expected_result):
expected_importer_spec = pb.PipelineDeploymentConfig.ImporterSpec()
json_format.ParseDict(expected_result, expected_importer_spec)
importer_spec = importer_node._build_importer_spec(
artifact_uri=input_uri, artifact_type_schema=artifact_type_schema)
self.maxDiff = None
self.assertEqual(expected_importer_spec, importer_spec)
@parameterized.parameters(
{
# artifact_uri is a constant value
'importer_name': 'importer-1',
'input_uri': 'gs://artifact',
'expected_result': {
'taskInfo': {
'name': 'importer-1'
},
'inputs': {
'parameters': {
'uri': {
'runtimeValue': {
'constantValue': {
'stringValue': 'gs://artifact'
}
}
}
}
},
'componentRef': {
'name': 'comp-importer-1'
},
}
},
{
# artifact_uri is from PipelineParam
'importer_name': 'importer-2',
'input_uri': _pipeline_param.PipelineParam(name='uri_to_import'),
'expected_result': {
'taskInfo': {
'name': 'importer-2'
},
'inputs': {
'parameters': {
'uri': {
'componentInputParameter': 'uri_to_import'
}
}
},
'componentRef': {
'name': 'comp-importer-2'
},
},
})
def test_build_importer_task_spec(self, importer_name, input_uri,
expected_result):
expected_task_spec = pb.PipelineTaskSpec()
json_format.ParseDict(expected_result, expected_task_spec)
task_spec = importer_node._build_importer_task_spec(
importer_base_name=importer_name, artifact_uri=input_uri)
self.maxDiff = None
self.assertEqual(expected_task_spec, task_spec)
def test_build_importer_component_spec(self):
expected_importer_component = {
'inputDefinitions': {
'parameters': {
'uri': {
'type': 'STRING'
}
}
},
'outputDefinitions': {
'artifacts': {
'artifact': {
'artifactType': {
'schemaTitle': 'system.Artifact'
}
}
}
},
'executorLabel': 'exec-importer-1'
}
expected_importer_comp_spec = pb.ComponentSpec()
json_format.ParseDict(expected_importer_component,
expected_importer_comp_spec)
importer_comp_spec = importer_node._build_importer_component_spec(
importer_base_name='importer-1',
artifact_type_schema=pb.ArtifactTypeSchema(
schema_title='system.Artifact'))
self.maxDiff = None
self.assertEqual(expected_importer_comp_spec, importer_comp_spec)
def test_import_with_invalid_artifact_uri_value_should_fail(self):
from kfp.dsl.io_types import Dataset
with self.assertRaisesRegex(
ValueError,
"Importer got unexpected artifact_uri: 123 of type: <class 'int'>."):
importer_node.importer(artifact_uri=123, artifact_class=Dataset)
if __name__ == '__main__':
unittest.main()

View File

@ -11,534 +11,24 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes for input/output types in KFP SDK.
"""Deprecated. See kfp.v2.types.artifact_types instead.
These are only compatible with v2 Pipelines.
This module will be removed in KFP v2.0.
"""
import os
from typing import Dict, Generic, List, Optional, Type, TypeVar, Union
_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'
_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'
_S3_LOCAL_MOUNT_PREFIX = '/s3/'
class Artifact(object):
"""Generic Artifact class.
This class is meant to represent the metadata around an input or output
machine-learning Artifact. Artifacts have URIs, which can either be a location
on disk (or Cloud storage) or some other resource identifier such as
an API resource name.
Artifacts carry a `metadata` field, which is a dictionary for storing
metadata related to this artifact.
"""
TYPE_NAME = 'system.Artifact'
def __init__(self,
name: Optional[str] = None,
uri: Optional[str] = None,
metadata: Optional[Dict] = None):
"""Initializes the Artifact with the given name, URI and metadata."""
self.uri = uri or ''
self.name = name or ''
self.metadata = metadata or {}
@property
def path(self):
return self._get_path()
@path.setter
def path(self, path):
self._set_path(path)
def _get_path(self) -> Optional[str]:
if self.uri.startswith('gs://'):
return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]
elif self.uri.startswith('minio://'):
return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]
elif self.uri.startswith('s3://'):
return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]
return None
def _set_path(self, path):
if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):
path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]
elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):
path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]
elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):
path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]
self.uri = path
class Model(Artifact):
"""An artifact representing an ML Model."""
TYPE_NAME = 'system.Model'
def __init__(self,
name: Optional[str] = None,
uri: Optional[str] = None,
metadata: Optional[Dict] = None):
super().__init__(uri=uri, name=name, metadata=metadata)
@property
def framework(self) -> str:
return self._get_framework()
def _get_framework(self) -> str:
return self.metadata.get('framework', '')
@framework.setter
def framework(self, framework: str):
self._set_framework(framework)
def _set_framework(self, framework: str):
self.metadata['framework'] = framework
class Dataset(Artifact):
"""An artifact representing an ML Dataset."""
TYPE_NAME = 'system.Dataset'
def __init__(self,
name: Optional[str] = None,
uri: Optional[str] = None,
metadata: Optional[Dict] = None):
super().__init__(uri=uri, name=name, metadata=metadata)
class Metrics(Artifact):
"""Represent a simple base Artifact type to store key-value scalar metrics."""
TYPE_NAME = 'system.Metrics'
def __init__(self,
name: Optional[str] = None,
uri: Optional[str] = None,
metadata: Optional[Dict] = None):
super().__init__(uri=uri, name=name, metadata=metadata)
def log_metric(self, metric: str, value: float):
"""Sets a custom scalar metric.
Args:
metric: Metric key
value: Value of the metric.
"""
self.metadata[metric] = value
class ClassificationMetrics(Artifact):
"""Represents Artifact class to store Classification Metrics."""
TYPE_NAME = 'system.ClassificationMetrics'
def __init__(self,
name: Optional[str] = None,
uri: Optional[str] = None,
metadata: Optional[Dict] = None):
super().__init__(uri=uri, name=name, metadata=metadata)
def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):
"""Logs a single data point in the ROC Curve.
Args:
fpr: False positive rate value of the data point.
tpr: True positive rate value of the data point.
threshold: Threshold value for the data point.
"""
roc_reading = {
'confidenceThreshold': threshold,
'recall': tpr,
'falsePositiveRate': fpr
}
if 'confidenceMetrics' not in self.metadata.keys():
self.metadata['confidenceMetrics'] = []
self.metadata['confidenceMetrics'].append(roc_reading)
def log_roc_curve(self, fpr: List[float], tpr: List[float],
threshold: List[float]):
"""Logs an ROC curve.
The list length of fpr, tpr and threshold must be the same.
Args:
fpr: List of false positive rate values.
tpr: List of true positive rate values.
threshold: List of threshold values.
"""
if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(
threshold):
raise ValueError('Length of fpr, tpr and threshold must be the same. '
'Got lengths {}, {} and {} respectively.'.format(
len(fpr), len(tpr), len(threshold)))
for i in range(len(fpr)):
self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])
def set_confusion_matrix_categories(self, categories: List[str]):
"""Stores confusion matrix categories.
Args:
categories: List of strings specifying the categories.
"""
self._categories = []
annotation_specs = []
for category in categories:
annotation_spec = {'displayName': category}
self._categories.append(category)
annotation_specs.append(annotation_spec)
self._matrix = []
for row in range(len(self._categories)):
self._matrix.append({'row': [0] * len(self._categories)})
self._confusion_matrix = {}
self._confusion_matrix['annotationSpecs'] = annotation_specs
self._confusion_matrix['rows'] = self._matrix
self.metadata['confusionMatrix'] = self._confusion_matrix
def log_confusion_matrix_row(self, row_category: str, row: List[float]):
"""Logs a confusion matrix row.
Args:
row_category: Category to which the row belongs.
row: List of integers specifying the values for the row.
Raises:
ValueError: If row_category is not in the list of categories
  set in the set_confusion_matrix_categories call.
"""
if row_category not in self._categories:
raise ValueError('Invalid category: {} passed. Expected one of: {}'.\
format(row_category, self._categories))
if len(row) != len(self._categories):
raise ValueError('Invalid row. Expected size: {} got: {}'.\
format(len(self._categories), len(row)))
self._matrix[self._categories.index(row_category)] = {'row': row}
self.metadata['confusionMatrix'] = self._confusion_matrix
def log_confusion_matrix_cell(self, row_category: str, col_category: str,
value: int):
"""Logs a cell in the confusion matrix.
Args:
row_category: String representing the name of the row category.
col_category: String representing the name of the column category.
value: Int value of the cell.
Raises:
ValueError: If row_category or col_category is not in the list of
  categories set in the set_confusion_matrix_categories call.
"""
if row_category not in self._categories:
raise ValueError('Invalid category: {} passed. Expected one of: {}'.\
format(row_category, self._categories))
if col_category not in self._categories:
raise ValueError('Invalid category: {} passed. Expected one of: {}'.\
format(col_category, self._categories))
self._matrix[self._categories.index(row_category)]['row'][
self._categories.index(col_category)] = value
self.metadata['confusionMatrix'] = self._confusion_matrix
def log_confusion_matrix(self, categories: List[str],
matrix: List[List[int]]):
"""Logs a confusion matrix.
Args:
categories: List of the category names.
matrix: Complete confusion matrix.
Raises:
ValueError: Length of categories does not match number of rows or columns.
"""
self.set_confusion_matrix_categories(categories)
if len(matrix) != len(categories):
raise ValueError('Invalid matrix: {} passed for categories: {}'.\
format(matrix, categories))
for index in range(len(categories)):
if len(matrix[index]) != len(categories):
raise ValueError('Invalid matrix: {} passed for categories: {}'.\
format(matrix, categories))
self.log_confusion_matrix_row(categories[index], matrix[index])
self.metadata['confusionMatrix'] = self._confusion_matrix
class SlicedClassificationMetrics(Artifact):
"""Metrics class representing Sliced Classification Metrics.
Similar to ClassificationMetrics, clients using this class are expected to
use its log methods to log metrics, with the difference that each log method
takes a slice to associate the ClassificationMetrics with.
"""
TYPE_NAME = 'system.SlicedClassificationMetrics'
def __init__(self,
name: Optional[str] = None,
uri: Optional[str] = None,
metadata: Optional[Dict] = None):
super().__init__(uri=uri, name=name, metadata=metadata)
self._sliced_metrics = {}
def _upsert_classification_metrics_for_slice(self, slice: str):
"""Upserts the classification metrics instance for a slice."""
if slice not in self._sliced_metrics:
self._sliced_metrics[slice] = ClassificationMetrics()
def _update_metadata(self, slice: str):
"""Updates metadata to adhere to the metrics schema."""
self.metadata = {}
self.metadata['evaluationSlices'] = []
for slice in self._sliced_metrics.keys():
slice_metrics = {
'slice': slice,
'sliceClassificationMetrics': self._sliced_metrics[slice].metadata
}
self.metadata['evaluationSlices'].append(slice_metrics)
def log_roc_reading(self, slice: str, threshold: float, tpr: float,
fpr: float):
"""Logs a single data point in the ROC Curve of a slice.
Args:
slice: String representing slice label.
threshold: Threshold value for the data point.
tpr: True positive rate value of the data point.
fpr: False positive rate value of the data point.
"""
self._upsert_classification_metrics_for_slice(slice)
self._sliced_metrics[slice].log_roc_data_point(
    fpr=fpr, tpr=tpr, threshold=threshold)
self._update_metadata(slice)
def load_roc_readings(self, slice: str, readings: List[List[float]]):
"""Supports bulk loading ROC Curve readings for a slice.
Args:
slice: String representing slice label.
readings: A 2-D list providing ROC Curve data points.
The expected order of the data points is: threshold,
true_positive_rate, false_positive_rate.
"""
self._upsert_classification_metrics_for_slice(slice)
for reading in readings:
  self._sliced_metrics[slice].log_roc_data_point(
      threshold=reading[0], tpr=reading[1], fpr=reading[2])
self._update_metadata(slice)
def set_confusion_matrix_categories(self, slice: str, categories: List[str]):
"""Stores confusion matrix categories for a slice..
Categories are stored in the internal metrics_utils.ConfusionMatrix
instance of the slice.
Args:
slice: String representing slice label.
categories: List of strings specifying the categories.
"""
self._upsert_classification_metrics_for_slice(slice)
self._sliced_metrics[slice].set_confusion_matrix_categories(categories)
self._update_metadata(slice)
def log_confusion_matrix_row(self, slice: str, row_category: str,
row: List[int]):
"""Logs a confusion matrix row for a slice.
The row is updated on the internal ClassificationMetrics
instance of the slice.
Args:
slice: String representing slice label.
row_category: Category to which the row belongs.
row: List of integers specifying the values for the row.
"""
self._upsert_classification_metrics_for_slice(slice)
self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)
self._update_metadata(slice)
def log_confusion_matrix_cell(self, slice: str, row_category: str,
col_category: str, value: int):
"""Logs a confusion matrix cell for a slice..
Cell is updated on the internal metrics_utils.ConfusionMatrix
instance of the slice.
Args:
slice: String representing slice label.
row_category: String representing the name of the row category.
col_category: String representing the name of the column category.
value: Int value of the cell.
"""
self._upsert_classification_metrics_for_slice(slice)
self._sliced_metrics[slice].log_confusion_matrix_cell(
row_category, col_category, value)
self._update_metadata(slice)
def load_confusion_matrix(self, slice: str, categories: List[str],
matrix: List[List[int]]):
"""Supports bulk loading the whole confusion matrix for a slice.
Args:
slice: String representing slice label.
categories: List of the category names.
matrix: Complete confusion matrix.
"""
self._upsert_classification_metrics_for_slice(slice)
self._sliced_metrics[slice].log_confusion_matrix(categories, matrix)
self._update_metadata(slice)
class HTML(Artifact):
"""An artifact representing an HTML file."""
TYPE_NAME = 'system.HTML'
def __init__(self,
name: Optional[str] = None,
uri: Optional[str] = None,
metadata: Optional[Dict] = None):
super().__init__(uri=uri, name=name, metadata=metadata)
class Markdown(Artifact):
"""An artifact representing an Markdown file."""
TYPE_NAME = 'system.Markdown'
def __init__(self,
name: Optional[str] = None,
uri: Optional[str] = None,
metadata: Optional[Dict] = None):
super().__init__(uri=uri, name=name, metadata=metadata)
T = TypeVar('T')
class InputAnnotation():
"""Marker type for input artifacts."""
pass
class OutputAnnotation():
"""Marker type for output artifacts."""
pass
# TODO: Use typing.Annotated instead of this hack.
# With typing.Annotated (Python 3.9+ or typing_extensions package), the
# following would look like:
# Input = typing.Annotated[T, InputAnnotation]
# Output = typing.Annotated[T, OutputAnnotation]
# Input represents an Input artifact of type T.
Input = Union[T, InputAnnotation]
# Output represents an Output artifact of type T.
Output = Union[T, OutputAnnotation]
def is_artifact_annotation(typ) -> bool:
if hasattr(typ, '_subs_tree'): # Python 3.6
subs_tree = typ._subs_tree()
return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [
InputAnnotation, OutputAnnotation
]
if not hasattr(typ, '__origin__'):
return False
if typ.__origin__ != Union and type(typ.__origin__) != type(Union):
return False
if not hasattr(typ, '__args__') or len(typ.__args__) != 2:
return False
if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:
return False
return True
def is_input_artifact(typ) -> bool:
"""Returns True if typ is of type Input[T]."""
if not is_artifact_annotation(typ):
return False
if hasattr(typ, '_subs_tree'): # Python 3.6
subs_tree = typ._subs_tree()
return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation
return typ.__args__[1] == InputAnnotation
def is_output_artifact(typ) -> bool:
"""Returns True if typ is of type Output[T]."""
if not is_artifact_annotation(typ):
return False
if hasattr(typ, '_subs_tree'): # Python 3.6
subs_tree = typ._subs_tree()
return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation
return typ.__args__[1] == OutputAnnotation
def get_io_artifact_class(typ):
if not is_artifact_annotation(typ):
return None
if typ == Input or typ == Output:
return None
if hasattr(typ, '_subs_tree'): # Python 3.6
subs_tree = typ._subs_tree()
if len(subs_tree) != 3:
return None
return subs_tree[1]
return typ.__args__[0]
def get_io_artifact_annotation(typ):
if not is_artifact_annotation(typ):
return None
if hasattr(typ, '_subs_tree'): # Python 3.6
subs_tree = typ._subs_tree()
if len(subs_tree) != 3:
return None
return subs_tree[2]
return typ.__args__[1]
_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {
x.TYPE_NAME: x
for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]
}
def create_runtime_artifact(runtime_artifact: Dict) -> Artifact:
"""Creates an Artifact instance from the specified RuntimeArtifact.
Args:
runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.
"""
schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')
artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)
if not artifact_type:
artifact_type = Artifact
return artifact_type(
uri=runtime_artifact.get('uri', ''),
name=runtime_artifact.get('name', ''),
metadata=runtime_artifact.get('metadata', {}),
)
import warnings
from kfp.v2.components.types import artifact_types
warnings.warn(
'Module kfp.dsl.io_types is deprecated and will be removed'
' in KFP v2.0. Please import types from kfp.v2.dsl instead.',
category=FutureWarning)
Artifact = artifact_types.Artifact
Dataset = artifact_types.Dataset
Metrics = artifact_types.Metrics
ClassificationMetrics = artifact_types.ClassificationMetrics
Model = artifact_types.Model
SlicedClassificationMetrics = artifact_types.SlicedClassificationMetrics
HTML = artifact_types.HTML
Markdown = artifact_types.Markdown
create_runtime_artifact = artifact_types.create_runtime_artifact
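A small sketch of what the alias block above means for existing imports, assuming kfp.v2.dsl re-exports the same artifact classes as kfp.v2.components.types.artifact_types:

from kfp.dsl import io_types                        # still importable; warns once
from kfp.v2.components.types import artifact_types
from kfp.v2.dsl import Dataset                      # preferred import going forward

# The legacy names are the same class objects, just re-homed under kfp.v2.
assert io_types.Dataset is artifact_types.Dataset
assert Dataset is artifact_types.Dataset            # assumed v2 re-export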

View File

@ -1,107 +0,0 @@
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for kfp.dsl.io_types."""
import unittest
import json
import os
from typing import List, Optional, Union
from kfp.dsl import io_types
from kfp.dsl.io_types import Input, InputAnnotation, Output, Model, OutputAnnotation
class IOTypesTest(unittest.TestCase):
def test_complex_metrics(self):
metrics = io_types.ClassificationMetrics()
metrics.log_roc_data_point(threshold=0.1, tpr=98.2, fpr=96.2)
metrics.log_roc_data_point(threshold=24.3, tpr=24.5, fpr=98.4)
metrics.set_confusion_matrix_categories(['dog', 'cat', 'horses'])
metrics.log_confusion_matrix_row('dog', [2, 6, 0])
metrics.log_confusion_matrix_cell('cat', 'dog', 3)
metrics.log_confusion_matrix_cell('horses', 'horses', 3)
metrics.metadata['test'] = 1.0
with open(
os.path.join(
os.path.dirname(__file__), 'test_data',
'expected_io_types_classification_metrics.json')) as json_file:
expected_json = json.load(json_file)
self.assertEqual(expected_json, metrics.metadata)
def test_complex_metrics_bulk_loading(self):
metrics = io_types.ClassificationMetrics()
metrics.log_roc_curve(
fpr=[85.1, 85.1, 85.1],
tpr=[52.6, 52.6, 52.6],
threshold=[53.6, 53.6, 53.6])
metrics.log_confusion_matrix(['dog', 'cat', 'horses'],
[[2, 6, 0], [3, 5, 6], [5, 7, 8]])
with open(
os.path.join(
os.path.dirname(__file__), 'test_data',
'expected_io_types_bulk_load_classification_metrics.json')
) as json_file:
expected_json = json.load(json_file)
self.assertEqual(expected_json, metrics.metadata)
def test_is_artifact_annotation(self):
self.assertTrue(io_types.is_artifact_annotation(Input[Model]))
self.assertTrue(io_types.is_artifact_annotation(Output[Model]))
self.assertTrue(io_types.is_artifact_annotation(Output['MyArtifact']))
self.assertFalse(io_types.is_artifact_annotation(Model))
self.assertFalse(io_types.is_artifact_annotation(int))
self.assertFalse(io_types.is_artifact_annotation('Dataset'))
self.assertFalse(io_types.is_artifact_annotation(List[str]))
self.assertFalse(io_types.is_artifact_annotation(Optional[str]))
def test_is_input_artifact(self):
self.assertTrue(io_types.is_input_artifact(Input[Model]))
self.assertTrue(io_types.is_input_artifact(Input))
self.assertFalse(io_types.is_input_artifact(Output[Model]))
self.assertFalse(io_types.is_input_artifact(Output))
def test_is_output_artifact(self):
self.assertTrue(io_types.is_output_artifact(Output[Model]))
self.assertTrue(io_types.is_output_artifact(Output))
self.assertFalse(io_types.is_output_artifact(Input[Model]))
self.assertFalse(io_types.is_output_artifact(Input))
def test_get_io_artifact_class(self):
self.assertEqual(io_types.get_io_artifact_class(Output[Model]), Model)
self.assertEqual(io_types.get_io_artifact_class(Input), None)
self.assertEqual(io_types.get_io_artifact_class(Output), None)
self.assertEqual(io_types.get_io_artifact_class(Model), None)
self.assertEqual(io_types.get_io_artifact_class(str), None)
def test_get_io_artifact_annotation(self):
self.assertEqual(
io_types.get_io_artifact_annotation(Output[Model]), OutputAnnotation)
self.assertEqual(
io_types.get_io_artifact_annotation(Input[Model]), InputAnnotation)
self.assertEqual(
io_types.get_io_artifact_annotation(Input), InputAnnotation)
self.assertEqual(
io_types.get_io_artifact_annotation(Output), OutputAnnotation)
self.assertEqual(io_types.get_io_artifact_annotation(Model), None)
self.assertEqual(io_types.get_io_artifact_annotation(str), None)
if __name__ == '__main__':
unittest.main()
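As a companion to the removed annotation tests above, a minimal sketch of the component-signature form that Input[...] and Output[...] are meant to mark; the component and its file contents are illustrative:

from kfp.v2.dsl import Dataset, Input, Model, Output, component

@component
def evaluate(model: Input[Model], report: Output[Dataset]):
    # Input[Model] resolves to an artifact handle with .uri/.path/.metadata;
    # Output[Dataset] gives a writable handle for the produced artifact.
    with open(report.path, 'w') as f:
        f.write('evaluated model at {}\n'.format(model.uri))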

View File

@ -1,4 +1,4 @@
# Copyright 2020 The Kubeflow Authors
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -11,150 +11,20 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for component I/O type mapping."""
import inspect
from typing import Dict, List, Optional, Type, Union
from kfp.components import structures
from kfp.components import type_annotation_utils
from kfp.pipeline_spec import pipeline_spec_pb2
from kfp.dsl import artifact_utils
from kfp.dsl import io_types
"""Deprecated. See kfp.v2.components.types.type_utils instead.
# ComponentSpec I/O types to DSL ontology artifact classes mapping.
_ARTIFACT_CLASSES_MAPPING = {
'model': io_types.Model,
'dataset': io_types.Dataset,
'metrics': io_types.Metrics,
'classificationmetrics': io_types.ClassificationMetrics,
'slicedclassificationmetrics': io_types.SlicedClassificationMetrics,
'html': io_types.HTML,
'markdown': io_types.Markdown,
}
This module will be removed in KFP v2.0.
"""
import warnings
from kfp.v2.components.types import type_utils
# ComponentSpec I/O types to (IR) PipelineTaskSpec I/O types mapping.
# The keys are normalized (lowercased). These are types viewed as Parameters.
# The values are the corresponding IR parameter primitive types.
_PARAMETER_TYPES_MAPPING = {
'integer': pipeline_spec_pb2.PrimitiveType.INT,
'int': pipeline_spec_pb2.PrimitiveType.INT,
'double': pipeline_spec_pb2.PrimitiveType.DOUBLE,
'float': pipeline_spec_pb2.PrimitiveType.DOUBLE,
'string': pipeline_spec_pb2.PrimitiveType.STRING,
'str': pipeline_spec_pb2.PrimitiveType.STRING,
'text': pipeline_spec_pb2.PrimitiveType.STRING,
'bool': pipeline_spec_pb2.PrimitiveType.STRING,
'boolean': pipeline_spec_pb2.PrimitiveType.STRING,
'dict': pipeline_spec_pb2.PrimitiveType.STRING,
'list': pipeline_spec_pb2.PrimitiveType.STRING,
'jsonobject': pipeline_spec_pb2.PrimitiveType.STRING,
'jsonarray': pipeline_spec_pb2.PrimitiveType.STRING,
}
warnings.warn(
'Module kfp.dsl.type_utils is deprecated and will be removed'
' in KFP v2.0. Please import from kfp.v2.components.types.type_utils instead.',
category=FutureWarning)
# Mapping primitive types to their IR message field names.
# This is used in constructing condition strings.
_PARAMETER_TYPES_VALUE_REFERENCE_MAPPING = {
pipeline_spec_pb2.PrimitiveType.INT: 'int_value',
pipeline_spec_pb2.PrimitiveType.DOUBLE: 'double_value',
pipeline_spec_pb2.PrimitiveType.STRING: 'string_value',
}
def is_parameter_type(type_name: Optional[Union[str, dict]]) -> bool:
"""Check if a ComponentSpec I/O type is considered as a parameter type.
Args:
type_name: type name of the ComponentSpec I/O type.
Returns:
True if the type name maps to a parameter type else False.
"""
if isinstance(type_name, str):
type_name = type_annotation_utils.get_short_type_name(type_name)
elif isinstance(type_name, dict):
type_name = list(type_name.keys())[0]
else:
return False
return type_name.lower() in _PARAMETER_TYPES_MAPPING
def get_artifact_type_schema(
artifact_class_or_type_name: Optional[Union[str, Type[io_types.Artifact]]]
) -> pipeline_spec_pb2.ArtifactTypeSchema:
"""Gets the IR I/O artifact type msg for the given ComponentSpec I/O type."""
artifact_class = io_types.Artifact
if isinstance(artifact_class_or_type_name, str):
artifact_class = _ARTIFACT_CLASSES_MAPPING.get(
artifact_class_or_type_name.lower(), io_types.Artifact)
elif inspect.isclass(artifact_class_or_type_name) and issubclass(
artifact_class_or_type_name, io_types.Artifact):
artifact_class = artifact_class_or_type_name
return pipeline_spec_pb2.ArtifactTypeSchema(
schema_title=artifact_class.TYPE_NAME)
def get_parameter_type(
param_type: Optional[Union[Type, str, dict]]
) -> pipeline_spec_pb2.PrimitiveType:
"""Get the IR I/O parameter type for the given ComponentSpec I/O type.
Args:
param_type: type of the ComponentSpec I/O type. Can be a primitive Python
builtin type or a type name.
Returns:
The enum value of the mapped IR I/O primitive type.
Raises:
AttributeError: if type_name is not a string type.
"""
if type(param_type) == type:
type_name = param_type.__name__
elif isinstance(param_type, dict):
type_name = list(param_type.keys())[0]
else:
type_name = type_annotation_utils.get_short_type_name(str(param_type))
return _PARAMETER_TYPES_MAPPING.get(type_name.lower())
def get_parameter_type_field_name(type_name: Optional[str]) -> str:
"""Get the IR field name for the given primitive type.
For example: 'str' -> 'string_value', 'double' -> 'double_value', etc.
Args:
type_name: type name of the ComponentSpec I/O primitive type.
Returns:
The IR value reference field name.
Raises:
AttributeError: if type_name is not a string type.
"""
return _PARAMETER_TYPES_VALUE_REFERENCE_MAPPING.get(
get_parameter_type(type_name))
def get_input_artifact_type_schema(
input_name: str,
inputs: List[structures.InputSpec],
) -> Optional[str]:
"""Find the input artifact type by input name.
Args:
input_name: The name of the component input.
inputs: The list of InputSpec
Returns:
The artifact type schema of the input.
Raises:
AssertionError if input not found, or input found but not an artifact type.
"""
for component_input in inputs:
if component_input.name == input_name:
assert not is_parameter_type(
component_input.type), 'Input is not an artifact type.'
return get_artifact_type_schema(component_input.type)
assert False, 'Input not found.'
is_parameter_type = type_utils.is_parameter_type
get_artifact_type_schema = type_utils.get_artifact_type_schema
get_parameter_type = type_utils.get_parameter_type
get_parameter_type_field_name = type_utils.get_parameter_type_field_name
get_input_artifact_type_schema = type_utils.get_input_artifact_type_schema
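A quick illustration of the aliasing above and of the parameter-type mapping it forwards to; the expected values mirror the mapping tables shown above and the tests that follow:

from kfp.dsl import type_utils as legacy_type_utils   # emits the FutureWarning above
from kfp.v2.components.types import type_utils
from kfp.pipeline_spec import pipeline_spec_pb2 as pb

assert legacy_type_utils.is_parameter_type is type_utils.is_parameter_type
assert type_utils.is_parameter_type('Integer')
assert not type_utils.is_parameter_type('Dataset')
assert type_utils.get_parameter_type(dict) == pb.PrimitiveType.STRING
assert type_utils.get_parameter_type_field_name('Float') == 'double_value'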

View File

@ -1,277 +0,0 @@
# Copyright 2020 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from absl.testing import parameterized
import sys
import unittest
from typing import Any, Dict, List
from kfp.components import structures
from kfp.dsl import io_types
from kfp.dsl import type_utils
from kfp.pipeline_spec import pipeline_spec_pb2 as pb
_PARAMETER_TYPES = [
'String',
'str',
'Integer',
'int',
'Float',
'Double',
'bool',
'Boolean',
'Dict',
'List',
'JsonObject',
'JsonArray',
{
'JsonObject': {
'data_type': 'proto:tfx.components.trainer.TrainArgs'
}
},
]
_KNOWN_ARTIFACT_TYPES = ['Model', 'Dataset', 'Schema', 'Metrics']
_UNKNOWN_ARTIFACT_TYPES = [None, 'Arbtrary Model', 'dummy']
class _ArbitraryClass:
pass
class TypeUtilsTest(parameterized.TestCase):
def test_is_parameter_type(self):
for type_name in _PARAMETER_TYPES:
self.assertTrue(type_utils.is_parameter_type(type_name))
for type_name in _KNOWN_ARTIFACT_TYPES + _UNKNOWN_ARTIFACT_TYPES:
self.assertFalse(type_utils.is_parameter_type(type_name))
@parameterized.parameters(
{
'artifact_class_or_type_name': 'Model',
'expected_result': pb.ArtifactTypeSchema(schema_title='system.Model')
},
{
'artifact_class_or_type_name': io_types.Model,
'expected_result': pb.ArtifactTypeSchema(schema_title='system.Model')
},
{
'artifact_class_or_type_name':
'Dataset',
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.Dataset')
},
{
'artifact_class_or_type_name':
io_types.Dataset,
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.Dataset')
},
{
'artifact_class_or_type_name':
'Metrics',
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.Metrics')
},
{
'artifact_class_or_type_name':
io_types.Metrics,
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.Metrics')
},
{
'artifact_class_or_type_name':
'ClassificationMetrics',
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.ClassificationMetrics')
},
{
'artifact_class_or_type_name':
io_types.ClassificationMetrics,
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.ClassificationMetrics')
},
{
'artifact_class_or_type_name':
'SlicedClassificationMetrics',
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.SlicedClassificationMetrics')
},
{
'artifact_class_or_type_name':
io_types.SlicedClassificationMetrics,
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.SlicedClassificationMetrics')
},
{
'artifact_class_or_type_name':
'arbitrary name',
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.Artifact')
},
{
'artifact_class_or_type_name':
_ArbitraryClass,
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.Artifact')
},
{
'artifact_class_or_type_name':
io_types.HTML,
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.HTML')
},
{
'artifact_class_or_type_name':
io_types.Markdown,
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.Markdown')
},
)
def test_get_artifact_type_schema(self, artifact_class_or_type_name,
expected_result):
self.assertEqual(
expected_result,
type_utils.get_artifact_type_schema(artifact_class_or_type_name))
@parameterized.parameters(
{
'given_type': 'Int',
'expected_type': pb.PrimitiveType.INT,
},
{
'given_type': 'Integer',
'expected_type': pb.PrimitiveType.INT,
},
{
'given_type': int,
'expected_type': pb.PrimitiveType.INT,
},
{
'given_type': 'Double',
'expected_type': pb.PrimitiveType.DOUBLE,
},
{
'given_type': 'Float',
'expected_type': pb.PrimitiveType.DOUBLE,
},
{
'given_type': float,
'expected_type': pb.PrimitiveType.DOUBLE,
},
{
'given_type': 'String',
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': 'Text',
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': str,
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': 'Boolean',
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': bool,
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': 'Dict',
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': dict,
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': 'List',
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': list,
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': Dict[str, int],
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': List[Any],
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': {
'JsonObject': {
'data_type': 'proto:tfx.components.trainer.TrainArgs'
}
},
'expected_type': pb.PrimitiveType.STRING,
},
)
def test_get_parameter_type(self, given_type, expected_type):
self.assertEqual(expected_type, type_utils.get_parameter_type(given_type))
# Test get parameter by Python type.
self.assertEqual(pb.PrimitiveType.INT, type_utils.get_parameter_type(int))
def test_get_parameter_type_invalid(self):
with self.assertRaises(AttributeError):
type_utils.get_parameter_type_schema(None)
def test_get_input_artifact_type_schema(self):
input_specs = [
structures.InputSpec(name='input1', type='String'),
structures.InputSpec(name='input2', type='Model'),
structures.InputSpec(name='input3', type=None),
]
# input not found.
with self.assertRaises(AssertionError) as cm:
type_utils.get_input_artifact_type_schema('input0', input_specs)
self.assertEqual('Input not found.', str(cm.exception))
# input found, but it doesn't map to an artifact type.
with self.assertRaises(AssertionError) as cm:
type_utils.get_input_artifact_type_schema('input1', input_specs)
self.assertEqual('Input is not an artifact type.', str(cm.exception))
# input found, and a matching artifact type schema returned.
self.assertEqual(
'system.Model',
type_utils.get_input_artifact_type_schema('input2',
input_specs).schema_title)
# input found, and the default artifact type schema returned.
self.assertEqual(
'system.Artifact',
type_utils.get_input_artifact_type_schema('input3',
input_specs).schema_title)
def test_get_parameter_type_field_name(self):
self.assertEqual('string_value',
type_utils.get_parameter_type_field_name('String'))
self.assertEqual('int_value',
type_utils.get_parameter_type_field_name('Integer'))
self.assertEqual('double_value',
type_utils.get_parameter_type_field_name('Float'))
if __name__ == '__main__':
unittest.main()

View File

@ -19,7 +19,7 @@ Feature stage:
from typing import Dict, Union
import warnings
from kfp.dsl import type_utils
from kfp.v2.components.types import type_utils
class BaseType:

View File

@ -25,20 +25,19 @@ import uuid
import warnings
from typing import Any, Callable, Dict, List, Mapping, Optional, Set, Tuple, Union
from google.protobuf import json_format
import kfp
from kfp.compiler._k8s_helper import sanitize_k8s_name
from kfp.components import _python_op
from kfp import dsl
from kfp.dsl import _for_loop
from kfp.dsl import _pipeline_param
from kfp.v2.compiler import compiler_utils
from kfp.dsl import component_spec as dsl_component_spec
from kfp.dsl import dsl_utils
from kfp.dsl import io_types
from kfp.dsl import type_utils
from kfp.pipeline_spec import pipeline_spec_pb2
from kfp.v2.components.types import artifact_types, type_utils
from kfp.v2.components import component_factory
from google.protobuf import json_format
_GroupOrOp = Union[dsl.OpsGroup, dsl.BaseOp]
@ -557,8 +556,8 @@ class Compiler(object):
if artifact_spec.artifact_type.WhichOneof(
'kind'
) == 'schema_title' and artifact_spec.artifact_type.schema_title in [
io_types.Metrics.TYPE_NAME,
io_types.ClassificationMetrics.TYPE_NAME,
artifact_types.Metrics.TYPE_NAME,
artifact_types.ClassificationMetrics.TYPE_NAME,
]:
unique_output_name = '{}-{}'.format(op_task_spec.task_info.name,
output_name)
@ -1045,7 +1044,7 @@ class Compiler(object):
# Create the arg list with no default values and call pipeline function.
# Assign type information to the PipelineParam
pipeline_meta = _python_op._extract_component_interface(pipeline_func)
pipeline_meta = component_factory.extract_component_interface(pipeline_func)
pipeline_name = pipeline_name or pipeline_meta.name
pipeline_root = getattr(pipeline_func, 'pipeline_root', None)
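A rough sketch of the new call in the hunk above, assuming extract_component_interface keeps the _python_op behavior of returning a ComponentSpec-like object whose .name is derived from the function name; the pipeline function here is illustrative:

from kfp.v2.components import component_factory

def two_step_pipeline(message: str = 'hello'):
    ...  # pipeline body elided for the sketch

pipeline_meta = component_factory.extract_component_interface(two_step_pipeline)
pipeline_name = pipeline_meta.name  # used when no explicit pipeline_name is passed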

View File

@ -18,7 +18,7 @@ import shutil
import tempfile
import unittest
from kfp.v2 import components
from kfp import components
from kfp.v2 import compiler
from kfp.v2 import dsl
from kfp.dsl import types

View File

@ -19,8 +19,8 @@ from typing import Any, Mapping, Optional, Union
from kfp.containers import _component_builder
from kfp.dsl import _container_op
from kfp.dsl import _pipeline_param
from kfp.dsl import type_utils
from kfp.pipeline_spec import pipeline_spec_pb2
from kfp.v2.components.types import type_utils
# Alias for PipelineContainerSpec
PipelineContainerSpec = pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec

View File

@ -17,23 +17,23 @@
"schemaTitle": "system.Dataset"
}
},
"output_dataset_two": {
"output_dataset_two_path": {
"artifactType": {
"schemaTitle": "system.Dataset"
}
}
},
"parameters": {
"output_bool_parameter": {
"output_bool_parameter_path": {
"type": "STRING"
},
"output_dict_parameter": {
"output_dict_parameter_path": {
"type": "STRING"
},
"output_list_parameter": {
"output_list_parameter_path": {
"type": "STRING"
},
"output_parameter": {
"output_parameter_path": {
"type": "STRING"
}
}
@ -43,7 +43,7 @@
"executorLabel": "exec-train",
"inputDefinitions": {
"artifacts": {
"dataset_one": {
"dataset_one_path": {
"artifactType": {
"schemaTitle": "system.Dataset"
}
@ -99,8 +99,8 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef preprocess(\n # An input parameter of type string.\n message: str,\n # Use Output[T] to get a metadata-rich handle to the output artifact\n # of type `Dataset`.\n output_dataset_one: Output[Dataset],\n # A locally accessible filepath for another output artifact of type\n # `Dataset`.\n output_dataset_two_path: OutputPath('Dataset'),\n # A locally accessible filepath for an output parameter of type string.\n output_parameter_path: OutputPath(str),\n # A locally accessible filepath for an output parameter of type bool.\n output_bool_parameter_path: OutputPath(bool),\n # A locally accessible filepath for an output parameter of type dict.\n output_dict_parameter_path: OutputPath(Dict[str, int]),\n # A locally accessible filepath for an output parameter of type list.\n output_list_parameter_path: OutputPath(List[str]),\n):\n \"\"\"Dummy preprocessing step\"\"\"\n\n # Use Dataset.path to access a local file path for writing.\n # One can also use Dataset.uri to access the actual URI file path.\n with open(output_dataset_one.path, 'w') as f:\n f.write(message)\n\n # OutputPath is used to just pass the local file path of the output artifact\n # to the function.\n with open(output_dataset_two_path, 'w') as f:\n f.write(message)\n\n with open(output_parameter_path, 'w') as f:\n f.write(message)\n\n with open(output_bool_parameter_path, 'w') as f:\n f.write(str(True)) # use either `str()` or `json.dumps()` for bool values.\n\n import json\n with open(output_dict_parameter_path, 'w') as f:\n f.write(json.dumps({'A': 1, 'B': 2}))\n\n with open(output_list_parameter_path, 'w') as f:\n f.write(json.dumps(['a', 'b', 'c']))\n\n"
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef preprocess(\n # An input parameter of type string.\n message: str,\n # Use Output[T] to get a metadata-rich handle to the output artifact\n # of type `Dataset`.\n output_dataset_one: Output[Dataset],\n # A locally accessible filepath for another output artifact of type\n # `Dataset`.\n output_dataset_two_path: OutputPath('Dataset'),\n # A locally accessible filepath for an output parameter of type string.\n output_parameter_path: OutputPath(str),\n # A locally accessible filepath for an output parameter of type bool.\n output_bool_parameter_path: OutputPath(bool),\n # A locally accessible filepath for an output parameter of type dict.\n output_dict_parameter_path: OutputPath(Dict[str, int]),\n # A locally accessible filepath for an output parameter of type list.\n output_list_parameter_path: OutputPath(List[str]),\n):\n \"\"\"Dummy preprocessing step\"\"\"\n\n # Use Dataset.path to access a local file path for writing.\n # One can also use Dataset.uri to access the actual URI file path.\n with open(output_dataset_one.path, 'w') as f:\n f.write(message)\n\n # OutputPath is used to just pass the local file path of the output artifact\n # to the function.\n with open(output_dataset_two_path, 'w') as f:\n f.write(message)\n\n with open(output_parameter_path, 'w') as f:\n f.write(message)\n\n with open(output_bool_parameter_path, 'w') as f:\n f.write(str(True)) # use either `str()` or `json.dumps()` for bool values.\n\n import json\n with open(output_dict_parameter_path, 'w') as f:\n f.write(json.dumps({'A': 1, 'B': 2}))\n\n with open(output_list_parameter_path, 'w') as f:\n f.write(json.dumps(['a', 'b', 'c']))\n\n"
],
"image": "python:3.7"
}
@ -119,8 +119,8 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef train(\n # Use InputPath to get a locally accessible path for the input artifact\n # of type `Dataset`.\n dataset_one_path: InputPath('Dataset'),\n # Use Input[T] to get a metadata-rich handle to the input artifact\n # of type `Dataset`.\n dataset_two: Input[Dataset],\n # An input parameter of type string.\n message: str,\n # Use Output[T] to get a metadata-rich handle to the output artifact\n # of type `Dataset`.\n model: Output[Model],\n # An input parameter of type bool.\n input_bool: bool,\n # An input parameter of type dict.\n input_dict: Dict[str, int],\n # An input parameter of type List[str].\n input_list: List[str],\n # An input parameter of type int with a default value.\n num_steps: int = 100,\n):\n \"\"\"Dummy Training step\"\"\"\n with open(dataset_one_path, 'r') as input_file:\n dataset_one_contents = input_file.read()\n\n with open(dataset_two.path, 'r') as input_file:\n dataset_two_contents = input_file.read()\n\n line = (f'dataset_one_contents: {dataset_one_contents} || '\n f'dataset_two_contents: {dataset_two_contents} || '\n f'message: {message} || '\n f'input_bool: {input_bool}, type {type(input_bool)} || '\n f'input_dict: {input_dict}, type {type(input_dict)} || '\n f'input_list: {input_list}, type {type(input_list)} \\n')\n\n with open(model.path, 'w') as output_file:\n for i in range(num_steps):\n output_file.write('Step {}\\n{}\\n=====\\n'.format(i, line))\n\n # Use model.get() to get a Model artifact, which has a .metadata dictionary\n # to store arbitrary metadata for the output artifact.\n model.metadata['accuracy'] = 0.9\n\n"
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef train(\n # Use InputPath to get a locally accessible path for the input artifact\n # of type `Dataset`.\n dataset_one_path: InputPath('Dataset'),\n # Use Input[T] to get a metadata-rich handle to the input artifact\n # of type `Dataset`.\n dataset_two: Input[Dataset],\n # An input parameter of type string.\n message: str,\n # Use Output[T] to get a metadata-rich handle to the output artifact\n # of type `Dataset`.\n model: Output[Model],\n # An input parameter of type bool.\n input_bool: bool,\n # An input parameter of type dict.\n input_dict: Dict[str, int],\n # An input parameter of type List[str].\n input_list: List[str],\n # An input parameter of type int with a default value.\n num_steps: int = 100,\n):\n \"\"\"Dummy Training step\"\"\"\n with open(dataset_one_path, 'r') as input_file:\n dataset_one_contents = input_file.read()\n\n with open(dataset_two.path, 'r') as input_file:\n dataset_two_contents = input_file.read()\n\n line = (f'dataset_one_contents: {dataset_one_contents} || '\n f'dataset_two_contents: {dataset_two_contents} || '\n f'message: {message} || '\n f'input_bool: {input_bool}, type {type(input_bool)} || '\n f'input_dict: {input_dict}, type {type(input_dict)} || '\n f'input_list: {input_list}, type {type(input_list)} \\n')\n\n with open(model.path, 'w') as output_file:\n for i in range(num_steps):\n output_file.write('Step {}\\n{}\\n=====\\n'.format(i, line))\n\n # Use model.get() to get a Model artifact, which has a .metadata dictionary\n # to store arbitrary metadata for the output artifact.\n model.metadata['accuracy'] = 0.9\n\n"
],
"image": "python:3.7"
}
@ -163,7 +163,7 @@
],
"inputs": {
"artifacts": {
"dataset_one": {
"dataset_one_path": {
"taskOutputArtifact": {
"outputArtifactKey": "output_dataset_one",
"producerTask": "preprocess"
@ -171,7 +171,7 @@
},
"dataset_two": {
"taskOutputArtifact": {
"outputArtifactKey": "output_dataset_two",
"outputArtifactKey": "output_dataset_two_path",
"producerTask": "preprocess"
}
}
@ -179,25 +179,25 @@
"parameters": {
"input_bool": {
"taskOutputParameter": {
"outputParameterKey": "output_bool_parameter",
"outputParameterKey": "output_bool_parameter_path",
"producerTask": "preprocess"
}
},
"input_dict": {
"taskOutputParameter": {
"outputParameterKey": "output_dict_parameter",
"outputParameterKey": "output_dict_parameter_path",
"producerTask": "preprocess"
}
},
"input_list": {
"taskOutputParameter": {
"outputParameterKey": "output_list_parameter",
"outputParameterKey": "output_list_parameter_path",
"producerTask": "preprocess"
}
},
"message": {
"taskOutputParameter": {
"outputParameterKey": "output_parameter",
"outputParameterKey": "output_parameter_path",
"producerTask": "preprocess"
}
},

View File

@ -14,10 +14,8 @@
"""Sample pipeline for passing data in KFP v2."""
from typing import Dict, List
from kfp import dsl
from kfp import components
from kfp.components import InputPath, OutputPath
from kfp.v2.dsl import Input, Output, Dataset, Model, component
from kfp.v2 import dsl
from kfp.v2.dsl import Input, InputPath, Output, OutputPath, Dataset, Model, component
import kfp.v2.compiler as compiler
@ -40,30 +38,30 @@ def preprocess(
# A locally accessible filepath for an output parameter of type list.
output_list_parameter_path: OutputPath(List[str]),
):
"""Dummy preprocessing step"""
"""Dummy preprocessing step"""
# Use Dataset.path to access a local file path for writing.
# One can also use Dataset.uri to access the actual URI file path.
with open(output_dataset_one.path, 'w') as f:
f.write(message)
# Use Dataset.path to access a local file path for writing.
# One can also use Dataset.uri to access the actual URI file path.
with open(output_dataset_one.path, 'w') as f:
f.write(message)
# OutputPath is used to just pass the local file path of the output artifact
# to the function.
with open(output_dataset_two_path, 'w') as f:
f.write(message)
# OutputPath is used to just pass the local file path of the output artifact
# to the function.
with open(output_dataset_two_path, 'w') as f:
f.write(message)
with open(output_parameter_path, 'w') as f:
f.write(message)
with open(output_parameter_path, 'w') as f:
f.write(message)
with open(output_bool_parameter_path, 'w') as f:
f.write(str(True)) # use either `str()` or `json.dumps()` for bool values.
with open(output_bool_parameter_path, 'w') as f:
f.write(str(True)) # use either `str()` or `json.dumps()` for bool values.
import json
with open(output_dict_parameter_path, 'w') as f:
f.write(json.dumps({'A': 1, 'B': 2}))
import json
with open(output_dict_parameter_path, 'w') as f:
f.write(json.dumps({'A': 1, 'B': 2}))
with open(output_list_parameter_path, 'w') as f:
f.write(json.dumps(['a', 'b', 'c']))
with open(output_list_parameter_path, 'w') as f:
f.write(json.dumps(['a', 'b', 'c']))
@component
@ -88,42 +86,43 @@ def train(
# An input parameter of type int with a default value.
num_steps: int = 100,
):
"""Dummy Training step"""
with open(dataset_one_path, 'r') as input_file:
dataset_one_contents = input_file.read()
"""Dummy Training step"""
with open(dataset_one_path, 'r') as input_file:
dataset_one_contents = input_file.read()
with open(dataset_two.path, 'r') as input_file:
dataset_two_contents = input_file.read()
with open(dataset_two.path, 'r') as input_file:
dataset_two_contents = input_file.read()
line = (f'dataset_one_contents: {dataset_one_contents} || '
f'dataset_two_contents: {dataset_two_contents} || '
f'message: {message} || '
f'input_bool: {input_bool}, type {type(input_bool)} || '
f'input_dict: {input_dict}, type {type(input_dict)} || '
f'input_list: {input_list}, type {type(input_list)} \n')
with open(model.path, 'w') as output_file:
for i in range(num_steps):
output_file.write('Step {}\n{}\n=====\n'.format(i, line))
# The Model artifact's .metadata dictionary can be used to store arbitrary
# metadata for the output artifact.
model.metadata['accuracy'] = 0.9
@dsl.pipeline(pipeline_root='dummy_root', name='my-test-pipeline-beta')
def pipeline(message: str):
preprocess_task = preprocess(message=message)
train_task = train(
dataset_one=preprocess_task.outputs['output_dataset_one'],
dataset_two=preprocess_task.outputs['output_dataset_two'],
message=preprocess_task.outputs['output_parameter'],
input_bool=preprocess_task.outputs['output_bool_parameter'],
input_dict=preprocess_task.outputs['output_dict_parameter'],
input_list=preprocess_task.outputs['output_list_parameter'],
)
preprocess_task = preprocess(message=message)
train_task = train(
dataset_one_path=preprocess_task.outputs['output_dataset_one'],
dataset_two=preprocess_task.outputs['output_dataset_two_path'],
message=preprocess_task.outputs['output_parameter_path'],
input_bool=preprocess_task.outputs['output_bool_parameter_path'],
input_dict=preprocess_task.outputs['output_dict_parameter_path'],
input_list=preprocess_task.outputs['output_list_parameter_path'],
)
if __name__ == '__main__':
compiler.Compiler().compile(
pipeline_func=pipeline, package_path=__file__.replace('.py', '.json'))
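A minimal sketch of the same data-passing pattern, assuming only the `kfp.v2.dsl` and `kfp.v2.compiler` imports used above; the component, pipeline and file names are placeholders. It illustrates that an `OutputPath` parameter keeps its full name (including any `_path` suffix) as the key in the producing task's `outputs` dictionary:

from kfp.v2 import compiler, dsl
from kfp.v2.dsl import OutputPath, component


@component
def produce(count_path: OutputPath(int)):
    # Output parameters are written as strings; the backend parses them by type.
    with open(count_path, 'w') as f:
        f.write(str(3))


@component
def consume(count: int):
    print(count)


@dsl.pipeline(name='data-passing-sketch', pipeline_root='dummy_root')
def sketch_pipeline():
    producer_task = produce()
    # The output key is the full parameter name, suffix included.
    consume(count=producer_task.outputs['count_path'])


if __name__ == '__main__':
    compiler.Compiler().compile(
        pipeline_func=sketch_pipeline,
        package_path='data_passing_sketch.json')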

View File

@ -111,7 +111,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef add_numbers(first: int, second: int) -> int:\n return first + second\n\n"
],
"image": "python:3.7"
@ -131,7 +131,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef concat_message(first: str, second: str) -> str:\n return first + second\n\n"
],
"image": "python:3.7"
@ -151,7 +151,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef output_artifact(number: int, message: str) -> Dataset:\n result = [message for _ in range(number)]\n return '\\n'.join(result)\n\n"
],
"image": "python:3.7"
@ -171,7 +171,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef output_named_tuple(\n artifact: Input[Dataset]\n) -> NamedTuple('Outputs', [\n ('scalar', str),\n ('metrics', Metrics),\n ('model', Model),\n]):\n scalar = \"123\"\n\n import json\n metrics = json.dumps({\n 'metrics': [{\n 'name': 'accuracy',\n 'numberValue': 0.9,\n 'format': \"PERCENTAGE\",\n }]\n })\n\n with open(artifact.path, 'r') as f:\n artifact_contents = f.read()\n model = \"Model contents: \" + artifact_contents\n\n from collections import namedtuple\n output = namedtuple('Outputs', ['scalar', 'metrics', 'model'])\n return output(scalar, metrics, model)\n\n"
],
"image": "python:3.7"

View File

@ -15,22 +15,26 @@
from typing import NamedTuple
from kfp import components, dsl
from kfp.v2 import compiler
from kfp.v2.dsl import Input, Dataset, Model, Metrics
from kfp.v2.dsl import component, Input, Dataset, Model, Metrics
@component
def concat_message(first: str, second: str) -> str:
return first + second
@component
def add_numbers(first: int, second: int) -> int:
return first + second
@component
def output_artifact(number: int, message: str) -> Dataset:
result = [message for _ in range(number)]
return '\n'.join(result)
@component
def output_named_tuple(
artifact: Input[Dataset]
) -> NamedTuple('Outputs', [
@ -58,22 +62,15 @@ def output_named_tuple(
return output(scalar, metrics, model)
concat_op = components.create_component_from_func_v2(concat_message)
add_op = components.create_component_from_func_v2(add_numbers)
output_artifact_op = components.create_component_from_func_v2(output_artifact)
output_named_tuple_op = components.create_component_from_func_v2(
output_named_tuple)
@dsl.pipeline(pipeline_root='dummy_root',
name='functions-with-outputs')
def pipeline(first_message: str, second_message: str, first_number: int,
second_number: int):
concat = concat_op(first=first_message, second=second_message)
add_numbers = add_op(first=first_number, second=second_number)
output_artifact = output_artifact_op(number=add_numbers.output,
message=concat.output)
output_name_tuple = output_named_tuple_op(output_artifact.output)
concat_op = concat_message(first=first_message, second=second_message)
add_numbers_op = add_numbers(first=first_number, second=second_number)
output_artifact_op = output_artifact(number=add_numbers_op.output,
message=concat_op.output)
output_name_tuple_op = output_named_tuple(output_artifact_op.output)
if __name__ == '__main__':
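A minimal sketch of the pattern this updated sample now relies on, with placeholder names: a function decorated with `@component` is itself a task factory, so the pipeline calls it directly instead of first wrapping it with `components.create_component_from_func_v2`.

from kfp.v2 import compiler, dsl
from kfp.v2.dsl import component


@component
def echo(message: str) -> str:
    return message


@dsl.pipeline(name='direct-component-call', pipeline_root='dummy_root')
def sketch_pipeline(message: str = 'hello'):
    # No create_component_from_func_v2 wrapper is needed; the decorated
    # function is called directly and returns a task.
    first = echo(message=message)
    echo(message=first.output)


if __name__ == '__main__':
    compiler.Compiler().compile(
        pipeline_func=sketch_pipeline,
        package_path='direct_component_call.json')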

View File

@ -44,7 +44,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef training_op(input1: str):\n print('dummy training master: {}'.format(input1))\n\n"
],
"imageUri": "python:3.7"
@ -68,7 +68,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef training_op(input1: str):\n print('dummy training master: {}'.format(input1))\n\n"
],
"imageUri": "python:3.7"

View File

@ -101,7 +101,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef fail_op(message: str):\n \"\"\"Fails.\"\"\"\n import sys\n print(message)\n sys.exit(1)\n\n"
],
"image": "python:3.7"
@ -121,7 +121,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef print_op(message: str):\n \"\"\"Prints a message.\"\"\"\n print(message)\n\n"
],
"image": "python:3.7"
@ -141,7 +141,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef print_op(message: str):\n \"\"\"Prints a message.\"\"\"\n print(message)\n\n"
],
"image": "python:3.7"

View File

@ -188,7 +188,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef train(\n dataset: Input[Dataset]\n) -> NamedTuple('Outputs', [\n ('scalar', str),\n ('model', Model),\n]):\n \"\"\"Dummy Training step.\"\"\"\n with open(dataset.path, 'r') as f:\n data = f.read()\n print('Dataset:', data)\n\n scalar = '123'\n model = 'My model trained using data: {}'.format(data)\n\n from collections import namedtuple\n output = namedtuple('Outputs', ['scalar', 'model'])\n return output(scalar, model)\n\n"
],
"image": "python:3.7"
@ -208,7 +208,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef train(\n dataset: Input[Dataset]\n) -> NamedTuple('Outputs', [\n ('scalar', str),\n ('model', Model),\n]):\n \"\"\"Dummy Training step.\"\"\"\n with open(dataset.path, 'r') as f:\n data = f.read()\n print('Dataset:', data)\n\n scalar = '123'\n model = 'My model trained using data: {}'.format(data)\n\n from collections import namedtuple\n output = namedtuple('Outputs', ['scalar', 'model'])\n return output(scalar, model)\n\n"
],
"image": "python:3.7"

View File

@ -15,11 +15,12 @@
from typing import NamedTuple
from kfp import components
from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import Dataset, Model, Input
from kfp.v2 import dsl
from kfp.v2.dsl import component, importer, Dataset, Model, Input
@component
def train(
dataset: Input[Dataset]
) -> NamedTuple('Outputs', [
@ -39,22 +40,21 @@ def train(
return output(scalar, model)
train_op = components.create_component_from_func_v2(train)
@dsl.pipeline(name='pipeline-with-importer', pipeline_root='dummy_root')
def my_pipeline(dataset2: str = 'gs://ml-pipeline-playground/shakespeare2.txt'):
importer = dsl.importer(
importer1 = importer(
artifact_uri='gs://ml-pipeline-playground/shakespeare1.txt',
artifact_class=Dataset,
reimport=False)
train1 = train_op(dataset=importer.output)
train1 = train(dataset=importer1.output)
with dsl.Condition(train1.outputs['scalar'] == '123'):
importer2 = dsl.importer(
importer2 = importer(
artifact_uri=dataset2, artifact_class=Dataset, reimport=True)
train_op(dataset=importer2.output)
train(dataset=importer2.output)
if __name__ == '__main__':

View File

@ -87,7 +87,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef output_metrics(metrics: Output[Metrics]):\n \"\"\"Dummy component that outputs metrics with a random accuracy.\"\"\"\n import random\n result = random.randint(0, 100)\n metrics.log_metric('accuracy', result)\n\n"
],
"image": "python:3.7"
@ -107,7 +107,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef output_metrics(metrics: Output[Metrics]):\n \"\"\"Dummy component that outputs metrics with a random accuracy.\"\"\"\n import random\n result = random.randint(0, 100)\n metrics.log_metric('accuracy', result)\n\n"
],
"image": "python:3.7"

View File

@ -11,5 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from kfp.components import *

View File

@ -15,7 +15,7 @@
import functools
from typing import Callable, Optional, List
from kfp import components
from kfp.v2.components import component_factory
def component(func: Optional[Callable] = None,
@ -25,7 +25,7 @@ def component(func: Optional[Callable] = None,
output_component_file: Optional[str] = None,
install_kfp_package: bool = True,
kfp_package_path: Optional[str] = None):
"""Decorator for Python-function based components in KFP v2.
"""Decorator for Python-function based components in KFP v2.
A lightweight component is a self-contained Python function that includes
all necessary imports and dependencies.
@ -73,18 +73,18 @@ def component(func: Optional[Callable] = None,
Returns:
A component task factory that can be used in pipeline definitions.
"""
if func is None:
return functools.partial(component,
base_image=base_image,
packages_to_install=packages_to_install,
output_component_file=output_component_file,
install_kfp_package=install_kfp_package,
kfp_package_path=kfp_package_path)
return components.create_component_from_func_v2(
func,
base_image=base_image,
packages_to_install=packages_to_install,
output_component_file=output_component_file,
install_kfp_package=install_kfp_package,
kfp_package_path=kfp_package_path)
return component_factory.create_component_from_func(
func,
base_image=base_image,
packages_to_install=packages_to_install,
output_component_file=output_component_file,
install_kfp_package=install_kfp_package,
kfp_package_path=kfp_package_path)
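For reference, a hedged usage sketch of the decorator defined above; the image, package list and output filename are placeholders. Called with keyword arguments only, the decorator returns a `functools.partial` of itself, which is then applied to the function:

from kfp.v2.dsl import component


@component(base_image='python:3.7',
           packages_to_install=['pandas'],
           output_component_file='normalize_component.yaml')
def normalize(value: float, minimum: float, maximum: float) -> float:
    # Runs inside the container image declared above.
    return (value - minimum) / (maximum - minimum)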

View File

@ -0,0 +1,399 @@
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import inspect
import itertools
import re
import textwrap
from typing import Callable, Dict, List, Mapping, Optional, TypeVar
import warnings
import docstring_parser
from kfp import components as v1_components
from kfp.components import _components, _data_passing, structures, type_annotation_utils
from kfp.v2.components.types import artifact_types, type_annotations
_DEFAULT_BASE_IMAGE = 'python:3.7'
def _python_function_name_to_component_name(name):
name_with_spaces = re.sub(' +', ' ', name.replace('_', ' ')).strip(' ')
return name_with_spaces[0].upper() + name_with_spaces[1:]
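# Illustrative only, not part of the original file: the helper above produces a
# human-readable component name from a Python function name, for example
#   _python_function_name_to_component_name('train_model')   -> 'Train model'
#   _python_function_name_to_component_name('my__func_name') -> 'My func name'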
def _get_packages_to_install_command(
package_list: Optional[List[str]] = None) -> List[str]:
result = []
if package_list is not None:
install_pip_command = 'python3 -m ensurepip'
install_packages_command = (
'PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \
--no-warn-script-location {}' ).format(' '.join(
[repr(str(package)) for package in package_list]))
result = [
'sh', '-c',
'({install_pip} || {install_pip} --user) &&'
' ({install_packages} || {install_packages} --user) && "$0" "$@"'.
format(install_pip=install_pip_command,
install_packages=install_packages_command)
]
return result
def _get_default_kfp_package_path() -> str:
import kfp
return 'kfp=={}'.format(kfp.__version__)
def _get_function_source_definition(func: Callable) -> str:
func_code = inspect.getsource(func)
# Function might be defined in some indented scope (e.g. in another
# function). We need to handle this and properly dedent the function source
# code
func_code = textwrap.dedent(func_code)
func_code_lines = func_code.split('\n')
# Removing possible decorators (can be multiline) until the function
# definition is found
func_code_lines = itertools.dropwhile(lambda x: not x.startswith('def'),
func_code_lines)
if not func_code_lines:
raise ValueError(
'Failed to dedent and clean up the source of function "{}". '
'It is probably not properly indented.'.format(func.__name__))
return '\n'.join(func_code_lines)
def _annotation_to_type_struct(annotation):
if not annotation or annotation == inspect.Parameter.empty:
return None
if hasattr(annotation, 'to_dict'):
annotation = annotation.to_dict()
if isinstance(annotation, dict):
return annotation
if isinstance(annotation, type):
type_struct = _data_passing.get_canonical_type_struct_for_type(
annotation)
if type_struct:
return type_struct
type_name = str(annotation.__name__)
elif hasattr(
annotation, '__forward_arg__'
): # Handling typing.ForwardRef('Type_name') (the name was _ForwardRef in python 3.5-3.6)
type_name = str(annotation.__forward_arg__)
else:
type_name = str(annotation)
# It's also possible to get the converter by type name
type_struct = _data_passing.get_canonical_type_struct_for_type(type_name)
if type_struct:
return type_struct
return type_name
def _maybe_make_unique(name: str, names: List[str]):
if name not in names:
return name
for i in range(2, 100):
unique_name = '{}_{}'.format(name, i)
if unique_name not in names:
return unique_name
raise RuntimeError('Too many arguments with the name {}'.format(name))
# TODO(KFPv2): Replace with v2 ComponentSpec.
def _func_to_component_spec(
func: Callable,
base_image: Optional[str] = None,
packages_to_install: Optional[List[str]] = None,
install_kfp_package: bool = True,
kfp_package_path: Optional[str] = None) -> structures.ComponentSpec:
decorator_base_image = getattr(func, '_component_base_image', None)
if decorator_base_image is not None:
if base_image is not None and decorator_base_image != base_image:
raise ValueError(
'base_image ({}) conflicts with the decorator-specified base image metadata ({})'
.format(base_image, decorator_base_image))
else:
base_image = decorator_base_image
else:
if base_image is None:
base_image = _DEFAULT_BASE_IMAGE
if isinstance(base_image, Callable):
base_image = base_image()
imports_source = [
"from kfp.v2.dsl import *",
"from typing import *",
]
func_source = _get_function_source_definition(func)
source = textwrap.dedent("""
{imports_source}
{func_source}\n""").format(imports_source='\n'.join(imports_source),
func_source=func_source)
packages_to_install = packages_to_install or []
if install_kfp_package:
if kfp_package_path is None:
kfp_package_path = _get_default_kfp_package_path()
packages_to_install.append(kfp_package_path)
packages_to_install_command = _get_packages_to_install_command(
package_list=packages_to_install)
from kfp.components._structures import ExecutorInputPlaceholder
component_spec = extract_component_interface(func)
component_spec.implementation = structures.ContainerImplementation(
container=structures.ContainerSpec(image=base_image,
command=packages_to_install_command +
[
'sh',
'-ec',
textwrap.dedent('''\
program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
python3 -m kfp.v2.components.executor_main \
--component_module_path \
"$program_path/ephemeral_component.py" \
"$@"
'''),
source,
],
args=[
"--executor_input",
ExecutorInputPlaceholder(),
"--function_to_execute",
func.__name__,
]))
return component_spec
def extract_component_interface(func: Callable) -> structures.ComponentSpec:
single_output_name_const = 'Output'
signature = inspect.signature(func)
parameters = list(signature.parameters.values())
parsed_docstring = docstring_parser.parse(inspect.getdoc(func))
doc_dict = {p.arg_name: p.description for p in parsed_docstring.params}
inputs = []
outputs = []
input_names = set()
output_names = set()
for parameter in parameters:
parameter_type = type_annotation_utils.maybe_strip_optional_from_annotation(
parameter.annotation)
passing_style = None
io_name = parameter.name
if type_annotations.is_artifact_annotation(parameter_type):
# passing_style is either type_annotations.InputAnnotation or
# type_annotations.OutputAnnotation.
passing_style = type_annotations.get_io_artifact_annotation(
parameter_type)
# parameter_type is type_annotations.Artifact or one of its subclasses.
parameter_type = type_annotations.get_io_artifact_class(
parameter_type)
if not issubclass(parameter_type, artifact_types.Artifact):
raise ValueError(
'Input[T] and Output[T] are only supported when T is a '
'subclass of Artifact. Found `{} with type {}`'.format(
io_name, parameter_type))
if parameter.default is not inspect.Parameter.empty:
raise ValueError(
'Default values for Input/Output artifacts are not supported.'
)
elif isinstance(parameter_type,
(v1_components.InputPath, v1_components.OutputPath)):
raise TypeError(
'In v2 components, please import the Python function'
' annotations `InputPath` and `OutputPath` from'
' package `kfp.v2.dsl` instead of `kfp.dsl`.')
elif isinstance(
parameter_type,
(type_annotations.InputPath, type_annotations.OutputPath)):
passing_style = type(parameter_type)
parameter_type = parameter_type.type
if parameter.default is not inspect.Parameter.empty and not (
passing_style == type_annotations.InputPath and
parameter.default is None):
raise ValueError(
'Path inputs only support default values of None. Default values for outputs are not supported.'
)
type_struct = _annotation_to_type_struct(parameter_type)
if passing_style in [
type_annotations.OutputAnnotation, type_annotations.OutputPath
]:
io_name = _maybe_make_unique(io_name, output_names)
output_names.add(io_name)
output_spec = structures.OutputSpec(name=io_name,
type=type_struct,
description=doc_dict.get(
parameter.name))
output_spec._passing_style = passing_style
output_spec._parameter_name = parameter.name
outputs.append(output_spec)
else:
io_name = _maybe_make_unique(io_name, input_names)
input_names.add(io_name)
input_spec = structures.InputSpec(name=io_name,
type=type_struct,
description=doc_dict.get(
parameter.name))
if parameter.default is not inspect.Parameter.empty:
input_spec.optional = True
if parameter.default is not None:
outer_type_name = list(type_struct.keys())[0] if isinstance(
type_struct, dict) else type_struct
try:
input_spec.default = _data_passing.serialize_value(
parameter.default, outer_type_name)
except Exception as ex:
warnings.warn(
'Could not serialize the default value of the parameter "{}". {}'
.format(parameter.name, ex))
input_spec._passing_style = passing_style
input_spec._parameter_name = parameter.name
inputs.append(input_spec)
#Analyzing the return type annotations.
return_ann = signature.return_annotation
if hasattr(return_ann, '_fields'): #NamedTuple
# Getting field type annotations.
# __annotations__ does not exist in python 3.5 and earlier
# _field_types does not exist in python 3.9 and later
field_annotations = getattr(return_ann,
'__annotations__', None) or getattr(
return_ann, '_field_types', None)
for field_name in return_ann._fields:
type_struct = None
if field_annotations:
type_struct = _annotation_to_type_struct(
field_annotations.get(field_name, None))
output_name = _maybe_make_unique(field_name, output_names)
output_names.add(output_name)
output_spec = structures.OutputSpec(
name=output_name,
type=type_struct,
)
output_spec._passing_style = None
output_spec._return_tuple_field_name = field_name
outputs.append(output_spec)
# Deprecated dict-based way of declaring multiple outputs. Was only used by the @component decorator
elif isinstance(return_ann, dict):
warnings.warn(
"The ability to specify multiple outputs using the dict syntax has been deprecated."
"It will be removed soon after release 0.1.32."
"Please use typing.NamedTuple to declare multiple outputs.")
for output_name, output_type_annotation in return_ann.items():
output_type_struct = _annotation_to_type_struct(
output_type_annotation)
output_spec = structures.OutputSpec(
name=output_name,
type=output_type_struct,
)
outputs.append(output_spec)
elif signature.return_annotation is not None and signature.return_annotation != inspect.Parameter.empty:
output_name = _maybe_make_unique(single_output_name_const, output_names)
# Fixes exotic, but possible collision: `def func(output_path: OutputPath()) -> str: ...`
output_names.add(output_name)
type_struct = _annotation_to_type_struct(signature.return_annotation)
output_spec = structures.OutputSpec(
name=output_name,
type=type_struct,
)
output_spec._passing_style = None
outputs.append(output_spec)
# Component name and description are derived from the function's name and docstring.
# The name can be overridden by setting the func.__name__ attribute (or the legacy func._component_human_name attribute).
# The description can be overridden by setting the func.__doc__ attribute (or the legacy func._component_description attribute).
component_name = getattr(func, '_component_human_name',
None) or _python_function_name_to_component_name(
func.__name__)
description = getattr(func, '_component_description',
None) or parsed_docstring.short_description
if description:
description = description.strip()
component_spec = structures.ComponentSpec(
name=component_name,
description=description,
inputs=inputs if inputs else None,
outputs=outputs if outputs else None,
)
return component_spec
def create_component_from_func(func: Callable,
base_image: Optional[str] = None,
packages_to_install: List[str] = None,
output_component_file: Optional[str] = None,
install_kfp_package: bool = True,
kfp_package_path: Optional[str] = None):
"""Converts a Python function to a v2 lightweight component.
A lightweight component is a self-contained Python function that includes
all necessary imports and dependencies.
Args:
func: The python function to create a component from. The function
should have type annotations for all its arguments, indicating how
it is intended to be used (e.g. as an input/output Artifact object,
a plain parameter, or a path to a file).
base_image: The image to use when executing |func|. It should
contain a default Python interpreter that is compatible with KFP.
packages_to_install: A list of optional packages to install before
executing |func|.
install_kfp_package: Specifies if we should add a KFP Python package to
|packages_to_install|. Lightweight Python functions always require
an installation of KFP in |base_image| to work. If you specify
a |base_image| that already contains KFP, you can set this to False.
kfp_package_path: Specifies the location from which to install KFP. By
default, this will try to install from PyPi using the same version
as that used when this component was created. KFP developers can
choose to override this to point to a Github pull request or
other pip-compatible location when testing changes to lightweight
Python functions.
Returns:
A component task factory that can be used in pipeline definitions.
"""
component_spec = _func_to_component_spec(
func=func,
base_image=base_image,
packages_to_install=packages_to_install,
install_kfp_package=install_kfp_package,
kfp_package_path=kfp_package_path)
if output_component_file:
component_spec.save(output_component_file)
# TODO(KFPv2): Replace with v2 BaseComponent.
return _components._create_task_factory_from_component_spec(component_spec)
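A hedged usage sketch for the factory above (the wrapped function and file names are placeholders); this is the programmatic counterpart of applying the `@component` decorator:

from kfp.v2.components import component_factory


def double(number: int) -> int:
    return number * 2


# Returns a task factory that can be used inside a @dsl.pipeline definition,
# optionally also saving the component spec to a YAML file.
double_op = component_factory.create_component_from_func(
    double,
    base_image='python:3.7',
    output_component_file='double_component.yaml')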

View File

@ -0,0 +1,281 @@
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import inspect
from typing import Any, Callable, Dict, List, Optional, Union
from kfp.v2.components.types import artifact_types, type_annotations
class Executor():
"""Executor executes v2-based Python function components."""
def __init__(self, executor_input: Dict, function_to_execute: Callable):
self._func = function_to_execute
self._input = executor_input
self._input_artifacts: Dict[str, artifact_types.Artifact] = {}
self._output_artifacts: Dict[str, artifact_types.Artifact] = {}
for name, artifacts in self._input.get('inputs',
{}).get('artifacts', {}).items():
artifacts_list = artifacts.get('artifacts')
if artifacts_list:
self._input_artifacts[name] = self._make_input_artifact(
artifacts_list[0])
for name, artifacts in self._input.get('outputs',
{}).get('artifacts', {}).items():
artifacts_list = artifacts.get('artifacts')
if artifacts_list:
self._output_artifacts[name] = self._make_output_artifact(
artifacts_list[0])
self._return_annotation = inspect.signature(
self._func).return_annotation
self._executor_output = {}
@classmethod
def _make_input_artifact(cls, runtime_artifact: Dict):
return artifact_types.create_runtime_artifact(runtime_artifact)
@classmethod
def _make_output_artifact(cls, runtime_artifact: Dict):
import os
artifact = artifact_types.create_runtime_artifact(runtime_artifact)
os.makedirs(os.path.dirname(artifact.path), exist_ok=True)
return artifact
def _get_input_artifact(self, name: str):
return self._input_artifacts.get(name)
def _get_output_artifact(self, name: str):
return self._output_artifacts.get(name)
def _get_input_parameter_value(self, parameter_name: str,
parameter_type: Any):
parameter = self._input.get('inputs',
{}).get('parameters',
{}).get(parameter_name, None)
if parameter is None:
return None
if parameter.get('stringValue'):
if parameter_type == str:
return parameter['stringValue']
elif parameter_type == bool:
# Use `.lower()` so it can also handle 'True' and 'False' (resulting from
# `str(True)` and `str(False)`, respectively).
return json.loads(parameter['stringValue'].lower())
else:
return json.loads(parameter['stringValue'])
elif parameter.get('intValue'):
return int(parameter['intValue'])
elif parameter.get('doubleValue'):
return float(parameter['doubleValue'])
def _get_output_parameter_path(self, parameter_name: str):
parameter = self._input.get('outputs',
{}).get('parameters',
{}).get(parameter_name, None)
if parameter is None:
return None
import os
path = parameter.get('outputFile', None)
if path:
os.makedirs(os.path.dirname(path), exist_ok=True)
return path
def _get_output_artifact_path(self, artifact_name: str):
output_artifact = self._output_artifacts.get(artifact_name)
if not output_artifact:
raise ValueError(
'Failed to get output artifact path for artifact name {}'.
format(artifact_name))
return output_artifact.path
def _get_input_artifact_path(self, artifact_name: str):
input_artifact = self._input_artifacts.get(artifact_name)
if not input_artifact:
raise ValueError(
'Failed to get input artifact path for artifact name {}'.format(
artifact_name))
return input_artifact.path
def _write_output_parameter_value(self, name: str,
value: Union[str, int, float, bool, dict,
list, Dict, List]):
if type(value) == str:
output = {'stringValue': value}
elif type(value) == int:
output = {'intValue': value}
elif type(value) == float:
output = {'doubleValue': value}
else:
# For bool, list, dict, List, Dict, json serialize the value.
output = {'stringValue': json.dumps(value)}
if not self._executor_output.get('parameters'):
self._executor_output['parameters'] = {}
self._executor_output['parameters'][name] = output
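# Illustrative only, not part of the original file: together with
# _get_input_parameter_value above, this yields a simple round trip for
# non-numeric parameters. A bool return value is serialized as
# {'stringValue': json.dumps(True)} == {'stringValue': 'true'}, and a
# downstream component annotated with `bool` reads it back via
# json.loads('true'.lower()) -> True.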
def _write_output_artifact_payload(self, name: str, value: Any):
path = self._get_output_artifact_path(name)
with open(path, 'w') as f:
f.write(str(value))
# TODO: extract to a util
@classmethod
def _get_short_type_name(cls, type_name: str) -> str:
"""Extracts the short form type name.
This method is used for looking up serializer for a given type.
For example:
typing.List -> List
typing.List[int] -> List
typing.Dict[str, str] -> Dict
List -> List
str -> str
Args:
type_name: The original type name.
Returns:
The short form type name or the original name if pattern doesn't match.
"""
import re
match = re.match('(typing\.)?(?P<type>\w+)(?:\[.+\])?', type_name)
if match:
return match.group('type')
else:
return type_name
# TODO: merge with type_utils.is_parameter_type
@classmethod
def _is_parameter(cls, annotation: Any) -> bool:
if type(annotation) == type:
return annotation in [str, int, float, bool, dict, list]
# Annotation could be, for instance `typing.Dict[str, str]`, etc.
return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']
@classmethod
def _is_artifact(cls, annotation: Any) -> bool:
if type(annotation) == type:
return issubclass(annotation, artifact_types.Artifact)
return False
@classmethod
def _is_named_tuple(cls, annotation: Any) -> bool:
if type(annotation) == type:
return issubclass(annotation, tuple) and hasattr(
annotation, '_fields') and hasattr(annotation,
'__annotations__')
return False
def _handle_single_return_value(self, output_name: str,
annotation_type: Any, return_value: Any):
if self._is_parameter(annotation_type):
if type(return_value) != annotation_type:
raise ValueError(
'Function `{}` returned value of type {}; want type {}'.
format(self._func.__name__, type(return_value),
annotation_type))
self._write_output_parameter_value(output_name, return_value)
elif self._is_artifact(annotation_type):
self._write_output_artifact_payload(output_name, return_value)
else:
raise RuntimeError(
'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'
' subclass of `Artifact`'.format(annotation_type))
def _write_executor_output(self, func_output: Optional[Any] = None):
if self._output_artifacts:
self._executor_output['artifacts'] = {}
for name, artifact in self._output_artifacts.items():
runtime_artifact = {
'name': artifact.name,
'uri': artifact.uri,
'metadata': artifact.metadata,
}
artifacts_list = {'artifacts': [runtime_artifact]}
self._executor_output['artifacts'][name] = artifacts_list
if func_output is not None:
if self._is_parameter(self._return_annotation) or self._is_artifact(
self._return_annotation):
# Note: single output is named `Output` in component.yaml.
self._handle_single_return_value('Output',
self._return_annotation,
func_output)
elif self._is_named_tuple(self._return_annotation):
if len(self._return_annotation._fields) != len(func_output):
raise RuntimeError(
'Expected {} return values from function `{}`, got {}'.
format(len(self._return_annotation._fields),
self._func.__name__, len(func_output)))
for i in range(len(self._return_annotation._fields)):
field = self._return_annotation._fields[i]
field_type = self._return_annotation.__annotations__[field]
if type(func_output) == tuple:
field_value = func_output[i]
else:
field_value = getattr(func_output, field)
self._handle_single_return_value(field, field_type,
field_value)
else:
raise RuntimeError(
'Unknown return type: {}. Must be one of `str`, `int`, `float`, a'
' subclass of `Artifact`, or a NamedTuple collection of these types.'
.format(self._return_annotation))
import os
os.makedirs(os.path.dirname(self._input['outputs']['outputFile']),
exist_ok=True)
with open(self._input['outputs']['outputFile'], 'w') as f:
f.write(json.dumps(self._executor_output))
def execute(self):
annotations = inspect.getfullargspec(self._func).annotations
# Function arguments.
func_kwargs = {}
for k, v in annotations.items():
if k == 'return':
continue
if self._is_parameter(v):
func_kwargs[k] = self._get_input_parameter_value(k, v)
if type_annotations.is_artifact_annotation(v):
if type_annotations.is_input_artifact(v):
func_kwargs[k] = self._get_input_artifact(k)
if type_annotations.is_output_artifact(v):
func_kwargs[k] = self._get_output_artifact(k)
elif isinstance(v, type_annotations.OutputPath):
if self._is_parameter(v.type):
func_kwargs[k] = self._get_output_parameter_path(k)
else:
func_kwargs[k] = self._get_output_artifact_path(k)
elif isinstance(v, type_annotations.InputPath):
func_kwargs[k] = self._get_input_artifact_path(k)
result = self._func(**func_kwargs)
self._write_executor_output(result)
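A minimal hedged sketch of driving `Executor` directly, mirroring what `executor_main` and the tests below do; the executor input is a stripped-down placeholder rather than a real backend payload, and the output path is an arbitrary local path.

from kfp.v2.components import executor


def add(first: int, second: int) -> int:
    return first + second


executor_input = {
    'inputs': {
        'parameters': {
            'first': {'intValue': 40},
            'second': {'intValue': 2},
        }
    },
    'outputs': {
        'outputFile': '/tmp/kfp_outputs/output_metadata.json',
    },
}

# Runs `add` and writes {'parameters': {'Output': {'intValue': 42}}} to the
# output metadata file named above.
executor.Executor(executor_input=executor_input,
                  function_to_execute=add).execute()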

View File

@ -17,7 +17,7 @@ import importlib
import os
import sys
from kfp.components import executor as component_executor
from kfp.v2.components import executor as component_executor
def _load_module(module_name: str, module_directory: str):

View File

@ -0,0 +1,468 @@
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for kfp.components.executor"""
import os
import tempfile
from typing import Callable, NamedTuple, Optional
import unittest
import json
from kfp.v2.components import executor
from kfp.v2.components.types import artifact_types
from kfp.v2.components.types.artifact_types import Artifact, Dataset, Model, Metrics
from kfp.v2.components.types.type_annotations import Input, Output, InputPath, OutputPath
_EXECUTOR_INPUT = """\
{
"inputs": {
"parameters": {
"input_parameter": {
"stringValue": "Hello, KFP"
}
},
"artifacts": {
"input_artifact_one_path": {
"artifacts": [
{
"metadata": {},
"name": "input_artifact_one",
"type": {
"schemaTitle": "system.Dataset"
},
"uri": "gs://some-bucket/input_artifact_one"
}
]
}
}
},
"outputs": {
"artifacts": {
"output_artifact_one_path": {
"artifacts": [
{
"metadata": {},
"name": "output_artifact_one",
"type": {
"schemaTitle": "system.Model"
},
"uri": "gs://some-bucket/output_artifact_one"
}
]
},
"output_artifact_two": {
"artifacts": [
{
"metadata": {},
"name": "output_artifact_two",
"type": {
"schemaTitle": "system.Metrics"
},
"uri": "gs://some-bucket/output_artifact_two"
}
]
}
},
"parameters": {
"output_parameter_path": {
"outputFile": "gs://some-bucket/some_task/nested/output_parameter"
}
},
"outputFile": "%s/output_metadata.json"
}
}
"""
class ExecutorTest(unittest.TestCase):
def setUp(self):
self.maxDiff = None
self._test_dir = tempfile.mkdtemp()
artifact_types._GCS_LOCAL_MOUNT_PREFIX = self._test_dir + '/'
artifact_types._MINIO_LOCAL_MOUNT_PREFIX = self._test_dir + '/minio/'
artifact_types._S3_LOCAL_MOUNT_PREFIX = self._test_dir + '/s3/'
return super().setUp()
def _get_executor(
self,
func: Callable,
executor_input: Optional[str] = None) -> executor.Executor:
if executor_input is None:
executor_input = _EXECUTOR_INPUT
executor_input_dict = json.loads(executor_input % self._test_dir)
return executor.Executor(executor_input=executor_input_dict,
function_to_execute=func)
def test_input_parameter(self):
def test_func(input_parameter: str):
self.assertEqual(input_parameter, "Hello, KFP")
self._get_executor(test_func).execute()
def test_input_artifact(self):
def test_func(input_artifact_one_path: Input[Dataset]):
self.assertEqual(input_artifact_one_path.uri,
'gs://some-bucket/input_artifact_one')
self.assertEqual(
input_artifact_one_path.path,
os.path.join(self._test_dir, 'some-bucket/input_artifact_one'))
self.assertEqual(input_artifact_one_path.name, 'input_artifact_one')
self._get_executor(test_func).execute()
def test_output_artifact(self):
def test_func(output_artifact_one_path: Output[Model]):
self.assertEqual(output_artifact_one_path.uri,
'gs://some-bucket/output_artifact_one')
self.assertEqual(
output_artifact_one_path.path,
os.path.join(self._test_dir, 'some-bucket/output_artifact_one'))
self.assertEqual(output_artifact_one_path.name,
'output_artifact_one')
self._get_executor(test_func).execute()
def test_output_parameter(self):
def test_func(output_parameter_path: OutputPath(str)):
# Test that output parameters just use the passed in filename.
self.assertEqual(
output_parameter_path,
'gs://some-bucket/some_task/nested/output_parameter')
# Test writing to the path succeeds. This fails if parent directories
# don't exist.
with open(output_parameter_path, 'w') as f:
f.write('Hello, World!')
self._get_executor(test_func).execute()
def test_input_path_artifact(self):
def test_func(input_artifact_one_path: InputPath('Dataset')):
self.assertEqual(
input_artifact_one_path,
os.path.join(self._test_dir, 'some-bucket/input_artifact_one'))
self._get_executor(test_func).execute()
def test_output_path_artifact(self):
def test_func(output_artifact_one_path: OutputPath('Model')):
self.assertEqual(
output_artifact_one_path,
os.path.join(self._test_dir, 'some-bucket/output_artifact_one'))
self._get_executor(test_func).execute()
def test_output_metadata(self):
def test_func(output_artifact_two: Output[Metrics]):
output_artifact_two.metadata['key_1'] = 'value_1'
output_artifact_two.metadata['key_2'] = 2
output_artifact_two.uri = 'new-uri'
# log_metric works here since the schema is specified as Metrics.
output_artifact_two.log_metric('metric', 0.9)
self._get_executor(test_func).execute()
with open(os.path.join(self._test_dir, 'output_metadata.json'),
'r') as f:
output_metadata = json.loads(f.read())
self.assertDictEqual(
output_metadata, {
'artifacts': {
'output_artifact_one_path': {
'artifacts': [{
'name': 'output_artifact_one',
'uri': 'gs://some-bucket/output_artifact_one',
'metadata': {}
}]
},
'output_artifact_two': {
'artifacts': [{
'name': 'output_artifact_two',
'uri': 'new-uri',
'metadata': {
'key_1': 'value_1',
'key_2': 2,
'metric': 0.9
}
}]
}
}
})
def test_function_string_output(self):
executor_input = """\
{
"inputs": {
"parameters": {
"first_message": {
"stringValue": "Hello"
},
"second_message": {
"stringValue": "World"
}
}
},
"outputs": {
"parameters": {
"output": {
"outputFile": "gs://some-bucket/output"
}
},
"outputFile": "%s/output_metadata.json"
}
}
"""
def test_func(first_message: str, second_message: str) -> str:
return first_message + ", " + second_message
self._get_executor(test_func, executor_input).execute()
with open(os.path.join(self._test_dir, 'output_metadata.json'),
'r') as f:
output_metadata = json.loads(f.read())
self.assertDictEqual(output_metadata, {
"parameters": {
"Output": {
"stringValue": "Hello, World"
}
},
})
def test_function_with_int_output(self):
executor_input = """\
{
"inputs": {
"parameters": {
"first": {
"intValue": 40
},
"second": {
"intValue": 2
}
}
},
"outputs": {
"parameters": {
"output": {
"outputFile": "gs://some-bucket/output"
}
},
"outputFile": "%s/output_metadata.json"
}
}
"""
def test_func(first: int, second: int) -> int:
return first + second
self._get_executor(test_func, executor_input).execute()
with open(os.path.join(self._test_dir, 'output_metadata.json'),
'r') as f:
output_metadata = json.loads(f.read())
self.assertDictEqual(output_metadata, {
"parameters": {
"Output": {
"intValue": 42
}
},
})
def test_function_string_output_declared_as_artifact(self):
executor_input = """\
{
"inputs": {
"parameters": {
"first_message": {
"stringValue": "Hello"
},
"second_message": {
"stringValue": "World"
}
}
},
"outputs": {
"artifacts": {
"Output": {
"outputFile": "gs://some-bucket/output"
}
},
"outputFile": "%s/output_metadata.json"
}
}
"""
def test_func(first_message: str, second_message: str) -> str:
return first_message + ", " + second_message
self._get_executor(test_func, executor_input).execute()
with open(os.path.join(self._test_dir, 'output_metadata.json'),
'r') as f:
output_metadata = json.loads(f.read())
self.assertDictEqual(output_metadata, {
"parameters": {
"Output": {
"stringValue": "Hello, World"
}
},
})
def test_artifact_output(self):
executor_input = """\
{
"inputs": {
"parameters": {
"first": {
"stringValue": "Hello"
},
"second": {
"stringValue": "World"
}
}
},
"outputs": {
"artifacts": {
"Output": {
"artifacts": [
{
"name": "output",
"type": {
"schemaTitle": "system.Artifact"
},
"uri": "gs://some-bucket/output"
}
]
}
},
"outputFile": "%s/output_metadata.json"
}
}
"""
def test_func(first: str, second: str) -> Artifact:
return first + ", " + second
self._get_executor(test_func, executor_input).execute()
with open(os.path.join(self._test_dir, 'output_metadata.json'),
'r') as f:
output_metadata = json.loads(f.read())
self.assertDictEqual(
output_metadata, {
'artifacts': {
'Output': {
'artifacts': [{
'metadata': {},
'name': 'output',
'uri': 'gs://some-bucket/output'
}]
}
}
})
with open(os.path.join(self._test_dir, 'some-bucket/output'), 'r') as f:
artifact_payload = f.read()
self.assertEqual(artifact_payload, "Hello, World")
def test_named_tuple_output(self):
executor_input = """\
{
"outputs": {
"artifacts": {
"output_dataset": {
"artifacts": [
{
"name": "output_dataset",
"type": {
"schemaTitle": "system.Dataset"
},
"uri": "gs://some-bucket/output_dataset"
}
]
}
},
"parameters": {
"output_int": {
"outputFile": "gs://some-bucket/output_int"
},
"output_string": {
"outputFile": "gs://some-bucket/output_string"
}
},
"outputFile": "%s/output_metadata.json"
}
}
"""
# Functions returning named tuples should work.
def func_returning_named_tuple() -> NamedTuple('Outputs', [
("output_dataset", Dataset),
("output_int", int),
("output_string", str),
]):
from collections import namedtuple
output = namedtuple(
'Outputs', ['output_dataset', 'output_int', 'output_string'])
return output("Dataset contents", 101, "Some output string")
# Functions returning plain tuples should work too.
def func_returning_plain_tuple() -> NamedTuple('Outputs', [
("output_dataset", Dataset),
("output_int", int),
("output_string", str),
]):
return ("Dataset contents", 101, "Some output string")
for test_func in [
func_returning_named_tuple, func_returning_plain_tuple
]:
self._get_executor(test_func, executor_input).execute()
with open(os.path.join(self._test_dir, 'output_metadata.json'),
'r') as f:
output_metadata = json.loads(f.read())
self.assertDictEqual(
output_metadata, {
'artifacts': {
'output_dataset': {
'artifacts': [{
'metadata': {},
'name': 'output_dataset',
'uri': 'gs://some-bucket/output_dataset'
}]
}
},
"parameters": {
"output_string": {
"stringValue": "Some output string"
},
"output_int": {
"intValue": 101
}
},
})
with open(
os.path.join(self._test_dir, 'some-bucket/output_dataset'),
'r') as f:
artifact_payload = f.read()
self.assertEqual(artifact_payload, "Dataset contents")

View File

@ -0,0 +1,158 @@
# Copyright 2020 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility function for building Importer Node spec."""
from typing import Union, Type
from kfp.dsl import _container_op
from kfp.dsl import _pipeline_param
from kfp.dsl import dsl_utils
from kfp.pipeline_spec import pipeline_spec_pb2
from kfp.v2.components.types import artifact_types, type_utils
INPUT_KEY = 'uri'
OUTPUT_KEY = 'artifact'
def _build_importer_spec(
artifact_uri: Union[_pipeline_param.PipelineParam, str],
artifact_type_schema: pipeline_spec_pb2.ArtifactTypeSchema,
) -> pipeline_spec_pb2.PipelineDeploymentConfig.ImporterSpec:
"""Builds an importer executor spec.
Args:
artifact_uri: The artifact uri to import from.
artifact_type_schema: The user specified artifact type schema of the
artifact to be imported.
Returns:
An importer spec.
"""
importer_spec = pipeline_spec_pb2.PipelineDeploymentConfig.ImporterSpec()
importer_spec.type_schema.CopyFrom(artifact_type_schema)
if isinstance(artifact_uri, _pipeline_param.PipelineParam):
importer_spec.artifact_uri.runtime_parameter = INPUT_KEY
elif isinstance(artifact_uri, str):
importer_spec.artifact_uri.constant_value.string_value = artifact_uri
return importer_spec
def _build_importer_task_spec(
importer_base_name: str,
artifact_uri: Union[_pipeline_param.PipelineParam, str],
) -> pipeline_spec_pb2.PipelineTaskSpec:
"""Builds an importer task spec.
Args:
importer_base_name: The base name of the importer node.
artifact_uri: The artifact uri to import from.
Returns:
An importer node task spec.
"""
result = pipeline_spec_pb2.PipelineTaskSpec()
result.task_info.name = dsl_utils.sanitize_task_name(importer_base_name)
result.component_ref.name = dsl_utils.sanitize_component_name(
importer_base_name)
if isinstance(artifact_uri, _pipeline_param.PipelineParam):
result.inputs.parameters[
INPUT_KEY].component_input_parameter = artifact_uri.full_name
elif isinstance(artifact_uri, str):
result.inputs.parameters[
INPUT_KEY].runtime_value.constant_value.string_value = artifact_uri
return result
def _build_importer_component_spec(
importer_base_name: str,
artifact_type_schema: pipeline_spec_pb2.ArtifactTypeSchema,
) -> pipeline_spec_pb2.ComponentSpec:
"""Builds an importer component spec.
Args:
importer_base_name: The base name of the importer node.
artifact_type_schema: The user specified artifact type schema of the
artifact to be imported.
Returns:
An importer node component spec.
"""
result = pipeline_spec_pb2.ComponentSpec()
result.executor_label = dsl_utils.sanitize_executor_label(
importer_base_name)
result.input_definitions.parameters[
INPUT_KEY].type = pipeline_spec_pb2.PrimitiveType.STRING
result.output_definitions.artifacts[OUTPUT_KEY].artifact_type.CopyFrom(
artifact_type_schema)
return result
def importer(artifact_uri: Union[_pipeline_param.PipelineParam, str],
artifact_class: Type[artifact_types.Artifact],
reimport: bool = False) -> _container_op.ContainerOp:
"""dsl.importer for importing an existing artifact. Only for v2 pipeline.
Args:
artifact_uri: The artifact uri to import from.
artifact_type_schema: The user specified artifact type schema of the
artifact to be imported.
reimport: Whether to reimport the artifact. Defaults to False.
Returns:
A ContainerOp instance.
Raises:
ValueError if the passed in artifact_uri is neither a PipelineParam nor a
constant string value.
"""
if isinstance(artifact_uri, _pipeline_param.PipelineParam):
input_param = artifact_uri
elif isinstance(artifact_uri, str):
input_param = _pipeline_param.PipelineParam(name='uri',
value=artifact_uri,
param_type='String')
else:
raise ValueError(
'Importer got unexpected artifact_uri: {} of type: {}.'.format(
artifact_uri, type(artifact_uri)))
old_warn_value = _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
_container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
task = _container_op.ContainerOp(
name='importer',
image='importer_image', # TODO: need a v1 implementation of importer.
file_outputs={
OUTPUT_KEY:
"{{{{$.outputs.artifacts['{}'].uri}}}}".format(OUTPUT_KEY)
},
)
_container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value
artifact_type_schema = type_utils.get_artifact_type_schema(artifact_class)
task.importer_spec = _build_importer_spec(
artifact_uri=artifact_uri, artifact_type_schema=artifact_type_schema)
task.task_spec = _build_importer_task_spec(importer_base_name=task.name,
artifact_uri=artifact_uri)
task.component_spec = _build_importer_component_spec(
importer_base_name=task.name, artifact_type_schema=artifact_type_schema)
task.inputs = [input_param]
return task
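A rough usage sketch for dsl.importer, covering both the constant-URI and pipeline-parameter cases handled above; the pipeline name, bucket paths, and variable names below are illustrative assumptions rather than part of this module:

from kfp.v2 import dsl
from kfp.v2.dsl import Dataset, importer

@dsl.pipeline(name='importer-example')  # hypothetical pipeline name
def my_pipeline(data_uri: str = 'gs://example-bucket/data.csv'):
    # Constant URI: stored as a constant_value in the generated ImporterSpec.
    fixed_data = importer(artifact_uri='gs://example-bucket/fixed.csv',
                          artifact_class=Dataset)
    # Pipeline parameter URI: wired up as a runtime parameter / component input.
    runtime_data = importer(artifact_uri=data_uri,
                            artifact_class=Dataset,
                            reimport=True)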

View File

@ -0,0 +1,167 @@
# Copyright 2020 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from absl.testing import parameterized
import unittest
from google.protobuf import json_format
from kfp.dsl import _pipeline_param
from kfp.pipeline_spec import pipeline_spec_pb2 as pb
from kfp.v2.components import importer_node
class ImporterNodeTest(parameterized.TestCase):
@parameterized.parameters(
{
# artifact_uri is a constant value
'input_uri':
'gs://artifact',
'artifact_type_schema':
pb.ArtifactTypeSchema(schema_title='system.Dataset'),
'expected_result': {
'artifactUri': {
'constantValue': {
'stringValue': 'gs://artifact'
}
},
'typeSchema': {
'schemaTitle': 'system.Dataset'
}
}
},
{
# artifact_uri is from PipelineParam
'input_uri':
_pipeline_param.PipelineParam(name='uri_to_import'),
'artifact_type_schema':
pb.ArtifactTypeSchema(schema_title='system.Model'),
'expected_result': {
'artifactUri': {
'runtimeParameter': 'uri'
},
'typeSchema': {
'schemaTitle': 'system.Model'
}
},
})
def test_build_importer_spec(self, input_uri, artifact_type_schema,
expected_result):
expected_importer_spec = pb.PipelineDeploymentConfig.ImporterSpec()
json_format.ParseDict(expected_result, expected_importer_spec)
importer_spec = importer_node._build_importer_spec(
artifact_uri=input_uri, artifact_type_schema=artifact_type_schema)
self.maxDiff = None
self.assertEqual(expected_importer_spec, importer_spec)
@parameterized.parameters(
{
# artifact_uri is a constant value
'importer_name': 'importer-1',
'input_uri': 'gs://artifact',
'expected_result': {
'taskInfo': {
'name': 'importer-1'
},
'inputs': {
'parameters': {
'uri': {
'runtimeValue': {
'constantValue': {
'stringValue': 'gs://artifact'
}
}
}
}
},
'componentRef': {
'name': 'comp-importer-1'
},
}
},
{
# artifact_uri is from PipelineParam
'importer_name': 'importer-2',
'input_uri': _pipeline_param.PipelineParam(name='uri_to_import'),
'expected_result': {
'taskInfo': {
'name': 'importer-2'
},
'inputs': {
'parameters': {
'uri': {
'componentInputParameter': 'uri_to_import'
}
}
},
'componentRef': {
'name': 'comp-importer-2'
},
},
})
def test_build_importer_task_spec(self, importer_name, input_uri,
expected_result):
expected_task_spec = pb.PipelineTaskSpec()
json_format.ParseDict(expected_result, expected_task_spec)
task_spec = importer_node._build_importer_task_spec(
importer_base_name=importer_name, artifact_uri=input_uri)
self.maxDiff = None
self.assertEqual(expected_task_spec, task_spec)
def test_build_importer_component_spec(self):
expected_importer_component = {
'inputDefinitions': {
'parameters': {
'uri': {
'type': 'STRING'
}
}
},
'outputDefinitions': {
'artifacts': {
'artifact': {
'artifactType': {
'schemaTitle': 'system.Artifact'
}
}
}
},
'executorLabel': 'exec-importer-1'
}
expected_importer_comp_spec = pb.ComponentSpec()
json_format.ParseDict(expected_importer_component,
expected_importer_comp_spec)
importer_comp_spec = importer_node._build_importer_component_spec(
importer_base_name='importer-1',
artifact_type_schema=pb.ArtifactTypeSchema(
schema_title='system.Artifact'))
self.maxDiff = None
self.assertEqual(expected_importer_comp_spec, importer_comp_spec)
def test_import_with_invalid_artifact_uri_value_should_fail(self):
from kfp.v2.components.types.artifact_types import Dataset
with self.assertRaisesRegex(
ValueError,
"Importer got unexpected artifact_uri: 123 of type: <class 'int'>."
):
importer_node.importer(artifact_uri=123, artifact_class=Dataset)
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,13 @@
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@ -0,0 +1,451 @@
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes for input/output Artifacts in KFP SDK.
These are only compatible with v2 Pipelines.
"""
import os
from typing import Dict, Generic, List, Optional, Type, TypeVar, Union
_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'
_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'
_S3_LOCAL_MOUNT_PREFIX = '/s3/'
class Artifact(object):
"""Generic Artifact class.
This class is meant to represent the metadata around an input or output
machine-learning Artifact. Artifacts have URIs, which can either be a location
on disk (or Cloud storage) or some other resource identifier such as
an API resource name.
Artifacts carry a `metadata` field, which is a dictionary for storing
metadata related to this artifact.
"""
TYPE_NAME = 'system.Artifact'
def __init__(self,
name: Optional[str] = None,
uri: Optional[str] = None,
metadata: Optional[Dict] = None):
"""Initializes the Artifact with the given name, URI and metadata."""
self.uri = uri or ''
self.name = name or ''
self.metadata = metadata or {}
@property
def path(self):
return self._get_path()
@path.setter
def path(self, path):
self._set_path(path)
def _get_path(self) -> Optional[str]:
if self.uri.startswith('gs://'):
return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]
elif self.uri.startswith('minio://'):
return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]
elif self.uri.startswith('s3://'):
return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]
return None
def _set_path(self, path):
if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):
path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]
elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):
path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]
elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):
path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]
self.uri = path
class Model(Artifact):
"""An artifact representing an ML Model."""
TYPE_NAME = 'system.Model'
def __init__(self,
name: Optional[str] = None,
uri: Optional[str] = None,
metadata: Optional[Dict] = None):
super().__init__(uri=uri, name=name, metadata=metadata)
@property
def framework(self) -> str:
return self._get_framework()
def _get_framework(self) -> str:
return self.metadata.get('framework', '')
@framework.setter
def framework(self, framework: str):
self._set_framework(framework)
def _set_framework(self, framework: str):
self.metadata['framework'] = framework
class Dataset(Artifact):
"""An artifact representing an ML Dataset."""
TYPE_NAME = 'system.Dataset'
def __init__(self,
name: Optional[str] = None,
uri: Optional[str] = None,
metadata: Optional[Dict] = None):
super().__init__(uri=uri, name=name, metadata=metadata)
class Metrics(Artifact):
"""Represent a simple base Artifact type to store key-value scalar metrics."""
TYPE_NAME = 'system.Metrics'
def __init__(self,
name: Optional[str] = None,
uri: Optional[str] = None,
metadata: Optional[Dict] = None):
super().__init__(uri=uri, name=name, metadata=metadata)
def log_metric(self, metric: str, value: float):
"""Sets a custom scalar metric.
Args:
metric: Metric key
value: Value of the metric.
"""
self.metadata[metric] = value
class ClassificationMetrics(Artifact):
"""Represents Artifact class to store Classification Metrics."""
TYPE_NAME = 'system.ClassificationMetrics'
def __init__(self,
name: Optional[str] = None,
uri: Optional[str] = None,
metadata: Optional[Dict] = None):
super().__init__(uri=uri, name=name, metadata=metadata)
def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):
"""Logs a single data point in the ROC Curve.
Args:
fpr: False positive rate value of the data point.
tpr: True positive rate value of the data point.
threshold: Threshold value for the data point.
"""
roc_reading = {
'confidenceThreshold': threshold,
'recall': tpr,
'falsePositiveRate': fpr
}
if 'confidenceMetrics' not in self.metadata.keys():
self.metadata['confidenceMetrics'] = []
self.metadata['confidenceMetrics'].append(roc_reading)
def log_roc_curve(self, fpr: List[float], tpr: List[float],
threshold: List[float]):
"""Logs an ROC curve.
The list length of fpr, tpr and threshold must be the same.
Args:
fpr: List of false positive rate values.
tpr: List of true positive rate values.
threshold: List of threshold values.
"""
if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(
tpr) != len(threshold):
raise ValueError(
'Length of fpr, tpr and threshold must be the same. '
'Got lengths {}, {} and {} respectively.'.format(
len(fpr), len(tpr), len(threshold)))
for i in range(len(fpr)):
self.log_roc_data_point(fpr=fpr[i],
tpr=tpr[i],
threshold=threshold[i])
def set_confusion_matrix_categories(self, categories: List[str]):
"""Stores confusion matrix categories.
Args:
categories: List of strings specifying the categories.
"""
self._categories = []
annotation_specs = []
for category in categories:
annotation_spec = {'displayName': category}
self._categories.append(category)
annotation_specs.append(annotation_spec)
self._matrix = []
for row in range(len(self._categories)):
self._matrix.append({'row': [0] * len(self._categories)})
self._confusion_matrix = {}
self._confusion_matrix['annotationSpecs'] = annotation_specs
self._confusion_matrix['rows'] = self._matrix
self.metadata['confusionMatrix'] = self._confusion_matrix
def log_confusion_matrix_row(self, row_category: str, row: List[float]):
"""Logs a confusion matrix row.
Args:
row_category: Category to which the row belongs.
row: List of integers specifying the values for the row.
Raises:
ValueError: If row_category is not in the list of categories
set in set_categories call.
"""
if row_category not in self._categories:
raise ValueError('Invalid category: {} passed. Expected one of: {}'.\
format(row_category, self._categories))
if len(row) != len(self._categories):
raise ValueError('Invalid row. Expected size: {} got: {}'.\
format(len(self._categories), len(row)))
self._matrix[self._categories.index(row_category)] = {'row': row}
self.metadata['confusionMatrix'] = self._confusion_matrix
def log_confusion_matrix_cell(self, row_category: str, col_category: str,
value: int):
"""Logs a cell in the confusion matrix.
Args:
row_category: String representing the name of the row category.
col_category: String representing the name of the column category.
value: Int value of the cell.
Raises:
ValueError: If row_category or col_category is not in the list of
categories set in set_categories.
"""
if row_category not in self._categories:
raise ValueError('Invalid category: {} passed. Expected one of: {}'.\
format(row_category, self._categories))
if col_category not in self._categories:
raise ValueError('Invalid category: {} passed. Expected one of: {}'.\
format(col_category, self._categories))
self._matrix[self._categories.index(row_category)]['row'][
self._categories.index(col_category)] = value
self.metadata['confusionMatrix'] = self._confusion_matrix
def log_confusion_matrix(self, categories: List[str],
matrix: List[List[int]]):
"""Logs a confusion matrix.
Args:
categories: List of the category names.
matrix: Complete confusion matrix.
Raises:
ValueError: Length of categories does not match number of rows or columns.
"""
self.set_confusion_matrix_categories(categories)
if len(matrix) != len(categories):
raise ValueError('Invalid matrix: {} passed for categories: {}'.\
format(matrix, categories))
for index in range(len(categories)):
if len(matrix[index]) != len(categories):
raise ValueError('Invalid matrix: {} passed for categories: {}'.\
format(matrix, categories))
self.log_confusion_matrix_row(categories[index], matrix[index])
self.metadata['confusionMatrix'] = self._confusion_matrix
class SlicedClassificationMetrics(Artifact):
"""Metrics class representing Sliced Classification Metrics.
Similar to ClassificationMetrics, clients using this class are expected to use
the log methods of the class to log metrics, with the difference being that
each log method takes a slice to associate the ClassificationMetrics with.
"""
TYPE_NAME = 'system.SlicedClassificationMetrics'
def __init__(self,
name: Optional[str] = None,
uri: Optional[str] = None,
metadata: Optional[Dict] = None):
super().__init__(uri=uri, name=name, metadata=metadata)
# Maps a slice name to the ClassificationMetrics accumulated for that slice.
self._sliced_metrics = {}
def _upsert_classification_metrics_for_slice(self, slice: str):
"""Upserts the classification metrics instance for a slice."""
if slice not in self._sliced_metrics:
self._sliced_metrics[slice] = ClassificationMetrics()
def _update_metadata(self, slice: str):
"""Updates metadata to adhere to the metrics schema."""
self.metadata = {}
self.metadata['evaluationSlices'] = []
for slice in self._sliced_metrics.keys():
slice_metrics = {
'slice':
slice,
'sliceClassificationMetrics':
self._sliced_metrics[slice].metadata
}
self.metadata['evaluationSlices'].append(slice_metrics)
def log_roc_reading(self, slice: str, threshold: float, tpr: float,
fpr: float):
"""Logs a single data point in the ROC Curve of a slice.
Args:
slice: String representing slice label.
threshold: Threshold value for the data point.
tpr: True positive rate value of the data point.
fpr: False positive rate value of the data point.
"""
self._upsert_classification_metrics_for_slice(slice)
self._sliced_metrics[slice].log_roc_data_point(fpr=fpr, tpr=tpr, threshold=threshold)
self._update_metadata(slice)
def load_roc_readings(self, slice: str, readings: List[List[float]]):
"""Supports bulk loading ROC Curve readings for a slice.
Args:
slice: String representing slice label.
readings: A 2-D list providing ROC Curve data points.
The expected order of the data points is: threshold,
true_positive_rate, false_positive_rate.
"""
self._upsert_classification_metrics_for_slice(slice)
# Each reading is ordered as: threshold, true_positive_rate, false_positive_rate.
for threshold, tpr, fpr in readings:
    self._sliced_metrics[slice].log_roc_data_point(fpr=fpr, tpr=tpr, threshold=threshold)
self._update_metadata(slice)
def set_confusion_matrix_categories(self, slice: str,
categories: List[str]):
"""Stores confusion matrix categories for a slice..
Categories are stored in the internal metrics_utils.ConfusionMatrix
instance of the slice.
Args:
slice: String representing slice label.
categories: List of strings specifying the categories.
"""
self._upsert_classification_metrics_for_slice(slice)
self._sliced_metrics[slice].set_confusion_matrix_categories(categories)
self._update_metadata(slice)
def log_confusion_matrix_row(self, slice: str, row_category: str,
row: List[int]):
"""Logs a confusion matrix row for a slice.
The row is updated on the internal ClassificationMetrics
instance of the slice.
Args:
slice: String representing slice label.
row_category: Category to which the row belongs.
row: List of integers specifying the values for the row.
"""
self._upsert_classification_metrics_for_slice(slice)
self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)
self._update_metadata(slice)
def log_confusion_matrix_cell(self, slice: str, row_category: str,
col_category: str, value: int):
"""Logs a confusion matrix cell for a slice..
Cell is updated on the internal metrics_utils.ConfusionMatrix
instance of the slice.
Args:
slice: String representing slice label.
row_category: String representing the name of the row category.
col_category: String representing the name of the column category.
value: Int value of the cell.
"""
self._upsert_classification_metrics_for_slice(slice)
self._sliced_metrics[slice].log_confusion_matrix_cell(
row_category, col_category, value)
self._update_metadata(slice)
def load_confusion_matrix(self, slice: str, categories: List[str],
matrix: List[List[int]]):
"""Supports bulk loading the whole confusion matrix for a slice.
Args:
slice: String representing slice label.
categories: List of the category names.
matrix: Complete confusion matrix.
"""
self._upsert_classification_metrics_for_slice(slice)
self._sliced_metrics[slice].log_confusion_matrix(categories, matrix)
self._update_metadata(slice)
class HTML(Artifact):
"""An artifact representing an HTML file."""
TYPE_NAME = 'system.HTML'
def __init__(self,
name: Optional[str] = None,
uri: Optional[str] = None,
metadata: Optional[Dict] = None):
super().__init__(uri=uri, name=name, metadata=metadata)
class Markdown(Artifact):
"""An artifact representing an Markdown file."""
TYPE_NAME = 'system.Markdown'
def __init__(self,
name: Optional[str] = None,
uri: Optional[str] = None,
metadata: Optional[Dict] = None):
super().__init__(uri=uri, name=name, metadata=metadata)
_SCHEMA_TITLE_TO_TYPE: Dict[str, Type[Artifact]] = {
x.TYPE_NAME: x
for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]
}
def create_runtime_artifact(runtime_artifact: Dict) -> Artifact:
"""Creates an Artifact instance from the specified RuntimeArtifact.
Args:
runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.
"""
schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')
artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)
if not artifact_type:
artifact_type = Artifact
return artifact_type(
uri=runtime_artifact.get('uri', ''),
name=runtime_artifact.get('name', ''),
metadata=runtime_artifact.get('metadata', {}),
)
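As a rough usage sketch of the artifact classes above (the names, URIs, and metric values below are made-up illustrations):

from kfp.v2.components.types import artifact_types

# Artifact.path maps the URI onto the local mount prefix (here gs:// -> /gcs/).
model = artifact_types.Model(name='example-model', uri='gs://example-bucket/models/m1')
assert model.path == '/gcs/example-bucket/models/m1'
model.framework = 'tensorflow'  # stored under metadata['framework']

# Scalar metrics are plain key/value entries in metadata.
metrics = artifact_types.Metrics(name='eval-metrics')
metrics.log_metric('accuracy', 0.93)

# Classification metrics accumulate under metadata['confidenceMetrics'] and
# metadata['confusionMatrix'].
cm = artifact_types.ClassificationMetrics(name='eval-classification')
cm.log_roc_curve(fpr=[0.0, 0.1, 1.0], tpr=[0.0, 0.8, 1.0], threshold=[1.0, 0.5, 0.0])
cm.log_confusion_matrix(['cat', 'dog'], [[9, 1], [2, 8]])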

View File

@ -0,0 +1,58 @@
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for kfp.v2.components.types.artifact_types."""
import unittest
import json
import os
from kfp.v2.components.types import artifact_types
class ArtifactsTest(unittest.TestCase):
def test_complex_metrics(self):
metrics = artifact_types.ClassificationMetrics()
metrics.log_roc_data_point(threshold=0.1, tpr=98.2, fpr=96.2)
metrics.log_roc_data_point(threshold=24.3, tpr=24.5, fpr=98.4)
metrics.set_confusion_matrix_categories(['dog', 'cat', 'horses'])
metrics.log_confusion_matrix_row('dog', [2, 6, 0])
metrics.log_confusion_matrix_cell('cat', 'dog', 3)
metrics.log_confusion_matrix_cell('horses', 'horses', 3)
metrics.metadata['test'] = 1.0
with open(
os.path.join(os.path.dirname(__file__), 'test_data',
'expected_io_types_classification_metrics.json')
) as json_file:
expected_json = json.load(json_file)
self.assertEqual(expected_json, metrics.metadata)
def test_complex_metrics_bulk_loading(self):
metrics = artifact_types.ClassificationMetrics()
metrics.log_roc_curve(fpr=[85.1, 85.1, 85.1],
tpr=[52.6, 52.6, 52.6],
threshold=[53.6, 53.6, 53.6])
metrics.log_confusion_matrix(['dog', 'cat', 'horses'],
[[2, 6, 0], [3, 5, 6], [5, 7, 8]])
with open(
os.path.join(
os.path.dirname(__file__), 'test_data',
'expected_io_types_bulk_load_classification_metrics.json')
) as json_file:
expected_json = json.load(json_file)
self.assertEqual(expected_json, metrics.metadata)
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,10 @@
{
"annotationSpecs": [
{"displayName": "dog"},
{"displayName": "cat"},
{"displayName": "horses"}],
"row": [
[2, 6, 0],
[3, 5, 6],
[5, 7, 8]]
}

View File

@ -0,0 +1,10 @@
{
"annotationSpecs": [
{"displayName": "dog"},
{"displayName": "cat"},
{"displayName": "horses"}],
"row": [
[2, 6, 0],
[3, 0, 0],
[0, 0, 0]]
}

View File

@ -0,0 +1,133 @@
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes for input/output type annotations in KFP SDK.
These are only compatible with v2 Pipelines.
"""
from typing import TypeVar, Union
T = TypeVar('T')
class OutputPath:
'''Annotation for indicating a variable is a path to an output.'''
def __init__(self, type=None):
self.type = type
class InputPath:
'''Annotation for indicating a variable is a path to an input.'''
def __init__(self, type=None):
self.type = type
class InputAnnotation():
"""Marker type for input artifacts."""
pass
class OutputAnnotation():
"""Marker type for output artifacts."""
pass
# TODO: Use typing.Annotated instead of this hack.
# With typing.Annotated (Python 3.9+ or typing_extensions package), the
# following would look like:
# Input = typing.Annotated[T, InputAnnotation]
# Output = typing.Annotated[T, OutputAnnotation]
# Input represents an Input artifact of type T.
Input = Union[T, InputAnnotation]
# Output represents an Output artifact of type T.
Output = Union[T, OutputAnnotation]
def is_artifact_annotation(typ) -> bool:
if hasattr(typ, '_subs_tree'): # Python 3.6
subs_tree = typ._subs_tree()
return len(
subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [
InputAnnotation, OutputAnnotation
]
if not hasattr(typ, '__origin__'):
return False
if typ.__origin__ != Union and type(typ.__origin__) != type(Union):
return False
if not hasattr(typ, '__args__') or len(typ.__args__) != 2:
return False
if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:
return False
return True
def is_input_artifact(typ) -> bool:
"""Returns True if typ is of type Input[T]."""
if not is_artifact_annotation(typ):
return False
if hasattr(typ, '_subs_tree'): # Python 3.6
subs_tree = typ._subs_tree()
return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation
return typ.__args__[1] == InputAnnotation
def is_output_artifact(typ) -> bool:
"""Returns True if typ is of type Output[T]."""
if not is_artifact_annotation(typ):
return False
if hasattr(typ, '_subs_tree'): # Python 3.6
subs_tree = typ._subs_tree()
return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation
return typ.__args__[1] == OutputAnnotation
def get_io_artifact_class(typ):
if not is_artifact_annotation(typ):
return None
if typ == Input or typ == Output:
return None
if hasattr(typ, '_subs_tree'): # Python 3.6
subs_tree = typ._subs_tree()
if len(subs_tree) != 3:
return None
return subs_tree[1]
return typ.__args__[0]
def get_io_artifact_annotation(typ):
if not is_artifact_annotation(typ):
return None
if hasattr(typ, '_subs_tree'): # Python 3.6
subs_tree = typ._subs_tree()
if len(subs_tree) != 3:
return None
return subs_tree[2]
return typ.__args__[1]
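A small sketch of how these helpers behave, mirroring the unit tests below; Dataset and Model come from artifact_types:

from kfp.v2.components.types import type_annotations
from kfp.v2.components.types.artifact_types import Dataset, Model
from kfp.v2.components.types.type_annotations import (Input, InputAnnotation,
                                                      Output, OutputAnnotation)

# Input[T] / Output[T] are recognized as artifact annotations...
assert type_annotations.is_artifact_annotation(Input[Dataset])
assert type_annotations.is_input_artifact(Input[Dataset])
assert type_annotations.is_output_artifact(Output[Model])

# ...and the wrapped artifact class and direction can be recovered.
assert type_annotations.get_io_artifact_class(Output[Model]) is Model
assert type_annotations.get_io_artifact_annotation(Input[Dataset]) is InputAnnotation
assert type_annotations.get_io_artifact_annotation(Output[Model]) is OutputAnnotation

# Plain types are not artifact annotations.
assert not type_annotations.is_artifact_annotation(Model)
assert not type_annotations.is_artifact_annotation(str)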

View File

@ -0,0 +1,79 @@
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for kfp.v2.components.types.type_annotations."""
import unittest
from typing import List, Optional
from kfp.v2.components.types import type_annotations
from kfp.v2.components.types.artifact_types import Model
from kfp.v2.components.types.type_annotations import Input, InputAnnotation, Output, OutputAnnotation
class AnnotationsTest(unittest.TestCase):
def test_is_artifact_annotation(self):
self.assertTrue(type_annotations.is_artifact_annotation(Input[Model]))
self.assertTrue(type_annotations.is_artifact_annotation(Output[Model]))
self.assertTrue(
type_annotations.is_artifact_annotation(Output['MyArtifact']))
self.assertFalse(type_annotations.is_artifact_annotation(Model))
self.assertFalse(type_annotations.is_artifact_annotation(int))
self.assertFalse(type_annotations.is_artifact_annotation('Dataset'))
self.assertFalse(type_annotations.is_artifact_annotation(List[str]))
self.assertFalse(type_annotations.is_artifact_annotation(Optional[str]))
def test_is_input_artifact(self):
self.assertTrue(type_annotations.is_input_artifact(Input[Model]))
self.assertTrue(type_annotations.is_input_artifact(Input))
self.assertFalse(type_annotations.is_input_artifact(Output[Model]))
self.assertFalse(type_annotations.is_input_artifact(Output))
def test_is_output_artifact(self):
self.assertTrue(type_annotations.is_output_artifact(Output[Model]))
self.assertTrue(type_annotations.is_output_artifact(Output))
self.assertFalse(type_annotations.is_output_artifact(Input[Model]))
self.assertFalse(type_annotations.is_output_artifact(Input))
def test_get_io_artifact_class(self):
self.assertEqual(type_annotations.get_io_artifact_class(Output[Model]),
Model)
self.assertEqual(type_annotations.get_io_artifact_class(Input), None)
self.assertEqual(type_annotations.get_io_artifact_class(Output), None)
self.assertEqual(type_annotations.get_io_artifact_class(Model), None)
self.assertEqual(type_annotations.get_io_artifact_class(str), None)
def test_get_io_artifact_annotation(self):
self.assertEqual(
type_annotations.get_io_artifact_annotation(Output[Model]),
OutputAnnotation)
self.assertEqual(
type_annotations.get_io_artifact_annotation(Input[Model]),
InputAnnotation)
self.assertEqual(type_annotations.get_io_artifact_annotation(Input),
InputAnnotation)
self.assertEqual(type_annotations.get_io_artifact_annotation(Output),
OutputAnnotation)
self.assertEqual(type_annotations.get_io_artifact_annotation(Model),
None)
self.assertEqual(type_annotations.get_io_artifact_annotation(str), None)
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,160 @@
# Copyright 2020 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for component I/O type mapping."""
import inspect
from typing import Dict, List, Optional, Type, Union
from kfp.components import structures
from kfp.components import type_annotation_utils
from kfp.pipeline_spec import pipeline_spec_pb2
from kfp.v2.components.types import artifact_types
# ComponentSpec I/O types to DSL ontology artifact classes mapping.
_ARTIFACT_CLASSES_MAPPING = {
'model': artifact_types.Model,
'dataset': artifact_types.Dataset,
'metrics': artifact_types.Metrics,
'classificationmetrics': artifact_types.ClassificationMetrics,
'slicedclassificationmetrics': artifact_types.SlicedClassificationMetrics,
'html': artifact_types.HTML,
'markdown': artifact_types.Markdown,
}
# ComponentSpec I/O types to (IR) PipelineTaskSpec I/O types mapping.
# The keys are normalized (lowercased). These are types viewed as Parameters.
# The values are the corresponding IR parameter primitive types.
_PARAMETER_TYPES_MAPPING = {
'integer': pipeline_spec_pb2.PrimitiveType.INT,
'int': pipeline_spec_pb2.PrimitiveType.INT,
'double': pipeline_spec_pb2.PrimitiveType.DOUBLE,
'float': pipeline_spec_pb2.PrimitiveType.DOUBLE,
'string': pipeline_spec_pb2.PrimitiveType.STRING,
'str': pipeline_spec_pb2.PrimitiveType.STRING,
'text': pipeline_spec_pb2.PrimitiveType.STRING,
'bool': pipeline_spec_pb2.PrimitiveType.STRING,
'boolean': pipeline_spec_pb2.PrimitiveType.STRING,
'dict': pipeline_spec_pb2.PrimitiveType.STRING,
'list': pipeline_spec_pb2.PrimitiveType.STRING,
'jsonobject': pipeline_spec_pb2.PrimitiveType.STRING,
'jsonarray': pipeline_spec_pb2.PrimitiveType.STRING,
}
# Mapping primitive types to their IR message field names.
# This is used in constructing condition strings.
_PARAMETER_TYPES_VALUE_REFERENCE_MAPPING = {
pipeline_spec_pb2.PrimitiveType.INT: 'int_value',
pipeline_spec_pb2.PrimitiveType.DOUBLE: 'double_value',
pipeline_spec_pb2.PrimitiveType.STRING: 'string_value',
}
def is_parameter_type(type_name: Optional[Union[str, dict]]) -> bool:
"""Check if a ComponentSpec I/O type is considered as a parameter type.
Args:
type_name: type name of the ComponentSpec I/O type.
Returns:
True if the type name maps to a parameter type else False.
"""
if isinstance(type_name, str):
type_name = type_annotation_utils.get_short_type_name(type_name)
elif isinstance(type_name, dict):
type_name = list(type_name.keys())[0]
else:
return False
return type_name.lower() in _PARAMETER_TYPES_MAPPING
def get_artifact_type_schema(
artifact_class_or_type_name: Optional[Union[str,
Type[artifact_types.Artifact]]]
) -> pipeline_spec_pb2.ArtifactTypeSchema:
"""Gets the IR I/O artifact type msg for the given ComponentSpec I/O type."""
artifact_class = artifact_types.Artifact
if isinstance(artifact_class_or_type_name, str):
artifact_class = _ARTIFACT_CLASSES_MAPPING.get(
artifact_class_or_type_name.lower(), artifact_types.Artifact)
elif inspect.isclass(artifact_class_or_type_name) and issubclass(
artifact_class_or_type_name, artifact_types.Artifact):
artifact_class = artifact_class_or_type_name
return pipeline_spec_pb2.ArtifactTypeSchema(
schema_title=artifact_class.TYPE_NAME)
def get_parameter_type(
param_type: Optional[Union[Type, str, dict]]
) -> pipeline_spec_pb2.PrimitiveType:
"""Get the IR I/O parameter type for the given ComponentSpec I/O type.
Args:
param_type: type of the ComponentSpec I/O type. Can be a primitive Python
builtin type or a type name.
Returns:
The enum value of the mapped IR I/O primitive type.
Raises:
AttributeError: if type_name is not a string type.
"""
if type(param_type) == type:
type_name = param_type.__name__
elif isinstance(param_type, dict):
type_name = list(param_type.keys())[0]
else:
type_name = type_annotation_utils.get_short_type_name(str(param_type))
return _PARAMETER_TYPES_MAPPING.get(type_name.lower())
def get_parameter_type_field_name(type_name: Optional[str]) -> str:
"""Get the IR field name for the given primitive type.
For example: 'str' -> 'string_value', 'double' -> 'double_value', etc.
Args:
type_name: type name of the ComponentSpec I/O primitive type.
Returns:
The IR value reference field name.
Raises:
AttributeError: if type_name is not a string type.
"""
return _PARAMETER_TYPES_VALUE_REFERENCE_MAPPING.get(
get_parameter_type(type_name))
def get_input_artifact_type_schema(
input_name: str,
inputs: List[structures.InputSpec],
) -> Optional[str]:
"""Find the input artifact type by input name.
Args:
input_name: The name of the component input.
inputs: The list of InputSpec
Returns:
The artifact type schema of the input.
Raises:
AssertionError: If the input is not found, or the input is found but is not
an artifact type.
"""
for component_input in inputs:
if component_input.name == input_name:
assert not is_parameter_type(
component_input.type), 'Input is not an artifact type.'
return get_artifact_type_schema(component_input.type)
assert False, 'Input not found.'
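A brief sketch of the mappings above, mirroring the tests that follow; the InputSpec name used here is an illustrative assumption:

from kfp.components import structures
from kfp.pipeline_spec import pipeline_spec_pb2
from kfp.v2.components.types import artifact_types, type_utils

# Parameter vs. artifact classification.
assert type_utils.is_parameter_type('Integer')
assert not type_utils.is_parameter_type('Model')

# ComponentSpec types -> IR primitive types and their value field names.
assert type_utils.get_parameter_type(str) == pipeline_spec_pb2.PrimitiveType.STRING
assert type_utils.get_parameter_type('Double') == pipeline_spec_pb2.PrimitiveType.DOUBLE
assert type_utils.get_parameter_type_field_name('Integer') == 'int_value'

# ComponentSpec types -> IR artifact type schemas.
dataset_schema = type_utils.get_artifact_type_schema(artifact_types.Dataset)
assert dataset_schema.schema_title == 'system.Dataset'

# Looking up an input's artifact type schema from a component's InputSpecs.
inputs = [structures.InputSpec(name='examples', type='Dataset')]
examples_schema = type_utils.get_input_artifact_type_schema('examples', inputs)
assert examples_schema.schema_title == 'system.Dataset'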

View File

@ -0,0 +1,282 @@
# Copyright 2020 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from absl.testing import parameterized
import sys
import unittest
from typing import Any, Dict, List
from kfp.components import structures
from kfp.pipeline_spec import pipeline_spec_pb2 as pb
from kfp.v2.components.types import artifact_types, type_utils
_PARAMETER_TYPES = [
'String',
'str',
'Integer',
'int',
'Float',
'Double',
'bool',
'Boolean',
'Dict',
'List',
'JsonObject',
'JsonArray',
{
'JsonObject': {
'data_type': 'proto:tfx.components.trainer.TrainArgs'
}
},
]
_KNOWN_ARTIFACT_TYPES = ['Model', 'Dataset', 'Schema', 'Metrics']
_UNKNOWN_ARTIFACT_TYPES = [None, 'Arbitrary Model', 'dummy']
class _ArbitraryClass:
pass
class TypeUtilsTest(parameterized.TestCase):
def test_is_parameter_type(self):
for type_name in _PARAMETER_TYPES:
self.assertTrue(type_utils.is_parameter_type(type_name))
for type_name in _KNOWN_ARTIFACT_TYPES + _UNKNOWN_ARTIFACT_TYPES:
self.assertFalse(type_utils.is_parameter_type(type_name))
@parameterized.parameters(
{
'artifact_class_or_type_name':
'Model',
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.Model')
},
{
'artifact_class_or_type_name':
artifact_types.Model,
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.Model')
},
{
'artifact_class_or_type_name':
'Dataset',
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.Dataset')
},
{
'artifact_class_or_type_name':
artifact_types.Dataset,
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.Dataset')
},
{
'artifact_class_or_type_name':
'Metrics',
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.Metrics')
},
{
'artifact_class_or_type_name':
artifact_types.Metrics,
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.Metrics')
},
{
'artifact_class_or_type_name':
'ClassificationMetrics',
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.ClassificationMetrics')
},
{
'artifact_class_or_type_name':
artifact_types.ClassificationMetrics,
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.ClassificationMetrics')
},
{
'artifact_class_or_type_name':
'SlicedClassificationMetrics',
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.SlicedClassificationMetrics')
},
{
'artifact_class_or_type_name':
artifact_types.SlicedClassificationMetrics,
'expected_result':
pb.ArtifactTypeSchema(
schema_title='system.SlicedClassificationMetrics')
},
{
'artifact_class_or_type_name':
'arbitrary name',
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.Artifact')
},
{
'artifact_class_or_type_name':
_ArbitraryClass,
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.Artifact')
},
{
'artifact_class_or_type_name': artifact_types.HTML,
'expected_result': pb.ArtifactTypeSchema(schema_title='system.HTML')
},
{
'artifact_class_or_type_name':
artifact_types.Markdown,
'expected_result':
pb.ArtifactTypeSchema(schema_title='system.Markdown')
},
)
def test_get_artifact_type_schema(self, artifact_class_or_type_name,
expected_result):
self.assertEqual(
expected_result,
type_utils.get_artifact_type_schema(artifact_class_or_type_name))
@parameterized.parameters(
{
'given_type': 'Int',
'expected_type': pb.PrimitiveType.INT,
},
{
'given_type': 'Integer',
'expected_type': pb.PrimitiveType.INT,
},
{
'given_type': int,
'expected_type': pb.PrimitiveType.INT,
},
{
'given_type': 'Double',
'expected_type': pb.PrimitiveType.DOUBLE,
},
{
'given_type': 'Float',
'expected_type': pb.PrimitiveType.DOUBLE,
},
{
'given_type': float,
'expected_type': pb.PrimitiveType.DOUBLE,
},
{
'given_type': 'String',
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': 'Text',
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': str,
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': 'Boolean',
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': bool,
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': 'Dict',
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': dict,
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': 'List',
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': list,
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': Dict[str, int],
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': List[Any],
'expected_type': pb.PrimitiveType.STRING,
},
{
'given_type': {
'JsonObject': {
'data_type': 'proto:tfx.components.trainer.TrainArgs'
}
},
'expected_type': pb.PrimitiveType.STRING,
},
)
def test_get_parameter_type(self, given_type, expected_type):
self.assertEqual(expected_type,
type_utils.get_parameter_type(given_type))
# Test get parameter by Python type.
self.assertEqual(pb.PrimitiveType.INT,
type_utils.get_parameter_type(int))
def test_get_parameter_type_invalid(self):
with self.assertRaises(AttributeError):
type_utils.get_parameter_type_schema(None)
def test_get_input_artifact_type_schema(self):
input_specs = [
structures.InputSpec(name='input1', type='String'),
structures.InputSpec(name='input2', type='Model'),
structures.InputSpec(name='input3', type=None),
]
# input not found.
with self.assertRaises(AssertionError) as cm:
type_utils.get_input_artifact_type_schema('input0', input_specs)
self.assertEqual('Input not found.', str(cm))
# input found, but it doesn't map to an artifact type.
with self.assertRaises(AssertionError) as cm:
type_utils.get_input_artifact_type_schema('input1', input_specs)
self.assertEqual('Input is not an artifact type.', str(cm))
# input found, and a matching artifact type schema returned.
self.assertEqual(
'system.Model',
type_utils.get_input_artifact_type_schema('input2',
input_specs).schema_title)
# input found, and the default artifact type schema returned.
self.assertEqual(
'system.Artifact',
type_utils.get_input_artifact_type_schema('input3',
input_specs).schema_title)
def test_get_parameter_type_field_name(self):
self.assertEqual('string_value',
type_utils.get_parameter_type_field_name('String'))
self.assertEqual('int_value',
type_utils.get_parameter_type_field_name('Integer'))
self.assertEqual('double_value',
type_utils.get_parameter_type_field_name('Float'))
if __name__ == '__main__':
unittest.main()

View File

@ -12,23 +12,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from kfp.v2.dsl.component_decorator import component
from kfp.dsl.io_types import (
Input,
Output,
from kfp.v2.components.component_decorator import component
from kfp.v2.components.importer_node import importer
from kfp.v2.components.types.artifact_types import (
Artifact,
Dataset,
Model,
Metrics,
ClassificationMetrics,
SlicedClassificationMetrics,
Dataset,
HTML,
Markdown,
Metrics,
Model,
SlicedClassificationMetrics,
)
from kfp.components import (
from kfp.v2.components.types.type_annotations import (
Input,
Output,
InputPath,
OutputPath,
)
from kfp.dsl import (
graph_component,
pipeline,

View File

@ -102,6 +102,7 @@ setup(
'kfp.v2',
'kfp.v2.compiler',
'kfp.v2.components',
'kfp.v2.components.types',
'kfp.v2.components.experimental',
'kfp.v2.dsl',
'kfp.v2.google.client',
@ -131,5 +132,4 @@ setup(
'dsl-compile-v2 = kfp.v2.compiler.main:main',
'kfp=kfp.__main__:main'
]
}
)
})

View File

@ -1279,7 +1279,8 @@ implementation:
@dsl.pipeline(name='test-pipeline')
def my_pipeline():
dsl.importer(artifact_uri='dummy', artifact_class=dsl.io_types.Artifact)
from kfp.v2.dsl import importer, Artifact
importer(artifact_uri='dummy', artifact_class=Artifact)
with self.assertRaisesRegex(
NotImplementedError,