chore(sdk): Refactor and move all v2-related code under the v2 namespace. (#6358)
* Refactor and move all v2-related code under the v2 namespace. Most of the changes are around imports and restructuring of the codebase. While it looks like a lot of code was added, most of the code already existed and was simply moved or copied over to v2. The only exceptions are:
  - Under kfp/v2/components/component_factory.py, some helper functions were copied, with simplification, from _python_op.py.
  - We no longer strip the `_path` suffix in v2 components.
  Note: there is still some duplication of code (particularly between component_factory.py and _python_op.py), but it is acceptable for now since we intend to replace some of this with the v2 ComponentSpec + BaseComponent.
* Update setup.py.
* Update tests.
* Revert accidental change of gcpc.
* Fix component entrypoint.
* Update goldens.
* Fix tests.
* Fix merge conflict.
* Revert gcpc change.
* Fix tests.
* Fix tests.
* Add type aliases for moved files.
* Merge and update goldens.
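For illustration only (not part of the diff): after this refactor, user code that previously pulled v2 symbols from mixed v1 locations imports everything from the kfp.v2 namespace. A minimal sketch, using only imports that appear in the hunks below:

# Before: v2 symbols scattered across v1 modules.
# from kfp import dsl
# from kfp.components import InputPath, OutputPath
# from kfp.dsl.io_types import HTML, Markdown

# After: everything lives under kfp.v2.
from kfp.v2 import compiler, components, dsl
from kfp.v2.dsl import Dataset, Input, InputPath, Model, Output, OutputPath, component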
This commit is contained in:
parent
7f6e11dedc
commit
f3f383c2ff
@@ -15,10 +15,8 @@
 from typing import Dict, List
 import os
 
-from kfp import dsl
-from kfp import components
-from kfp.components import InputPath, OutputPath
-from kfp.v2.dsl import Input, Output, Dataset, Model, component
+from kfp.v2 import dsl
+from kfp.v2.dsl import Input, InputPath, Output, OutputPath, Dataset, Model, component
 import kfp.v2.compiler as compiler
 
 # In tests, we install a KFP package from the PR under test. Users should not
@@ -126,12 +124,12 @@ def train(
 def pipeline(message: str = 'message'):
     preprocess_task = preprocess(message=message)
     train_task = train(
-        dataset_one=preprocess_task.outputs['output_dataset_one'],
-        dataset_two=preprocess_task.outputs['output_dataset_two'],
-        message=preprocess_task.outputs['output_parameter'],
-        input_bool=preprocess_task.outputs['output_bool_parameter'],
-        input_dict=preprocess_task.outputs['output_dict_parameter'],
-        input_list=preprocess_task.outputs['output_list_parameter'],
+        dataset_one_path=preprocess_task.outputs['output_dataset_one'],
+        dataset_two=preprocess_task.outputs['output_dataset_two_path'],
+        message=preprocess_task.outputs['output_parameter_path'],
+        input_bool=preprocess_task.outputs['output_bool_parameter_path'],
+        input_dict=preprocess_task.outputs['output_dict_parameter_path'],
+        input_list=preprocess_task.outputs['output_list_parameter_path'],
     )
 
 
@@ -55,16 +55,16 @@ def verify(run: kfp_server_api.ApiRun, mlmd_connection_config, **kwargs):
             'type': 'system.Dataset'
         }, {
             'metadata': {
-                'display_name': 'output_dataset_two'
+                'display_name': 'output_dataset_two_path'
             },
-            'name': 'output_dataset_two',
+            'name': 'output_dataset_two_path',
             'type': 'system.Dataset'
         }],
         'parameters': {
-            'output_bool_parameter': 'True',
-            'output_dict_parameter': '{"A": 1, "B": 2}',
-            'output_list_parameter': '["a", "b", "c"]',
-            'output_parameter': 'message'
+            'output_bool_parameter_path': 'True',
+            'output_dict_parameter_path': '{"A": 1, "B": 2}',
+            'output_list_parameter_path': '["a", "b", "c"]',
+            'output_parameter_path': 'message'
         }
     },
     'type': 'system.ContainerExecution',
@@ -79,11 +79,11 @@ def verify(run: kfp_server_api.ApiRun, mlmd_connection_config, **kwargs):
         'metadata': {
             'display_name': 'output_dataset_one'
         },
-        'name': 'dataset_one',
+        'name': 'dataset_one_path',
         'type': 'system.Dataset'
     }, {
         'metadata': {
-            'display_name': 'output_dataset_two'
+            'display_name': 'output_dataset_two_path'
         },
         'name': 'dataset_two',
         'type': 'system.Dataset'
@@ -19,9 +19,9 @@ from kfp.v2.dsl import (
     Output,
     ClassificationMetrics,
     Metrics,
+    HTML,
+    Markdown
 )
-from kfp.dsl.io_types import Markdown
-from kfp.dsl.io_types import HTML
 
 # In tests, we install a KFP package from the PR under test. Users should not
 # normally need to specify `kfp_package_path` in their component definitions.
@@ -11,14 +11,17 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 
 from kfp.v2 import dsl
 from kfp.v2 import compiler
-from kfp.v2 import components
 
 # In tests, we install a KFP package from the PR under test. Users should not
 # normally need to specify `kfp_package_path` in their component definitions.
+_KFP_PACKAGE_PATH = os.getenv('KFP_PACKAGE_PATH')
 
-@components.create_component_from_func
-def hello_world(text: str):
+@dsl.component(kfp_package_path=_KFP_PACKAGE_PATH)
+def hello_world(text: str) -> str:
     print(text)
+    return text
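As a usage sketch for the hunk above (the pipeline wrapper and compile call here are hypothetical; the sample itself only defines the component, and passes kfp_package_path for CI installs):

from kfp.v2 import compiler, dsl


@dsl.component
def hello_world(text: str) -> str:
    print(text)
    return text


@dsl.pipeline(name='hello-world-pipeline')
def pipeline(text: str = 'hi there'):
    hello_world(text=text)


# The v2 compiler emits a pipeline job spec; a .json package path is expected.
compiler.Compiler().compile(
    pipeline_func=pipeline, package_path='hello_world_pipeline.json')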
@@ -35,15 +35,13 @@ def verify(run: kfp_server_api.ApiRun, mlmd_connection_config, **kwargs):
     t.assertEqual(
         {
             'hello-world':
-                KfpTask(
-                    name='hello-world',
-                    type='system.ContainerExecution',
-                    state=Execution.State.COMPLETE,
-                    inputs=TaskInputs(
-                        parameters={'text': 'hi there'}, artifacts=[]
-                    ),
-                    outputs=TaskOutputs(parameters={}, artifacts=[])
-                )
+                KfpTask(name='hello-world',
+                        type='system.ContainerExecution',
+                        state=Execution.State.COMPLETE,
+                        inputs=TaskInputs(parameters={'text': 'hi there'},
+                                          artifacts=[]),
+                        outputs=TaskOutputs(parameters={'Output': 'hi there'},
+                                            artifacts=[]))
         },
         tasks,
     )
@@ -14,10 +14,9 @@
 """Sample pipeline for passing data in KFP v2."""
 from typing import Dict, List
 
-from kfp import dsl
-from kfp import components
-from kfp.components import InputPath, OutputPath
-from kfp.v2.dsl import Input, Output, Dataset, Model, component
+from kfp import dsl as v1dsl
+from kfp.v2 import dsl
+from kfp.v2.dsl import Input, InputPath, Output, OutputPath, Dataset, Model, component
 import kfp.compiler as compiler
 
 
@@ -101,7 +100,7 @@ def train(
         f'input_bool: {input_bool}, type {type(input_bool)} || '
         f'input_dict: {input_dict}, type {type(input_dict)} || '
         f'input_list: {input_list}, type {type(input_list)} \n')
 
     with open(model.path, 'w') as output_file:
         for i in range(num_steps):
             output_file.write('Step {}\n{}\n=====\n'.format(i, line))
@@ -125,5 +124,5 @@ def pipeline(message: str = 'message'):
 
 
 if __name__ == '__main__':
-    compiler.Compiler(mode=dsl.PipelineExecutionMode.V2_COMPATIBLE).compile(
+    compiler.Compiler(mode=v1dsl.PipelineExecutionMode.V2_COMPATIBLE).compile(
         pipeline_func=pipeline, package_path=__file__.replace('.py', '.yaml'))
@@ -13,9 +13,10 @@
 # limitations under the License.
 
 # Simple two-step pipeline with 'producer' and 'consumer' steps
+from kfp import components as v1components
 from kfp.v2 import components, compiler, dsl
 
-producer_op = components.load_component_from_text(
+producer_op = v1components.load_component_from_text(
     """
 name: Producer
 inputs:
@@ -36,7 +37,7 @@ implementation:
 """
 )
 
-consumer_op = components.load_component_from_text(
+consumer_op = v1components.load_component_from_text(
     """
 name: Consumer
 inputs:
@@ -3,15 +3,15 @@ kind: Workflow
 metadata:
   generateName: my-test-pipeline-
   annotations:
-    pipelines.kubeflow.org/kfp_sdk_version: 1.7.0
-    pipelines.kubeflow.org/pipeline_compilation_time: '2021-08-16T18:41:22.625538'
+    pipelines.kubeflow.org/kfp_sdk_version: 1.7.1
+    pipelines.kubeflow.org/pipeline_compilation_time: '2021-08-17T17:37:09.946952'
     pipelines.kubeflow.org/pipeline_spec: '{"inputs": [{"default": "gs://output-directory/v2-artifacts",
       "name": "pipeline-root"}, {"default": "pipeline/my-test-pipeline", "name": "pipeline-name"}],
       "name": "my-test-pipeline"}'
     pipelines.kubeflow.org/v2_pipeline: "true"
   labels:
     pipelines.kubeflow.org/v2_pipeline: "true"
-    pipelines.kubeflow.org/kfp_sdk_version: 1.7.0
+    pipelines.kubeflow.org/kfp_sdk_version: 1.7.1
 spec:
   entrypoint: my-test-pipeline
   templates:
@@ -42,42 +42,34 @@ spec:
   container:
     args:
     - sh
    - -c
    - (python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1
      python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.1'
      || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'kfp==1.7.1' --user) && "$0" "$@"
    - sh
    - -ec
    - |
-      program_path=$(mktemp)
-      printf "%s" "$0" > "$program_path"
-      python3 -u "$program_path" "$@"
-    - |
-      def _make_parent_dirs_and_return_path(file_path: str):
-          import os
-          os.makedirs(os.path.dirname(file_path), exist_ok=True)
-          return file_path
+      program_path=$(mktemp -d)
+      printf "%s" "$0" > "$program_path/ephemeral_component.py"
+      python3 -m kfp.v2.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
+    - |2+
 
-      def preprocess(uri, some_int, output_parameter_one,
-                     output_dataset_one):
-          '''Dummy Preprocess Step.'''
-          with open(output_dataset_one, 'w') as f:
-              f.write('Output dataset')
-          with open(output_parameter_one, 'w') as f:
-              f.write("{}".format(1234))
+      from kfp.v2.dsl import *
+      from typing import *
 
-      import argparse
-      _parser = argparse.ArgumentParser(prog='Preprocess', description='Dummy Preprocess Step.')
-      _parser.add_argument("--uri", dest="uri", type=str, required=True, default=argparse.SUPPRESS)
-      _parser.add_argument("--some-int", dest="some_int", type=int, required=True, default=argparse.SUPPRESS)
-      _parser.add_argument("--output-parameter-one", dest="output_parameter_one", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
-      _parser.add_argument("--output-dataset-one", dest="output_dataset_one", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
-      _parsed_args = vars(_parser.parse_args())
+      def preprocess(uri: str, some_int: int, output_parameter_one: OutputPath(int),
+                     output_dataset_one: OutputPath('Dataset')):
+          '''Dummy Preprocess Step.'''
+          with open(output_dataset_one, 'w') as f:
+              f.write('Output dataset')
+          with open(output_parameter_one, 'w') as f:
+              f.write("{}".format(1234))
 
-      _outputs = preprocess(**_parsed_args)
-    - --uri
-    - '{{$.inputs.parameters[''uri'']}}'
-    - --some-int
-    - '{{$.inputs.parameters[''some_int'']}}'
-    - --output-parameter-one
-    - '{{$.outputs.parameters[''output_parameter_one''].output_file}}'
-    - --output-dataset-one
-    - '{{$.outputs.artifacts[''output_dataset_one''].path}}'
+    - --executor_input
+    - '{{$}}'
+    - --function_to_execute
+    - preprocess
     command: [/kfp-launcher/launch, --mlmd_server_address, $(METADATA_GRPC_SERVICE_HOST),
       --mlmd_server_port, $(METADATA_GRPC_SERVICE_PORT), --runtime_info_json, $(KFP_V2_RUNTIME_INFO),
       --container_image, $(KFP_V2_IMAGE), --task_name, preprocess, --pipeline_name,
@@ -104,7 +96,7 @@ spec:
     - name: ENABLE_CACHING
       valueFrom:
        fieldRef: {fieldPath: 'metadata.labels[''pipelines.kubeflow.org/enable_caching'']'}
-    - {name: KFP_V2_IMAGE, value: 'python:3.9'}
+    - {name: KFP_V2_IMAGE, value: 'python:3.7'}
    - {name: KFP_V2_RUNTIME_INFO, value: '{"inputParameters": {"some_int": {"type":
      "INT"}, "uri": {"type": "STRING"}}, "inputArtifacts": {}, "outputParameters":
      {"output_parameter_one": {"type": "INT", "path": "/tmp/outputs/output_parameter_one/data"}},
@@ -112,7 +104,7 @@ spec:
      "instanceSchema": "", "metadataPath": "/tmp/outputs/output_dataset_one/data"}}}'}
    envFrom:
    - configMapRef: {name: metadata-grpc-configmap, optional: true}
-    image: python:3.9
+    image: python:3.7
    volumeMounts:
    - {mountPath: /kfp-launcher, name: kfp-launcher}
  inputs:
@@ -132,13 +124,13 @@ spec:
      pipelines.kubeflow.org/component_ref: '{}'
      pipelines.kubeflow.org/arguments.parameters: '{"some_int": "12", "uri": "uri-to-import"}'
    labels:
-      pipelines.kubeflow.org/kfp_sdk_version: 1.7.0
+      pipelines.kubeflow.org/kfp_sdk_version: 1.7.1
      pipelines.kubeflow.org/pipeline-sdk-type: kfp
      pipelines.kubeflow.org/v2_component: "true"
      pipelines.kubeflow.org/enable_caching: "true"
  initContainers:
  - command: [launcher, --copy, /kfp-launcher/launch]
-    image: gcr.io/ml-pipeline/kfp-launcher:1.7.0
+    image: gcr.io/ml-pipeline/kfp-launcher:1.7.1
    name: kfp-launcher
    mirrorVolumeMounts: true
  volumes:
@@ -147,42 +139,37 @@ spec:
   container:
     args:
    - sh
    - -c
    - (python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1
      python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.1'
      || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'kfp==1.7.1' --user) && "$0" "$@"
    - sh
    - -ec
    - |
-      program_path=$(mktemp)
-      printf "%s" "$0" > "$program_path"
-      python3 -u "$program_path" "$@"
-    - |
-      def _make_parent_dirs_and_return_path(file_path: str):
-          import os
-          os.makedirs(os.path.dirname(file_path), exist_ok=True)
-          return file_path
+      program_path=$(mktemp -d)
+      printf "%s" "$0" > "$program_path/ephemeral_component.py"
+      python3 -m kfp.v2.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
+    - |2+
 
-      def train(dataset,
-                model,
-                num_steps = 100):
-          '''Dummy Training Step.'''
+      from kfp.v2.dsl import *
+      from typing import *
 
-          with open(dataset, 'r') as input_file:
-              input_string = input_file.read()
-          with open(model, 'w') as output_file:
-              for i in range(num_steps):
-                  output_file.write("Step {}\n{}\n=====\n".format(i, input_string))
+      def train(dataset: InputPath('Dataset'),
+                model: OutputPath('Model'),
+                num_steps: int = 100):
+          '''Dummy Training Step.'''
 
-      import argparse
-      _parser = argparse.ArgumentParser(prog='Train', description='Dummy Training Step.')
-      _parser.add_argument("--dataset", dest="dataset", type=str, required=True, default=argparse.SUPPRESS)
-      _parser.add_argument("--num-steps", dest="num_steps", type=int, required=False, default=argparse.SUPPRESS)
-      _parser.add_argument("--model", dest="model", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
-      _parsed_args = vars(_parser.parse_args())
+          with open(dataset, 'r') as input_file:
+              input_string = input_file.read()
+          with open(model, 'w') as output_file:
+              for i in range(num_steps):
+                  output_file.write("Step {}\n{}\n=====\n".format(i, input_string))
 
-      _outputs = train(**_parsed_args)
-    - --dataset
-    - '{{$.inputs.artifacts[''dataset''].path}}'
-    - --num-steps
-    - '{{$.inputs.parameters[''num_steps'']}}'
-    - --model
-    - '{{$.outputs.artifacts[''model''].path}}'
+    - --executor_input
+    - '{{$}}'
+    - --function_to_execute
+    - train
    command: [/kfp-launcher/launch, --mlmd_server_address, $(METADATA_GRPC_SERVICE_HOST),
      --mlmd_server_port, $(METADATA_GRPC_SERVICE_PORT), --runtime_info_json, $(KFP_V2_RUNTIME_INFO),
      --container_image, $(KFP_V2_IMAGE), --task_name, train, --pipeline_name, '{{inputs.parameters.pipeline-name}}',
@@ -237,13 +224,13 @@ spec:
      pipelines.kubeflow.org/component_ref: '{}'
      pipelines.kubeflow.org/arguments.parameters: '{"num_steps": "{{inputs.parameters.preprocess-output_parameter_one}}"}'
    labels:
-      pipelines.kubeflow.org/kfp_sdk_version: 1.7.0
+      pipelines.kubeflow.org/kfp_sdk_version: 1.7.1
      pipelines.kubeflow.org/pipeline-sdk-type: kfp
      pipelines.kubeflow.org/v2_component: "true"
      pipelines.kubeflow.org/enable_caching: "true"
  initContainers:
  - command: [launcher, --copy, /kfp-launcher/launch]
-    image: gcr.io/ml-pipeline/kfp-launcher:1.7.0
+    image: gcr.io/ml-pipeline/kfp-launcher:1.7.1
    name: kfp-launcher
    mirrorVolumeMounts: true
  volumes:
@@ -3,15 +3,15 @@ kind: Workflow
 metadata:
   generateName: my-test-pipeline-with-custom-launcher-
   annotations:
-    pipelines.kubeflow.org/kfp_sdk_version: 1.7.0
-    pipelines.kubeflow.org/pipeline_compilation_time: '2021-08-16T18:41:22.156035'
+    pipelines.kubeflow.org/kfp_sdk_version: 1.7.1
+    pipelines.kubeflow.org/pipeline_compilation_time: '2021-08-17T17:37:09.475411'
     pipelines.kubeflow.org/pipeline_spec: '{"inputs": [{"default": "gs://output-directory/v2-artifacts",
       "name": "pipeline-root"}, {"default": "pipeline/my-test-pipeline-with-custom-launcher",
       "name": "pipeline-name"}], "name": "my-test-pipeline-with-custom-launcher"}'
     pipelines.kubeflow.org/v2_pipeline: "true"
   labels:
     pipelines.kubeflow.org/v2_pipeline: "true"
-    pipelines.kubeflow.org/kfp_sdk_version: 1.7.0
+    pipelines.kubeflow.org/kfp_sdk_version: 1.7.1
 spec:
   entrypoint: my-test-pipeline-with-custom-launcher
   templates:
@@ -42,42 +42,34 @@ spec:
   container:
     args:
    - sh
    - -c
    - (python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1
      python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.1'
      || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'kfp==1.7.1' --user) && "$0" "$@"
    - sh
    - -ec
    - |
-      program_path=$(mktemp)
-      printf "%s" "$0" > "$program_path"
-      python3 -u "$program_path" "$@"
-    - |
-      def _make_parent_dirs_and_return_path(file_path: str):
-          import os
-          os.makedirs(os.path.dirname(file_path), exist_ok=True)
-          return file_path
+      program_path=$(mktemp -d)
+      printf "%s" "$0" > "$program_path/ephemeral_component.py"
+      python3 -m kfp.v2.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
+    - |2+
 
-      def preprocess(uri, some_int, output_parameter_one,
-                     output_dataset_one):
-          '''Dummy Preprocess Step.'''
-          with open(output_dataset_one, 'w') as f:
-              f.write('Output dataset')
-          with open(output_parameter_one, 'w') as f:
-              f.write("{}".format(1234))
+      from kfp.v2.dsl import *
+      from typing import *
 
-      import argparse
-      _parser = argparse.ArgumentParser(prog='Preprocess', description='Dummy Preprocess Step.')
-      _parser.add_argument("--uri", dest="uri", type=str, required=True, default=argparse.SUPPRESS)
-      _parser.add_argument("--some-int", dest="some_int", type=int, required=True, default=argparse.SUPPRESS)
-      _parser.add_argument("--output-parameter-one", dest="output_parameter_one", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
-      _parser.add_argument("--output-dataset-one", dest="output_dataset_one", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
-      _parsed_args = vars(_parser.parse_args())
+      def preprocess(uri: str, some_int: int, output_parameter_one: OutputPath(int),
+                     output_dataset_one: OutputPath('Dataset')):
+          '''Dummy Preprocess Step.'''
+          with open(output_dataset_one, 'w') as f:
+              f.write('Output dataset')
+          with open(output_parameter_one, 'w') as f:
+              f.write("{}".format(1234))
 
-      _outputs = preprocess(**_parsed_args)
-    - --uri
-    - '{{$.inputs.parameters[''uri'']}}'
-    - --some-int
-    - '{{$.inputs.parameters[''some_int'']}}'
-    - --output-parameter-one
-    - '{{$.outputs.parameters[''output_parameter_one''].output_file}}'
-    - --output-dataset-one
-    - '{{$.outputs.artifacts[''output_dataset_one''].path}}'
+    - --executor_input
+    - '{{$}}'
+    - --function_to_execute
+    - preprocess
    command: [/kfp-launcher/launch, --mlmd_server_address, $(METADATA_GRPC_SERVICE_HOST),
      --mlmd_server_port, $(METADATA_GRPC_SERVICE_PORT), --runtime_info_json, $(KFP_V2_RUNTIME_INFO),
      --container_image, $(KFP_V2_IMAGE), --task_name, preprocess, --pipeline_name,
@@ -104,7 +96,7 @@ spec:
    - name: ENABLE_CACHING
      valueFrom:
        fieldRef: {fieldPath: 'metadata.labels[''pipelines.kubeflow.org/enable_caching'']'}
-    - {name: KFP_V2_IMAGE, value: 'python:3.9'}
+    - {name: KFP_V2_IMAGE, value: 'python:3.7'}
    - {name: KFP_V2_RUNTIME_INFO, value: '{"inputParameters": {"some_int": {"type":
      "INT"}, "uri": {"type": "STRING"}}, "inputArtifacts": {}, "outputParameters":
      {"output_parameter_one": {"type": "INT", "path": "/tmp/outputs/output_parameter_one/data"}},
@@ -112,7 +104,7 @@ spec:
      "instanceSchema": "", "metadataPath": "/tmp/outputs/output_dataset_one/data"}}}'}
    envFrom:
    - configMapRef: {name: metadata-grpc-configmap, optional: true}
-    image: python:3.9
+    image: python:3.7
    volumeMounts:
    - {mountPath: /kfp-launcher, name: kfp-launcher}
  inputs:
@@ -132,7 +124,7 @@ spec:
      pipelines.kubeflow.org/component_ref: '{}'
      pipelines.kubeflow.org/arguments.parameters: '{"some_int": "12", "uri": "uri-to-import"}'
    labels:
-      pipelines.kubeflow.org/kfp_sdk_version: 1.7.0
+      pipelines.kubeflow.org/kfp_sdk_version: 1.7.1
      pipelines.kubeflow.org/pipeline-sdk-type: kfp
      pipelines.kubeflow.org/v2_component: "true"
      pipelines.kubeflow.org/enable_caching: "true"
@@ -147,42 +139,37 @@ spec:
   container:
     args:
    - sh
    - -c
    - (python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1
      python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.1'
      || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location
      'kfp==1.7.1' --user) && "$0" "$@"
    - sh
    - -ec
    - |
-      program_path=$(mktemp)
-      printf "%s" "$0" > "$program_path"
-      python3 -u "$program_path" "$@"
-    - |
-      def _make_parent_dirs_and_return_path(file_path: str):
-          import os
-          os.makedirs(os.path.dirname(file_path), exist_ok=True)
-          return file_path
+      program_path=$(mktemp -d)
+      printf "%s" "$0" > "$program_path/ephemeral_component.py"
+      python3 -m kfp.v2.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
+    - |2+
 
-      def train(dataset,
-                model,
-                num_steps = 100):
-          '''Dummy Training Step.'''
+      from kfp.v2.dsl import *
+      from typing import *
 
-          with open(dataset, 'r') as input_file:
-              input_string = input_file.read()
-          with open(model, 'w') as output_file:
-              for i in range(num_steps):
-                  output_file.write("Step {}\n{}\n=====\n".format(i, input_string))
+      def train(dataset: InputPath('Dataset'),
+                model: OutputPath('Model'),
+                num_steps: int = 100):
+          '''Dummy Training Step.'''
 
-      import argparse
-      _parser = argparse.ArgumentParser(prog='Train', description='Dummy Training Step.')
-      _parser.add_argument("--dataset", dest="dataset", type=str, required=True, default=argparse.SUPPRESS)
-      _parser.add_argument("--num-steps", dest="num_steps", type=int, required=False, default=argparse.SUPPRESS)
-      _parser.add_argument("--model", dest="model", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS)
-      _parsed_args = vars(_parser.parse_args())
+          with open(dataset, 'r') as input_file:
+              input_string = input_file.read()
+          with open(model, 'w') as output_file:
+              for i in range(num_steps):
+                  output_file.write("Step {}\n{}\n=====\n".format(i, input_string))
 
-      _outputs = train(**_parsed_args)
-    - --dataset
-    - '{{$.inputs.artifacts[''dataset''].path}}'
-    - --num-steps
-    - '{{$.inputs.parameters[''num_steps'']}}'
-    - --model
-    - '{{$.outputs.artifacts[''model''].path}}'
+    - --executor_input
+    - '{{$}}'
+    - --function_to_execute
+    - train
    command: [/kfp-launcher/launch, --mlmd_server_address, $(METADATA_GRPC_SERVICE_HOST),
      --mlmd_server_port, $(METADATA_GRPC_SERVICE_PORT), --runtime_info_json, $(KFP_V2_RUNTIME_INFO),
      --container_image, $(KFP_V2_IMAGE), --task_name, train, --pipeline_name, '{{inputs.parameters.pipeline-name}}',
@@ -237,7 +224,7 @@ spec:
      pipelines.kubeflow.org/component_ref: '{}'
      pipelines.kubeflow.org/arguments.parameters: '{"num_steps": "{{inputs.parameters.preprocess-output_parameter_one}}"}'
    labels:
-      pipelines.kubeflow.org/kfp_sdk_version: 1.7.0
+      pipelines.kubeflow.org/kfp_sdk_version: 1.7.1
      pipelines.kubeflow.org/pipeline-sdk-type: kfp
      pipelines.kubeflow.org/v2_component: "true"
      pipelines.kubeflow.org/enable_caching: "true"
@@ -13,142 +13,143 @@
 # limitations under the License.
 """Tests for v2-compatible compiled pipelines."""
 
+from kfp.v2.components.types.artifact_types import Artifact
 import os
 import tempfile
 from typing import Callable
 import unittest
 import yaml
 
-from kfp import compiler, components, dsl
-from kfp.components import InputPath, OutputPath
+from kfp import compiler, components
+from kfp import dsl as v1dsl
+from kfp.v2 import dsl
+from kfp.v2.dsl import component, Artifact, InputPath, OutputPath
 
 
+@component
 def preprocess(uri: str, some_int: int, output_parameter_one: OutputPath(int),
                output_dataset_one: OutputPath('Dataset')):
-    '''Dummy Preprocess Step.'''
-    with open(output_dataset_one, 'w') as f:
-        f.write('Output dataset')
-    with open(output_parameter_one, 'w') as f:
-        f.write("{}".format(1234))
+    '''Dummy Preprocess Step.'''
+    with open(output_dataset_one, 'w') as f:
+        f.write('Output dataset')
+    with open(output_parameter_one, 'w') as f:
+        f.write("{}".format(1234))
 
 
+@component
 def train(dataset: InputPath('Dataset'),
           model: OutputPath('Model'),
           num_steps: int = 100):
-    '''Dummy Training Step.'''
+    '''Dummy Training Step.'''
 
-    with open(dataset, 'r') as input_file:
-        input_string = input_file.read()
-    with open(model, 'w') as output_file:
-        for i in range(num_steps):
-            output_file.write("Step {}\n{}\n=====\n".format(i, input_string))
+    with open(dataset, 'r') as input_file:
+        input_string = input_file.read()
+    with open(model, 'w') as output_file:
+        for i in range(num_steps):
+            output_file.write("Step {}\n{}\n=====\n".format(i, input_string))
 
 
-preprocess_op = components.create_component_from_func(preprocess,
-                                                      base_image='python:3.9')
-train_op = components.create_component_from_func(train)
 
 
 class TestV2CompatibleModeCompiler(unittest.TestCase):
 
-    def _assert_compiled_pipeline_equals_golden(self,
-                                                kfp_compiler: compiler.Compiler,
-                                                pipeline_func: Callable,
-                                                golden_yaml_filename: str):
-        compiled_file = os.path.join(tempfile.mkdtemp(), 'workflow.yaml')
-        kfp_compiler.compile(pipeline_func, package_path=compiled_file)
+    def _assert_compiled_pipeline_equals_golden(self,
+                                                kfp_compiler: compiler.Compiler,
+                                                pipeline_func: Callable,
+                                                golden_yaml_filename: str):
+        compiled_file = os.path.join(tempfile.mkdtemp(), 'workflow.yaml')
+        kfp_compiler.compile(pipeline_func, package_path=compiled_file)
 
-        test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
-        golden_file = os.path.join(test_data_dir, golden_yaml_filename)
-        # Uncomment the following to update goldens.
-        # TODO: place this behind some --update_goldens flag.
-        # kfp_compiler.compile(pipeline_func, package_path=golden_file)
+        test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
+        golden_file = os.path.join(test_data_dir, golden_yaml_filename)
+        # Uncomment the following to update goldens.
+        # TODO: place this behind some --update_goldens flag.
+        # kfp_compiler.compile(pipeline_func, package_path=golden_file)
 
-        with open(golden_file, 'r') as f:
-            golden = yaml.safe_load(f)
+        with open(golden_file, 'r') as f:
+            golden = yaml.safe_load(f)
 
-        with open(compiled_file, 'r') as f:
-            compiled = yaml.safe_load(f)
+        with open(compiled_file, 'r') as f:
+            compiled = yaml.safe_load(f)
 
-        for workflow in golden, compiled:
-            del workflow['metadata']
-            for template in workflow['spec']['templates']:
-                template.pop('metadata', None)
+        for workflow in golden, compiled:
+            del workflow['metadata']
+            for template in workflow['spec']['templates']:
+                template.pop('metadata', None)
 
-                if 'initContainers' not in template:
-                    continue
-                # Strip off the launcher image label before comparison
-                for initContainer in template['initContainers']:
-                    initContainer['image'] = initContainer['image'].split(':')[0]
+                if 'initContainers' not in template:
+                    continue
+                # Strip off the launcher image label before comparison
+                for initContainer in template['initContainers']:
+                    initContainer['image'] = initContainer['image'].split(':')[0]
 
-        self.maxDiff = None
-        self.assertDictEqual(golden, compiled)
+        self.maxDiff = None
+        self.assertDictEqual(golden, compiled)
 
-    def test_two_step_pipeline(self):
+    def test_two_step_pipeline(self):
 
-        @dsl.pipeline(pipeline_root='gs://output-directory/v2-artifacts',
-                      name='my-test-pipeline')
-        def v2_compatible_two_step_pipeline():
-            preprocess_task = preprocess_op(uri='uri-to-import', some_int=12)
-            train_task = train_op(
-                num_steps=preprocess_task.outputs['output_parameter_one'],
-                dataset=preprocess_task.outputs['output_dataset_one'])
+        @dsl.pipeline(pipeline_root='gs://output-directory/v2-artifacts',
+                      name='my-test-pipeline')
+        def v2_compatible_two_step_pipeline():
+            preprocess_task = preprocess(uri='uri-to-import', some_int=12)
+            train_task = train(
+                num_steps=preprocess_task.outputs['output_parameter_one'],
+                dataset=preprocess_task.outputs['output_dataset_one'])
 
-        kfp_compiler = compiler.Compiler(
-            mode=dsl.PipelineExecutionMode.V2_COMPATIBLE)
-        self._assert_compiled_pipeline_equals_golden(
-            kfp_compiler, v2_compatible_two_step_pipeline,
-            'v2_compatible_two_step_pipeline.yaml')
+        kfp_compiler = compiler.Compiler(
+            mode=v1dsl.PipelineExecutionMode.V2_COMPATIBLE)
+        self._assert_compiled_pipeline_equals_golden(
+            kfp_compiler, v2_compatible_two_step_pipeline,
+            'v2_compatible_two_step_pipeline.yaml')
 
-    def test_custom_launcher(self):
+    def test_custom_launcher(self):
 
-        @dsl.pipeline(pipeline_root='gs://output-directory/v2-artifacts',
-                      name='my-test-pipeline-with-custom-launcher')
-        def v2_compatible_two_step_pipeline():
-            preprocess_task = preprocess_op(uri='uri-to-import', some_int=12)
-            train_task = train_op(
-                num_steps=preprocess_task.outputs['output_parameter_one'],
-                dataset=preprocess_task.outputs['output_dataset_one'])
+        @dsl.pipeline(pipeline_root='gs://output-directory/v2-artifacts',
+                      name='my-test-pipeline-with-custom-launcher')
+        def v2_compatible_two_step_pipeline():
+            preprocess_task = preprocess(uri='uri-to-import', some_int=12)
+            train_task = train(
+                num_steps=preprocess_task.outputs['output_parameter_one'],
+                dataset=preprocess_task.outputs['output_dataset_one'])
 
-        kfp_compiler = compiler.Compiler(
-            mode=dsl.PipelineExecutionMode.V2_COMPATIBLE,
-            launcher_image='my-custom-image')
-        self._assert_compiled_pipeline_equals_golden(
-            kfp_compiler, v2_compatible_two_step_pipeline,
-            'v2_compatible_two_step_pipeline_with_custom_launcher.yaml')
+        kfp_compiler = compiler.Compiler(
+            mode=v1dsl.PipelineExecutionMode.V2_COMPATIBLE,
+            launcher_image='my-custom-image')
+        self._assert_compiled_pipeline_equals_golden(
+            kfp_compiler, v2_compatible_two_step_pipeline,
+            'v2_compatible_two_step_pipeline_with_custom_launcher.yaml')
 
-    def test_constructing_container_op_directly_should_error(
-            self):
+    def test_constructing_container_op_directly_should_error(
+            self):
 
-        @dsl.pipeline(name='test-pipeline')
-        def my_pipeline():
-            dsl.ContainerOp(
-                name='comp1',
-                image='gcr.io/dummy',
-                command=['python', 'main.py']
-            )
+        @dsl.pipeline(name='test-pipeline')
+        def my_pipeline():
+            v1dsl.ContainerOp(
+                name='comp1',
+                image='gcr.io/dummy',
+                command=['python', 'main.py']
+            )
 
-        with self.assertRaisesRegex(
-                RuntimeError,
-                'Constructing ContainerOp instances directly is deprecated and not '
-                'supported when compiling to v2 \(using v2 compiler or v1 compiler '
-                'with V2_COMPATIBLE or V2_ENGINE mode\).'):
-            compiler.Compiler(mode=dsl.PipelineExecutionMode.V2_COMPATIBLE).compile(
-                pipeline_func=my_pipeline, package_path='result.json')
+        with self.assertRaisesRegex(
+                RuntimeError,
+                'Constructing ContainerOp instances directly is deprecated and not '
+                'supported when compiling to v2 \(using v2 compiler or v1 compiler '
+                'with V2_COMPATIBLE or V2_ENGINE mode\).'):
+            compiler.Compiler(mode=v1dsl.PipelineExecutionMode.V2_COMPATIBLE).compile(
+                pipeline_func=my_pipeline, package_path='result.json')
 
-    def test_use_importer_should_error(self):
+    def test_use_importer_should_error(self):
 
-        @dsl.pipeline(name='test-pipeline')
-        def my_pipeline():
-            dsl.importer(artifact_uri='dummy', artifact_class=dsl.io_types.Artifact)
+        @dsl.pipeline(name='test-pipeline')
+        def my_pipeline():
+            dsl.importer(artifact_uri='dummy', artifact_class=Artifact)
 
-        with self.assertRaisesRegex(
-                NotImplementedError,
-                'dsl.importer is not supported for Kubeflow Pipelines open source yet.',
-        ):
-            compiler.Compiler(mode=dsl.PipelineExecutionMode.V2_COMPATIBLE).compile(
-                pipeline_func=my_pipeline, package_path='result.json')
+        with self.assertRaisesRegex(
+                NotImplementedError,
+                'dsl.importer is not supported for Kubeflow Pipelines open source yet.',
+        ):
+            compiler.Compiler(mode=v1dsl.PipelineExecutionMode.V2_COMPATIBLE).compile(
+                pipeline_func=my_pipeline, package_path='result.json')
 
 
 if __name__ == '__main__':
-    unittest.main()
+    unittest.main()
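The compile entry point exercised by these tests, extracted as a stand-alone sketch (the empty pipeline body is a placeholder; a real pipeline needs at least one task):

from kfp import compiler
from kfp import dsl as v1dsl
from kfp.v2 import dsl


@dsl.pipeline(name='my-pipeline')
def my_pipeline():
    # Placeholder body; add component tasks here.
    pass


# v1 compiler running in v2-compatible mode, as in the tests above.
kfp_compiler = compiler.Compiler(
    mode=v1dsl.PipelineExecutionMode.V2_COMPATIBLE)
kfp_compiler.compile(pipeline_func=my_pipeline, package_path='workflow.yaml')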
@@ -34,6 +34,7 @@ from ._data_passing import serialize_value, get_deserializer_code_for_type_struc
 from ._naming import _make_name_unique_by_adding_index
 from .structures import *
 from . import _structures as structures
+from kfp.components import type_annotation_utils
 
 import inspect
 import itertools
@@ -44,9 +45,6 @@ import warnings
 
 import docstring_parser
 
-from kfp.components import type_annotation_utils
-from kfp.dsl import io_types
-
 T = TypeVar('T')
 
 
@@ -335,22 +333,7 @@ def _extract_component_interface(func: Callable) -> ComponentSpec:
     passing_style = None
     io_name = parameter.name
 
-    if io_types.is_artifact_annotation(parameter_type):
-        # passing_style is either io_types.InputAnnotation or
-        # io_types.OutputAnnotation.
-        passing_style = io_types.get_io_artifact_annotation(parameter_type)
-
-        # parameter_type is io_types.Artifact or one of its subclasses.
-        parameter_type = io_types.get_io_artifact_class(parameter_type)
-        if not issubclass(parameter_type, io_types.Artifact):
-            raise ValueError(
-                'Input[T] and Output[T] are only supported when T is a '
-                'subclass of Artifact. Found `{} with type {}`'.format(
-                    io_name, parameter_type))
-
-        if parameter.default is not inspect.Parameter.empty:
-            raise ValueError('Default values for Input/Output artifacts are not supported.')
-    elif isinstance(
+    if isinstance(
         parameter_type,
         (InputArtifact, InputPath, InputTextFile, InputBinaryFile,
          OutputArtifact, OutputPath, OutputTextFile, OutputBinaryFile)):
@@ -372,8 +355,7 @@ def _extract_component_interface(func: Callable) -> ComponentSpec:
 
     type_struct = annotation_to_type_struct(parameter_type)
 
-    if passing_style in [io_types.OutputAnnotation, OutputArtifact,
-                         OutputPath, OutputTextFile, OutputBinaryFile]:
+    if passing_style in [OutputArtifact, OutputPath, OutputTextFile, OutputBinaryFile]:
         io_name = _make_name_unique_by_adding_index(io_name, output_names, '_')
         output_names.add(io_name)
         output_spec = OutputSpec(
@@ -467,96 +449,6 @@ def _extract_component_interface(func: Callable) -> ComponentSpec:
     return component_spec
 
 
-def _get_default_kfp_package_path() -> str:
-    import kfp
-    return 'kfp=={}'.format(kfp.__version__)
-
-def _get_packages_to_install_command(
-        package_list: Optional[List[str]] = None) -> List[str]:
-    result = []
-    if package_list is not None:
-        install_pip_command = 'python3 -m ensurepip'
-        install_packages_command = (
-            'PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \
-                --no-warn-script-location {}').format(
-            ' '.join([repr(str(package)) for package in package_list]))
-        result = [
-            'sh', '-c',
-            '({install_pip} || {install_pip} --user) &&'
-            ' ({install_packages} || {install_packages} --user) && "$0" "$@"'.format(
-                install_pip=install_pip_command,
-                install_packages=install_packages_command)
-        ]
-    return result
-
-def _func_to_component_spec_v2(
-        func: Callable,
-        base_image : Optional[str] = None,
-        packages_to_install: Optional[List[str]] = None,
-        install_kfp_package: bool = True,
-        kfp_package_path: Optional[str] = None) -> ComponentSpec:
-    decorator_base_image = getattr(func, '_component_base_image', None)
-    if decorator_base_image is not None:
-        if base_image is not None and decorator_base_image != base_image:
-            raise ValueError('base_image ({}) conflicts with the decorator-specified base image metadata ({})'.format(base_image, decorator_base_image))
-        else:
-            base_image = decorator_base_image
-    else:
-        if base_image is None:
-            base_image = default_base_image_or_builder
-            if isinstance(base_image, Callable):
-                base_image = base_image()
-
-    imports_source = [
-        "from kfp.v2.dsl import *",
-        "from typing import *",
-    ]
-
-    func_source = _get_function_source_definition(func)
-
-    source = textwrap.dedent("""
-        {imports_source}
-
-        {func_source}\n""").format(imports_source='\n'.join(imports_source),
-                                   func_source=func_source)
-
-    packages_to_install = packages_to_install or []
-    if install_kfp_package:
-        if kfp_package_path is None:
-            kfp_package_path = _get_default_kfp_package_path()
-        packages_to_install.append(kfp_package_path)
-
-    packages_to_install_command = _get_packages_to_install_command(package_list=packages_to_install)
-
-    from kfp.components._structures import ExecutorInputPlaceholder
-    component_spec = _extract_component_interface(func)
-
-    component_spec.implementation=ContainerImplementation(
-        container=ContainerSpec(
-            image=base_image,
-            command=packages_to_install_command + [
-                'sh',
-                '-ec',
-                textwrap.dedent('''\
-                    program_path=$(mktemp -d)
-                    printf "%s" "$0" > "$program_path/ephemeral_component.py"
-                    python3 -m kfp.components.executor_main \
-                        --component_module_path \
-                        "$program_path/ephemeral_component.py" \
-                        "$@"
-                '''),
-                source,
-            ],
-            args=[
-                "--executor_input",
-                ExecutorInputPlaceholder(),
-                "--function_to_execute", func.__name__,
-            ]
-        )
-    )
-    return component_spec
-
 def _func_to_component_spec(func, extra_code='', base_image : str = None, packages_to_install: List[str] = None, modules_to_capture: List[str] = None, use_code_pickling=False) -> ComponentSpec:
     '''Takes a self-contained python function and converts it to component.
 
@@ -980,17 +872,20 @@ create_component_from_func_v2(
     Returns:
       A component task factory that can be used in pipeline definitions.
     """
-    component_spec = _func_to_component_spec_v2(
+    warnings.warn(
+        'create_component_from_func_v2() has been deprecated and will be'
+        ' removed in KFP v1.9. Please use'
+        ' kfp.v2.components.create_component_from_func() instead.',
+        category=FutureWarning,
+    )
+    from kfp.v2.components import component_factory
+    return component_factory.create_component_from_func(
         func=func,
         base_image=base_image,
         packages_to_install=packages_to_install,
         install_kfp_package=install_kfp_package,
         kfp_package_path=kfp_package_path
     )
-    if output_component_file:
-        component_spec.save(output_component_file)
-
-    return _create_task_factory_from_component_spec(component_spec)
 
 
 def create_component_from_func(
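Illustration of the deprecation path above (a sketch; the new entry point is the one named in the warning text, and the component factory call is the only assumption beyond what the hunk shows):

from kfp.v2.components import create_component_from_func


def add(a: float, b: float) -> float:
    return a + b


# Deprecated spelling: still works, but now emits a FutureWarning and
# forwards to the v2 implementation shown in this hunk.
# from kfp.components import create_component_from_func_v2
# add_op = create_component_from_func_v2(add)

# Preferred spelling, per the warning text:
add_op = create_component_from_func(add)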
@@ -1,284 +0,0 @@
-# Copyright 2021 The Kubeflow Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import json
-import inspect
-from typing import Any, Callable, Dict, List, Optional, Union
-from kfp.components._python_op import InputPath, OutputPath
-from kfp.dsl.io_types import Artifact, Input, Output, create_runtime_artifact, is_artifact_annotation, is_input_artifact, is_output_artifact
-
-
-class Executor():
-    """Executor executes v2-based Python function components."""
-
-    def __init__(self, executor_input: Dict, function_to_execute: Callable):
-        self._func = function_to_execute
-        self._input = executor_input
-        self._input_artifacts: Dict[str, Artifact] = {}
-        self._output_artifacts: Dict[str, Artifact] = {}
-
-        for name, artifacts in self._input.get('inputs', {}).get('artifacts',
-                                                                 {}).items():
-            artifacts_list = artifacts.get('artifacts')
-            if artifacts_list:
-                self._input_artifacts[name] = self._make_input_artifact(
-                    artifacts_list[0])
-
-        for name, artifacts in self._input.get('outputs', {}).get('artifacts',
-                                                                  {}).items():
-            artifacts_list = artifacts.get('artifacts')
-            if artifacts_list:
-                self._output_artifacts[name] = self._make_output_artifact(
-                    artifacts_list[0])
-
-        self._return_annotation = inspect.signature(self._func).return_annotation
-        self._executor_output = {}
-
-    @classmethod
-    def _make_input_artifact(cls, runtime_artifact: Dict):
-        return create_runtime_artifact(runtime_artifact)
-
-    @classmethod
-    def _make_output_artifact(cls, runtime_artifact: Dict):
-        import os
-        artifact = create_runtime_artifact(runtime_artifact)
-        os.makedirs(os.path.dirname(artifact.path), exist_ok=True)
-        return artifact
-
-    def _get_input_artifact(self, name: str):
-        return self._input_artifacts.get(name)
-
-    def _get_output_artifact(self, name: str):
-        return self._output_artifacts.get(name)
-
-    def _get_input_parameter_value(self, parameter_name: str, parameter_type: Any):
-        parameter = self._input.get('inputs', {}).get('parameters',
-                                                      {}).get(parameter_name, None)
-        if parameter is None:
-            return None
-
-        if parameter.get('stringValue'):
-            if parameter_type == str:
-                return parameter['stringValue']
-            elif parameter_type == bool:
-                # Use `.lower()` so it can also handle 'True' and 'False' (resulted from
-                # `str(True)` and `str(False)`, respectively.
-                return json.loads(parameter['stringValue'].lower())
-            else:
-                return json.loads(parameter['stringValue'])
-        elif parameter.get('intValue'):
-            return int(parameter['intValue'])
-        elif parameter.get('doubleValue'):
-            return float(parameter['doubleValue'])
-
-    def _get_output_parameter_path(self, parameter_name: str):
-        parameter_name = self._maybe_strip_path_suffix(parameter_name)
-        parameter = self._input.get('outputs',
-                                    {}).get('parameters',
-                                            {}).get(parameter_name, None)
-        if parameter is None:
-            return None
-
-        import os
-        path = parameter.get('outputFile', None)
-        if path:
-            os.makedirs(os.path.dirname(path), exist_ok=True)
-        return path
-
-    def _get_output_artifact_path(self, artifact_name: str):
-        artifact_name = self._maybe_strip_path_suffix(artifact_name)
-        output_artifact = self._output_artifacts.get(artifact_name)
-        if not output_artifact:
-            raise ValueError(
-                'Failed to get output artifact path for artifact name {}'.format(
-                    artifact_name))
-        return output_artifact.path
-
-    def _get_input_artifact_path(self, artifact_name: str):
-        artifact_name = self._maybe_strip_path_suffix(artifact_name)
-        input_artifact = self._input_artifacts.get(artifact_name)
-        if not input_artifact:
-            raise ValueError(
-                'Failed to get input artifact path for artifact name {}'.format(
-                    artifact_name))
-        return input_artifact.path
-
-    def _write_output_parameter_value(self, name: str,
-                                      value: Union[str, int, float, bool, dict,
-                                                   list, Dict, List]):
-        if type(value) == str:
-            output = {'stringValue': value}
-        elif type(value) == int:
-            output = {'intValue': value}
-        elif type(value) == float:
-            output = {'doubleValue': value}
-        else:
-            # For bool, list, dict, List, Dict, json serialize the value.
-            output = {'stringValue': json.dumps(value)}
-
-        if not self._executor_output.get('parameters'):
-            self._executor_output['parameters'] = {}
-
-        self._executor_output['parameters'][name] = output
-
-    def _write_output_artifact_payload(self, name: str, value: Any):
-        path = self._get_output_artifact_path(name)
-        with open(path, 'w') as f:
-            f.write(str(value))
-
-    # TODO: extract to a util
-    @classmethod
-    def _get_short_type_name(cls, type_name: str) -> str:
-        """Extracts the short form type name.
-
-        This method is used for looking up serializer for a given type.
-
-        For example:
-          typing.List -> List
-          typing.List[int] -> List
-          typing.Dict[str, str] -> Dict
-          List -> List
-          str -> str
-
-        Args:
-          type_name: The original type name.
-
-        Returns:
-          The short form type name or the original name if pattern doesn't match.
-        """
-        import re
-        match = re.match('(typing\.)?(?P<type>\w+)(?:\[.+\])?', type_name)
-        if match:
-            return match.group('type')
-        else:
-            return type_name
-
-    # TODO: merge with type_utils.is_parameter_type
-    @classmethod
-    def _is_parameter(cls, annotation: Any) -> bool:
-        if type(annotation) == type:
-            return annotation in [str, int, float, bool, dict, list]
-
-        # Annotation could be, for instance `typing.Dict[str, str]`, etc.
-        return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']
-
-    @classmethod
-    def _is_artifact(cls, annotation: Any) -> bool:
-        if type(annotation) == type:
-            return issubclass(annotation, Artifact)
-        return False
-
-    @classmethod
-    def _is_named_tuple(cls, annotation: Any) -> bool:
-        if type(annotation) == type:
-            return issubclass(annotation, tuple) and hasattr(
-                annotation, '_fields') and hasattr(annotation, '__annotations__')
-        return False
-
-    def _handle_single_return_value(self, output_name: str, annotation_type: Any,
-                                    return_value: Any):
-        if self._is_parameter(annotation_type):
-            if type(return_value) != annotation_type:
-                raise ValueError(
-                    'Function `{}` returned value of type {}; want type {}'.format(
-                        self._func.__name__, type(return_value), annotation_type))
-            self._write_output_parameter_value(output_name, return_value)
-        elif self._is_artifact(annotation_type):
-            self._write_output_artifact_payload(output_name, return_value)
-        else:
-            raise RuntimeError(
-                'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'
-                ' subclass of `Artifact`'.format(annotation_type))
-
-    def _write_executor_output(self, func_output: Optional[Any] = None):
-        if self._output_artifacts:
-            self._executor_output['artifacts'] = {}
-
-        for name, artifact in self._output_artifacts.items():
-            runtime_artifact = {
-                'name': artifact.name,
-                'uri': artifact.uri,
-                'metadata': artifact.metadata,
-            }
-            artifacts_list = {'artifacts': [runtime_artifact]}
-
-            self._executor_output['artifacts'][name] = artifacts_list
-
-        if func_output is not None:
-            if self._is_parameter(self._return_annotation) or self._is_artifact(
-                    self._return_annotation):
-                # Note: single output is named `Output` in component.yaml.
-                self._handle_single_return_value('Output', self._return_annotation,
-                                                 func_output)
-            elif self._is_named_tuple(self._return_annotation):
-                if len(self._return_annotation._fields) != len(func_output):
-                    raise RuntimeError(
-                        'Expected {} return values from function `{}`, got {}'.format(
-                            len(self._return_annotation._fields), self._func.__name__,
-                            len(func_output)))
-                for i in range(len(self._return_annotation._fields)):
-                    field = self._return_annotation._fields[i]
-                    field_type = self._return_annotation.__annotations__[field]
-                    if type(func_output) == tuple:
-                        field_value = func_output[i]
-                    else:
-                        field_value = getattr(func_output, field)
-                    self._handle_single_return_value(field, field_type, field_value)
-            else:
-                raise RuntimeError(
-                    'Unknown return type: {}. Must be one of `str`, `int`, `float`, a'
-                    ' subclass of `Artifact`, or a NamedTuple collection of these types.'
-                    .format(self._return_annotation))
-
-        import os
-        os.makedirs(
-            os.path.dirname(self._input['outputs']['outputFile']), exist_ok=True)
-        with open(self._input['outputs']['outputFile'], 'w') as f:
-            f.write(json.dumps(self._executor_output))
-
-    def _maybe_strip_path_suffix(self, name) -> str:
-        if name.endswith('_path'):
-            name = name[0:-len('_path')]
-        if name.endswith('_file'):
-            name = name[0:-len('_file')]
-        return name
-
-    def execute(self):
-        annotations = inspect.getfullargspec(self._func).annotations
-
-        # Function arguments.
-        func_kwargs = {}
-
-        for k, v in annotations.items():
-            if k == 'return':
-                continue
-
-            if self._is_parameter(v):
-                func_kwargs[k] = self._get_input_parameter_value(k, v)
-
-            if is_artifact_annotation(v):
-                if is_input_artifact(v):
-                    func_kwargs[k] = self._get_input_artifact(k)
-                if is_output_artifact(v):
-                    func_kwargs[k] = self._get_output_artifact(k)
-
-            elif isinstance(v, OutputPath):
-                if self._is_parameter(v.type):
-                    func_kwargs[k] = self._get_output_parameter_path(k)
-                else:
-                    func_kwargs[k] = self._get_output_artifact_path(k)
-            elif isinstance(v, InputPath):
-                func_kwargs[k] = self._get_input_artifact_path(k)
-
-        result = self._func(**func_kwargs)
-        self._write_executor_output(result)
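The Executor deleted above was moved, not dropped (per the commit message it now lives under the v2 namespace). A minimal sketch of how it is driven, assuming the moved module keeps the interface shown above:

import json
import os
import tempfile

# Assumption: the class moved under kfp.v2.components with the same API.
from kfp.v2.components.executor import Executor


def greet(text: str) -> str:
    return 'Hello, ' + text


output_file = os.path.join(tempfile.mkdtemp(), 'output_metadata.json')
executor_input = {
    'inputs': {'parameters': {'text': {'stringValue': 'KFP'}}},
    'outputs': {'outputFile': output_file},
}

# execute() reads 'text' from the executor input, calls greet, and writes
# the executor output JSON to output_file.
Executor(executor_input=executor_input, function_to_execute=greet).execute()

with open(output_file) as f:
    # A single return value is recorded under the name 'Output':
    # {'parameters': {'Output': {'stringValue': 'Hello, KFP'}}}
    print(json.load(f))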
@@ -1,455 +0,0 @@
-# Copyright 2021 The Kubeflow Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Tests for kfp.components.executor"""
-
-import os
-import tempfile
-from typing import Callable, NamedTuple, Optional
-import unittest
-import json
-
-from kfp.components import executor, InputPath, OutputPath
-from kfp.dsl import io_types
-from kfp.dsl.io_types import Artifact, Dataset, Input, Metrics, Model, Output
-
-_EXECUTOR_INPUT = """\
-{
-  "inputs": {
-    "parameters": {
-      "input_parameter": {
-        "stringValue": "Hello, KFP"
-      }
-    },
-    "artifacts": {
-      "input_artifact_one": {
-        "artifacts": [
-          {
-            "metadata": {},
-            "name": "input_artifact_one",
-            "type": {
-              "schemaTitle": "system.Dataset"
-            },
-            "uri": "gs://some-bucket/input_artifact_one"
-          }
-        ]
-      }
-    }
-  },
-  "outputs": {
-    "artifacts": {
-      "output_artifact_one": {
-        "artifacts": [
-          {
-            "metadata": {},
-            "name": "output_artifact_one",
-            "type": {
-              "schemaTitle": "system.Model"
-            },
-            "uri": "gs://some-bucket/output_artifact_one"
-          }
-        ]
-      },
-      "output_artifact_two": {
-        "artifacts": [
-          {
-            "metadata": {},
-            "name": "output_artifact_two",
-            "type": {
-              "schemaTitle": "system.Metrics"
-            },
-            "uri": "gs://some-bucket/output_artifact_two"
-          }
-        ]
-      }
-    },
-    "parameters": {
-      "output_parameter": {
-        "outputFile": "gs://some-bucket/some_task/nested/output_parameter"
-      }
-    },
-    "outputFile": "%s/output_metadata.json"
-  }
-}
-"""
-
-
-class ExecutorTest(unittest.TestCase):
-
-    def setUp(self):
-        self.maxDiff = None
-        self._test_dir = tempfile.mkdtemp()
-        io_types._GCS_LOCAL_MOUNT_PREFIX = self._test_dir + '/'
-        io_types._MINIO_LOCAL_MOUNT_PREFIX = self._test_dir + '/minio/'
-        io_types._S3_LOCAL_MOUNT_PREFIX = self._test_dir + '/s3/'
-        return super().setUp()
-
-    def _get_executor(self,
-                      func: Callable,
-                      executor_input: Optional[str] = None) -> executor.Executor:
-        if executor_input is None:
-            executor_input = _EXECUTOR_INPUT
-
-        executor_input_dict = json.loads(executor_input % self._test_dir)
-
-        return executor.Executor(executor_input=executor_input_dict,
-                                 function_to_execute=func)
-
-    def test_input_parameter(self):
-
-        def test_func(input_parameter: str):
-            self.assertEqual(input_parameter, "Hello, KFP")
-
-        self._get_executor(test_func).execute()
-
-    def test_input_artifact(self):
-
-        def test_func(input_artifact_one: Input[Dataset]):
-            self.assertEqual(input_artifact_one.uri,
-                             'gs://some-bucket/input_artifact_one')
-            self.assertEqual(
-                input_artifact_one.path,
-                os.path.join(self._test_dir, 'some-bucket/input_artifact_one'))
-            self.assertEqual(input_artifact_one.name, 'input_artifact_one')
-
-        self._get_executor(test_func).execute()
-
-    def test_output_artifact(self):
-
-        def test_func(output_artifact_one: Output[Model]):
-            self.assertEqual(output_artifact_one.uri,
-                             'gs://some-bucket/output_artifact_one')
-
-            self.assertEqual(
-                output_artifact_one.path,
-                os.path.join(self._test_dir, 'some-bucket/output_artifact_one'))
-            self.assertEqual(output_artifact_one.name, 'output_artifact_one')
-
-        self._get_executor(test_func).execute()
-
-    def test_output_parameter(self):
-
-        def test_func(output_parameter_path: OutputPath(str)):
-            # Test that output parameters just use the passed in filename.
-            self.assertEqual(output_parameter_path,
-                             'gs://some-bucket/some_task/nested/output_parameter')
-
-            # Test writing to the path succeeds. This fails if parent directories
-            # don't exist.
-            with open(output_parameter_path, 'w') as f:
-                f.write('Hello, World!')
-
-        self._get_executor(test_func).execute()
-
-    def test_input_path_artifact(self):
-
-        def test_func(input_artifact_one_path: InputPath('Dataset')):
-            self.assertEqual(
-                input_artifact_one_path,
-                os.path.join(self._test_dir, 'some-bucket/input_artifact_one'))
-
-        self._get_executor(test_func).execute()
-
-    def test_output_path_artifact(self):
-
-        def test_func(output_artifact_one_path: OutputPath('Model')):
-            self.assertEqual(
-                output_artifact_one_path,
-                os.path.join(self._test_dir, 'some-bucket/output_artifact_one'))
-
-        self._get_executor(test_func).execute()
-
-    def test_output_metadata(self):
-
-        def test_func(output_artifact_two: Output[Metrics]):
-            output_artifact_two.metadata['key_1'] = 'value_1'
-            output_artifact_two.metadata['key_2'] = 2
-            output_artifact_two.uri = 'new-uri'
-
-            # log_metric works here since the schema is specified as Metrics.
-            output_artifact_two.log_metric('metric', 0.9)
-
-        self._get_executor(test_func).execute()
-        with open(os.path.join(self._test_dir, 'output_metadata.json'), 'r') as f:
-            output_metadata = json.loads(f.read())
-        self.assertDictEqual(
output_metadata, {
|
||||
'artifacts': {
|
||||
'output_artifact_one': {
|
||||
'artifacts': [{
|
||||
'name': 'output_artifact_one',
|
||||
'uri': 'gs://some-bucket/output_artifact_one',
|
||||
'metadata': {}
|
||||
}]
|
||||
},
|
||||
'output_artifact_two': {
|
||||
'artifacts': [{
|
||||
'name': 'output_artifact_two',
|
||||
'uri': 'new-uri',
|
||||
'metadata': {
|
||||
'key_1': 'value_1',
|
||||
'key_2': 2,
|
||||
'metric': 0.9
|
||||
}
|
||||
}]
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
def test_function_string_output(self):
|
||||
executor_input = """\
|
||||
{
|
||||
"inputs": {
|
||||
"parameters": {
|
||||
"first_message": {
|
||||
"stringValue": "Hello"
|
||||
},
|
||||
"second_message": {
|
||||
"stringValue": "World"
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": {
|
||||
"parameters": {
|
||||
"output": {
|
||||
"outputFile": "gs://some-bucket/output"
|
||||
}
|
||||
},
|
||||
"outputFile": "%s/output_metadata.json"
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
def test_func(first_message: str, second_message: str) -> str:
|
||||
return first_message + ", " + second_message
|
||||
|
||||
self._get_executor(test_func, executor_input).execute()
|
||||
with open(os.path.join(self._test_dir, 'output_metadata.json'), 'r') as f:
|
||||
output_metadata = json.loads(f.read())
|
||||
self.assertDictEqual(output_metadata, {
|
||||
"parameters": {
|
||||
"Output": {
|
||||
"stringValue": "Hello, World"
|
||||
}
|
||||
},
|
||||
})
|
||||
|
||||
def test_function_with_int_output(self):
|
||||
executor_input = """\
|
||||
{
|
||||
"inputs": {
|
||||
"parameters": {
|
||||
"first": {
|
||||
"intValue": 40
|
||||
},
|
||||
"second": {
|
||||
"intValue": 2
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": {
|
||||
"parameters": {
|
||||
"output": {
|
||||
"outputFile": "gs://some-bucket/output"
|
||||
}
|
||||
},
|
||||
"outputFile": "%s/output_metadata.json"
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
def test_func(first: int, second: int) -> int:
|
||||
return first + second
|
||||
|
||||
self._get_executor(test_func, executor_input).execute()
|
||||
with open(os.path.join(self._test_dir, 'output_metadata.json'), 'r') as f:
|
||||
output_metadata = json.loads(f.read())
|
||||
self.assertDictEqual(output_metadata, {
|
||||
"parameters": {
|
||||
"Output": {
|
||||
"intValue": 42
|
||||
}
|
||||
},
|
||||
})
|
||||
|
||||
def test_function_string_output_to_artifact_file(self):
|
||||
executor_input = """\
|
||||
{
|
||||
"inputs": {
|
||||
"parameters": {
|
||||
"first_message": {
|
||||
"stringValue": "Hello"
|
||||
},
|
||||
"second_message": {
|
||||
"stringValue": "World"
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": {
|
||||
"artifacts": {
|
||||
"Output": {
|
||||
"outputFile": "gs://some-bucket/output"
|
||||
}
|
||||
},
|
||||
"outputFile": "%s/output_metadata.json"
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
def test_func(first_message: str, second_message: str) -> str:
|
||||
return first_message + ", " + second_message
|
||||
|
||||
self._get_executor(test_func, executor_input).execute()
|
||||
with open(os.path.join(self._test_dir, 'output_metadata.json'), 'r') as f:
|
||||
output_metadata = json.loads(f.read())
|
||||
self.assertDictEqual(output_metadata, {
|
||||
"parameters": {
|
||||
"Output": {
|
||||
"stringValue": "Hello, World"
|
||||
}
|
||||
},
|
||||
})
|
||||
|
||||
def test_artifact_output(self):
|
||||
executor_input = """\
|
||||
{
|
||||
"inputs": {
|
||||
"parameters": {
|
||||
"first": {
|
||||
"stringValue": "Hello"
|
||||
},
|
||||
"second": {
|
||||
"stringValue": "World"
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": {
|
||||
"artifacts": {
|
||||
"Output": {
|
||||
"artifacts": [
|
||||
{
|
||||
"name": "output",
|
||||
"type": {
|
||||
"schemaTitle": "system.Artifact"
|
||||
},
|
||||
"uri": "gs://some-bucket/output"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"outputFile": "%s/output_metadata.json"
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
def test_func(first: str, second: str) -> Artifact:
|
||||
return first + ", " + second
|
||||
|
||||
self._get_executor(test_func, executor_input).execute()
|
||||
with open(os.path.join(self._test_dir, 'output_metadata.json'), 'r') as f:
|
||||
output_metadata = json.loads(f.read())
|
||||
self.assertDictEqual(
|
||||
output_metadata, {
|
||||
'artifacts': {
|
||||
'Output': {
|
||||
'artifacts': [{
|
||||
'metadata': {},
|
||||
'name': 'output',
|
||||
'uri': 'gs://some-bucket/output'
|
||||
}]
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
with open(os.path.join(self._test_dir, 'some-bucket/output'), 'r') as f:
|
||||
artifact_payload = f.read()
|
||||
self.assertEqual(artifact_payload, "Hello, World")
|
||||
|
||||
def test_named_tuple_output(self):
|
||||
executor_input = """\
|
||||
{
|
||||
"outputs": {
|
||||
"artifacts": {
|
||||
"output_dataset": {
|
||||
"artifacts": [
|
||||
{
|
||||
"name": "output_dataset",
|
||||
"type": {
|
||||
"schemaTitle": "system.Dataset"
|
||||
},
|
||||
"uri": "gs://some-bucket/output_dataset"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"parameters": {
|
||||
"output_int": {
|
||||
"outputFile": "gs://some-bucket/output_int"
|
||||
},
|
||||
"output_string": {
|
||||
"outputFile": "gs://some-bucket/output_string"
|
||||
}
|
||||
},
|
||||
"outputFile": "%s/output_metadata.json"
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
# Functions returning named tuples should work.
|
||||
def func_returning_named_tuple() -> NamedTuple('Outputs', [
|
||||
("output_dataset", Dataset),
|
||||
("output_int", int),
|
||||
("output_string", str),
|
||||
]):
|
||||
from collections import namedtuple
|
||||
output = namedtuple('Outputs',
|
||||
['output_dataset', 'output_int', 'output_string'])
|
||||
return output("Dataset contents", 101, "Some output string")
|
||||
|
||||
# Functions returning plain tuples should work too.
|
||||
def func_returning_plain_tuple() -> NamedTuple('Outputs', [
|
||||
("output_dataset", Dataset),
|
||||
("output_int", int),
|
||||
("output_string", str),
|
||||
]):
|
||||
return ("Dataset contents", 101, "Some output string")
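# Both variants should produce the same executor output: the Dataset entry is
# written to the artifact's file, while the int and str entries are emitted as
# output parameters (verified below).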
|
||||
|
||||
for test_func in [func_returning_named_tuple, func_returning_plain_tuple]:
|
||||
self._get_executor(test_func, executor_input).execute()
|
||||
with open(os.path.join(self._test_dir, 'output_metadata.json'), 'r') as f:
|
||||
output_metadata = json.loads(f.read())
|
||||
self.assertDictEqual(
|
||||
output_metadata, {
|
||||
'artifacts': {
|
||||
'output_dataset': {
|
||||
'artifacts': [{
|
||||
'metadata': {},
|
||||
'name': 'output_dataset',
|
||||
'uri': 'gs://some-bucket/output_dataset'
|
||||
}]
|
||||
}
|
||||
},
|
||||
"parameters": {
|
||||
"output_string": {
|
||||
"stringValue": "Some output string"
|
||||
},
|
||||
"output_int": {
|
||||
"intValue": 101
|
||||
}
|
||||
},
|
||||
})
|
||||
|
||||
with open(os.path.join(self._test_dir, 'some-bucket/output_dataset'),
|
||||
'r') as f:
|
||||
artifact_payload = f.read()
|
||||
self.assertEqual(artifact_payload, "Dataset contents")
|
||||
|
|
@ -21,7 +21,17 @@ from ._pipeline_volume import PipelineVolume
|
|||
from ._volume_snapshot_op import VolumeSnapshotOp
|
||||
from ._ops_group import OpsGroup, ExitHandler, Condition, ParallelFor, SubGraph
|
||||
from ._component import python_component, graph_component, component
|
||||
from .importer_node import importer
|
||||
|
||||
|
||||
def importer(*args, **kwargs):
|
||||
import warnings
|
||||
from kfp.v2.dsl import importer as v2importer
|
||||
warnings.warn(
|
||||
'`kfp.dsl.importer` is a deprecated alias and will be removed'
|
||||
' in KFP v2.0. Please import from `kfp.v2.dsl` instead.',
|
||||
category=FutureWarning)
|
||||
return v2importer(*args, **kwargs)
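# Illustrative call: importer(artifact_uri='gs://bucket/data',
# artifact_class=Dataset) now emits the FutureWarning above and returns the
# task built by the v2 importer.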
|
||||
|
||||
|
||||
EXECUTION_ID_PLACEHOLDER = '{{workflow.uid}}-{{pod.name}}'
|
||||
RUN_ID_PLACEHOLDER = '{{workflow.uid}}'
|
||||
|
|
|
|||
|
|
@ -29,8 +29,8 @@ from kfp.dsl import _pipeline_param
|
|||
from kfp.dsl import component_spec as dsl_component_spec
|
||||
from kfp.dsl import dsl_utils
|
||||
from kfp.dsl import types
|
||||
from kfp.dsl import type_utils
|
||||
from kfp.pipeline_spec import pipeline_spec_pb2
|
||||
from kfp.v2.components.types import type_utils
|
||||
|
||||
|
||||
# Placeholder to represent the output directory hosting all the generated URIs.
|
||||
|
|
|
|||
|
|
@ -19,8 +19,8 @@ from kfp.components import _structures as structures
|
|||
from kfp.dsl import _for_loop
|
||||
from kfp.dsl import _pipeline_param
|
||||
from kfp.dsl import dsl_utils
|
||||
from kfp.dsl import type_utils
|
||||
from kfp.pipeline_spec import pipeline_spec_pb2
|
||||
from kfp.v2.components.types import type_utils
|
||||
|
||||
|
||||
def additional_input_name_for_pipelineparam(
|
||||
|
|
|
|||
|
|
@ -1,156 +0,0 @@
|
|||
# Copyright 2020 The Kubeflow Authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Utility function for building Importer Node spec."""
|
||||
|
||||
from typing import Optional, Union, Type
|
||||
|
||||
from kfp.dsl import _container_op
|
||||
from kfp.dsl import _pipeline_param
|
||||
from kfp.dsl import dsl_utils
|
||||
from kfp.dsl import io_types
|
||||
from kfp.dsl import type_utils
|
||||
from kfp.pipeline_spec import pipeline_spec_pb2
|
||||
|
||||
INPUT_KEY = 'uri'
|
||||
OUTPUT_KEY = 'artifact'
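# Every importer node exposes a single parameter input named 'uri' and a
# single artifact output named 'artifact'; the spec builders below wire these
# two keys into the IR protos.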
|
||||
|
||||
|
||||
def _build_importer_spec(
|
||||
artifact_uri: Union[_pipeline_param.PipelineParam, str],
|
||||
artifact_type_schema: pipeline_spec_pb2.ArtifactTypeSchema,
|
||||
) -> pipeline_spec_pb2.PipelineDeploymentConfig.ImporterSpec:
|
||||
"""Builds an importer executor spec.
|
||||
|
||||
Args:
|
||||
artifact_uri: The artifact uri to import from.
|
||||
artifact_type_schema: The user specified artifact type schema of the
|
||||
artifact to be imported.
|
||||
|
||||
Returns:
|
||||
An importer spec.
|
||||
"""
|
||||
importer_spec = pipeline_spec_pb2.PipelineDeploymentConfig.ImporterSpec()
|
||||
importer_spec.type_schema.CopyFrom(artifact_type_schema)
|
||||
|
||||
if isinstance(artifact_uri, _pipeline_param.PipelineParam):
|
||||
importer_spec.artifact_uri.runtime_parameter = INPUT_KEY
|
||||
elif isinstance(artifact_uri, str):
|
||||
importer_spec.artifact_uri.constant_value.string_value = artifact_uri
|
||||
|
||||
return importer_spec
|
||||
|
||||
|
||||
def _build_importer_task_spec(
|
||||
importer_base_name: str,
|
||||
artifact_uri: Union[_pipeline_param.PipelineParam, str],
|
||||
) -> pipeline_spec_pb2.PipelineTaskSpec:
|
||||
"""Builds an importer task spec.
|
||||
|
||||
Args:
|
||||
importer_base_name: The base name of the importer node.
|
||||
artifact_uri: The artifact uri to import from.
|
||||
|
||||
Returns:
|
||||
An importer node task spec.
|
||||
"""
|
||||
result = pipeline_spec_pb2.PipelineTaskSpec()
|
||||
result.task_info.name = dsl_utils.sanitize_task_name(importer_base_name)
|
||||
result.component_ref.name = dsl_utils.sanitize_component_name(
|
||||
importer_base_name)
|
||||
|
||||
if isinstance(artifact_uri, _pipeline_param.PipelineParam):
|
||||
result.inputs.parameters[
|
||||
INPUT_KEY].component_input_parameter = artifact_uri.full_name
|
||||
elif isinstance(artifact_uri, str):
|
||||
result.inputs.parameters[
|
||||
INPUT_KEY].runtime_value.constant_value.string_value = artifact_uri
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def _build_importer_component_spec(
|
||||
importer_base_name: str,
|
||||
artifact_type_schema: pipeline_spec_pb2.ArtifactTypeSchema,
|
||||
) -> pipeline_spec_pb2.ComponentSpec:
|
||||
"""Builds an importer component spec.
|
||||
|
||||
Args:
|
||||
importer_base_name: The base name of the importer node.
|
||||
artifact_type_schema: The user specified artifact type schema of the
|
||||
artifact to be imported.
|
||||
|
||||
Returns:
|
||||
An importer node component spec.
|
||||
"""
|
||||
result = pipeline_spec_pb2.ComponentSpec()
|
||||
result.executor_label = dsl_utils.sanitize_executor_label(importer_base_name)
|
||||
result.input_definitions.parameters[
|
||||
INPUT_KEY].type = pipeline_spec_pb2.PrimitiveType.STRING
|
||||
result.output_definitions.artifacts[OUTPUT_KEY].artifact_type.CopyFrom(
|
||||
artifact_type_schema)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def importer(artifact_uri: Union[_pipeline_param.PipelineParam, str],
|
||||
artifact_class: Type[io_types.Artifact],
|
||||
reimport: bool = False) -> _container_op.ContainerOp:
|
||||
"""dsl.importer for importing an existing artifact. Only for v2 pipeline.
|
||||
|
||||
Args:
|
||||
artifact_uri: The artifact uri to import from.
|
||||
artifact_class: The artifact class (a subclass of io_types.Artifact) of the
artifact to be imported.
|
||||
reimport: Whether to reimport the artifact. Defaults to False.
|
||||
|
||||
Returns:
|
||||
A ContainerOp instance.
|
||||
|
||||
Raises:
|
||||
ValueError if the passed in artifact_uri is neither a PipelineParam nor a
|
||||
constant string value.
|
||||
"""
|
||||
|
||||
if isinstance(artifact_uri, _pipeline_param.PipelineParam):
|
||||
input_param = artifact_uri
|
||||
elif isinstance(artifact_uri, str):
|
||||
input_param = _pipeline_param.PipelineParam(
|
||||
name='uri', value=artifact_uri, param_type='String')
|
||||
else:
|
||||
raise ValueError(
|
||||
'Importer got unexpected artifact_uri: {} of type: {}.'.format(
|
||||
artifact_uri, type(artifact_uri)))
|
||||
|
||||
old_warn_value = _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
|
||||
_container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
|
||||
|
||||
task = _container_op.ContainerOp(
|
||||
name='importer',
|
||||
image='importer_image', # TODO: need a v1 implementation of importer.
|
||||
file_outputs={
|
||||
OUTPUT_KEY: "{{{{$.outputs.artifacts['{}'].uri}}}}".format(OUTPUT_KEY)
|
||||
},
|
||||
)
|
||||
_container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value
|
||||
|
||||
artifact_type_schema = type_utils.get_artifact_type_schema(artifact_class)
|
||||
task.importer_spec = _build_importer_spec(
|
||||
artifact_uri=artifact_uri, artifact_type_schema=artifact_type_schema)
|
||||
task.task_spec = _build_importer_task_spec(
|
||||
importer_base_name=task.name, artifact_uri=artifact_uri)
|
||||
task.component_spec = _build_importer_component_spec(
|
||||
importer_base_name=task.name, artifact_type_schema=artifact_type_schema)
|
||||
task.inputs = [input_param]
|
||||
|
||||
return task
|
||||
|
|
@ -1,165 +0,0 @@
|
|||
# Copyright 2020 The Kubeflow Authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from absl.testing import parameterized
|
||||
import unittest
|
||||
|
||||
from kfp.dsl import _pipeline_param
|
||||
from kfp.dsl import importer_node
|
||||
from kfp.pipeline_spec import pipeline_spec_pb2 as pb
|
||||
from google.protobuf import json_format
|
||||
|
||||
|
||||
class ImporterNodeTest(parameterized.TestCase):
|
||||
|
||||
@parameterized.parameters(
|
||||
{
|
||||
# artifact_uri is a constant value
|
||||
'input_uri':
|
||||
'gs://artifact',
|
||||
'artifact_type_schema':
|
||||
pb.ArtifactTypeSchema(schema_title='system.Dataset'),
|
||||
'expected_result': {
|
||||
'artifactUri': {
|
||||
'constantValue': {
|
||||
'stringValue': 'gs://artifact'
|
||||
}
|
||||
},
|
||||
'typeSchema': {
|
||||
'schemaTitle': 'system.Dataset'
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
# artifact_uri is from PipelineParam
|
||||
'input_uri':
|
||||
_pipeline_param.PipelineParam(name='uri_to_import'),
|
||||
'artifact_type_schema':
|
||||
pb.ArtifactTypeSchema(schema_title='system.Model'),
|
||||
'expected_result': {
|
||||
'artifactUri': {
|
||||
'runtimeParameter': 'uri'
|
||||
},
|
||||
'typeSchema': {
|
||||
'schemaTitle': 'system.Model'
|
||||
}
|
||||
},
|
||||
})
|
||||
def test_build_importer_spec(self, input_uri, artifact_type_schema,
|
||||
expected_result):
|
||||
expected_importer_spec = pb.PipelineDeploymentConfig.ImporterSpec()
|
||||
json_format.ParseDict(expected_result, expected_importer_spec)
|
||||
importer_spec = importer_node._build_importer_spec(
|
||||
artifact_uri=input_uri, artifact_type_schema=artifact_type_schema)
|
||||
|
||||
self.maxDiff = None
|
||||
self.assertEqual(expected_importer_spec, importer_spec)
|
||||
|
||||
@parameterized.parameters(
|
||||
{
|
||||
# artifact_uri is a constant value
|
||||
'importer_name': 'importer-1',
|
||||
'input_uri': 'gs://artifact',
|
||||
'expected_result': {
|
||||
'taskInfo': {
|
||||
'name': 'importer-1'
|
||||
},
|
||||
'inputs': {
|
||||
'parameters': {
|
||||
'uri': {
|
||||
'runtimeValue': {
|
||||
'constantValue': {
|
||||
'stringValue': 'gs://artifact'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
'componentRef': {
|
||||
'name': 'comp-importer-1'
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
# artifact_uri is from PipelineParam
|
||||
'importer_name': 'importer-2',
|
||||
'input_uri': _pipeline_param.PipelineParam(name='uri_to_import'),
|
||||
'expected_result': {
|
||||
'taskInfo': {
|
||||
'name': 'importer-2'
|
||||
},
|
||||
'inputs': {
|
||||
'parameters': {
|
||||
'uri': {
|
||||
'componentInputParameter': 'uri_to_import'
|
||||
}
|
||||
}
|
||||
},
|
||||
'componentRef': {
|
||||
'name': 'comp-importer-2'
|
||||
},
|
||||
},
|
||||
})
|
||||
def test_build_importer_task_spec(self, importer_name, input_uri,
|
||||
expected_result):
|
||||
expected_task_spec = pb.PipelineTaskSpec()
|
||||
json_format.ParseDict(expected_result, expected_task_spec)
|
||||
|
||||
task_spec = importer_node._build_importer_task_spec(
|
||||
importer_base_name=importer_name, artifact_uri=input_uri)
|
||||
|
||||
self.maxDiff = None
|
||||
self.assertEqual(expected_task_spec, task_spec)
|
||||
|
||||
def test_build_importer_component_spec(self):
|
||||
expected_importer_component = {
|
||||
'inputDefinitions': {
|
||||
'parameters': {
|
||||
'uri': {
|
||||
'type': 'STRING'
|
||||
}
|
||||
}
|
||||
},
|
||||
'outputDefinitions': {
|
||||
'artifacts': {
|
||||
'artifact': {
|
||||
'artifactType': {
|
||||
'schemaTitle': 'system.Artifact'
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
'executorLabel': 'exec-importer-1'
|
||||
}
|
||||
expected_importer_comp_spec = pb.ComponentSpec()
|
||||
json_format.ParseDict(expected_importer_component,
|
||||
expected_importer_comp_spec)
|
||||
importer_comp_spec = importer_node._build_importer_component_spec(
|
||||
importer_base_name='importer-1',
|
||||
artifact_type_schema=pb.ArtifactTypeSchema(
|
||||
schema_title='system.Artifact'))
|
||||
|
||||
self.maxDiff = None
|
||||
self.assertEqual(expected_importer_comp_spec, importer_comp_spec)
|
||||
|
||||
def test_import_with_invalid_artifact_uri_value_should_fail(self):
|
||||
from kfp.dsl.io_types import Dataset
|
||||
with self.assertRaisesRegex(
|
||||
ValueError,
|
||||
"Importer got unexpected artifact_uri: 123 of type: <class 'int'>."):
|
||||
importer_node.importer(artifact_uri=123, artifact_class=Dataset)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
@ -11,534 +11,24 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Classes for input/output types in KFP SDK.
|
||||
"""Deprecated. See kfp.v2.types.artifact_types instead.
|
||||
|
||||
These are only compatible with v2 Pipelines.
|
||||
This module will be removed in KFP v2.0.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Dict, Generic, List, Optional, Type, TypeVar, Union
|
||||
|
||||
_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'
|
||||
_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'
|
||||
_S3_LOCAL_MOUNT_PREFIX = '/s3/'
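# Local prefixes under which remote object stores are assumed to be mounted
# inside the container; Artifact.path (below) translates between these and
# gs://, minio:// and s3:// URIs.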
|
||||
|
||||
|
||||
class Artifact(object):
|
||||
"""Generic Artifact class.
|
||||
|
||||
This class is meant to represent the metadata around an input or output
|
||||
machine-learning Artifact. Artifacts have URIs, which can either be a location
|
||||
on disk (or Cloud storage) or some other resource identifier such as
|
||||
an API resource name.
|
||||
|
||||
Artifacts carry a `metadata` field, which is a dictionary for storing
|
||||
metadata related to this artifact.
|
||||
"""
|
||||
TYPE_NAME = 'system.Artifact'
|
||||
|
||||
def __init__(self,
|
||||
name: Optional[str] = None,
|
||||
uri: Optional[str] = None,
|
||||
metadata: Optional[Dict] = None):
|
||||
"""Initializes the Artifact with the given name, URI and metadata."""
|
||||
self.uri = uri or ''
|
||||
self.name = name or ''
|
||||
self.metadata = metadata or {}
|
||||
|
||||
@property
|
||||
def path(self):
|
||||
return self._get_path()
|
||||
|
||||
@path.setter
|
||||
def path(self, path):
|
||||
self._set_path(path)
|
||||
|
||||
def _get_path(self) -> Optional[str]:
|
||||
if self.uri.startswith('gs://'):
|
||||
return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]
|
||||
elif self.uri.startswith('minio://'):
|
||||
return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]
|
||||
elif self.uri.startswith('s3://'):
|
||||
return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]
|
||||
return None
|
||||
|
||||
def _set_path(self, path):
|
||||
if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):
|
||||
path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]
|
||||
elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):
|
||||
path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]
|
||||
elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):
|
||||
path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]
|
||||
self.uri = path
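# Example round trip: uri 'gs://bucket/obj' yields path '/gcs/bucket/obj',
# and setting path to '/gcs/bucket/obj' restores uri 'gs://bucket/obj'.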
|
||||
|
||||
|
||||
class Model(Artifact):
|
||||
"""An artifact representing an ML Model."""
|
||||
TYPE_NAME = 'system.Model'
|
||||
|
||||
def __init__(self,
|
||||
name: Optional[str] = None,
|
||||
uri: Optional[str] = None,
|
||||
metadata: Optional[Dict] = None):
|
||||
super().__init__(uri=uri, name=name, metadata=metadata)
|
||||
|
||||
@property
|
||||
def framework(self) -> str:
|
||||
return self._get_framework()
|
||||
|
||||
def _get_framework(self) -> str:
|
||||
return self.metadata.get('framework', '')
|
||||
|
||||
@framework.setter
|
||||
def framework(self, framework: str):
|
||||
self._set_framework(framework)
|
||||
|
||||
def _set_framework(self, framework: str):
|
||||
self.metadata['framework'] = framework
|
||||
|
||||
|
||||
class Dataset(Artifact):
|
||||
"""An artifact representing an ML Dataset."""
|
||||
TYPE_NAME = 'system.Dataset'
|
||||
|
||||
def __init__(self,
|
||||
name: Optional[str] = None,
|
||||
uri: Optional[str] = None,
|
||||
metadata: Optional[Dict] = None):
|
||||
super().__init__(uri=uri, name=name, metadata=metadata)
|
||||
|
||||
|
||||
class Metrics(Artifact):
|
||||
"""Represent a simple base Artifact type to store key-value scalar metrics."""
|
||||
TYPE_NAME = 'system.Metrics'
|
||||
|
||||
def __init__(self,
|
||||
name: Optional[str] = None,
|
||||
uri: Optional[str] = None,
|
||||
metadata: Optional[Dict] = None):
|
||||
super().__init__(uri=uri, name=name, metadata=metadata)
|
||||
|
||||
def log_metric(self, metric: str, value: float):
|
||||
"""Sets a custom scalar metric.
|
||||
|
||||
Args:
|
||||
metric: Metric key
|
||||
value: Value of the metric.
|
||||
"""
|
||||
self.metadata[metric] = value
|
||||
|
||||
|
||||
class ClassificationMetrics(Artifact):
|
||||
"""Represents Artifact class to store Classification Metrics."""
|
||||
TYPE_NAME = 'system.ClassificationMetrics'
|
||||
|
||||
def __init__(self,
|
||||
name: Optional[str] = None,
|
||||
uri: Optional[str] = None,
|
||||
metadata: Optional[Dict] = None):
|
||||
super().__init__(uri=uri, name=name, metadata=metadata)
|
||||
|
||||
def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):
|
||||
"""Logs a single data point in the ROC Curve.
|
||||
|
||||
Args:
|
||||
fpr: False positive rate value of the data point.
|
||||
tpr: True positive rate value of the data point.
|
||||
threshold: Threshold value for the data point.
|
||||
"""
|
||||
|
||||
roc_reading = {
|
||||
'confidenceThreshold': threshold,
|
||||
'recall': tpr,
|
||||
'falsePositiveRate': fpr
|
||||
}
|
||||
if 'confidenceMetrics' not in self.metadata.keys():
|
||||
self.metadata['confidenceMetrics'] = []
|
||||
|
||||
self.metadata['confidenceMetrics'].append(roc_reading)
|
||||
|
||||
def log_roc_curve(self, fpr: List[float], tpr: List[float],
|
||||
threshold: List[float]):
|
||||
"""Logs an ROC curve.
|
||||
|
||||
The list length of fpr, tpr and threshold must be the same.
|
||||
|
||||
Args:
|
||||
fpr: List of false positive rate values.
|
||||
tpr: List of true positive rate values.
|
||||
threshold: List of threshold values.
|
||||
"""
|
||||
if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(tpr) != len(
|
||||
threshold):
|
||||
raise ValueError('Length of fpr, tpr and threshold must be the same. '
|
||||
'Got lengths {}, {} and {} respectively.'.format(
|
||||
len(fpr), len(tpr), len(threshold)))
|
||||
|
||||
for i in range(len(fpr)):
|
||||
self.log_roc_data_point(fpr=fpr[i], tpr=tpr[i], threshold=threshold[i])
|
||||
|
||||
def set_confusion_matrix_categories(self, categories: List[str]):
|
||||
"""Stores confusion matrix categories.
|
||||
|
||||
Args:
|
||||
categories: List of strings specifying the categories.
|
||||
"""
|
||||
|
||||
self._categories = []
|
||||
annotation_specs = []
|
||||
for category in categories:
|
||||
annotation_spec = {'displayName': category}
|
||||
self._categories.append(category)
|
||||
annotation_specs.append(annotation_spec)
|
||||
|
||||
self._matrix = []
|
||||
for row in range(len(self._categories)):
|
||||
self._matrix.append({'row': [0] * len(self._categories)})
|
||||
|
||||
self._confusion_matrix = {}
|
||||
self._confusion_matrix['annotationSpecs'] = annotation_specs
|
||||
self._confusion_matrix['rows'] = self._matrix
|
||||
self.metadata['confusionMatrix'] = self._confusion_matrix
|
||||
|
||||
def log_confusion_matrix_row(self, row_category: str, row: List[float]):
|
||||
"""Logs a confusion matrix row.
|
||||
|
||||
Args:
|
||||
row_category: Category to which the row belongs.
|
||||
row: List of integers specifying the values for the row.
|
||||
|
||||
Raises:
|
||||
ValueError: If row_category is not in the list of categories
|
||||
set in set_categories call.
|
||||
"""
|
||||
if row_category not in self._categories:
|
||||
raise ValueError('Invalid category: {} passed. Expected one of: {}'.\
|
||||
format(row_category, self._categories))
|
||||
|
||||
if len(row) != len(self._categories):
|
||||
raise ValueError('Invalid row. Expected size: {} got: {}'.\
|
||||
format(len(self._categories), len(row)))
|
||||
|
||||
self._matrix[self._categories.index(row_category)] = {'row': row}
|
||||
self.metadata['confusionMatrix'] = self._confusion_matrix
|
||||
|
||||
def log_confusion_matrix_cell(self, row_category: str, col_category: str,
|
||||
value: int):
|
||||
"""Logs a cell in the confusion matrix.
|
||||
|
||||
Args:
|
||||
row_category: String representing the name of the row category.
|
||||
col_category: String representing the name of the column category.
|
||||
value: Int value of the cell.
|
||||
|
||||
Raises:
|
||||
ValueError: If row_category or col_category is not in the list of
|
||||
categories set in set_categories.
|
||||
"""
|
||||
if row_category not in self._categories:
|
||||
raise ValueError('Invalid category: {} passed. Expected one of: {}'.\
|
||||
format(row_category, self._categories))
|
||||
|
||||
if col_category not in self._categories:
|
||||
raise ValueError('Invalid category: {} passed. Expected one of: {}'.\
|
||||
format(col_category, self._categories))
|
||||
|
||||
self._matrix[self._categories.index(row_category)]['row'][
|
||||
self._categories.index(col_category)] = value
|
||||
self.metadata['confusionMatrix'] = self._confusion_matrix
|
||||
|
||||
def log_confusion_matrix(self, categories: List[str],
|
||||
matrix: List[List[int]]):
|
||||
"""Logs a confusion matrix.
|
||||
|
||||
Args:
|
||||
categories: List of the category names.
|
||||
matrix: Complete confusion matrix.
|
||||
|
||||
Raises:
|
||||
ValueError: Length of categories does not match number of rows or columns.
|
||||
"""
|
||||
self.set_confusion_matrix_categories(categories)
|
||||
|
||||
if len(matrix) != len(categories):
|
||||
raise ValueError('Invalid matrix: {} passed for categories: {}'.\
|
||||
format(matrix, categories))
|
||||
|
||||
for index in range(len(categories)):
|
||||
if len(matrix[index]) != len(categories):
|
||||
raise ValueError('Invalid matrix: {} passed for categories: {}'.\
|
||||
format(matrix, categories))
|
||||
|
||||
self.log_confusion_matrix_row(categories[index], matrix[index])
|
||||
|
||||
self.metadata['confusionMatrix'] = self._confusion_matrix
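# Illustrative usage:
#   metrics.log_confusion_matrix(['cat', 'dog'], [[9, 1], [2, 8]])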
|
||||
|
||||
|
||||
class SlicedClassificationMetrics(Artifact):
|
||||
"""Metrics class representing Sliced Classification Metrics.
|
||||
|
||||
Similar to ClassificationMetrics, clients are expected to use the log methods
of this class to log metrics, with the difference that each log method takes a
slice identifying the ClassificationMetrics instance it applies to.
|
||||
|
||||
"""
|
||||
|
||||
TYPE_NAME = 'system.SlicedClassificationMetrics'
|
||||
|
||||
def __init__(self,
|
||||
name: Optional[str] = None,
|
||||
uri: Optional[str] = None,
|
||||
metadata: Optional[Dict] = None):
|
||||
super().__init__(uri=uri, name=name, metadata=metadata)
self._sliced_metrics = {}
|
||||
|
||||
def _upsert_classification_metrics_for_slice(self, slice: str):
|
||||
"""Upserts the classification metrics instance for a slice."""
|
||||
if slice not in self._sliced_metrics:
|
||||
self._sliced_metrics[slice] = ClassificationMetrics()
|
||||
|
||||
def _update_metadata(self, slice: str):
|
||||
"""Updates metadata to adhere to the metrics schema."""
|
||||
self.metadata = {}
|
||||
self.metadata['evaluationSlices'] = []
|
||||
for slice in self._sliced_metrics.keys():
|
||||
slice_metrics = {
|
||||
'slice': slice,
|
||||
'sliceClassificationMetrics': self._sliced_metrics[slice].metadata
|
||||
}
|
||||
self.metadata['evaluationSlices'].append(slice_metrics)
|
||||
|
||||
def log_roc_reading(self, slice: str, threshold: float, tpr: float,
|
||||
fpr: float):
|
||||
"""Logs a single data point in the ROC Curve of a slice.
|
||||
|
||||
Args:
|
||||
slice: String representing slice label.
|
||||
threshold: Threshold value for the data point.
|
||||
tpr: True positive rate value of the data point.
|
||||
fpr: False positive rate value of the data point.
|
||||
"""
|
||||
|
||||
self._upsert_classification_metrics_for_slice(slice)
|
||||
self._sliced_metrics[slice].log_roc_data_point(fpr=fpr, tpr=tpr, threshold=threshold)
|
||||
self._update_metadata(slice)
|
||||
|
||||
def load_roc_readings(self, slice: str, readings: List[List[float]]):
|
||||
"""Supports bulk loading ROC Curve readings for a slice.
|
||||
|
||||
Args:
|
||||
slice: String representing slice label.
|
||||
readings: A 2-D list providing ROC Curve data points.
|
||||
The expected order of the data points is: threshold,
|
||||
true_positive_rate, false_positive_rate.
|
||||
"""
|
||||
self._upsert_classification_metrics_for_slice(slice)
|
||||
# ClassificationMetrics exposes no bulk loader, so replay each reading; the
# documented order is (threshold, true_positive_rate, false_positive_rate).
for reading in readings:
self._sliced_metrics[slice].log_roc_data_point(
fpr=reading[2], tpr=reading[1], threshold=reading[0])
|
||||
self._update_metadata(slice)
|
||||
|
||||
def set_confusion_matrix_categories(self, slice: str, categories: List[str]):
|
||||
"""Stores confusion matrix categories for a slice..
|
||||
|
||||
Categories are stored in the internal metrics_utils.ConfusionMatrix
|
||||
instance of the slice.
|
||||
|
||||
Args:
|
||||
slice: String representing slice label.
|
||||
categories: List of strings specifying the categories.
|
||||
"""
|
||||
self._upsert_classification_metrics_for_slice(slice)
|
||||
self._sliced_metrics[slice].set_confusion_matrix_categories(categories)
|
||||
self._update_metadata(slice)
|
||||
|
||||
def log_confusion_matrix_row(self, slice: str, row_category: str,
|
||||
row: List[int]):
|
||||
"""Logs a confusion matrix row for a slice.
|
||||
|
||||
Row is updated on the internal metrics_utils.ConfusionMatrix
|
||||
instance of the slice.
|
||||
|
||||
Args:
|
||||
slice: String representing slice label.
|
||||
row_category: Category to which the row belongs.
|
||||
row: List of integers specifying the values for the row.
|
||||
"""
|
||||
self._upsert_classification_metrics_for_slice(slice)
|
||||
self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)
|
||||
self._update_metadata(slice)
|
||||
|
||||
def log_confusion_matrix_cell(self, slice: str, row_category: str,
|
||||
col_category: str, value: int):
|
||||
"""Logs a confusion matrix cell for a slice..
|
||||
|
||||
Cell is updated on the internal metrics_utils.ConfusionMatrix
|
||||
instance of the slice.
|
||||
|
||||
Args:
|
||||
slice: String representing slice label.
|
||||
row_category: String representing the name of the row category.
|
||||
col_category: String representing the name of the column category.
|
||||
value: Int value of the cell.
|
||||
"""
|
||||
self._upsert_classification_metrics_for_slice(slice)
|
||||
self._sliced_metrics[slice].log_confusion_matrix_cell(
|
||||
row_category, col_category, value)
|
||||
self._update_metadata(slice)
|
||||
|
||||
def load_confusion_matrix(self, slice: str, categories: List[str],
|
||||
matrix: List[List[int]]):
|
||||
"""Supports bulk loading the whole confusion matrix for a slice.
|
||||
|
||||
Args:
|
||||
slice: String representing slice label.
|
||||
categories: List of the category names.
|
||||
matrix: Complete confusion matrix.
|
||||
"""
|
||||
self._upsert_classification_metrics_for_slice(slice)
|
||||
self._sliced_metrics[slice].log_confusion_matrix(categories, matrix)
|
||||
self._update_metadata(slice)
|
||||
|
||||
|
||||
class HTML(Artifact):
|
||||
"""An artifact representing an HTML file."""
|
||||
TYPE_NAME = 'system.HTML'
|
||||
|
||||
def __init__(self,
|
||||
name: Optional[str] = None,
|
||||
uri: Optional[str] = None,
|
||||
metadata: Optional[Dict] = None):
|
||||
super().__init__(uri=uri, name=name, metadata=metadata)
|
||||
|
||||
|
||||
class Markdown(Artifact):
|
||||
"""An artifact representing an Markdown file."""
|
||||
TYPE_NAME = 'system.Markdown'
|
||||
|
||||
def __init__(self,
|
||||
name: Optional[str] = None,
|
||||
uri: Optional[str] = None,
|
||||
metadata: Optional[Dict] = None):
|
||||
super().__init__(uri=uri, name=name, metadata=metadata)
|
||||
|
||||
|
||||
T = TypeVar('T')
|
||||
|
||||
|
||||
class InputAnnotation():
|
||||
"""Marker type for input artifacts."""
|
||||
pass
|
||||
|
||||
|
||||
class OutputAnnotation():
|
||||
"""Marker type for output artifacts."""
|
||||
pass
|
||||
|
||||
|
||||
# TODO: Use typing.Annotated instead of this hack.
|
||||
# With typing.Annotated (Python 3.9+ or typing_extensions package), the
|
||||
# following would look like:
|
||||
# Input = typing.Annotated[T, InputAnnotation]
|
||||
# Output = typing.Annotated[T, OutputAnnotation]
|
||||
|
||||
# Input represents an Input artifact of type T.
|
||||
Input = Union[T, InputAnnotation]
|
||||
|
||||
# Output represents an Output artifact of type T.
|
||||
Output = Union[T, OutputAnnotation]
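# With typing.Annotated (see TODO above) these would instead be, e.g.,
# Input = Annotated[T, InputAnnotation], used in signatures as
# data: Input[Dataset].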
|
||||
|
||||
|
||||
def is_artifact_annotation(typ) -> bool:
|
||||
if hasattr(typ, '_subs_tree'): # Python 3.6
|
||||
subs_tree = typ._subs_tree()
|
||||
return len(subs_tree) == 3 and subs_tree[0] == Union and subs_tree[2] in [
|
||||
InputAnnotation, OutputAnnotation
|
||||
]
|
||||
|
||||
if not hasattr(typ, '__origin__'):
|
||||
return False
|
||||
|
||||
if typ.__origin__ != Union and type(typ.__origin__) != type(Union):
|
||||
return False
|
||||
|
||||
if not hasattr(typ, '__args__') or len(typ.__args__) != 2:
|
||||
return False
|
||||
|
||||
if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:
|
||||
return False
|
||||
|
||||
return True
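# For example, Input[Model] resolves to Union[Model, InputAnnotation], whose
# __args__ are (Model, InputAnnotation), so the checks above succeed.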
|
||||
|
||||
|
||||
def is_input_artifact(typ) -> bool:
|
||||
"""Returns True if typ is of type Input[T]."""
|
||||
if not is_artifact_annotation(typ):
|
||||
return False
|
||||
|
||||
if hasattr(typ, '_subs_tree'): # Python 3.6
|
||||
subs_tree = typ._subs_tree()
|
||||
return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation
|
||||
|
||||
return typ.__args__[1] == InputAnnotation
|
||||
|
||||
|
||||
def is_output_artifact(typ) -> bool:
|
||||
"""Returns True if typ is of type Output[T]."""
|
||||
if not is_artifact_annotation(typ):
|
||||
return False
|
||||
|
||||
if hasattr(typ, '_subs_tree'): # Python 3.6
|
||||
subs_tree = typ._subs_tree()
|
||||
return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation
|
||||
|
||||
return typ.__args__[1] == OutputAnnotation
|
||||
|
||||
|
||||
def get_io_artifact_class(typ):
|
||||
if not is_artifact_annotation(typ):
|
||||
return None
|
||||
if typ == Input or typ == Output:
|
||||
return None
|
||||
|
||||
if hasattr(typ, '_subs_tree'): # Python 3.6
|
||||
subs_tree = typ._subs_tree()
|
||||
if len(subs_tree) != 3:
|
||||
return None
|
||||
return subs_tree[1]
|
||||
|
||||
return typ.__args__[0]
|
||||
|
||||
|
||||
def get_io_artifact_annotation(typ):
|
||||
if not is_artifact_annotation(typ):
|
||||
return None
|
||||
|
||||
if hasattr(typ, '_subs_tree'): # Python 3.6
|
||||
subs_tree = typ._subs_tree()
|
||||
if len(subs_tree) != 3:
|
||||
return None
|
||||
return subs_tree[2]
|
||||
|
||||
return typ.__args__[1]
|
||||
|
||||
|
||||
_SCHEMA_TITLE_TO_TYPE: Dict[str, Type[Artifact]] = {
|
||||
x.TYPE_NAME: x
|
||||
for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]
|
||||
}
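# Schema titles not present in this mapping fall back to the generic Artifact
# class in create_runtime_artifact below.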
|
||||
|
||||
|
||||
def create_runtime_artifact(runtime_artifact: Dict) -> Artifact:
|
||||
"""Creates an Artifact instance from the specified RuntimeArtifact.
|
||||
|
||||
Args:
|
||||
runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.
|
||||
"""
|
||||
schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')
|
||||
|
||||
artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)
|
||||
if not artifact_type:
|
||||
artifact_type = Artifact
|
||||
return artifact_type(
|
||||
uri=runtime_artifact.get('uri', ''),
|
||||
name=runtime_artifact.get('name', ''),
|
||||
metadata=runtime_artifact.get('metadata', {}),
|
||||
)
|
||||
import warnings
|
||||
from kfp.v2.components.types import artifact_types
|
||||
|
||||
warnings.warn(
|
||||
'Module kfp.dsl.io_types is deprecated and will be removed'
|
||||
' in KFP v2.0. Please import types from kfp.v2.dsl instead.',
|
||||
category=FutureWarning)
|
||||
|
||||
Artifact = artifact_types.Artifact
|
||||
Dataset = artifact_types.Dataset
|
||||
Metrics = artifact_types.Metrics
|
||||
ClassificationMetrics = artifact_types.ClassificationMetrics
|
||||
Model = artifact_types.Model
|
||||
SlicedClassificationMetrics = artifact_types.SlicedClassificationMetrics
|
||||
HTML = artifact_types.HTML
|
||||
Markdown = artifact_types.Markdown
|
||||
create_runtime_artifact = artifact_types.create_runtime_artifact
|
||||
|
|
|
|||
|
|
@ -1,107 +0,0 @@
|
|||
# Copyright 2021 The Kubeflow Authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Tests for kfp.dsl.io_types."""
|
||||
|
||||
import unittest
|
||||
import json
|
||||
import os
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from kfp.dsl import io_types
|
||||
from kfp.dsl.io_types import Input, InputAnnotation, Output, Model, OutputAnnotation
|
||||
|
||||
|
||||
class IOTypesTest(unittest.TestCase):
|
||||
|
||||
def test_complex_metrics(self):
|
||||
metrics = io_types.ClassificationMetrics()
|
||||
metrics.log_roc_data_point(threshold=0.1, tpr=98.2, fpr=96.2)
|
||||
metrics.log_roc_data_point(threshold=24.3, tpr=24.5, fpr=98.4)
|
||||
metrics.set_confusion_matrix_categories(['dog', 'cat', 'horses'])
|
||||
metrics.log_confusion_matrix_row('dog', [2, 6, 0])
|
||||
metrics.log_confusion_matrix_cell('cat', 'dog', 3)
|
||||
metrics.log_confusion_matrix_cell('horses', 'horses', 3)
|
||||
metrics.metadata['test'] = 1.0
|
||||
with open(
|
||||
os.path.join(
|
||||
os.path.dirname(__file__), 'test_data',
|
||||
'expected_io_types_classification_metrics.json')) as json_file:
|
||||
expected_json = json.load(json_file)
|
||||
self.assertEqual(expected_json, metrics.metadata)
|
||||
|
||||
def test_complex_metrics_bulk_loading(self):
|
||||
metrics = io_types.ClassificationMetrics()
|
||||
metrics.log_roc_curve(
|
||||
fpr=[85.1, 85.1, 85.1],
|
||||
tpr=[52.6, 52.6, 52.6],
|
||||
threshold=[53.6, 53.6, 53.6])
|
||||
metrics.log_confusion_matrix(['dog', 'cat', 'horses'],
|
||||
[[2, 6, 0], [3, 5, 6], [5, 7, 8]])
|
||||
with open(
|
||||
os.path.join(
|
||||
os.path.dirname(__file__), 'test_data',
|
||||
'expected_io_types_bulk_load_classification_metrics.json')
|
||||
) as json_file:
|
||||
expected_json = json.load(json_file)
|
||||
self.assertEqual(expected_json, metrics.metadata)
|
||||
|
||||
def test_is_artifact_annotation(self):
|
||||
self.assertTrue(io_types.is_artifact_annotation(Input[Model]))
|
||||
self.assertTrue(io_types.is_artifact_annotation(Output[Model]))
|
||||
self.assertTrue(io_types.is_artifact_annotation(Output['MyArtifact']))
|
||||
|
||||
self.assertFalse(io_types.is_artifact_annotation(Model))
|
||||
self.assertFalse(io_types.is_artifact_annotation(int))
|
||||
self.assertFalse(io_types.is_artifact_annotation('Dataset'))
|
||||
self.assertFalse(io_types.is_artifact_annotation(List[str]))
|
||||
self.assertFalse(io_types.is_artifact_annotation(Optional[str]))
|
||||
|
||||
def test_is_input_artifact(self):
|
||||
self.assertTrue(io_types.is_input_artifact(Input[Model]))
|
||||
self.assertTrue(io_types.is_input_artifact(Input))
|
||||
|
||||
self.assertFalse(io_types.is_input_artifact(Output[Model]))
|
||||
self.assertFalse(io_types.is_input_artifact(Output))
|
||||
|
||||
def test_is_output_artifact(self):
|
||||
self.assertTrue(io_types.is_output_artifact(Output[Model]))
|
||||
self.assertTrue(io_types.is_output_artifact(Output))
|
||||
|
||||
self.assertFalse(io_types.is_output_artifact(Input[Model]))
|
||||
self.assertFalse(io_types.is_output_artifact(Input))
|
||||
|
||||
def test_get_io_artifact_class(self):
|
||||
self.assertEqual(io_types.get_io_artifact_class(Output[Model]), Model)
|
||||
|
||||
self.assertEqual(io_types.get_io_artifact_class(Input), None)
|
||||
self.assertEqual(io_types.get_io_artifact_class(Output), None)
|
||||
self.assertEqual(io_types.get_io_artifact_class(Model), None)
|
||||
self.assertEqual(io_types.get_io_artifact_class(str), None)
|
||||
|
||||
def test_get_io_artifact_annotation(self):
|
||||
self.assertEqual(
|
||||
io_types.get_io_artifact_annotation(Output[Model]), OutputAnnotation)
|
||||
self.assertEqual(
|
||||
io_types.get_io_artifact_annotation(Input[Model]), InputAnnotation)
|
||||
self.assertEqual(
|
||||
io_types.get_io_artifact_annotation(Input), InputAnnotation)
|
||||
self.assertEqual(
|
||||
io_types.get_io_artifact_annotation(Output), OutputAnnotation)
|
||||
|
||||
self.assertEqual(io_types.get_io_artifact_annotation(Model), None)
|
||||
self.assertEqual(io_types.get_io_artifact_annotation(str), None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright 2020 The Kubeflow Authors
|
||||
# Copyright 2021 The Kubeflow Authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
|
@ -11,150 +11,20 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Utilities for component I/O type mapping."""
|
||||
import inspect
|
||||
from typing import Dict, List, Optional, Type, Union
|
||||
from kfp.components import structures
|
||||
from kfp.components import type_annotation_utils
|
||||
from kfp.pipeline_spec import pipeline_spec_pb2
|
||||
from kfp.dsl import artifact_utils
|
||||
from kfp.dsl import io_types
|
||||
"""Deprecated. See kfp.v2.components.types.type_utils instead.
|
||||
|
||||
# ComponentSpec I/O types to DSL ontology artifact classes mapping.
|
||||
_ARTIFACT_CLASSES_MAPPING = {
|
||||
'model': io_types.Model,
|
||||
'dataset': io_types.Dataset,
|
||||
'metrics': io_types.Metrics,
|
||||
'classificationmetrics': io_types.ClassificationMetrics,
|
||||
'slicedclassificationmetrics': io_types.SlicedClassificationMetrics,
|
||||
'html': io_types.HTML,
|
||||
'markdown': io_types.Markdown,
|
||||
}
|
||||
This module will be removed in KFP v2.0.
|
||||
"""
|
||||
import warnings
|
||||
from kfp.v2.components.types import type_utils
|
||||
|
||||
# ComponentSpec I/O types to (IR) PipelineTaskSpec I/O types mapping.
|
||||
# The keys are normalized (lowercased). These are types viewed as Parameters.
|
||||
# The values are the corresponding IR parameter primitive types.
|
||||
_PARAMETER_TYPES_MAPPING = {
|
||||
'integer': pipeline_spec_pb2.PrimitiveType.INT,
|
||||
'int': pipeline_spec_pb2.PrimitiveType.INT,
|
||||
'double': pipeline_spec_pb2.PrimitiveType.DOUBLE,
|
||||
'float': pipeline_spec_pb2.PrimitiveType.DOUBLE,
|
||||
'string': pipeline_spec_pb2.PrimitiveType.STRING,
|
||||
'str': pipeline_spec_pb2.PrimitiveType.STRING,
|
||||
'text': pipeline_spec_pb2.PrimitiveType.STRING,
|
||||
'bool': pipeline_spec_pb2.PrimitiveType.STRING,
|
||||
'boolean': pipeline_spec_pb2.PrimitiveType.STRING,
|
||||
'dict': pipeline_spec_pb2.PrimitiveType.STRING,
|
||||
'list': pipeline_spec_pb2.PrimitiveType.STRING,
|
||||
'jsonobject': pipeline_spec_pb2.PrimitiveType.STRING,
|
||||
'jsonarray': pipeline_spec_pb2.PrimitiveType.STRING,
|
||||
}
|
||||
warnings.warn(
|
||||
'Module kfp.dsl.type_utils is deprecated and will be removed'
|
||||
' in KFP v2.0. Please import from kfp.v2.components.types.type_utils instead.',
|
||||
category=FutureWarning)
|
||||
|
||||
# Mapping primitive types to their IR message field names.
|
||||
# This is used in constructing condition strings.
|
||||
_PARAMETER_TYPES_VALUE_REFERENCE_MAPPING = {
|
||||
pipeline_spec_pb2.PrimitiveType.INT: 'int_value',
|
||||
pipeline_spec_pb2.PrimitiveType.DOUBLE: 'double_value',
|
||||
pipeline_spec_pb2.PrimitiveType.STRING: 'string_value',
|
||||
}
|
||||
|
||||
|
||||
def is_parameter_type(type_name: Optional[Union[str, dict]]) -> bool:
|
||||
"""Check if a ComponentSpec I/O type is considered as a parameter type.
|
||||
|
||||
Args:
|
||||
type_name: type name of the ComponentSpec I/O type.
|
||||
|
||||
Returns:
|
||||
True if the type name maps to a parameter type else False.
|
||||
"""
|
||||
if isinstance(type_name, str):
|
||||
type_name = type_annotation_utils.get_short_type_name(type_name)
|
||||
elif isinstance(type_name, dict):
|
||||
type_name = list(type_name.keys())[0]
|
||||
else:
|
||||
return False
|
||||
|
||||
return type_name.lower() in _PARAMETER_TYPES_MAPPING
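# For example, 'Integer', 'bool' and {'JsonObject': ...} are parameter types,
# whereas 'Model' or 'Dataset' map to artifact types.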
|
||||
|
||||
|
||||
def get_artifact_type_schema(
|
||||
artifact_class_or_type_name: Optional[Union[str, Type[io_types.Artifact]]]
|
||||
) -> pipeline_spec_pb2.ArtifactTypeSchema:
|
||||
"""Gets the IR I/O artifact type msg for the given ComponentSpec I/O type."""
|
||||
artifact_class = io_types.Artifact
|
||||
if isinstance(artifact_class_or_type_name, str):
|
||||
artifact_class = _ARTIFACT_CLASSES_MAPPING.get(
|
||||
artifact_class_or_type_name.lower(), io_types.Artifact)
|
||||
elif inspect.isclass(artifact_class_or_type_name) and issubclass(
|
||||
artifact_class_or_type_name, io_types.Artifact):
|
||||
artifact_class = artifact_class_or_type_name
|
||||
|
||||
return pipeline_spec_pb2.ArtifactTypeSchema(
|
||||
schema_title=artifact_class.TYPE_NAME)
|
||||
|
||||
|
||||
def get_parameter_type(
|
||||
param_type: Optional[Union[Type, str, dict]]
|
||||
) -> pipeline_spec_pb2.PrimitiveType:
|
||||
"""Get the IR I/O parameter type for the given ComponentSpec I/O type.
|
||||
|
||||
Args:
|
||||
param_type: type of the ComponentSpec I/O type. Can be a primitive Python
|
||||
builtin type or a type name.
|
||||
|
||||
Returns:
|
||||
The enum value of the mapped IR I/O primitive type.
|
||||
|
||||
Raises:
|
||||
AttributeError: if type_name is not a string type.
|
||||
"""
|
||||
if type(param_type) == type:
|
||||
type_name = param_type.__name__
|
||||
elif isinstance(param_type, dict):
|
||||
type_name = list(param_type.keys())[0]
|
||||
else:
|
||||
type_name = type_annotation_utils.get_short_type_name(str(param_type))
|
||||
return _PARAMETER_TYPES_MAPPING.get(type_name.lower())
|
||||
|
||||
|
||||
def get_parameter_type_field_name(type_name: Optional[str]) -> str:
|
||||
"""Get the IR field name for the given primitive type.
|
||||
|
||||
For example: 'str' -> 'string_value', 'double' -> 'double_value', etc.
|
||||
|
||||
Args:
|
||||
type_name: type name of the ComponentSpec I/O primitive type.
|
||||
|
||||
Returns:
|
||||
The IR value reference field name.
|
||||
|
||||
Raises:
|
||||
AttributeError: if type_name is not a string type.
|
||||
"""
|
||||
return _PARAMETER_TYPES_VALUE_REFERENCE_MAPPING.get(
|
||||
get_parameter_type(type_name))
|
||||
|
||||
|
||||
def get_input_artifact_type_schema(
|
||||
input_name: str,
|
||||
inputs: List[structures.InputSpec],
|
||||
) -> Optional[str]:
|
||||
"""Find the input artifact type by input name.
|
||||
|
||||
Args:
|
||||
input_name: The name of the component input.
|
||||
inputs: The list of InputSpec
|
||||
|
||||
Returns:
|
||||
The artifact type schema of the input.
|
||||
|
||||
Raises:
|
||||
AssertionError if input not found, or input found but not an artifact type.
|
||||
"""
|
||||
for component_input in inputs:
|
||||
if component_input.name == input_name:
|
||||
assert not is_parameter_type(
|
||||
component_input.type), 'Input is not an artifact type.'
|
||||
return get_artifact_type_schema(component_input.type)
|
||||
assert False, 'Input not found.'
|
||||
is_parameter_type = type_utils.is_parameter_type
|
||||
get_artifact_type_schema = type_utils.get_artifact_type_schema
|
||||
get_parameter_type = type_utils.get_parameter_type
|
||||
get_parameter_type_field_name = type_utils.get_parameter_type_field_name
|
||||
get_input_artifact_type_schema = type_utils.get_input_artifact_type_schema
|
||||
|
|
@ -1,277 +0,0 @@
|
|||
# Copyright 2020 The Kubeflow Authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from absl.testing import parameterized
|
||||
|
||||
import sys
|
||||
import unittest
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from kfp.components import structures
|
||||
from kfp.dsl import io_types
|
||||
from kfp.dsl import type_utils
|
||||
from kfp.pipeline_spec import pipeline_spec_pb2 as pb
|
||||
|
||||
_PARAMETER_TYPES = [
|
||||
'String',
|
||||
'str',
|
||||
'Integer',
|
||||
'int',
|
||||
'Float',
|
||||
'Double',
|
||||
'bool',
|
||||
'Boolean',
|
||||
'Dict',
|
||||
'List',
|
||||
'JsonObject',
|
||||
'JsonArray',
|
||||
{
|
||||
'JsonObject': {
|
||||
'data_type': 'proto:tfx.components.trainer.TrainArgs'
|
||||
}
|
||||
},
|
||||
]
|
||||
_KNOWN_ARTIFACT_TYPES = ['Model', 'Dataset', 'Schema', 'Metrics']
|
||||
_UNKNOWN_ARTIFACT_TYPES = [None, 'Arbitrary Model', 'dummy']
|
||||
|
||||
|
||||
class _ArbitraryClass:
|
||||
pass
|
||||
|
||||
|
||||
class TypeUtilsTest(parameterized.TestCase):
|
||||
|
||||
def test_is_parameter_type(self):
|
||||
for type_name in _PARAMETER_TYPES:
|
||||
self.assertTrue(type_utils.is_parameter_type(type_name))
|
||||
for type_name in _KNOWN_ARTIFACT_TYPES + _UNKNOWN_ARTIFACT_TYPES:
|
||||
self.assertFalse(type_utils.is_parameter_type(type_name))
|
||||
|
||||
@parameterized.parameters(
|
||||
{
|
||||
'artifact_class_or_type_name': 'Model',
|
||||
'expected_result': pb.ArtifactTypeSchema(schema_title='system.Model')
|
||||
},
|
||||
{
|
||||
'artifact_class_or_type_name': io_types.Model,
|
||||
'expected_result': pb.ArtifactTypeSchema(schema_title='system.Model')
|
||||
},
|
||||
{
|
||||
'artifact_class_or_type_name':
|
||||
'Dataset',
|
||||
'expected_result':
|
||||
pb.ArtifactTypeSchema(schema_title='system.Dataset')
|
||||
},
|
||||
{
|
||||
'artifact_class_or_type_name':
|
||||
io_types.Dataset,
|
||||
'expected_result':
|
||||
pb.ArtifactTypeSchema(schema_title='system.Dataset')
|
||||
},
|
||||
{
|
||||
'artifact_class_or_type_name':
|
||||
'Metrics',
|
||||
'expected_result':
|
||||
pb.ArtifactTypeSchema(schema_title='system.Metrics')
|
||||
},
|
||||
{
|
||||
'artifact_class_or_type_name':
|
||||
io_types.Metrics,
|
||||
'expected_result':
|
||||
pb.ArtifactTypeSchema(schema_title='system.Metrics')
|
||||
},
|
||||
{
|
||||
'artifact_class_or_type_name':
|
||||
'ClassificationMetrics',
|
||||
'expected_result':
|
||||
pb.ArtifactTypeSchema(schema_title='system.ClassificationMetrics')
|
||||
},
|
||||
{
|
||||
'artifact_class_or_type_name':
|
||||
io_types.ClassificationMetrics,
|
||||
'expected_result':
|
||||
pb.ArtifactTypeSchema(schema_title='system.ClassificationMetrics')
|
||||
},
|
||||
{
|
||||
'artifact_class_or_type_name':
|
||||
'SlicedClassificationMetrics',
|
||||
'expected_result':
|
||||
pb.ArtifactTypeSchema(
|
||||
schema_title='system.SlicedClassificationMetrics')
|
||||
},
|
||||
{
|
||||
'artifact_class_or_type_name':
|
||||
io_types.SlicedClassificationMetrics,
|
||||
'expected_result':
|
||||
pb.ArtifactTypeSchema(
|
||||
schema_title='system.SlicedClassificationMetrics')
|
||||
},
|
||||
{
|
||||
'artifact_class_or_type_name':
|
||||
'arbitrary name',
|
||||
'expected_result':
|
||||
pb.ArtifactTypeSchema(schema_title='system.Artifact')
|
||||
},
|
||||
{
|
||||
'artifact_class_or_type_name':
|
||||
_ArbitraryClass,
|
||||
'expected_result':
|
||||
pb.ArtifactTypeSchema(schema_title='system.Artifact')
|
||||
},
|
||||
{
|
||||
'artifact_class_or_type_name':
|
||||
io_types.HTML,
|
||||
'expected_result':
|
||||
pb.ArtifactTypeSchema(schema_title='system.HTML')
|
||||
},
|
||||
{
|
||||
'artifact_class_or_type_name':
|
||||
io_types.Markdown,
|
||||
'expected_result':
|
||||
pb.ArtifactTypeSchema(schema_title='system.Markdown')
|
||||
},
|
||||
)
|
||||
def test_get_artifact_type_schema(self, artifact_class_or_type_name,
|
||||
expected_result):
|
||||
self.assertEqual(
|
||||
expected_result,
|
||||
type_utils.get_artifact_type_schema(artifact_class_or_type_name))
|
||||
|
||||
@parameterized.parameters(
|
||||
{
|
||||
'given_type': 'Int',
|
||||
'expected_type': pb.PrimitiveType.INT,
|
||||
},
|
||||
{
|
||||
'given_type': 'Integer',
|
||||
'expected_type': pb.PrimitiveType.INT,
|
||||
},
|
||||
{
|
||||
'given_type': int,
|
||||
'expected_type': pb.PrimitiveType.INT,
|
||||
},
|
||||
{
|
||||
'given_type': 'Double',
|
||||
'expected_type': pb.PrimitiveType.DOUBLE,
|
||||
},
|
||||
{
|
||||
'given_type': 'Float',
|
||||
'expected_type': pb.PrimitiveType.DOUBLE,
|
||||
},
|
||||
{
|
||||
'given_type': float,
|
||||
'expected_type': pb.PrimitiveType.DOUBLE,
|
||||
},
|
||||
{
|
||||
'given_type': 'String',
|
||||
'expected_type': pb.PrimitiveType.STRING,
|
||||
},
|
||||
{
|
||||
'given_type': 'Text',
|
||||
'expected_type': pb.PrimitiveType.STRING,
|
||||
},
|
||||
{
|
||||
'given_type': str,
|
||||
'expected_type': pb.PrimitiveType.STRING,
|
||||
},
|
||||
{
|
||||
'given_type': 'Boolean',
|
||||
'expected_type': pb.PrimitiveType.STRING,
|
||||
},
|
||||
{
|
||||
'given_type': bool,
|
||||
'expected_type': pb.PrimitiveType.STRING,
|
||||
},
|
||||
{
|
||||
'given_type': 'Dict',
|
||||
'expected_type': pb.PrimitiveType.STRING,
|
||||
},
|
||||
{
|
||||
'given_type': dict,
|
||||
'expected_type': pb.PrimitiveType.STRING,
|
||||
},
|
||||
{
|
||||
'given_type': 'List',
|
||||
'expected_type': pb.PrimitiveType.STRING,
|
||||
},
|
||||
{
|
||||
'given_type': list,
|
||||
'expected_type': pb.PrimitiveType.STRING,
|
||||
},
|
||||
{
|
||||
'given_type': Dict[str, int],
|
||||
'expected_type': pb.PrimitiveType.STRING,
|
||||
},
|
||||
{
|
||||
'given_type': List[Any],
|
||||
'expected_type': pb.PrimitiveType.STRING,
|
||||
},
|
||||
{
|
||||
'given_type': {
|
||||
'JsonObject': {
|
||||
'data_type': 'proto:tfx.components.trainer.TrainArgs'
|
||||
}
|
||||
},
|
||||
'expected_type': pb.PrimitiveType.STRING,
|
||||
},
|
||||
)
|
||||
def test_get_parameter_type(self, given_type, expected_type):
|
||||
self.assertEqual(expected_type, type_utils.get_parameter_type(given_type))
|
||||
|
||||
# Test get parameter by Python type.
|
||||
self.assertEqual(pb.PrimitiveType.INT, type_utils.get_parameter_type(int))
|
||||
|
||||
def test_get_parameter_type_invalid(self):
|
||||
with self.assertRaises(AttributeError):
|
||||
type_utils.get_parameter_type_schema(None)
|
||||
|
||||
def test_get_input_artifact_type_schema(self):
|
||||
input_specs = [
|
||||
structures.InputSpec(name='input1', type='String'),
|
||||
structures.InputSpec(name='input2', type='Model'),
|
||||
structures.InputSpec(name='input3', type=None),
|
||||
]
|
||||
# input not found.
|
||||
with self.assertRaises(AssertionError) as cm:
|
||||
type_utils.get_input_artifact_type_schema('input0', input_specs)
|
||||
self.assertEqual('Input not found.', str(cm))
|
||||
|
||||
# input found, but it doesn't map to an artifact type.
|
||||
with self.assertRaises(AssertionError) as cm:
|
||||
type_utils.get_input_artifact_type_schema('input1', input_specs)
|
||||
self.assertEqual('Input is not an artifact type.', str(cm))
|
||||
|
||||
# input found, and a matching artifact type schema returned.
|
||||
self.assertEqual(
|
||||
'system.Model',
|
||||
type_utils.get_input_artifact_type_schema('input2',
|
||||
input_specs).schema_title)
|
||||
|
||||
# input found, and the default artifact type schema returned.
|
||||
self.assertEqual(
|
||||
'system.Artifact',
|
||||
type_utils.get_input_artifact_type_schema('input3',
|
||||
input_specs).schema_title)
|
||||
|
||||
def test_get_parameter_type_field_name(self):
|
||||
self.assertEqual('string_value',
|
||||
type_utils.get_parameter_type_field_name('String'))
|
||||
self.assertEqual('int_value',
|
||||
type_utils.get_parameter_type_field_name('Integer'))
|
||||
self.assertEqual('double_value',
|
||||
type_utils.get_parameter_type_field_name('Float'))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
@ -19,7 +19,7 @@ Feature stage:
from typing import Dict, Union
import warnings

from kfp.dsl import type_utils
from kfp.v2.components.types import type_utils


class BaseType:

@ -25,20 +25,19 @@ import uuid
import warnings
from typing import Any, Callable, Dict, List, Mapping, Optional, Set, Tuple, Union

from google.protobuf import json_format

import kfp
from kfp.compiler._k8s_helper import sanitize_k8s_name
from kfp.components import _python_op
from kfp import dsl
from kfp.dsl import _for_loop
from kfp.dsl import _pipeline_param
from kfp.v2.compiler import compiler_utils
from kfp.dsl import component_spec as dsl_component_spec
from kfp.dsl import dsl_utils
from kfp.dsl import io_types
from kfp.dsl import type_utils
from kfp.pipeline_spec import pipeline_spec_pb2
from kfp.v2.components.types import artifact_types, type_utils
from kfp.v2.components import component_factory

from google.protobuf import json_format

_GroupOrOp = Union[dsl.OpsGroup, dsl.BaseOp]

@ -557,8 +556,8 @@ class Compiler(object):
      if artifact_spec.artifact_type.WhichOneof(
          'kind'
      ) == 'schema_title' and artifact_spec.artifact_type.schema_title in [
          io_types.Metrics.TYPE_NAME,
          io_types.ClassificationMetrics.TYPE_NAME,
          artifact_types.Metrics.TYPE_NAME,
          artifact_types.ClassificationMetrics.TYPE_NAME,
      ]:
        unique_output_name = '{}-{}'.format(op_task_spec.task_info.name,
                                            output_name)

@ -1045,7 +1044,7 @@ class Compiler(object):

    # Create the arg list with no default values and call pipeline function.
    # Assign type information to the PipelineParam
    pipeline_meta = _python_op._extract_component_interface(pipeline_func)
    pipeline_meta = component_factory.extract_component_interface(pipeline_func)
    pipeline_name = pipeline_name or pipeline_meta.name

    pipeline_root = getattr(pipeline_func, 'pipeline_root', None)

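The compiler now obtains the pipeline interface from the copied v2 helper instead of `_python_op`. A rough sketch of what that call yields (hypothetical pipeline; `extract_component_interface` itself appears in full later in this diff):

from kfp.v2.components import component_factory

def my_pipeline(message: str, num_steps: int = 100):
    pass

# Returns a structures.ComponentSpec whose inputs mirror the signature;
# the compiler reads the names, types, and whether a default exists.
spec = component_factory.extract_component_interface(my_pipeline)
print([(i.name, i.type, i.optional) for i in spec.inputs])
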
@ -18,7 +18,7 @@ import shutil
import tempfile
import unittest

from kfp.v2 import components
from kfp import components
from kfp.v2 import compiler
from kfp.v2 import dsl
from kfp.dsl import types

@ -19,8 +19,8 @@
from kfp.containers import _component_builder
from kfp.dsl import _container_op
from kfp.dsl import _pipeline_param
from kfp.dsl import type_utils
from kfp.pipeline_spec import pipeline_spec_pb2
from kfp.v2.components.types import type_utils

# Alias for PipelineContainerSpec
PipelineContainerSpec = pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec

@ -17,23 +17,23 @@
"schemaTitle": "system.Dataset"
}
},
"output_dataset_two": {
"output_dataset_two_path": {
"artifactType": {
"schemaTitle": "system.Dataset"
}
}
},
"parameters": {
"output_bool_parameter": {
"output_bool_parameter_path": {
"type": "STRING"
},
"output_dict_parameter": {
"output_dict_parameter_path": {
"type": "STRING"
},
"output_list_parameter": {
"output_list_parameter_path": {
"type": "STRING"
},
"output_parameter": {
"output_parameter_path": {
"type": "STRING"
}
}

@ -43,7 +43,7 @@
"executorLabel": "exec-train",
"inputDefinitions": {
"artifacts": {
"dataset_one": {
"dataset_one_path": {
"artifactType": {
"schemaTitle": "system.Dataset"
}

@ -99,8 +99,8 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef preprocess(\n # An input parameter of type string.\n message: str,\n # Use Output[T] to get a metadata-rich handle to the output artifact\n # of type `Dataset`.\n output_dataset_one: Output[Dataset],\n # A locally accessible filepath for another output artifact of type\n # `Dataset`.\n output_dataset_two_path: OutputPath('Dataset'),\n # A locally accessible filepath for an output parameter of type string.\n output_parameter_path: OutputPath(str),\n # A locally accessible filepath for an output parameter of type bool.\n output_bool_parameter_path: OutputPath(bool),\n # A locally accessible filepath for an output parameter of type dict.\n output_dict_parameter_path: OutputPath(Dict[str, int]),\n # A locally accessible filepath for an output parameter of type list.\n output_list_parameter_path: OutputPath(List[str]),\n):\n \"\"\"Dummy preprocessing step\"\"\"\n\n # Use Dataset.path to access a local file path for writing.\n # One can also use Dataset.uri to access the actual URI file path.\n with open(output_dataset_one.path, 'w') as f:\n f.write(message)\n\n # OutputPath is used to just pass the local file path of the output artifact\n # to the function.\n with open(output_dataset_two_path, 'w') as f:\n f.write(message)\n\n with open(output_parameter_path, 'w') as f:\n f.write(message)\n\n with open(output_bool_parameter_path, 'w') as f:\n f.write(str(True)) # use either `str()` or `json.dumps()` for bool values.\n\n import json\n with open(output_dict_parameter_path, 'w') as f:\n f.write(json.dumps({'A': 1, 'B': 2}))\n\n with open(output_list_parameter_path, 'w') as f:\n f.write(json.dumps(['a', 'b', 'c']))\n\n"
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef preprocess(\n # An input parameter of type string.\n message: str,\n # Use Output[T] to get a metadata-rich handle to the output artifact\n # of type `Dataset`.\n output_dataset_one: Output[Dataset],\n # A locally accessible filepath for another output artifact of type\n # `Dataset`.\n output_dataset_two_path: OutputPath('Dataset'),\n # A locally accessible filepath for an output parameter of type string.\n output_parameter_path: OutputPath(str),\n # A locally accessible filepath for an output parameter of type bool.\n output_bool_parameter_path: OutputPath(bool),\n # A locally accessible filepath for an output parameter of type dict.\n output_dict_parameter_path: OutputPath(Dict[str, int]),\n # A locally accessible filepath for an output parameter of type list.\n output_list_parameter_path: OutputPath(List[str]),\n):\n \"\"\"Dummy preprocessing step\"\"\"\n\n # Use Dataset.path to access a local file path for writing.\n # One can also use Dataset.uri to access the actual URI file path.\n with open(output_dataset_one.path, 'w') as f:\n f.write(message)\n\n # OutputPath is used to just pass the local file path of the output artifact\n # to the function.\n with open(output_dataset_two_path, 'w') as f:\n f.write(message)\n\n with open(output_parameter_path, 'w') as f:\n f.write(message)\n\n with open(output_bool_parameter_path, 'w') as f:\n f.write(str(True)) # use either `str()` or `json.dumps()` for bool values.\n\n import json\n with open(output_dict_parameter_path, 'w') as f:\n f.write(json.dumps({'A': 1, 'B': 2}))\n\n with open(output_list_parameter_path, 'w') as f:\n f.write(json.dumps(['a', 'b', 'c']))\n\n"
],
"image": "python:3.7"
}

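The only functional change in these compiled golden specs is the executor entrypoint: the ephemeral component now runs through `kfp.v2.components.executor_main`. Unescaped, the generated container command is roughly (an illustrative sketch; the pip bootstrap prefix is elided):

command = [
    'sh', '-ec',
    'program_path=$(mktemp -d)\n'
    'printf "%s" "$0" > "$program_path/ephemeral_component.py"\n'
    'python3 -m kfp.v2.components.executor_main '
    '--component_module_path "$program_path/ephemeral_component.py" "$@"\n',
    component_source,  # the function source prefixed with `from kfp.v2.dsl import *`
]
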
@ -119,8 +119,8 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef train(\n # Use InputPath to get a locally accessible path for the input artifact\n # of type `Dataset`.\n dataset_one_path: InputPath('Dataset'),\n # Use Input[T] to get a metadata-rich handle to the input artifact\n # of type `Dataset`.\n dataset_two: Input[Dataset],\n # An input parameter of type string.\n message: str,\n # Use Output[T] to get a metadata-rich handle to the output artifact\n # of type `Dataset`.\n model: Output[Model],\n # An input parameter of type bool.\n input_bool: bool,\n # An input parameter of type dict.\n input_dict: Dict[str, int],\n # An input parameter of type List[str].\n input_list: List[str],\n # An input parameter of type int with a default value.\n num_steps: int = 100,\n):\n \"\"\"Dummy Training step\"\"\"\n with open(dataset_one_path, 'r') as input_file:\n dataset_one_contents = input_file.read()\n\n with open(dataset_two.path, 'r') as input_file:\n dataset_two_contents = input_file.read()\n\n line = (f'dataset_one_contents: {dataset_one_contents} || '\n f'dataset_two_contents: {dataset_two_contents} || '\n f'message: {message} || '\n f'input_bool: {input_bool}, type {type(input_bool)} || '\n f'input_dict: {input_dict}, type {type(input_dict)} || '\n f'input_list: {input_list}, type {type(input_list)} \\n')\n\n with open(model.path, 'w') as output_file:\n for i in range(num_steps):\n output_file.write('Step {}\\n{}\\n=====\\n'.format(i, line))\n\n # Use model.get() to get a Model artifact, which has a .metadata dictionary\n # to store arbitrary metadata for the output artifact.\n model.metadata['accuracy'] = 0.9\n\n"
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef train(\n # Use InputPath to get a locally accessible path for the input artifact\n # of type `Dataset`.\n dataset_one_path: InputPath('Dataset'),\n # Use Input[T] to get a metadata-rich handle to the input artifact\n # of type `Dataset`.\n dataset_two: Input[Dataset],\n # An input parameter of type string.\n message: str,\n # Use Output[T] to get a metadata-rich handle to the output artifact\n # of type `Dataset`.\n model: Output[Model],\n # An input parameter of type bool.\n input_bool: bool,\n # An input parameter of type dict.\n input_dict: Dict[str, int],\n # An input parameter of type List[str].\n input_list: List[str],\n # An input parameter of type int with a default value.\n num_steps: int = 100,\n):\n \"\"\"Dummy Training step\"\"\"\n with open(dataset_one_path, 'r') as input_file:\n dataset_one_contents = input_file.read()\n\n with open(dataset_two.path, 'r') as input_file:\n dataset_two_contents = input_file.read()\n\n line = (f'dataset_one_contents: {dataset_one_contents} || '\n f'dataset_two_contents: {dataset_two_contents} || '\n f'message: {message} || '\n f'input_bool: {input_bool}, type {type(input_bool)} || '\n f'input_dict: {input_dict}, type {type(input_dict)} || '\n f'input_list: {input_list}, type {type(input_list)} \\n')\n\n with open(model.path, 'w') as output_file:\n for i in range(num_steps):\n output_file.write('Step {}\\n{}\\n=====\\n'.format(i, line))\n\n # Use model.get() to get a Model artifact, which has a .metadata dictionary\n # to store arbitrary metadata for the output artifact.\n model.metadata['accuracy'] = 0.9\n\n"
],
"image": "python:3.7"
}

@ -163,7 +163,7 @@
],
"inputs": {
"artifacts": {
"dataset_one": {
"dataset_one_path": {
"taskOutputArtifact": {
"outputArtifactKey": "output_dataset_one",
"producerTask": "preprocess"

@ -171,7 +171,7 @@
},
"dataset_two": {
"taskOutputArtifact": {
"outputArtifactKey": "output_dataset_two",
"outputArtifactKey": "output_dataset_two_path",
"producerTask": "preprocess"
}
}

@ -179,25 +179,25 @@
"parameters": {
"input_bool": {
"taskOutputParameter": {
"outputParameterKey": "output_bool_parameter",
"outputParameterKey": "output_bool_parameter_path",
"producerTask": "preprocess"
}
},
"input_dict": {
"taskOutputParameter": {
"outputParameterKey": "output_dict_parameter",
"outputParameterKey": "output_dict_parameter_path",
"producerTask": "preprocess"
}
},
"input_list": {
"taskOutputParameter": {
"outputParameterKey": "output_list_parameter",
"outputParameterKey": "output_list_parameter_path",
"producerTask": "preprocess"
}
},
"message": {
"taskOutputParameter": {
"outputParameterKey": "output_parameter",
"outputParameterKey": "output_parameter_path",
"producerTask": "preprocess"
}
},

@ -14,10 +14,8 @@
"""Sample pipeline for passing data in KFP v2."""
from typing import Dict, List

from kfp import dsl
from kfp import components
from kfp.components import InputPath, OutputPath
from kfp.v2.dsl import Input, Output, Dataset, Model, component
from kfp.v2 import dsl
from kfp.v2.dsl import Input, InputPath, Output, OutputPath, Dataset, Model, component
import kfp.v2.compiler as compiler

@ -40,30 +38,30 @@ def preprocess(
    # A locally accessible filepath for an output parameter of type list.
    output_list_parameter_path: OutputPath(List[str]),
):
  """Dummy preprocessing step"""
    """Dummy preprocessing step"""

  # Use Dataset.path to access a local file path for writing.
  # One can also use Dataset.uri to access the actual URI file path.
  with open(output_dataset_one.path, 'w') as f:
    f.write(message)
    # Use Dataset.path to access a local file path for writing.
    # One can also use Dataset.uri to access the actual URI file path.
    with open(output_dataset_one.path, 'w') as f:
        f.write(message)

  # OutputPath is used to just pass the local file path of the output artifact
  # to the function.
  with open(output_dataset_two_path, 'w') as f:
    f.write(message)
    # OutputPath is used to just pass the local file path of the output artifact
    # to the function.
    with open(output_dataset_two_path, 'w') as f:
        f.write(message)

  with open(output_parameter_path, 'w') as f:
    f.write(message)
    with open(output_parameter_path, 'w') as f:
        f.write(message)

  with open(output_bool_parameter_path, 'w') as f:
    f.write(str(True))  # use either `str()` or `json.dumps()` for bool values.
    with open(output_bool_parameter_path, 'w') as f:
        f.write(str(True))  # use either `str()` or `json.dumps()` for bool values.

  import json
  with open(output_dict_parameter_path, 'w') as f:
    f.write(json.dumps({'A': 1, 'B': 2}))
    import json
    with open(output_dict_parameter_path, 'w') as f:
        f.write(json.dumps({'A': 1, 'B': 2}))

  with open(output_list_parameter_path, 'w') as f:
    f.write(json.dumps(['a', 'b', 'c']))
    with open(output_list_parameter_path, 'w') as f:
        f.write(json.dumps(['a', 'b', 'c']))


@component

@ -88,42 +86,43 @@ def train(
    # An input parameter of type int with a default value.
    num_steps: int = 100,
):
  """Dummy Training step"""
  with open(dataset_one_path, 'r') as input_file:
    dataset_one_contents = input_file.read()
    """Dummy Training step"""
    with open(dataset_one_path, 'r') as input_file:
        dataset_one_contents = input_file.read()

  with open(dataset_two.path, 'r') as input_file:
    dataset_two_contents = input_file.read()
    with open(dataset_two.path, 'r') as input_file:
        dataset_two_contents = input_file.read()

  line = (f'dataset_one_contents: {dataset_one_contents} || '
          f'dataset_two_contents: {dataset_two_contents} || '
          f'message: {message} || '
          f'input_bool: {input_bool}, type {type(input_bool)} || '
          f'input_dict: {input_dict}, type {type(input_dict)} || '
          f'input_list: {input_list}, type {type(input_list)} \n')
    line = (f'dataset_one_contents: {dataset_one_contents} || '
            f'dataset_two_contents: {dataset_two_contents} || '
            f'message: {message} || '
            f'input_bool: {input_bool}, type {type(input_bool)} || '
            f'input_dict: {input_dict}, type {type(input_dict)} || '
            f'input_list: {input_list}, type {type(input_list)} \n')

  with open(model.path, 'w') as output_file:
    for i in range(num_steps):
      output_file.write('Step {}\n{}\n=====\n'.format(i, line))
    with open(model.path, 'w') as output_file:
        for i in range(num_steps):
            output_file.write('Step {}\n{}\n=====\n'.format(i, line))

  # Use model.get() to get a Model artifact, which has a .metadata dictionary
  # to store arbitrary metadata for the output artifact.
  model.metadata['accuracy'] = 0.9
    # Use model.get() to get a Model artifact, which has a .metadata dictionary
    # to store arbitrary metadata for the output artifact.
    model.metadata['accuracy'] = 0.9


@dsl.pipeline(pipeline_root='dummy_root', name='my-test-pipeline-beta')
def pipeline(message: str):
  preprocess_task = preprocess(message=message)
  train_task = train(
      dataset_one=preprocess_task.outputs['output_dataset_one'],
      dataset_two=preprocess_task.outputs['output_dataset_two'],
      message=preprocess_task.outputs['output_parameter'],
      input_bool=preprocess_task.outputs['output_bool_parameter'],
      input_dict=preprocess_task.outputs['output_dict_parameter'],
      input_list=preprocess_task.outputs['output_list_parameter'],
  )
    preprocess_task = preprocess(message=message)
    train_task = train(
        dataset_one_path=preprocess_task.outputs['output_dataset_one'],
        dataset_two=preprocess_task.outputs['output_dataset_two_path'],
        message=preprocess_task.outputs['output_parameter_path'],
        input_bool=preprocess_task.outputs['output_bool_parameter_path'],
        input_dict=preprocess_task.outputs['output_dict_parameter_path'],
        input_list=preprocess_task.outputs['output_list_parameter_path'],
    )


if __name__ == '__main__':
  compiler.Compiler().compile(
      pipeline_func=pipeline, package_path=__file__.replace('.py', '.json'))
    compiler.Compiler().compile(
        pipeline_func=pipeline, package_path=__file__.replace('.py', '.json'))

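The renamed output keys above reflect that v2 components now keep `InputPath`/`OutputPath` parameter names verbatim, `_path` suffix included. A hedged sketch of the resulting contract (hypothetical component, not part of the diff):

from kfp.v2.dsl import component, OutputPath

@component
def emit(out_path: OutputPath(str)):
    with open(out_path, 'w') as f:
        f.write('hello')

# In a pipeline, the task output key is the parameter name itself:
# emit_task.outputs['out_path']   # nothing is stripped from the name
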
@ -111,7 +111,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef add_numbers(first: int, second: int) -> int:\n return first + second\n\n"
],
"image": "python:3.7"

@ -131,7 +131,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef concat_message(first: str, second: str) -> str:\n return first + second\n\n"
],
"image": "python:3.7"

@ -151,7 +151,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef output_artifact(number: int, message: str) -> Dataset:\n result = [message for _ in range(number)]\n return '\\n'.join(result)\n\n"
],
"image": "python:3.7"

@ -171,7 +171,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef output_named_tuple(\n artifact: Input[Dataset]\n) -> NamedTuple('Outputs', [\n ('scalar', str),\n ('metrics', Metrics),\n ('model', Model),\n]):\n scalar = \"123\"\n\n import json\n metrics = json.dumps({\n 'metrics': [{\n 'name': 'accuracy',\n 'numberValue': 0.9,\n 'format': \"PERCENTAGE\",\n }]\n })\n\n with open(artifact.path, 'r') as f:\n artifact_contents = f.read()\n model = \"Model contents: \" + artifact_contents\n\n from collections import namedtuple\n output = namedtuple('Outputs', ['scalar', 'metrics', 'model'])\n return output(scalar, metrics, model)\n\n"
],
"image": "python:3.7"

@ -15,22 +15,26 @@
from typing import NamedTuple
from kfp import components, dsl
from kfp.v2 import compiler
from kfp.v2.dsl import Input, Dataset, Model, Metrics
from kfp.v2.dsl import component, Input, Dataset, Model, Metrics


@component
def concat_message(first: str, second: str) -> str:
    return first + second


@component
def add_numbers(first: int, second: int) -> int:
    return first + second


@component
def output_artifact(number: int, message: str) -> Dataset:
    result = [message for _ in range(number)]
    return '\n'.join(result)


@component
def output_named_tuple(
    artifact: Input[Dataset]
) -> NamedTuple('Outputs', [

@ -58,22 +62,15 @@ def output_named_tuple(
    return output(scalar, metrics, model)


concat_op = components.create_component_from_func_v2(concat_message)
add_op = components.create_component_from_func_v2(add_numbers)
output_artifact_op = components.create_component_from_func_v2(output_artifact)
output_named_tuple_op = components.create_component_from_func_v2(
    output_named_tuple)


@dsl.pipeline(pipeline_root='dummy_root',
              name='functions-with-outputs')
def pipeline(first_message: str, second_message: str, first_number: int,
             second_number: int):
    concat = concat_op(first=first_message, second=second_message)
    add_numbers = add_op(first=first_number, second=second_number)
    output_artifact = output_artifact_op(number=add_numbers.output,
                                         message=concat.output)
    output_name_tuple = output_named_tuple_op(output_artifact.output)
    concat_op = concat_message(first=first_message, second=second_message)
    add_numbers_op = add_numbers(first=first_number, second=second_number)
    output_artifact_op = output_artifact(number=add_numbers_op.output,
                                         message=concat_op.output)
    output_name_tuple_op = output_named_tuple(output_artifact_op.output)


if __name__ == '__main__':

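With the `create_component_from_func_v2` factories removed, decorated functions are called directly inside the pipeline and chained via `.output`. A minimal sketch of the new convention (hypothetical component):

from kfp.v2 import dsl
from kfp.v2.dsl import component

@component
def double(x: int) -> int:
    return 2 * x

@dsl.pipeline(name='demo', pipeline_root='dummy_root')
def demo_pipeline(n: int):
    first = double(x=n)              # call the decorated function directly
    second = double(x=first.output)  # chain tasks via .output
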
@ -44,7 +44,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef training_op(input1: str):\n print('dummy training master: {}'.format(input1))\n\n"
],
"imageUri": "python:3.7"

@ -68,7 +68,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef training_op(input1: str):\n print('dummy training master: {}'.format(input1))\n\n"
],
"imageUri": "python:3.7"

@ -101,7 +101,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef fail_op(message: str):\n \"\"\"Fails.\"\"\"\n import sys\n print(message)\n sys.exit(1)\n\n"
],
"image": "python:3.7"

@ -121,7 +121,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef print_op(message: str):\n \"\"\"Prints a message.\"\"\"\n print(message)\n\n"
],
"image": "python:3.7"

@ -141,7 +141,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef print_op(message: str):\n \"\"\"Prints a message.\"\"\"\n print(message)\n\n"
],
"image": "python:3.7"

@ -188,7 +188,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef train(\n dataset: Input[Dataset]\n) -> NamedTuple('Outputs', [\n ('scalar', str),\n ('model', Model),\n]):\n \"\"\"Dummy Training step.\"\"\"\n with open(dataset.path, 'r') as f:\n data = f.read()\n print('Dataset:', data)\n\n scalar = '123'\n model = 'My model trained using data: {}'.format(data)\n\n from collections import namedtuple\n output = namedtuple('Outputs', ['scalar', 'model'])\n return output(scalar, model)\n\n"
],
"image": "python:3.7"

@ -208,7 +208,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef train(\n dataset: Input[Dataset]\n) -> NamedTuple('Outputs', [\n ('scalar', str),\n ('model', Model),\n]):\n \"\"\"Dummy Training step.\"\"\"\n with open(dataset.path, 'r') as f:\n data = f.read()\n print('Dataset:', data)\n\n scalar = '123'\n model = 'My model trained using data: {}'.format(data)\n\n from collections import namedtuple\n output = namedtuple('Outputs', ['scalar', 'model'])\n return output(scalar, model)\n\n"
],
"image": "python:3.7"

@ -15,11 +15,12 @@

from typing import NamedTuple
from kfp import components
from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import Dataset, Model, Input
from kfp.v2 import dsl
from kfp.v2.dsl import component, importer, Dataset, Model, Input


@component
def train(
    dataset: Input[Dataset]
) -> NamedTuple('Outputs', [

@ -39,22 +40,21 @@ def train(
    return output(scalar, model)


train_op = components.create_component_from_func_v2(train)


@dsl.pipeline(name='pipeline-with-importer', pipeline_root='dummy_root')
def my_pipeline(dataset2: str = 'gs://ml-pipeline-playground/shakespeare2.txt'):

    importer = dsl.importer(
    importer1 = importer(
        artifact_uri='gs://ml-pipeline-playground/shakespeare1.txt',
        artifact_class=Dataset,
        reimport=False)
    train1 = train_op(dataset=importer.output)
    train1 = train(dataset=importer1.output)

    with dsl.Condition(train1.outputs['scalar'] == '123'):
        importer2 = dsl.importer(
        importer2 = importer(
            artifact_uri=dataset2, artifact_class=Dataset, reimport=True)
        train_op(dataset=importer2.output)
        train(dataset=importer2.output)


if __name__ == '__main__':

@ -87,7 +87,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef output_metrics(metrics: Output[Metrics]):\n \"\"\"Dummy component that outputs metrics with a random accuracy.\"\"\"\n import random\n result = random.randint(0, 100)\n metrics.log_metric('accuracy', result)\n\n"
],
"image": "python:3.7"

@ -107,7 +107,7 @@
"(python3 -m ensurepip || python3 -m ensurepip --user) && (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.7.0' --user) && \"$0\" \"$@\"",
"sh",
"-ec",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
"\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef output_metrics(metrics: Output[Metrics]):\n \"\"\"Dummy component that outputs metrics with a random accuracy.\"\"\"\n import random\n result = random.randint(0, 100)\n metrics.log_metric('accuracy', result)\n\n"
],
"image": "python:3.7"

@ -11,5 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from kfp.components import *

@ -15,7 +15,7 @@
import functools
from typing import Callable, Optional, List

from kfp import components
from kfp.v2.components import component_factory


def component(func: Optional[Callable] = None,

@ -25,7 +25,7 @@ def component(func: Optional[Callable] = None,
              output_component_file: Optional[str] = None,
              install_kfp_package: bool = True,
              kfp_package_path: Optional[str] = None):
  """Decorator for Python-function based components in KFP v2.
    """Decorator for Python-function based components in KFP v2.

    A lightweight component is a self-contained Python function that includes
    all necessary imports and dependencies.

@ -73,18 +73,18 @@ def component(func: Optional[Callable] = None,
    Returns:
        A component task factory that can be used in pipeline definitions.
    """
  if func is None:
    return functools.partial(component,
                             base_image=base_image,
                             packages_to_install=packages_to_install,
                             output_component_file=output_component_file,
                             install_kfp_package=install_kfp_package,
                             kfp_package_path=kfp_package_path)
    if func is None:
        return functools.partial(component,
                                 base_image=base_image,
                                 packages_to_install=packages_to_install,
                                 output_component_file=output_component_file,
                                 install_kfp_package=install_kfp_package,
                                 kfp_package_path=kfp_package_path)

  return components.create_component_from_func_v2(
      func,
      base_image=base_image,
      packages_to_install=packages_to_install,
      output_component_file=output_component_file,
      install_kfp_package=install_kfp_package,
      kfp_package_path=kfp_package_path)
    return component_factory.create_component_from_func(
        func,
        base_image=base_image,
        packages_to_install=packages_to_install,
        output_component_file=output_component_file,
        install_kfp_package=install_kfp_package,
        kfp_package_path=kfp_package_path)

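For context, typical use of the decorator whose implementation moved above (a sketch; the image and package list are placeholder values):

from kfp.v2.dsl import component

@component(base_image='python:3.7', packages_to_install=['pandas'])
def row_count(csv_path: str) -> int:
    import pandas as pd
    # All imports live inside the function: the component is self-contained.
    return len(pd.read_csv(csv_path))
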
@ -0,0 +1,399 @@
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import inspect
import itertools
import re
import textwrap
from typing import Callable, Dict, List, Mapping, Optional, TypeVar
import warnings

import docstring_parser

from kfp import components as v1_components
from kfp.components import _components, _data_passing, structures, type_annotation_utils
from kfp.v2.components.types import artifact_types, type_annotations

_DEFAULT_BASE_IMAGE = 'python:3.7'


def _python_function_name_to_component_name(name):
    name_with_spaces = re.sub(' +', ' ', name.replace('_', ' ')).strip(' ')
    return name_with_spaces[0].upper() + name_with_spaces[1:]


def _get_packages_to_install_command(
        package_list: Optional[List[str]] = None) -> List[str]:
    result = []
    if package_list is not None:
        install_pip_command = 'python3 -m ensurepip'
        install_packages_command = (
            'PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet '
            '--no-warn-script-location {}').format(' '.join(
                [repr(str(package)) for package in package_list]))
        result = [
            'sh', '-c',
            '({install_pip} || {install_pip} --user) &&'
            ' ({install_packages} || {install_packages} --user) && "$0" "$@"'.
            format(install_pip=install_pip_command,
                   install_packages=install_packages_command)
        ]
    return result

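The helper above assembles the `sh -c` pip bootstrap that appears verbatim in the compiled goldens earlier in this diff; roughly (illustrative):

cmd = _get_packages_to_install_command(['kfp==1.7.0'])
# cmd[0:2] == ['sh', '-c']
# cmd[2] expands to:
#   (python3 -m ensurepip || python3 -m ensurepip --user) &&
#   (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet
#    --no-warn-script-location 'kfp==1.7.0' || ... --user) && "$0" "$@"
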
def _get_default_kfp_package_path() -> str:
    import kfp
    return 'kfp=={}'.format(kfp.__version__)


def _get_function_source_definition(func: Callable) -> str:
    func_code = inspect.getsource(func)

    # Function might be defined in some indented scope (e.g. in another
    # function). We need to handle this and properly dedent the function source
    # code.
    func_code = textwrap.dedent(func_code)
    func_code_lines = func_code.split('\n')

    # Removing possible decorators (can be multiline) until the function
    # definition is found.
    func_code_lines = itertools.dropwhile(lambda x: not x.startswith('def'),
                                          func_code_lines)

    if not func_code_lines:
        raise ValueError(
            'Failed to dedent and clean up the source of function "{}". '
            'It is probably not properly indented.'.format(func.__name__))

    return '\n'.join(func_code_lines)

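A quick illustration of the source extraction (hypothetical function; the exact string is an assumption based on the logic above):

def hello(name: str) -> str:
    return 'hello ' + name

src = _get_function_source_definition(hello)
# src starts at the `def hello(...)` line: any decorators above the
# definition are dropped and enclosing-scope indentation is removed.
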
def _annotation_to_type_struct(annotation):
    if not annotation or annotation == inspect.Parameter.empty:
        return None
    if hasattr(annotation, 'to_dict'):
        annotation = annotation.to_dict()
    if isinstance(annotation, dict):
        return annotation
    if isinstance(annotation, type):
        type_struct = _data_passing.get_canonical_type_struct_for_type(
            annotation)
        if type_struct:
            return type_struct
        type_name = str(annotation.__name__)
    elif hasattr(
            annotation, '__forward_arg__'
    ):  # Handling typing.ForwardRef('Type_name') (the name was _ForwardRef in python 3.5-3.6)
        type_name = str(annotation.__forward_arg__)
    else:
        type_name = str(annotation)

    # It's also possible to get the converter by type name.
    type_struct = _data_passing.get_canonical_type_struct_for_type(type_name)
    if type_struct:
        return type_struct
    return type_name

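Roughly, the normalization behaves as follows (illustrative; the canonical names come from the v1 `_data_passing` registry):

_annotation_to_type_struct(str)       # -> canonical struct for 'String'
_annotation_to_type_struct('MyType')  # -> 'MyType' (no converter; kept as-is)
_annotation_to_type_struct(None)      # -> None (missing/empty annotation)
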
def _maybe_make_unique(name: str, names: List[str]):
    if name not in names:
        return name

    for i in range(2, 100):
        unique_name = '{}_{}'.format(name, i)
        if unique_name not in names:
            return unique_name

    raise RuntimeError('Too many arguments with the name {}'.format(name))

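For example (illustrative):

_maybe_make_unique('Output', [])                      # -> 'Output'
_maybe_make_unique('Output', ['Output'])              # -> 'Output_2'
_maybe_make_unique('Output', ['Output', 'Output_2'])  # -> 'Output_3'
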
# TODO(KFPv2): Replace with v2 ComponentSpec.
def _func_to_component_spec(
        func: Callable,
        base_image: Optional[str] = None,
        packages_to_install: Optional[List[str]] = None,
        install_kfp_package: bool = True,
        kfp_package_path: Optional[str] = None) -> structures.ComponentSpec:
    decorator_base_image = getattr(func, '_component_base_image', None)
    if decorator_base_image is not None:
        if base_image is not None and decorator_base_image != base_image:
            raise ValueError(
                'base_image ({}) conflicts with the decorator-specified base image metadata ({})'
                .format(base_image, decorator_base_image))
        else:
            base_image = decorator_base_image
    else:
        if base_image is None:
            base_image = _DEFAULT_BASE_IMAGE
            if isinstance(base_image, Callable):
                base_image = base_image()

    imports_source = [
        "from kfp.v2.dsl import *",
        "from typing import *",
    ]

    func_source = _get_function_source_definition(func)

    source = textwrap.dedent("""
        {imports_source}

        {func_source}\n""").format(imports_source='\n'.join(imports_source),
                                   func_source=func_source)

    packages_to_install = packages_to_install or []
    if install_kfp_package:
        if kfp_package_path is None:
            kfp_package_path = _get_default_kfp_package_path()
        packages_to_install.append(kfp_package_path)

    packages_to_install_command = _get_packages_to_install_command(
        package_list=packages_to_install)

    from kfp.components._structures import ExecutorInputPlaceholder
    component_spec = extract_component_interface(func)

    component_spec.implementation = structures.ContainerImplementation(
        container=structures.ContainerSpec(
            image=base_image,
            command=packages_to_install_command + [
                'sh',
                '-ec',
                textwrap.dedent('''\
                    program_path=$(mktemp -d)
                    printf "%s" "$0" > "$program_path/ephemeral_component.py"
                    python3 -m kfp.v2.components.executor_main \
                        --component_module_path \
                        "$program_path/ephemeral_component.py" \
                        "$@"
                '''),
                source,
            ],
            args=[
                "--executor_input",
                ExecutorInputPlaceholder(),
                "--function_to_execute",
                func.__name__,
            ]))
    return component_spec

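Putting the pieces together, the factory yields a v1-style ComponentSpec whose container command embeds the function source. A rough sketch of inspecting the result (hypothetical function):

def double(x: int) -> int:
    return 2 * x

spec = _func_to_component_spec(double)
print(spec.implementation.container.image)  # 'python:3.7' (the default)
print(spec.implementation.container.args)
# ['--executor_input', ExecutorInputPlaceholder(), '--function_to_execute', 'double']
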
||||
|
||||
def extract_component_interface(func: Callable) -> structures.ComponentSpec:
|
||||
single_output_name_const = 'Output'
|
||||
|
||||
signature = inspect.signature(func)
|
||||
parameters = list(signature.parameters.values())
|
||||
|
||||
parsed_docstring = docstring_parser.parse(inspect.getdoc(func))
|
||||
doc_dict = {p.arg_name: p.description for p in parsed_docstring.params}
|
||||
|
||||
inputs = []
|
||||
outputs = []
|
||||
|
||||
input_names = set()
|
||||
output_names = set()
|
||||
for parameter in parameters:
|
||||
parameter_type = type_annotation_utils.maybe_strip_optional_from_annotation(
|
||||
parameter.annotation)
|
||||
passing_style = None
|
||||
io_name = parameter.name
|
||||
|
||||
if type_annotations.is_artifact_annotation(parameter_type):
|
||||
# passing_style is either type_annotations.InputAnnotation or
|
||||
# type_annotations.OutputAnnotation.
|
||||
passing_style = type_annotations.get_io_artifact_annotation(
|
||||
parameter_type)
|
||||
|
||||
# parameter_type is type_annotations.Artifact or one of its subclasses.
|
||||
parameter_type = type_annotations.get_io_artifact_class(
|
||||
parameter_type)
|
||||
if not issubclass(parameter_type, artifact_types.Artifact):
|
||||
raise ValueError(
|
||||
'Input[T] and Output[T] are only supported when T is a '
|
||||
'subclass of Artifact. Found `{} with type {}`'.format(
|
||||
io_name, parameter_type))
|
||||
|
||||
if parameter.default is not inspect.Parameter.empty:
|
||||
raise ValueError(
|
||||
'Default values for Input/Output artifacts are not supported.'
|
||||
)
|
||||
elif isinstance(parameter_type,
|
||||
(v1_components.InputPath, v1_components.OutputPath)):
|
||||
raise TypeError(
|
||||
'In v2 components, please import the Python function'
|
||||
' annotations `InputPath` and `OutputPath` from'
|
||||
' package `kfp.v2.dsl` instead of `kfp.dsl`.')
|
||||
elif isinstance(
|
||||
parameter_type,
|
||||
(type_annotations.InputPath, type_annotations.OutputPath)):
|
||||
passing_style = type(parameter_type)
|
||||
parameter_type = parameter_type.type
|
||||
if parameter.default is not inspect.Parameter.empty and not (
|
||||
passing_style == type_annotations.InputPath and
|
||||
parameter.default is None):
|
||||
raise ValueError(
|
||||
'Path inputs only support default values of None. Default values for outputs are not supported.'
|
||||
)
|
||||
|
||||
type_struct = _annotation_to_type_struct(parameter_type)
|
||||
|
||||
if passing_style in [
|
||||
type_annotations.OutputAnnotation, type_annotations.OutputPath
|
||||
]:
|
||||
io_name = _maybe_make_unique(io_name, output_names)
|
||||
output_names.add(io_name)
|
||||
output_spec = structures.OutputSpec(name=io_name,
|
||||
type=type_struct,
|
||||
description=doc_dict.get(
|
||||
parameter.name))
|
||||
output_spec._passing_style = passing_style
|
||||
output_spec._parameter_name = parameter.name
|
||||
outputs.append(output_spec)
|
||||
else:
|
||||
io_name = _maybe_make_unique(io_name, input_names)
|
||||
input_names.add(io_name)
|
||||
input_spec = structures.InputSpec(name=io_name,
|
||||
type=type_struct,
|
||||
description=doc_dict.get(
|
||||
parameter.name))
|
||||
if parameter.default is not inspect.Parameter.empty:
|
||||
input_spec.optional = True
|
||||
if parameter.default is not None:
|
||||
outer_type_name = list(type_struct.keys())[0] if isinstance(
|
||||
type_struct, dict) else type_struct
|
||||
try:
|
||||
input_spec.default = _data_passing.serialize_value(
|
||||
parameter.default, outer_type_name)
|
||||
except Exception as ex:
|
||||
warnings.warn(
|
||||
'Could not serialize the default value of the parameter "{}". {}'
|
||||
.format(parameter.name, ex))
|
||||
input_spec._passing_style = passing_style
|
||||
input_spec._parameter_name = parameter.name
|
||||
inputs.append(input_spec)
|
||||
|
||||
# Analyzing the return type annotations.
|
||||
return_ann = signature.return_annotation
|
||||
if hasattr(return_ann, '_fields'):  # NamedTuple
|
||||
# Getting field type annotations.
|
||||
# __annotations__ does not exist in python 3.5 and earlier
|
||||
# _field_types does not exist in python 3.9 and later
|
||||
field_annotations = getattr(return_ann,
|
||||
'__annotations__', None) or getattr(
|
||||
return_ann, '_field_types', None)
|
||||
for field_name in return_ann._fields:
|
||||
type_struct = None
|
||||
if field_annotations:
|
||||
type_struct = _annotation_to_type_struct(
|
||||
field_annotations.get(field_name, None))
|
||||
|
||||
output_name = _maybe_make_unique(field_name, output_names)
|
||||
output_names.add(output_name)
|
||||
output_spec = structures.OutputSpec(
|
||||
name=output_name,
|
||||
type=type_struct,
|
||||
)
|
||||
output_spec._passing_style = None
|
||||
output_spec._return_tuple_field_name = field_name
|
||||
outputs.append(output_spec)
|
||||
# Deprecated dict-based way of declaring multiple outputs. Was only used by the @component decorator
|
||||
elif isinstance(return_ann, dict):
|
||||
warnings.warn(
|
||||
"The ability to specify multiple outputs using the dict syntax has been deprecated."
|
||||
"It will be removed soon after release 0.1.32."
|
||||
"Please use typing.NamedTuple to declare multiple outputs.")
|
||||
for output_name, output_type_annotation in return_ann.items():
|
||||
output_type_struct = _annotation_to_type_struct(
|
||||
output_type_annotation)
|
||||
output_spec = structures.OutputSpec(
|
||||
name=output_name,
|
||||
type=output_type_struct,
|
||||
)
|
||||
outputs.append(output_spec)
|
||||
elif signature.return_annotation is not None and signature.return_annotation != inspect.Parameter.empty:
|
||||
output_name = _maybe_make_unique(single_output_name_const, output_names)
|
||||
# Fixes exotic, but possible collision: `def func(output_path: OutputPath()) -> str: ...`
|
||||
output_names.add(output_name)
|
||||
type_struct = _annotation_to_type_struct(signature.return_annotation)
|
||||
output_spec = structures.OutputSpec(
|
||||
name=output_name,
|
||||
type=type_struct,
|
||||
)
|
||||
output_spec._passing_style = None
|
||||
outputs.append(output_spec)
|
||||
|
||||
# Component name and description are derived from the function's name and docstring.
|
||||
# The name can be overridden by setting the func.__name__ attribute (or the legacy func._component_human_name attribute).
|
||||
# The description can be overridden by setting the func.__doc__ attribute (or the legacy func._component_description attribute).
|
||||
component_name = getattr(func, '_component_human_name',
|
||||
None) or _python_function_name_to_component_name(
|
||||
func.__name__)
|
||||
description = getattr(func, '_component_description',
|
||||
None) or parsed_docstring.short_description
|
||||
if description:
|
||||
description = description.strip()
|
||||
|
||||
component_spec = structures.ComponentSpec(
|
||||
name=component_name,
|
||||
description=description,
|
||||
inputs=inputs if inputs else None,
|
||||
outputs=outputs if outputs else None,
|
||||
)
|
||||
return component_spec
|
||||
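To make the extraction rules above concrete, here is a small sketch (the `make_dataset` function and the exact type strings are illustrative, not part of this change):

# A hypothetical component function: one plain input parameter, one output
# artifact, and a single return value (surfaced under the reserved name 'Output').
from kfp.v2.dsl import Output, Dataset

def make_dataset(message: str, dataset: Output[Dataset]) -> int:
    ...

# spec = extract_component_interface(make_dataset)
# spec.inputs would roughly contain: InputSpec(name='message', type='String')
# spec.outputs would roughly contain: OutputSpec(name='dataset', ...) and
# OutputSpec(name='Output', type='Integer')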
|
||||
|
||||
def create_component_from_func(func: Callable,
|
||||
base_image: Optional[str] = None,
|
||||
packages_to_install: Optional[List[str]] = None,
|
||||
output_component_file: Optional[str] = None,
|
||||
install_kfp_package: bool = True,
|
||||
kfp_package_path: Optional[str] = None):
|
||||
"""Converts a Python function to a v2 lightweight component.
|
||||
|
||||
A lightweight component is a self-contained Python function that includes
|
||||
all necessary imports and dependencies.
|
||||
|
||||
Args:
|
||||
func: The Python function to create a component from. The function
|
||||
should have type annotations for all its arguments, indicating how
|
||||
it is intended to be used (e.g. as an input/output Artifact object,
|
||||
a plain parameter, or a path to a file).
|
||||
base_image: The image to use when executing |func|. It should
|
||||
contain a default Python interpreter that is compatible with KFP.
|
||||
packages_to_install: A list of optional packages to install before
|
||||
executing |func|.
output_component_file: If specified, the serialized component spec is
also written to this file.
|
||||
install_kfp_package: Specifies if we should add a KFP Python package to
|
||||
|packages_to_install|. Lightweight Python functions always require
|
||||
an installation of KFP in |base_image| to work. If you specify
|
||||
a |base_image| that already contains KFP, you can set this to False.
|
||||
kfp_package_path: Specifies the location from which to install KFP. By
|
||||
default, this will try to install from PyPI using the same version
|
||||
as that used when this component was created. KFP developers can
|
||||
choose to override this to point to a GitHub pull request or
|
||||
other pip-compatible location when testing changes to lightweight
|
||||
Python functions.
|
||||
|
||||
Returns:
|
||||
A component task factory that can be used in pipeline definitions.
|
||||
"""
|
||||
component_spec = _func_to_component_spec(
|
||||
func=func,
|
||||
base_image=base_image,
|
||||
packages_to_install=packages_to_install,
|
||||
install_kfp_package=install_kfp_package,
|
||||
kfp_package_path=kfp_package_path)
|
||||
if output_component_file:
|
||||
component_spec.save(output_component_file)
|
||||
|
||||
# TODO(KFPv2): Replace with v2 BaseComponent.
|
||||
return _components._create_task_factory_from_component_spec(component_spec)
|
||||
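A minimal usage sketch (the `add` function below is hypothetical):

def add(a: float, b: float) -> float:
    return a + b

add_op = create_component_from_func(
    add,
    base_image='python:3.7',        # assumed tag; any KFP-compatible image works
    packages_to_install=['numpy'],  # optional extra packages
)
# add_op can now be called inside a @dsl.pipeline function to create a task.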
|
|
@ -0,0 +1,281 @@
|
|||
# Copyright 2021 The Kubeflow Authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import json
|
||||
import inspect
|
||||
from typing import Any, Callable, Dict, List, Optional, Union
|
||||
|
||||
from kfp.v2.components.types import artifact_types, type_annotations
|
||||
|
||||
|
||||
class Executor():
|
||||
"""Executor executes v2-based Python function components."""
|
||||
|
||||
def __init__(self, executor_input: Dict, function_to_execute: Callable):
|
||||
self._func = function_to_execute
|
||||
self._input = executor_input
|
||||
self._input_artifacts: Dict[str, artifact_types.Artifact] = {}
|
||||
self._output_artifacts: Dict[str, artifact_types.Artifact] = {}
|
||||
|
||||
for name, artifacts in self._input.get('inputs',
|
||||
{}).get('artifacts', {}).items():
|
||||
artifacts_list = artifacts.get('artifacts')
|
||||
if artifacts_list:
|
||||
self._input_artifacts[name] = self._make_input_artifact(
|
||||
artifacts_list[0])
|
||||
|
||||
for name, artifacts in self._input.get('outputs',
|
||||
{}).get('artifacts', {}).items():
|
||||
artifacts_list = artifacts.get('artifacts')
|
||||
if artifacts_list:
|
||||
self._output_artifacts[name] = self._make_output_artifact(
|
||||
artifacts_list[0])
|
||||
|
||||
self._return_annotation = inspect.signature(
|
||||
self._func).return_annotation
|
||||
self._executor_output = {}
|
||||
|
||||
@classmethod
|
||||
def _make_input_artifact(cls, runtime_artifact: Dict):
|
||||
return artifact_types.create_runtime_artifact(runtime_artifact)
|
||||
|
||||
@classmethod
|
||||
def _make_output_artifact(cls, runtime_artifact: Dict):
|
||||
import os
|
||||
artifact = artifact_types.create_runtime_artifact(runtime_artifact)
|
||||
os.makedirs(os.path.dirname(artifact.path), exist_ok=True)
|
||||
return artifact
|
||||
|
||||
def _get_input_artifact(self, name: str):
|
||||
return self._input_artifacts.get(name)
|
||||
|
||||
def _get_output_artifact(self, name: str):
|
||||
return self._output_artifacts.get(name)
|
||||
|
||||
def _get_input_parameter_value(self, parameter_name: str,
|
||||
parameter_type: Any):
|
||||
parameter = self._input.get('inputs',
|
||||
{}).get('parameters',
|
||||
{}).get(parameter_name, None)
|
||||
if parameter is None:
|
||||
return None
|
||||
|
||||
if parameter.get('stringValue'):
|
||||
if parameter_type == str:
|
||||
return parameter['stringValue']
|
||||
elif parameter_type == bool:
|
||||
# Use `.lower()` so it can also handle 'True' and 'False' (resulting from
|
||||
# `str(True)` and `str(False)`, respectively).
|
||||
return json.loads(parameter['stringValue'].lower())
|
||||
else:
|
||||
return json.loads(parameter['stringValue'])
|
||||
elif parameter.get('intValue'):
|
||||
return int(parameter['intValue'])
|
||||
elif parameter.get('doubleValue'):
|
||||
return float(parameter['doubleValue'])
|
||||
|
||||
def _get_output_parameter_path(self, parameter_name: str):
|
||||
parameter = self._input.get('outputs',
|
||||
{}).get('parameters',
|
||||
{}).get(parameter_name, None)
|
||||
if parameter is None:
|
||||
return None
|
||||
|
||||
import os
|
||||
path = parameter.get('outputFile', None)
|
||||
if path:
|
||||
os.makedirs(os.path.dirname(path), exist_ok=True)
|
||||
return path
|
||||
|
||||
def _get_output_artifact_path(self, artifact_name: str):
|
||||
output_artifact = self._output_artifacts.get(artifact_name)
|
||||
if not output_artifact:
|
||||
raise ValueError(
|
||||
'Failed to get output artifact path for artifact name {}'.
|
||||
format(artifact_name))
|
||||
return output_artifact.path
|
||||
|
||||
def _get_input_artifact_path(self, artifact_name: str):
|
||||
input_artifact = self._input_artifacts.get(artifact_name)
|
||||
if not input_artifact:
|
||||
raise ValueError(
|
||||
'Failed to get input artifact path for artifact name {}'.format(
|
||||
artifact_name))
|
||||
return input_artifact.path
|
||||
|
||||
def _write_output_parameter_value(self, name: str,
|
||||
value: Union[str, int, float, bool, dict,
|
||||
list, Dict, List]):
|
||||
if type(value) == str:
|
||||
output = {'stringValue': value}
|
||||
elif type(value) == int:
|
||||
output = {'intValue': value}
|
||||
elif type(value) == float:
|
||||
output = {'doubleValue': value}
|
||||
else:
|
||||
# For bool, list, dict, List, Dict, json serialize the value.
|
||||
output = {'stringValue': json.dumps(value)}
|
||||
|
||||
if not self._executor_output.get('parameters'):
|
||||
self._executor_output['parameters'] = {}
|
||||
|
||||
self._executor_output['parameters'][name] = output
|
||||
|
||||
def _write_output_artifact_payload(self, name: str, value: Any):
|
||||
path = self._get_output_artifact_path(name)
|
||||
with open(path, 'w') as f:
|
||||
f.write(str(value))
|
||||
|
||||
# TODO: extract to a util
|
||||
@classmethod
|
||||
def _get_short_type_name(cls, type_name: str) -> str:
|
||||
"""Extracts the short form type name.
|
||||
|
||||
This method is used for looking up serializer for a given type.
|
||||
|
||||
For example:
|
||||
typing.List -> List
|
||||
typing.List[int] -> List
|
||||
typing.Dict[str, str] -> Dict
|
||||
List -> List
|
||||
str -> str
|
||||
|
||||
Args:
|
||||
type_name: The original type name.
|
||||
|
||||
Returns:
|
||||
The short form type name or the original name if pattern doesn't match.
|
||||
"""
|
||||
import re
|
||||
match = re.match(r'(typing\.)?(?P<type>\w+)(?:\[.+\])?', type_name)
|
||||
if match:
|
||||
return match.group('type')
|
||||
else:
|
||||
return type_name
|
||||
|
||||
# TODO: merge with type_utils.is_parameter_type
|
||||
@classmethod
|
||||
def _is_parameter(cls, annotation: Any) -> bool:
|
||||
if type(annotation) == type:
|
||||
return annotation in [str, int, float, bool, dict, list]
|
||||
|
||||
# Annotation could be, for instance `typing.Dict[str, str]`, etc.
|
||||
return cls._get_short_type_name(str(annotation)) in ['Dict', 'List']
|
||||
|
||||
@classmethod
|
||||
def _is_artifact(cls, annotation: Any) -> bool:
|
||||
if type(annotation) == type:
|
||||
return issubclass(annotation, artifact_types.Artifact)
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def _is_named_tuple(cls, annotation: Any) -> bool:
|
||||
if type(annotation) == type:
|
||||
return issubclass(annotation, tuple) and hasattr(
|
||||
annotation, '_fields') and hasattr(annotation,
|
||||
'__annotations__')
|
||||
return False
|
||||
|
||||
def _handle_single_return_value(self, output_name: str,
|
||||
annotation_type: Any, return_value: Any):
|
||||
if self._is_parameter(annotation_type):
|
||||
if type(return_value) != annotation_type:
|
||||
raise ValueError(
|
||||
'Function `{}` returned value of type {}; want type {}'.
|
||||
format(self._func.__name__, type(return_value),
|
||||
annotation_type))
|
||||
self._write_output_parameter_value(output_name, return_value)
|
||||
elif self._is_artifact(annotation_type):
|
||||
self._write_output_artifact_payload(output_name, return_value)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
'Unknown return type: {}. Must be one of `str`, `int`, `float`, or a'
|
||||
' subclass of `Artifact`'.format(annotation_type))
|
||||
|
||||
def _write_executor_output(self, func_output: Optional[Any] = None):
|
||||
if self._output_artifacts:
|
||||
self._executor_output['artifacts'] = {}
|
||||
|
||||
for name, artifact in self._output_artifacts.items():
|
||||
runtime_artifact = {
|
||||
'name': artifact.name,
|
||||
'uri': artifact.uri,
|
||||
'metadata': artifact.metadata,
|
||||
}
|
||||
artifacts_list = {'artifacts': [runtime_artifact]}
|
||||
|
||||
self._executor_output['artifacts'][name] = artifacts_list
|
||||
|
||||
if func_output is not None:
|
||||
if self._is_parameter(self._return_annotation) or self._is_artifact(
|
||||
self._return_annotation):
|
||||
# Note: single output is named `Output` in component.yaml.
|
||||
self._handle_single_return_value('Output',
|
||||
self._return_annotation,
|
||||
func_output)
|
||||
elif self._is_named_tuple(self._return_annotation):
|
||||
if len(self._return_annotation._fields) != len(func_output):
|
||||
raise RuntimeError(
|
||||
'Expected {} return values from function `{}`, got {}'.
|
||||
format(len(self._return_annotation._fields),
|
||||
self._func.__name__, len(func_output)))
|
||||
for i in range(len(self._return_annotation._fields)):
|
||||
field = self._return_annotation._fields[i]
|
||||
field_type = self._return_annotation.__annotations__[field]
|
||||
if type(func_output) == tuple:
|
||||
field_value = func_output[i]
|
||||
else:
|
||||
field_value = getattr(func_output, field)
|
||||
self._handle_single_return_value(field, field_type,
|
||||
field_value)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
'Unknown return type: {}. Must be one of `str`, `int`, `float`, a'
|
||||
' subclass of `Artifact`, or a NamedTuple collection of these types.'
|
||||
.format(self._return_annotation))
|
||||
|
||||
import os
|
||||
os.makedirs(os.path.dirname(self._input['outputs']['outputFile']),
|
||||
exist_ok=True)
|
||||
with open(self._input['outputs']['outputFile'], 'w') as f:
|
||||
f.write(json.dumps(self._executor_output))
|
||||
|
||||
def execute(self):
|
||||
annotations = inspect.getfullargspec(self._func).annotations
|
||||
|
||||
# Function arguments.
|
||||
func_kwargs = {}
|
||||
|
||||
for k, v in annotations.items():
|
||||
if k == 'return':
|
||||
continue
|
||||
|
||||
if self._is_parameter(v):
|
||||
func_kwargs[k] = self._get_input_parameter_value(k, v)
|
||||
|
||||
if type_annotations.is_artifact_annotation(v):
|
||||
if type_annotations.is_input_artifact(v):
|
||||
func_kwargs[k] = self._get_input_artifact(k)
|
||||
if type_annotations.is_output_artifact(v):
|
||||
func_kwargs[k] = self._get_output_artifact(k)
|
||||
|
||||
elif isinstance(v, type_annotations.OutputPath):
|
||||
if self._is_parameter(v.type):
|
||||
func_kwargs[k] = self._get_output_parameter_path(k)
|
||||
else:
|
||||
func_kwargs[k] = self._get_output_artifact_path(k)
|
||||
elif isinstance(v, type_annotations.InputPath):
|
||||
func_kwargs[k] = self._get_input_artifact_path(k)
|
||||
|
||||
result = self._func(**func_kwargs)
|
||||
self._write_executor_output(result)
|
||||
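As a sketch of the executor's contract (the function, values, and paths below are illustrative; in production, the executor_main module parses --executor_input from the command line):

def double(x: int) -> int:
    return x * 2

executor_input = {
    'inputs': {'parameters': {'x': {'intValue': 21}}},
    'outputs': {'outputFile': '/tmp/outputs/executor_output.json'},
}
Executor(executor_input=executor_input, function_to_execute=double).execute()
# /tmp/outputs/executor_output.json now contains:
#   {"parameters": {"Output": {"intValue": 42}}}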
|
|
@ -17,7 +17,7 @@ import importlib
|
|||
import os
|
||||
import sys
|
||||
|
||||
from kfp.components import executor as component_executor
|
||||
from kfp.v2.components import executor as component_executor
|
||||
|
||||
|
||||
def _load_module(module_name: str, module_directory: str):
|
||||
|
|
@ -0,0 +1,468 @@
|
|||
# Copyright 2021 The Kubeflow Authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Tests for kfp.components.executor"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
from typing import Callable, NamedTuple, Optional
|
||||
import unittest
|
||||
import json
|
||||
|
||||
from kfp.v2.components import executor
|
||||
from kfp.v2.components.types import artifact_types
|
||||
from kfp.v2.components.types.artifact_types import Artifact, Dataset, Model, Metrics
|
||||
from kfp.v2.components.types.type_annotations import Input, Output, InputPath, OutputPath
|
||||
|
||||
_EXECUTOR_INPUT = """\
|
||||
{
|
||||
"inputs": {
|
||||
"parameters": {
|
||||
"input_parameter": {
|
||||
"stringValue": "Hello, KFP"
|
||||
}
|
||||
},
|
||||
"artifacts": {
|
||||
"input_artifact_one_path": {
|
||||
"artifacts": [
|
||||
{
|
||||
"metadata": {},
|
||||
"name": "input_artifact_one",
|
||||
"type": {
|
||||
"schemaTitle": "system.Dataset"
|
||||
},
|
||||
"uri": "gs://some-bucket/input_artifact_one"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": {
|
||||
"artifacts": {
|
||||
"output_artifact_one_path": {
|
||||
"artifacts": [
|
||||
{
|
||||
"metadata": {},
|
||||
"name": "output_artifact_one",
|
||||
"type": {
|
||||
"schemaTitle": "system.Model"
|
||||
},
|
||||
"uri": "gs://some-bucket/output_artifact_one"
|
||||
}
|
||||
]
|
||||
},
|
||||
"output_artifact_two": {
|
||||
"artifacts": [
|
||||
{
|
||||
"metadata": {},
|
||||
"name": "output_artifact_two",
|
||||
"type": {
|
||||
"schemaTitle": "system.Metrics"
|
||||
},
|
||||
"uri": "gs://some-bucket/output_artifact_two"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"parameters": {
|
||||
"output_parameter_path": {
|
||||
"outputFile": "gs://some-bucket/some_task/nested/output_parameter"
|
||||
}
|
||||
},
|
||||
"outputFile": "%s/output_metadata.json"
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
class ExecutorTest(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.maxDiff = None
|
||||
self._test_dir = tempfile.mkdtemp()
|
||||
artifact_types._GCS_LOCAL_MOUNT_PREFIX = self._test_dir + '/'
|
||||
artifact_types._MINIO_LOCAL_MOUNT_PREFIX = self._test_dir + '/minio/'
|
||||
artifact_types._S3_LOCAL_MOUNT_PREFIX = self._test_dir + '/s3/'
|
||||
return super().setUp()
|
||||
|
||||
def _get_executor(
|
||||
self,
|
||||
func: Callable,
|
||||
executor_input: Optional[str] = None) -> executor.Executor:
|
||||
if executor_input is None:
|
||||
executor_input = _EXECUTOR_INPUT
|
||||
|
||||
executor_input_dict = json.loads(executor_input % self._test_dir)
|
||||
|
||||
return executor.Executor(executor_input=executor_input_dict,
|
||||
function_to_execute=func)
|
||||
|
||||
def test_input_parameter(self):
|
||||
|
||||
def test_func(input_parameter: str):
|
||||
self.assertEqual(input_parameter, "Hello, KFP")
|
||||
|
||||
self._get_executor(test_func).execute()
|
||||
|
||||
def test_input_artifact(self):
|
||||
|
||||
def test_func(input_artifact_one_path: Input[Dataset]):
|
||||
self.assertEqual(input_artifact_one_path.uri,
|
||||
'gs://some-bucket/input_artifact_one')
|
||||
self.assertEqual(
|
||||
input_artifact_one_path.path,
|
||||
os.path.join(self._test_dir, 'some-bucket/input_artifact_one'))
|
||||
self.assertEqual(input_artifact_one_path.name, 'input_artifact_one')
|
||||
|
||||
self._get_executor(test_func).execute()
|
||||
|
||||
def test_output_artifact(self):
|
||||
|
||||
def test_func(output_artifact_one_path: Output[Model]):
|
||||
self.assertEqual(output_artifact_one_path.uri,
|
||||
'gs://some-bucket/output_artifact_one')
|
||||
|
||||
self.assertEqual(
|
||||
output_artifact_one_path.path,
|
||||
os.path.join(self._test_dir, 'some-bucket/output_artifact_one'))
|
||||
self.assertEqual(output_artifact_one_path.name,
|
||||
'output_artifact_one')
|
||||
|
||||
self._get_executor(test_func).execute()
|
||||
|
||||
def test_output_parameter(self):
|
||||
|
||||
def test_func(output_parameter_path: OutputPath(str)):
|
||||
# Test that output parameters just use the passed in filename.
|
||||
self.assertEqual(
|
||||
output_parameter_path,
|
||||
'gs://some-bucket/some_task/nested/output_parameter')
|
||||
|
||||
# Test writing to the path succeeds. This fails if parent directories
|
||||
# don't exist.
|
||||
with open(output_parameter_path, 'w') as f:
|
||||
f.write('Hello, World!')
|
||||
|
||||
self._get_executor(test_func).execute()
|
||||
|
||||
def test_input_path_artifact(self):
|
||||
|
||||
def test_func(input_artifact_one_path: InputPath('Dataset')):
|
||||
self.assertEqual(
|
||||
input_artifact_one_path,
|
||||
os.path.join(self._test_dir, 'some-bucket/input_artifact_one'))
|
||||
|
||||
self._get_executor(test_func).execute()
|
||||
|
||||
def test_output_path_artifact(self):
|
||||
|
||||
def test_func(output_artifact_one_path: OutputPath('Model')):
|
||||
self.assertEqual(
|
||||
output_artifact_one_path,
|
||||
os.path.join(self._test_dir, 'some-bucket/output_artifact_one'))
|
||||
|
||||
self._get_executor(test_func).execute()
|
||||
|
||||
def test_output_metadata(self):
|
||||
|
||||
def test_func(output_artifact_two: Output[Metrics]):
|
||||
output_artifact_two.metadata['key_1'] = 'value_1'
|
||||
output_artifact_two.metadata['key_2'] = 2
|
||||
output_artifact_two.uri = 'new-uri'
|
||||
|
||||
# log_metric works here since the schema is specified as Metrics.
|
||||
output_artifact_two.log_metric('metric', 0.9)
|
||||
|
||||
self._get_executor(test_func).execute()
|
||||
with open(os.path.join(self._test_dir, 'output_metadata.json'),
|
||||
'r') as f:
|
||||
output_metadata = json.loads(f.read())
|
||||
self.assertDictEqual(
|
||||
output_metadata, {
|
||||
'artifacts': {
|
||||
'output_artifact_one_path': {
|
||||
'artifacts': [{
|
||||
'name': 'output_artifact_one',
|
||||
'uri': 'gs://some-bucket/output_artifact_one',
|
||||
'metadata': {}
|
||||
}]
|
||||
},
|
||||
'output_artifact_two': {
|
||||
'artifacts': [{
|
||||
'name': 'output_artifact_two',
|
||||
'uri': 'new-uri',
|
||||
'metadata': {
|
||||
'key_1': 'value_1',
|
||||
'key_2': 2,
|
||||
'metric': 0.9
|
||||
}
|
||||
}]
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
def test_function_string_output(self):
|
||||
executor_input = """\
|
||||
{
|
||||
"inputs": {
|
||||
"parameters": {
|
||||
"first_message": {
|
||||
"stringValue": "Hello"
|
||||
},
|
||||
"second_message": {
|
||||
"stringValue": "World"
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": {
|
||||
"parameters": {
|
||||
"output": {
|
||||
"outputFile": "gs://some-bucket/output"
|
||||
}
|
||||
},
|
||||
"outputFile": "%s/output_metadata.json"
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
def test_func(first_message: str, second_message: str) -> str:
|
||||
return first_message + ", " + second_message
|
||||
|
||||
self._get_executor(test_func, executor_input).execute()
|
||||
with open(os.path.join(self._test_dir, 'output_metadata.json'),
|
||||
'r') as f:
|
||||
output_metadata = json.loads(f.read())
|
||||
self.assertDictEqual(output_metadata, {
|
||||
"parameters": {
|
||||
"Output": {
|
||||
"stringValue": "Hello, World"
|
||||
}
|
||||
},
|
||||
})
|
||||
|
||||
def test_function_with_int_output(self):
|
||||
executor_input = """\
|
||||
{
|
||||
"inputs": {
|
||||
"parameters": {
|
||||
"first": {
|
||||
"intValue": 40
|
||||
},
|
||||
"second": {
|
||||
"intValue": 2
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": {
|
||||
"parameters": {
|
||||
"output": {
|
||||
"outputFile": "gs://some-bucket/output"
|
||||
}
|
||||
},
|
||||
"outputFile": "%s/output_metadata.json"
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
def test_func(first: int, second: int) -> int:
|
||||
return first + second
|
||||
|
||||
self._get_executor(test_func, executor_input).execute()
|
||||
with open(os.path.join(self._test_dir, 'output_metadata.json'),
|
||||
'r') as f:
|
||||
output_metadata = json.loads(f.read())
|
||||
self.assertDictEqual(output_metadata, {
|
||||
"parameters": {
|
||||
"Output": {
|
||||
"intValue": 42
|
||||
}
|
||||
},
|
||||
})
|
||||
|
||||
def test_function_string_output_with_artifact_output(self):
|
||||
executor_input = """\
|
||||
{
|
||||
"inputs": {
|
||||
"parameters": {
|
||||
"first_message": {
|
||||
"stringValue": "Hello"
|
||||
},
|
||||
"second_message": {
|
||||
"stringValue": "World"
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": {
|
||||
"artifacts": {
|
||||
"Output": {
|
||||
"outputFile": "gs://some-bucket/output"
|
||||
}
|
||||
},
|
||||
"outputFile": "%s/output_metadata.json"
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
def test_func(first_message: str, second_message: str) -> str:
|
||||
return first_message + ", " + second_message
|
||||
|
||||
self._get_executor(test_func, executor_input).execute()
|
||||
with open(os.path.join(self._test_dir, 'output_metadata.json'),
|
||||
'r') as f:
|
||||
output_metadata = json.loads(f.read())
|
||||
self.assertDictEqual(output_metadata, {
|
||||
"parameters": {
|
||||
"Output": {
|
||||
"stringValue": "Hello, World"
|
||||
}
|
||||
},
|
||||
})
|
||||
|
||||
def test_artifact_output(self):
|
||||
executor_input = """\
|
||||
{
|
||||
"inputs": {
|
||||
"parameters": {
|
||||
"first": {
|
||||
"stringValue": "Hello"
|
||||
},
|
||||
"second": {
|
||||
"stringValue": "World"
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": {
|
||||
"artifacts": {
|
||||
"Output": {
|
||||
"artifacts": [
|
||||
{
|
||||
"name": "output",
|
||||
"type": {
|
||||
"schemaTitle": "system.Artifact"
|
||||
},
|
||||
"uri": "gs://some-bucket/output"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"outputFile": "%s/output_metadata.json"
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
def test_func(first: str, second: str) -> Artifact:
|
||||
return first + ", " + second
|
||||
|
||||
self._get_executor(test_func, executor_input).execute()
|
||||
with open(os.path.join(self._test_dir, 'output_metadata.json'),
|
||||
'r') as f:
|
||||
output_metadata = json.loads(f.read())
|
||||
self.assertDictEqual(
|
||||
output_metadata, {
|
||||
'artifacts': {
|
||||
'Output': {
|
||||
'artifacts': [{
|
||||
'metadata': {},
|
||||
'name': 'output',
|
||||
'uri': 'gs://some-bucket/output'
|
||||
}]
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
with open(os.path.join(self._test_dir, 'some-bucket/output'), 'r') as f:
|
||||
artifact_payload = f.read()
|
||||
self.assertEqual(artifact_payload, "Hello, World")
|
||||
|
||||
def test_named_tuple_output(self):
|
||||
executor_input = """\
|
||||
{
|
||||
"outputs": {
|
||||
"artifacts": {
|
||||
"output_dataset": {
|
||||
"artifacts": [
|
||||
{
|
||||
"name": "output_dataset",
|
||||
"type": {
|
||||
"schemaTitle": "system.Dataset"
|
||||
},
|
||||
"uri": "gs://some-bucket/output_dataset"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"parameters": {
|
||||
"output_int": {
|
||||
"outputFile": "gs://some-bucket/output_int"
|
||||
},
|
||||
"output_string": {
|
||||
"outputFile": "gs://some-bucket/output_string"
|
||||
}
|
||||
},
|
||||
"outputFile": "%s/output_metadata.json"
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
# Functions returning named tuples should work.
|
||||
def func_returning_named_tuple() -> NamedTuple('Outputs', [
|
||||
("output_dataset", Dataset),
|
||||
("output_int", int),
|
||||
("output_string", str),
|
||||
]):
|
||||
from collections import namedtuple
|
||||
output = namedtuple(
|
||||
'Outputs', ['output_dataset', 'output_int', 'output_string'])
|
||||
return output("Dataset contents", 101, "Some output string")
|
||||
|
||||
# Functions returning plain tuples should work too.
|
||||
def func_returning_plain_tuple() -> NamedTuple('Outputs', [
|
||||
("output_dataset", Dataset),
|
||||
("output_int", int),
|
||||
("output_string", str),
|
||||
]):
|
||||
return ("Dataset contents", 101, "Some output string")
|
||||
|
||||
for test_func in [
|
||||
func_returning_named_tuple, func_returning_plain_tuple
|
||||
]:
|
||||
self._get_executor(test_func, executor_input).execute()
|
||||
with open(os.path.join(self._test_dir, 'output_metadata.json'),
|
||||
'r') as f:
|
||||
output_metadata = json.loads(f.read())
|
||||
self.assertDictEqual(
|
||||
output_metadata, {
|
||||
'artifacts': {
|
||||
'output_dataset': {
|
||||
'artifacts': [{
|
||||
'metadata': {},
|
||||
'name': 'output_dataset',
|
||||
'uri': 'gs://some-bucket/output_dataset'
|
||||
}]
|
||||
}
|
||||
},
|
||||
"parameters": {
|
||||
"output_string": {
|
||||
"stringValue": "Some output string"
|
||||
},
|
||||
"output_int": {
|
||||
"intValue": 101
|
||||
}
|
||||
},
|
||||
})
|
||||
|
||||
with open(
|
||||
os.path.join(self._test_dir, 'some-bucket/output_dataset'),
|
||||
'r') as f:
|
||||
artifact_payload = f.read()
|
||||
self.assertEqual(artifact_payload, "Dataset contents")
|
||||
|
|
@ -0,0 +1,158 @@
|
|||
# Copyright 2020 The Kubeflow Authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Utility function for building Importer Node spec."""
|
||||
|
||||
from typing import Union, Type
|
||||
|
||||
from kfp.dsl import _container_op
|
||||
from kfp.dsl import _pipeline_param
|
||||
from kfp.dsl import dsl_utils
|
||||
from kfp.pipeline_spec import pipeline_spec_pb2
|
||||
from kfp.v2.components.types import artifact_types, type_utils
|
||||
|
||||
INPUT_KEY = 'uri'
|
||||
OUTPUT_KEY = 'artifact'
|
||||
|
||||
|
||||
def _build_importer_spec(
|
||||
artifact_uri: Union[_pipeline_param.PipelineParam, str],
|
||||
artifact_type_schema: pipeline_spec_pb2.ArtifactTypeSchema,
|
||||
) -> pipeline_spec_pb2.PipelineDeploymentConfig.ImporterSpec:
|
||||
"""Builds an importer executor spec.
|
||||
|
||||
Args:
|
||||
artifact_uri: The artifact uri to import from.
|
||||
artifact_type_schema: The user specified artifact type schema of the
|
||||
artifact to be imported.
|
||||
|
||||
Returns:
|
||||
An importer spec.
|
||||
"""
|
||||
importer_spec = pipeline_spec_pb2.PipelineDeploymentConfig.ImporterSpec()
|
||||
importer_spec.type_schema.CopyFrom(artifact_type_schema)
|
||||
|
||||
if isinstance(artifact_uri, _pipeline_param.PipelineParam):
|
||||
importer_spec.artifact_uri.runtime_parameter = INPUT_KEY
|
||||
elif isinstance(artifact_uri, str):
|
||||
importer_spec.artifact_uri.constant_value.string_value = artifact_uri
|
||||
|
||||
return importer_spec
|
||||
|
||||
|
||||
def _build_importer_task_spec(
|
||||
importer_base_name: str,
|
||||
artifact_uri: Union[_pipeline_param.PipelineParam, str],
|
||||
) -> pipeline_spec_pb2.PipelineTaskSpec:
|
||||
"""Builds an importer task spec.
|
||||
|
||||
Args:
|
||||
importer_base_name: The base name of the importer node.
|
||||
artifact_uri: The artifact uri to import from.
|
||||
|
||||
Returns:
|
||||
An importer node task spec.
|
||||
"""
|
||||
result = pipeline_spec_pb2.PipelineTaskSpec()
|
||||
result.task_info.name = dsl_utils.sanitize_task_name(importer_base_name)
|
||||
result.component_ref.name = dsl_utils.sanitize_component_name(
|
||||
importer_base_name)
|
||||
|
||||
if isinstance(artifact_uri, _pipeline_param.PipelineParam):
|
||||
result.inputs.parameters[
|
||||
INPUT_KEY].component_input_parameter = artifact_uri.full_name
|
||||
elif isinstance(artifact_uri, str):
|
||||
result.inputs.parameters[
|
||||
INPUT_KEY].runtime_value.constant_value.string_value = artifact_uri
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def _build_importer_component_spec(
|
||||
importer_base_name: str,
|
||||
artifact_type_schema: pipeline_spec_pb2.ArtifactTypeSchema,
|
||||
) -> pipeline_spec_pb2.ComponentSpec:
|
||||
"""Builds an importer component spec.
|
||||
|
||||
Args:
|
||||
importer_base_name: The base name of the importer node.
|
||||
artifact_type_schema: The user specified artifact type schema of the
|
||||
artifact to be imported.
|
||||
|
||||
Returns:
|
||||
An importer node component spec.
|
||||
"""
|
||||
result = pipeline_spec_pb2.ComponentSpec()
|
||||
result.executor_label = dsl_utils.sanitize_executor_label(
|
||||
importer_base_name)
|
||||
result.input_definitions.parameters[
|
||||
INPUT_KEY].type = pipeline_spec_pb2.PrimitiveType.STRING
|
||||
result.output_definitions.artifacts[OUTPUT_KEY].artifact_type.CopyFrom(
|
||||
artifact_type_schema)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def importer(artifact_uri: Union[_pipeline_param.PipelineParam, str],
|
||||
artifact_class: Type[artifact_types.Artifact],
|
||||
reimport: bool = False) -> _container_op.ContainerOp:
|
||||
"""dsl.importer for importing an existing artifact. Only for v2 pipeline.
|
||||
|
||||
Args:
|
||||
artifact_uri: The artifact uri to import from.
|
||||
artifact_class: The artifact class (a subclass of Artifact) of the
|
||||
artifact to be imported.
|
||||
reimport: Whether to reimport the artifact. Defaults to False.
|
||||
|
||||
Returns:
|
||||
A ContainerOp instance.
|
||||
|
||||
Raises:
|
||||
ValueError: If the passed-in artifact_uri is neither a PipelineParam nor a
|
||||
constant string value.
|
||||
"""
|
||||
|
||||
if isinstance(artifact_uri, _pipeline_param.PipelineParam):
|
||||
input_param = artifact_uri
|
||||
elif isinstance(artifact_uri, str):
|
||||
input_param = _pipeline_param.PipelineParam(name='uri',
|
||||
value=artifact_uri,
|
||||
param_type='String')
|
||||
else:
|
||||
raise ValueError(
|
||||
'Importer got unexpected artifact_uri: {} of type: {}.'.format(
|
||||
artifact_uri, type(artifact_uri)))
|
||||
|
||||
old_warn_value = _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
|
||||
_container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
|
||||
|
||||
task = _container_op.ContainerOp(
|
||||
name='importer',
|
||||
image='importer_image', # TODO: need a v1 implementation of importer.
|
||||
file_outputs={
|
||||
OUTPUT_KEY:
|
||||
"{{{{$.outputs.artifacts['{}'].uri}}}}".format(OUTPUT_KEY)
|
||||
},
|
||||
)
|
||||
_container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value
|
||||
|
||||
artifact_type_schema = type_utils.get_artifact_type_schema(artifact_class)
|
||||
task.importer_spec = _build_importer_spec(
|
||||
artifact_uri=artifact_uri, artifact_type_schema=artifact_type_schema)
|
||||
task.task_spec = _build_importer_task_spec(importer_base_name=task.name,
|
||||
artifact_uri=artifact_uri)
|
||||
task.component_spec = _build_importer_component_spec(
|
||||
importer_base_name=task.name, artifact_type_schema=artifact_type_schema)
|
||||
task.inputs = [input_param]
|
||||
|
||||
return task
|
||||
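A usage sketch (the bucket URI and pipeline name are hypothetical):

from kfp import dsl
from kfp.v2.components.types.artifact_types import Dataset

@dsl.pipeline(name='importer-demo')
def my_pipeline():
    imported = importer(artifact_uri='gs://my-bucket/dataset.csv',
                        artifact_class=Dataset)
    # Downstream tasks can consume imported.outputs['artifact'].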
|
|
@ -0,0 +1,167 @@
|
|||
# Copyright 2020 The Kubeflow Authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from absl.testing import parameterized
|
||||
import unittest
|
||||
|
||||
from google.protobuf import json_format
|
||||
|
||||
from kfp.dsl import _pipeline_param
|
||||
from kfp.pipeline_spec import pipeline_spec_pb2 as pb
|
||||
from kfp.v2.components import importer_node
|
||||
|
||||
|
||||
class ImporterNodeTest(parameterized.TestCase):
|
||||
|
||||
@parameterized.parameters(
|
||||
{
|
||||
# artifact_uri is a constant value
|
||||
'input_uri':
|
||||
'gs://artifact',
|
||||
'artifact_type_schema':
|
||||
pb.ArtifactTypeSchema(schema_title='system.Dataset'),
|
||||
'expected_result': {
|
||||
'artifactUri': {
|
||||
'constantValue': {
|
||||
'stringValue': 'gs://artifact'
|
||||
}
|
||||
},
|
||||
'typeSchema': {
|
||||
'schemaTitle': 'system.Dataset'
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
# artifact_uri is from PipelineParam
|
||||
'input_uri':
|
||||
_pipeline_param.PipelineParam(name='uri_to_import'),
|
||||
'artifact_type_schema':
|
||||
pb.ArtifactTypeSchema(schema_title='system.Model'),
|
||||
'expected_result': {
|
||||
'artifactUri': {
|
||||
'runtimeParameter': 'uri'
|
||||
},
|
||||
'typeSchema': {
|
||||
'schemaTitle': 'system.Model'
|
||||
}
|
||||
},
|
||||
})
|
||||
def test_build_importer_spec(self, input_uri, artifact_type_schema,
|
||||
expected_result):
|
||||
expected_importer_spec = pb.PipelineDeploymentConfig.ImporterSpec()
|
||||
json_format.ParseDict(expected_result, expected_importer_spec)
|
||||
importer_spec = importer_node._build_importer_spec(
|
||||
artifact_uri=input_uri, artifact_type_schema=artifact_type_schema)
|
||||
|
||||
self.maxDiff = None
|
||||
self.assertEqual(expected_importer_spec, importer_spec)
|
||||
|
||||
@parameterized.parameters(
|
||||
{
|
||||
# artifact_uri is a constant value
|
||||
'importer_name': 'importer-1',
|
||||
'input_uri': 'gs://artifact',
|
||||
'expected_result': {
|
||||
'taskInfo': {
|
||||
'name': 'importer-1'
|
||||
},
|
||||
'inputs': {
|
||||
'parameters': {
|
||||
'uri': {
|
||||
'runtimeValue': {
|
||||
'constantValue': {
|
||||
'stringValue': 'gs://artifact'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
'componentRef': {
|
||||
'name': 'comp-importer-1'
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
# artifact_uri is from PipelineParam
|
||||
'importer_name': 'importer-2',
|
||||
'input_uri': _pipeline_param.PipelineParam(name='uri_to_import'),
|
||||
'expected_result': {
|
||||
'taskInfo': {
|
||||
'name': 'importer-2'
|
||||
},
|
||||
'inputs': {
|
||||
'parameters': {
|
||||
'uri': {
|
||||
'componentInputParameter': 'uri_to_import'
|
||||
}
|
||||
}
|
||||
},
|
||||
'componentRef': {
|
||||
'name': 'comp-importer-2'
|
||||
},
|
||||
},
|
||||
})
|
||||
def test_build_importer_task_spec(self, importer_name, input_uri,
|
||||
expected_result):
|
||||
expected_task_spec = pb.PipelineTaskSpec()
|
||||
json_format.ParseDict(expected_result, expected_task_spec)
|
||||
|
||||
task_spec = importer_node._build_importer_task_spec(
|
||||
importer_base_name=importer_name, artifact_uri=input_uri)
|
||||
|
||||
self.maxDiff = None
|
||||
self.assertEqual(expected_task_spec, task_spec)
|
||||
|
||||
def test_build_importer_component_spec(self):
|
||||
expected_importer_component = {
|
||||
'inputDefinitions': {
|
||||
'parameters': {
|
||||
'uri': {
|
||||
'type': 'STRING'
|
||||
}
|
||||
}
|
||||
},
|
||||
'outputDefinitions': {
|
||||
'artifacts': {
|
||||
'artifact': {
|
||||
'artifactType': {
|
||||
'schemaTitle': 'system.Artifact'
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
'executorLabel': 'exec-importer-1'
|
||||
}
|
||||
expected_importer_comp_spec = pb.ComponentSpec()
|
||||
json_format.ParseDict(expected_importer_component,
|
||||
expected_importer_comp_spec)
|
||||
importer_comp_spec = importer_node._build_importer_component_spec(
|
||||
importer_base_name='importer-1',
|
||||
artifact_type_schema=pb.ArtifactTypeSchema(
|
||||
schema_title='system.Artifact'))
|
||||
|
||||
self.maxDiff = None
|
||||
self.assertEqual(expected_importer_comp_spec, importer_comp_spec)
|
||||
|
||||
def test_import_with_invalid_artifact_uri_value_should_fail(self):
|
||||
from kfp.v2.components.types.artifact_types import Dataset
|
||||
with self.assertRaisesRegex(
|
||||
ValueError,
|
||||
"Importer got unexpected artifact_uri: 123 of type: <class 'int'>."
|
||||
):
|
||||
importer_node.importer(artifact_uri=123, artifact_class=Dataset)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
# Copyright 2021 The Kubeflow Authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
|
@ -0,0 +1,451 @@
|
|||
# Copyright 2021 The Kubeflow Authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Classes for input/output Artifacts in KFP SDK.
|
||||
|
||||
These are only compatible with v2 Pipelines.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Dict, Generic, List, Optional, Type, TypeVar, Union
|
||||
|
||||
_GCS_LOCAL_MOUNT_PREFIX = '/gcs/'
|
||||
_MINIO_LOCAL_MOUNT_PREFIX = '/minio/'
|
||||
_S3_LOCAL_MOUNT_PREFIX = '/s3/'
|
||||
|
||||
|
||||
class Artifact(object):
|
||||
"""Generic Artifact class.
|
||||
|
||||
This class is meant to represent the metadata around an input or output
|
||||
machine-learning Artifact. Artifacts have URIs, which can either be a location
|
||||
on disk (or Cloud storage) or some other resource identifier such as
|
||||
an API resource name.
|
||||
|
||||
Artifacts carry a `metadata` field, which is a dictionary for storing
|
||||
metadata related to this artifact.
|
||||
"""
|
||||
TYPE_NAME = 'system.Artifact'
|
||||
|
||||
def __init__(self,
|
||||
name: Optional[str] = None,
|
||||
uri: Optional[str] = None,
|
||||
metadata: Optional[Dict] = None):
|
||||
"""Initializes the Artifact with the given name, URI and metadata."""
|
||||
self.uri = uri or ''
|
||||
self.name = name or ''
|
||||
self.metadata = metadata or {}
|
||||
|
||||
@property
|
||||
def path(self):
|
||||
return self._get_path()
|
||||
|
||||
@path.setter
|
||||
def path(self, path):
|
||||
self._set_path(path)
|
||||
|
||||
def _get_path(self) -> Optional[str]:
|
||||
if self.uri.startswith('gs://'):
|
||||
return _GCS_LOCAL_MOUNT_PREFIX + self.uri[len('gs://'):]
|
||||
elif self.uri.startswith('minio://'):
|
||||
return _MINIO_LOCAL_MOUNT_PREFIX + self.uri[len('minio://'):]
|
||||
elif self.uri.startswith('s3://'):
|
||||
return _S3_LOCAL_MOUNT_PREFIX + self.uri[len('s3://'):]
|
||||
return None
|
||||
|
||||
def _set_path(self, path):
|
||||
if path.startswith(_GCS_LOCAL_MOUNT_PREFIX):
|
||||
path = 'gs://' + path[len(_GCS_LOCAL_MOUNT_PREFIX):]
|
||||
elif path.startswith(_MINIO_LOCAL_MOUNT_PREFIX):
|
||||
path = 'minio://' + path[len(_MINIO_LOCAL_MOUNT_PREFIX):]
|
||||
elif path.startswith(_S3_LOCAL_MOUNT_PREFIX):
|
||||
path = 's3://' + path[len(_S3_LOCAL_MOUNT_PREFIX):]
|
||||
self.uri = path
|
||||
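A small sketch of the URI <-> local-path mapping implemented by _get_path and _set_path:

artifact = Artifact(uri='gs://my-bucket/data.csv')
assert artifact.path == '/gcs/my-bucket/data.csv'

artifact.path = '/s3/other-bucket/model.pkl'
assert artifact.uri == 's3://other-bucket/model.pkl'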
|
||||
|
||||
class Model(Artifact):
|
||||
"""An artifact representing an ML Model."""
|
||||
TYPE_NAME = 'system.Model'
|
||||
|
||||
def __init__(self,
|
||||
name: Optional[str] = None,
|
||||
uri: Optional[str] = None,
|
||||
metadata: Optional[Dict] = None):
|
||||
super().__init__(uri=uri, name=name, metadata=metadata)
|
||||
|
||||
@property
|
||||
def framework(self) -> str:
|
||||
return self._get_framework()
|
||||
|
||||
def _get_framework(self) -> str:
|
||||
return self.metadata.get('framework', '')
|
||||
|
||||
@framework.setter
|
||||
def framework(self, framework: str):
|
||||
self._set_framework(framework)
|
||||
|
||||
def _set_framework(self, framework: str):
|
||||
self.metadata['framework'] = framework
|
||||
|
||||
|
||||
class Dataset(Artifact):
|
||||
"""An artifact representing an ML Dataset."""
|
||||
TYPE_NAME = 'system.Dataset'
|
||||
|
||||
def __init__(self,
|
||||
name: Optional[str] = None,
|
||||
uri: Optional[str] = None,
|
||||
metadata: Optional[Dict] = None):
|
||||
super().__init__(uri=uri, name=name, metadata=metadata)
|
||||
|
||||
|
||||
class Metrics(Artifact):
|
||||
"""Represent a simple base Artifact type to store key-value scalar metrics."""
|
||||
TYPE_NAME = 'system.Metrics'
|
||||
|
||||
def __init__(self,
|
||||
name: Optional[str] = None,
|
||||
uri: Optional[str] = None,
|
||||
metadata: Optional[Dict] = None):
|
||||
super().__init__(uri=uri, name=name, metadata=metadata)
|
||||
|
||||
def log_metric(self, metric: str, value: float):
|
||||
"""Sets a custom scalar metric.
|
||||
|
||||
Args:
|
||||
metric: Metric key
|
||||
value: Value of the metric.
|
||||
"""
|
||||
self.metadata[metric] = value
|
||||
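A short sketch: scalar metrics end up as plain key-value metadata.

metrics = Metrics()
metrics.log_metric('accuracy', 0.92)
assert metrics.metadata == {'accuracy': 0.92}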
|
||||
|
||||
class ClassificationMetrics(Artifact):
|
||||
"""Represents Artifact class to store Classification Metrics."""
|
||||
TYPE_NAME = 'system.ClassificationMetrics'
|
||||
|
||||
def __init__(self,
|
||||
name: Optional[str] = None,
|
||||
uri: Optional[str] = None,
|
||||
metadata: Optional[Dict] = None):
|
||||
super().__init__(uri=uri, name=name, metadata=metadata)
|
||||
|
||||
def log_roc_data_point(self, fpr: float, tpr: float, threshold: float):
|
||||
"""Logs a single data point in the ROC Curve.
|
||||
|
||||
Args:
|
||||
fpr: False positive rate value of the data point.
|
||||
tpr: True positive rate value of the data point.
|
||||
threshold: Threshold value for the data point.
|
||||
"""
|
||||
|
||||
roc_reading = {
|
||||
'confidenceThreshold': threshold,
|
||||
'recall': tpr,
|
||||
'falsePositiveRate': fpr
|
||||
}
|
||||
if 'confidenceMetrics' not in self.metadata.keys():
|
||||
self.metadata['confidenceMetrics'] = []
|
||||
|
||||
self.metadata['confidenceMetrics'].append(roc_reading)
|
||||
|
||||
def log_roc_curve(self, fpr: List[float], tpr: List[float],
|
||||
threshold: List[float]):
|
||||
"""Logs an ROC curve.
|
||||
|
||||
The list length of fpr, tpr and threshold must be the same.
|
||||
|
||||
Args:
|
||||
fpr: List of false positive rate values.
|
||||
tpr: List of true positive rate values.
|
||||
threshold: List of threshold values.
|
||||
"""
|
||||
if len(fpr) != len(tpr) or len(fpr) != len(threshold) or len(
|
||||
tpr) != len(threshold):
|
||||
raise ValueError(
|
||||
'Length of fpr, tpr and threshold must be the same. '
|
||||
'Got lengths {}, {} and {} respectively.'.format(
|
||||
len(fpr), len(tpr), len(threshold)))
|
||||
|
||||
for i in range(len(fpr)):
|
||||
self.log_roc_data_point(fpr=fpr[i],
|
||||
tpr=tpr[i],
|
||||
threshold=threshold[i])
|
||||
|
||||
def set_confusion_matrix_categories(self, categories: List[str]):
|
||||
"""Stores confusion matrix categories.
|
||||
|
||||
Args:
|
||||
categories: List of strings specifying the categories.
|
||||
"""
|
||||
|
||||
self._categories = []
|
||||
annotation_specs = []
|
||||
for category in categories:
|
||||
annotation_spec = {'displayName': category}
|
||||
self._categories.append(category)
|
||||
annotation_specs.append(annotation_spec)
|
||||
|
||||
self._matrix = []
|
||||
for row in range(len(self._categories)):
|
||||
self._matrix.append({'row': [0] * len(self._categories)})
|
||||
|
||||
self._confusion_matrix = {}
|
||||
self._confusion_matrix['annotationSpecs'] = annotation_specs
|
||||
self._confusion_matrix['rows'] = self._matrix
|
||||
self.metadata['confusionMatrix'] = self._confusion_matrix
|
||||
|
||||
def log_confusion_matrix_row(self, row_category: str, row: List[float]):
|
||||
"""Logs a confusion matrix row.
|
||||
|
||||
Args:
|
||||
row_category: Category to which the row belongs.
|
||||
row: List of integers specifying the values for the row.
|
||||
|
||||
Raises:
|
||||
ValueError: If row_category is not in the list of categories
|
||||
set in set_categories call.
|
||||
"""
|
||||
if row_category not in self._categories:
|
||||
raise ValueError('Invalid category: {} passed. Expected one of: {}'.\
|
||||
format(row_category, self._categories))
|
||||
|
||||
if len(row) != len(self._categories):
|
||||
raise ValueError('Invalid row. Expected size: {} got: {}'.\
|
||||
format(len(self._categories), len(row)))
|
||||
|
||||
self._matrix[self._categories.index(row_category)] = {'row': row}
|
||||
self.metadata['confusionMatrix'] = self._confusion_matrix
|
||||
|
||||
def log_confusion_matrix_cell(self, row_category: str, col_category: str,
|
||||
value: int):
|
||||
"""Logs a cell in the confusion matrix.
|
||||
|
||||
Args:
|
||||
row_category: String representing the name of the row category.
|
||||
col_category: String representing the name of the column category.
|
||||
value: Int value of the cell.
|
||||
|
||||
Raises:
|
||||
ValueError: If row_category or col_category is not in the list of
|
||||
categories set in set_categories.
|
||||
"""
|
||||
if row_category not in self._categories:
|
||||
raise ValueError('Invalid category: {} passed. Expected one of: {}'.\
|
||||
format(row_category, self._categories))
|
||||
|
||||
if col_category not in self._categories:
|
||||
raise ValueError('Invalid category: {} passed. Expected one of: {}'.\
|
||||
format(col_category, self._categories))
|
||||
|
||||
self._matrix[self._categories.index(row_category)]['row'][
|
||||
self._categories.index(col_category)] = value
|
||||
self.metadata['confusionMatrix'] = self._confusion_matrix
|
||||
|
||||
def log_confusion_matrix(self, categories: List[str],
|
||||
matrix: List[List[int]]):
|
||||
"""Logs a confusion matrix.
|
||||
|
||||
Args:
|
||||
categories: List of the category names.
|
||||
matrix: Complete confusion matrix.
|
||||
|
||||
Raises:
|
||||
ValueError: Length of categories does not match number of rows or columns.
|
||||
"""
|
||||
self.set_confusion_matrix_categories(categories)
|
||||
|
||||
if len(matrix) != len(categories):
|
||||
raise ValueError('Invalid matrix: {} passed for categories: {}'.\
|
||||
format(matrix, categories))
|
||||
|
||||
for index in range(len(categories)):
|
||||
if len(matrix[index]) != len(categories):
|
||||
raise ValueError('Invalid matrix: {} passed for categories: {}'.\
|
||||
format(matrix, categories))
|
||||
|
||||
self.log_confusion_matrix_row(categories[index], matrix[index])
|
||||
|
||||
self.metadata['confusionMatrix'] = self._confusion_matrix
|
||||
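A short usage sketch of the logging methods above (values are illustrative):

metrics = ClassificationMetrics()
metrics.log_roc_curve(fpr=[0.1, 0.2], tpr=[0.9, 0.95], threshold=[0.8, 0.5])
metrics.log_confusion_matrix(categories=['cat', 'dog'],
                             matrix=[[10, 2], [1, 12]])
# metrics.metadata now carries 'confidenceMetrics' and 'confusionMatrix'.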
|
||||
|
||||
class SlicedClassificationMetrics(Artifact):
|
||||
"""Metrics class representing Sliced Classification Metrics.
|
||||
|
||||
Similar to ClassificationMetrics, clients using this class are expected to use
|
||||
the log methods of the class to log metrics, with the difference being that each
|
||||
log method takes a slice to associate the metrics with.
|
||||
|
||||
"""
|
||||
|
||||
TYPE_NAME = 'system.SlicedClassificationMetrics'
|
||||
|
||||
def __init__(self,
|
||||
name: Optional[str] = None,
|
||||
uri: Optional[str] = None,
|
||||
metadata: Optional[Dict] = None):
|
||||
super().__init__(uri=uri, name=name, metadata=metadata)
# Store per-slice ClassificationMetrics instances; the log methods below
# lazily add an entry per slice via _upsert_classification_metrics_for_slice.
self._sliced_metrics: Dict[str, ClassificationMetrics] = {}
|
||||
|
||||
def _upsert_classification_metrics_for_slice(self, slice: str):
|
||||
"""Upserts the classification metrics instance for a slice."""
|
||||
if slice not in self._sliced_metrics:
|
||||
self._sliced_metrics[slice] = ClassificationMetrics()
|
||||
|
||||
def _update_metadata(self, slice: str):
|
||||
"""Updates metadata to adhere to the metrics schema."""
|
||||
self.metadata = {}
|
||||
self.metadata['evaluationSlices'] = []
|
||||
for slice in self._sliced_metrics.keys():
|
||||
slice_metrics = {
|
||||
'slice':
|
||||
slice,
|
||||
'sliceClassificationMetrics':
|
||||
self._sliced_metrics[slice].metadata
|
||||
}
|
||||
self.metadata['evaluationSlices'].append(slice_metrics)
|
||||
|
||||
def log_roc_reading(self, slice: str, threshold: float, tpr: float,
|
||||
fpr: float):
|
||||
"""Logs a single data point in the ROC Curve of a slice.
|
||||
|
||||
Args:
|
||||
slice: String representing slice label.
|
||||
threshold: Threshold value for the data point.
|
||||
tpr: True positive rate value of the data point.
|
||||
fpr: False positive rate value of the data point.
|
||||
"""
|
||||
|
||||
self._upsert_classification_metrics_for_slice(slice)
|
||||
# ClassificationMetrics exposes log_roc_data_point; delegate to it.
self._sliced_metrics[slice].log_roc_data_point(fpr=fpr, tpr=tpr, threshold=threshold)
|
||||
self._update_metadata(slice)
|
||||
|
||||
def load_roc_readings(self, slice: str, readings: List[List[float]]):
|
||||
"""Supports bulk loading ROC Curve readings for a slice.
|
||||
|
||||
Args:
|
||||
slice: String representing slice label.
|
||||
readings: A 2-D list providing ROC Curve data points.
|
||||
The expected order of the data points is: threshold,
|
||||
true_positive_rate, false_positive_rate.
|
||||
"""
|
||||
self._upsert_classification_metrics_for_slice(slice)
|
||||
# Each reading is ordered as: threshold, true_positive_rate, false_positive_rate.
self._sliced_metrics[slice].log_roc_curve(
fpr=[reading[2] for reading in readings],
tpr=[reading[1] for reading in readings],
threshold=[reading[0] for reading in readings])
|
||||
self._update_metadata(slice)
|
||||
|
||||
def set_confusion_matrix_categories(self, slice: str,
|
||||
categories: List[str]):
|
||||
"""Stores confusion matrix categories for a slice..
|
||||
|
||||
Categories are stored in the internal metrics_utils.ConfusionMatrix
|
||||
instance of the slice.
|
||||
|
||||
Args:
|
||||
slice: String representing slice label.
|
||||
categories: List of strings specifying the categories.
|
||||
"""
|
||||
self._upsert_classification_metrics_for_slice(slice)
|
||||
self._sliced_metrics[slice].set_confusion_matrix_categories(categories)
|
||||
self._update_metadata(slice)
|
||||
|
||||
def log_confusion_matrix_row(self, slice: str, row_category: str,
|
||||
row: List[int]):
|
||||
"""Logs a confusion matrix row for a slice.
|
||||
|
||||
Row is updated on the internal metrics_utils.ConfusionMatrix
|
||||
instance of the slice.
|
||||
|
||||
Args:
|
||||
slice: String representing slice label.
|
||||
row_category: Category to which the row belongs.
|
||||
row: List of integers specifying the values for the row.
|
||||
"""
|
||||
self._upsert_classification_metrics_for_slice(slice)
|
||||
self._sliced_metrics[slice].log_confusion_matrix_row(row_category, row)
|
||||
self._update_metadata(slice)
|
||||
|
||||
def log_confusion_matrix_cell(self, slice: str, row_category: str,
|
||||
col_category: str, value: int):
|
||||
"""Logs a confusion matrix cell for a slice..
|
||||
|
||||
Cell is updated on the internal metrics_utils.ConfusionMatrix
|
||||
instance of the slice.
|
||||
|
||||
Args:
|
||||
slice: String representing slice label.
|
||||
row_category: String representing the name of the row category.
|
||||
col_category: String representing the name of the column category.
|
||||
value: Int value of the cell.
|
||||
"""
|
||||
self._upsert_classification_metrics_for_slice(slice)
|
||||
self._sliced_metrics[slice].log_confusion_matrix_cell(
|
||||
row_category, col_category, value)
|
||||
self._update_metadata(slice)
|
||||
|
||||
    def load_confusion_matrix(self, slice: str, categories: List[str],
                              matrix: List[List[int]]):
        """Supports bulk loading the whole confusion matrix for a slice.

        Args:
          slice: String representing slice label.
          categories: List of the category names.
          matrix: Complete confusion matrix.
        """
        self._upsert_classification_metrics_for_slice(slice)
        # Delegate to the bulk log_confusion_matrix method (exercised by the
        # tests below); log_confusion_matrix_cell takes a single cell, not a
        # whole matrix.
        self._sliced_metrics[slice].log_confusion_matrix(categories, matrix)
        self._update_metadata(slice)
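
# A hedged usage sketch of SlicedClassificationMetrics (slice names and
# values below are illustrative, not taken from the tests):
#
#   sliced = SlicedClassificationMetrics()
#   sliced.log_roc_reading('slice_a', threshold=0.5, tpr=0.85, fpr=0.12)
#   sliced.set_confusion_matrix_categories('slice_a', ['cat', 'dog'])
#   sliced.log_confusion_matrix_row('slice_a', 'cat', [10, 2])
#   # sliced.metadata now holds {'evaluationSlices': [{'slice': 'slice_a', ...}]}
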
class HTML(Artifact):
    """An artifact representing an HTML file."""
    TYPE_NAME = 'system.HTML'

    def __init__(self,
                 name: Optional[str] = None,
                 uri: Optional[str] = None,
                 metadata: Optional[Dict] = None):
        super().__init__(uri=uri, name=name, metadata=metadata)


class Markdown(Artifact):
    """An artifact representing a Markdown file."""
    TYPE_NAME = 'system.Markdown'

    def __init__(self,
                 name: Optional[str] = None,
                 uri: Optional[str] = None,
                 metadata: Optional[Dict] = None):
        super().__init__(uri=uri, name=name, metadata=metadata)
_SCHEMA_TITLE_TO_TYPE: Dict[str, Artifact] = {
    x.TYPE_NAME: x
    for x in [Artifact, Model, Dataset, Metrics, ClassificationMetrics]
}
def create_runtime_artifact(runtime_artifact: Dict) -> Artifact:
    """Creates an Artifact instance from the specified RuntimeArtifact.

    Args:
      runtime_artifact: Dictionary representing JSON-encoded RuntimeArtifact.
    """
    schema_title = runtime_artifact.get('type', {}).get('schemaTitle', '')

    artifact_type = _SCHEMA_TITLE_TO_TYPE.get(schema_title)
    if not artifact_type:
        artifact_type = Artifact
    return artifact_type(
        uri=runtime_artifact.get('uri', ''),
        name=runtime_artifact.get('name', ''),
        metadata=runtime_artifact.get('metadata', {}),
    )
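
# A hedged sketch of create_runtime_artifact in action (the dict mirrors the
# RuntimeArtifact JSON fields the function reads; values are illustrative):
#
#   artifact = create_runtime_artifact({
#       'name': 'my-model',
#       'uri': 'gs://my-bucket/model',
#       'type': {'schemaTitle': 'system.Model'},
#   })
#   isinstance(artifact, Model)  # True; unknown schema titles fall back to
#                                # the base Artifact class.
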
@ -0,0 +1,58 @@
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for kfp.v2.components.types.artifact_types."""

import unittest
import json
import os

from kfp.v2.components.types import artifact_types


class ArtifactsTest(unittest.TestCase):

    def test_complex_metrics(self):
        metrics = artifact_types.ClassificationMetrics()
        metrics.log_roc_data_point(threshold=0.1, tpr=98.2, fpr=96.2)
        metrics.log_roc_data_point(threshold=24.3, tpr=24.5, fpr=98.4)
        metrics.set_confusion_matrix_categories(['dog', 'cat', 'horses'])
        metrics.log_confusion_matrix_row('dog', [2, 6, 0])
        metrics.log_confusion_matrix_cell('cat', 'dog', 3)
        metrics.log_confusion_matrix_cell('horses', 'horses', 3)
        metrics.metadata['test'] = 1.0
        with open(
                os.path.join(os.path.dirname(__file__), 'test_data',
                             'expected_io_types_classification_metrics.json')
        ) as json_file:
            expected_json = json.load(json_file)
            self.assertEqual(expected_json, metrics.metadata)

    def test_complex_metrics_bulk_loading(self):
        metrics = artifact_types.ClassificationMetrics()
        metrics.log_roc_curve(fpr=[85.1, 85.1, 85.1],
                              tpr=[52.6, 52.6, 52.6],
                              threshold=[53.6, 53.6, 53.6])
        metrics.log_confusion_matrix(['dog', 'cat', 'horses'],
                                     [[2, 6, 0], [3, 5, 6], [5, 7, 8]])
        with open(
                os.path.join(
                    os.path.dirname(__file__), 'test_data',
                    'expected_io_types_bulk_load_classification_metrics.json')
        ) as json_file:
            expected_json = json.load(json_file)
            self.assertEqual(expected_json, metrics.metadata)


if __name__ == '__main__':
    unittest.main()
@ -0,0 +1,10 @@
{
  "annotationSpecs": [
    {"displayName": "dog"},
    {"displayName": "cat"},
    {"displayName": "horses"}],
  "row": [
    [2, 6, 0],
    [3, 5, 6],
    [5, 7, 8]]
}
@ -0,0 +1,10 @@
{
  "annotationSpecs": [
    {"displayName": "dog"},
    {"displayName": "cat"},
    {"displayName": "horses"}],
  "row": [
    [2, 6, 0],
    [3, 0, 0],
    [0, 0, 0]]
}
@ -0,0 +1,133 @@
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes for input/output type annotations in KFP SDK.

These are only compatible with v2 Pipelines.
"""

from typing import TypeVar, Union

T = TypeVar('T')
class OutputPath:
    """Annotation for indicating a variable is a path to an output."""

    def __init__(self, type=None):
        self.type = type


class InputPath:
    """Annotation for indicating a variable is a path to an input."""

    def __init__(self, type=None):
        self.type = type
class InputAnnotation:
    """Marker type for input artifacts."""
    pass


class OutputAnnotation:
    """Marker type for output artifacts."""
    pass


# TODO: Use typing.Annotated instead of this hack.
# With typing.Annotated (Python 3.9+ or typing_extensions package), the
# following would look like:
#   Input = typing.Annotated[T, InputAnnotation]
#   Output = typing.Annotated[T, OutputAnnotation]

# Input represents an Input artifact of type T.
Input = Union[T, InputAnnotation]

# Output represents an Output artifact of type T.
Output = Union[T, OutputAnnotation]
def is_artifact_annotation(typ) -> bool:
    if hasattr(typ, '_subs_tree'):  # Python 3.6
        subs_tree = typ._subs_tree()
        return (len(subs_tree) == 3 and subs_tree[0] == Union and
                subs_tree[2] in [InputAnnotation, OutputAnnotation])

    if not hasattr(typ, '__origin__'):
        return False

    if typ.__origin__ != Union and type(typ.__origin__) != type(Union):
        return False

    if not hasattr(typ, '__args__') or len(typ.__args__) != 2:
        return False

    if typ.__args__[1] not in [InputAnnotation, OutputAnnotation]:
        return False

    return True


def is_input_artifact(typ) -> bool:
    """Returns True if typ is of type Input[T]."""
    if not is_artifact_annotation(typ):
        return False

    if hasattr(typ, '_subs_tree'):  # Python 3.6
        subs_tree = typ._subs_tree()
        return len(subs_tree) == 3 and subs_tree[2] == InputAnnotation

    return typ.__args__[1] == InputAnnotation


def is_output_artifact(typ) -> bool:
    """Returns True if typ is of type Output[T]."""
    if not is_artifact_annotation(typ):
        return False

    if hasattr(typ, '_subs_tree'):  # Python 3.6
        subs_tree = typ._subs_tree()
        return len(subs_tree) == 3 and subs_tree[2] == OutputAnnotation

    return typ.__args__[1] == OutputAnnotation


def get_io_artifact_class(typ):
    if not is_artifact_annotation(typ):
        return None
    if typ == Input or typ == Output:
        return None

    if hasattr(typ, '_subs_tree'):  # Python 3.6
        subs_tree = typ._subs_tree()
        if len(subs_tree) != 3:
            return None
        return subs_tree[1]

    return typ.__args__[0]


def get_io_artifact_annotation(typ):
    if not is_artifact_annotation(typ):
        return None

    if hasattr(typ, '_subs_tree'):  # Python 3.6
        subs_tree = typ._subs_tree()
        if len(subs_tree) != 3:
            return None
        return subs_tree[2]

    return typ.__args__[1]
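For orientation, a hedged sketch of how these annotations are consumed in user code (component, Dataset, and Model as exported from kfp.v2.dsl; the function body is illustrative, not from this commit):

from kfp.v2.dsl import Dataset, Input, Model, Output, component

@component
def train(dataset: Input[Dataset], model: Output[Model]):
    # The launcher materializes the input under dataset.path and expects
    # the trained model to be written under model.path.
    with open(dataset.path) as f:
        contents = f.read()
    with open(model.path, 'w') as f:
        f.write('weights derived from %d bytes' % len(contents))
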
@ -0,0 +1,79 @@
# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for kfp.v2.components.types.type_annotations."""

import unittest
from typing import List, Optional

from kfp.v2.components.types import type_annotations
from kfp.v2.components.types.artifact_types import Model
from kfp.v2.components.types.type_annotations import Input, InputAnnotation, Output, OutputAnnotation


class AnnotationsTest(unittest.TestCase):

    def test_is_artifact_annotation(self):
        self.assertTrue(type_annotations.is_artifact_annotation(Input[Model]))
        self.assertTrue(type_annotations.is_artifact_annotation(Output[Model]))
        self.assertTrue(
            type_annotations.is_artifact_annotation(Output['MyArtifact']))

        self.assertFalse(type_annotations.is_artifact_annotation(Model))
        self.assertFalse(type_annotations.is_artifact_annotation(int))
        self.assertFalse(type_annotations.is_artifact_annotation('Dataset'))
        self.assertFalse(type_annotations.is_artifact_annotation(List[str]))
        self.assertFalse(type_annotations.is_artifact_annotation(Optional[str]))

    def test_is_input_artifact(self):
        self.assertTrue(type_annotations.is_input_artifact(Input[Model]))
        self.assertTrue(type_annotations.is_input_artifact(Input))

        self.assertFalse(type_annotations.is_input_artifact(Output[Model]))
        self.assertFalse(type_annotations.is_input_artifact(Output))

    def test_is_output_artifact(self):
        self.assertTrue(type_annotations.is_output_artifact(Output[Model]))
        self.assertTrue(type_annotations.is_output_artifact(Output))

        self.assertFalse(type_annotations.is_output_artifact(Input[Model]))
        self.assertFalse(type_annotations.is_output_artifact(Input))

    def test_get_io_artifact_class(self):
        self.assertEqual(type_annotations.get_io_artifact_class(Output[Model]),
                         Model)

        self.assertEqual(type_annotations.get_io_artifact_class(Input), None)
        self.assertEqual(type_annotations.get_io_artifact_class(Output), None)
        self.assertEqual(type_annotations.get_io_artifact_class(Model), None)
        self.assertEqual(type_annotations.get_io_artifact_class(str), None)

    def test_get_io_artifact_annotation(self):
        self.assertEqual(
            type_annotations.get_io_artifact_annotation(Output[Model]),
            OutputAnnotation)
        self.assertEqual(
            type_annotations.get_io_artifact_annotation(Input[Model]),
            InputAnnotation)
        self.assertEqual(type_annotations.get_io_artifact_annotation(Input),
                         InputAnnotation)
        self.assertEqual(type_annotations.get_io_artifact_annotation(Output),
                         OutputAnnotation)

        self.assertEqual(type_annotations.get_io_artifact_annotation(Model),
                         None)
        self.assertEqual(type_annotations.get_io_artifact_annotation(str), None)


if __name__ == '__main__':
    unittest.main()
@ -0,0 +1,160 @@
# Copyright 2020 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for component I/O type mapping."""
import inspect
from typing import Dict, List, Optional, Type, Union

from kfp.components import structures
from kfp.components import type_annotation_utils
from kfp.pipeline_spec import pipeline_spec_pb2
from kfp.v2.components.types import artifact_types

# ComponentSpec I/O types to DSL ontology artifact classes mapping.
_ARTIFACT_CLASSES_MAPPING = {
    'model': artifact_types.Model,
    'dataset': artifact_types.Dataset,
    'metrics': artifact_types.Metrics,
    'classificationmetrics': artifact_types.ClassificationMetrics,
    'slicedclassificationmetrics': artifact_types.SlicedClassificationMetrics,
    'html': artifact_types.HTML,
    'markdown': artifact_types.Markdown,
}

# ComponentSpec I/O types to (IR) PipelineTaskSpec I/O types mapping.
# The keys are normalized (lowercased). These are types viewed as Parameters.
# The values are the corresponding IR parameter primitive types.
_PARAMETER_TYPES_MAPPING = {
    'integer': pipeline_spec_pb2.PrimitiveType.INT,
    'int': pipeline_spec_pb2.PrimitiveType.INT,
    'double': pipeline_spec_pb2.PrimitiveType.DOUBLE,
    'float': pipeline_spec_pb2.PrimitiveType.DOUBLE,
    'string': pipeline_spec_pb2.PrimitiveType.STRING,
    'str': pipeline_spec_pb2.PrimitiveType.STRING,
    'text': pipeline_spec_pb2.PrimitiveType.STRING,
    'bool': pipeline_spec_pb2.PrimitiveType.STRING,
    'boolean': pipeline_spec_pb2.PrimitiveType.STRING,
    'dict': pipeline_spec_pb2.PrimitiveType.STRING,
    'list': pipeline_spec_pb2.PrimitiveType.STRING,
    'jsonobject': pipeline_spec_pb2.PrimitiveType.STRING,
    'jsonarray': pipeline_spec_pb2.PrimitiveType.STRING,
}

# Mapping primitive types to their IR message field names.
# This is used in constructing condition strings.
_PARAMETER_TYPES_VALUE_REFERENCE_MAPPING = {
    pipeline_spec_pb2.PrimitiveType.INT: 'int_value',
    pipeline_spec_pb2.PrimitiveType.DOUBLE: 'double_value',
    pipeline_spec_pb2.PrimitiveType.STRING: 'string_value',
}
def is_parameter_type(type_name: Optional[Union[str, dict]]) -> bool:
    """Check if a ComponentSpec I/O type is considered as a parameter type.

    Args:
      type_name: type name of the ComponentSpec I/O type.

    Returns:
      True if the type name maps to a parameter type else False.
    """
    if isinstance(type_name, str):
        type_name = type_annotation_utils.get_short_type_name(type_name)
    elif isinstance(type_name, dict):
        type_name = list(type_name.keys())[0]
    else:
        return False

    return type_name.lower() in _PARAMETER_TYPES_MAPPING
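# Illustrative behavior (hedged; results follow from the mapping above):
#
#   is_parameter_type('Integer')  -> True   (normalized to 'integer')
#   is_parameter_type({'JsonObject': {'data_type': '...'}})  -> True
#   is_parameter_type('Model')    -> False  (an artifact type)
#   is_parameter_type(None)       -> False
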
def get_artifact_type_schema(
    artifact_class_or_type_name: Optional[Union[str,
                                                Type[artifact_types.Artifact]]]
) -> pipeline_spec_pb2.ArtifactTypeSchema:
    """Gets the IR I/O artifact type msg for the given ComponentSpec I/O type."""
    artifact_class = artifact_types.Artifact
    if isinstance(artifact_class_or_type_name, str):
        artifact_class = _ARTIFACT_CLASSES_MAPPING.get(
            artifact_class_or_type_name.lower(), artifact_types.Artifact)
    elif inspect.isclass(artifact_class_or_type_name) and issubclass(
            artifact_class_or_type_name, artifact_types.Artifact):
        artifact_class = artifact_class_or_type_name

    return pipeline_spec_pb2.ArtifactTypeSchema(
        schema_title=artifact_class.TYPE_NAME)
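# Illustrative behavior (hedged; results follow from the mappings above):
#
#   get_artifact_type_schema('Model')              # schema_title='system.Model'
#   get_artifact_type_schema(artifact_types.HTML)  # schema_title='system.HTML'
#   get_artifact_type_schema('unknown type name')  # schema_title='system.Artifact'
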
def get_parameter_type(
    param_type: Optional[Union[Type, str, dict]]
) -> pipeline_spec_pb2.PrimitiveType:
    """Get the IR I/O parameter type for the given ComponentSpec I/O type.

    Args:
      param_type: type of the ComponentSpec I/O type. Can be a primitive Python
        builtin type or a type name.

    Returns:
      The enum value of the mapped IR I/O primitive type.

    Raises:
      AttributeError: if type_name is not a string type.
    """
    if type(param_type) == type:
        type_name = param_type.__name__
    elif isinstance(param_type, dict):
        type_name = list(param_type.keys())[0]
    else:
        type_name = type_annotation_utils.get_short_type_name(str(param_type))
    return _PARAMETER_TYPES_MAPPING.get(type_name.lower())
def get_parameter_type_field_name(type_name: Optional[str]) -> str:
    """Get the IR field name for the given primitive type.

    For example: 'str' -> 'string_value', 'double' -> 'double_value', etc.

    Args:
      type_name: type name of the ComponentSpec I/O primitive type.

    Returns:
      The IR value reference field name.

    Raises:
      AttributeError: if type_name is not a string type.
    """
    return _PARAMETER_TYPES_VALUE_REFERENCE_MAPPING.get(
        get_parameter_type(type_name))
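# Illustrative behavior of the two lookups above (hedged):
#
#   get_parameter_type('Integer')            # pipeline_spec_pb2.PrimitiveType.INT
#   get_parameter_type(float)                # pipeline_spec_pb2.PrimitiveType.DOUBLE
#   get_parameter_type_field_name('String')  # 'string_value'
#   get_parameter_type_field_name('Float')   # 'double_value'
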
def get_input_artifact_type_schema(
    input_name: str,
    inputs: List[structures.InputSpec],
) -> Optional[pipeline_spec_pb2.ArtifactTypeSchema]:
    """Find the input artifact type by input name.

    Args:
      input_name: The name of the component input.
      inputs: The list of InputSpec.

    Returns:
      The artifact type schema of the input.

    Raises:
      AssertionError: if input not found, or input found but not an artifact
        type.
    """
    for component_input in inputs:
        if component_input.name == input_name:
            assert not is_parameter_type(
                component_input.type), 'Input is not an artifact type.'
            return get_artifact_type_schema(component_input.type)
    assert False, 'Input not found.'
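
# Illustrative behavior (hedged; InputSpec from kfp.components.structures):
#
#   specs = [structures.InputSpec(name='model_input', type='Model')]
#   get_input_artifact_type_schema('model_input', specs).schema_title
#   # -> 'system.Model'
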
@ -0,0 +1,282 @@
# Copyright 2020 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from absl.testing import parameterized

import sys
import unittest
from typing import Any, Dict, List

from kfp.components import structures
from kfp.pipeline_spec import pipeline_spec_pb2 as pb
from kfp.v2.components.types import artifact_types, type_utils
_PARAMETER_TYPES = [
    'String',
    'str',
    'Integer',
    'int',
    'Float',
    'Double',
    'bool',
    'Boolean',
    'Dict',
    'List',
    'JsonObject',
    'JsonArray',
    {
        'JsonObject': {
            'data_type': 'proto:tfx.components.trainer.TrainArgs'
        }
    },
]
_KNOWN_ARTIFACT_TYPES = ['Model', 'Dataset', 'Schema', 'Metrics']
_UNKNOWN_ARTIFACT_TYPES = [None, 'Arbtrary Model', 'dummy']


class _ArbitraryClass:
    pass
class TypeUtilsTest(parameterized.TestCase):

    def test_is_parameter_type(self):
        for type_name in _PARAMETER_TYPES:
            self.assertTrue(type_utils.is_parameter_type(type_name))
        for type_name in _KNOWN_ARTIFACT_TYPES + _UNKNOWN_ARTIFACT_TYPES:
            self.assertFalse(type_utils.is_parameter_type(type_name))

    @parameterized.parameters(
        {
            'artifact_class_or_type_name': 'Model',
            'expected_result': pb.ArtifactTypeSchema(schema_title='system.Model')
        },
        {
            'artifact_class_or_type_name': artifact_types.Model,
            'expected_result': pb.ArtifactTypeSchema(schema_title='system.Model')
        },
        {
            'artifact_class_or_type_name': 'Dataset',
            'expected_result': pb.ArtifactTypeSchema(schema_title='system.Dataset')
        },
        {
            'artifact_class_or_type_name': artifact_types.Dataset,
            'expected_result': pb.ArtifactTypeSchema(schema_title='system.Dataset')
        },
        {
            'artifact_class_or_type_name': 'Metrics',
            'expected_result': pb.ArtifactTypeSchema(schema_title='system.Metrics')
        },
        {
            'artifact_class_or_type_name': artifact_types.Metrics,
            'expected_result': pb.ArtifactTypeSchema(schema_title='system.Metrics')
        },
        {
            'artifact_class_or_type_name': 'ClassificationMetrics',
            'expected_result':
                pb.ArtifactTypeSchema(schema_title='system.ClassificationMetrics')
        },
        {
            'artifact_class_or_type_name': artifact_types.ClassificationMetrics,
            'expected_result':
                pb.ArtifactTypeSchema(schema_title='system.ClassificationMetrics')
        },
        {
            'artifact_class_or_type_name': 'SlicedClassificationMetrics',
            'expected_result':
                pb.ArtifactTypeSchema(
                    schema_title='system.SlicedClassificationMetrics')
        },
        {
            'artifact_class_or_type_name': artifact_types.SlicedClassificationMetrics,
            'expected_result':
                pb.ArtifactTypeSchema(
                    schema_title='system.SlicedClassificationMetrics')
        },
        {
            'artifact_class_or_type_name': 'arbitrary name',
            'expected_result': pb.ArtifactTypeSchema(schema_title='system.Artifact')
        },
        {
            'artifact_class_or_type_name': _ArbitraryClass,
            'expected_result': pb.ArtifactTypeSchema(schema_title='system.Artifact')
        },
        {
            'artifact_class_or_type_name': artifact_types.HTML,
            'expected_result': pb.ArtifactTypeSchema(schema_title='system.HTML')
        },
        {
            'artifact_class_or_type_name': artifact_types.Markdown,
            'expected_result': pb.ArtifactTypeSchema(schema_title='system.Markdown')
        },
    )
    def test_get_artifact_type_schema(self, artifact_class_or_type_name,
                                      expected_result):
        self.assertEqual(
            expected_result,
            type_utils.get_artifact_type_schema(artifact_class_or_type_name))
    @parameterized.parameters(
        {'given_type': 'Int', 'expected_type': pb.PrimitiveType.INT},
        {'given_type': 'Integer', 'expected_type': pb.PrimitiveType.INT},
        {'given_type': int, 'expected_type': pb.PrimitiveType.INT},
        {'given_type': 'Double', 'expected_type': pb.PrimitiveType.DOUBLE},
        {'given_type': 'Float', 'expected_type': pb.PrimitiveType.DOUBLE},
        {'given_type': float, 'expected_type': pb.PrimitiveType.DOUBLE},
        {'given_type': 'String', 'expected_type': pb.PrimitiveType.STRING},
        {'given_type': 'Text', 'expected_type': pb.PrimitiveType.STRING},
        {'given_type': str, 'expected_type': pb.PrimitiveType.STRING},
        {'given_type': 'Boolean', 'expected_type': pb.PrimitiveType.STRING},
        {'given_type': bool, 'expected_type': pb.PrimitiveType.STRING},
        {'given_type': 'Dict', 'expected_type': pb.PrimitiveType.STRING},
        {'given_type': dict, 'expected_type': pb.PrimitiveType.STRING},
        {'given_type': 'List', 'expected_type': pb.PrimitiveType.STRING},
        {'given_type': list, 'expected_type': pb.PrimitiveType.STRING},
        {'given_type': Dict[str, int], 'expected_type': pb.PrimitiveType.STRING},
        {'given_type': List[Any], 'expected_type': pb.PrimitiveType.STRING},
        {
            'given_type': {
                'JsonObject': {
                    'data_type': 'proto:tfx.components.trainer.TrainArgs'
                }
            },
            'expected_type': pb.PrimitiveType.STRING,
        },
    )
    def test_get_parameter_type(self, given_type, expected_type):
        self.assertEqual(expected_type,
                         type_utils.get_parameter_type(given_type))

        # Test get parameter by Python type.
        self.assertEqual(pb.PrimitiveType.INT,
                         type_utils.get_parameter_type(int))
    def test_get_parameter_type_invalid(self):
        with self.assertRaises(AttributeError):
            # Note: type_utils does not define get_parameter_type_schema, so
            # the AttributeError asserted here is the missing module
            # attribute.
            type_utils.get_parameter_type_schema(None)
    def test_get_input_artifact_type_schema(self):
        input_specs = [
            structures.InputSpec(name='input1', type='String'),
            structures.InputSpec(name='input2', type='Model'),
            structures.InputSpec(name='input3', type=None),
        ]
        # Input not found.
        with self.assertRaises(AssertionError) as cm:
            type_utils.get_input_artifact_type_schema('input0', input_specs)
        self.assertEqual('Input not found.', str(cm.exception))

        # Input found, but it doesn't map to an artifact type.
        with self.assertRaises(AssertionError) as cm:
            type_utils.get_input_artifact_type_schema('input1', input_specs)
        self.assertEqual('Input is not an artifact type.', str(cm.exception))

        # Input found, and a matching artifact type schema returned.
        self.assertEqual(
            'system.Model',
            type_utils.get_input_artifact_type_schema('input2',
                                                      input_specs).schema_title)

        # Input found, and the default artifact type schema returned.
        self.assertEqual(
            'system.Artifact',
            type_utils.get_input_artifact_type_schema('input3',
                                                      input_specs).schema_title)
    def test_get_parameter_type_field_name(self):
        self.assertEqual('string_value',
                         type_utils.get_parameter_type_field_name('String'))
        self.assertEqual('int_value',
                         type_utils.get_parameter_type_field_name('Integer'))
        self.assertEqual('double_value',
                         type_utils.get_parameter_type_field_name('Float'))


if __name__ == '__main__':
    unittest.main()
@ -12,23 +12,28 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from kfp.v2.dsl.component_decorator import component
from kfp.dsl.io_types import (
    Input,
    Output,
from kfp.v2.components.component_decorator import component

from kfp.v2.components.importer_node import importer

from kfp.v2.components.types.artifact_types import (
    Artifact,
    Dataset,
    Model,
    Metrics,
    ClassificationMetrics,
    SlicedClassificationMetrics,
    Dataset,
    HTML,
    Markdown,
    Metrics,
    Model,
    SlicedClassificationMetrics,
)
from kfp.components import (

from kfp.v2.components.types.type_annotations import (
    Input,
    Output,
    InputPath,
    OutputPath,
)

from kfp.dsl import (
    graph_component,
    pipeline,
@ -102,6 +102,7 @@ setup(
        'kfp.v2',
        'kfp.v2.compiler',
        'kfp.v2.components',
        'kfp.v2.components.types',
        'kfp.v2.components.experimental',
        'kfp.v2.dsl',
        'kfp.v2.google.client',
@ -131,5 +132,4 @@ setup(
            'dsl-compile-v2 = kfp.v2.compiler.main:main',
            'kfp=kfp.__main__:main'
        ]
    }
)
})
@ -1279,7 +1279,8 @@ implementation:
        @dsl.pipeline(name='test-pipeline')
        def my_pipeline():
            dsl.importer(artifact_uri='dummy', artifact_class=dsl.io_types.Artifact)
            from kfp.v2.dsl import importer, Artifact
            importer(artifact_uri='dummy', artifact_class=Artifact)

        with self.assertRaisesRegex(
                NotImplementedError,