pipelines/sdk/python/test_data/components/component_with_metadata_fie...

# PIPELINE DEFINITION
# Name: dataset-joiner
# Description: Concatenate dataset_a and dataset_b.
#              Also returns the concatenated string.
# Inputs:
#    dataset_a: system.Dataset
#    dataset_b: system.Dataset
# Outputs:
#    Output: str
#    out_dataset: system.Dataset
components:
  comp-dataset-joiner:
    executorLabel: exec-dataset-joiner
    inputDefinitions:
      artifacts:
        dataset_a:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
          description: First dataset.
        dataset_b:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
          description: Second dataset.
    outputDefinitions:
      artifacts:
        out_dataset:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
          description: The concatenated dataset.
      parameters:
        Output:
          description: The concatenated string.
          parameterType: STRING
deploymentSpec:
  executors:
    exec-dataset-joiner:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - dataset_joiner
        command:
        - sh
        - -c
        - "\nif ! [ -x \"$(command -v pip)\" ]; then\n    python3 -m ensurepip ||\
          \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
          \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\
          \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\
          $0\" \"$@\"\n"
        - sh
        - -ec
        - 'program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"

          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path
          "$program_path/ephemeral_component.py" "$@"

          '
        - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
          \ *\n\ndef dataset_joiner(\n    dataset_a: Input[Dataset],\n    dataset_b:\
          \ Input[Dataset],\n    out_dataset: Output[Dataset],\n) -> str:\n    \"\"\
          \"Concatenate dataset_a and dataset_b.\n\n    Also returns the concatenated\
          \ string.\n\n    Args:\n        dataset_a: First dataset.\n        dataset_b:\
          \ Second dataset.\n\n    Returns:\n        out_dataset: The concatenated\
          \ dataset.\n        Output: The concatenated string.\n    \"\"\"\n    with\
          \ open(dataset_a.path) as f:\n        content_a = f.read()\n\n    with open(dataset_b.path)\
          \ as f:\n        content_b = f.read()\n\n    concatenated_string = content_a\
          \ + content_b\n    with open(out_dataset.path, 'w') as f:\n        f.write(concatenated_string)\n\
          \n    return concatenated_string\n\n"
        image: python:3.7
pipelineInfo:
  name: dataset-joiner
root:
  dag:
    outputs:
      artifacts:
        out_dataset:
          artifactSelectors:
          - outputArtifactKey: out_dataset
            producerSubtask: dataset-joiner
      parameters:
        Output:
          valueFromParameter:
            outputParameterKey: Output
            producerSubtask: dataset-joiner
    tasks:
      dataset-joiner:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-dataset-joiner
        inputs:
          artifacts:
            dataset_a:
              componentInputArtifact: dataset_a
            dataset_b:
              componentInputArtifact: dataset_b
        taskInfo:
          name: dataset-joiner
  inputDefinitions:
    artifacts:
      dataset_a:
        artifactType:
          schemaTitle: system.Dataset
          schemaVersion: 0.0.1
        description: First dataset.
      dataset_b:
        artifactType:
          schemaTitle: system.Dataset
          schemaVersion: 0.0.1
        description: Second dataset.
  outputDefinitions:
    artifacts:
      out_dataset:
        artifactType:
          schemaTitle: system.Dataset
          schemaVersion: 0.0.1
        description: The concatenated dataset.
    parameters:
      Output:
        description: The concatenated string.
        parameterType: STRING
schemaVersion: 2.1.0
sdkVersion: kfp-2.1.2
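
For context, here is a minimal sketch of the Python source this spec is compiled from, reconstructed from the function embedded in deploymentSpec above. The @dsl.component decorator, the compile call, and the output filename are assumed boilerplate rather than part of the YAML; the docstring is what populates the description metadata fields in the input/output definitions.

from kfp import compiler, dsl
from kfp.dsl import Dataset, Input, Output


@dsl.component(base_image='python:3.7')
def dataset_joiner(
    dataset_a: Input[Dataset],
    dataset_b: Input[Dataset],
    out_dataset: Output[Dataset],
) -> str:
    # The Args/Returns sections of this docstring become the description
    # fields in the compiled inputDefinitions/outputDefinitions.
    """Concatenate dataset_a and dataset_b.

    Also returns the concatenated string.

    Args:
        dataset_a: First dataset.
        dataset_b: Second dataset.

    Returns:
        out_dataset: The concatenated dataset.
        Output: The concatenated string.
    """
    with open(dataset_a.path) as f:
        content_a = f.read()

    with open(dataset_b.path) as f:
        content_b = f.read()

    concatenated_string = content_a + content_b
    with open(out_dataset.path, 'w') as f:
        f.write(concatenated_string)

    return concatenated_string


if __name__ == '__main__':
    # Output filename is illustrative; the real test file's name is truncated above.
    compiler.Compiler().compile(dataset_joiner, package_path='dataset_joiner_component.yaml')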