# PIPELINE DEFINITION
# Name: dataset-joiner
# Description: Concatenate dataset_a and dataset_b.
#              Also returns the concatenated string.
# Inputs:
#    dataset_a: system.Dataset
#    dataset_b: system.Dataset
# Outputs:
#    Output: str
#    out_dataset: system.Dataset
components:
  comp-dataset-joiner:
    executorLabel: exec-dataset-joiner
    inputDefinitions:
      artifacts:
        dataset_a:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
          description: First dataset.
        dataset_b:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
          description: Second dataset.
    outputDefinitions:
      artifacts:
        out_dataset:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
          description: The concatenated dataset.
      parameters:
        Output:
          description: The concatenated string.
          parameterType: STRING
deploymentSpec:
  executors:
    exec-dataset-joiner:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - dataset_joiner
        command:
        - sh
        - -c
        - "\nif ! [ -x \"$(command -v pip)\" ]; then\n    python3 -m ensurepip ||\
          \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
          \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2'\
          \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\
          $0\" \"$@\"\n"
        - sh
        - -ec
        - 'program_path=$(mktemp -d)


          printf "%s" "$0" > "$program_path/ephemeral_component.py"

          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"

          '
        - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
          \ *\n\ndef dataset_joiner(\n    dataset_a: Input[Dataset],\n    dataset_b:\
          \ Input[Dataset],\n    out_dataset: Output[Dataset],\n) -> str:\n    \"\"\
          \"Concatenate dataset_a and dataset_b.\n\n    Also returns the concatenated\
          \ string.\n\n    Args:\n        dataset_a: First dataset.\n        dataset_b:\
          \ Second dataset.\n\n    Returns:\n        out_dataset: The concatenated\
          \ dataset.\n        Output: The concatenated string.\n    \"\"\"\n    with\
          \ open(dataset_a.path) as f:\n        content_a = f.read()\n\n    with open(dataset_b.path)\
          \ as f:\n        content_b = f.read()\n\n    concatenated_string = content_a\
          \ + content_b\n    with open(out_dataset.path, 'w') as f:\n        f.write(concatenated_string)\n\
          \n    return concatenated_string\n\n"
        image: python:3.7
pipelineInfo:
  name: dataset-joiner
root:
  dag:
    outputs:
      artifacts:
        out_dataset:
          artifactSelectors:
          - outputArtifactKey: out_dataset
            producerSubtask: dataset-joiner
      parameters:
        Output:
          valueFromParameter:
            outputParameterKey: Output
            producerSubtask: dataset-joiner
    tasks:
      dataset-joiner:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-dataset-joiner
        inputs:
          artifacts:
            dataset_a:
              componentInputArtifact: dataset_a
            dataset_b:
              componentInputArtifact: dataset_b
        taskInfo:
          name: dataset-joiner
  inputDefinitions:
    artifacts:
      dataset_a:
        artifactType:
          schemaTitle: system.Dataset
          schemaVersion: 0.0.1
        description: First dataset.
      dataset_b:
        artifactType:
          schemaTitle: system.Dataset
          schemaVersion: 0.0.1
        description: Second dataset.
  outputDefinitions:
    artifacts:
      out_dataset:
        artifactType:
          schemaTitle: system.Dataset
          schemaVersion: 0.0.1
        description: The concatenated dataset.
    parameters:
      Output:
        description: The concatenated string.
        parameterType: STRING
schemaVersion: 2.1.0
sdkVersion: kfp-2.1.2
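# ---------------------------------------------------------------------------
# For reference: a minimal sketch (kept as comments so this file stays valid
# YAML) of the Python source that a spec of this shape could be compiled from.
# The dataset_joiner body is taken from the executor source embedded above;
# the bare @dsl.component decorator, the import list, the output file name,
# and the compiler.Compiler().compile call are assumptions, not recorded in
# this file.
#
#   from kfp import compiler, dsl
#   from kfp.dsl import Dataset, Input, Output
#
#   @dsl.component  # assumed; the resolved container image above is python:3.7
#   def dataset_joiner(
#       dataset_a: Input[Dataset],
#       dataset_b: Input[Dataset],
#       out_dataset: Output[Dataset],
#   ) -> str:
#       """Concatenate dataset_a and dataset_b.
#
#       Also returns the concatenated string.
#       """
#       with open(dataset_a.path) as f:
#           content_a = f.read()
#
#       with open(dataset_b.path) as f:
#           content_b = f.read()
#
#       concatenated_string = content_a + content_b
#       with open(out_dataset.path, 'w') as f:
#           f.write(concatenated_string)
#
#       return concatenated_string
#
#   # Compiling the component on its own would wrap it in a single-task
#   # pipeline like the one defined above (pipelineInfo.name: dataset-joiner).
#   compiler.Compiler().compile(dataset_joiner, 'dataset_joiner_component.yaml')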