# PIPELINE DEFINITION
# Name: dataset-joiner
# Description: Concatenate dataset_a and dataset_b.
#              Also returns the concatenated string.
# Inputs:
#    dataset_a: system.Dataset
#    dataset_b: system.Dataset
# Outputs:
#    Output: str
#    out_dataset: system.Dataset

# Component interface: two Dataset artifacts in, one Dataset artifact plus one
# STRING parameter out. The same interface is repeated under `root` below.
components:
  comp-dataset-joiner:
    executorLabel: exec-dataset-joiner
    inputDefinitions:
      artifacts:
        dataset_a:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
          description: First dataset.
        dataset_b:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
          description: Second dataset.
    outputDefinitions:
      artifacts:
        out_dataset:
          artifactType:
            schemaTitle: system.Dataset
            schemaVersion: 0.0.1
          description: The concatenated dataset.
      parameters:
        Output:
          description: The concatenated string.
          parameterType: STRING

# How the component runs: the container's argv is `command` followed by `args`.
deploymentSpec:
  executors:
    exec-dataset-joiner:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - dataset_joiner
        command:
        # Stage 1 (sh -c): make sure pip exists, install the pinned kfp SDK,
        # then re-exec the remaining argv via "$0" "$@". Here "$0" is the
        # second `sh` entry below and "$@" is everything after it.
        # NOTE(review): `apt-get install` has no `-y`, so the fallback would
        # stop at a confirmation prompt; left exactly as compiled.
        # The leading blank line inside the scalar is part of the value.
        - sh
        - -c
        - |

          if ! [ -x "$(command -v pip)" ]; then
              python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip
          fi

          PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==2.1.2' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<"3.9"' && "$0" "$@"
        # Stage 2 (sh -ec): "$0" is the Python source in the next list entry.
        # It is written to a temp file and handed to the KFP executor together
        # with the `args` above. Each statement must be on its own line:
        # `program_path` has to be assigned before the following lines use it.
        - sh
        - -ec
        - |
          program_path=$(mktemp -d)

          printf "%s" "$0" > "$program_path/ephemeral_component.py"
          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
        # Component source. `|+` keeps the trailing blank line so the value
        # still ends with two newlines; the leading blank line is also part of
        # the value. Python indentation inside this scalar is significant.
        - |+

          import kfp
          from kfp import dsl
          from kfp.dsl import *
          from typing import *

          def dataset_joiner(
              dataset_a: Input[Dataset],
              dataset_b: Input[Dataset],
              out_dataset: Output[Dataset],
          ) -> str:
              """Concatenate dataset_a and dataset_b.

              Also returns the concatenated string.

              Args:
                  dataset_a: First dataset.
                  dataset_b: Second dataset.

              Returns:
                  out_dataset: The concatenated dataset.
                  Output: The concatenated string.
              """
              with open(dataset_a.path) as f:
                  content_a = f.read()

              with open(dataset_b.path) as f:
                  content_b = f.read()

              concatenated_string = content_a + content_b
              with open(out_dataset.path, 'w') as f:
                  f.write(concatenated_string)

              return concatenated_string

        # NOTE(review): python:3.7 is past upstream end-of-life; change it by
        # recompiling the component with a newer base image, not by hand.
        image: python:3.7

pipelineInfo:
  name: dataset-joiner

# Root DAG: a single task that forwards the pipeline inputs to the component
# and surfaces the component's outputs as the pipeline's outputs.
root:
  dag:
    outputs:
      artifacts:
        out_dataset:
          artifactSelectors:
          - outputArtifactKey: out_dataset
            producerSubtask: dataset-joiner
      parameters:
        Output:
          valueFromParameter:
            outputParameterKey: Output
            producerSubtask: dataset-joiner
    tasks:
      dataset-joiner:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-dataset-joiner
        inputs:
          artifacts:
            dataset_a:
              componentInputArtifact: dataset_a
            dataset_b:
              componentInputArtifact: dataset_b
        taskInfo:
          name: dataset-joiner
  inputDefinitions:
    artifacts:
      dataset_a:
        artifactType:
          schemaTitle: system.Dataset
          schemaVersion: 0.0.1
        description: First dataset.
      dataset_b:
        artifactType:
          schemaTitle: system.Dataset
          schemaVersion: 0.0.1
        description: Second dataset.
  outputDefinitions:
    artifacts:
      out_dataset:
        artifactType:
          schemaTitle: system.Dataset
          schemaVersion: 0.0.1
        description: The concatenated dataset.
    parameters:
      Output:
        description: The concatenated string.
        parameterType: STRING

schemaVersion: 2.1.0
sdkVersion: kfp-2.1.2