#!/usr/bin/env python3 # Copyright 2019-2023 The Kubeflow Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from kfp import dsl, compiler def gcs_download_op(url): return dsl.ContainerOp( name='GCS - Download', image='google/cloud-sdk:279.0.0', command=['sh', '-c'], arguments=['gsutil cat $0 | tee $1', url, '/tmp/results.txt'], file_outputs={ 'data': '/tmp/results.txt', } ) def echo2_op(text1, text2): return dsl.ContainerOp( name='echo', image='library/bash:4.4.23', command=['sh', '-c'], arguments=['echo "Text 1: $0"; echo "Text 2: $1"', text1, text2] ) @dsl.pipeline( name='parallel-pipeline', description='Download two messages in parallel and prints the concatenated result.' ) def download_and_join( url1='gs://ml-pipeline/sample-data/shakespeare/shakespeare1.txt', url2='gs://ml-pipeline/sample-data/shakespeare/shakespeare2.txt' ): """A three-step pipeline with first two running in parallel.""" download1_task = gcs_download_op(url1) download2_task = gcs_download_op(url2) echo_task = echo2_op(download1_task.output, download2_task.output) if __name__ == '__main__': compiler.Compiler().compile(download_and_join, __file__ + '.yaml')