# File listing metadata: 120 lines, 3.4 KiB, Python
# Copyright 2020 kubeflow.org
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
|
|
|
from kfp import dsl, components
|
|
|
|
# Component spec (YAML) for a container step that prints the most frequent
# whitespace-separated word of `message` and tees it into the `word` output.
#
# `sh -c <script> a b` binds a to $0 and b to $1, so inside the script $0 is
# the message value and $1 is the path substituted for the `word` output.
# The trailing backslashes sit inside a non-raw Python string, so Python
# joins those lines itself: the script reaches the YAML parser as one line.
FREQUENT_WORD_STR = """
name: get-frequent
description: Calculate the frequent word from a text
inputs:
- {name: message, type: String, description: 'Required. message'}
outputs:
- {name: word, type: String}
implementation:
  container:
    image: python:3.6-jessie
    command:
    - sh
    - -c
    - |
      python -c "import sys; from collections import Counter; \
      input_text = sys.argv[1]; \
      words = Counter(input_text.split()); print(max(words, key=words.get));" \
      "$0" | tee $1
    - {inputValue: message}
    - {outputPath: word}
"""

# Task factory created from the spec above.
frequent_word_op = components.load_component_from_text(FREQUENT_WORD_STR)
|
|
|
|
# Component spec (YAML) for a container step that pipes `message` into
# `gsutil cp - <output_path>`.
#
# `sh -c <script> a b` binds a to $0 and b to $1, so inside the script $0 is
# the message and $1 is the destination given by `output_path`.
SAVE_MESSAGE_STR = """
name: save
description: |
  save message to a given output_path
inputs:
- {name: message, type: String, description: 'Required. message'}
- {name: output_path, type: String, description: 'Required. output path'}
implementation:
  container:
    image: google/cloud-sdk
    command:
    - sh
    - -c
    - |
      set -e
      echo "$0"| gsutil cp - "$1"
    - {inputValue: message}
    - {inputValue: output_path}
"""

# Task factory created from the spec above.
save_message_op = components.load_component_from_text(SAVE_MESSAGE_STR)
|
|
|
|
|
|
@dsl.pipeline(
    name='save-most-frequent',
    description='Get Most Frequent Word and Save to GCS'
)
def save_most_frequent_word(message: str,
                            outputpath: str):
    """Find the most frequent word of ``message`` and save it to ``outputpath``.

    Two tasks are chained: ``frequent_word_op`` runs on the message, and its
    ``word`` output is handed to ``save_message_op`` together with the
    destination path.
    """
    top_word = frequent_word_op(message=message).outputs['word']
    save_message_op(message=top_word, output_path=outputpath)
|
|
|
|
|
|
# Component spec (YAML) for a container step that runs `gsutil cat` on `url`
# and tees the content into the `downloaded` output.
#
# `sh -c <script> a b` binds a to $0 and b to $1, so inside the script $0 is
# the url and $1 is the path substituted for the `downloaded` output.
DOWNLOAD_MESSAGE_STR = """
name: download
description: |
  downloads a message and outputs it
inputs:
- {name: url, type: String, description: 'Required. the gcs url to download the message from'}
outputs:
- {name: downloaded, type: String, description: 'file content.'}
implementation:
  container:
    image: google/cloud-sdk
    command:
    - sh
    - -c
    - |
      set -e
      gsutil cat $0 | tee $1
    - {inputValue: url}
    - {outputPath: downloaded}
"""

# Task factory created from the spec above.
download_message_op = components.load_component_from_text(DOWNLOAD_MESSAGE_STR)
|
|
|
|
|
|
@dsl.pipeline(
    name='download-and-save-most-frequent',
    description='Download and Get Most Frequent Word and Save to GCS'
)
def download_save_most_frequent_word(
        url: str = 'gs://ml-pipeline-playground/shakespeare1.txt',
        outputpath: str = '/tmp/output.txt'):
    """Download the text at ``url``, then run ``save_most_frequent_word`` on it.

    The ``downloaded`` output of ``download_message_op`` becomes the message
    of the inner pipeline function; ``outputpath`` is passed through as-is.
    """
    fetched_text = download_message_op(url=url).outputs['downloaded']
    save_most_frequent_word(message=fetched_text, outputpath=outputpath)
|
|
|
|
|
|
if __name__ == '__main__':
    import os

    from kfp_tekton.compiler import TektonCompiler

    # Swap only the trailing extension. str.replace('.py', '.yaml') would also
    # rewrite any other '.py' in the path (for example a '.pyenv' directory).
    output_path = os.path.splitext(__file__)[0] + '.yaml'

    tkc = TektonCompiler()
    # Check if simple pipeline can be compiled
    tkc.compile(save_most_frequent_word, output_path)
    # Both calls target the same path, so the composed pipeline compiled here
    # is what is left in the file afterwards.
    tkc.compile(download_save_most_frequent_word, output_path)
|