pipelines/sdk/python/tests/compiler/testdata/basic.py

95 lines
3.4 KiB
Python

# Copyright 2018 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import kfp.dsl as dsl
import kfp.gcp as gcp
class GetFrequentWordOp(dsl.ContainerOp):
    """Pipeline step that reports the most frequent word of a message.

    The image, shell command and arguments are fixed here, so a pipeline
    author only supplies the step name and the input message.
    """

    def __init__(self, name, message):
        """Configure the container for this step.

        Args:
          name: An identifier of the step; it needs to be unique within a
            pipeline.
          message: a dsl.PipelineParam object representing an input message.
        """
        # The message is %-interpolated into an inline Python snippet that
        # counts the whitespace-separated words and prints the one with the
        # highest count. `tee` also writes that output to /tmp/message.txt,
        # the file registered below as the 'word' output.
        shell_script = (
            'python -c "from collections import Counter; '
            'words = Counter(\'%s\'.split()); print(max(words, key=words.get))" '
            '| tee /tmp/message.txt'
        ) % message

        super().__init__(
            name=name,
            image='python:3.5-jessie',
            command=['sh', '-c'],
            arguments=[shell_script],
            file_outputs={'word': '/tmp/message.txt'},
        )
class SaveMessageOp(dsl.ContainerOp):
    """Pipeline step that saves a message to a given output path.

    The message is echoed into /tmp/results.txt and that file is then
    copied to the output path with `gsutil cp`.
    """

    def __init__(self, name, message, output_path):
        """Configure the container for this step.

        Args:
          name: An identifier of the step; it needs to be unique within a
            pipeline.
          message: a dsl.PipelineParam object representing the message to
            be saved.
          output_path: a dsl.PipelineParam object representing the GCS path
            for the output file.
        """
        # Both values are substituted into the shell command line as text.
        upload_command = (
            'echo %s | tee /tmp/results.txt | gsutil cp /tmp/results.txt %s'
            % (message, output_path)
        )

        super().__init__(
            name=name,
            image='google/cloud-sdk',
            command=['sh', '-c'],
            arguments=[upload_command],
        )
class ExitHandlerOp(dsl.ContainerOp):
    """Pipeline step that just echoes 'exit!' from a shell."""

    def __init__(self, name):
        """Configure the container for this step.

        Args:
          name: An identifier of the step, forwarded to dsl.ContainerOp.
        """
        # Fixed container settings; only the step name varies per instance.
        container_settings = {
            'image': 'python:3.5-jessie',
            'command': ['sh', '-c'],
            'arguments': ['echo exit!'],
        }
        super().__init__(name=name, **container_settings)
@dsl.pipeline(
    name='Save Most Frequent',
    description='Get Most Frequent Word and Save to GCS')
def save_most_frequent_word(message: str, outputpath: str):
    """Pipeline function that wires the word-count step to the save step.

    Args:
      message: Input text handed to the 'get-Frequent' step.
      outputpath: Destination handed to the 'save' step as its output path.
    """
    exit_step = ExitHandlerOp('exiting')

    # NOTE(review): presumably dsl.ExitHandler runs `exit_step` once the
    # steps created inside this block have finished -- confirm against the
    # kfp documentation.
    with dsl.ExitHandler(exit_step):
        word_counter = GetFrequentWordOp(name='get-Frequent', message=message)
        word_counter.container.set_memory_request('200M')

        # The saver consumes the counter's output, linking the two steps.
        save_step = SaveMessageOp(
            name='save',
            message=word_counter.output,
            output_path=outputpath,
        )
        save_step.container.set_cpu_limit('0.5')
        save_step.container.set_gpu_limit('2')
        save_step.add_node_selector_constraint(
            'cloud.google.com/gke-accelerator',
            'nvidia-tesla-k80',
        )

        tpu_settings = gcp.use_tpu(
            tpu_cores=8,
            tpu_resource='v2',
            tf_version='1.12',
        )
        save_step.apply(tpu_settings)