# pipelines/sdk/python/kfp/v2/compiler/compiler_test.py
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import shutil
import tempfile
import unittest
from kfp.v2 import components
from kfp.v2 import compiler
from kfp.v2 import dsl
class CompilerTest(unittest.TestCase):
  """Tests for the KFP v2 compiler.

  Covers: successful compilation of a simple two-step pipeline, the
  NotImplementedError raised for v1 DSL features not yet supported in v2
  (Condition, ExitHandler, ParallelFor, graph_component), TypeErrors for
  placeholder/type mismatches in component specs, pipeline-name validation,
  and the precedence rules for setting pipeline_root.
  """

  def test_compile_simple_pipeline(self):
    """A producer/consumer pipeline compiles and writes the job spec JSON."""
    # TemporaryDirectory replaces mkdtemp + try/finally/rmtree and cleans up
    # automatically even if an assertion fails.
    with tempfile.TemporaryDirectory() as tmpdir:
      producer_op = components.load_component_from_text("""
      name: producer
      inputs:
      - {name: input_param, type: String}
      outputs:
      - {name: output_model, type: Model}
      - {name: output_value, type: Integer}
      implementation:
        container:
          image: gcr.io/my-project/my-image:tag
          args:
          - {inputValue: input_param}
          - {outputPath: output_model}
          - {outputPath: output_value}
      """)

      consumer_op = components.load_component_from_text("""
      name: consumer
      inputs:
      - {name: input_model, type: Model}
      - {name: input_value, type: Integer}
      implementation:
        container:
          image: gcr.io/my-project/my-image:tag
          args:
          - {inputPath: input_model}
          - {inputValue: input_value}
      """)

      @dsl.pipeline(name='two-step-pipeline')
      def simple_pipeline(pipeline_input='Hello KFP!'):
        producer = producer_op(input_param=pipeline_input)
        consumer = consumer_op(
            input_model=producer.outputs['output_model'],
            input_value=producer.outputs['output_value'])

      target_json_file = os.path.join(tmpdir, 'result.json')
      compiler.Compiler().compile(
          pipeline_func=simple_pipeline,
          pipeline_root='dummy_root',
          output_path=target_json_file)

      self.assertTrue(os.path.exists(target_json_file))

  def test_compile_pipeline_with_dsl_condition_should_raise_error(self):
    """dsl.Condition is rejected with NotImplementedError by the v2 compiler."""
    flip_coin_op = components.load_component_from_text("""
      name: flip coin
      inputs:
      - {name: name, type: String}
      outputs:
      - {name: result, type: String}
      implementation:
        container:
          image: gcr.io/my-project/my-image:tag
          args:
          - {inputValue: name}
          - {outputPath: result}
      """)

    print_op = components.load_component_from_text("""
      name: print
      inputs:
      - {name: name, type: String}
      - {name: msg, type: String}
      implementation:
        container:
          image: gcr.io/my-project/my-image:tag
          args:
          - {inputValue: name}
          - {inputValue: msg}
      """)

    @dsl.pipeline()
    def flipcoin():
      flip = flip_coin_op('flip')
      with dsl.Condition(flip.outputs['result'] == 'heads'):
        flip2 = flip_coin_op('flip-again')
        with dsl.Condition(flip2.outputs['result'] == 'tails'):
          print_op('print1', flip2.outputs['result'])
      with dsl.Condition(flip.outputs['result'] == 'tails'):
        # Fixed: the component's only output is named 'result'; the previous
        # key 'results' would raise KeyError while tracing the pipeline
        # function, masking the NotImplementedError this test asserts on.
        print_op('print2', flip2.outputs['result'])

    with self.assertRaisesRegex(
        NotImplementedError,
        'dsl.Condition is not yet supported in KFP v2 compiler.'):
      compiler.Compiler().compile(
          pipeline_func=flipcoin,
          pipeline_root='dummy_root',
          output_path='output.json')

  def test_compile_pipeline_with_dsl_exithandler_should_raise_error(self):
    """dsl.ExitHandler is rejected with NotImplementedError."""
    gcs_download_op = components.load_component_from_text("""
      name: GCS - Download
      inputs:
      - {name: url, type: String}
      outputs:
      - {name: result, type: String}
      implementation:
        container:
          image: gcr.io/my-project/my-image:tag
          args:
          - {inputValue: url}
          - {outputPath: result}
      """)

    echo_op = components.load_component_from_text("""
      name: echo
      inputs:
      - {name: msg, type: String}
      implementation:
        container:
          image: gcr.io/my-project/my-image:tag
          args:
          - {inputValue: msg}
      """)

    @dsl.pipeline()
    def download_and_print(url='gs://ml-pipeline/shakespeare/shakespeare1.txt'):
      """A sample pipeline showing exit handler."""
      exit_task = echo_op('exit!')
      with dsl.ExitHandler(exit_task):
        download_task = gcs_download_op(url)
        echo_task = echo_op(download_task.outputs['result'])

    with self.assertRaisesRegex(
        NotImplementedError,
        'dsl.ExitHandler is not yet supported in KFP v2 compiler.'):
      compiler.Compiler().compile(
          pipeline_func=download_and_print,
          pipeline_root='dummy_root',
          output_path='output.json')

  def test_compile_pipeline_with_dsl_parallelfor_should_raise_error(self):
    """dsl.ParallelFor is rejected with NotImplementedError."""

    @components.create_component_from_func
    def print_op(s: str):
      print(s)

    @dsl.pipeline()
    def my_pipeline():
      loop_args = [{'A_a': 1, 'B_b': 2}, {'A_a': 10, 'B_b': 20}]
      with dsl.ParallelFor(loop_args, parallelism=10) as item:
        print_op(item)
        print_op(item.A_a)
        print_op(item.B_b)

    with self.assertRaisesRegex(
        NotImplementedError,
        'dsl.ParallelFor is not yet supported in KFP v2 compiler.'):
      compiler.Compiler().compile(
          pipeline_func=my_pipeline,
          pipeline_root='dummy_root',
          output_path='output.json')

  def test_compile_pipeline_with_dsl_graph_component_should_raise_error(self):
    """dsl.graph_component is rejected with NotImplementedError."""
    with self.assertRaisesRegex(
        NotImplementedError,
        'dsl.graph_component is not yet supported in KFP v2 compiler.'):

      @dsl.graph_component
      def echo1_graph_component(text1):
        dsl.ContainerOp(
            name='echo1-task1',
            image='library/bash:4.4.23',
            command=['sh', '-c'],
            arguments=['echo "$0"', text1])

      @dsl.graph_component
      def echo2_graph_component(text2):
        dsl.ContainerOp(
            name='echo2-task1',
            image='library/bash:4.4.23',
            command=['sh', '-c'],
            arguments=['echo "$0"', text2])

      @dsl.pipeline()
      def opsgroups_pipeline(text1='message 1', text2='message 2'):
        step1_graph_component = echo1_graph_component(text1)
        step2_graph_component = echo2_graph_component(text2)
        step2_graph_component.after(step1_graph_component)

      compiler.Compiler().compile(
          pipeline_func=opsgroups_pipeline,
          pipeline_root='dummy_root',
          output_path='output.json')

  def test_compile_pipeline_with_misused_inputvalue_should_raise_error(self):
    """An artifact-typed input cannot use an inputValue placeholder."""
    component_op = components.load_component_from_text("""
      name: compoent with misused placeholder
      inputs:
      - {name: model, type: Model}
      implementation:
        container:
          image: dummy
          args:
          - {inputValue: model}
      """)

    def my_pipeline(model):
      component_op(model=model)

    with self.assertRaisesRegex(
        TypeError,
        ' type "Model" cannot be paired with InputValuePlaceholder.'):
      compiler.Compiler().compile(
          pipeline_func=my_pipeline,
          pipeline_root='dummy',
          output_path='output.json')

  def test_compile_pipeline_with_misused_inputpath_should_raise_error(self):
    """A value-typed (String) input cannot use an inputPath placeholder."""
    component_op = components.load_component_from_text("""
      name: compoent with misused placeholder
      inputs:
      - {name: text, type: String}
      implementation:
        container:
          image: dummy
          args:
          - {inputPath: text}
      """)

    def my_pipeline(text):
      component_op(text=text)

    with self.assertRaisesRegex(
        TypeError,
        ' type "String" cannot be paired with InputPathPlaceholder.'):
      compiler.Compiler().compile(
          pipeline_func=my_pipeline,
          pipeline_root='dummy',
          output_path='output.json')

  def test_compile_pipeline_with_misused_inputuri_should_raise_error(self):
    """A value-typed (Float) input cannot use an inputUri placeholder."""
    component_op = components.load_component_from_text("""
      name: compoent with misused placeholder
      inputs:
      - {name: value, type: Float}
      implementation:
        container:
          image: dummy
          args:
          - {inputUri: value}
      """)

    def my_pipeline(value):
      component_op(value=value)

    with self.assertRaisesRegex(
        TypeError, ' type "Float" cannot be paired with InputUriPlaceholder.'):
      compiler.Compiler().compile(
          pipeline_func=my_pipeline,
          pipeline_root='dummy',
          output_path='output.json')

  def test_compile_pipeline_with_misused_outputuri_should_raise_error(self):
    """A value-typed (Integer) output cannot use an outputUri placeholder."""
    component_op = components.load_component_from_text("""
      name: compoent with misused placeholder
      outputs:
      - {name: value, type: Integer}
      implementation:
        container:
          image: dummy
          args:
          - {outputUri: value}
      """)

    def my_pipeline():
      component_op()

    with self.assertRaisesRegex(
        TypeError,
        ' type "Integer" cannot be paired with OutputUriPlaceholder.'):
      compiler.Compiler().compile(
          pipeline_func=my_pipeline,
          pipeline_root='dummy',
          output_path='output.json')

  def test_compile_pipeline_with_invalid_name_should_raise_error(self):
    """A pipeline whose derived name fails validation raises ValueError."""

    def my_pipeline():
      pass

    with self.assertRaisesRegex(
        ValueError,
        'Invalid pipeline name: .*\nPlease specify a pipeline name that matches'
    ):
      compiler.Compiler().compile(
          pipeline_func=my_pipeline,
          pipeline_root='dummy',
          output_path='output.json')

  def test_compile_pipeline_with_importer_on_inputpath_should_raise_error(self):
    """An InputPath artifact must be fed by an upstream output, not a root input."""
    # YAML componet authoring
    component_op = components.load_component_from_text("""
      name: compoent with misused placeholder
      inputs:
      - {name: model, type: Model}
      implementation:
        container:
          image: dummy
          args:
          - {inputPath: model}
      """)

    @dsl.pipeline(name='my-component')
    def my_pipeline(model):
      component_op(model=model)

    with self.assertRaisesRegex(
        TypeError,
        'Input "model" with type "Model" is not connected to any upstream '
        'output. However it is used with InputPathPlaceholder.'):
      compiler.Compiler().compile(
          pipeline_func=my_pipeline,
          pipeline_root='dummy',
          output_path='output.json')

    # Python function based component authoring
    def my_component(datasets: components.InputPath('Datasets')):
      pass

    component_op = components.create_component_from_func(my_component)

    @dsl.pipeline(name='my-component')
    def my_pipeline(datasets):
      component_op(datasets=datasets)

    with self.assertRaisesRegex(
        TypeError,
        'Input "datasets" with type "Datasets" is not connected to any upstream '
        'output. However it is used with InputPathPlaceholder.'):
      compiler.Compiler().compile(
          pipeline_func=my_pipeline,
          pipeline_root='dummy',
          output_path='output.json')

  def test_set_pipeline_root_through_pipeline_decorator(self):
    """pipeline_root from the @dsl.pipeline decorator lands in runtimeConfig."""
    with tempfile.TemporaryDirectory() as tmpdir:

      @dsl.pipeline(name='my-pipeline', pipeline_root='gs://path')
      def my_pipeline():
        pass

      target_json_file = os.path.join(tmpdir, 'result.json')
      compiler.Compiler().compile(
          pipeline_func=my_pipeline, output_path=target_json_file)

      self.assertTrue(os.path.exists(target_json_file))
      with open(target_json_file) as f:
        job_spec = json.load(f)
      self.assertEqual('gs://path',
                       job_spec['runtimeConfig']['gcsOutputDirectory'])

  def test_set_pipeline_root_through_compile_method(self):
    """pipeline_root passed to compile() overrides the decorator value."""
    with tempfile.TemporaryDirectory() as tmpdir:

      @dsl.pipeline(name='my-pipeline', pipeline_root='gs://path')
      def my_pipeline():
        pass

      target_json_file = os.path.join(tmpdir, 'result.json')
      compiler.Compiler().compile(
          pipeline_func=my_pipeline,
          pipeline_root='gs://path-override',
          output_path=target_json_file)

      self.assertTrue(os.path.exists(target_json_file))
      with open(target_json_file) as f:
        job_spec = json.load(f)
      self.assertEqual('gs://path-override',
                       job_spec['runtimeConfig']['gcsOutputDirectory'])

  def test_missing_pipeline_root_is_allowed_but_warned(self):
    """Compiling without any pipeline_root warns and omits gcsOutputDirectory."""
    with tempfile.TemporaryDirectory() as tmpdir:

      @dsl.pipeline(name='my-pipeline')
      def my_pipeline():
        pass

      target_json_file = os.path.join(tmpdir, 'result.json')
      with self.assertWarnsRegex(UserWarning, 'pipeline_root is None or empty'):
        compiler.Compiler().compile(
            pipeline_func=my_pipeline, output_path=target_json_file)

      self.assertTrue(os.path.exists(target_json_file))
      with open(target_json_file) as f:
        job_spec = json.load(f)
      # assertNotIn gives a clearer failure message than assertTrue(... not in).
      self.assertNotIn('gcsOutputDirectory', job_spec['runtimeConfig'])
# Allow running this test module directly: `python compiler_test.py`.
if __name__ == '__main__':
  unittest.main()