# Copyright 2018-2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
|
|
|
import kfp.compiler as compiler
|
|
import kfp.dsl as dsl
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import zipfile
|
|
import tarfile
|
|
import tempfile
|
|
import unittest
|
|
import yaml
|
|
|
|
from kfp.dsl._component import component
|
|
from kfp.dsl import ContainerOp, pipeline
|
|
from kfp.dsl.types import Integer, InconsistentTypeException
|
|
from kubernetes.client import V1Toleration
|
|
|
|
|
|
class TestCompiler(unittest.TestCase):
|
|
|
|
def test_operator_to_template(self):
    """Check the templates produced for a ContainerOp and a ResourceOp.

    A ContainerOp (with a volume mount and an environment variable) and a
    ResourceOp are created inside a pipeline context, and the dictionaries
    returned by ``Compiler._op_to_template`` are compared with hand-written
    expected templates.
    """
    from kubernetes import client as k8s_client

    expected_container_template = {
        'container': {
            'image': 'image',
            'args': [
                'echo {{inputs.parameters.msg1}} {{inputs.parameters.msg2}} | tee /tmp/message.txt'
            ],
            'command': ['sh', '-c'],
            'env': [
                {
                    'name': 'GOOGLE_APPLICATION_CREDENTIALS',
                    'value': '/secret/gcp-credentials/user-gcp-sa.json'
                }
            ],
            'volumeMounts': [
                {
                    'mountPath': '/secret/gcp-credentials',
                    'name': 'gcp-credentials',
                }
            ]
        },
        'inputs': {
            'parameters': [
                {'name': 'msg1'},
                {'name': 'msg2', 'value': 'value2'},
            ]
        },
        'name': 'echo',
        'outputs': {
            'parameters': [
                {
                    'name': 'echo-merged',
                    'valueFrom': {'path': '/tmp/message.txt'}
                }
            ],
            'artifacts': [
                {
                    'name': 'mlpipeline-ui-metadata',
                    'path': '/mlpipeline-ui-metadata.json',
                    'optional': True,
                },
                {
                    'name': 'mlpipeline-metrics',
                    'path': '/mlpipeline-metrics.json',
                    'optional': True,
                }
            ]
        }
    }

    # NOTE(review): the leading whitespace inside the "name: resource" line of
    # the manifest is reproduced exactly as it appears in the reviewed source;
    # confirm it against the manifest the compiler actually emits.
    expected_resource_template = {
        'inputs': {
            'parameters': [
                {'name': 'json'},
                {'name': 'kind'},
            ]
        },
        'name': 'test-resource',
        'outputs': {
            'parameters': [
                {
                    'name': 'test-resource-manifest',
                    'valueFrom': {'jsonPath': '{}'}
                },
                {
                    'name': 'test-resource-name',
                    'valueFrom': {'jsonPath': '{.metadata.name}'}
                },
                {
                    'name': 'test-resource-out',
                    'valueFrom': {'jsonPath': '{{inputs.parameters.json}}'}
                }
            ]
        },
        'resource': {
            'action': 'create',
            'manifest': (
                "apiVersion: v1\n"
                "kind: '{{inputs.parameters.kind}}'\n"
                "metadata:\n"
                " name: resource\n"
            )
        }
    }

    with dsl.Pipeline('somename'):
        first_msg = dsl.PipelineParam('msg1')
        second_msg = dsl.PipelineParam('msg2', value='value2')
        json_param = dsl.PipelineParam('json')
        kind_param = dsl.PipelineParam('kind')

        credentials_mount = k8s_client.V1VolumeMount(
            mount_path='/secret/gcp-credentials',
            name='gcp-credentials')
        credentials_env = k8s_client.V1EnvVar(
            name='GOOGLE_APPLICATION_CREDENTIALS',
            value='/secret/gcp-credentials/user-gcp-sa.json')

        echo_op = dsl.ContainerOp(
            name='echo',
            image='image',
            command=['sh', '-c'],
            arguments=['echo %s %s | tee /tmp/message.txt' % (first_msg, second_msg)],
            file_outputs={'merged': '/tmp/message.txt'})
        # Keep whatever each builder call returns, exactly like the chained form.
        echo_op = echo_op.add_volume_mount(credentials_mount)
        echo_op = echo_op.add_env_variable(credentials_env)

        claim = k8s_client.V1PersistentVolumeClaim(
            api_version="v1",
            kind=kind_param,
            metadata=k8s_client.V1ObjectMeta(name="resource"))
        resource_op = dsl.ResourceOp(
            name="test-resource",
            k8s_resource=claim,
            attribute_outputs={"out": json_param})

    self.maxDiff = None
    container_template = compiler.Compiler()._op_to_template(echo_op)
    self.assertEqual(expected_container_template, container_template)
    resource_template = compiler.Compiler()._op_to_template(resource_op)
    self.assertEqual(expected_resource_template, resource_template)
|
|
|
|
def _get_yaml_from_zip(self, zip_file):
    """Load the YAML document stored as the first member of a zip archive.

    The member is read directly from the archive rather than extracted, so
    no stray file is written to the current working directory.

    Args:
      zip_file: the archive to read, as accepted by ``zipfile.ZipFile``.

    Returns:
      The object returned by ``yaml.safe_load`` for the first archive member.
    """
    with zipfile.ZipFile(zip_file, 'r') as archive:
        first_member = archive.namelist()[0]
        with archive.open(first_member) as yaml_file:
            return yaml.safe_load(yaml_file)
|
|
|
|
def _get_yaml_from_tar(self, tar_file):
    """Load the YAML document stored as the first member of a gzipped tarball.

    Args:
      tar_file: the ``.tar.gz`` archive to read.

    Returns:
      The object returned by ``yaml.safe_load`` for the first archive member.
    """
    with tarfile.open(tar_file, 'r:gz') as archive:
        first_member = archive.getmembers()[0]
        member_stream = archive.extractfile(first_member)
        return yaml.safe_load(member_stream)
|
|
|
|
def test_basic_workflow(self):
    """Compile the ``basic`` sample pipeline and compare it with ``basic.yaml``."""
    self.maxDiff = None

    testdata_path = os.path.join(os.path.dirname(__file__), 'testdata')
    sys.path.append(testdata_path)
    import basic

    scratch_dir = tempfile.mkdtemp()
    archive_path = os.path.join(scratch_dir, 'workflow.zip')
    try:
        compiler.Compiler().compile(basic.save_most_frequent_word, archive_path)

        golden_path = os.path.join(testdata_path, 'basic.yaml')
        with open(golden_path, 'r') as golden_file:
            expected = yaml.safe_load(golden_file)
        actual = self._get_yaml_from_zip(archive_path)

        # Comment next line for generating golden yaml.
        self.assertEqual(expected, actual)
    finally:
        # Replace next line with commented line for gathering golden yaml.
        shutil.rmtree(scratch_dir)
        # print(scratch_dir)
|
|
|
|
def test_composing_workflow(self):
    """Compile a simple pipeline and a larger pipeline composed from it.

    Only the composed pipeline is compared with a golden file
    (``compose.yaml``); the simple one merely has to compile.
    """
    self.maxDiff = None

    testdata_path = os.path.join(os.path.dirname(__file__), 'testdata')
    sys.path.append(testdata_path)
    import compose

    scratch_dir = tempfile.mkdtemp()
    try:
        # First make sure the simple pipeline can be compiled.
        simple_archive = os.path.join(scratch_dir, 'simple.zip')
        compiler.Compiler().compile(compose.save_most_frequent_word, simple_archive)

        # Then make sure the composed pipeline can be compiled and also compare with golden.
        composed_archive = os.path.join(scratch_dir, 'compose.zip')
        compiler.Compiler().compile(compose.download_save_most_frequent_word, composed_archive)

        golden_path = os.path.join(testdata_path, 'compose.yaml')
        with open(golden_path, 'r') as golden_file:
            expected = yaml.safe_load(golden_file)
        actual = self._get_yaml_from_zip(composed_archive)

        # Comment next line for generating golden yaml.
        self.assertEqual(expected, actual)
    finally:
        # Replace next line with commented line for gathering golden yaml.
        shutil.rmtree(scratch_dir)
        # print(scratch_dir)
|
|
|
|
def test_package_compile(self):
    """Build the test package as an sdist and compile a pipeline from it.

    The sdist is built with ``setup.py`` from inside the test package
    directory, ``dsl-compile --package`` is run on the resulting tarball, and
    the compiled workflow is compared with ``compose.yaml``.
    """
    test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
    test_package_dir = os.path.join(test_data_dir, 'testpackage')
    tmpdir = tempfile.mkdtemp()
    cwd = os.getcwd()
    try:
        os.chdir(test_package_dir)
        subprocess.check_call(['python3', 'setup.py', 'sdist', '--format=gztar', '-d', tmpdir])
        package_path = os.path.join(tmpdir, 'testsample-0.1.tar.gz')
        target_zip = os.path.join(tmpdir, 'compose.zip')
        subprocess.check_call([
            'dsl-compile', '--package', package_path, '--namespace', 'mypipeline',
            '--output', target_zip, '--function', 'download_save_most_frequent_word'])
        with open(os.path.join(test_data_dir, 'compose.yaml'), 'r') as f:
            golden = yaml.safe_load(f)
        compiled = self._get_yaml_from_zip(target_zip)

        self.maxDiff = None
        self.assertEqual(golden, compiled)
    finally:
        # Restore the working directory first: if removing the temporary
        # directory fails, later tests must still run from the original cwd.
        try:
            os.chdir(cwd)
        finally:
            shutil.rmtree(tmpdir)
|
|
|
|
def _test_py_compile_zip(self, file_base_name):
    """Run ``dsl-compile`` with a ``.zip`` output and compare with the golden YAML.

    Args:
      file_base_name: base name shared by ``<name>.py`` (the pipeline) and
        ``<name>.yaml`` (the golden file) in the testdata directory.
    """
    self.maxDiff = None
    testdata_path = os.path.join(os.path.dirname(__file__), 'testdata')
    pipeline_source = os.path.join(testdata_path, file_base_name + '.py')
    golden_path = os.path.join(testdata_path, file_base_name + '.yaml')

    scratch_dir = tempfile.mkdtemp()
    try:
        output_archive = os.path.join(scratch_dir, file_base_name + '.zip')
        command = ['dsl-compile', '--py', pipeline_source, '--output', output_archive]
        subprocess.check_call(command)

        with open(golden_path, 'r') as golden_file:
            expected = yaml.safe_load(golden_file)
        actual = self._get_yaml_from_zip(output_archive)

        self.assertEqual(expected, actual)
    finally:
        shutil.rmtree(scratch_dir)
|
|
|
|
def _test_py_compile_targz(self, file_base_name):
    """Run ``dsl-compile`` with a ``.tar.gz`` output and compare with the golden YAML.

    Args:
      file_base_name: base name shared by ``<name>.py`` (the pipeline) and
        ``<name>.yaml`` (the golden file) in the testdata directory.
    """
    self.maxDiff = None
    testdata_path = os.path.join(os.path.dirname(__file__), 'testdata')
    pipeline_source = os.path.join(testdata_path, file_base_name + '.py')
    golden_path = os.path.join(testdata_path, file_base_name + '.yaml')

    scratch_dir = tempfile.mkdtemp()
    try:
        output_archive = os.path.join(scratch_dir, file_base_name + '.tar.gz')
        command = ['dsl-compile', '--py', pipeline_source, '--output', output_archive]
        subprocess.check_call(command)

        with open(golden_path, 'r') as golden_file:
            expected = yaml.safe_load(golden_file)
        actual = self._get_yaml_from_tar(output_archive)

        self.assertEqual(expected, actual)
    finally:
        shutil.rmtree(scratch_dir)
|
|
|
|
def _test_py_compile_yaml(self, file_base_name):
    """Run ``dsl-compile`` with a plain ``.yaml`` output and compare with the golden YAML.

    Args:
      file_base_name: base name shared by ``<name>.py`` (the pipeline) and
        ``<name>.yaml`` (the golden file) in the testdata directory.
    """
    test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
    py_file = os.path.join(test_data_dir, file_base_name + '.py')
    tmpdir = tempfile.mkdtemp()
    try:
        target_yaml = os.path.join(tmpdir, file_base_name + '-pipeline.yaml')
        subprocess.check_call([
            'dsl-compile', '--py', py_file, '--output', target_yaml])
        with open(os.path.join(test_data_dir, file_base_name + '.yaml'), 'r') as f:
            golden = yaml.safe_load(f)

        # The compiled file lives in tmpdir, not in the testdata directory,
        # so it is opened by its own path.
        with open(target_yaml, 'r') as f:
            compiled = yaml.safe_load(f)

        self.maxDiff = None
        self.assertEqual(golden, compiled)
    finally:
        shutil.rmtree(tmpdir)
|
|
|
|
def test_py_compile_artifact_location(self):
    """Test the configurable artifact location pipeline (YAML output)."""
    sample_name = 'artifact_location'
    self._test_py_compile_yaml(sample_name)
|
|
|
|
def test_py_compile_basic(self):
    """Test the basic sequential pipeline (zip output)."""
    sample_name = 'basic'
    self._test_py_compile_zip(sample_name)
|
|
|
|
def test_py_compile_with_sidecar(self):
    """Test the pipeline that uses a sidecar (YAML output)."""
    sample_name = 'sidecar'
    self._test_py_compile_yaml(sample_name)
|
|
|
|
def test_py_compile_with_pipelineparams(self):
    """Test the pipeline with multiple pipeline params (YAML output)."""
    sample_name = 'pipelineparams'
    self._test_py_compile_yaml(sample_name)
|
|
|
|
def test_py_compile_condition(self):
    """Test the pipeline with conditions, sample ``coin`` (zip output)."""
    sample_name = 'coin'
    self._test_py_compile_zip(sample_name)
|
|
|
|
def test_py_compile_immediate_value(self):
    """Test the pipeline with an immediate value parameter (tar.gz output)."""
    sample_name = 'immediate_value'
    self._test_py_compile_targz(sample_name)
|
|
|
|
def test_py_compile_default_value(self):
    """Test the pipeline whose parameter has a default value (tar.gz output)."""
    sample_name = 'default_value'
    self._test_py_compile_targz(sample_name)
|
|
|
|
def test_py_volume(self):
    """Test the pipeline with a volume and a volume mount (YAML output)."""
    sample_name = 'volume'
    self._test_py_compile_yaml(sample_name)
|
|
|
|
def test_py_retry(self):
    """Test the retry functionality sample (YAML output)."""
    sample_name = 'retry'
    self._test_py_compile_yaml(sample_name)
|
|
|
|
def test_py_image_pull_secret(self):
    """Test the ``imagepullsecret`` pipeline (YAML output)."""
    sample_name = 'imagepullsecret'
    self._test_py_compile_yaml(sample_name)
|
|
|
|
def test_py_recursive_do_while(self):
    """Test the ``recursive_do_while`` pipeline (YAML output)."""
    sample_name = 'recursive_do_while'
    self._test_py_compile_yaml(sample_name)
|
|
|
|
def test_py_recursive_while(self):
    """Test the ``recursive_while`` pipeline (YAML output)."""
    sample_name = 'recursive_while'
    self._test_py_compile_yaml(sample_name)
|
|
|
|
def test_py_resourceop_basic(self):
    """Test the ``resourceop_basic`` pipeline (YAML output)."""
    sample_name = 'resourceop_basic'
    self._test_py_compile_yaml(sample_name)
|
|
|
|
def test_py_volumeop_basic(self):
    """Test the ``volumeop_basic`` pipeline (YAML output)."""
    sample_name = 'volumeop_basic'
    self._test_py_compile_yaml(sample_name)
|
|
|
|
def test_py_volumeop_parallel(self):
    """Test the ``volumeop_parallel`` pipeline (YAML output)."""
    sample_name = 'volumeop_parallel'
    self._test_py_compile_yaml(sample_name)
|
|
|
|
def test_py_volumeop_dag(self):
    """Test the ``volumeop_dag`` pipeline (YAML output)."""
    sample_name = 'volumeop_dag'
    self._test_py_compile_yaml(sample_name)
|
|
|
|
def test_py_volume_snapshotop_sequential(self):
    """Test the ``volume_snapshotop_sequential`` pipeline (YAML output)."""
    sample_name = 'volume_snapshotop_sequential'
    self._test_py_compile_yaml(sample_name)
|
|
|
|
def test_py_volume_snapshotop_rokurl(self):
    """Test pipeline volume_snapshotop_rokurl."""
    # The docstring names the sample actually compiled below; unittest shows
    # it as the test's short description in verbose output.
    self._test_py_compile_yaml('volume_snapshotop_rokurl')
|
|
|
|
def test_py_volumeop_sequential(self):
    """Test the ``volumeop_sequential`` pipeline (YAML output)."""
    sample_name = 'volumeop_sequential'
    self._test_py_compile_yaml(sample_name)
|
|
|
|
def test_py_param_substitutions(self):
    """Test the ``param_substitutions`` pipeline (YAML output)."""
    sample_name = 'param_substitutions'
    self._test_py_compile_yaml(sample_name)
|
|
|
|
def test_type_checking_with_consistent_types(self):
    """Compile with ``type_check=True`` when pipeline and component types agree.

    The pipeline parameter ``a`` is annotated with the same GCSPath
    description as the component input ``field_m``; the test passes when
    compilation completes without raising.
    """
    @component
    def a_op(field_m: {'GCSPath': {'path_type': 'file', 'file_type':'tsv'}}, field_o: 'Integer'):
        return ContainerOp(
            name='operator a',
            image='gcr.io/ml-pipeline/component-b',
            arguments=[
                '--field-l', field_m,
                '--field-o', field_o,
            ],
        )

    @pipeline(
        name='p1',
        description='description1'
    )
    def my_pipeline(a: {'GCSPath': {'path_type':'file', 'file_type': 'tsv'}}='good', b: Integer()=12):
        a_op(field_m=a, field_o=b)

    # The pipeline is defined inline, so nothing from the testdata directory
    # is imported and sys.path is left untouched.
    tmpdir = tempfile.mkdtemp()
    try:
        simple_package_path = os.path.join(tmpdir, 'simple.tar.gz')
        compiler.Compiler().compile(my_pipeline, simple_package_path, type_check=True)
    finally:
        shutil.rmtree(tmpdir)
|
|
|
|
def test_type_checking_with_inconsistent_types(self):
    """Compile a pipeline whose parameter type disagrees with the component input.

    The pipeline parameter ``a`` declares ``file_type`` ``csv`` while the
    component input ``field_m`` declares ``tsv``. Compiling with
    ``type_check=True`` must raise ``InconsistentTypeException``; compiling
    with ``type_check=False`` must complete without raising.
    """
    @component
    def a_op(field_m: {'GCSPath': {'path_type': 'file', 'file_type':'tsv'}}, field_o: 'Integer'):
        return ContainerOp(
            name='operator a',
            image='gcr.io/ml-pipeline/component-b',
            arguments=[
                '--field-l', field_m,
                '--field-o', field_o,
            ],
        )

    @pipeline(
        name='p1',
        description='description1'
    )
    def my_pipeline(a: {'GCSPath': {'path_type':'file', 'file_type': 'csv'}}='good', b: Integer()=12):
        a_op(field_m=a, field_o=b)

    # The pipeline is defined inline, so nothing from the testdata directory
    # is imported and sys.path is left untouched.
    tmpdir = tempfile.mkdtemp()
    try:
        simple_package_path = os.path.join(tmpdir, 'simple.tar.gz')
        with self.assertRaises(InconsistentTypeException):
            compiler.Compiler().compile(my_pipeline, simple_package_path, type_check=True)
        # With type checking disabled the same pipeline must compile.
        compiler.Compiler().compile(my_pipeline, simple_package_path, type_check=False)
    finally:
        shutil.rmtree(tmpdir)
|
|
|
|
def test_compile_pipeline_with_after(self):
    """Compile a pipeline in which one task is ordered after another with ``after``.

    There is no explicit assertion: the test passes when ``_compile`` returns
    without raising.
    """
    def op():
        return dsl.ContainerOp(name='Some component name', image='image')

    @dsl.pipeline(name='Pipeline', description='')
    def pipeline():
        upstream_task = op()
        downstream_task = op()
        downstream_task.after(upstream_task)

    compiler.Compiler()._compile(pipeline)
|
|
|
|
def _test_op_to_template_yaml(self, ops, file_base_name):
    """Compare the template compiled from ``ops`` with a golden YAML template.

    The expected value is the first entry of ``spec.templates`` in
    ``testdata/<file_base_name>.yaml``. The template name and the name of the
    first output parameter are removed from both sides before comparing.

    Args:
      ops: the op handed to ``Compiler._op_to_template``.
      file_base_name: base name (without ``.yaml``) of the golden file.
    """
    test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
    target_yaml = os.path.join(test_data_dir, file_base_name + '.yaml')
    with open(target_yaml, 'r') as f:
        expected = yaml.safe_load(f)['spec']['templates'][0]

    compiled_template = compiler.Compiler()._op_to_template(ops)

    del compiled_template['name'], expected['name']
    del compiled_template['outputs']['parameters'][0]['name'], expected['outputs']['parameters'][0]['name']

    # Use the unittest assertion rather than a bare assert: it is not stripped
    # under ``python -O`` and reports a readable diff on mismatch.
    self.maxDiff = None
    self.assertEqual(expected, compiled_template)
|
|
|
|
def test_tolerations(self):
    """Test the template of a ContainerOp that carries a toleration.

    The op is compared with the golden ``tolerations`` YAML through
    ``_test_op_to_template_yaml``.
    """
    download_op = dsl.ContainerOp(
        name='download',
        image='busybox',
        command=['sh', '-c'],
        arguments=['sleep 10; wget localhost:5678 -O /tmp/results.txt'],
        file_outputs={'downloaded': '/tmp/results.txt'})
    gpu_toleration = V1Toleration(
        effect='NoSchedule',
        key='gpu',
        operator='Equal',
        value='run')
    # Keep whatever add_toleration returns, exactly like the chained form.
    download_op = download_op.add_toleration(gpu_toleration)

    self._test_op_to_template_yaml(download_op, file_base_name='tolerations')
|
|
|
|
def test_op_transformers(self):
    """Check that an op transformer registered on the pipeline conf reaches every op.

    The pipeline creates three ops and appends a transformer that calls
    ``set_retry(5)``; every compiled template that has a container must then
    report a retry limit of 5.
    """
    def some_op():
        return dsl.ContainerOp(
            name='sleep',
            image='busybox',
            command=['sleep 1'],
        )

    @dsl.pipeline(name='some_pipeline', description='')
    def some_pipeline():
        for _ in range(3):
            some_op()

        # NOTE(review): placed inside the pipeline function on the assumption
        # that get_pipeline_conf() needs an active pipeline context - confirm.
        dsl.get_pipeline_conf().op_transformers.append(lambda op: op.set_retry(5))

    compiled_workflow = compiler.Compiler()._compile(some_pipeline)
    container_templates = [
        template
        for template in compiled_workflow['spec']['templates']
        if template.get('container', None)
    ]
    for template in container_templates:
        self.assertEqual(template['retryStrategy']['limit'], 5)
|