# pipelines/samples/test/two_step_test.py

# Copyright 2021 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Two step v2-compatible pipeline."""
# %%
from __future__ import annotations

import unittest
from pprint import pprint

import kfp.deprecated as kfp
import kfp_server_api
from ml_metadata.proto import Execution

from kfp.samples.test.utils import KfpMlmdClient, KfpTask, TestCase, run_pipeline_func

from .two_step import two_step_pipeline


def verify_tasks(t: unittest.TestCase, tasks: dict[str, KfpTask]):
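    """Assert that the MLMD executions recorded for the two steps match the
    expected task dicts (names, parameters, artifacts, and final state)."""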
    task_names = [*tasks.keys()]
    t.assertCountEqual(task_names, ['train-op', 'preprocess'], 'task names')
    preprocess = tasks['preprocess']
    train = tasks['train-op']
    pprint('======= preprocess task =======')
    pprint(preprocess.get_dict())
    pprint('======= train task =======')
    pprint(train.get_dict())
    pprint('==============')
    t.assertEqual(
        {
            'name': 'preprocess',
            'inputs': {
                'artifacts': [],
                'parameters': {
                    'some_int': 1234,
                    'uri': 'uri-to-import'
                }
            },
            'outputs': {
                'artifacts': [{
                    'metadata': {
                        'display_name': 'output_dataset_one',
                    },
                    'name': 'output_dataset_one',
                    'type': 'system.Dataset'
                }],
                'parameters': {
                    'output_parameter_one': 1234
                }
            },
            'type': 'system.ContainerExecution',
            'state': Execution.State.COMPLETE,
        }, preprocess.get_dict())
    t.assertEqual(
        {
            'name': 'train-op',
            'inputs': {
                'artifacts': [{
                    'metadata': {
                        'display_name': 'output_dataset_one',
                    },
                    'name': 'dataset',
                    'type': 'system.Dataset',
                }],
                'parameters': {
                    'num_steps': 1234
                }
            },
            'outputs': {
                'artifacts': [{
                    'metadata': {
                        'display_name': 'model',
                    },
                    'name': 'model',
                    'type': 'system.Model',
                }],
                'parameters': {}
            },
            'type': 'system.ContainerExecution',
            'state': Execution.State.COMPLETE,
        }, train.get_dict())


def verify_artifacts(t: unittest.TestCase, tasks: dict, artifact_uri_prefix):
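    """Assert that every output artifact URI starts with the given prefix."""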
    for task in tasks.values():
        for artifact in task.outputs.artifacts:
            t.assertTrue(artifact.uri.startswith(artifact_uri_prefix))


def verify(run: kfp_server_api.ApiRun, mlmd_connection_config, **kwargs):
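    """Verify the run succeeded and its MLMD tasks match the expectations."""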
    t = unittest.TestCase()
    t.maxDiff = None  # we always want to see full diff
    t.assertEqual(run.status, 'Succeeded')
    client = KfpMlmdClient(mlmd_connection_config=mlmd_connection_config)
    tasks = client.get_tasks(run_id=run.id)
    verify_tasks(t, tasks)


def verify_with_default_pipeline_root(run: kfp_server_api.ApiRun,
                                      mlmd_connection_config, **kwargs):
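    """Like verify, but also checks that artifacts landed under the default
    MinIO pipeline root."""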
    t = unittest.TestCase()
    t.maxDiff = None  # we always want to see full diff
    t.assertEqual(run.status, 'Succeeded')
    client = KfpMlmdClient(mlmd_connection_config=mlmd_connection_config)
    tasks = client.get_tasks(run_id=run.id)
    verify_tasks(t, tasks)
    verify_artifacts(t, tasks, 'minio://mlpipeline/v2/artifacts')


def verify_with_specific_pipeline_root(run: kfp_server_api.ApiRun,
                                       mlmd_connection_config, **kwargs):
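    """Like verify, but also checks that artifacts landed under an
    overridden pipeline root."""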
    t = unittest.TestCase()
    t.maxDiff = None  # we always want to see full diff
    t.assertEqual(run.status, 'Succeeded')
    client = KfpMlmdClient(mlmd_connection_config=mlmd_connection_config)
    tasks = client.get_tasks(run_id=run.id)
    verify_tasks(t, tasks)
    verify_artifacts(t, tasks, 'minio://mlpipeline/override/artifacts')


if __name__ == '__main__':
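    # run_pipeline_func submits one run per TestCase and, when a verify_func
    # is set, calls it with the finished run (see kfp.samples.test.utils).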
    run_pipeline_func([
        TestCase(
            pipeline_func=two_step_pipeline,
            mode=kfp.dsl.PipelineExecutionMode.V1_LEGACY),
        # Cannot test V2_ENGINE and V1_LEGACY using the same code.
        # V2_ENGINE requires importing everything from the v2 namespace.
        # TestCase(
        #     pipeline_func=two_step_pipeline,
        #     verify_func=verify,
        #     mode=kfp.dsl.PipelineExecutionMode.V2_ENGINE,
        # ),
    ])
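    # The verify_with_*_pipeline_root helpers above are meant to be wired up
    # the same way, e.g. (hypothetical sketch, assuming a V2_ENGINE TestCase
    # like the commented-out one above):
    # TestCase(
    #     pipeline_func=two_step_pipeline,
    #     verify_func=verify_with_default_pipeline_root,
    #     mode=kfp.dsl.PipelineExecutionMode.V2_ENGINE,
    # ),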
# %%