# pipelines/samples/v2/sample_test.py

# Copyright 2024 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from concurrent.futures import as_completed
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
import inspect
import os
from pprint import pprint
import subprocess
from typing import List
import unittest
import uuid
import kfp
from kfp.dsl.graph_component import GraphComponent
from kubernetes import client
from kubernetes import config
from kubernetes import utils
import yaml
import functools
from kfp import dsl
def get_kfp_package_path() -> str:
    """Return the pip-installable URL of the KFP SDK under test (sdk/python)."""
    package_path = get_package_path("sdk/python")
    print(f'Using the following KFP package path for tests: {package_path}')
    return package_path
def get_kfp_pipeline_spec_path() -> str:
    """Return the pip-installable URL of kfp-pipeline-spec (api/v2alpha1/python)."""
    spec_path = get_package_path("api/v2alpha1/python")
    print(f'Using the following KFP pipeline spec path for tests: {spec_path}')
    return spec_path
def get_package_path(subdir: str) -> str:
    """Build a ``git+https`` pip URL for *subdir* of the pipelines repo.

    When PULL_NUMBER is set (CI presubmit), the URL points at the PR's
    merge ref so tests exercise the code under review; otherwise master.
    REPO_NAME allows running against a fork.

    Args:
        subdir: Repository subdirectory containing the Python package.

    Returns:
        A pip-installable ``git+https://...#subdirectory=...`` URL.
    """
    repo_name = os.environ.get('REPO_NAME', 'kubeflow/pipelines')
    # Read the env var once instead of a truthiness check followed by a
    # second indexed lookup (two lookups, race-prone).
    pull_number = os.environ.get('PULL_NUMBER')
    if pull_number:
        ref = f'refs/pull/{pull_number}/merge'
    else:
        ref = 'master'
    return f'git+https://github.com/{repo_name}.git@{ref}#subdirectory={subdir}'
# Set the component configuration BEFORE importing any pipeline modules
# To have pipeline execution code leverage source kfp-pipeline-spec
# in api/v2alpha1/python you can set:
# packages_to_install=[get_kfp_pipeline_spec_path()]
# NOTE: this rebinds dsl.component so that every @dsl.component decorator in
# the sample modules imported below installs the KFP SDK under test inside
# its component container; it must run before those imports.
dsl.component = functools.partial(
    dsl.component, kfp_package_path=get_kfp_package_path())
# Now import the pipeline modules, this way we can leverage the kfp_package and pipeline
# spec defined above
import component_with_optional_inputs
import collected_parameters
import hello_world
import parallel_after_dependency
import parallel_consume_upstream
import pipeline_container_no_input
import pipeline_with_env
import pipeline_with_placeholders
import pipeline_with_secret_as_env
import pipeline_with_secret_as_volume
import producer_consumer_param
import pipeline_with_retry
import pipeline_with_input_status_state
import subdagio
import two_step_pipeline_containerized
import nested_pipeline_opt_inputs_parent_level
import nested_pipeline_opt_inputs_nil
import nested_pipeline_opt_input_child_level
import pipeline_with_pod_metadata
import pipeline_with_workspace
from modelcar import modelcar
import pipeline_with_utils
import task_config
import pythonic_artifacts_test_pipeline
_MINUTE = 60  # seconds
# Default per-pipeline wait for run completion.
_DEFAULT_TIMEOUT = 20 * _MINUTE
# Absolute path of the samples/ directory (parent of this file's directory).
SAMPLES_DIR = os.path.realpath(os.path.dirname(os.path.dirname(__file__)))
PRE_REQ_DIR = os.path.join(SAMPLES_DIR, 'v2', 'pre-requisites')
# Manifests deployed in setUpClass and deleted in tearDownClass.
PREREQS = [os.path.join(PRE_REQ_DIR, 'test-secrets.yaml')]
# Namespace where KFP itself is installed.
_KFP_NAMESPACE = os.getenv('KFP_NAMESPACE', 'kubeflow')
# Multi-user (profile) mode switches on token auth and the user namespace.
_KFP_MULTI_USER = os.getenv('KFP_MULTI_USER', 'false').lower() == 'true'
_USER_NAMESPACE = os.getenv('_USER_NAMESPACE', 'kubeflow-user-example-com')
@dataclass
class TestCase:
    """A sample pipeline to run, plus how long to wait for it to finish."""

    # Compiled pipeline (the @dsl.pipeline-decorated function) to submit.
    pipeline_func: GraphComponent
    # Maximum seconds to wait for the run to complete.
    timeout: int = _DEFAULT_TIMEOUT
def deploy_k8s_yaml(namespace: str, yaml_file: str):
    """Apply every resource in *yaml_file* to *namespace* via the k8s API.

    Raises:
        RuntimeError: if any resource in the manifest fails to deploy.
    """
    config.load_kube_config()
    k8s_api_client = client.ApiClient()
    try:
        utils.create_from_yaml(k8s_api_client, yaml_file, namespace=namespace)
        print(f'Resource(s) from {yaml_file} deployed successfully.')
    except Exception as e:
        raise RuntimeError(f'Exception when deploying from YAML: {e}')
def delete_k8s_yaml(namespace: str, yaml_file: str):
    """Best-effort deletion of every resource listed in *yaml_file*.

    Failures are printed rather than raised so teardown keeps going.
    """
    config.load_kube_config()
    v1 = client.CoreV1Api()
    apps_v1 = client.AppsV1Api()
    # There's no utils.delete_from_yaml
    # as a workaround we manually fetch required data
    deleters = {
        'deployment':
            lambda name: apps_v1.delete_namespaced_deployment(name, namespace),
        'service':
            lambda name: v1.delete_namespaced_service(name, namespace),
        'configmap':
            lambda name: v1.delete_namespaced_config_map(name, namespace),
        'pod':
            lambda name: v1.delete_namespaced_pod(name, namespace),
        'secret':
            lambda name: v1.delete_namespaced_secret(name, namespace),
        'persistentvolumeclaim':
            lambda name: v1.delete_namespaced_persistent_volume_claim(
                name, namespace),
        'namespace':
            lambda name: client.CoreV1Api().delete_namespace(name),
    }
    try:
        with open(yaml_file, 'r') as f:
            for doc in yaml.safe_load_all(f):
                if not doc:
                    continue  # Skip empty documents
                kind = doc.get('kind', '').lower()
                name = doc['metadata']['name']
                print(f'Deleting {kind} named {name}...')
                delete_fn = deleters.get(kind)
                if delete_fn is None:
                    print(f'Skipping unsupported resource type: {kind}')
                else:
                    delete_fn(name)
        print(f'Resource(s) from {yaml_file} deleted successfully.')
    except Exception as e:
        print(f'Exception when deleting from YAML: {e}')
def get_authentication_token():
    """Get authentication token for multi-user mode.

    Returns a token string for the default-editor service account, or
    None in single-user mode or when token creation fails.
    """
    if not _KFP_MULTI_USER:
        return None
    namespace = _USER_NAMESPACE
    print(f'Creating authentication token for namespace {namespace}...')
    try:
        result = subprocess.run([
            'kubectl', '-n', namespace, 'create', 'token', 'default-editor',
            '--audience=pipelines.kubeflow.org'
        ], capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        # Best effort: log and fall back to unauthenticated client.
        print(f'Failed to create authentication token: {e}')
        print(f'stderr: {e.stderr}')
        return None
    token = result.stdout.strip()
    print('Successfully created authentication token.')
    return token
class SampleTest(unittest.TestCase):
    """Runs every v2 sample pipeline against a live KFP deployment.

    Pipelines are submitted concurrently; each run must reach SUCCEEDED
    within its TestCase timeout for the test to pass.
    """

    # KFP API server endpoint (defaults assume a local port-forward).
    _kfp_host_and_port = os.getenv('KFP_API_HOST_AND_PORT',
                                   'http://localhost:8888')
    # KFP UI endpoint; used only to print clickable run-details URLs.
    _kfp_ui_and_port = os.getenv('KFP_UI_HOST_AND_PORT',
                                 'http://localhost:8080')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Initialize client with token if in multi-user mode
        auth_token = get_authentication_token()
        if auth_token:
            self._client = kfp.Client(
                host=self._kfp_host_and_port,
                ui_host=self._kfp_ui_and_port,
                existing_token=auth_token
            )
        else:
            self._client = kfp.Client(host=self._kfp_host_and_port, ui_host=self._kfp_ui_and_port)

    @classmethod
    def setUpClass(cls):
        """Runs once before all tests."""
        print('Deploying pre-requisites....')
        # Prereq secrets go to the user namespace in multi-user mode,
        # otherwise to the KFP system namespace.
        target_namespace = _USER_NAMESPACE if _KFP_MULTI_USER else _KFP_NAMESPACE
        for p in PREREQS:
            deploy_k8s_yaml(target_namespace, p)
        print('Done deploying pre-requisites.')

    @classmethod
    def tearDownClass(cls):
        """Runs once after all tests in this class."""
        print('Cleaning up resources....')
        target_namespace = _USER_NAMESPACE if _KFP_MULTI_USER else _KFP_NAMESPACE
        for p in PREREQS:
            delete_k8s_yaml(target_namespace, p)
        print('Done clean up.')

    def test(self):
        """Submits every sample pipeline in parallel and waits for all to succeed."""
        test_cases: List[TestCase] = [
            TestCase(pipeline_func=hello_world.pipeline_hello_world),
            TestCase(pipeline_func=producer_consumer_param
                     .producer_consumer_param_pipeline),
            TestCase(pipeline_func=pipeline_container_no_input
                     .pipeline_container_no_input),
            TestCase(pipeline_func=two_step_pipeline_containerized
                     .two_step_pipeline_containerized),
            TestCase(pipeline_func=component_with_optional_inputs.pipeline),
            TestCase(pipeline_func=pipeline_with_env.pipeline_with_env),

            # The following tests are not working. Tracking issue: https://github.com/kubeflow/pipelines/issues/11053
            # TestCase(pipeline_func=pipeline_with_importer.pipeline_with_importer),
            # TestCase(pipeline_func=pipeline_with_volume.pipeline_with_volume),

            TestCase(pipeline_func=pipeline_with_secret_as_volume
                     .pipeline_secret_volume),
            TestCase(
                pipeline_func=pipeline_with_secret_as_env.pipeline_secret_env),
            TestCase(pipeline_func=subdagio.parameter.crust),
            TestCase(pipeline_func=subdagio.parameter_cache.crust),
            TestCase(pipeline_func=subdagio.mixed_parameters.crust),
            TestCase(
                pipeline_func=subdagio.multiple_parameters_namedtuple.crust),
            TestCase(pipeline_func=subdagio.parameter_oneof.crust),
            TestCase(pipeline_func=subdagio.artifact_cache.crust),
            TestCase(pipeline_func=subdagio.artifact.crust),
            TestCase(
                pipeline_func=subdagio.multiple_artifacts_namedtuple.crust),
            TestCase(pipeline_func=pipeline_with_placeholders
                     .pipeline_with_placeholders),
            TestCase(pipeline_func=modelcar.pipeline_modelcar_test),
            TestCase(
                pipeline_func=parallel_consume_upstream.loop_consume_upstream),
            TestCase(pipeline_func=parallel_after_dependency
                     .loop_with_after_dependency_set),
            TestCase(
                pipeline_func=collected_parameters.collected_param_pipeline),
            TestCase(pipeline_func=pipeline_with_retry.retry_pipeline),
            TestCase(pipeline_func=pipeline_with_input_status_state.status_state_pipeline),
            TestCase(pipeline_func=nested_pipeline_opt_inputs_parent_level.nested_pipeline_opt_inputs_parent_level),
            TestCase(pipeline_func=nested_pipeline_opt_input_child_level.nested_pipeline_opt_input_child_level),
            TestCase(pipeline_func=nested_pipeline_opt_inputs_nil.nested_pipeline_opt_inputs_nil),
            TestCase(pipeline_func=pipeline_with_pod_metadata.pipeline_with_pod_metadata),
            TestCase(pipeline_func=pipeline_with_workspace.pipeline_with_workspace),
            TestCase(pipeline_func=pipeline_with_utils.pipeline_with_utils),
            TestCase(pipeline_func=task_config.pipeline_task_config),
            TestCase(pipeline_func=pythonic_artifacts_test_pipeline.pythonic_artifacts_test_pipeline),
        ]

        # Fan out submissions; future.result() re-raises any exception from
        # run_test_case so the first failure surfaces here.
        with ThreadPoolExecutor() as executor:
            futures = [
                executor.submit(self.run_test_case, test_case.pipeline_func,
                                test_case.timeout) for test_case in test_cases
            ]
            for future in as_completed(futures):
                future.result()

    def run_test_case(self, pipeline_func: GraphComponent, timeout: int):
        """Runs one pipeline as a subTest and asserts the run SUCCEEDED.

        Args:
            pipeline_func: Compiled pipeline to submit.
            timeout: Seconds to wait for run completion.
        """
        with self.subTest(pipeline=pipeline_func, msg=pipeline_func.name):
            print(
                f'Running pipeline: {inspect.getmodule(pipeline_func.pipeline_func).__name__}/{pipeline_func.name}.'
            )
            # Unique experiment per run avoids collisions across parallel tests.
            experiment_name = f"test-{pipeline_func.name}-{uuid.uuid4().hex[:8]}"
            run_result = self._client.create_run_from_pipeline_func(
                pipeline_func=pipeline_func,
                namespace=_USER_NAMESPACE,
                experiment_name=experiment_name)
            run_response = run_result.wait_for_run_completion(timeout)
            pprint(run_response.run_details)
            print('Run details page URL:')
            print(
                f'{self._kfp_ui_and_port}/#/runs/details/{run_response.run_id}')
            self.assertEqual(run_response.state, 'SUCCEEDED')
            print(
                f'Pipeline, {inspect.getmodule(pipeline_func.pipeline_func).__name__}/{pipeline_func.name}, succeeded.'
            )
# Script entry point: discover and run the test class above.
if __name__ == '__main__':
    unittest.main()