test: update README of v2 sample test (#7121)

* test: update README of v2 sample test

* address review comments by @zijianjoy

* use absolute paths, suggested by @zijianjoy

* Update README.md
This commit is contained in:
Yuan (Bob) Gong 2021-12-30 16:22:54 +08:00 committed by GitHub
parent d60bc99bb6
commit ca6e05591d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 78 additions and 55 deletions

View File

@ -1,6 +1,7 @@
# Test Samples
# Sample Test
These samples are built for testing purposes only.
The [config.yaml](./config.yaml) holds test config for kubeflow-pipelines-samples-v2 test.
Refer to [V2 samples test documentation](https://github.com/kubeflow/pipelines/tree/master/v2/test) for more details.
## Test Samples
Pipeline samples in this folder are built for testing purposes only.

View File

@ -210,8 +210,6 @@ def _run_test(callback):
def main(
pipeline_root: Optional[str] = None, # example
host: Optional[str] = None,
external_host: Optional[str] = None,
launcher_v2_image: Optional[str] = None,
driver_image: Optional[str] = None,
experiment: str = 'v2_sample_test_samples',
@ -219,11 +217,10 @@ def _run_test(callback):
metadata_service_port: int = 8080,
):
"""Test file CLI entrypoint used by Fire.
To configure KFP endpoint, configure env vars following:
https://www.kubeflow.org/docs/components/pipelines/sdk/connect-api/#configure-sdk-client-by-environment-variables.
KFP UI endpoint can be configured by KF_PIPELINES_UI_ENDPOINT env var.
:param host: Hostname pipelines can access, defaults to 'http://ml-pipeline:8888'.
:type host: str, optional
:param external_host: External hostname users can access from their browsers.
:type external_host: str, optional
:param pipeline_root: pipeline root that holds intermediate
artifacts, example gs://your-bucket/path/to/workdir.
:type pipeline_root: str, optional
@ -239,12 +236,6 @@ def _run_test(callback):
:type metadata_service_port: int, optional
"""
# Default to env values, so people can set up their env and run these
# tests without specifying any commands.
if host is None:
host = os.getenv('KFP_HOST', 'http://ml-pipeline:8888')
if external_host is None:
external_host = host
if pipeline_root is None:
pipeline_root = os.getenv('KFP_PIPELINE_ROOT')
if not pipeline_root:
@ -253,19 +244,28 @@ def _run_test(callback):
logger.warning(
f'KFP_OUTPUT_DIRECTORY env var is left for backward compatibility, please use KFP_PIPELINE_ROOT instead.'
)
logger.info(f'KFP_PIPELINE_ROOT={pipeline_root}')
if metadata_service_host is None:
metadata_service_host = os.getenv('METADATA_GRPC_SERVICE_HOST',
'metadata-grpc-service')
logger.info(f'METADATA_GRPC_SERVICE_HOST={metadata_service_host}')
if launcher_v2_image is None:
launcher_v2_image = os.getenv('KFP_LAUNCHER_V2_IMAGE')
if not launcher_v2_image:
raise Exception("launcher_v2_image is empty")
logger.info(f'KFP_LAUNCHER_V2_IMAGE={launcher_v2_image}')
if driver_image is None:
driver_image = os.getenv('KFP_DRIVER_IMAGE')
if not driver_image:
raise Exception("driver_image is empty")
client = kfp.Client(host=host)
logger.info(f'KFP_DRIVER_IMAGE={driver_image}')
client = kfp.Client()
# TODO(Bobgy): avoid using private fields when getting loaded config
kfp_endpoint = client._existing_config.host
kfp_ui_endpoint = client._uihost
logger.info(f'KF_PIPELINES_ENDPOINT={kfp_endpoint}')
if kfp_ui_endpoint != kfp_endpoint:
logger.info(f'KF_PIPELINES_UI_ENDPOINT={kfp_ui_endpoint}')
def run_pipeline(
pipeline_func: Optional[Callable],
@ -347,7 +347,7 @@ def _run_test(callback):
# There is no run_result when dry_run.
return
print("Run details page URL:")
print(f"{external_host}/#/runs/details/{run_result.run_id}")
print(f"{kfp_ui_endpoint}/#/runs/details/{run_result.run_id}")
run_detail = run_result.wait_for_run_completion(timeout)
# Hide detailed information for pretty printing
workflow_spec = run_detail.run.pipeline_spec.workflow_manifest
@ -362,15 +362,8 @@ def _run_test(callback):
# When running locally, port forward MLMD grpc service to localhost:8080 by:
#
# ```bash
# NAMESPACE=kubeflow kubectl port-forward svc/metadata-grpc-service 8080:8080 -n $NAMESPACE
# ```
#
# Then you can uncomment the following config instead.
# mlmd_connection_config = metadata_store_pb2.MetadataStoreClientConfig(
# host='localhost',
# port=8080,
# )
# 1. NAMESPACE=kubeflow kubectl port-forward svc/metadata-grpc-service 8080:8080 -n $NAMESPACE
# 2. Configure env var METADATA_GRPC_SERVICE_HOST=localhost.
mlmd_connection_config = metadata_store_pb2.MetadataStoreClientConfig(
host=metadata_service_host,
port=metadata_service_port,

View File

@ -101,10 +101,8 @@ class Client(object):
host: The host name to use to talk to Kubeflow Pipelines. If not set, the in-cluster
service DNS name will be used, which only works if the current environment is a pod
in the same cluster (such as a Jupyter instance spawned by Kubeflow's
JupyterHub). If you have a different connection to cluster, such as a kubectl
proxy connection, then set it to something like "127.0.0.1:8080/pipeline.
If you connect to an IAP enabled cluster, set it to
https://<your-deployment>.endpoints.<your-project>.cloud.goog/pipeline".
JupyterHub).
Set the host based on https://www.kubeflow.org/docs/components/pipelines/sdk/connect-api/.
client_id: The client ID used by Identity-Aware Proxy.
namespace: The namespace where the kubeflow pipeline system is run.
other_client_id: The client ID used to obtain the auth codes and refresh tokens.

View File

@ -15,21 +15,21 @@ all: sample-test
sample-test: upload
# The -u flag makes python output unbuffered, so that we can see real-time logs.
# Reference: https://stackoverflow.com/a/107717
python -u sample_test.py \
export KF_PIPELINES_ENDPOINT=$(HOST) \
&& python -u sample_test.py \
--samples_config samples/test/config.yaml \
--context $(GCS_ROOT)/src/context.tar.gz \
--host $(HOST) \
--gcs_root $(GCS_ROOT)/data \
--gcr_root $(GCR_ROOT) \
--kfp_package_path "$(KFP_PACKAGE_PATH)"
.PHONY: integration-test
integration-test: upload
python -u sample_test.py \
export KF_PIPELINES_ENDPOINT=$(HOST) \
&& python -u sample_test.py \
--samples_config samples/test/config-integration.yaml \
--timeout_mins 60 \
--context $(GCS_ROOT)/src/context.tar.gz \
--host $(HOST) \
--gcs_root $(GCS_ROOT)/data \
--gcr_root $(GCR_ROOT) \
--kfp_package_path "$(KFP_PACKAGE_PATH)"

View File

@ -1,19 +1,26 @@
# Kubeflow Pipelines Sample Test Infra V2
The following tests are running on sample test infra v2:
* kubeflow-pipelines-samples-v2
* kubeflow-pipelines-integration-v2
Note, the sample test only runs on Google Cloud at the moment. Contributions
are welcome if you want to adapt it to other platforms.
Quick Links:
* [prowjob config](https://github.com/GoogleCloudPlatform/oss-test-infra/blob/48b09567c8df28fab2d3f2fb6df86defa12207fb/prow/prowjobs/kubeflow/pipelines/kubeflow-pipelines-presubmits.yaml#L184-L192)
* [prowjob config](https://github.com/GoogleCloudPlatform/oss-test-infra/blob/8e2b1e0b57d0bf7adf8e9f3cef6a98af25012412/prow/prowjobs/kubeflow/pipelines/kubeflow-pipelines-presubmits.yaml#L185-L203)
* [past prow jobs](https://oss-prow.knative.dev/job-history/gs/oss-prow/pr-logs/directory/kubeflow-pipelines-samples-v2)
* [sample test config](../../samples/test/config.yaml)
* Sample test configs
* [kubeflow-pipelines-samples-v2 test config](/samples/test/config.yaml)
* [kubeflow-pipelines-integration-v2 test config](/samples/test/config-integration.yaml)
* [KFP test cluster hostname](https://github.com/kubeflow/testing/blob/master/test-infra/kfp/endpoint)
* [Infra as Code configuration for kfp-ci project](https://github.com/kubeflow/testing/tree/master/test-infra/kfp).
## How to access the KFP UI running these tests?
kubeflow-pipelines-sample-v2 test pipeline runs on [kfp-standalone-1 cluster](https://console.cloud.google.com/kubernetes/clusters/details/us-central1/kfp-standalone-1/details?folder=&organizationId=&project=kfp-ci),
Test Kubeflow Pipelines run on [kfp-standalone-1 cluster](https://console.cloud.google.com/kubernetes/clusters/details/us-central1/kfp-standalone-1/details?folder=&organizationId=&project=kfp-ci),
`kfp-ci` project, `kubeflow.org` organization.
The test script prints the KFP host URL in its logs. You need to have permission to
@ -23,9 +30,11 @@ You need to join [Kubeflow ci-team google group](https://github.com/kubeflow/int
has very wide permissions to test infra, so access will only be granted to core
developers.
Currently, it's not possible to grant KFP UI only permission, but we can grant
<!--
TODO(Bobgy): Currently, it's not possible to grant KFP UI only permission, but we can consider granting
such access to [Kubeflow ci-viewer google group](https://github.com/kubeflow/internal-acls/blob/master/google_groups/groups/ci-viewer.yaml).
Contact @Bobgy if you have such a need.
Contact @zijianjoy if you have such a need.
-->
## How to run the entire sample test suite in your own KFP?
@ -61,15 +70,28 @@ as expected.
For why the caveat exists, refer to context rule in [Makefile](./Makefile).
Run integration test by:
```bash
make integration-test
```
However, integration tests are configured to run on the kfp-ci project, so modify the tests locally with your own configs:
* [parameterized_tfx_oss_test.py](/samples/core/parameterized_tfx_oss/parameterized_tfx_oss_test.py)
* [dataflow_test.py](/samples/core/dataflow/dataflow_test.py)
## How to develop one single sample?
One-time environment configurations:
```bash
# These env vars are loaded by default; we recommend configuring them in your
# .bashrc or .zshrc
export KFP_HOST=https://your.KFP.host
export KF_PIPELINES_ENDPOINT=https://your.KFP.host
export KFP_PIPELINE_ROOT=gs://your-bucket/path/to/output/dir
export METADATA_GRPC_SERVICE_HOST=localhost
export PATH="$HOME/bin:$PATH" # Some CLI tools will be installed to ~/bin.
export PATH="$HOME/bin:$PATH" # The KFP v2 backend compiler CLI tool will be installed to ~/bin by make install-compiler
# optional, when you want to override images to your dev project
# export KFP_LAUNCHER_V2_IMAGE=gcr.io/your-project/dev/kfp-launcher-v2:latest
# export KFP_DRIVER_IMAGE=gcr.io/your-project/kfp-driver:latest
@ -85,11 +107,18 @@ cd "${REPO_ROOT}/v2"
# Note, when you update backend compiler code, you need to run this again!
make install-compiler
# Note, for tests that use metadata grpc api, you should port-forward it locally in a separate terminal by:
# Note: v2 tests use the metadata gRPC API; you need to port-forward it locally in a separate terminal by:
cd "${REPO_ROOT}/v2/test"
make mlmd-port-forward
# To run a single sample test:
# Install python dependencies
cd "${REPO_ROOT}/v2/test"
pip install -r requirements.txt
```
To run a single sample test:
```bash
cd "${REPO_ROOT}"
# if you have a sample test at samples/path/to/your/sample_test.py
python -m samples.path.to.your.sample_test
@ -99,16 +128,19 @@ python -m samples.path.to.your.sample_test --help
## How to add a sample to this sample test?
Edit [samples/test/config.yaml](../../samples/test/config.yaml) and add your own sample.
Edit [samples/test/config.yaml](/samples/test/config.yaml) and add your own sample.
You can also add other samples not in the `samples/test` folder.
Your sample test needs to conform to the standard interface in
[components/run_sample.yaml](components/run_sample.yaml). You can refer to
existing [sample tests](../../samples/test) for how to implement the interface.
existing [sample tests](/samples/test) for how to implement the interface.
## How can a sample verify MLMD status of a run?
Some samples can be used as examples for various cases:
Refer to [an existing test](../../samples/v2/hello_world_test.py).
* Pipeline from a notebook, [multiple_outputs_test.py](/samples/core/multiple_outputs/multiple_outputs_test.py).
* A sample that does not submit a pipeline, [dsl_static_type_checking_test.py](/samples/core/dsl_static_type_checking/dsl_static_type_checking_test.py).
* V2 pipeline and verification, [hello_world_test.py](/samples/v2/hello_world_test.py).
* V2 pipeline and control flow, [condition_test.py](/samples/core/condition/condition_test.py).
## FAQs

View File

@ -32,8 +32,8 @@ implementation:
backend_compiler_path="$0"
sample_path="$1"
output_dir="$2"
host="$3"
external_host="$4"
export KF_PIPELINES_ENDPOINT="$3"
export KF_PIPELINES_UI_ENDPOINT="$4"
launcher_v2_image="$5"
driver_image="$6"
@ -45,8 +45,6 @@ implementation:
-u \
-m "$sample_path" \
--pipeline_root "$output_dir" \
--host "$host" \
--external_host "$external_host" \
--launcher_v2_image "$launcher_v2_image" \
--driver_image "$driver_image"

View File

@ -104,7 +104,6 @@ def v2_sample_test(
def main(
context: str,
host: str,
gcr_root: str,
gcs_root: str,
experiment: str = 'v2_sample_test',
@ -119,7 +118,9 @@ def main(
with open(samples_config_path, 'r') as stream:
samples_config_content = yaml.safe_load(stream)
client = kfp.Client(host=host)
client = kfp.Client()
# TODO(Bobgy): avoid using private fields when getting loaded config
host = client._existing_config.host
client.create_experiment(
name=experiment,
description='An experiment with Kubeflow Pipelines v2 sample test runs.'