test: update README of v2 sample test (#7121)
* test: update README of v2 sample test * address review comments by @zijianjoy * use absolute paths, suggested by @zijianjoy * Update README.md
This commit is contained in:
parent
d60bc99bb6
commit
ca6e05591d
|
|
@ -1,6 +1,7 @@
|
|||
# Test Samples
|
||||
# Sample Test
|
||||
|
||||
These samples are built for testing purposes only.
|
||||
|
||||
The [config.yaml](./config.yaml) holds test config for kubeflow-pipelines-samples-v2 test.
|
||||
Refer to [V2 samples test documentation](https://github.com/kubeflow/pipelines/tree/master/v2/test) for more details.
|
||||
|
||||
## Test Samples
|
||||
|
||||
Pipeline samples in this folder are built for testing purposes only.
|
||||
|
|
|
|||
|
|
@ -210,8 +210,6 @@ def _run_test(callback):
|
|||
|
||||
def main(
|
||||
pipeline_root: Optional[str] = None, # example
|
||||
host: Optional[str] = None,
|
||||
external_host: Optional[str] = None,
|
||||
launcher_v2_image: Optional[str] = None,
|
||||
driver_image: Optional[str] = None,
|
||||
experiment: str = 'v2_sample_test_samples',
|
||||
|
|
@ -219,11 +217,10 @@ def _run_test(callback):
|
|||
metadata_service_port: int = 8080,
|
||||
):
|
||||
"""Test file CLI entrypoint used by Fire.
|
||||
To configure KFP endpoint, configure env vars following:
|
||||
https://www.kubeflow.org/docs/components/pipelines/sdk/connect-api/#configure-sdk-client-by-environment-variables.
|
||||
KFP UI endpoint can be configured by KF_PIPELINES_UI_ENDPOINT env var.
|
||||
|
||||
:param host: Hostname pipelines can access, defaults to 'http://ml-pipeline:8888'.
|
||||
:type host: str, optional
|
||||
:param external_host: External hostname users can access from their browsers.
|
||||
:type external_host: str, optional
|
||||
:param pipeline_root: pipeline root that holds intermediate
|
||||
artifacts, example gs://your-bucket/path/to/workdir.
|
||||
:type pipeline_root: str, optional
|
||||
|
|
@ -239,12 +236,6 @@ def _run_test(callback):
|
|||
:type metadata_service_port: int, optional
|
||||
"""
|
||||
|
||||
# Default to env values, so people can set up their env and run these
|
||||
# tests without specifying any commands.
|
||||
if host is None:
|
||||
host = os.getenv('KFP_HOST', 'http://ml-pipeline:8888')
|
||||
if external_host is None:
|
||||
external_host = host
|
||||
if pipeline_root is None:
|
||||
pipeline_root = os.getenv('KFP_PIPELINE_ROOT')
|
||||
if not pipeline_root:
|
||||
|
|
@ -253,19 +244,28 @@ def _run_test(callback):
|
|||
logger.warning(
|
||||
f'KFP_OUTPUT_DIRECTORY env var is left for backward compatibility, please use KFP_PIPELINE_ROOT instead.'
|
||||
)
|
||||
logger.info(f'KFP_PIPELINE_ROOT={pipeline_root}')
|
||||
if metadata_service_host is None:
|
||||
metadata_service_host = os.getenv('METADATA_GRPC_SERVICE_HOST',
|
||||
'metadata-grpc-service')
|
||||
logger.info(f'METADATA_GRPC_SERVICE_HOST={metadata_service_host}')
|
||||
if launcher_v2_image is None:
|
||||
launcher_v2_image = os.getenv('KFP_LAUNCHER_V2_IMAGE')
|
||||
if not launcher_v2_image:
|
||||
raise Exception("launcher_v2_image is empty")
|
||||
logger.info(f'KFP_LAUNCHER_V2_IMAGE={launcher_v2_image}')
|
||||
if driver_image is None:
|
||||
driver_image = os.getenv('KFP_DRIVER_IMAGE')
|
||||
if not driver_image:
|
||||
raise Exception("driver_image is empty")
|
||||
|
||||
client = kfp.Client(host=host)
|
||||
logger.info(f'KFP_DRIVER_IMAGE={driver_image}')
|
||||
client = kfp.Client()
|
||||
# TODO(Bobgy): avoid using private fields when getting loaded config
|
||||
kfp_endpoint = client._existing_config.host
|
||||
kfp_ui_endpoint = client._uihost
|
||||
logger.info(f'KF_PIPELINES_ENDPOINT={kfp_endpoint}')
|
||||
if kfp_ui_endpoint != kfp_endpoint:
|
||||
logger.info(f'KF_PIPELINES_UI_ENDPOINT={kfp_ui_endpoint}')
|
||||
|
||||
def run_pipeline(
|
||||
pipeline_func: Optional[Callable],
|
||||
|
|
@ -347,7 +347,7 @@ def _run_test(callback):
|
|||
# There is no run_result when dry_run.
|
||||
return
|
||||
print("Run details page URL:")
|
||||
print(f"{external_host}/#/runs/details/{run_result.run_id}")
|
||||
print(f"{kfp_ui_endpoint}/#/runs/details/{run_result.run_id}")
|
||||
run_detail = run_result.wait_for_run_completion(timeout)
|
||||
# Hide detailed information for pretty printing
|
||||
workflow_spec = run_detail.run.pipeline_spec.workflow_manifest
|
||||
|
|
@ -362,15 +362,8 @@ def _run_test(callback):
|
|||
|
||||
# When running locally, port forward MLMD grpc service to localhost:8080 by:
|
||||
#
|
||||
# ```bash
|
||||
# NAMESPACE=kubeflow kubectl port-forward svc/metadata-grpc-service 8080:8080 -n $NAMESPACE
|
||||
# ```
|
||||
#
|
||||
# Then you can uncomment the following config instead.
|
||||
# mlmd_connection_config = metadata_store_pb2.MetadataStoreClientConfig(
|
||||
# host='localhost',
|
||||
# port=8080,
|
||||
# )
|
||||
# 1. NAMESPACE=kubeflow kubectl port-forward svc/metadata-grpc-service 8080:8080 -n $NAMESPACE
|
||||
# 2. Configure env var METADATA_GRPC_SERVICE_HOST=localhost.
|
||||
mlmd_connection_config = metadata_store_pb2.MetadataStoreClientConfig(
|
||||
host=metadata_service_host,
|
||||
port=metadata_service_port,
|
||||
|
|
|
|||
|
|
@ -101,10 +101,8 @@ class Client(object):
|
|||
host: The host name to use to talk to Kubeflow Pipelines. If not set, the in-cluster
|
||||
service DNS name will be used, which only works if the current environment is a pod
|
||||
in the same cluster (such as a Jupyter instance spawned by Kubeflow's
|
||||
JupyterHub). If you have a different connection to cluster, such as a kubectl
|
||||
proxy connection, then set it to something like "127.0.0.1:8080/pipeline.
|
||||
If you connect to an IAP enabled cluster, set it to
|
||||
https://<your-deployment>.endpoints.<your-project>.cloud.goog/pipeline".
|
||||
JupyterHub).
|
||||
Set the host based on https://www.kubeflow.org/docs/components/pipelines/sdk/connect-api/.
|
||||
client_id: The client ID used by Identity-Aware Proxy.
|
||||
namespace: The namespace where the kubeflow pipeline system is run.
|
||||
other_client_id: The client ID used to obtain the auth codes and refresh tokens.
|
||||
|
|
|
|||
|
|
@ -15,21 +15,21 @@ all: sample-test
|
|||
sample-test: upload
|
||||
# The -u flag makes python output unbuffered, so that we can see real time log.
|
||||
# Reference: https://stackoverflow.com/a/107717
|
||||
python -u sample_test.py \
|
||||
export KF_PIPELINES_ENDPOINT=$(HOST) \
|
||||
&& python -u sample_test.py \
|
||||
--samples_config samples/test/config.yaml \
|
||||
--context $(GCS_ROOT)/src/context.tar.gz \
|
||||
--host $(HOST) \
|
||||
--gcs_root $(GCS_ROOT)/data \
|
||||
--gcr_root $(GCR_ROOT) \
|
||||
--kfp_package_path "$(KFP_PACKAGE_PATH)"
|
||||
|
||||
.PHONY: integration-test
|
||||
integration-test: upload
|
||||
python -u sample_test.py \
|
||||
export KF_PIPELINES_ENDPOINT=$(HOST) \
|
||||
&& python -u sample_test.py \
|
||||
--samples_config samples/test/config-integration.yaml \
|
||||
--timeout_mins 60 \
|
||||
--context $(GCS_ROOT)/src/context.tar.gz \
|
||||
--host $(HOST) \
|
||||
--gcs_root $(GCS_ROOT)/data \
|
||||
--gcr_root $(GCR_ROOT) \
|
||||
--kfp_package_path "$(KFP_PACKAGE_PATH)"
|
||||
|
|
|
|||
|
|
@ -1,19 +1,26 @@
|
|||
# Kubeflow Pipelines Sample Test Infra V2
|
||||
|
||||
The following tests are running on sample test infra v2:
|
||||
|
||||
* kubeflow-pipelines-samples-v2
|
||||
* kubeflow-pipelines-integration-v2
|
||||
|
||||
Note, the sample test only runs on Google Cloud at the moment. Contributions
|
||||
are welcome if you want to adapt it to other platforms.
|
||||
|
||||
Quick Links:
|
||||
|
||||
* [prowjob config](https://github.com/GoogleCloudPlatform/oss-test-infra/blob/48b09567c8df28fab2d3f2fb6df86defa12207fb/prow/prowjobs/kubeflow/pipelines/kubeflow-pipelines-presubmits.yaml#L184-L192)
|
||||
* [prowjob config](https://github.com/GoogleCloudPlatform/oss-test-infra/blob/8e2b1e0b57d0bf7adf8e9f3cef6a98af25012412/prow/prowjobs/kubeflow/pipelines/kubeflow-pipelines-presubmits.yaml#L185-L203)
|
||||
* [past prow jobs](https://oss-prow.knative.dev/job-history/gs/oss-prow/pr-logs/directory/kubeflow-pipelines-samples-v2)
|
||||
* [sample test config](../../samples/test/config.yaml)
|
||||
* Sample test configs
|
||||
* [kubeflow-pipelines-samples-v2 test config](/samples/test/config.yaml)
|
||||
* [kubeflow-pipelines-integration-v2 test config](/samples/test/config-integration.yaml)
|
||||
* [KFP test cluster hostname](https://github.com/kubeflow/testing/blob/master/test-infra/kfp/endpoint)
|
||||
* [Infra as Code configuration for kfp-ci project](https://github.com/kubeflow/testing/tree/master/test-infra/kfp).
|
||||
|
||||
## How to access the KFP UI running these tests?
|
||||
|
||||
kubeflow-pipelines-sample-v2 test pipeline runs on [kfp-standalone-1 cluster](https://console.cloud.google.com/kubernetes/clusters/details/us-central1/kfp-standalone-1/details?folder=&organizationId=&project=kfp-ci),
|
||||
Test Kubeflow Pipelines run on [kfp-standalone-1 cluster](https://console.cloud.google.com/kubernetes/clusters/details/us-central1/kfp-standalone-1/details?folder=&organizationId=&project=kfp-ci),
|
||||
`kfp-ci` project, `kubeflow.org` organization.
|
||||
|
||||
The test script prints KFP host URL in logs. You need to have permission to
|
||||
|
|
@ -23,9 +30,11 @@ You need to join [Kubeflow ci-team google group](https://github.com/kubeflow/int
|
|||
has very wide permissions to test infra, so access will only be granted to core
|
||||
developers.
|
||||
|
||||
Currently, it's not possible to grant KFP UI only permission, but we can grant
|
||||
<!--
|
||||
TODO(Bobgy): Currently, it's not possible to grant KFP UI only permission, but we can consider granting
|
||||
such access to [Kubeflow ci-viewer google group](https://github.com/kubeflow/internal-acls/blob/master/google_groups/groups/ci-viewer.yaml).
|
||||
Contact @Bobgy if you have such a need.
|
||||
Contact @zijianjoy if you have such a need.
|
||||
-->
|
||||
|
||||
## How to run the entire sample test suite in your own KFP?
|
||||
|
||||
|
|
@ -61,15 +70,28 @@ as expected.
|
|||
|
||||
For why the caveat exists, refer to context rule in [Makefile](./Makefile).
|
||||
|
||||
Run integration test by:
|
||||
|
||||
```bash
|
||||
make integration-test
|
||||
```
|
||||
|
||||
However, integration tests are configured to run on the kfp-ci project, so modify the tests locally with your own configs:
|
||||
|
||||
* [parameterized_tfx_oss_test.py](/samples/core/parameterized_tfx_oss/parameterized_tfx_oss_test.py)
|
||||
* [dataflow_test.py](/samples/core/dataflow/dataflow_test.py)
|
||||
|
||||
## How to develop one single sample?
|
||||
|
||||
One-time environment configurations:
|
||||
|
||||
```bash
|
||||
# These env vars are loaded by default; we recommend configuring them in your
|
||||
# .bashrc or .zshrc
|
||||
export KFP_HOST=https://your.KFP.host
|
||||
export KF_PIPELINES_ENDPOINT=https://your.KFP.host
|
||||
export KFP_PIPELINE_ROOT=gs://your-bucket/path/to/output/dir
|
||||
export METADATA_GRPC_SERVICE_HOST=localhost
|
||||
export PATH="$HOME/bin:$PATH" # Some CLI tools will be installed to ~/bin.
|
||||
export PATH="$HOME/bin:$PATH" # The KFP v2 backend compiler CLI tool will be installed to ~/bin by make install-compiler
|
||||
# optional, when you want to override images to your dev project
|
||||
# export KFP_LAUNCHER_V2_IMAGE=gcr.io/your-project/dev/kfp-launcher-v2:latest
|
||||
# export KFP_DRIVER_IMAGE=gcr.io/your-project/kfp-driver:latest
|
||||
|
|
@ -85,11 +107,18 @@ cd "${REPO_ROOT}/v2"
|
|||
# Note, when you update backend compiler code, you need to run this again!
|
||||
make install-compiler
|
||||
|
||||
# Note, for tests that use metadata grpc api, you should port-forward it locally in a separate terminal by:
|
||||
# Note, v2 tests use the metadata gRPC API, so you need to port-forward it locally in a separate terminal by:
|
||||
cd "${REPO_ROOT}/v2/test"
|
||||
make mlmd-port-forward
|
||||
|
||||
# To run a single sample test:
|
||||
# Install python dependencies
|
||||
cd "${REPO_ROOT}/v2/test"
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
To run a single sample test:
|
||||
|
||||
```bash
|
||||
cd "${REPO_ROOT}"
|
||||
# if you have a sample test at samples/path/to/your/sample_test.py
|
||||
python -m samples.path.to.your.sample_test
|
||||
|
|
@ -99,16 +128,19 @@ python -m samples.path.to.your.sample_test --help
|
|||
|
||||
## How to add a sample to this sample test?
|
||||
|
||||
Edit [samples/test/config.yaml](../../samples/test/config.yaml) and add your own sample.
|
||||
Edit [samples/test/config.yaml](/samples/test/config.yaml) and add your own sample.
|
||||
You can also add other samples not in the `samples/test` folder.
|
||||
|
||||
Your sample test needs to conform to the standard interface in
|
||||
[components/run_sample.yaml](components/run_sample.yaml). You can refer to
|
||||
existing [sample tests](../../samples/test) for how to implement the interface.
|
||||
existing [sample tests](/samples/test) for how to implement the interface.
|
||||
|
||||
## How can a sample verify MLMD status of a run?
|
||||
Some samples can be used as examples for various cases:
|
||||
|
||||
Refer to [an existing test](../../samples/v2/hello_world_test.py).
|
||||
* Pipeline from a notebook, [multiple_outputs_test.py](/samples/core/multiple_outputs/multiple_outputs_test.py).
|
||||
* A sample that does not submit a pipeline, [dsl_static_type_checking_test.py](/samples/core/dsl_static_type_checking/dsl_static_type_checking_test.py).
|
||||
* V2 pipeline and verification, [hello_world_test.py](/samples/v2/hello_world_test.py).
|
||||
* V2 pipeline and control flow, [condition_test.py](/samples/core/condition/condition_test.py).
|
||||
|
||||
## FAQs
|
||||
|
||||
|
|
|
|||
|
|
@ -32,8 +32,8 @@ implementation:
|
|||
backend_compiler_path="$0"
|
||||
sample_path="$1"
|
||||
output_dir="$2"
|
||||
host="$3"
|
||||
external_host="$4"
|
||||
export KF_PIPELINES_ENDPOINT="$3"
|
||||
export KF_PIPELINES_UI_ENDPOINT="$4"
|
||||
launcher_v2_image="$5"
|
||||
driver_image="$6"
|
||||
|
||||
|
|
@ -45,8 +45,6 @@ implementation:
|
|||
-u \
|
||||
-m "$sample_path" \
|
||||
--pipeline_root "$output_dir" \
|
||||
--host "$host" \
|
||||
--external_host "$external_host" \
|
||||
--launcher_v2_image "$launcher_v2_image" \
|
||||
--driver_image "$driver_image"
|
||||
|
||||
|
|
|
|||
|
|
@ -104,7 +104,6 @@ def v2_sample_test(
|
|||
|
||||
def main(
|
||||
context: str,
|
||||
host: str,
|
||||
gcr_root: str,
|
||||
gcs_root: str,
|
||||
experiment: str = 'v2_sample_test',
|
||||
|
|
@ -119,7 +118,9 @@ def main(
|
|||
with open(samples_config_path, 'r') as stream:
|
||||
samples_config_content = yaml.safe_load(stream)
|
||||
|
||||
client = kfp.Client(host=host)
|
||||
client = kfp.Client()
|
||||
# TODO(Bobgy): avoid using private fields when getting loaded config
|
||||
host = client._existing_config.host
|
||||
client.create_experiment(
|
||||
name=experiment,
|
||||
description='An experiment with Kubeflow Pipelines v2 sample test runs.'
|
||||
|
|
|
|||
Loading…
Reference in New Issue